numba-cuda 0.22.0__cp312-cp312-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of numba-cuda might be problematic. Click here for more details.

Files changed (487)
  1. _numba_cuda_redirector.pth +4 -0
  2. _numba_cuda_redirector.py +89 -0
  3. numba_cuda/VERSION +1 -0
  4. numba_cuda/__init__.py +6 -0
  5. numba_cuda/_version.py +11 -0
  6. numba_cuda/numba/cuda/__init__.py +70 -0
  7. numba_cuda/numba/cuda/_internal/cuda_bf16.py +16394 -0
  8. numba_cuda/numba/cuda/_internal/cuda_fp16.py +8112 -0
  9. numba_cuda/numba/cuda/api.py +580 -0
  10. numba_cuda/numba/cuda/api_util.py +76 -0
  11. numba_cuda/numba/cuda/args.py +72 -0
  12. numba_cuda/numba/cuda/bf16.py +397 -0
  13. numba_cuda/numba/cuda/cache_hints.py +287 -0
  14. numba_cuda/numba/cuda/cext/__init__.py +2 -0
  15. numba_cuda/numba/cuda/cext/_devicearray.cpp +159 -0
  16. numba_cuda/numba/cuda/cext/_devicearray.cpython-312-aarch64-linux-gnu.so +0 -0
  17. numba_cuda/numba/cuda/cext/_devicearray.h +29 -0
  18. numba_cuda/numba/cuda/cext/_dispatcher.cpp +1098 -0
  19. numba_cuda/numba/cuda/cext/_dispatcher.cpython-312-aarch64-linux-gnu.so +0 -0
  20. numba_cuda/numba/cuda/cext/_hashtable.cpp +532 -0
  21. numba_cuda/numba/cuda/cext/_hashtable.h +135 -0
  22. numba_cuda/numba/cuda/cext/_helperlib.c +71 -0
  23. numba_cuda/numba/cuda/cext/_helperlib.cpython-312-aarch64-linux-gnu.so +0 -0
  24. numba_cuda/numba/cuda/cext/_helpermod.c +82 -0
  25. numba_cuda/numba/cuda/cext/_pymodule.h +38 -0
  26. numba_cuda/numba/cuda/cext/_typeconv.cpp +206 -0
  27. numba_cuda/numba/cuda/cext/_typeconv.cpython-312-aarch64-linux-gnu.so +0 -0
  28. numba_cuda/numba/cuda/cext/_typeof.cpp +1159 -0
  29. numba_cuda/numba/cuda/cext/_typeof.h +19 -0
  30. numba_cuda/numba/cuda/cext/capsulethunk.h +111 -0
  31. numba_cuda/numba/cuda/cext/mviewbuf.c +385 -0
  32. numba_cuda/numba/cuda/cext/mviewbuf.cpython-312-aarch64-linux-gnu.so +0 -0
  33. numba_cuda/numba/cuda/cext/typeconv.cpp +212 -0
  34. numba_cuda/numba/cuda/cext/typeconv.hpp +101 -0
  35. numba_cuda/numba/cuda/cg.py +67 -0
  36. numba_cuda/numba/cuda/cgutils.py +1294 -0
  37. numba_cuda/numba/cuda/cloudpickle/__init__.py +21 -0
  38. numba_cuda/numba/cuda/cloudpickle/cloudpickle.py +1598 -0
  39. numba_cuda/numba/cuda/cloudpickle/cloudpickle_fast.py +17 -0
  40. numba_cuda/numba/cuda/codegen.py +541 -0
  41. numba_cuda/numba/cuda/compiler.py +1396 -0
  42. numba_cuda/numba/cuda/core/analysis.py +758 -0
  43. numba_cuda/numba/cuda/core/annotations/__init__.py +0 -0
  44. numba_cuda/numba/cuda/core/annotations/pretty_annotate.py +288 -0
  45. numba_cuda/numba/cuda/core/annotations/type_annotations.py +305 -0
  46. numba_cuda/numba/cuda/core/base.py +1332 -0
  47. numba_cuda/numba/cuda/core/boxing.py +1411 -0
  48. numba_cuda/numba/cuda/core/bytecode.py +728 -0
  49. numba_cuda/numba/cuda/core/byteflow.py +2346 -0
  50. numba_cuda/numba/cuda/core/caching.py +744 -0
  51. numba_cuda/numba/cuda/core/callconv.py +392 -0
  52. numba_cuda/numba/cuda/core/codegen.py +171 -0
  53. numba_cuda/numba/cuda/core/compiler.py +199 -0
  54. numba_cuda/numba/cuda/core/compiler_lock.py +85 -0
  55. numba_cuda/numba/cuda/core/compiler_machinery.py +497 -0
  56. numba_cuda/numba/cuda/core/config.py +650 -0
  57. numba_cuda/numba/cuda/core/consts.py +124 -0
  58. numba_cuda/numba/cuda/core/controlflow.py +989 -0
  59. numba_cuda/numba/cuda/core/entrypoints.py +57 -0
  60. numba_cuda/numba/cuda/core/environment.py +66 -0
  61. numba_cuda/numba/cuda/core/errors.py +917 -0
  62. numba_cuda/numba/cuda/core/event.py +511 -0
  63. numba_cuda/numba/cuda/core/funcdesc.py +330 -0
  64. numba_cuda/numba/cuda/core/generators.py +387 -0
  65. numba_cuda/numba/cuda/core/imputils.py +509 -0
  66. numba_cuda/numba/cuda/core/inline_closurecall.py +1787 -0
  67. numba_cuda/numba/cuda/core/interpreter.py +3617 -0
  68. numba_cuda/numba/cuda/core/ir.py +1812 -0
  69. numba_cuda/numba/cuda/core/ir_utils.py +2638 -0
  70. numba_cuda/numba/cuda/core/optional.py +129 -0
  71. numba_cuda/numba/cuda/core/options.py +262 -0
  72. numba_cuda/numba/cuda/core/postproc.py +249 -0
  73. numba_cuda/numba/cuda/core/pythonapi.py +1859 -0
  74. numba_cuda/numba/cuda/core/registry.py +46 -0
  75. numba_cuda/numba/cuda/core/removerefctpass.py +123 -0
  76. numba_cuda/numba/cuda/core/rewrites/__init__.py +26 -0
  77. numba_cuda/numba/cuda/core/rewrites/ir_print.py +91 -0
  78. numba_cuda/numba/cuda/core/rewrites/registry.py +104 -0
  79. numba_cuda/numba/cuda/core/rewrites/static_binop.py +41 -0
  80. numba_cuda/numba/cuda/core/rewrites/static_getitem.py +189 -0
  81. numba_cuda/numba/cuda/core/rewrites/static_raise.py +100 -0
  82. numba_cuda/numba/cuda/core/sigutils.py +68 -0
  83. numba_cuda/numba/cuda/core/ssa.py +498 -0
  84. numba_cuda/numba/cuda/core/targetconfig.py +330 -0
  85. numba_cuda/numba/cuda/core/tracing.py +231 -0
  86. numba_cuda/numba/cuda/core/transforms.py +956 -0
  87. numba_cuda/numba/cuda/core/typed_passes.py +867 -0
  88. numba_cuda/numba/cuda/core/typeinfer.py +1950 -0
  89. numba_cuda/numba/cuda/core/unsafe/__init__.py +0 -0
  90. numba_cuda/numba/cuda/core/unsafe/bytes.py +67 -0
  91. numba_cuda/numba/cuda/core/unsafe/eh.py +67 -0
  92. numba_cuda/numba/cuda/core/unsafe/refcount.py +98 -0
  93. numba_cuda/numba/cuda/core/untyped_passes.py +1979 -0
  94. numba_cuda/numba/cuda/cpython/builtins.py +1153 -0
  95. numba_cuda/numba/cuda/cpython/charseq.py +1218 -0
  96. numba_cuda/numba/cuda/cpython/cmathimpl.py +560 -0
  97. numba_cuda/numba/cuda/cpython/enumimpl.py +103 -0
  98. numba_cuda/numba/cuda/cpython/iterators.py +167 -0
  99. numba_cuda/numba/cuda/cpython/listobj.py +1326 -0
  100. numba_cuda/numba/cuda/cpython/mathimpl.py +499 -0
  101. numba_cuda/numba/cuda/cpython/numbers.py +1475 -0
  102. numba_cuda/numba/cuda/cpython/rangeobj.py +289 -0
  103. numba_cuda/numba/cuda/cpython/slicing.py +322 -0
  104. numba_cuda/numba/cuda/cpython/tupleobj.py +456 -0
  105. numba_cuda/numba/cuda/cpython/unicode.py +2865 -0
  106. numba_cuda/numba/cuda/cpython/unicode_support.py +1597 -0
  107. numba_cuda/numba/cuda/cpython/unsafe/__init__.py +0 -0
  108. numba_cuda/numba/cuda/cpython/unsafe/numbers.py +64 -0
  109. numba_cuda/numba/cuda/cpython/unsafe/tuple.py +92 -0
  110. numba_cuda/numba/cuda/cuda_paths.py +691 -0
  111. numba_cuda/numba/cuda/cudadecl.py +543 -0
  112. numba_cuda/numba/cuda/cudadrv/__init__.py +14 -0
  113. numba_cuda/numba/cuda/cudadrv/devicearray.py +954 -0
  114. numba_cuda/numba/cuda/cudadrv/devices.py +249 -0
  115. numba_cuda/numba/cuda/cudadrv/driver.py +3238 -0
  116. numba_cuda/numba/cuda/cudadrv/drvapi.py +435 -0
  117. numba_cuda/numba/cuda/cudadrv/dummyarray.py +562 -0
  118. numba_cuda/numba/cuda/cudadrv/enums.py +613 -0
  119. numba_cuda/numba/cuda/cudadrv/error.py +48 -0
  120. numba_cuda/numba/cuda/cudadrv/libs.py +220 -0
  121. numba_cuda/numba/cuda/cudadrv/linkable_code.py +184 -0
  122. numba_cuda/numba/cuda/cudadrv/mappings.py +14 -0
  123. numba_cuda/numba/cuda/cudadrv/ndarray.py +26 -0
  124. numba_cuda/numba/cuda/cudadrv/nvrtc.py +193 -0
  125. numba_cuda/numba/cuda/cudadrv/nvvm.py +756 -0
  126. numba_cuda/numba/cuda/cudadrv/rtapi.py +13 -0
  127. numba_cuda/numba/cuda/cudadrv/runtime.py +34 -0
  128. numba_cuda/numba/cuda/cudaimpl.py +983 -0
  129. numba_cuda/numba/cuda/cudamath.py +149 -0
  130. numba_cuda/numba/cuda/datamodel/__init__.py +7 -0
  131. numba_cuda/numba/cuda/datamodel/cuda_manager.py +66 -0
  132. numba_cuda/numba/cuda/datamodel/cuda_models.py +1446 -0
  133. numba_cuda/numba/cuda/datamodel/cuda_packer.py +224 -0
  134. numba_cuda/numba/cuda/datamodel/cuda_registry.py +22 -0
  135. numba_cuda/numba/cuda/datamodel/cuda_testing.py +153 -0
  136. numba_cuda/numba/cuda/datamodel/manager.py +11 -0
  137. numba_cuda/numba/cuda/datamodel/models.py +9 -0
  138. numba_cuda/numba/cuda/datamodel/packer.py +9 -0
  139. numba_cuda/numba/cuda/datamodel/registry.py +11 -0
  140. numba_cuda/numba/cuda/datamodel/testing.py +11 -0
  141. numba_cuda/numba/cuda/debuginfo.py +997 -0
  142. numba_cuda/numba/cuda/decorators.py +294 -0
  143. numba_cuda/numba/cuda/descriptor.py +35 -0
  144. numba_cuda/numba/cuda/device_init.py +155 -0
  145. numba_cuda/numba/cuda/deviceufunc.py +1021 -0
  146. numba_cuda/numba/cuda/dispatcher.py +2463 -0
  147. numba_cuda/numba/cuda/errors.py +72 -0
  148. numba_cuda/numba/cuda/extending.py +697 -0
  149. numba_cuda/numba/cuda/flags.py +178 -0
  150. numba_cuda/numba/cuda/fp16.py +357 -0
  151. numba_cuda/numba/cuda/include/12/cuda_bf16.h +5118 -0
  152. numba_cuda/numba/cuda/include/12/cuda_bf16.hpp +3865 -0
  153. numba_cuda/numba/cuda/include/12/cuda_fp16.h +5363 -0
  154. numba_cuda/numba/cuda/include/12/cuda_fp16.hpp +3483 -0
  155. numba_cuda/numba/cuda/include/13/cuda_bf16.h +5118 -0
  156. numba_cuda/numba/cuda/include/13/cuda_bf16.hpp +3865 -0
  157. numba_cuda/numba/cuda/include/13/cuda_fp16.h +5363 -0
  158. numba_cuda/numba/cuda/include/13/cuda_fp16.hpp +3483 -0
  159. numba_cuda/numba/cuda/initialize.py +24 -0
  160. numba_cuda/numba/cuda/intrinsics.py +531 -0
  161. numba_cuda/numba/cuda/itanium_mangler.py +214 -0
  162. numba_cuda/numba/cuda/kernels/__init__.py +2 -0
  163. numba_cuda/numba/cuda/kernels/reduction.py +265 -0
  164. numba_cuda/numba/cuda/kernels/transpose.py +65 -0
  165. numba_cuda/numba/cuda/libdevice.py +3386 -0
  166. numba_cuda/numba/cuda/libdevicedecl.py +20 -0
  167. numba_cuda/numba/cuda/libdevicefuncs.py +1060 -0
  168. numba_cuda/numba/cuda/libdeviceimpl.py +88 -0
  169. numba_cuda/numba/cuda/locks.py +19 -0
  170. numba_cuda/numba/cuda/lowering.py +1980 -0
  171. numba_cuda/numba/cuda/mathimpl.py +374 -0
  172. numba_cuda/numba/cuda/memory_management/__init__.py +4 -0
  173. numba_cuda/numba/cuda/memory_management/memsys.cu +99 -0
  174. numba_cuda/numba/cuda/memory_management/memsys.cuh +22 -0
  175. numba_cuda/numba/cuda/memory_management/nrt.cu +212 -0
  176. numba_cuda/numba/cuda/memory_management/nrt.cuh +48 -0
  177. numba_cuda/numba/cuda/memory_management/nrt.py +390 -0
  178. numba_cuda/numba/cuda/memory_management/nrt_context.py +438 -0
  179. numba_cuda/numba/cuda/misc/appdirs.py +594 -0
  180. numba_cuda/numba/cuda/misc/cffiimpl.py +24 -0
  181. numba_cuda/numba/cuda/misc/coverage_support.py +43 -0
  182. numba_cuda/numba/cuda/misc/dump_style.py +41 -0
  183. numba_cuda/numba/cuda/misc/findlib.py +75 -0
  184. numba_cuda/numba/cuda/misc/firstlinefinder.py +96 -0
  185. numba_cuda/numba/cuda/misc/gdb_hook.py +240 -0
  186. numba_cuda/numba/cuda/misc/literal.py +28 -0
  187. numba_cuda/numba/cuda/misc/llvm_pass_timings.py +412 -0
  188. numba_cuda/numba/cuda/misc/special.py +94 -0
  189. numba_cuda/numba/cuda/models.py +56 -0
  190. numba_cuda/numba/cuda/np/arraymath.py +5130 -0
  191. numba_cuda/numba/cuda/np/arrayobj.py +7635 -0
  192. numba_cuda/numba/cuda/np/extensions.py +11 -0
  193. numba_cuda/numba/cuda/np/linalg.py +3087 -0
  194. numba_cuda/numba/cuda/np/math/__init__.py +0 -0
  195. numba_cuda/numba/cuda/np/math/cmathimpl.py +558 -0
  196. numba_cuda/numba/cuda/np/math/mathimpl.py +487 -0
  197. numba_cuda/numba/cuda/np/math/numbers.py +1461 -0
  198. numba_cuda/numba/cuda/np/npdatetime.py +969 -0
  199. numba_cuda/numba/cuda/np/npdatetime_helpers.py +217 -0
  200. numba_cuda/numba/cuda/np/npyfuncs.py +1808 -0
  201. numba_cuda/numba/cuda/np/npyimpl.py +1027 -0
  202. numba_cuda/numba/cuda/np/numpy_support.py +798 -0
  203. numba_cuda/numba/cuda/np/polynomial/__init__.py +4 -0
  204. numba_cuda/numba/cuda/np/polynomial/polynomial_core.py +242 -0
  205. numba_cuda/numba/cuda/np/polynomial/polynomial_functions.py +380 -0
  206. numba_cuda/numba/cuda/np/ufunc/__init__.py +4 -0
  207. numba_cuda/numba/cuda/np/ufunc/decorators.py +203 -0
  208. numba_cuda/numba/cuda/np/ufunc/sigparse.py +68 -0
  209. numba_cuda/numba/cuda/np/ufunc/ufuncbuilder.py +65 -0
  210. numba_cuda/numba/cuda/np/ufunc_db.py +1282 -0
  211. numba_cuda/numba/cuda/np/unsafe/__init__.py +0 -0
  212. numba_cuda/numba/cuda/np/unsafe/ndarray.py +84 -0
  213. numba_cuda/numba/cuda/nvvmutils.py +254 -0
  214. numba_cuda/numba/cuda/printimpl.py +126 -0
  215. numba_cuda/numba/cuda/random.py +308 -0
  216. numba_cuda/numba/cuda/reshape_funcs.cu +156 -0
  217. numba_cuda/numba/cuda/serialize.py +267 -0
  218. numba_cuda/numba/cuda/simulator/__init__.py +63 -0
  219. numba_cuda/numba/cuda/simulator/_internal/__init__.py +4 -0
  220. numba_cuda/numba/cuda/simulator/_internal/cuda_bf16.py +2 -0
  221. numba_cuda/numba/cuda/simulator/api.py +179 -0
  222. numba_cuda/numba/cuda/simulator/bf16.py +4 -0
  223. numba_cuda/numba/cuda/simulator/compiler.py +38 -0
  224. numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +11 -0
  225. numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +462 -0
  226. numba_cuda/numba/cuda/simulator/cudadrv/devices.py +122 -0
  227. numba_cuda/numba/cuda/simulator/cudadrv/driver.py +66 -0
  228. numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +7 -0
  229. numba_cuda/numba/cuda/simulator/cudadrv/dummyarray.py +7 -0
  230. numba_cuda/numba/cuda/simulator/cudadrv/error.py +10 -0
  231. numba_cuda/numba/cuda/simulator/cudadrv/libs.py +10 -0
  232. numba_cuda/numba/cuda/simulator/cudadrv/linkable_code.py +61 -0
  233. numba_cuda/numba/cuda/simulator/cudadrv/nvrtc.py +11 -0
  234. numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +32 -0
  235. numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +22 -0
  236. numba_cuda/numba/cuda/simulator/dispatcher.py +11 -0
  237. numba_cuda/numba/cuda/simulator/kernel.py +320 -0
  238. numba_cuda/numba/cuda/simulator/kernelapi.py +509 -0
  239. numba_cuda/numba/cuda/simulator/memory_management/__init__.py +4 -0
  240. numba_cuda/numba/cuda/simulator/memory_management/nrt.py +21 -0
  241. numba_cuda/numba/cuda/simulator/reduction.py +19 -0
  242. numba_cuda/numba/cuda/simulator/tests/support.py +4 -0
  243. numba_cuda/numba/cuda/simulator/vector_types.py +65 -0
  244. numba_cuda/numba/cuda/simulator_init.py +18 -0
  245. numba_cuda/numba/cuda/stubs.py +624 -0
  246. numba_cuda/numba/cuda/target.py +505 -0
  247. numba_cuda/numba/cuda/testing.py +347 -0
  248. numba_cuda/numba/cuda/tests/__init__.py +62 -0
  249. numba_cuda/numba/cuda/tests/benchmarks/__init__.py +0 -0
  250. numba_cuda/numba/cuda/tests/benchmarks/test_kernel_launch.py +119 -0
  251. numba_cuda/numba/cuda/tests/cloudpickle_main_class.py +9 -0
  252. numba_cuda/numba/cuda/tests/core/serialize_usecases.py +113 -0
  253. numba_cuda/numba/cuda/tests/core/test_itanium_mangler.py +83 -0
  254. numba_cuda/numba/cuda/tests/core/test_serialize.py +371 -0
  255. numba_cuda/numba/cuda/tests/cudadrv/__init__.py +9 -0
  256. numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +147 -0
  257. numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +161 -0
  258. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +397 -0
  259. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +24 -0
  260. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +180 -0
  261. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +313 -0
  262. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +191 -0
  263. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +621 -0
  264. numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +247 -0
  265. numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +100 -0
  266. numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +200 -0
  267. numba_cuda/numba/cuda/tests/cudadrv/test_events.py +53 -0
  268. numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +72 -0
  269. numba_cuda/numba/cuda/tests/cudadrv/test_init.py +138 -0
  270. numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +43 -0
  271. numba_cuda/numba/cuda/tests/cudadrv/test_is_fp16.py +15 -0
  272. numba_cuda/numba/cuda/tests/cudadrv/test_linkable_code.py +58 -0
  273. numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +348 -0
  274. numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +128 -0
  275. numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py +301 -0
  276. numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +174 -0
  277. numba_cuda/numba/cuda/tests/cudadrv/test_nvrtc.py +28 -0
  278. numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +185 -0
  279. numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +39 -0
  280. numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +23 -0
  281. numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +38 -0
  282. numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +48 -0
  283. numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +44 -0
  284. numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +127 -0
  285. numba_cuda/numba/cuda/tests/cudapy/__init__.py +9 -0
  286. numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +231 -0
  287. numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +50 -0
  288. numba_cuda/numba/cuda/tests/cudapy/cg_cache_usecases.py +36 -0
  289. numba_cuda/numba/cuda/tests/cudapy/complex_usecases.py +116 -0
  290. numba_cuda/numba/cuda/tests/cudapy/enum_usecases.py +59 -0
  291. numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +62 -0
  292. numba_cuda/numba/cuda/tests/cudapy/jitlink.ptx +28 -0
  293. numba_cuda/numba/cuda/tests/cudapy/overload_usecases.py +33 -0
  294. numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +104 -0
  295. numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +47 -0
  296. numba_cuda/numba/cuda/tests/cudapy/test_analysis.py +1122 -0
  297. numba_cuda/numba/cuda/tests/cudapy/test_array.py +344 -0
  298. numba_cuda/numba/cuda/tests/cudapy/test_array_alignment.py +268 -0
  299. numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +203 -0
  300. numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +63 -0
  301. numba_cuda/numba/cuda/tests/cudapy/test_array_reductions.py +360 -0
  302. numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +1815 -0
  303. numba_cuda/numba/cuda/tests/cudapy/test_bfloat16.py +599 -0
  304. numba_cuda/numba/cuda/tests/cudapy/test_bfloat16_bindings.py +377 -0
  305. numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +160 -0
  306. numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +27 -0
  307. numba_cuda/numba/cuda/tests/cudapy/test_byteflow.py +98 -0
  308. numba_cuda/numba/cuda/tests/cudapy/test_cache_hints.py +210 -0
  309. numba_cuda/numba/cuda/tests/cudapy/test_caching.py +683 -0
  310. numba_cuda/numba/cuda/tests/cudapy/test_casting.py +265 -0
  311. numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +42 -0
  312. numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +718 -0
  313. numba_cuda/numba/cuda/tests/cudapy/test_complex.py +370 -0
  314. numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +23 -0
  315. numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +142 -0
  316. numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +178 -0
  317. numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +193 -0
  318. numba_cuda/numba/cuda/tests/cudapy/test_copy_propagate.py +131 -0
  319. numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +438 -0
  320. numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +94 -0
  321. numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +101 -0
  322. numba_cuda/numba/cuda/tests/cudapy/test_debug.py +105 -0
  323. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +978 -0
  324. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo_types.py +476 -0
  325. numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +500 -0
  326. numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +820 -0
  327. numba_cuda/numba/cuda/tests/cudapy/test_enums.py +152 -0
  328. numba_cuda/numba/cuda/tests/cudapy/test_errors.py +111 -0
  329. numba_cuda/numba/cuda/tests/cudapy/test_exception.py +170 -0
  330. numba_cuda/numba/cuda/tests/cudapy/test_extending.py +1088 -0
  331. numba_cuda/numba/cuda/tests/cudapy/test_extending_types.py +71 -0
  332. numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +265 -0
  333. numba_cuda/numba/cuda/tests/cudapy/test_flow_control.py +1433 -0
  334. numba_cuda/numba/cuda/tests/cudapy/test_forall.py +57 -0
  335. numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +34 -0
  336. numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +69 -0
  337. numba_cuda/numba/cuda/tests/cudapy/test_globals.py +62 -0
  338. numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +474 -0
  339. numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +167 -0
  340. numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +92 -0
  341. numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +39 -0
  342. numba_cuda/numba/cuda/tests/cudapy/test_inline.py +170 -0
  343. numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +255 -0
  344. numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +1219 -0
  345. numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +263 -0
  346. numba_cuda/numba/cuda/tests/cudapy/test_ir.py +598 -0
  347. numba_cuda/numba/cuda/tests/cudapy/test_ir_utils.py +276 -0
  348. numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +101 -0
  349. numba_cuda/numba/cuda/tests/cudapy/test_lang.py +68 -0
  350. numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +123 -0
  351. numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +194 -0
  352. numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +220 -0
  353. numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +173 -0
  354. numba_cuda/numba/cuda/tests/cudapy/test_make_function_to_jit_function.py +364 -0
  355. numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +47 -0
  356. numba_cuda/numba/cuda/tests/cudapy/test_math.py +842 -0
  357. numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +76 -0
  358. numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +78 -0
  359. numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +25 -0
  360. numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +145 -0
  361. numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +39 -0
  362. numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +82 -0
  363. numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +53 -0
  364. numba_cuda/numba/cuda/tests/cudapy/test_operator.py +504 -0
  365. numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +93 -0
  366. numba_cuda/numba/cuda/tests/cudapy/test_overload.py +402 -0
  367. numba_cuda/numba/cuda/tests/cudapy/test_powi.py +128 -0
  368. numba_cuda/numba/cuda/tests/cudapy/test_print.py +193 -0
  369. numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +37 -0
  370. numba_cuda/numba/cuda/tests/cudapy/test_random.py +117 -0
  371. numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +614 -0
  372. numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +130 -0
  373. numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +94 -0
  374. numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +83 -0
  375. numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +86 -0
  376. numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +40 -0
  377. numba_cuda/numba/cuda/tests/cudapy/test_sm.py +457 -0
  378. numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +233 -0
  379. numba_cuda/numba/cuda/tests/cudapy/test_ssa.py +454 -0
  380. numba_cuda/numba/cuda/tests/cudapy/test_stream_api.py +56 -0
  381. numba_cuda/numba/cuda/tests/cudapy/test_sync.py +277 -0
  382. numba_cuda/numba/cuda/tests/cudapy/test_tracing.py +200 -0
  383. numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +90 -0
  384. numba_cuda/numba/cuda/tests/cudapy/test_typeconv.py +333 -0
  385. numba_cuda/numba/cuda/tests/cudapy/test_typeinfer.py +538 -0
  386. numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +585 -0
  387. numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +42 -0
  388. numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +485 -0
  389. numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +312 -0
  390. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +23 -0
  391. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +183 -0
  392. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +40 -0
  393. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +40 -0
  394. numba_cuda/numba/cuda/tests/cudapy/test_warning.py +206 -0
  395. numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +446 -0
  396. numba_cuda/numba/cuda/tests/cudasim/__init__.py +9 -0
  397. numba_cuda/numba/cuda/tests/cudasim/support.py +9 -0
  398. numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +111 -0
  399. numba_cuda/numba/cuda/tests/data/__init__.py +2 -0
  400. numba_cuda/numba/cuda/tests/data/cta_barrier.cu +28 -0
  401. numba_cuda/numba/cuda/tests/data/cuda_include.cu +10 -0
  402. numba_cuda/numba/cuda/tests/data/error.cu +12 -0
  403. numba_cuda/numba/cuda/tests/data/include/add.cuh +8 -0
  404. numba_cuda/numba/cuda/tests/data/jitlink.cu +28 -0
  405. numba_cuda/numba/cuda/tests/data/jitlink.ptx +49 -0
  406. numba_cuda/numba/cuda/tests/data/warn.cu +12 -0
  407. numba_cuda/numba/cuda/tests/doc_examples/__init__.py +9 -0
  408. numba_cuda/numba/cuda/tests/doc_examples/ffi/__init__.py +2 -0
  409. numba_cuda/numba/cuda/tests/doc_examples/ffi/functions.cu +54 -0
  410. numba_cuda/numba/cuda/tests/doc_examples/ffi/include/mul.cuh +8 -0
  411. numba_cuda/numba/cuda/tests/doc_examples/ffi/saxpy.cu +14 -0
  412. numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +86 -0
  413. numba_cuda/numba/cuda/tests/doc_examples/test_cpointer.py +68 -0
  414. numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +81 -0
  415. numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +141 -0
  416. numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +160 -0
  417. numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +180 -0
  418. numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +119 -0
  419. numba_cuda/numba/cuda/tests/doc_examples/test_random.py +66 -0
  420. numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +80 -0
  421. numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +206 -0
  422. numba_cuda/numba/cuda/tests/doc_examples/test_ufunc.py +53 -0
  423. numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +76 -0
  424. numba_cuda/numba/cuda/tests/nocuda/__init__.py +9 -0
  425. numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +452 -0
  426. numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +48 -0
  427. numba_cuda/numba/cuda/tests/nocuda/test_import.py +63 -0
  428. numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +252 -0
  429. numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +59 -0
  430. numba_cuda/numba/cuda/tests/nrt/__init__.py +9 -0
  431. numba_cuda/numba/cuda/tests/nrt/test_nrt.py +387 -0
  432. numba_cuda/numba/cuda/tests/nrt/test_nrt_refct.py +124 -0
  433. numba_cuda/numba/cuda/tests/support.py +900 -0
  434. numba_cuda/numba/cuda/typeconv/__init__.py +4 -0
  435. numba_cuda/numba/cuda/typeconv/castgraph.py +137 -0
  436. numba_cuda/numba/cuda/typeconv/rules.py +63 -0
  437. numba_cuda/numba/cuda/typeconv/typeconv.py +121 -0
  438. numba_cuda/numba/cuda/types/__init__.py +233 -0
  439. numba_cuda/numba/cuda/types/__init__.pyi +167 -0
  440. numba_cuda/numba/cuda/types/abstract.py +9 -0
  441. numba_cuda/numba/cuda/types/common.py +9 -0
  442. numba_cuda/numba/cuda/types/containers.py +9 -0
  443. numba_cuda/numba/cuda/types/cuda_abstract.py +533 -0
  444. numba_cuda/numba/cuda/types/cuda_common.py +110 -0
  445. numba_cuda/numba/cuda/types/cuda_containers.py +971 -0
  446. numba_cuda/numba/cuda/types/cuda_function_type.py +230 -0
  447. numba_cuda/numba/cuda/types/cuda_functions.py +798 -0
  448. numba_cuda/numba/cuda/types/cuda_iterators.py +120 -0
  449. numba_cuda/numba/cuda/types/cuda_misc.py +569 -0
  450. numba_cuda/numba/cuda/types/cuda_npytypes.py +690 -0
  451. numba_cuda/numba/cuda/types/cuda_scalars.py +280 -0
  452. numba_cuda/numba/cuda/types/ext_types.py +101 -0
  453. numba_cuda/numba/cuda/types/function_type.py +11 -0
  454. numba_cuda/numba/cuda/types/functions.py +9 -0
  455. numba_cuda/numba/cuda/types/iterators.py +9 -0
  456. numba_cuda/numba/cuda/types/misc.py +9 -0
  457. numba_cuda/numba/cuda/types/npytypes.py +9 -0
  458. numba_cuda/numba/cuda/types/scalars.py +9 -0
  459. numba_cuda/numba/cuda/typing/__init__.py +19 -0
  460. numba_cuda/numba/cuda/typing/arraydecl.py +939 -0
  461. numba_cuda/numba/cuda/typing/asnumbatype.py +130 -0
  462. numba_cuda/numba/cuda/typing/bufproto.py +70 -0
  463. numba_cuda/numba/cuda/typing/builtins.py +1209 -0
  464. numba_cuda/numba/cuda/typing/cffi_utils.py +219 -0
  465. numba_cuda/numba/cuda/typing/cmathdecl.py +47 -0
  466. numba_cuda/numba/cuda/typing/collections.py +138 -0
  467. numba_cuda/numba/cuda/typing/context.py +782 -0
  468. numba_cuda/numba/cuda/typing/ctypes_utils.py +125 -0
  469. numba_cuda/numba/cuda/typing/dictdecl.py +63 -0
  470. numba_cuda/numba/cuda/typing/enumdecl.py +74 -0
  471. numba_cuda/numba/cuda/typing/listdecl.py +147 -0
  472. numba_cuda/numba/cuda/typing/mathdecl.py +158 -0
  473. numba_cuda/numba/cuda/typing/npdatetime.py +322 -0
  474. numba_cuda/numba/cuda/typing/npydecl.py +749 -0
  475. numba_cuda/numba/cuda/typing/setdecl.py +115 -0
  476. numba_cuda/numba/cuda/typing/templates.py +1446 -0
  477. numba_cuda/numba/cuda/typing/typeof.py +301 -0
  478. numba_cuda/numba/cuda/ufuncs.py +746 -0
  479. numba_cuda/numba/cuda/utils.py +724 -0
  480. numba_cuda/numba/cuda/vector_types.py +214 -0
  481. numba_cuda/numba/cuda/vectorizers.py +260 -0
  482. numba_cuda-0.22.0.dist-info/METADATA +109 -0
  483. numba_cuda-0.22.0.dist-info/RECORD +487 -0
  484. numba_cuda-0.22.0.dist-info/WHEEL +6 -0
  485. numba_cuda-0.22.0.dist-info/licenses/LICENSE +26 -0
  486. numba_cuda-0.22.0.dist-info/licenses/LICENSE.numba +24 -0
  487. numba_cuda-0.22.0.dist-info/top_level.txt +1 -0
File without changes
@@ -0,0 +1,84 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
4
+ """
5
+ This file provides internal compiler utilities that support certain special
6
+ operations with numpy.
7
+ """
8
+
9
+ from numba.cuda import types
10
+ from numba.cuda.cgutils import unpack_tuple
11
+ from numba.cuda.extending import intrinsic
12
+ from numba.cuda import typing
13
+ from numba.cuda.core.imputils import impl_ret_new_ref
14
+ from numba.cuda.core.errors import RequireLiteralValue, TypingError
15
+
16
+ from numba.cuda.cpython.unsafe.tuple import tuple_setitem
17
+
18
+
19
@intrinsic
def empty_inferred(typingctx, shape):
    """Intrinsic variant of numpy.empty with a dtype left for inference.

    Expects `shape` to be an int-tuple.

    The signature is built with ``types.undefined`` as the array dtype; the
    type-inferencer has special logic to "refine" that undefined dtype
    before code generation runs.

    Returns the ``(signature, codegen)`` pair for the intrinsic.
    """
    # Imported inside the function, as in the original code.
    from numba.cuda.np.arrayobj import _empty_nd_impl

    # Build the signature: a C-layout array with one dimension per element
    # of the shape tuple and a dtype that is still undefined.
    ndim = len(shape)
    undefined_array = types.Array(
        ndim=ndim, layout="C", dtype=types.undefined
    )
    sig = undefined_array(shape)

    def codegen(context, builder, signature, args):
        ret_type = signature.return_type
        # By lowering time the return type must have been refined.
        assert ret_type.is_precise()
        dims = unpack_tuple(builder, args[0])
        # Delegate the actual allocation to the np.empty implementation.
        ary = _empty_nd_impl(context, builder, ret_type, dims)
        return impl_ret_new_ref(context, builder, ret_type, ary._getvalue())

    return sig, codegen
44
+
45
+
46
@intrinsic
def to_fixed_tuple(typingctx, array, length):
    """Convert *array* into a tuple of *length* elements.

    Returns ``UniTuple(array.dtype, length)``.

    *length* must be typed as an integer literal and *array* must be
    one-dimensional.

    ** Warning **
    - No boundchecking.
      If *length* is longer than *array.size*, the behavior is undefined.

    Raises ``RequireLiteralValue`` when *length* is not an integer literal
    and ``TypingError`` when ``array.ndim`` is not 1.
    """
    if not isinstance(length, types.IntegerLiteral):
        raise RequireLiteralValue("*length* argument must be a constant")

    if array.ndim != 1:
        raise TypingError(f"Not supported on array.ndim={array.ndim}")

    # Resolve the result type from the literal length and the array dtype.
    tuple_size = int(length.literal_value)
    tuple_type = types.UniTuple(dtype=array.dtype, count=tuple_size)
    sig = tuple_type(array, length)

    def codegen(context, builder, signature, args):
        def impl(src, count, seed):
            # Fill the tuple one slot at a time, rebinding the result of
            # each tuple_setitem call.
            filled = seed
            for idx in range(count):
                filled = tuple_setitem(filled, idx, src[idx])
            return filled

        # The helper takes (array, intp length, tuple); the length is passed
        # as an intp constant rather than as the literal argument.
        inner_sig = typing.signature(
            tuple_type, signature.args[0], types.intp, tuple_type
        )
        intp_llty = context.get_value_type(types.intp)
        # Starting value for the tuple: an undef constant of the result type.
        undef_tuple = context.get_constant_undef(tuple_type)
        inner_args = [args[0], intp_llty(tuple_size), undef_tuple]

        return context.compile_internal(builder, impl, inner_sig, inner_args)

    return sig, codegen
@@ -0,0 +1,254 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
4
+ import itertools
5
+ from llvmlite import ir
6
+ from numba.cuda.core import targetconfig
7
+ from numba.cuda import cgutils
8
+ from .cudadrv import nvvm
9
+
10
+
11
def declare_atomic_cas_int(lmod, isize):
    """Return the ``___numba_atomic_i<isize>_cas_hack`` function of *lmod*.

    The function is obtained through ``cgutils.get_or_insert_function`` and
    has the type ``iN (iN*, iN, iN)`` where ``N`` is *isize*.
    """
    int_ty = ir.IntType(isize)
    signature = ir.FunctionType(
        int_ty, (ir.PointerType(int_ty), int_ty, int_ty)
    )
    symbol = "___numba_atomic_i" + str(isize) + "_cas_hack"
    return cgutils.get_or_insert_function(lmod, signature, symbol)
22
+
23
+
24
def atomic_cmpxchg(builder, lmod, isize, ptr, cmp, val):
    """Emit a ``cmpxchg`` on *ptr* and return element 0 of its result.

    Both orderings passed to ``builder.cmpxchg`` are ``"monotonic"``.
    *lmod* and *isize* are accepted but not used by this function.
    """
    result_pair = builder.cmpxchg(ptr, cmp, val, "monotonic", "monotonic")
    previous = builder.extract_value(result_pair, 0)
    return previous
27
+
28
+
29
def declare_atomic_add_float32(lmod):
    """Return the ``llvm.nvvm.atomic.load.add.f32.p0f32`` function of *lmod*.

    Obtained through ``cgutils.get_or_insert_function`` with the type
    ``float (float*, float)``; the pointer is in address space 0.
    """
    f32 = ir.FloatType()
    signature = ir.FunctionType(f32, (ir.PointerType(f32, 0), f32))
    return cgutils.get_or_insert_function(
        lmod, signature, "llvm.nvvm.atomic.load.add.f32.p0f32"
    )
35
+
36
+
37
def declare_atomic_add_float64(lmod):
    """Return the double-precision atomic-add function of *lmod*.

    The symbol depends on the compute capability found on top of the
    target ``ConfigStack``: ``llvm.nvvm.atomic.load.add.f64.p0f64`` for
    ``(6, 0)`` and above, ``___numba_atomic_double_add`` otherwise.
    The type is ``double (double*, double)`` in both cases.
    """
    compute_capability = targetconfig.ConfigStack().top().compute_capability
    symbol = (
        "llvm.nvvm.atomic.load.add.f64.p0f64"
        if compute_capability >= (6, 0)
        else "___numba_atomic_double_add"
    )
    f64 = ir.DoubleType()
    signature = ir.FunctionType(f64, (ir.PointerType(f64), f64))
    return cgutils.get_or_insert_function(lmod, signature, symbol)
47
+
48
+
49
def declare_atomic_sub_float32(lmod):
    """Return the ``___numba_atomic_float_sub`` function of *lmod*.

    Obtained through ``cgutils.get_or_insert_function`` with the type
    ``float (float*, float)``.
    """
    f32 = ir.FloatType()
    signature = ir.FunctionType(f32, (ir.PointerType(f32), f32))
    return cgutils.get_or_insert_function(
        lmod, signature, "___numba_atomic_float_sub"
    )
55
+
56
+
57
def declare_atomic_sub_float64(lmod):
    """Return the ``___numba_atomic_double_sub`` function of *lmod*.

    Obtained through ``cgutils.get_or_insert_function`` with the type
    ``double (double*, double)``.
    """
    f64 = ir.DoubleType()
    signature = ir.FunctionType(f64, (ir.PointerType(f64), f64))
    return cgutils.get_or_insert_function(
        lmod, signature, "___numba_atomic_double_sub"
    )
63
+
64
+
65
def declare_atomic_inc_int32(lmod):
    """Return the ``llvm.nvvm.atomic.load.inc.32.p0i32`` function of *lmod*.

    Obtained through ``cgutils.get_or_insert_function`` with the type
    ``i32 (i32*, i32)``.
    """
    i32 = ir.IntType(32)
    signature = ir.FunctionType(i32, (ir.PointerType(i32), i32))
    return cgutils.get_or_insert_function(
        lmod, signature, "llvm.nvvm.atomic.load.inc.32.p0i32"
    )
71
+
72
+
73
def declare_atomic_inc_int64(lmod):
    """Return the ``___numba_atomic_u64_inc`` function of *lmod*.

    Obtained through ``cgutils.get_or_insert_function`` with the type
    ``i64 (i64*, i64)``.
    """
    i64 = ir.IntType(64)
    signature = ir.FunctionType(i64, (ir.PointerType(i64), i64))
    return cgutils.get_or_insert_function(
        lmod, signature, "___numba_atomic_u64_inc"
    )
79
+
80
+
81
def declare_atomic_dec_int32(lmod):
    """Return the ``llvm.nvvm.atomic.load.dec.32.p0i32`` function of *lmod*.

    Obtained through ``cgutils.get_or_insert_function`` with the type
    ``i32 (i32*, i32)``.
    """
    i32 = ir.IntType(32)
    signature = ir.FunctionType(i32, (ir.PointerType(i32), i32))
    return cgutils.get_or_insert_function(
        lmod, signature, "llvm.nvvm.atomic.load.dec.32.p0i32"
    )
87
+
88
+
89
def declare_atomic_dec_int64(lmod):
    """Return the ``___numba_atomic_u64_dec`` function of *lmod*.

    Obtained through ``cgutils.get_or_insert_function`` with the type
    ``i64 (i64*, i64)``.
    """
    i64 = ir.IntType(64)
    signature = ir.FunctionType(i64, (ir.PointerType(i64), i64))
    return cgutils.get_or_insert_function(
        lmod, signature, "___numba_atomic_u64_dec"
    )
95
+
96
+
97
def declare_atomic_max_float32(lmod):
    """Return the ``___numba_atomic_float_max`` function of *lmod*.

    Obtained through ``cgutils.get_or_insert_function`` with the type
    ``float (float*, float)``.
    """
    f32 = ir.FloatType()
    signature = ir.FunctionType(f32, (ir.PointerType(f32), f32))
    return cgutils.get_or_insert_function(
        lmod, signature, "___numba_atomic_float_max"
    )
103
+
104
+
105
def declare_atomic_max_float64(lmod):
    """Return the ``___numba_atomic_double_max`` function of *lmod*.

    Obtained through ``cgutils.get_or_insert_function`` with the type
    ``double (double*, double)``.
    """
    f64 = ir.DoubleType()
    signature = ir.FunctionType(f64, (ir.PointerType(f64), f64))
    return cgutils.get_or_insert_function(
        lmod, signature, "___numba_atomic_double_max"
    )
111
+
112
+
113
def declare_atomic_min_float32(lmod):
    """Return the ``___numba_atomic_float_min`` function of *lmod*.

    Obtained through ``cgutils.get_or_insert_function`` with the type
    ``float (float*, float)``.
    """
    f32 = ir.FloatType()
    signature = ir.FunctionType(f32, (ir.PointerType(f32), f32))
    return cgutils.get_or_insert_function(
        lmod, signature, "___numba_atomic_float_min"
    )
119
+
120
+
121
def declare_atomic_min_float64(lmod):
    """Return the ``___numba_atomic_double_min`` function of *lmod*.

    Obtained through ``cgutils.get_or_insert_function`` with the type
    ``double (double*, double)``.
    """
    f64 = ir.DoubleType()
    signature = ir.FunctionType(f64, (ir.PointerType(f64), f64))
    return cgutils.get_or_insert_function(
        lmod, signature, "___numba_atomic_double_min"
    )
127
+
128
+
129
def declare_atomic_nanmax_float32(lmod):
    """Return the ``___numba_atomic_float_nanmax`` function of *lmod*.

    Obtained through ``cgutils.get_or_insert_function`` with the type
    ``float (float*, float)``.
    """
    f32 = ir.FloatType()
    signature = ir.FunctionType(f32, (ir.PointerType(f32), f32))
    return cgutils.get_or_insert_function(
        lmod, signature, "___numba_atomic_float_nanmax"
    )
135
+
136
+
137
def declare_atomic_nanmax_float64(lmod):
    """Return the ``___numba_atomic_double_nanmax`` function of *lmod*.

    Obtained through ``cgutils.get_or_insert_function`` with the type
    ``double (double*, double)``.
    """
    f64 = ir.DoubleType()
    signature = ir.FunctionType(f64, (ir.PointerType(f64), f64))
    return cgutils.get_or_insert_function(
        lmod, signature, "___numba_atomic_double_nanmax"
    )
143
+
144
+
145
def declare_atomic_nanmin_float32(lmod):
    """Return the ``___numba_atomic_float_nanmin`` function of *lmod*.

    Obtained through ``cgutils.get_or_insert_function`` with the type
    ``float (float*, float)``.
    """
    f32 = ir.FloatType()
    signature = ir.FunctionType(f32, (ir.PointerType(f32), f32))
    return cgutils.get_or_insert_function(
        lmod, signature, "___numba_atomic_float_nanmin"
    )
151
+
152
+
153
def declare_atomic_nanmin_float64(lmod):
    """Return the ``___numba_atomic_double_nanmin`` function of *lmod*.

    Obtained through ``cgutils.get_or_insert_function`` with the type
    ``double (double*, double)``.
    """
    f64 = ir.DoubleType()
    signature = ir.FunctionType(f64, (ir.PointerType(f64), f64))
    return cgutils.get_or_insert_function(
        lmod, signature, "___numba_atomic_double_nanmin"
    )
159
+
160
+
161
def declare_cudaCGGetIntrinsicHandle(lmod):
    """Return the ``cudaCGGetIntrinsicHandle`` function of *lmod*.

    Obtained through ``cgutils.get_or_insert_function`` with the type
    ``i64 (i32)``.
    """
    signature = ir.FunctionType(ir.IntType(64), (ir.IntType(32),))
    return cgutils.get_or_insert_function(
        lmod, signature, "cudaCGGetIntrinsicHandle"
    )
165
+
166
+
167
def declare_cudaCGSynchronize(lmod):
    """Return the ``cudaCGSynchronize`` function of *lmod*.

    Obtained through ``cgutils.get_or_insert_function`` with the type
    ``i32 (i64, i32)``.
    """
    signature = ir.FunctionType(
        ir.IntType(32), (ir.IntType(64), ir.IntType(32))
    )
    return cgutils.get_or_insert_function(
        lmod, signature, "cudaCGSynchronize"
    )
171
+
172
+
173
def declare_string(builder, value):
    """Emit *value* as a NUL-terminated constant and return an ``i8*`` to it.

    The text is UTF-8 encoded, stored in an internal-linkage global constant
    named ``"_str"`` in the ``nvvm.ADDRSPACE_CONSTANT`` address space of the
    module that owns the builder's current function, and the global is then
    address-space-cast to ``i8*``.
    """
    module = builder.basic_block.function.module
    payload = cgutils.make_bytearray(value.encode("utf-8") + b"\x00")

    global_var = cgutils.add_global_variable(
        module, payload.type, name="_str", addrspace=nvvm.ADDRSPACE_CONSTANT
    )
    global_var.linkage = "internal"
    global_var.global_constant = True
    global_var.initializer = payload

    char_ptr = ir.PointerType(ir.IntType(8))
    # NOTE(review): "generic" is passed as the third positional argument of
    # addrspacecast; in llvmlite that parameter appears to be the
    # instruction name rather than an address space - confirm.
    return builder.addrspacecast(global_var, char_ptr, "generic")
184
+
185
+
186
def declare_vprint(lmod):
    """Return the ``vprintf`` function of *lmod*.

    Obtained through ``cgutils.get_or_insert_function`` with the type
    ``i32 (i8*, i8*)``.
    """
    char_ptr = ir.PointerType(ir.IntType(8))
    # NOTE: the second argument to vprintf() points to the variable-length
    # array of arguments (after the format)
    signature = ir.FunctionType(ir.IntType(32), [char_ptr, char_ptr])
    return cgutils.get_or_insert_function(lmod, signature, "vprintf")
193
+
194
+
195
+ # -----------------------------------------------------------------------------
196
+
197
# Maps each short special-register name accepted by call_sreg() to the
# intrinsic name used to read it; every value is the key prefixed with
# "llvm.nvvm.read.ptx.sreg.".
SREG_MAPPING = {
    sreg_name: "llvm.nvvm.read.ptx.sreg." + sreg_name
    for sreg_name in (
        "tid.x",
        "tid.y",
        "tid.z",
        "ntid.x",
        "ntid.y",
        "ntid.z",
        "ctaid.x",
        "ctaid.y",
        "ctaid.z",
        "nctaid.x",
        "nctaid.y",
        "nctaid.z",
        "warpsize",
        "laneid",
    )
}
213
+
214
+
215
def call_sreg(builder, name):
    """Emit a call that reads the special register *name*.

    *name* must be a key of ``SREG_MAPPING`` (a ``KeyError`` is raised
    otherwise). The mapped intrinsic is obtained in ``builder.module`` with
    the type ``i32 ()`` and called with no arguments; the call is returned.
    """
    intrinsic = cgutils.get_or_insert_function(
        builder.module,
        ir.FunctionType(ir.IntType(32), ()),
        SREG_MAPPING[name],
    )
    return builder.call(intrinsic, ())
220
+
221
+
222
class SRegBuilder(object):
    """Helper that reads special registers through a fixed IR builder."""

    def __init__(self, builder):
        self.builder = builder

    def tid(self, xyz):
        """Read the ``tid`` register for axis *xyz* (``"x"``/``"y"``/``"z"``)."""
        return call_sreg(self.builder, "tid.%s" % xyz)

    def ctaid(self, xyz):
        """Read the ``ctaid`` register for axis *xyz*."""
        return call_sreg(self.builder, "ctaid.%s" % xyz)

    def ntid(self, xyz):
        """Read the ``ntid`` register for axis *xyz*."""
        return call_sreg(self.builder, "ntid.%s" % xyz)

    def nctaid(self, xyz):
        """Read the ``nctaid`` register for axis *xyz*."""
        return call_sreg(self.builder, "nctaid.%s" % xyz)

    def getdim(self, xyz):
        """Return ``ntid * ctaid + tid`` for axis *xyz* as a 64-bit value.

        Each 32-bit register read is sign-extended to ``i64`` before the
        multiply and add.
        """
        builder = self.builder
        wide = ir.IntType(64)
        thread_idx = builder.sext(self.tid(xyz), wide)
        block_size = builder.sext(self.ntid(xyz), wide)
        # This is ctaid, not nctaid.
        block_idx = builder.sext(self.ctaid(xyz), wide)
        return builder.add(builder.mul(block_size, block_idx), thread_idx)
245
+
246
+
247
def get_global_id(builder, dim):
    """Return ``SRegBuilder.getdim`` values for the first *dim* axes.

    Axes are taken in the order ``x``, ``y``, ``z``. For ``dim == 1`` the
    single value is returned directly; otherwise a list of values is
    returned.
    """
    sreg = SRegBuilder(builder)
    # map() is lazy, so only the first `dim` axes are actually emitted.
    per_axis = list(itertools.islice(map(sreg.getdim, "xyz"), dim))
    return per_axis[0] if dim == 1 else per_axis
@@ -0,0 +1,126 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
4
+ from functools import singledispatch
5
+ from llvmlite import ir
6
+ from numba.cuda import types
7
+ from numba.cuda import cgutils
8
+ from numba.cuda.core.errors import NumbaWarning
9
+ from numba.cuda.core.imputils import Registry
10
+ from numba.cuda import nvvmutils
11
+ from numba.cuda.types.ext_types import Dim3, Bfloat16
12
+ from warnings import warn
13
+
14
# Registry for the lowering implementations of this module; `lower` is its
# decorator, used below to register the print() implementation.
registry = Registry("printimpl")
lower = registry.lower

# LLVM `i8*` type; the vprintf() argument buffer is bitcast to it.
voidptr = ir.PointerType(ir.IntType(8))
18
+
19
+
20
+ # NOTE: we don't use @lower here since print_item() doesn't return a LLVM value
21
+
22
+
23
@singledispatch
def print_item(ty, context, builder, val):
    """
    Produce the printf()-style pieces for one value of Numba type *ty*.

    Implementations registered on this dispatcher return a
    (format string, [list of arguments]) pair used to assemble the final
    printf()-like call. This fallback, reached when no implementation is
    registered for the type, raises NotImplementedError.
    """
    message = "printing unimplemented for values of type %s" % (ty,)
    raise NotImplementedError(message)
33
+
34
+
35
@print_item.register(types.Integer)
@print_item.register(types.IntegerLiteral)
def int_print_impl(ty, context, builder, val):
    """Format an integer value for printing.

    Types in ``types.unsigned_domain`` are cast to ``uint64`` and use
    ``%llu``; every other type is cast to ``int64`` and uses ``%lld``.
    """
    is_unsigned = ty in types.unsigned_domain
    rawfmt, dsttype = (
        ("%llu", types.uint64) if is_unsigned else ("%lld", types.int64)
    )
    widened = context.cast(builder, val, ty, dsttype)
    return rawfmt, [widened]
46
+
47
+
48
@print_item.register(types.Float)
def real_print_impl(ty, context, builder, val):
    """Format a floating-point value: cast it to ``float64``, print as ``%f``."""
    as_double = context.cast(builder, val, ty, types.float64)
    return "%f", [as_double]
52
+
53
+
54
@print_item.register(Bfloat16)
def bfloat16_print_impl(ty, context, builder, val):
    """Format a bfloat16 value for printing with ``%f``.

    The conversion is done by hand: the bits are zero-extended to 32 bits,
    shifted left by 16, reinterpreted as a float32 and extended to double.
    """
    i32 = ir.IntType(32)
    widened_bits = builder.zext(val, i32)
    float_bits = builder.shl(widened_bits, ir.Constant(i32, 16))
    as_float = builder.bitcast(float_bits, ir.FloatType())
    # printf("%f") expects a double; promote to f64 to match vararg expectation
    as_double = builder.fpext(as_float, ir.DoubleType())
    return "%f", [as_double]
63
+
64
+
65
@print_item.register(types.StringLiteral)
def const_print_impl(ty, context, builder, sigval):
    """Format a string literal for printing with ``%s``.

    The text comes from ``ty.literal_value`` (not from *sigval*) and is
    emitted through ``context.insert_string_const_addrspace``.
    """
    literal = ty.literal_value
    assert isinstance(literal, str)  # Ensured by lowering
    string_ptr = context.insert_string_const_addrspace(builder, literal)
    return "%s", [string_ptr]
72
+
73
+
74
@print_item.register(Dim3)
def dim3_print_impl(ty, context, builder, val):
    """Format a Dim3 value as ``(%d, %d, %d)`` from its elements 0, 1 and 2."""
    components = [builder.extract_value(val, axis) for axis in range(3)]
    return "(%d, %d, %d)", components
81
+
82
+
83
@print_item.register(types.Boolean)
def bool_print_impl(ty, context, builder, val):
    """Format a boolean for printing as the text ``True`` or ``False``.

    A slot created by ``cgutils.alloca_once_value`` starts out holding the
    "False" string and is overwritten with the "True" string when *val* is
    set; the loaded pointer is printed with ``%s``.
    """
    text_true = context.insert_string_const_addrspace(builder, "True")
    text_false = context.insert_string_const_addrspace(builder, "False")
    chosen = cgutils.alloca_once_value(builder, text_false)
    with builder.if_then(val):
        builder.store(text_true, chosen)
    return "%s", [builder.load(chosen)]
92
+
93
+
94
@lower(print, types.VarArg(types.Any))
def print_varargs(context, builder, sig, args):
    """This function is a generic 'print' wrapper for arbitrary types.
    It dispatches to the appropriate 'print' implementations above
    depending on the detected real types in the signature.

    Each argument is turned into a format fragment plus values by
    ``print_item``; the fragments are joined with single spaces and
    terminated with a newline, the values are packed into an anonymous
    struct, and ``vprintf`` is called with the format and a pointer to that
    struct. With more than 32 arguments a ``NumbaWarning`` is issued and the
    format is escaped so the kernel prints the raw format text instead.

    Returns the context's dummy value.
    """
    # Declared once, before any other IR is emitted for this call.
    vprint = nvvmutils.declare_vprint(builder.module)

    formats = []
    values = []

    for argtype, argval in zip(sig.args, args):
        argfmt, argvals = print_item(argtype, context, builder, argval)
        formats.append(argfmt)
        values.extend(argvals)

    rawfmt = " ".join(formats) + "\n"
    if len(args) > 32:
        msg = (
            "CUDA print() cannot print more than 32 items. "
            "The raw format string will be emitted by the kernel instead."
        )
        warn(msg, NumbaWarning)

        # Escape every conversion so the format text itself is printed.
        rawfmt = rawfmt.replace("%", "%%")

    fmt = context.insert_string_const_addrspace(builder, rawfmt)
    array = cgutils.make_anonymous_struct(builder, values)
    arrayptr = cgutils.alloca_once_value(builder, array)

    builder.call(vprint, (fmt, builder.bitcast(arrayptr, voidptr)))

    return context.get_dummy_value()