numba-cuda 0.22.0__cp313-cp313-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (487) hide show
  1. _numba_cuda_redirector.pth +4 -0
  2. _numba_cuda_redirector.py +89 -0
  3. numba_cuda/VERSION +1 -0
  4. numba_cuda/__init__.py +6 -0
  5. numba_cuda/_version.py +11 -0
  6. numba_cuda/numba/cuda/__init__.py +70 -0
  7. numba_cuda/numba/cuda/_internal/cuda_bf16.py +16394 -0
  8. numba_cuda/numba/cuda/_internal/cuda_fp16.py +8112 -0
  9. numba_cuda/numba/cuda/api.py +580 -0
  10. numba_cuda/numba/cuda/api_util.py +76 -0
  11. numba_cuda/numba/cuda/args.py +72 -0
  12. numba_cuda/numba/cuda/bf16.py +397 -0
  13. numba_cuda/numba/cuda/cache_hints.py +287 -0
  14. numba_cuda/numba/cuda/cext/__init__.py +2 -0
  15. numba_cuda/numba/cuda/cext/_devicearray.cpp +159 -0
  16. numba_cuda/numba/cuda/cext/_devicearray.cpython-313-aarch64-linux-gnu.so +0 -0
  17. numba_cuda/numba/cuda/cext/_devicearray.h +29 -0
  18. numba_cuda/numba/cuda/cext/_dispatcher.cpp +1098 -0
  19. numba_cuda/numba/cuda/cext/_dispatcher.cpython-313-aarch64-linux-gnu.so +0 -0
  20. numba_cuda/numba/cuda/cext/_hashtable.cpp +532 -0
  21. numba_cuda/numba/cuda/cext/_hashtable.h +135 -0
  22. numba_cuda/numba/cuda/cext/_helperlib.c +71 -0
  23. numba_cuda/numba/cuda/cext/_helperlib.cpython-313-aarch64-linux-gnu.so +0 -0
  24. numba_cuda/numba/cuda/cext/_helpermod.c +82 -0
  25. numba_cuda/numba/cuda/cext/_pymodule.h +38 -0
  26. numba_cuda/numba/cuda/cext/_typeconv.cpp +206 -0
  27. numba_cuda/numba/cuda/cext/_typeconv.cpython-313-aarch64-linux-gnu.so +0 -0
  28. numba_cuda/numba/cuda/cext/_typeof.cpp +1159 -0
  29. numba_cuda/numba/cuda/cext/_typeof.h +19 -0
  30. numba_cuda/numba/cuda/cext/capsulethunk.h +111 -0
  31. numba_cuda/numba/cuda/cext/mviewbuf.c +385 -0
  32. numba_cuda/numba/cuda/cext/mviewbuf.cpython-313-aarch64-linux-gnu.so +0 -0
  33. numba_cuda/numba/cuda/cext/typeconv.cpp +212 -0
  34. numba_cuda/numba/cuda/cext/typeconv.hpp +101 -0
  35. numba_cuda/numba/cuda/cg.py +67 -0
  36. numba_cuda/numba/cuda/cgutils.py +1294 -0
  37. numba_cuda/numba/cuda/cloudpickle/__init__.py +21 -0
  38. numba_cuda/numba/cuda/cloudpickle/cloudpickle.py +1598 -0
  39. numba_cuda/numba/cuda/cloudpickle/cloudpickle_fast.py +17 -0
  40. numba_cuda/numba/cuda/codegen.py +541 -0
  41. numba_cuda/numba/cuda/compiler.py +1396 -0
  42. numba_cuda/numba/cuda/core/analysis.py +758 -0
  43. numba_cuda/numba/cuda/core/annotations/__init__.py +0 -0
  44. numba_cuda/numba/cuda/core/annotations/pretty_annotate.py +288 -0
  45. numba_cuda/numba/cuda/core/annotations/type_annotations.py +305 -0
  46. numba_cuda/numba/cuda/core/base.py +1332 -0
  47. numba_cuda/numba/cuda/core/boxing.py +1411 -0
  48. numba_cuda/numba/cuda/core/bytecode.py +728 -0
  49. numba_cuda/numba/cuda/core/byteflow.py +2346 -0
  50. numba_cuda/numba/cuda/core/caching.py +744 -0
  51. numba_cuda/numba/cuda/core/callconv.py +392 -0
  52. numba_cuda/numba/cuda/core/codegen.py +171 -0
  53. numba_cuda/numba/cuda/core/compiler.py +199 -0
  54. numba_cuda/numba/cuda/core/compiler_lock.py +85 -0
  55. numba_cuda/numba/cuda/core/compiler_machinery.py +497 -0
  56. numba_cuda/numba/cuda/core/config.py +650 -0
  57. numba_cuda/numba/cuda/core/consts.py +124 -0
  58. numba_cuda/numba/cuda/core/controlflow.py +989 -0
  59. numba_cuda/numba/cuda/core/entrypoints.py +57 -0
  60. numba_cuda/numba/cuda/core/environment.py +66 -0
  61. numba_cuda/numba/cuda/core/errors.py +917 -0
  62. numba_cuda/numba/cuda/core/event.py +511 -0
  63. numba_cuda/numba/cuda/core/funcdesc.py +330 -0
  64. numba_cuda/numba/cuda/core/generators.py +387 -0
  65. numba_cuda/numba/cuda/core/imputils.py +509 -0
  66. numba_cuda/numba/cuda/core/inline_closurecall.py +1787 -0
  67. numba_cuda/numba/cuda/core/interpreter.py +3617 -0
  68. numba_cuda/numba/cuda/core/ir.py +1812 -0
  69. numba_cuda/numba/cuda/core/ir_utils.py +2638 -0
  70. numba_cuda/numba/cuda/core/optional.py +129 -0
  71. numba_cuda/numba/cuda/core/options.py +262 -0
  72. numba_cuda/numba/cuda/core/postproc.py +249 -0
  73. numba_cuda/numba/cuda/core/pythonapi.py +1859 -0
  74. numba_cuda/numba/cuda/core/registry.py +46 -0
  75. numba_cuda/numba/cuda/core/removerefctpass.py +123 -0
  76. numba_cuda/numba/cuda/core/rewrites/__init__.py +26 -0
  77. numba_cuda/numba/cuda/core/rewrites/ir_print.py +91 -0
  78. numba_cuda/numba/cuda/core/rewrites/registry.py +104 -0
  79. numba_cuda/numba/cuda/core/rewrites/static_binop.py +41 -0
  80. numba_cuda/numba/cuda/core/rewrites/static_getitem.py +189 -0
  81. numba_cuda/numba/cuda/core/rewrites/static_raise.py +100 -0
  82. numba_cuda/numba/cuda/core/sigutils.py +68 -0
  83. numba_cuda/numba/cuda/core/ssa.py +498 -0
  84. numba_cuda/numba/cuda/core/targetconfig.py +330 -0
  85. numba_cuda/numba/cuda/core/tracing.py +231 -0
  86. numba_cuda/numba/cuda/core/transforms.py +956 -0
  87. numba_cuda/numba/cuda/core/typed_passes.py +867 -0
  88. numba_cuda/numba/cuda/core/typeinfer.py +1950 -0
  89. numba_cuda/numba/cuda/core/unsafe/__init__.py +0 -0
  90. numba_cuda/numba/cuda/core/unsafe/bytes.py +67 -0
  91. numba_cuda/numba/cuda/core/unsafe/eh.py +67 -0
  92. numba_cuda/numba/cuda/core/unsafe/refcount.py +98 -0
  93. numba_cuda/numba/cuda/core/untyped_passes.py +1979 -0
  94. numba_cuda/numba/cuda/cpython/builtins.py +1153 -0
  95. numba_cuda/numba/cuda/cpython/charseq.py +1218 -0
  96. numba_cuda/numba/cuda/cpython/cmathimpl.py +560 -0
  97. numba_cuda/numba/cuda/cpython/enumimpl.py +103 -0
  98. numba_cuda/numba/cuda/cpython/iterators.py +167 -0
  99. numba_cuda/numba/cuda/cpython/listobj.py +1326 -0
  100. numba_cuda/numba/cuda/cpython/mathimpl.py +499 -0
  101. numba_cuda/numba/cuda/cpython/numbers.py +1475 -0
  102. numba_cuda/numba/cuda/cpython/rangeobj.py +289 -0
  103. numba_cuda/numba/cuda/cpython/slicing.py +322 -0
  104. numba_cuda/numba/cuda/cpython/tupleobj.py +456 -0
  105. numba_cuda/numba/cuda/cpython/unicode.py +2865 -0
  106. numba_cuda/numba/cuda/cpython/unicode_support.py +1597 -0
  107. numba_cuda/numba/cuda/cpython/unsafe/__init__.py +0 -0
  108. numba_cuda/numba/cuda/cpython/unsafe/numbers.py +64 -0
  109. numba_cuda/numba/cuda/cpython/unsafe/tuple.py +92 -0
  110. numba_cuda/numba/cuda/cuda_paths.py +691 -0
  111. numba_cuda/numba/cuda/cudadecl.py +543 -0
  112. numba_cuda/numba/cuda/cudadrv/__init__.py +14 -0
  113. numba_cuda/numba/cuda/cudadrv/devicearray.py +954 -0
  114. numba_cuda/numba/cuda/cudadrv/devices.py +249 -0
  115. numba_cuda/numba/cuda/cudadrv/driver.py +3238 -0
  116. numba_cuda/numba/cuda/cudadrv/drvapi.py +435 -0
  117. numba_cuda/numba/cuda/cudadrv/dummyarray.py +562 -0
  118. numba_cuda/numba/cuda/cudadrv/enums.py +613 -0
  119. numba_cuda/numba/cuda/cudadrv/error.py +48 -0
  120. numba_cuda/numba/cuda/cudadrv/libs.py +220 -0
  121. numba_cuda/numba/cuda/cudadrv/linkable_code.py +184 -0
  122. numba_cuda/numba/cuda/cudadrv/mappings.py +14 -0
  123. numba_cuda/numba/cuda/cudadrv/ndarray.py +26 -0
  124. numba_cuda/numba/cuda/cudadrv/nvrtc.py +193 -0
  125. numba_cuda/numba/cuda/cudadrv/nvvm.py +756 -0
  126. numba_cuda/numba/cuda/cudadrv/rtapi.py +13 -0
  127. numba_cuda/numba/cuda/cudadrv/runtime.py +34 -0
  128. numba_cuda/numba/cuda/cudaimpl.py +983 -0
  129. numba_cuda/numba/cuda/cudamath.py +149 -0
  130. numba_cuda/numba/cuda/datamodel/__init__.py +7 -0
  131. numba_cuda/numba/cuda/datamodel/cuda_manager.py +66 -0
  132. numba_cuda/numba/cuda/datamodel/cuda_models.py +1446 -0
  133. numba_cuda/numba/cuda/datamodel/cuda_packer.py +224 -0
  134. numba_cuda/numba/cuda/datamodel/cuda_registry.py +22 -0
  135. numba_cuda/numba/cuda/datamodel/cuda_testing.py +153 -0
  136. numba_cuda/numba/cuda/datamodel/manager.py +11 -0
  137. numba_cuda/numba/cuda/datamodel/models.py +9 -0
  138. numba_cuda/numba/cuda/datamodel/packer.py +9 -0
  139. numba_cuda/numba/cuda/datamodel/registry.py +11 -0
  140. numba_cuda/numba/cuda/datamodel/testing.py +11 -0
  141. numba_cuda/numba/cuda/debuginfo.py +997 -0
  142. numba_cuda/numba/cuda/decorators.py +294 -0
  143. numba_cuda/numba/cuda/descriptor.py +35 -0
  144. numba_cuda/numba/cuda/device_init.py +155 -0
  145. numba_cuda/numba/cuda/deviceufunc.py +1021 -0
  146. numba_cuda/numba/cuda/dispatcher.py +2463 -0
  147. numba_cuda/numba/cuda/errors.py +72 -0
  148. numba_cuda/numba/cuda/extending.py +697 -0
  149. numba_cuda/numba/cuda/flags.py +178 -0
  150. numba_cuda/numba/cuda/fp16.py +357 -0
  151. numba_cuda/numba/cuda/include/12/cuda_bf16.h +5118 -0
  152. numba_cuda/numba/cuda/include/12/cuda_bf16.hpp +3865 -0
  153. numba_cuda/numba/cuda/include/12/cuda_fp16.h +5363 -0
  154. numba_cuda/numba/cuda/include/12/cuda_fp16.hpp +3483 -0
  155. numba_cuda/numba/cuda/include/13/cuda_bf16.h +5118 -0
  156. numba_cuda/numba/cuda/include/13/cuda_bf16.hpp +3865 -0
  157. numba_cuda/numba/cuda/include/13/cuda_fp16.h +5363 -0
  158. numba_cuda/numba/cuda/include/13/cuda_fp16.hpp +3483 -0
  159. numba_cuda/numba/cuda/initialize.py +24 -0
  160. numba_cuda/numba/cuda/intrinsics.py +531 -0
  161. numba_cuda/numba/cuda/itanium_mangler.py +214 -0
  162. numba_cuda/numba/cuda/kernels/__init__.py +2 -0
  163. numba_cuda/numba/cuda/kernels/reduction.py +265 -0
  164. numba_cuda/numba/cuda/kernels/transpose.py +65 -0
  165. numba_cuda/numba/cuda/libdevice.py +3386 -0
  166. numba_cuda/numba/cuda/libdevicedecl.py +20 -0
  167. numba_cuda/numba/cuda/libdevicefuncs.py +1060 -0
  168. numba_cuda/numba/cuda/libdeviceimpl.py +88 -0
  169. numba_cuda/numba/cuda/locks.py +19 -0
  170. numba_cuda/numba/cuda/lowering.py +1980 -0
  171. numba_cuda/numba/cuda/mathimpl.py +374 -0
  172. numba_cuda/numba/cuda/memory_management/__init__.py +4 -0
  173. numba_cuda/numba/cuda/memory_management/memsys.cu +99 -0
  174. numba_cuda/numba/cuda/memory_management/memsys.cuh +22 -0
  175. numba_cuda/numba/cuda/memory_management/nrt.cu +212 -0
  176. numba_cuda/numba/cuda/memory_management/nrt.cuh +48 -0
  177. numba_cuda/numba/cuda/memory_management/nrt.py +390 -0
  178. numba_cuda/numba/cuda/memory_management/nrt_context.py +438 -0
  179. numba_cuda/numba/cuda/misc/appdirs.py +594 -0
  180. numba_cuda/numba/cuda/misc/cffiimpl.py +24 -0
  181. numba_cuda/numba/cuda/misc/coverage_support.py +43 -0
  182. numba_cuda/numba/cuda/misc/dump_style.py +41 -0
  183. numba_cuda/numba/cuda/misc/findlib.py +75 -0
  184. numba_cuda/numba/cuda/misc/firstlinefinder.py +96 -0
  185. numba_cuda/numba/cuda/misc/gdb_hook.py +240 -0
  186. numba_cuda/numba/cuda/misc/literal.py +28 -0
  187. numba_cuda/numba/cuda/misc/llvm_pass_timings.py +412 -0
  188. numba_cuda/numba/cuda/misc/special.py +94 -0
  189. numba_cuda/numba/cuda/models.py +56 -0
  190. numba_cuda/numba/cuda/np/arraymath.py +5130 -0
  191. numba_cuda/numba/cuda/np/arrayobj.py +7635 -0
  192. numba_cuda/numba/cuda/np/extensions.py +11 -0
  193. numba_cuda/numba/cuda/np/linalg.py +3087 -0
  194. numba_cuda/numba/cuda/np/math/__init__.py +0 -0
  195. numba_cuda/numba/cuda/np/math/cmathimpl.py +558 -0
  196. numba_cuda/numba/cuda/np/math/mathimpl.py +487 -0
  197. numba_cuda/numba/cuda/np/math/numbers.py +1461 -0
  198. numba_cuda/numba/cuda/np/npdatetime.py +969 -0
  199. numba_cuda/numba/cuda/np/npdatetime_helpers.py +217 -0
  200. numba_cuda/numba/cuda/np/npyfuncs.py +1808 -0
  201. numba_cuda/numba/cuda/np/npyimpl.py +1027 -0
  202. numba_cuda/numba/cuda/np/numpy_support.py +798 -0
  203. numba_cuda/numba/cuda/np/polynomial/__init__.py +4 -0
  204. numba_cuda/numba/cuda/np/polynomial/polynomial_core.py +242 -0
  205. numba_cuda/numba/cuda/np/polynomial/polynomial_functions.py +380 -0
  206. numba_cuda/numba/cuda/np/ufunc/__init__.py +4 -0
  207. numba_cuda/numba/cuda/np/ufunc/decorators.py +203 -0
  208. numba_cuda/numba/cuda/np/ufunc/sigparse.py +68 -0
  209. numba_cuda/numba/cuda/np/ufunc/ufuncbuilder.py +65 -0
  210. numba_cuda/numba/cuda/np/ufunc_db.py +1282 -0
  211. numba_cuda/numba/cuda/np/unsafe/__init__.py +0 -0
  212. numba_cuda/numba/cuda/np/unsafe/ndarray.py +84 -0
  213. numba_cuda/numba/cuda/nvvmutils.py +254 -0
  214. numba_cuda/numba/cuda/printimpl.py +126 -0
  215. numba_cuda/numba/cuda/random.py +308 -0
  216. numba_cuda/numba/cuda/reshape_funcs.cu +156 -0
  217. numba_cuda/numba/cuda/serialize.py +267 -0
  218. numba_cuda/numba/cuda/simulator/__init__.py +63 -0
  219. numba_cuda/numba/cuda/simulator/_internal/__init__.py +4 -0
  220. numba_cuda/numba/cuda/simulator/_internal/cuda_bf16.py +2 -0
  221. numba_cuda/numba/cuda/simulator/api.py +179 -0
  222. numba_cuda/numba/cuda/simulator/bf16.py +4 -0
  223. numba_cuda/numba/cuda/simulator/compiler.py +38 -0
  224. numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +11 -0
  225. numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +462 -0
  226. numba_cuda/numba/cuda/simulator/cudadrv/devices.py +122 -0
  227. numba_cuda/numba/cuda/simulator/cudadrv/driver.py +66 -0
  228. numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +7 -0
  229. numba_cuda/numba/cuda/simulator/cudadrv/dummyarray.py +7 -0
  230. numba_cuda/numba/cuda/simulator/cudadrv/error.py +10 -0
  231. numba_cuda/numba/cuda/simulator/cudadrv/libs.py +10 -0
  232. numba_cuda/numba/cuda/simulator/cudadrv/linkable_code.py +61 -0
  233. numba_cuda/numba/cuda/simulator/cudadrv/nvrtc.py +11 -0
  234. numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +32 -0
  235. numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +22 -0
  236. numba_cuda/numba/cuda/simulator/dispatcher.py +11 -0
  237. numba_cuda/numba/cuda/simulator/kernel.py +320 -0
  238. numba_cuda/numba/cuda/simulator/kernelapi.py +509 -0
  239. numba_cuda/numba/cuda/simulator/memory_management/__init__.py +4 -0
  240. numba_cuda/numba/cuda/simulator/memory_management/nrt.py +21 -0
  241. numba_cuda/numba/cuda/simulator/reduction.py +19 -0
  242. numba_cuda/numba/cuda/simulator/tests/support.py +4 -0
  243. numba_cuda/numba/cuda/simulator/vector_types.py +65 -0
  244. numba_cuda/numba/cuda/simulator_init.py +18 -0
  245. numba_cuda/numba/cuda/stubs.py +624 -0
  246. numba_cuda/numba/cuda/target.py +505 -0
  247. numba_cuda/numba/cuda/testing.py +347 -0
  248. numba_cuda/numba/cuda/tests/__init__.py +62 -0
  249. numba_cuda/numba/cuda/tests/benchmarks/__init__.py +0 -0
  250. numba_cuda/numba/cuda/tests/benchmarks/test_kernel_launch.py +119 -0
  251. numba_cuda/numba/cuda/tests/cloudpickle_main_class.py +9 -0
  252. numba_cuda/numba/cuda/tests/core/serialize_usecases.py +113 -0
  253. numba_cuda/numba/cuda/tests/core/test_itanium_mangler.py +83 -0
  254. numba_cuda/numba/cuda/tests/core/test_serialize.py +371 -0
  255. numba_cuda/numba/cuda/tests/cudadrv/__init__.py +9 -0
  256. numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +147 -0
  257. numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +161 -0
  258. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +397 -0
  259. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +24 -0
  260. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +180 -0
  261. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +313 -0
  262. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +191 -0
  263. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +621 -0
  264. numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +247 -0
  265. numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +100 -0
  266. numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +200 -0
  267. numba_cuda/numba/cuda/tests/cudadrv/test_events.py +53 -0
  268. numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +72 -0
  269. numba_cuda/numba/cuda/tests/cudadrv/test_init.py +138 -0
  270. numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +43 -0
  271. numba_cuda/numba/cuda/tests/cudadrv/test_is_fp16.py +15 -0
  272. numba_cuda/numba/cuda/tests/cudadrv/test_linkable_code.py +58 -0
  273. numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +348 -0
  274. numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +128 -0
  275. numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py +301 -0
  276. numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +174 -0
  277. numba_cuda/numba/cuda/tests/cudadrv/test_nvrtc.py +28 -0
  278. numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +185 -0
  279. numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +39 -0
  280. numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +23 -0
  281. numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +38 -0
  282. numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +48 -0
  283. numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +44 -0
  284. numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +127 -0
  285. numba_cuda/numba/cuda/tests/cudapy/__init__.py +9 -0
  286. numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +231 -0
  287. numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +50 -0
  288. numba_cuda/numba/cuda/tests/cudapy/cg_cache_usecases.py +36 -0
  289. numba_cuda/numba/cuda/tests/cudapy/complex_usecases.py +116 -0
  290. numba_cuda/numba/cuda/tests/cudapy/enum_usecases.py +59 -0
  291. numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +62 -0
  292. numba_cuda/numba/cuda/tests/cudapy/jitlink.ptx +28 -0
  293. numba_cuda/numba/cuda/tests/cudapy/overload_usecases.py +33 -0
  294. numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +104 -0
  295. numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +47 -0
  296. numba_cuda/numba/cuda/tests/cudapy/test_analysis.py +1122 -0
  297. numba_cuda/numba/cuda/tests/cudapy/test_array.py +344 -0
  298. numba_cuda/numba/cuda/tests/cudapy/test_array_alignment.py +268 -0
  299. numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +203 -0
  300. numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +63 -0
  301. numba_cuda/numba/cuda/tests/cudapy/test_array_reductions.py +360 -0
  302. numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +1815 -0
  303. numba_cuda/numba/cuda/tests/cudapy/test_bfloat16.py +599 -0
  304. numba_cuda/numba/cuda/tests/cudapy/test_bfloat16_bindings.py +377 -0
  305. numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +160 -0
  306. numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +27 -0
  307. numba_cuda/numba/cuda/tests/cudapy/test_byteflow.py +98 -0
  308. numba_cuda/numba/cuda/tests/cudapy/test_cache_hints.py +210 -0
  309. numba_cuda/numba/cuda/tests/cudapy/test_caching.py +683 -0
  310. numba_cuda/numba/cuda/tests/cudapy/test_casting.py +265 -0
  311. numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +42 -0
  312. numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +718 -0
  313. numba_cuda/numba/cuda/tests/cudapy/test_complex.py +370 -0
  314. numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +23 -0
  315. numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +142 -0
  316. numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +178 -0
  317. numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +193 -0
  318. numba_cuda/numba/cuda/tests/cudapy/test_copy_propagate.py +131 -0
  319. numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +438 -0
  320. numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +94 -0
  321. numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +101 -0
  322. numba_cuda/numba/cuda/tests/cudapy/test_debug.py +105 -0
  323. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +978 -0
  324. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo_types.py +476 -0
  325. numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +500 -0
  326. numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +820 -0
  327. numba_cuda/numba/cuda/tests/cudapy/test_enums.py +152 -0
  328. numba_cuda/numba/cuda/tests/cudapy/test_errors.py +111 -0
  329. numba_cuda/numba/cuda/tests/cudapy/test_exception.py +170 -0
  330. numba_cuda/numba/cuda/tests/cudapy/test_extending.py +1088 -0
  331. numba_cuda/numba/cuda/tests/cudapy/test_extending_types.py +71 -0
  332. numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +265 -0
  333. numba_cuda/numba/cuda/tests/cudapy/test_flow_control.py +1433 -0
  334. numba_cuda/numba/cuda/tests/cudapy/test_forall.py +57 -0
  335. numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +34 -0
  336. numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +69 -0
  337. numba_cuda/numba/cuda/tests/cudapy/test_globals.py +62 -0
  338. numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +474 -0
  339. numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +167 -0
  340. numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +92 -0
  341. numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +39 -0
  342. numba_cuda/numba/cuda/tests/cudapy/test_inline.py +170 -0
  343. numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +255 -0
  344. numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +1219 -0
  345. numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +263 -0
  346. numba_cuda/numba/cuda/tests/cudapy/test_ir.py +598 -0
  347. numba_cuda/numba/cuda/tests/cudapy/test_ir_utils.py +276 -0
  348. numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +101 -0
  349. numba_cuda/numba/cuda/tests/cudapy/test_lang.py +68 -0
  350. numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +123 -0
  351. numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +194 -0
  352. numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +220 -0
  353. numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +173 -0
  354. numba_cuda/numba/cuda/tests/cudapy/test_make_function_to_jit_function.py +364 -0
  355. numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +47 -0
  356. numba_cuda/numba/cuda/tests/cudapy/test_math.py +842 -0
  357. numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +76 -0
  358. numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +78 -0
  359. numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +25 -0
  360. numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +145 -0
  361. numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +39 -0
  362. numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +82 -0
  363. numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +53 -0
  364. numba_cuda/numba/cuda/tests/cudapy/test_operator.py +504 -0
  365. numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +93 -0
  366. numba_cuda/numba/cuda/tests/cudapy/test_overload.py +402 -0
  367. numba_cuda/numba/cuda/tests/cudapy/test_powi.py +128 -0
  368. numba_cuda/numba/cuda/tests/cudapy/test_print.py +193 -0
  369. numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +37 -0
  370. numba_cuda/numba/cuda/tests/cudapy/test_random.py +117 -0
  371. numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +614 -0
  372. numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +130 -0
  373. numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +94 -0
  374. numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +83 -0
  375. numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +86 -0
  376. numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +40 -0
  377. numba_cuda/numba/cuda/tests/cudapy/test_sm.py +457 -0
  378. numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +233 -0
  379. numba_cuda/numba/cuda/tests/cudapy/test_ssa.py +454 -0
  380. numba_cuda/numba/cuda/tests/cudapy/test_stream_api.py +56 -0
  381. numba_cuda/numba/cuda/tests/cudapy/test_sync.py +277 -0
  382. numba_cuda/numba/cuda/tests/cudapy/test_tracing.py +200 -0
  383. numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +90 -0
  384. numba_cuda/numba/cuda/tests/cudapy/test_typeconv.py +333 -0
  385. numba_cuda/numba/cuda/tests/cudapy/test_typeinfer.py +538 -0
  386. numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +585 -0
  387. numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +42 -0
  388. numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +485 -0
  389. numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +312 -0
  390. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +23 -0
  391. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +183 -0
  392. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +40 -0
  393. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +40 -0
  394. numba_cuda/numba/cuda/tests/cudapy/test_warning.py +206 -0
  395. numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +446 -0
  396. numba_cuda/numba/cuda/tests/cudasim/__init__.py +9 -0
  397. numba_cuda/numba/cuda/tests/cudasim/support.py +9 -0
  398. numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +111 -0
  399. numba_cuda/numba/cuda/tests/data/__init__.py +2 -0
  400. numba_cuda/numba/cuda/tests/data/cta_barrier.cu +28 -0
  401. numba_cuda/numba/cuda/tests/data/cuda_include.cu +10 -0
  402. numba_cuda/numba/cuda/tests/data/error.cu +12 -0
  403. numba_cuda/numba/cuda/tests/data/include/add.cuh +8 -0
  404. numba_cuda/numba/cuda/tests/data/jitlink.cu +28 -0
  405. numba_cuda/numba/cuda/tests/data/jitlink.ptx +49 -0
  406. numba_cuda/numba/cuda/tests/data/warn.cu +12 -0
  407. numba_cuda/numba/cuda/tests/doc_examples/__init__.py +9 -0
  408. numba_cuda/numba/cuda/tests/doc_examples/ffi/__init__.py +2 -0
  409. numba_cuda/numba/cuda/tests/doc_examples/ffi/functions.cu +54 -0
  410. numba_cuda/numba/cuda/tests/doc_examples/ffi/include/mul.cuh +8 -0
  411. numba_cuda/numba/cuda/tests/doc_examples/ffi/saxpy.cu +14 -0
  412. numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +86 -0
  413. numba_cuda/numba/cuda/tests/doc_examples/test_cpointer.py +68 -0
  414. numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +81 -0
  415. numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +141 -0
  416. numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +160 -0
  417. numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +180 -0
  418. numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +119 -0
  419. numba_cuda/numba/cuda/tests/doc_examples/test_random.py +66 -0
  420. numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +80 -0
  421. numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +206 -0
  422. numba_cuda/numba/cuda/tests/doc_examples/test_ufunc.py +53 -0
  423. numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +76 -0
  424. numba_cuda/numba/cuda/tests/nocuda/__init__.py +9 -0
  425. numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +452 -0
  426. numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +48 -0
  427. numba_cuda/numba/cuda/tests/nocuda/test_import.py +63 -0
  428. numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +252 -0
  429. numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +59 -0
  430. numba_cuda/numba/cuda/tests/nrt/__init__.py +9 -0
  431. numba_cuda/numba/cuda/tests/nrt/test_nrt.py +387 -0
  432. numba_cuda/numba/cuda/tests/nrt/test_nrt_refct.py +124 -0
  433. numba_cuda/numba/cuda/tests/support.py +900 -0
  434. numba_cuda/numba/cuda/typeconv/__init__.py +4 -0
  435. numba_cuda/numba/cuda/typeconv/castgraph.py +137 -0
  436. numba_cuda/numba/cuda/typeconv/rules.py +63 -0
  437. numba_cuda/numba/cuda/typeconv/typeconv.py +121 -0
  438. numba_cuda/numba/cuda/types/__init__.py +233 -0
  439. numba_cuda/numba/cuda/types/__init__.pyi +167 -0
  440. numba_cuda/numba/cuda/types/abstract.py +9 -0
  441. numba_cuda/numba/cuda/types/common.py +9 -0
  442. numba_cuda/numba/cuda/types/containers.py +9 -0
  443. numba_cuda/numba/cuda/types/cuda_abstract.py +533 -0
  444. numba_cuda/numba/cuda/types/cuda_common.py +110 -0
  445. numba_cuda/numba/cuda/types/cuda_containers.py +971 -0
  446. numba_cuda/numba/cuda/types/cuda_function_type.py +230 -0
  447. numba_cuda/numba/cuda/types/cuda_functions.py +798 -0
  448. numba_cuda/numba/cuda/types/cuda_iterators.py +120 -0
  449. numba_cuda/numba/cuda/types/cuda_misc.py +569 -0
  450. numba_cuda/numba/cuda/types/cuda_npytypes.py +690 -0
  451. numba_cuda/numba/cuda/types/cuda_scalars.py +280 -0
  452. numba_cuda/numba/cuda/types/ext_types.py +101 -0
  453. numba_cuda/numba/cuda/types/function_type.py +11 -0
  454. numba_cuda/numba/cuda/types/functions.py +9 -0
  455. numba_cuda/numba/cuda/types/iterators.py +9 -0
  456. numba_cuda/numba/cuda/types/misc.py +9 -0
  457. numba_cuda/numba/cuda/types/npytypes.py +9 -0
  458. numba_cuda/numba/cuda/types/scalars.py +9 -0
  459. numba_cuda/numba/cuda/typing/__init__.py +19 -0
  460. numba_cuda/numba/cuda/typing/arraydecl.py +939 -0
  461. numba_cuda/numba/cuda/typing/asnumbatype.py +130 -0
  462. numba_cuda/numba/cuda/typing/bufproto.py +70 -0
  463. numba_cuda/numba/cuda/typing/builtins.py +1209 -0
  464. numba_cuda/numba/cuda/typing/cffi_utils.py +219 -0
  465. numba_cuda/numba/cuda/typing/cmathdecl.py +47 -0
  466. numba_cuda/numba/cuda/typing/collections.py +138 -0
  467. numba_cuda/numba/cuda/typing/context.py +782 -0
  468. numba_cuda/numba/cuda/typing/ctypes_utils.py +125 -0
  469. numba_cuda/numba/cuda/typing/dictdecl.py +63 -0
  470. numba_cuda/numba/cuda/typing/enumdecl.py +74 -0
  471. numba_cuda/numba/cuda/typing/listdecl.py +147 -0
  472. numba_cuda/numba/cuda/typing/mathdecl.py +158 -0
  473. numba_cuda/numba/cuda/typing/npdatetime.py +322 -0
  474. numba_cuda/numba/cuda/typing/npydecl.py +749 -0
  475. numba_cuda/numba/cuda/typing/setdecl.py +115 -0
  476. numba_cuda/numba/cuda/typing/templates.py +1446 -0
  477. numba_cuda/numba/cuda/typing/typeof.py +301 -0
  478. numba_cuda/numba/cuda/ufuncs.py +746 -0
  479. numba_cuda/numba/cuda/utils.py +724 -0
  480. numba_cuda/numba/cuda/vector_types.py +214 -0
  481. numba_cuda/numba/cuda/vectorizers.py +260 -0
  482. numba_cuda-0.22.0.dist-info/METADATA +109 -0
  483. numba_cuda-0.22.0.dist-info/RECORD +487 -0
  484. numba_cuda-0.22.0.dist-info/WHEEL +6 -0
  485. numba_cuda-0.22.0.dist-info/licenses/LICENSE +26 -0
  486. numba_cuda-0.22.0.dist-info/licenses/LICENSE.numba +24 -0
  487. numba_cuda-0.22.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,72 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
4
+ """
5
+ Hints to wrap Kernel arguments to indicate how to manage host-device
6
+ memory transfers before & after the kernel call.
7
+ """
8
+
9
+ from numba.cuda.typing.typeof import typeof, Purpose
10
+
11
+
12
class ArgHint:
    """Base class for wrappers that tag a kernel argument with a transfer hint.

    The wrapped object is stored unchanged on ``self.value``. Subclasses
    override :meth:`to_device` to produce what is handed to the kernel.
    """

    def __init__(self, value):
        # Store the caller's object as-is; nothing is converted here.
        self.value = value

    def to_device(self, retr, stream=0):
        """
        Produce the value to be passed to the kernel.

        This base implementation does no work and returns ``None``.

        :param retr:
            a list of clean-up work to do after the kernel's been run.
            Implementations append 0-arg callables to it.
        :param stream: a stream to use when copying data
        :return: a value (usually a `DeviceNDArray`) to be passed to
                 the kernel
        """
        return None

    @property
    def _numba_type_(self):
        """Result of ``typeof`` on the wrapped value for ``Purpose.argument``."""
        return typeof(
            self.value,
            Purpose.argument,
        )
29
+
30
+
31
class In(ArgHint):
    """Argument hint whose ``to_device`` registers no copy back to the host."""

    def to_device(self, retr, stream=0):
        """
        Obtain the device-side value for the wrapped argument.

        :param retr:
            list of post-kernel callbacks; one no-op callback is appended.
        :param stream: stream forwarded to ``auto_device``
        :return: the first element of the pair returned by ``auto_device``
        """
        from .cudadrv.devicearray import auto_device

        device_array, _unused = auto_device(self.value, stream=stream)

        def keep_alive():
            # Dummy write-back: the closure only holds a reference so the
            # device array stays alive until the kernel is called.
            return device_array

        retr.append(keep_alive)
        return device_array
40
+
41
+
42
class Out(ArgHint):
    """Argument hint that asks ``auto_device`` not to copy (``copy=False``)
    and schedules a copy back into the wrapped value after the kernel."""

    def to_device(self, retr, stream=0):
        """
        Obtain the device-side value for the wrapped argument.

        :param retr:
            list of post-kernel callbacks; a write-back callback is appended
            when ``auto_device`` reports a conversion.
        :param stream: stream used by ``auto_device`` and by the write-back
        :return: the first element of the pair returned by ``auto_device``
        """
        from .cudadrv.devicearray import auto_device

        device_array, converted = auto_device(
            self.value, copy=False, stream=stream
        )
        # NOTE(review): `converted` presumably means auto_device created a
        # new device array from a host value -- confirm against auto_device.
        if not converted:
            return device_array

        def write_back():
            # self.value is read when the callback runs, not when it is made.
            return device_array.copy_to_host(self.value, stream=stream)

        retr.append(write_back)
        return device_array
50
+
51
+
52
class InOut(ArgHint):
    """Argument hint that uses ``auto_device`` with its default copy
    behaviour and schedules a copy back into the wrapped value after the
    kernel."""

    def to_device(self, retr, stream=0):
        """
        Obtain the device-side value for the wrapped argument.

        :param retr:
            list of post-kernel callbacks; a write-back callback is appended
            when ``auto_device`` reports a conversion.
        :param stream: stream used by ``auto_device`` and by the write-back
        :return: the first element of the pair returned by ``auto_device``
        """
        from .cudadrv.devicearray import auto_device

        device_array, converted = auto_device(self.value, stream=stream)
        # NOTE(review): `converted` presumably means auto_device created a
        # new device array from a host value -- confirm against auto_device.
        if not converted:
            return device_array

        def write_back():
            # self.value is read when the callback runs, not when it is made.
            return device_array.copy_to_host(self.value, stream=stream)

        retr.append(write_back)
        return device_array
60
+
61
+
62
def wrap_arg(value, default=InOut):
    """Return *value* as an ``ArgHint``.

    :param value: the kernel argument, possibly already an ``ArgHint``
    :param default: callable applied to *value* when it is not an
                    ``ArgHint`` (``InOut`` unless overridden)
    :return: *value* itself if it is an ``ArgHint`` instance, otherwise
             ``default(value)``
    """
    if isinstance(value, ArgHint):
        # Already carries an explicit hint; leave it untouched.
        return value
    return default(value)
64
+
65
+
66
# Names exported by `from <module> import *`: the three hint classes,
# their common base class, and the default-wrapping helper.
__all__ = ["In", "Out", "InOut", "ArgHint", "wrap_arg"]
@@ -0,0 +1,397 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+ import sys
4
+
5
+ from numba.cuda._internal.cuda_bf16 import (
6
+ typing_registry,
7
+ target_registry,
8
+ nv_bfloat16 as bfloat16,
9
+ # Arithmetic intrinsics
10
+ __habs as habs,
11
+ __hadd as hadd,
12
+ __hsub as hsub,
13
+ __hmul as hmul,
14
+ __hadd_rn as hadd_rn,
15
+ __hsub_rn as hsub_rn,
16
+ __hmul_rn as hmul_rn,
17
+ __hdiv as hdiv,
18
+ __hadd_sat as hadd_sat,
19
+ __hsub_sat as hsub_sat,
20
+ __hmul_sat as hmul_sat,
21
+ __hfma as hfma,
22
+ __hfma_sat as hfma_sat,
23
+ __hneg as hneg,
24
+ __hfma_relu as hfma_relu,
25
+ # Comparison intrinsics
26
+ __heq as heq,
27
+ __hne as hne,
28
+ __hge as hge,
29
+ __hgt as hgt,
30
+ __hle as hle,
31
+ __hlt as hlt,
32
+ __hmax as hmax,
33
+ __hmin as hmin,
34
+ __hmax_nan as hmax_nan,
35
+ __hmin_nan as hmin_nan,
36
+ __hisinf as hisinf,
37
+ __hisnan as hisnan,
38
+ # Unordered comparison intrinsics
39
+ __hequ as hequ,
40
+ __hneu as hneu,
41
+ __hgeu as hgeu,
42
+ __hgtu as hgtu,
43
+ __hleu as hleu,
44
+ __hltu as hltu,
45
+ # Precision conversion and data movement
46
+ # - floating-point family
47
+ __bfloat162float as bfloat162float,
48
+ __float2bfloat16 as float2bfloat16,
49
+ __double2bfloat16 as double2bfloat16,
50
+ __float2bfloat16_rn as float2bfloat16_rn,
51
+ __float2bfloat16_rz as float2bfloat16_rz,
52
+ __float2bfloat16_rd as float2bfloat16_rd,
53
+ __float2bfloat16_ru as float2bfloat16_ru,
54
+ # - char family
55
+ __bfloat162char_rz as bfloat162char_rz,
56
+ __bfloat162uchar_rz as bfloat162uchar_rz,
57
+ # - int family (signed 32-bit)
58
+ __int2bfloat16_rn as int2bfloat16_rn,
59
+ __int2bfloat16_rz as int2bfloat16_rz,
60
+ __int2bfloat16_rd as int2bfloat16_rd,
61
+ __int2bfloat16_ru as int2bfloat16_ru,
62
+ __bfloat162int_rn as bfloat162int_rn,
63
+ __bfloat162int_rz as bfloat162int_rz,
64
+ __bfloat162int_rd as bfloat162int_rd,
65
+ __bfloat162int_ru as bfloat162int_ru,
66
+ # - short family (signed 16-bit)
67
+ __short2bfloat16_rn as short2bfloat16_rn,
68
+ __short2bfloat16_rz as short2bfloat16_rz,
69
+ __short2bfloat16_rd as short2bfloat16_rd,
70
+ __short2bfloat16_ru as short2bfloat16_ru,
71
+ __bfloat162short_rn as bfloat162short_rn,
72
+ __bfloat162short_rz as bfloat162short_rz,
73
+ __bfloat162short_rd as bfloat162short_rd,
74
+ __bfloat162short_ru as bfloat162short_ru,
75
+ # - ushort family (unsigned 16-bit)
76
+ __ushort2bfloat16_rn as ushort2bfloat16_rn,
77
+ __ushort2bfloat16_rz as ushort2bfloat16_rz,
78
+ __ushort2bfloat16_rd as ushort2bfloat16_rd,
79
+ __ushort2bfloat16_ru as ushort2bfloat16_ru,
80
+ __bfloat162ushort_rn as bfloat162ushort_rn,
81
+ __bfloat162ushort_rz as bfloat162ushort_rz,
82
+ __bfloat162ushort_rd as bfloat162ushort_rd,
83
+ __bfloat162ushort_ru as bfloat162ushort_ru,
84
+ # - uint family (unsigned 32-bit)
85
+ __uint2bfloat16_rn as uint2bfloat16_rn,
86
+ __uint2bfloat16_rz as uint2bfloat16_rz,
87
+ __uint2bfloat16_rd as uint2bfloat16_rd,
88
+ __uint2bfloat16_ru as uint2bfloat16_ru,
89
+ __bfloat162uint_rn as bfloat162uint_rn,
90
+ __bfloat162uint_rz as bfloat162uint_rz,
91
+ __bfloat162uint_rd as bfloat162uint_rd,
92
+ __bfloat162uint_ru as bfloat162uint_ru,
93
+ # - ll family (signed 64-bit)
94
+ __ll2bfloat16_rn as ll2bfloat16_rn,
95
+ __ll2bfloat16_rz as ll2bfloat16_rz,
96
+ __ll2bfloat16_rd as ll2bfloat16_rd,
97
+ __ll2bfloat16_ru as ll2bfloat16_ru,
98
+ __bfloat162ll_rn as bfloat162ll_rn,
99
+ __bfloat162ll_rz as bfloat162ll_rz,
100
+ __bfloat162ll_rd as bfloat162ll_rd,
101
+ __bfloat162ll_ru as bfloat162ll_ru,
102
+ # - ull family (unsigned 64-bit)
103
+ __ull2bfloat16_rn as ull2bfloat16_rn,
104
+ __ull2bfloat16_rz as ull2bfloat16_rz,
105
+ __ull2bfloat16_rd as ull2bfloat16_rd,
106
+ __ull2bfloat16_ru as ull2bfloat16_ru,
107
+ __bfloat162ull_rn as bfloat162ull_rn,
108
+ __bfloat162ull_rz as bfloat162ull_rz,
109
+ __bfloat162ull_rd as bfloat162ull_rd,
110
+ __bfloat162ull_ru as bfloat162ull_ru,
111
+ # - bit reinterpret casts
112
+ __bfloat16_as_short as bfloat16_as_short,
113
+ __bfloat16_as_ushort as bfloat16_as_ushort,
114
+ __short_as_bfloat16 as short_as_bfloat16,
115
+ __ushort_as_bfloat16 as ushort_as_bfloat16,
116
+ htrunc,
117
+ hceil,
118
+ hfloor,
119
+ hrint,
120
+ hsqrt,
121
+ hrsqrt,
122
+ hrcp,
123
+ hlog,
124
+ hlog2,
125
+ hlog10,
126
+ hcos,
127
+ hsin,
128
+ hexp,
129
+ hexp2,
130
+ hexp10,
131
+ htanh,
132
+ htanh_approx,
133
+ )
134
+ from numba.cuda.extending import overload
135
+
136
+ import math
137
+
138
+
139
def _make_unary(a, func):
    """
    Build a one-argument implementation for a bfloat16 overload.

    :param a: the argument passed to the enclosing overload function
    :param func: the unary callable to forward to
    :return: a one-argument lambda calling ``func`` when ``a == bfloat16``;
             ``None`` otherwise
    """
    return (lambda a: func(a)) if a == bfloat16 else None
142
+
143
+
144
+ # Bind low++ bindings to math APIs
145
@overload(math.trunc, target="cuda")
def trunc_ol(a):
    """CUDA-target ``math.trunc`` overload backed by ``htrunc``.

    ``_make_unary`` returns an implementation only when ``a == bfloat16``.
    """
    impl = _make_unary(a, htrunc)
    return impl
148
+
149
+
150
@overload(math.ceil, target="cuda")
def ceil_ol(a):
    """CUDA-target ``math.ceil`` overload backed by ``hceil``.

    ``_make_unary`` returns an implementation only when ``a == bfloat16``.
    """
    impl = _make_unary(a, hceil)
    return impl
153
+
154
+
155
@overload(math.floor, target="cuda")
def floor_ol(a):
    """CUDA-target ``math.floor`` overload backed by ``hfloor``.

    ``_make_unary`` returns an implementation only when ``a == bfloat16``.
    """
    impl = _make_unary(a, hfloor)
    return impl
158
+
159
+
160
@overload(math.sqrt, target="cuda")
def sqrt_ol(a):
    """CUDA-target ``math.sqrt`` overload backed by ``hsqrt``.

    ``_make_unary`` returns an implementation only when ``a == bfloat16``.
    """
    impl = _make_unary(a, hsqrt)
    return impl
163
+
164
+
165
@overload(math.log, target="cuda")
def log_ol(a):
    """CUDA-target ``math.log`` overload backed by ``hlog``.

    ``_make_unary`` returns an implementation only when ``a == bfloat16``.
    """
    impl = _make_unary(a, hlog)
    return impl
168
+
169
+
170
@overload(math.log10, target="cuda")
def log10_ol(a):
    """CUDA-target ``math.log10`` overload backed by ``hlog10``.

    ``_make_unary`` returns an implementation only when ``a == bfloat16``.
    """
    impl = _make_unary(a, hlog10)
    return impl
173
+
174
+
175
@overload(math.cos, target="cuda")
def cos_ol(a):
    """CUDA-target ``math.cos`` overload backed by ``hcos``.

    ``_make_unary`` returns an implementation only when ``a == bfloat16``.
    """
    impl = _make_unary(a, hcos)
    return impl
178
+
179
+
180
@overload(math.sin, target="cuda")
def sin_ol(a):
    """CUDA-target ``math.sin`` overload backed by ``hsin``.

    ``_make_unary`` returns an implementation only when ``a == bfloat16``.
    """
    impl = _make_unary(a, hsin)
    return impl
183
+
184
+
185
@overload(math.tanh, target="cuda")
def tanh_ol(a):
    """CUDA-target ``math.tanh`` overload backed by ``htanh``.

    ``_make_unary`` returns an implementation only when ``a == bfloat16``.
    """
    impl = _make_unary(a, htanh)
    return impl
188
+
189
+
190
@overload(math.exp, target="cuda")
def exp_ol(a):
    """CUDA-target ``math.exp`` overload backed by ``hexp``.

    ``_make_unary`` returns an implementation only when ``a == bfloat16``.
    """
    impl = _make_unary(a, hexp)
    return impl
193
+
194
+
195
# ``math.exp2`` only exists from Python 3.11 onwards, so the overload is
# registered conditionally.
if sys.version_info >= (3, 11):

    @overload(math.exp2, target="cuda")
    def exp2_ol(a):
        """CUDA-target ``math.exp2`` overload backed by ``hexp2``.

        ``_make_unary`` returns an implementation only when
        ``a == bfloat16``.
        """
        impl = _make_unary(a, hexp2)
        return impl
200
+
201
+
202
## Public aliases using Numba/Numpy-style type names
# Each group binds the new names, in order, to the intrinsics imported above.

# Floating-point
float32_to_bfloat16, float64_to_bfloat16, bfloat16_to_float32 = (
    float2bfloat16,
    double2bfloat16,
    bfloat162float,
)
(
    float32_to_bfloat16_rn,
    float32_to_bfloat16_rz,
    float32_to_bfloat16_rd,
    float32_to_bfloat16_ru,
) = (
    float2bfloat16_rn,
    float2bfloat16_rz,
    float2bfloat16_rd,
    float2bfloat16_ru,
)

# Char (8-bit)
bfloat16_to_int8_rz, bfloat16_to_uint8_rz = (
    bfloat162char_rz,
    bfloat162uchar_rz,
)

# Int16 / UInt16
(
    int16_to_bfloat16_rn,
    int16_to_bfloat16_rz,
    int16_to_bfloat16_rd,
    int16_to_bfloat16_ru,
) = (
    short2bfloat16_rn,
    short2bfloat16_rz,
    short2bfloat16_rd,
    short2bfloat16_ru,
)
(
    bfloat16_to_int16_rn,
    bfloat16_to_int16_rz,
    bfloat16_to_int16_rd,
    bfloat16_to_int16_ru,
) = (
    bfloat162short_rn,
    bfloat162short_rz,
    bfloat162short_rd,
    bfloat162short_ru,
)

(
    uint16_to_bfloat16_rn,
    uint16_to_bfloat16_rz,
    uint16_to_bfloat16_rd,
    uint16_to_bfloat16_ru,
) = (
    ushort2bfloat16_rn,
    ushort2bfloat16_rz,
    ushort2bfloat16_rd,
    ushort2bfloat16_ru,
)
(
    bfloat16_to_uint16_rn,
    bfloat16_to_uint16_rz,
    bfloat16_to_uint16_rd,
    bfloat16_to_uint16_ru,
) = (
    bfloat162ushort_rn,
    bfloat162ushort_rz,
    bfloat162ushort_rd,
    bfloat162ushort_ru,
)

# Int32 / UInt32
(
    int32_to_bfloat16_rn,
    int32_to_bfloat16_rz,
    int32_to_bfloat16_rd,
    int32_to_bfloat16_ru,
) = (
    int2bfloat16_rn,
    int2bfloat16_rz,
    int2bfloat16_rd,
    int2bfloat16_ru,
)
(
    bfloat16_to_int32_rn,
    bfloat16_to_int32_rz,
    bfloat16_to_int32_rd,
    bfloat16_to_int32_ru,
) = (
    bfloat162int_rn,
    bfloat162int_rz,
    bfloat162int_rd,
    bfloat162int_ru,
)

(
    uint32_to_bfloat16_rn,
    uint32_to_bfloat16_rz,
    uint32_to_bfloat16_rd,
    uint32_to_bfloat16_ru,
) = (
    uint2bfloat16_rn,
    uint2bfloat16_rz,
    uint2bfloat16_rd,
    uint2bfloat16_ru,
)
(
    bfloat16_to_uint32_rn,
    bfloat16_to_uint32_rz,
    bfloat16_to_uint32_rd,
    bfloat16_to_uint32_ru,
) = (
    bfloat162uint_rn,
    bfloat162uint_rz,
    bfloat162uint_rd,
    bfloat162uint_ru,
)

# Int64 / UInt64
(
    int64_to_bfloat16_rn,
    int64_to_bfloat16_rz,
    int64_to_bfloat16_rd,
    int64_to_bfloat16_ru,
) = (
    ll2bfloat16_rn,
    ll2bfloat16_rz,
    ll2bfloat16_rd,
    ll2bfloat16_ru,
)
(
    bfloat16_to_int64_rn,
    bfloat16_to_int64_rz,
    bfloat16_to_int64_rd,
    bfloat16_to_int64_ru,
) = (
    bfloat162ll_rn,
    bfloat162ll_rz,
    bfloat162ll_rd,
    bfloat162ll_ru,
)

(
    uint64_to_bfloat16_rn,
    uint64_to_bfloat16_rz,
    uint64_to_bfloat16_rd,
    uint64_to_bfloat16_ru,
) = (
    ull2bfloat16_rn,
    ull2bfloat16_rz,
    ull2bfloat16_rd,
    ull2bfloat16_ru,
)
(
    bfloat16_to_uint64_rn,
    bfloat16_to_uint64_rz,
    bfloat16_to_uint64_rd,
    bfloat16_to_uint64_ru,
) = (
    bfloat162ull_rn,
    bfloat162ull_rz,
    bfloat162ull_rd,
    bfloat162ull_ru,
)

# Bit reinterpret casts
(
    bfloat16_as_int16,
    bfloat16_as_uint16,
    int16_as_bfloat16,
    uint16_as_bfloat16,
) = (
    bfloat16_as_short,
    bfloat16_as_ushort,
    short_as_bfloat16,
    ushort_as_bfloat16,
)
278
+
279
# Public API of this module. Conversion helpers are exported under their
# Numba/NumPy-style alias names.
__all__ = [
    # Registries and the bfloat16 type
    "typing_registry",
    "target_registry",
    "bfloat16",
    # Arithmetic
    "habs",
    "hadd",
    "hsub",
    "hmul",
    "hadd_rn",
    "hsub_rn",
    "hmul_rn",
    "hdiv",
    "hadd_sat",
    "hsub_sat",
    "hmul_sat",
    "hfma",
    "hfma_sat",
    "hneg",
    "hfma_relu",
    # Comparisons
    "heq",
    "hne",
    "hge",
    "hgt",
    "hle",
    "hlt",
    "hmax",
    "hmin",
    "hmax_nan",
    "hmin_nan",
    "hisinf",
    "hisnan",
    # Unordered comparisons
    "hequ",
    "hneu",
    "hgeu",
    "hgtu",
    "hleu",
    "hltu",
    # Conversions: floating-point
    "float32_to_bfloat16",
    "float64_to_bfloat16",
    "bfloat16_to_float32",
    "float32_to_bfloat16_rn",
    "float32_to_bfloat16_rz",
    "float32_to_bfloat16_rd",
    "float32_to_bfloat16_ru",
    # Conversions: 8-bit integers
    "bfloat16_to_int8_rz",
    "bfloat16_to_uint8_rz",
    # Conversions: 16-bit integers
    "int16_to_bfloat16_rn",
    "int16_to_bfloat16_rz",
    "int16_to_bfloat16_rd",
    "int16_to_bfloat16_ru",
    "bfloat16_to_int16_rn",
    "bfloat16_to_int16_rz",
    "bfloat16_to_int16_rd",
    "bfloat16_to_int16_ru",
    "uint16_to_bfloat16_rn",
    "uint16_to_bfloat16_rz",
    "uint16_to_bfloat16_rd",
    "uint16_to_bfloat16_ru",
    "bfloat16_to_uint16_rn",
    "bfloat16_to_uint16_rz",
    "bfloat16_to_uint16_rd",
    "bfloat16_to_uint16_ru",
    # Conversions: 32-bit integers
    "int32_to_bfloat16_rn",
    "int32_to_bfloat16_rz",
    "int32_to_bfloat16_rd",
    "int32_to_bfloat16_ru",
    "bfloat16_to_int32_rn",
    "bfloat16_to_int32_rz",
    "bfloat16_to_int32_rd",
    "bfloat16_to_int32_ru",
    "uint32_to_bfloat16_rn",
    "uint32_to_bfloat16_rz",
    "uint32_to_bfloat16_rd",
    "uint32_to_bfloat16_ru",
    "bfloat16_to_uint32_rn",
    "bfloat16_to_uint32_rz",
    "bfloat16_to_uint32_rd",
    "bfloat16_to_uint32_ru",
    # Conversions: 64-bit integers
    "int64_to_bfloat16_rn",
    "int64_to_bfloat16_rz",
    "int64_to_bfloat16_rd",
    "int64_to_bfloat16_ru",
    "bfloat16_to_int64_rn",
    "bfloat16_to_int64_rz",
    "bfloat16_to_int64_rd",
    "bfloat16_to_int64_ru",
    "uint64_to_bfloat16_rn",
    "uint64_to_bfloat16_rz",
    "uint64_to_bfloat16_rd",
    "uint64_to_bfloat16_ru",
    "bfloat16_to_uint64_rn",
    "bfloat16_to_uint64_rz",
    "bfloat16_to_uint64_rd",
    "bfloat16_to_uint64_ru",
    # Bit reinterpret casts
    "bfloat16_as_int16",
    "bfloat16_as_uint16",
    "int16_as_bfloat16",
    "uint16_as_bfloat16",
    # Math functions
    "htrunc",
    "hceil",
    "hfloor",
    "hrint",
    "hsqrt",
    "hrsqrt",
    "hrcp",
    "hlog",
    "hlog2",
    "hlog10",
    "hcos",
    "hsin",
    "htanh",
    "htanh_approx",
    "hexp",
    "hexp2",
    "hexp10",
]