numba-cuda 0.22.0__cp312-cp312-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of numba-cuda might be problematic. Click here for more details.

Files changed (487):
  1. _numba_cuda_redirector.pth +4 -0
  2. _numba_cuda_redirector.py +89 -0
  3. numba_cuda/VERSION +1 -0
  4. numba_cuda/__init__.py +6 -0
  5. numba_cuda/_version.py +11 -0
  6. numba_cuda/numba/cuda/__init__.py +70 -0
  7. numba_cuda/numba/cuda/_internal/cuda_bf16.py +16394 -0
  8. numba_cuda/numba/cuda/_internal/cuda_fp16.py +8112 -0
  9. numba_cuda/numba/cuda/api.py +580 -0
  10. numba_cuda/numba/cuda/api_util.py +76 -0
  11. numba_cuda/numba/cuda/args.py +72 -0
  12. numba_cuda/numba/cuda/bf16.py +397 -0
  13. numba_cuda/numba/cuda/cache_hints.py +287 -0
  14. numba_cuda/numba/cuda/cext/__init__.py +2 -0
  15. numba_cuda/numba/cuda/cext/_devicearray.cpp +159 -0
  16. numba_cuda/numba/cuda/cext/_devicearray.cpython-312-aarch64-linux-gnu.so +0 -0
  17. numba_cuda/numba/cuda/cext/_devicearray.h +29 -0
  18. numba_cuda/numba/cuda/cext/_dispatcher.cpp +1098 -0
  19. numba_cuda/numba/cuda/cext/_dispatcher.cpython-312-aarch64-linux-gnu.so +0 -0
  20. numba_cuda/numba/cuda/cext/_hashtable.cpp +532 -0
  21. numba_cuda/numba/cuda/cext/_hashtable.h +135 -0
  22. numba_cuda/numba/cuda/cext/_helperlib.c +71 -0
  23. numba_cuda/numba/cuda/cext/_helperlib.cpython-312-aarch64-linux-gnu.so +0 -0
  24. numba_cuda/numba/cuda/cext/_helpermod.c +82 -0
  25. numba_cuda/numba/cuda/cext/_pymodule.h +38 -0
  26. numba_cuda/numba/cuda/cext/_typeconv.cpp +206 -0
  27. numba_cuda/numba/cuda/cext/_typeconv.cpython-312-aarch64-linux-gnu.so +0 -0
  28. numba_cuda/numba/cuda/cext/_typeof.cpp +1159 -0
  29. numba_cuda/numba/cuda/cext/_typeof.h +19 -0
  30. numba_cuda/numba/cuda/cext/capsulethunk.h +111 -0
  31. numba_cuda/numba/cuda/cext/mviewbuf.c +385 -0
  32. numba_cuda/numba/cuda/cext/mviewbuf.cpython-312-aarch64-linux-gnu.so +0 -0
  33. numba_cuda/numba/cuda/cext/typeconv.cpp +212 -0
  34. numba_cuda/numba/cuda/cext/typeconv.hpp +101 -0
  35. numba_cuda/numba/cuda/cg.py +67 -0
  36. numba_cuda/numba/cuda/cgutils.py +1294 -0
  37. numba_cuda/numba/cuda/cloudpickle/__init__.py +21 -0
  38. numba_cuda/numba/cuda/cloudpickle/cloudpickle.py +1598 -0
  39. numba_cuda/numba/cuda/cloudpickle/cloudpickle_fast.py +17 -0
  40. numba_cuda/numba/cuda/codegen.py +541 -0
  41. numba_cuda/numba/cuda/compiler.py +1396 -0
  42. numba_cuda/numba/cuda/core/analysis.py +758 -0
  43. numba_cuda/numba/cuda/core/annotations/__init__.py +0 -0
  44. numba_cuda/numba/cuda/core/annotations/pretty_annotate.py +288 -0
  45. numba_cuda/numba/cuda/core/annotations/type_annotations.py +305 -0
  46. numba_cuda/numba/cuda/core/base.py +1332 -0
  47. numba_cuda/numba/cuda/core/boxing.py +1411 -0
  48. numba_cuda/numba/cuda/core/bytecode.py +728 -0
  49. numba_cuda/numba/cuda/core/byteflow.py +2346 -0
  50. numba_cuda/numba/cuda/core/caching.py +744 -0
  51. numba_cuda/numba/cuda/core/callconv.py +392 -0
  52. numba_cuda/numba/cuda/core/codegen.py +171 -0
  53. numba_cuda/numba/cuda/core/compiler.py +199 -0
  54. numba_cuda/numba/cuda/core/compiler_lock.py +85 -0
  55. numba_cuda/numba/cuda/core/compiler_machinery.py +497 -0
  56. numba_cuda/numba/cuda/core/config.py +650 -0
  57. numba_cuda/numba/cuda/core/consts.py +124 -0
  58. numba_cuda/numba/cuda/core/controlflow.py +989 -0
  59. numba_cuda/numba/cuda/core/entrypoints.py +57 -0
  60. numba_cuda/numba/cuda/core/environment.py +66 -0
  61. numba_cuda/numba/cuda/core/errors.py +917 -0
  62. numba_cuda/numba/cuda/core/event.py +511 -0
  63. numba_cuda/numba/cuda/core/funcdesc.py +330 -0
  64. numba_cuda/numba/cuda/core/generators.py +387 -0
  65. numba_cuda/numba/cuda/core/imputils.py +509 -0
  66. numba_cuda/numba/cuda/core/inline_closurecall.py +1787 -0
  67. numba_cuda/numba/cuda/core/interpreter.py +3617 -0
  68. numba_cuda/numba/cuda/core/ir.py +1812 -0
  69. numba_cuda/numba/cuda/core/ir_utils.py +2638 -0
  70. numba_cuda/numba/cuda/core/optional.py +129 -0
  71. numba_cuda/numba/cuda/core/options.py +262 -0
  72. numba_cuda/numba/cuda/core/postproc.py +249 -0
  73. numba_cuda/numba/cuda/core/pythonapi.py +1859 -0
  74. numba_cuda/numba/cuda/core/registry.py +46 -0
  75. numba_cuda/numba/cuda/core/removerefctpass.py +123 -0
  76. numba_cuda/numba/cuda/core/rewrites/__init__.py +26 -0
  77. numba_cuda/numba/cuda/core/rewrites/ir_print.py +91 -0
  78. numba_cuda/numba/cuda/core/rewrites/registry.py +104 -0
  79. numba_cuda/numba/cuda/core/rewrites/static_binop.py +41 -0
  80. numba_cuda/numba/cuda/core/rewrites/static_getitem.py +189 -0
  81. numba_cuda/numba/cuda/core/rewrites/static_raise.py +100 -0
  82. numba_cuda/numba/cuda/core/sigutils.py +68 -0
  83. numba_cuda/numba/cuda/core/ssa.py +498 -0
  84. numba_cuda/numba/cuda/core/targetconfig.py +330 -0
  85. numba_cuda/numba/cuda/core/tracing.py +231 -0
  86. numba_cuda/numba/cuda/core/transforms.py +956 -0
  87. numba_cuda/numba/cuda/core/typed_passes.py +867 -0
  88. numba_cuda/numba/cuda/core/typeinfer.py +1950 -0
  89. numba_cuda/numba/cuda/core/unsafe/__init__.py +0 -0
  90. numba_cuda/numba/cuda/core/unsafe/bytes.py +67 -0
  91. numba_cuda/numba/cuda/core/unsafe/eh.py +67 -0
  92. numba_cuda/numba/cuda/core/unsafe/refcount.py +98 -0
  93. numba_cuda/numba/cuda/core/untyped_passes.py +1979 -0
  94. numba_cuda/numba/cuda/cpython/builtins.py +1153 -0
  95. numba_cuda/numba/cuda/cpython/charseq.py +1218 -0
  96. numba_cuda/numba/cuda/cpython/cmathimpl.py +560 -0
  97. numba_cuda/numba/cuda/cpython/enumimpl.py +103 -0
  98. numba_cuda/numba/cuda/cpython/iterators.py +167 -0
  99. numba_cuda/numba/cuda/cpython/listobj.py +1326 -0
  100. numba_cuda/numba/cuda/cpython/mathimpl.py +499 -0
  101. numba_cuda/numba/cuda/cpython/numbers.py +1475 -0
  102. numba_cuda/numba/cuda/cpython/rangeobj.py +289 -0
  103. numba_cuda/numba/cuda/cpython/slicing.py +322 -0
  104. numba_cuda/numba/cuda/cpython/tupleobj.py +456 -0
  105. numba_cuda/numba/cuda/cpython/unicode.py +2865 -0
  106. numba_cuda/numba/cuda/cpython/unicode_support.py +1597 -0
  107. numba_cuda/numba/cuda/cpython/unsafe/__init__.py +0 -0
  108. numba_cuda/numba/cuda/cpython/unsafe/numbers.py +64 -0
  109. numba_cuda/numba/cuda/cpython/unsafe/tuple.py +92 -0
  110. numba_cuda/numba/cuda/cuda_paths.py +691 -0
  111. numba_cuda/numba/cuda/cudadecl.py +543 -0
  112. numba_cuda/numba/cuda/cudadrv/__init__.py +14 -0
  113. numba_cuda/numba/cuda/cudadrv/devicearray.py +954 -0
  114. numba_cuda/numba/cuda/cudadrv/devices.py +249 -0
  115. numba_cuda/numba/cuda/cudadrv/driver.py +3238 -0
  116. numba_cuda/numba/cuda/cudadrv/drvapi.py +435 -0
  117. numba_cuda/numba/cuda/cudadrv/dummyarray.py +562 -0
  118. numba_cuda/numba/cuda/cudadrv/enums.py +613 -0
  119. numba_cuda/numba/cuda/cudadrv/error.py +48 -0
  120. numba_cuda/numba/cuda/cudadrv/libs.py +220 -0
  121. numba_cuda/numba/cuda/cudadrv/linkable_code.py +184 -0
  122. numba_cuda/numba/cuda/cudadrv/mappings.py +14 -0
  123. numba_cuda/numba/cuda/cudadrv/ndarray.py +26 -0
  124. numba_cuda/numba/cuda/cudadrv/nvrtc.py +193 -0
  125. numba_cuda/numba/cuda/cudadrv/nvvm.py +756 -0
  126. numba_cuda/numba/cuda/cudadrv/rtapi.py +13 -0
  127. numba_cuda/numba/cuda/cudadrv/runtime.py +34 -0
  128. numba_cuda/numba/cuda/cudaimpl.py +983 -0
  129. numba_cuda/numba/cuda/cudamath.py +149 -0
  130. numba_cuda/numba/cuda/datamodel/__init__.py +7 -0
  131. numba_cuda/numba/cuda/datamodel/cuda_manager.py +66 -0
  132. numba_cuda/numba/cuda/datamodel/cuda_models.py +1446 -0
  133. numba_cuda/numba/cuda/datamodel/cuda_packer.py +224 -0
  134. numba_cuda/numba/cuda/datamodel/cuda_registry.py +22 -0
  135. numba_cuda/numba/cuda/datamodel/cuda_testing.py +153 -0
  136. numba_cuda/numba/cuda/datamodel/manager.py +11 -0
  137. numba_cuda/numba/cuda/datamodel/models.py +9 -0
  138. numba_cuda/numba/cuda/datamodel/packer.py +9 -0
  139. numba_cuda/numba/cuda/datamodel/registry.py +11 -0
  140. numba_cuda/numba/cuda/datamodel/testing.py +11 -0
  141. numba_cuda/numba/cuda/debuginfo.py +997 -0
  142. numba_cuda/numba/cuda/decorators.py +294 -0
  143. numba_cuda/numba/cuda/descriptor.py +35 -0
  144. numba_cuda/numba/cuda/device_init.py +155 -0
  145. numba_cuda/numba/cuda/deviceufunc.py +1021 -0
  146. numba_cuda/numba/cuda/dispatcher.py +2463 -0
  147. numba_cuda/numba/cuda/errors.py +72 -0
  148. numba_cuda/numba/cuda/extending.py +697 -0
  149. numba_cuda/numba/cuda/flags.py +178 -0
  150. numba_cuda/numba/cuda/fp16.py +357 -0
  151. numba_cuda/numba/cuda/include/12/cuda_bf16.h +5118 -0
  152. numba_cuda/numba/cuda/include/12/cuda_bf16.hpp +3865 -0
  153. numba_cuda/numba/cuda/include/12/cuda_fp16.h +5363 -0
  154. numba_cuda/numba/cuda/include/12/cuda_fp16.hpp +3483 -0
  155. numba_cuda/numba/cuda/include/13/cuda_bf16.h +5118 -0
  156. numba_cuda/numba/cuda/include/13/cuda_bf16.hpp +3865 -0
  157. numba_cuda/numba/cuda/include/13/cuda_fp16.h +5363 -0
  158. numba_cuda/numba/cuda/include/13/cuda_fp16.hpp +3483 -0
  159. numba_cuda/numba/cuda/initialize.py +24 -0
  160. numba_cuda/numba/cuda/intrinsics.py +531 -0
  161. numba_cuda/numba/cuda/itanium_mangler.py +214 -0
  162. numba_cuda/numba/cuda/kernels/__init__.py +2 -0
  163. numba_cuda/numba/cuda/kernels/reduction.py +265 -0
  164. numba_cuda/numba/cuda/kernels/transpose.py +65 -0
  165. numba_cuda/numba/cuda/libdevice.py +3386 -0
  166. numba_cuda/numba/cuda/libdevicedecl.py +20 -0
  167. numba_cuda/numba/cuda/libdevicefuncs.py +1060 -0
  168. numba_cuda/numba/cuda/libdeviceimpl.py +88 -0
  169. numba_cuda/numba/cuda/locks.py +19 -0
  170. numba_cuda/numba/cuda/lowering.py +1980 -0
  171. numba_cuda/numba/cuda/mathimpl.py +374 -0
  172. numba_cuda/numba/cuda/memory_management/__init__.py +4 -0
  173. numba_cuda/numba/cuda/memory_management/memsys.cu +99 -0
  174. numba_cuda/numba/cuda/memory_management/memsys.cuh +22 -0
  175. numba_cuda/numba/cuda/memory_management/nrt.cu +212 -0
  176. numba_cuda/numba/cuda/memory_management/nrt.cuh +48 -0
  177. numba_cuda/numba/cuda/memory_management/nrt.py +390 -0
  178. numba_cuda/numba/cuda/memory_management/nrt_context.py +438 -0
  179. numba_cuda/numba/cuda/misc/appdirs.py +594 -0
  180. numba_cuda/numba/cuda/misc/cffiimpl.py +24 -0
  181. numba_cuda/numba/cuda/misc/coverage_support.py +43 -0
  182. numba_cuda/numba/cuda/misc/dump_style.py +41 -0
  183. numba_cuda/numba/cuda/misc/findlib.py +75 -0
  184. numba_cuda/numba/cuda/misc/firstlinefinder.py +96 -0
  185. numba_cuda/numba/cuda/misc/gdb_hook.py +240 -0
  186. numba_cuda/numba/cuda/misc/literal.py +28 -0
  187. numba_cuda/numba/cuda/misc/llvm_pass_timings.py +412 -0
  188. numba_cuda/numba/cuda/misc/special.py +94 -0
  189. numba_cuda/numba/cuda/models.py +56 -0
  190. numba_cuda/numba/cuda/np/arraymath.py +5130 -0
  191. numba_cuda/numba/cuda/np/arrayobj.py +7635 -0
  192. numba_cuda/numba/cuda/np/extensions.py +11 -0
  193. numba_cuda/numba/cuda/np/linalg.py +3087 -0
  194. numba_cuda/numba/cuda/np/math/__init__.py +0 -0
  195. numba_cuda/numba/cuda/np/math/cmathimpl.py +558 -0
  196. numba_cuda/numba/cuda/np/math/mathimpl.py +487 -0
  197. numba_cuda/numba/cuda/np/math/numbers.py +1461 -0
  198. numba_cuda/numba/cuda/np/npdatetime.py +969 -0
  199. numba_cuda/numba/cuda/np/npdatetime_helpers.py +217 -0
  200. numba_cuda/numba/cuda/np/npyfuncs.py +1808 -0
  201. numba_cuda/numba/cuda/np/npyimpl.py +1027 -0
  202. numba_cuda/numba/cuda/np/numpy_support.py +798 -0
  203. numba_cuda/numba/cuda/np/polynomial/__init__.py +4 -0
  204. numba_cuda/numba/cuda/np/polynomial/polynomial_core.py +242 -0
  205. numba_cuda/numba/cuda/np/polynomial/polynomial_functions.py +380 -0
  206. numba_cuda/numba/cuda/np/ufunc/__init__.py +4 -0
  207. numba_cuda/numba/cuda/np/ufunc/decorators.py +203 -0
  208. numba_cuda/numba/cuda/np/ufunc/sigparse.py +68 -0
  209. numba_cuda/numba/cuda/np/ufunc/ufuncbuilder.py +65 -0
  210. numba_cuda/numba/cuda/np/ufunc_db.py +1282 -0
  211. numba_cuda/numba/cuda/np/unsafe/__init__.py +0 -0
  212. numba_cuda/numba/cuda/np/unsafe/ndarray.py +84 -0
  213. numba_cuda/numba/cuda/nvvmutils.py +254 -0
  214. numba_cuda/numba/cuda/printimpl.py +126 -0
  215. numba_cuda/numba/cuda/random.py +308 -0
  216. numba_cuda/numba/cuda/reshape_funcs.cu +156 -0
  217. numba_cuda/numba/cuda/serialize.py +267 -0
  218. numba_cuda/numba/cuda/simulator/__init__.py +63 -0
  219. numba_cuda/numba/cuda/simulator/_internal/__init__.py +4 -0
  220. numba_cuda/numba/cuda/simulator/_internal/cuda_bf16.py +2 -0
  221. numba_cuda/numba/cuda/simulator/api.py +179 -0
  222. numba_cuda/numba/cuda/simulator/bf16.py +4 -0
  223. numba_cuda/numba/cuda/simulator/compiler.py +38 -0
  224. numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +11 -0
  225. numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +462 -0
  226. numba_cuda/numba/cuda/simulator/cudadrv/devices.py +122 -0
  227. numba_cuda/numba/cuda/simulator/cudadrv/driver.py +66 -0
  228. numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +7 -0
  229. numba_cuda/numba/cuda/simulator/cudadrv/dummyarray.py +7 -0
  230. numba_cuda/numba/cuda/simulator/cudadrv/error.py +10 -0
  231. numba_cuda/numba/cuda/simulator/cudadrv/libs.py +10 -0
  232. numba_cuda/numba/cuda/simulator/cudadrv/linkable_code.py +61 -0
  233. numba_cuda/numba/cuda/simulator/cudadrv/nvrtc.py +11 -0
  234. numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +32 -0
  235. numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +22 -0
  236. numba_cuda/numba/cuda/simulator/dispatcher.py +11 -0
  237. numba_cuda/numba/cuda/simulator/kernel.py +320 -0
  238. numba_cuda/numba/cuda/simulator/kernelapi.py +509 -0
  239. numba_cuda/numba/cuda/simulator/memory_management/__init__.py +4 -0
  240. numba_cuda/numba/cuda/simulator/memory_management/nrt.py +21 -0
  241. numba_cuda/numba/cuda/simulator/reduction.py +19 -0
  242. numba_cuda/numba/cuda/simulator/tests/support.py +4 -0
  243. numba_cuda/numba/cuda/simulator/vector_types.py +65 -0
  244. numba_cuda/numba/cuda/simulator_init.py +18 -0
  245. numba_cuda/numba/cuda/stubs.py +624 -0
  246. numba_cuda/numba/cuda/target.py +505 -0
  247. numba_cuda/numba/cuda/testing.py +347 -0
  248. numba_cuda/numba/cuda/tests/__init__.py +62 -0
  249. numba_cuda/numba/cuda/tests/benchmarks/__init__.py +0 -0
  250. numba_cuda/numba/cuda/tests/benchmarks/test_kernel_launch.py +119 -0
  251. numba_cuda/numba/cuda/tests/cloudpickle_main_class.py +9 -0
  252. numba_cuda/numba/cuda/tests/core/serialize_usecases.py +113 -0
  253. numba_cuda/numba/cuda/tests/core/test_itanium_mangler.py +83 -0
  254. numba_cuda/numba/cuda/tests/core/test_serialize.py +371 -0
  255. numba_cuda/numba/cuda/tests/cudadrv/__init__.py +9 -0
  256. numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +147 -0
  257. numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +161 -0
  258. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +397 -0
  259. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +24 -0
  260. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +180 -0
  261. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +313 -0
  262. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +191 -0
  263. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +621 -0
  264. numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +247 -0
  265. numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +100 -0
  266. numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +200 -0
  267. numba_cuda/numba/cuda/tests/cudadrv/test_events.py +53 -0
  268. numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +72 -0
  269. numba_cuda/numba/cuda/tests/cudadrv/test_init.py +138 -0
  270. numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +43 -0
  271. numba_cuda/numba/cuda/tests/cudadrv/test_is_fp16.py +15 -0
  272. numba_cuda/numba/cuda/tests/cudadrv/test_linkable_code.py +58 -0
  273. numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +348 -0
  274. numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +128 -0
  275. numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py +301 -0
  276. numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +174 -0
  277. numba_cuda/numba/cuda/tests/cudadrv/test_nvrtc.py +28 -0
  278. numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +185 -0
  279. numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +39 -0
  280. numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +23 -0
  281. numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +38 -0
  282. numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +48 -0
  283. numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +44 -0
  284. numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +127 -0
  285. numba_cuda/numba/cuda/tests/cudapy/__init__.py +9 -0
  286. numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +231 -0
  287. numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +50 -0
  288. numba_cuda/numba/cuda/tests/cudapy/cg_cache_usecases.py +36 -0
  289. numba_cuda/numba/cuda/tests/cudapy/complex_usecases.py +116 -0
  290. numba_cuda/numba/cuda/tests/cudapy/enum_usecases.py +59 -0
  291. numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +62 -0
  292. numba_cuda/numba/cuda/tests/cudapy/jitlink.ptx +28 -0
  293. numba_cuda/numba/cuda/tests/cudapy/overload_usecases.py +33 -0
  294. numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +104 -0
  295. numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +47 -0
  296. numba_cuda/numba/cuda/tests/cudapy/test_analysis.py +1122 -0
  297. numba_cuda/numba/cuda/tests/cudapy/test_array.py +344 -0
  298. numba_cuda/numba/cuda/tests/cudapy/test_array_alignment.py +268 -0
  299. numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +203 -0
  300. numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +63 -0
  301. numba_cuda/numba/cuda/tests/cudapy/test_array_reductions.py +360 -0
  302. numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +1815 -0
  303. numba_cuda/numba/cuda/tests/cudapy/test_bfloat16.py +599 -0
  304. numba_cuda/numba/cuda/tests/cudapy/test_bfloat16_bindings.py +377 -0
  305. numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +160 -0
  306. numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +27 -0
  307. numba_cuda/numba/cuda/tests/cudapy/test_byteflow.py +98 -0
  308. numba_cuda/numba/cuda/tests/cudapy/test_cache_hints.py +210 -0
  309. numba_cuda/numba/cuda/tests/cudapy/test_caching.py +683 -0
  310. numba_cuda/numba/cuda/tests/cudapy/test_casting.py +265 -0
  311. numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +42 -0
  312. numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +718 -0
  313. numba_cuda/numba/cuda/tests/cudapy/test_complex.py +370 -0
  314. numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +23 -0
  315. numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +142 -0
  316. numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +178 -0
  317. numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +193 -0
  318. numba_cuda/numba/cuda/tests/cudapy/test_copy_propagate.py +131 -0
  319. numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +438 -0
  320. numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +94 -0
  321. numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +101 -0
  322. numba_cuda/numba/cuda/tests/cudapy/test_debug.py +105 -0
  323. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +978 -0
  324. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo_types.py +476 -0
  325. numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +500 -0
  326. numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +820 -0
  327. numba_cuda/numba/cuda/tests/cudapy/test_enums.py +152 -0
  328. numba_cuda/numba/cuda/tests/cudapy/test_errors.py +111 -0
  329. numba_cuda/numba/cuda/tests/cudapy/test_exception.py +170 -0
  330. numba_cuda/numba/cuda/tests/cudapy/test_extending.py +1088 -0
  331. numba_cuda/numba/cuda/tests/cudapy/test_extending_types.py +71 -0
  332. numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +265 -0
  333. numba_cuda/numba/cuda/tests/cudapy/test_flow_control.py +1433 -0
  334. numba_cuda/numba/cuda/tests/cudapy/test_forall.py +57 -0
  335. numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +34 -0
  336. numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +69 -0
  337. numba_cuda/numba/cuda/tests/cudapy/test_globals.py +62 -0
  338. numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +474 -0
  339. numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +167 -0
  340. numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +92 -0
  341. numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +39 -0
  342. numba_cuda/numba/cuda/tests/cudapy/test_inline.py +170 -0
  343. numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +255 -0
  344. numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +1219 -0
  345. numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +263 -0
  346. numba_cuda/numba/cuda/tests/cudapy/test_ir.py +598 -0
  347. numba_cuda/numba/cuda/tests/cudapy/test_ir_utils.py +276 -0
  348. numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +101 -0
  349. numba_cuda/numba/cuda/tests/cudapy/test_lang.py +68 -0
  350. numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +123 -0
  351. numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +194 -0
  352. numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +220 -0
  353. numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +173 -0
  354. numba_cuda/numba/cuda/tests/cudapy/test_make_function_to_jit_function.py +364 -0
  355. numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +47 -0
  356. numba_cuda/numba/cuda/tests/cudapy/test_math.py +842 -0
  357. numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +76 -0
  358. numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +78 -0
  359. numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +25 -0
  360. numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +145 -0
  361. numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +39 -0
  362. numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +82 -0
  363. numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +53 -0
  364. numba_cuda/numba/cuda/tests/cudapy/test_operator.py +504 -0
  365. numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +93 -0
  366. numba_cuda/numba/cuda/tests/cudapy/test_overload.py +402 -0
  367. numba_cuda/numba/cuda/tests/cudapy/test_powi.py +128 -0
  368. numba_cuda/numba/cuda/tests/cudapy/test_print.py +193 -0
  369. numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +37 -0
  370. numba_cuda/numba/cuda/tests/cudapy/test_random.py +117 -0
  371. numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +614 -0
  372. numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +130 -0
  373. numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +94 -0
  374. numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +83 -0
  375. numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +86 -0
  376. numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +40 -0
  377. numba_cuda/numba/cuda/tests/cudapy/test_sm.py +457 -0
  378. numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +233 -0
  379. numba_cuda/numba/cuda/tests/cudapy/test_ssa.py +454 -0
  380. numba_cuda/numba/cuda/tests/cudapy/test_stream_api.py +56 -0
  381. numba_cuda/numba/cuda/tests/cudapy/test_sync.py +277 -0
  382. numba_cuda/numba/cuda/tests/cudapy/test_tracing.py +200 -0
  383. numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +90 -0
  384. numba_cuda/numba/cuda/tests/cudapy/test_typeconv.py +333 -0
  385. numba_cuda/numba/cuda/tests/cudapy/test_typeinfer.py +538 -0
  386. numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +585 -0
  387. numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +42 -0
  388. numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +485 -0
  389. numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +312 -0
  390. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +23 -0
  391. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +183 -0
  392. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +40 -0
  393. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +40 -0
  394. numba_cuda/numba/cuda/tests/cudapy/test_warning.py +206 -0
  395. numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +446 -0
  396. numba_cuda/numba/cuda/tests/cudasim/__init__.py +9 -0
  397. numba_cuda/numba/cuda/tests/cudasim/support.py +9 -0
  398. numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +111 -0
  399. numba_cuda/numba/cuda/tests/data/__init__.py +2 -0
  400. numba_cuda/numba/cuda/tests/data/cta_barrier.cu +28 -0
  401. numba_cuda/numba/cuda/tests/data/cuda_include.cu +10 -0
  402. numba_cuda/numba/cuda/tests/data/error.cu +12 -0
  403. numba_cuda/numba/cuda/tests/data/include/add.cuh +8 -0
  404. numba_cuda/numba/cuda/tests/data/jitlink.cu +28 -0
  405. numba_cuda/numba/cuda/tests/data/jitlink.ptx +49 -0
  406. numba_cuda/numba/cuda/tests/data/warn.cu +12 -0
  407. numba_cuda/numba/cuda/tests/doc_examples/__init__.py +9 -0
  408. numba_cuda/numba/cuda/tests/doc_examples/ffi/__init__.py +2 -0
  409. numba_cuda/numba/cuda/tests/doc_examples/ffi/functions.cu +54 -0
  410. numba_cuda/numba/cuda/tests/doc_examples/ffi/include/mul.cuh +8 -0
  411. numba_cuda/numba/cuda/tests/doc_examples/ffi/saxpy.cu +14 -0
  412. numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +86 -0
  413. numba_cuda/numba/cuda/tests/doc_examples/test_cpointer.py +68 -0
  414. numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +81 -0
  415. numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +141 -0
  416. numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +160 -0
  417. numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +180 -0
  418. numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +119 -0
  419. numba_cuda/numba/cuda/tests/doc_examples/test_random.py +66 -0
  420. numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +80 -0
  421. numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +206 -0
  422. numba_cuda/numba/cuda/tests/doc_examples/test_ufunc.py +53 -0
  423. numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +76 -0
  424. numba_cuda/numba/cuda/tests/nocuda/__init__.py +9 -0
  425. numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +452 -0
  426. numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +48 -0
  427. numba_cuda/numba/cuda/tests/nocuda/test_import.py +63 -0
  428. numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +252 -0
  429. numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +59 -0
  430. numba_cuda/numba/cuda/tests/nrt/__init__.py +9 -0
  431. numba_cuda/numba/cuda/tests/nrt/test_nrt.py +387 -0
  432. numba_cuda/numba/cuda/tests/nrt/test_nrt_refct.py +124 -0
  433. numba_cuda/numba/cuda/tests/support.py +900 -0
  434. numba_cuda/numba/cuda/typeconv/__init__.py +4 -0
  435. numba_cuda/numba/cuda/typeconv/castgraph.py +137 -0
  436. numba_cuda/numba/cuda/typeconv/rules.py +63 -0
  437. numba_cuda/numba/cuda/typeconv/typeconv.py +121 -0
  438. numba_cuda/numba/cuda/types/__init__.py +233 -0
  439. numba_cuda/numba/cuda/types/__init__.pyi +167 -0
  440. numba_cuda/numba/cuda/types/abstract.py +9 -0
  441. numba_cuda/numba/cuda/types/common.py +9 -0
  442. numba_cuda/numba/cuda/types/containers.py +9 -0
  443. numba_cuda/numba/cuda/types/cuda_abstract.py +533 -0
  444. numba_cuda/numba/cuda/types/cuda_common.py +110 -0
  445. numba_cuda/numba/cuda/types/cuda_containers.py +971 -0
  446. numba_cuda/numba/cuda/types/cuda_function_type.py +230 -0
  447. numba_cuda/numba/cuda/types/cuda_functions.py +798 -0
  448. numba_cuda/numba/cuda/types/cuda_iterators.py +120 -0
  449. numba_cuda/numba/cuda/types/cuda_misc.py +569 -0
  450. numba_cuda/numba/cuda/types/cuda_npytypes.py +690 -0
  451. numba_cuda/numba/cuda/types/cuda_scalars.py +280 -0
  452. numba_cuda/numba/cuda/types/ext_types.py +101 -0
  453. numba_cuda/numba/cuda/types/function_type.py +11 -0
  454. numba_cuda/numba/cuda/types/functions.py +9 -0
  455. numba_cuda/numba/cuda/types/iterators.py +9 -0
  456. numba_cuda/numba/cuda/types/misc.py +9 -0
  457. numba_cuda/numba/cuda/types/npytypes.py +9 -0
  458. numba_cuda/numba/cuda/types/scalars.py +9 -0
  459. numba_cuda/numba/cuda/typing/__init__.py +19 -0
  460. numba_cuda/numba/cuda/typing/arraydecl.py +939 -0
  461. numba_cuda/numba/cuda/typing/asnumbatype.py +130 -0
  462. numba_cuda/numba/cuda/typing/bufproto.py +70 -0
  463. numba_cuda/numba/cuda/typing/builtins.py +1209 -0
  464. numba_cuda/numba/cuda/typing/cffi_utils.py +219 -0
  465. numba_cuda/numba/cuda/typing/cmathdecl.py +47 -0
  466. numba_cuda/numba/cuda/typing/collections.py +138 -0
  467. numba_cuda/numba/cuda/typing/context.py +782 -0
  468. numba_cuda/numba/cuda/typing/ctypes_utils.py +125 -0
  469. numba_cuda/numba/cuda/typing/dictdecl.py +63 -0
  470. numba_cuda/numba/cuda/typing/enumdecl.py +74 -0
  471. numba_cuda/numba/cuda/typing/listdecl.py +147 -0
  472. numba_cuda/numba/cuda/typing/mathdecl.py +158 -0
  473. numba_cuda/numba/cuda/typing/npdatetime.py +322 -0
  474. numba_cuda/numba/cuda/typing/npydecl.py +749 -0
  475. numba_cuda/numba/cuda/typing/setdecl.py +115 -0
  476. numba_cuda/numba/cuda/typing/templates.py +1446 -0
  477. numba_cuda/numba/cuda/typing/typeof.py +301 -0
  478. numba_cuda/numba/cuda/ufuncs.py +746 -0
  479. numba_cuda/numba/cuda/utils.py +724 -0
  480. numba_cuda/numba/cuda/vector_types.py +214 -0
  481. numba_cuda/numba/cuda/vectorizers.py +260 -0
  482. numba_cuda-0.22.0.dist-info/METADATA +109 -0
  483. numba_cuda-0.22.0.dist-info/RECORD +487 -0
  484. numba_cuda-0.22.0.dist-info/WHEEL +6 -0
  485. numba_cuda-0.22.0.dist-info/licenses/LICENSE +26 -0
  486. numba_cuda-0.22.0.dist-info/licenses/LICENSE.numba +24 -0
  487. numba_cuda-0.22.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,543 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
4
+ from numba.cuda.core import errors
5
+ from numba.cuda import types
6
+ from numba.cuda.typing.npydecl import (
7
+ parse_dtype,
8
+ parse_shape,
9
+ )
10
+ from numba.cuda.typing.templates import (
11
+ AttributeTemplate,
12
+ ConcreteTemplate,
13
+ AbstractTemplate,
14
+ CallableTemplate,
15
+ signature,
16
+ Registry,
17
+ )
18
+ from numba.cuda.types.ext_types import dim3
19
+ from numba import cuda
20
+
21
# Module-local typing registry, plus short aliases for its three
# registration entry points used throughout this file.
registry = Registry()
register, register_attr, register_global = (
    registry.register,
    registry.register_attr,
    registry.register_global,
)
25
+
26
+
27
class Cuda_array_decl(CallableTemplate):
    """Common typing for array declarations of the form
    ``array(shape, dtype, alignment=None)``.

    The typer yields a C-layout ``types.Array`` when the shape and dtype can
    be parsed, and ``None`` otherwise.
    """

    def generic(self):
        def typer(shape, dtype, alignment=None):
            # A shape is acceptable only as an integer literal, or as a tuple
            # made up entirely of integer literals.
            if isinstance(shape, types.Integer):
                shape_ok = isinstance(shape, types.IntegerLiteral)
            elif isinstance(shape, (types.Tuple, types.UniTuple)):
                shape_ok = all(
                    isinstance(extent, types.IntegerLiteral)
                    for extent in shape
                )
            else:
                shape_ok = False

            if not shape_ok:
                return None

            # N.B. Alignment plays no part in typing (it is not part of
            # types.Array); the supplied value is handled in the lowering.
            # Here it is only checked to be a constant (or None).
            if alignment is not None and not isinstance(
                alignment, (types.IntegerLiteral, types.NoneType)
            ):
                raise errors.RequireLiteralValue(
                    "alignment must be a constant integer"
                )

            ndim = parse_shape(shape)
            nb_dtype = parse_dtype(dtype)
            if nb_dtype is None or ndim is None:
                return None

            return types.Array(dtype=nb_dtype, ndim=ndim, layout="C")

        return typer
59
+
60
+
61
@register
class Cuda_shared_array(Cuda_array_decl):
    """Typing for ``cuda.shared.array``; reuses the ``Cuda_array_decl`` typer."""

    key = cuda.shared.array
64
+
65
+
66
@register
class Cuda_local_array(Cuda_array_decl):
    """Typing for ``cuda.local.array``; reuses the ``Cuda_array_decl`` typer."""

    key = cuda.local.array
69
+
70
+
71
@register
class Cuda_const_array_like(CallableTemplate):
    """Typing for ``cuda.const.array_like``: the result type is the type of
    the single ``ndarray`` argument, unchanged.
    """

    key = cuda.const.array_like

    def generic(self):
        def typer(ndarray):
            # The result has exactly the type of the argument.
            return ndarray

        return typer
80
+
81
+
82
@register
class Cuda_threadfence_device(ConcreteTemplate):
    """Typing for ``cuda.threadfence``: no arguments, returns ``none``."""

    key = cuda.threadfence
    cases = [
        signature(types.none),
    ]
86
+
87
+
88
@register
class Cuda_threadfence_block(ConcreteTemplate):
    """Typing for ``cuda.threadfence_block``: no arguments, returns ``none``."""

    key = cuda.threadfence_block
    cases = [
        signature(types.none),
    ]
92
+
93
+
94
@register
class Cuda_threadfence_system(ConcreteTemplate):
    """Typing for ``cuda.threadfence_system``: no arguments, returns ``none``."""

    key = cuda.threadfence_system
    cases = [
        signature(types.none),
    ]
98
+
99
+
100
@register
class Cuda_syncwarp(ConcreteTemplate):
    """Typing for ``cuda.syncwarp``: callable with no argument or with a
    single ``i4`` argument; returns ``none`` in both forms.
    """

    key = cuda.syncwarp
    cases = [
        signature(types.none),
        signature(types.none, types.i4),
    ]
104
+
105
+
106
@register
class Cuda_match_any_sync(ConcreteTemplate):
    """Typing for ``cuda.match_any_sync``: ``(i4, value) -> i4`` where the
    value is one of ``i4``, ``i8``, ``f4`` or ``f8``.
    """

    key = cuda.match_any_sync
    cases = [
        signature(types.i4, types.i4, value_ty)
        for value_ty in (types.i4, types.i8, types.f4, types.f8)
    ]
115
+
116
+
117
@register
class Cuda_match_all_sync(ConcreteTemplate):
    """Typing for ``cuda.match_all_sync``: ``(i4, value) -> Tuple(i4, b1)``
    where the value is one of ``i4``, ``i8``, ``f4`` or ``f8``.
    """

    key = cuda.match_all_sync
    cases = [
        signature(types.Tuple((types.i4, types.b1)), types.i4, value_ty)
        for value_ty in (types.i4, types.i8, types.f4, types.f8)
    ]
126
+
127
+
128
@register
class Cuda_activemask(ConcreteTemplate):
    """Typing for ``cuda.activemask``: no arguments, returns ``uint32``."""

    key = cuda.activemask
    cases = [
        signature(types.uint32),
    ]
132
+
133
+
134
@register
class Cuda_lanemask_lt(ConcreteTemplate):
    """Typing for ``cuda.lanemask_lt``: no arguments, returns ``uint32``."""

    key = cuda.lanemask_lt
    cases = [
        signature(types.uint32),
    ]
138
+
139
+
140
@register
class Cuda_popc(ConcreteTemplate):
    """
    Typing for ``cuda.popc``: the result type equals the argument type.

    Supported types from `llvm.popc`
    [here](http://docs.nvidia.com/cuda/nvvm-ir-spec/index.html#bit-manipulations-intrinics)
    """

    key = cuda.popc
    cases = [
        signature(int_ty, int_ty)
        for int_ty in (
            types.int8,
            types.int16,
            types.int32,
            types.int64,
            types.uint8,
            types.uint16,
            types.uint32,
            types.uint64,
        )
    ]
158
+
159
+
160
@register
class Cuda_fma(ConcreteTemplate):
    """
    Typing for ``cuda.fma``: three operands of one floating-point type,
    returning that same type.

    Supported types from `llvm.fma`
    [here](https://docs.nvidia.com/cuda/nvvm-ir-spec/index.html#standard-c-library-intrinics)
    """

    key = cuda.fma
    cases = [
        signature(float_ty, float_ty, float_ty, float_ty)
        for float_ty in (types.float32, types.float64)
    ]
172
+
173
+
174
@register
class Cuda_cbrt(ConcreteTemplate):
    """Typing for ``cuda.cbrt``: ``float32 -> float32`` and
    ``float64 -> float64``.
    """

    key = cuda.cbrt
    cases = [
        signature(float_ty, float_ty)
        for float_ty in (types.float32, types.float64)
    ]
181
+
182
+
183
@register
class Cuda_brev(ConcreteTemplate):
    """Typing for ``cuda.brev``: ``uint32 -> uint32`` and
    ``uint64 -> uint64``.
    """

    key = cuda.brev
    cases = [
        signature(uint_ty, uint_ty)
        for uint_ty in (types.uint32, types.uint64)
    ]
190
+
191
+
192
@register
class Cuda_clz(ConcreteTemplate):
    """
    Typing for ``cuda.clz``: the result type equals the argument type.

    Supported types from `llvm.ctlz`
    [here](http://docs.nvidia.com/cuda/nvvm-ir-spec/index.html#bit-manipulations-intrinics)
    """

    key = cuda.clz
    cases = [
        signature(int_ty, int_ty)
        for int_ty in (
            types.int8,
            types.int16,
            types.int32,
            types.int64,
            types.uint8,
            types.uint16,
            types.uint32,
            types.uint64,
        )
    ]
210
+
211
+
212
@register
class Cuda_ffs(ConcreteTemplate):
    """
    Typing for ``cuda.ffs``: every supported integer argument type yields a
    ``uint32`` result.

    Supported types from `llvm.cttz`
    [here](http://docs.nvidia.com/cuda/nvvm-ir-spec/index.html#bit-manipulations-intrinics)
    """

    key = cuda.ffs
    cases = [
        signature(types.uint32, int_ty)
        for int_ty in (
            types.int8,
            types.int16,
            types.int32,
            types.int64,
            types.uint8,
            types.uint16,
            types.uint32,
            types.uint64,
        )
    ]
230
+
231
+
232
@register
class Cuda_selp(AbstractTemplate):
    """Typing for ``cuda.selp(test, a, b)``.

    Both candidate values must share one supported type; that type is also
    the result type. No signature is produced otherwise.
    """

    key = cuda.selp

    def generic(self, args, kws):
        assert not kws
        test, a, b = args

        # per docs
        # http://docs.nvidia.com/cuda/parallel-thread-execution/index.html#comparison-and-selection-instructions-selp
        supported_types = (
            types.float64,
            types.float32,
            types.int16,
            types.uint16,
            types.int32,
            types.uint32,
            types.int64,
            types.uint64,
        )

        # The two candidate values must have exactly the same type ...
        if a != b:
            return None
        # ... and that type must be one the instruction supports.
        if a not in supported_types:
            return None

        return signature(a, test, a, a)
257
+
258
+
259
@register_global(float)
class Float(AbstractTemplate):
    """Typing for the ``float`` builtin applied to a ``float16`` value.

    Only a ``float16`` argument produces a signature (``float16 -> float16``);
    any other argument type falls through without one.
    """

    def generic(self, args, kws):
        assert not kws

        # Exactly one positional argument is expected.
        (arg,) = args

        if arg == types.float16:
            return signature(arg, arg)
        return None
268
+
269
+
270
+ # If multiple ConcreteTemplates provide typing for a single function, then
271
+ # function resolution will pick the first compatible typing it finds even if it
272
+ # involves inserting a cast that would be considered undesirable (in this
273
+ # specific case, float16s could be cast to float32s for comparisons).
274
+ #
275
+ # To work around this, we instead use an AbstractTemplate that implements
276
+ # exactly the casting logic that we desire. The AbstractTemplate gets
277
+ # considered in preference to ConcreteTemplates during typing.
278
+ #
279
+ # This is tracked as Issue #7863 (https://github.com/numba/numba/issues/7863) -
280
+ # once this is resolved it should be possible to replace this AbstractTemplate
281
+ # with a ConcreteTemplate to simplify the logic.
282
+
283
+
284
+ # generate atomic operations
285
def _gen(l_key, supported_types):
    """Create and register the typing template for one atomic operation.

    ``l_key`` becomes the template's key. ``supported_types`` is the
    collection of array element types the operation accepts. The registered
    template class is returned.
    """

    @register
    class Cuda_atomic(AbstractTemplate):
        key = l_key

        def generic(self, args, kws):
            assert not kws
            ary, idx, val = args

            elem_ty = ary.dtype
            if elem_ty not in supported_types:
                return None

            rank = ary.ndim
            if rank < 1:
                # Neither of the one- or multi-dimensional cases applies.
                return None

            # One-dimensional arrays are indexed with intp; for higher ranks
            # the index type is passed through as given.
            index_ty = types.intp if rank == 1 else idx
            return signature(elem_ty, ary, index_ty, elem_ty)

    return Cuda_atomic
303
+
304
+
305
# Element-type families used to restrict which arrays each atomic
# operation accepts.
integer_numba_types = (
    types.int32,
    types.uint32,
    types.int64,
    types.uint64,
)

unsigned_int_numba_types = (
    types.uint32,
    types.uint64,
)

# The two floating-point types followed by every integer type above.
all_numba_types = (types.float64, types.float32) + integer_numba_types
317
+
318
# One registered typing template per atomic operation. The second argument
# selects the array element types the operation accepts.

# Accept floating-point and integer element types.
Cuda_atomic_add = _gen(cuda.atomic.add, all_numba_types)
Cuda_atomic_sub = _gen(cuda.atomic.sub, all_numba_types)
Cuda_atomic_max = _gen(cuda.atomic.max, all_numba_types)
Cuda_atomic_min = _gen(cuda.atomic.min, all_numba_types)
Cuda_atomic_nanmax = _gen(cuda.atomic.nanmax, all_numba_types)
Cuda_atomic_nanmin = _gen(cuda.atomic.nanmin, all_numba_types)
# Accept integer element types only.
Cuda_atomic_and = _gen(cuda.atomic.and_, integer_numba_types)
Cuda_atomic_or = _gen(cuda.atomic.or_, integer_numba_types)
Cuda_atomic_xor = _gen(cuda.atomic.xor, integer_numba_types)
# Accept unsigned integer element types only.
Cuda_atomic_inc = _gen(cuda.atomic.inc, unsigned_int_numba_types)
Cuda_atomic_dec = _gen(cuda.atomic.dec, unsigned_int_numba_types)
# Accept integer element types only.
Cuda_atomic_exch = _gen(cuda.atomic.exch, integer_numba_types)
330
+
331
+
332
@register
class Cuda_atomic_compare_and_swap(AbstractTemplate):
    """Typing for ``cuda.atomic.compare_and_swap(ary, old, val)``.

    A signature is produced only for one-dimensional arrays whose element
    type is one of ``integer_numba_types``; the result and both value
    operands use the element type.
    """

    key = cuda.atomic.compare_and_swap

    def generic(self, args, kws):
        assert not kws
        ary, old, val = args
        dty = ary.dtype

        if dty not in integer_numba_types:
            return None
        if ary.ndim != 1:
            return None

        return signature(dty, ary, dty, dty)
343
+
344
+
345
@register
class Cuda_atomic_cas(AbstractTemplate):
    """Typing for ``cuda.atomic.cas(ary, idx, old, val)``.

    The array element type must be one of ``integer_numba_types``. The index
    is typed as ``intp`` for one-dimensional arrays and passed through
    unchanged for higher ranks.
    """

    key = cuda.atomic.cas

    def generic(self, args, kws):
        assert not kws
        ary, idx, old, val = args
        dty = ary.dtype

        if dty not in integer_numba_types:
            return None

        rank = ary.ndim
        if rank < 1:
            # Neither of the one- or multi-dimensional cases applies.
            return None

        index_ty = types.intp if rank == 1 else idx
        return signature(dty, ary, index_ty, dty, dty)
361
+
362
+
363
@register_global(breakpoint)
class Cuda_breakpoint(ConcreteTemplate):
    """Typing for the ``breakpoint`` builtin: no arguments, returns ``none``."""

    cases = [
        signature(types.none),
    ]
366
+
367
+
368
@register
class Cuda_nanosleep(ConcreteTemplate):
    """Typing for ``cuda.nanosleep``: one ``uint32`` argument, ``void`` result."""

    key = cuda.nanosleep

    cases = [
        signature(types.void, types.uint32),
    ]
373
+
374
+
375
@register_attr
class Dim3_attrs(AttributeTemplate):
    """Attribute typing for ``dim3`` values: ``x``, ``y`` and ``z`` are each
    typed as ``int32``.
    """

    key = dim3

    def resolve_x(self, mod):
        """Type of the ``x`` component."""
        return types.int32

    def resolve_y(self, mod):
        """Type of the ``y`` component."""
        return types.int32

    def resolve_z(self, mod):
        """Type of the ``z`` component."""
        return types.int32
387
+
388
+
389
@register_attr
class CudaSharedModuleTemplate(AttributeTemplate):
    """Attribute typing for the ``cuda.shared`` module object."""

    key = types.Module(cuda.shared)

    def resolve_array(self, mod):
        """``cuda.shared.array`` is typed by ``Cuda_shared_array``."""
        return types.Function(Cuda_shared_array)
395
+
396
+
397
@register_attr
class CudaConstModuleTemplate(AttributeTemplate):
    """Attribute typing for the ``cuda.const`` module object."""

    key = types.Module(cuda.const)

    def resolve_array_like(self, mod):
        """``cuda.const.array_like`` is typed by ``Cuda_const_array_like``."""
        return types.Function(Cuda_const_array_like)
403
+
404
+
405
@register_attr
class CudaLocalModuleTemplate(AttributeTemplate):
    """Attribute typing for the ``cuda.local`` module object."""

    key = types.Module(cuda.local)

    def resolve_array(self, mod):
        """``cuda.local.array`` is typed by ``Cuda_local_array``."""
        return types.Function(Cuda_local_array)
411
+
412
+
413
@register_attr
class CudaAtomicTemplate(AttributeTemplate):
    """Attribute typing for the ``cuda.atomic`` module object.

    Each ``resolve_<name>`` method maps the attribute ``<name>`` to a
    function type backed by the corresponding atomic typing template.
    """

    key = types.Module(cuda.atomic)

    # --- arithmetic ------------------------------------------------------

    def resolve_add(self, mod):
        return types.Function(Cuda_atomic_add)

    def resolve_sub(self, mod):
        return types.Function(Cuda_atomic_sub)

    def resolve_inc(self, mod):
        return types.Function(Cuda_atomic_inc)

    def resolve_dec(self, mod):
        return types.Function(Cuda_atomic_dec)

    # --- bitwise ---------------------------------------------------------

    def resolve_and_(self, mod):
        return types.Function(Cuda_atomic_and)

    def resolve_or_(self, mod):
        return types.Function(Cuda_atomic_or)

    def resolve_xor(self, mod):
        return types.Function(Cuda_atomic_xor)

    # --- minimum / maximum -----------------------------------------------

    def resolve_max(self, mod):
        return types.Function(Cuda_atomic_max)

    def resolve_min(self, mod):
        return types.Function(Cuda_atomic_min)

    def resolve_nanmax(self, mod):
        return types.Function(Cuda_atomic_nanmax)

    def resolve_nanmin(self, mod):
        return types.Function(Cuda_atomic_nanmin)

    # --- exchange / compare-and-swap -------------------------------------

    def resolve_exch(self, mod):
        return types.Function(Cuda_atomic_exch)

    def resolve_compare_and_swap(self, mod):
        return types.Function(Cuda_atomic_compare_and_swap)

    def resolve_cas(self, mod):
        return types.Function(Cuda_atomic_cas)
458
+
459
+
460
@register_attr
class CudaModuleTemplate(AttributeTemplate):
    """Attribute typing for the top-level ``cuda`` module object.

    Each ``resolve_<name>`` method gives the type of ``cuda.<name>``: a
    module type for submodules, ``dim3`` or ``int32`` for the index values,
    and a function type backed by a typing template for the intrinsics.
    """

    key = types.Module(cuda)

    # --- submodules ------------------------------------------------------

    def resolve_cg(self, mod):
        return types.Module(cuda.cg)

    def resolve_shared(self, mod):
        return types.Module(cuda.shared)

    def resolve_atomic(self, mod):
        return types.Module(cuda.atomic)

    def resolve_const(self, mod):
        return types.Module(cuda.const)

    def resolve_local(self, mod):
        return types.Module(cuda.local)

    # --- thread / block / grid indices -----------------------------------

    def resolve_threadIdx(self, mod):
        return dim3

    def resolve_blockIdx(self, mod):
        return dim3

    def resolve_blockDim(self, mod):
        return dim3

    def resolve_gridDim(self, mod):
        return dim3

    def resolve_laneid(self, mod):
        return types.int32

    # --- bit manipulation ------------------------------------------------

    def resolve_popc(self, mod):
        return types.Function(Cuda_popc)

    def resolve_brev(self, mod):
        return types.Function(Cuda_brev)

    def resolve_clz(self, mod):
        return types.Function(Cuda_clz)

    def resolve_ffs(self, mod):
        return types.Function(Cuda_ffs)

    # --- floating point and selection ------------------------------------

    def resolve_fma(self, mod):
        return types.Function(Cuda_fma)

    def resolve_cbrt(self, mod):
        return types.Function(Cuda_cbrt)

    def resolve_selp(self, mod):
        return types.Function(Cuda_selp)

    # --- fences, warp-level functions and sleeping -----------------------

    def resolve_threadfence(self, mod):
        return types.Function(Cuda_threadfence_device)

    def resolve_threadfence_block(self, mod):
        return types.Function(Cuda_threadfence_block)

    def resolve_threadfence_system(self, mod):
        return types.Function(Cuda_threadfence_system)

    def resolve_syncwarp(self, mod):
        return types.Function(Cuda_syncwarp)

    def resolve_match_any_sync(self, mod):
        return types.Function(Cuda_match_any_sync)

    def resolve_match_all_sync(self, mod):
        return types.Function(Cuda_match_all_sync)

    def resolve_activemask(self, mod):
        return types.Function(Cuda_activemask)

    def resolve_lanemask_lt(self, mod):
        return types.Function(Cuda_lanemask_lt)

    def resolve_nanosleep(self, mod):
        return types.Function(Cuda_nanosleep)
541
+
542
+
543
+ register_global(cuda, types.Module(cuda))
@@ -0,0 +1,14 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
4
+ """CUDA Driver
5
+
6
+ - Driver API binding
7
+ - NVVM API binding
8
+ - Device array implementation
9
+
10
+ """
11
+
12
+ from numba.cuda.core import config
13
+
14
# Import-time guard: this package must not be imported while the simulator
# is enabled. NOTE(review): as an ``assert`` this check is skipped when
# Python runs with -O.
assert not config.ENABLE_CUDASIM, "Cannot use real driver API with simulator"