numba-cuda 0.21.1__cp313-cp313-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (488) hide show
  1. _numba_cuda_redirector.pth +4 -0
  2. _numba_cuda_redirector.py +89 -0
  3. numba_cuda/VERSION +1 -0
  4. numba_cuda/__init__.py +6 -0
  5. numba_cuda/_version.py +11 -0
  6. numba_cuda/numba/cuda/__init__.py +70 -0
  7. numba_cuda/numba/cuda/_internal/cuda_bf16.py +16394 -0
  8. numba_cuda/numba/cuda/_internal/cuda_fp16.py +8112 -0
  9. numba_cuda/numba/cuda/api.py +577 -0
  10. numba_cuda/numba/cuda/api_util.py +76 -0
  11. numba_cuda/numba/cuda/args.py +72 -0
  12. numba_cuda/numba/cuda/bf16.py +397 -0
  13. numba_cuda/numba/cuda/cache_hints.py +287 -0
  14. numba_cuda/numba/cuda/cext/__init__.py +2 -0
  15. numba_cuda/numba/cuda/cext/_devicearray.cp313-win_amd64.pyd +0 -0
  16. numba_cuda/numba/cuda/cext/_devicearray.cpp +159 -0
  17. numba_cuda/numba/cuda/cext/_devicearray.h +29 -0
  18. numba_cuda/numba/cuda/cext/_dispatcher.cp313-win_amd64.pyd +0 -0
  19. numba_cuda/numba/cuda/cext/_dispatcher.cpp +1098 -0
  20. numba_cuda/numba/cuda/cext/_hashtable.cpp +532 -0
  21. numba_cuda/numba/cuda/cext/_hashtable.h +135 -0
  22. numba_cuda/numba/cuda/cext/_helperlib.c +71 -0
  23. numba_cuda/numba/cuda/cext/_helperlib.cp313-win_amd64.pyd +0 -0
  24. numba_cuda/numba/cuda/cext/_helpermod.c +82 -0
  25. numba_cuda/numba/cuda/cext/_pymodule.h +38 -0
  26. numba_cuda/numba/cuda/cext/_typeconv.cp313-win_amd64.pyd +0 -0
  27. numba_cuda/numba/cuda/cext/_typeconv.cpp +206 -0
  28. numba_cuda/numba/cuda/cext/_typeof.cpp +1159 -0
  29. numba_cuda/numba/cuda/cext/_typeof.h +19 -0
  30. numba_cuda/numba/cuda/cext/capsulethunk.h +111 -0
  31. numba_cuda/numba/cuda/cext/mviewbuf.c +385 -0
  32. numba_cuda/numba/cuda/cext/mviewbuf.cp313-win_amd64.pyd +0 -0
  33. numba_cuda/numba/cuda/cext/typeconv.cpp +212 -0
  34. numba_cuda/numba/cuda/cext/typeconv.hpp +101 -0
  35. numba_cuda/numba/cuda/cg.py +67 -0
  36. numba_cuda/numba/cuda/cgutils.py +1294 -0
  37. numba_cuda/numba/cuda/cloudpickle/__init__.py +21 -0
  38. numba_cuda/numba/cuda/cloudpickle/cloudpickle.py +1598 -0
  39. numba_cuda/numba/cuda/cloudpickle/cloudpickle_fast.py +17 -0
  40. numba_cuda/numba/cuda/codegen.py +541 -0
  41. numba_cuda/numba/cuda/compiler.py +1396 -0
  42. numba_cuda/numba/cuda/core/analysis.py +758 -0
  43. numba_cuda/numba/cuda/core/annotations/__init__.py +0 -0
  44. numba_cuda/numba/cuda/core/annotations/pretty_annotate.py +288 -0
  45. numba_cuda/numba/cuda/core/annotations/type_annotations.py +305 -0
  46. numba_cuda/numba/cuda/core/base.py +1332 -0
  47. numba_cuda/numba/cuda/core/boxing.py +1411 -0
  48. numba_cuda/numba/cuda/core/bytecode.py +728 -0
  49. numba_cuda/numba/cuda/core/byteflow.py +2346 -0
  50. numba_cuda/numba/cuda/core/caching.py +744 -0
  51. numba_cuda/numba/cuda/core/callconv.py +392 -0
  52. numba_cuda/numba/cuda/core/codegen.py +171 -0
  53. numba_cuda/numba/cuda/core/compiler.py +199 -0
  54. numba_cuda/numba/cuda/core/compiler_lock.py +85 -0
  55. numba_cuda/numba/cuda/core/compiler_machinery.py +497 -0
  56. numba_cuda/numba/cuda/core/config.py +650 -0
  57. numba_cuda/numba/cuda/core/consts.py +124 -0
  58. numba_cuda/numba/cuda/core/controlflow.py +989 -0
  59. numba_cuda/numba/cuda/core/entrypoints.py +57 -0
  60. numba_cuda/numba/cuda/core/environment.py +66 -0
  61. numba_cuda/numba/cuda/core/errors.py +917 -0
  62. numba_cuda/numba/cuda/core/event.py +511 -0
  63. numba_cuda/numba/cuda/core/funcdesc.py +330 -0
  64. numba_cuda/numba/cuda/core/generators.py +387 -0
  65. numba_cuda/numba/cuda/core/imputils.py +509 -0
  66. numba_cuda/numba/cuda/core/inline_closurecall.py +1787 -0
  67. numba_cuda/numba/cuda/core/interpreter.py +3617 -0
  68. numba_cuda/numba/cuda/core/ir.py +1812 -0
  69. numba_cuda/numba/cuda/core/ir_utils.py +2638 -0
  70. numba_cuda/numba/cuda/core/optional.py +129 -0
  71. numba_cuda/numba/cuda/core/options.py +262 -0
  72. numba_cuda/numba/cuda/core/postproc.py +249 -0
  73. numba_cuda/numba/cuda/core/pythonapi.py +1859 -0
  74. numba_cuda/numba/cuda/core/registry.py +46 -0
  75. numba_cuda/numba/cuda/core/removerefctpass.py +123 -0
  76. numba_cuda/numba/cuda/core/rewrites/__init__.py +26 -0
  77. numba_cuda/numba/cuda/core/rewrites/ir_print.py +91 -0
  78. numba_cuda/numba/cuda/core/rewrites/registry.py +104 -0
  79. numba_cuda/numba/cuda/core/rewrites/static_binop.py +41 -0
  80. numba_cuda/numba/cuda/core/rewrites/static_getitem.py +189 -0
  81. numba_cuda/numba/cuda/core/rewrites/static_raise.py +100 -0
  82. numba_cuda/numba/cuda/core/sigutils.py +68 -0
  83. numba_cuda/numba/cuda/core/ssa.py +498 -0
  84. numba_cuda/numba/cuda/core/targetconfig.py +330 -0
  85. numba_cuda/numba/cuda/core/tracing.py +231 -0
  86. numba_cuda/numba/cuda/core/transforms.py +956 -0
  87. numba_cuda/numba/cuda/core/typed_passes.py +867 -0
  88. numba_cuda/numba/cuda/core/typeinfer.py +1950 -0
  89. numba_cuda/numba/cuda/core/unsafe/__init__.py +0 -0
  90. numba_cuda/numba/cuda/core/unsafe/bytes.py +67 -0
  91. numba_cuda/numba/cuda/core/unsafe/eh.py +67 -0
  92. numba_cuda/numba/cuda/core/unsafe/refcount.py +98 -0
  93. numba_cuda/numba/cuda/core/untyped_passes.py +1979 -0
  94. numba_cuda/numba/cuda/cpython/builtins.py +1153 -0
  95. numba_cuda/numba/cuda/cpython/charseq.py +1218 -0
  96. numba_cuda/numba/cuda/cpython/cmathimpl.py +560 -0
  97. numba_cuda/numba/cuda/cpython/enumimpl.py +103 -0
  98. numba_cuda/numba/cuda/cpython/iterators.py +167 -0
  99. numba_cuda/numba/cuda/cpython/listobj.py +1326 -0
  100. numba_cuda/numba/cuda/cpython/mathimpl.py +499 -0
  101. numba_cuda/numba/cuda/cpython/numbers.py +1475 -0
  102. numba_cuda/numba/cuda/cpython/rangeobj.py +289 -0
  103. numba_cuda/numba/cuda/cpython/slicing.py +322 -0
  104. numba_cuda/numba/cuda/cpython/tupleobj.py +456 -0
  105. numba_cuda/numba/cuda/cpython/unicode.py +2865 -0
  106. numba_cuda/numba/cuda/cpython/unicode_support.py +1597 -0
  107. numba_cuda/numba/cuda/cpython/unsafe/__init__.py +0 -0
  108. numba_cuda/numba/cuda/cpython/unsafe/numbers.py +64 -0
  109. numba_cuda/numba/cuda/cpython/unsafe/tuple.py +92 -0
  110. numba_cuda/numba/cuda/cuda_paths.py +691 -0
  111. numba_cuda/numba/cuda/cudadecl.py +556 -0
  112. numba_cuda/numba/cuda/cudadrv/__init__.py +14 -0
  113. numba_cuda/numba/cuda/cudadrv/devicearray.py +951 -0
  114. numba_cuda/numba/cuda/cudadrv/devices.py +249 -0
  115. numba_cuda/numba/cuda/cudadrv/driver.py +3222 -0
  116. numba_cuda/numba/cuda/cudadrv/drvapi.py +435 -0
  117. numba_cuda/numba/cuda/cudadrv/dummyarray.py +558 -0
  118. numba_cuda/numba/cuda/cudadrv/enums.py +613 -0
  119. numba_cuda/numba/cuda/cudadrv/error.py +48 -0
  120. numba_cuda/numba/cuda/cudadrv/libs.py +220 -0
  121. numba_cuda/numba/cuda/cudadrv/linkable_code.py +184 -0
  122. numba_cuda/numba/cuda/cudadrv/mappings.py +14 -0
  123. numba_cuda/numba/cuda/cudadrv/ndarray.py +26 -0
  124. numba_cuda/numba/cuda/cudadrv/nvrtc.py +193 -0
  125. numba_cuda/numba/cuda/cudadrv/nvvm.py +756 -0
  126. numba_cuda/numba/cuda/cudadrv/rtapi.py +13 -0
  127. numba_cuda/numba/cuda/cudadrv/runtime.py +34 -0
  128. numba_cuda/numba/cuda/cudaimpl.py +995 -0
  129. numba_cuda/numba/cuda/cudamath.py +149 -0
  130. numba_cuda/numba/cuda/datamodel/__init__.py +7 -0
  131. numba_cuda/numba/cuda/datamodel/cuda_manager.py +66 -0
  132. numba_cuda/numba/cuda/datamodel/cuda_models.py +1446 -0
  133. numba_cuda/numba/cuda/datamodel/cuda_packer.py +224 -0
  134. numba_cuda/numba/cuda/datamodel/cuda_registry.py +22 -0
  135. numba_cuda/numba/cuda/datamodel/cuda_testing.py +153 -0
  136. numba_cuda/numba/cuda/datamodel/manager.py +11 -0
  137. numba_cuda/numba/cuda/datamodel/models.py +9 -0
  138. numba_cuda/numba/cuda/datamodel/packer.py +9 -0
  139. numba_cuda/numba/cuda/datamodel/registry.py +11 -0
  140. numba_cuda/numba/cuda/datamodel/testing.py +11 -0
  141. numba_cuda/numba/cuda/debuginfo.py +903 -0
  142. numba_cuda/numba/cuda/decorators.py +294 -0
  143. numba_cuda/numba/cuda/descriptor.py +35 -0
  144. numba_cuda/numba/cuda/device_init.py +158 -0
  145. numba_cuda/numba/cuda/deviceufunc.py +1021 -0
  146. numba_cuda/numba/cuda/dispatcher.py +2463 -0
  147. numba_cuda/numba/cuda/errors.py +72 -0
  148. numba_cuda/numba/cuda/extending.py +697 -0
  149. numba_cuda/numba/cuda/flags.py +178 -0
  150. numba_cuda/numba/cuda/fp16.py +357 -0
  151. numba_cuda/numba/cuda/include/12/cuda_bf16.h +5118 -0
  152. numba_cuda/numba/cuda/include/12/cuda_bf16.hpp +3865 -0
  153. numba_cuda/numba/cuda/include/12/cuda_fp16.h +5363 -0
  154. numba_cuda/numba/cuda/include/12/cuda_fp16.hpp +3483 -0
  155. numba_cuda/numba/cuda/include/13/cuda_bf16.h +5118 -0
  156. numba_cuda/numba/cuda/include/13/cuda_bf16.hpp +3865 -0
  157. numba_cuda/numba/cuda/include/13/cuda_fp16.h +5363 -0
  158. numba_cuda/numba/cuda/include/13/cuda_fp16.hpp +3483 -0
  159. numba_cuda/numba/cuda/initialize.py +24 -0
  160. numba_cuda/numba/cuda/intrinsic_wrapper.py +41 -0
  161. numba_cuda/numba/cuda/intrinsics.py +382 -0
  162. numba_cuda/numba/cuda/itanium_mangler.py +214 -0
  163. numba_cuda/numba/cuda/kernels/__init__.py +2 -0
  164. numba_cuda/numba/cuda/kernels/reduction.py +265 -0
  165. numba_cuda/numba/cuda/kernels/transpose.py +65 -0
  166. numba_cuda/numba/cuda/libdevice.py +3386 -0
  167. numba_cuda/numba/cuda/libdevicedecl.py +20 -0
  168. numba_cuda/numba/cuda/libdevicefuncs.py +1060 -0
  169. numba_cuda/numba/cuda/libdeviceimpl.py +88 -0
  170. numba_cuda/numba/cuda/locks.py +19 -0
  171. numba_cuda/numba/cuda/lowering.py +1951 -0
  172. numba_cuda/numba/cuda/mathimpl.py +374 -0
  173. numba_cuda/numba/cuda/memory_management/__init__.py +4 -0
  174. numba_cuda/numba/cuda/memory_management/memsys.cu +99 -0
  175. numba_cuda/numba/cuda/memory_management/memsys.cuh +22 -0
  176. numba_cuda/numba/cuda/memory_management/nrt.cu +212 -0
  177. numba_cuda/numba/cuda/memory_management/nrt.cuh +48 -0
  178. numba_cuda/numba/cuda/memory_management/nrt.py +390 -0
  179. numba_cuda/numba/cuda/memory_management/nrt_context.py +438 -0
  180. numba_cuda/numba/cuda/misc/appdirs.py +594 -0
  181. numba_cuda/numba/cuda/misc/cffiimpl.py +24 -0
  182. numba_cuda/numba/cuda/misc/coverage_support.py +43 -0
  183. numba_cuda/numba/cuda/misc/dump_style.py +41 -0
  184. numba_cuda/numba/cuda/misc/findlib.py +75 -0
  185. numba_cuda/numba/cuda/misc/firstlinefinder.py +96 -0
  186. numba_cuda/numba/cuda/misc/gdb_hook.py +240 -0
  187. numba_cuda/numba/cuda/misc/literal.py +28 -0
  188. numba_cuda/numba/cuda/misc/llvm_pass_timings.py +412 -0
  189. numba_cuda/numba/cuda/misc/special.py +94 -0
  190. numba_cuda/numba/cuda/models.py +56 -0
  191. numba_cuda/numba/cuda/np/arraymath.py +5130 -0
  192. numba_cuda/numba/cuda/np/arrayobj.py +7635 -0
  193. numba_cuda/numba/cuda/np/extensions.py +11 -0
  194. numba_cuda/numba/cuda/np/linalg.py +3087 -0
  195. numba_cuda/numba/cuda/np/math/__init__.py +0 -0
  196. numba_cuda/numba/cuda/np/math/cmathimpl.py +558 -0
  197. numba_cuda/numba/cuda/np/math/mathimpl.py +487 -0
  198. numba_cuda/numba/cuda/np/math/numbers.py +1461 -0
  199. numba_cuda/numba/cuda/np/npdatetime.py +969 -0
  200. numba_cuda/numba/cuda/np/npdatetime_helpers.py +217 -0
  201. numba_cuda/numba/cuda/np/npyfuncs.py +1808 -0
  202. numba_cuda/numba/cuda/np/npyimpl.py +1027 -0
  203. numba_cuda/numba/cuda/np/numpy_support.py +798 -0
  204. numba_cuda/numba/cuda/np/polynomial/__init__.py +4 -0
  205. numba_cuda/numba/cuda/np/polynomial/polynomial_core.py +242 -0
  206. numba_cuda/numba/cuda/np/polynomial/polynomial_functions.py +380 -0
  207. numba_cuda/numba/cuda/np/ufunc/__init__.py +4 -0
  208. numba_cuda/numba/cuda/np/ufunc/decorators.py +203 -0
  209. numba_cuda/numba/cuda/np/ufunc/sigparse.py +68 -0
  210. numba_cuda/numba/cuda/np/ufunc/ufuncbuilder.py +65 -0
  211. numba_cuda/numba/cuda/np/ufunc_db.py +1282 -0
  212. numba_cuda/numba/cuda/np/unsafe/__init__.py +0 -0
  213. numba_cuda/numba/cuda/np/unsafe/ndarray.py +84 -0
  214. numba_cuda/numba/cuda/nvvmutils.py +254 -0
  215. numba_cuda/numba/cuda/printimpl.py +126 -0
  216. numba_cuda/numba/cuda/random.py +308 -0
  217. numba_cuda/numba/cuda/reshape_funcs.cu +156 -0
  218. numba_cuda/numba/cuda/serialize.py +267 -0
  219. numba_cuda/numba/cuda/simulator/__init__.py +63 -0
  220. numba_cuda/numba/cuda/simulator/_internal/__init__.py +4 -0
  221. numba_cuda/numba/cuda/simulator/_internal/cuda_bf16.py +2 -0
  222. numba_cuda/numba/cuda/simulator/api.py +179 -0
  223. numba_cuda/numba/cuda/simulator/bf16.py +4 -0
  224. numba_cuda/numba/cuda/simulator/compiler.py +38 -0
  225. numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +11 -0
  226. numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +462 -0
  227. numba_cuda/numba/cuda/simulator/cudadrv/devices.py +122 -0
  228. numba_cuda/numba/cuda/simulator/cudadrv/driver.py +66 -0
  229. numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +7 -0
  230. numba_cuda/numba/cuda/simulator/cudadrv/dummyarray.py +7 -0
  231. numba_cuda/numba/cuda/simulator/cudadrv/error.py +10 -0
  232. numba_cuda/numba/cuda/simulator/cudadrv/libs.py +10 -0
  233. numba_cuda/numba/cuda/simulator/cudadrv/linkable_code.py +61 -0
  234. numba_cuda/numba/cuda/simulator/cudadrv/nvrtc.py +11 -0
  235. numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +32 -0
  236. numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +22 -0
  237. numba_cuda/numba/cuda/simulator/dispatcher.py +11 -0
  238. numba_cuda/numba/cuda/simulator/kernel.py +320 -0
  239. numba_cuda/numba/cuda/simulator/kernelapi.py +509 -0
  240. numba_cuda/numba/cuda/simulator/memory_management/__init__.py +4 -0
  241. numba_cuda/numba/cuda/simulator/memory_management/nrt.py +21 -0
  242. numba_cuda/numba/cuda/simulator/reduction.py +19 -0
  243. numba_cuda/numba/cuda/simulator/tests/support.py +4 -0
  244. numba_cuda/numba/cuda/simulator/vector_types.py +65 -0
  245. numba_cuda/numba/cuda/simulator_init.py +18 -0
  246. numba_cuda/numba/cuda/stubs.py +635 -0
  247. numba_cuda/numba/cuda/target.py +505 -0
  248. numba_cuda/numba/cuda/testing.py +347 -0
  249. numba_cuda/numba/cuda/tests/__init__.py +62 -0
  250. numba_cuda/numba/cuda/tests/benchmarks/__init__.py +0 -0
  251. numba_cuda/numba/cuda/tests/benchmarks/test_kernel_launch.py +119 -0
  252. numba_cuda/numba/cuda/tests/cloudpickle_main_class.py +9 -0
  253. numba_cuda/numba/cuda/tests/core/serialize_usecases.py +113 -0
  254. numba_cuda/numba/cuda/tests/core/test_itanium_mangler.py +83 -0
  255. numba_cuda/numba/cuda/tests/core/test_serialize.py +371 -0
  256. numba_cuda/numba/cuda/tests/cudadrv/__init__.py +9 -0
  257. numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +147 -0
  258. numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +161 -0
  259. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +397 -0
  260. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +24 -0
  261. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +180 -0
  262. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +313 -0
  263. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +187 -0
  264. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +621 -0
  265. numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +247 -0
  266. numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +100 -0
  267. numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +198 -0
  268. numba_cuda/numba/cuda/tests/cudadrv/test_events.py +53 -0
  269. numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +72 -0
  270. numba_cuda/numba/cuda/tests/cudadrv/test_init.py +138 -0
  271. numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +43 -0
  272. numba_cuda/numba/cuda/tests/cudadrv/test_is_fp16.py +15 -0
  273. numba_cuda/numba/cuda/tests/cudadrv/test_linkable_code.py +58 -0
  274. numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +348 -0
  275. numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +128 -0
  276. numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py +301 -0
  277. numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +174 -0
  278. numba_cuda/numba/cuda/tests/cudadrv/test_nvrtc.py +28 -0
  279. numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +185 -0
  280. numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +39 -0
  281. numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +23 -0
  282. numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +38 -0
  283. numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +48 -0
  284. numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +44 -0
  285. numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +127 -0
  286. numba_cuda/numba/cuda/tests/cudapy/__init__.py +9 -0
  287. numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +231 -0
  288. numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +50 -0
  289. numba_cuda/numba/cuda/tests/cudapy/cg_cache_usecases.py +36 -0
  290. numba_cuda/numba/cuda/tests/cudapy/complex_usecases.py +116 -0
  291. numba_cuda/numba/cuda/tests/cudapy/enum_usecases.py +59 -0
  292. numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +62 -0
  293. numba_cuda/numba/cuda/tests/cudapy/jitlink.ptx +28 -0
  294. numba_cuda/numba/cuda/tests/cudapy/overload_usecases.py +33 -0
  295. numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +104 -0
  296. numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +47 -0
  297. numba_cuda/numba/cuda/tests/cudapy/test_analysis.py +1122 -0
  298. numba_cuda/numba/cuda/tests/cudapy/test_array.py +344 -0
  299. numba_cuda/numba/cuda/tests/cudapy/test_array_alignment.py +268 -0
  300. numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +203 -0
  301. numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +63 -0
  302. numba_cuda/numba/cuda/tests/cudapy/test_array_reductions.py +360 -0
  303. numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +1815 -0
  304. numba_cuda/numba/cuda/tests/cudapy/test_bfloat16.py +599 -0
  305. numba_cuda/numba/cuda/tests/cudapy/test_bfloat16_bindings.py +377 -0
  306. numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +160 -0
  307. numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +27 -0
  308. numba_cuda/numba/cuda/tests/cudapy/test_byteflow.py +98 -0
  309. numba_cuda/numba/cuda/tests/cudapy/test_cache_hints.py +210 -0
  310. numba_cuda/numba/cuda/tests/cudapy/test_caching.py +683 -0
  311. numba_cuda/numba/cuda/tests/cudapy/test_casting.py +265 -0
  312. numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +42 -0
  313. numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +718 -0
  314. numba_cuda/numba/cuda/tests/cudapy/test_complex.py +370 -0
  315. numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +23 -0
  316. numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +142 -0
  317. numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +178 -0
  318. numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +193 -0
  319. numba_cuda/numba/cuda/tests/cudapy/test_copy_propagate.py +131 -0
  320. numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +438 -0
  321. numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +94 -0
  322. numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +101 -0
  323. numba_cuda/numba/cuda/tests/cudapy/test_debug.py +105 -0
  324. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +889 -0
  325. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo_types.py +476 -0
  326. numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +500 -0
  327. numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +820 -0
  328. numba_cuda/numba/cuda/tests/cudapy/test_enums.py +152 -0
  329. numba_cuda/numba/cuda/tests/cudapy/test_errors.py +111 -0
  330. numba_cuda/numba/cuda/tests/cudapy/test_exception.py +170 -0
  331. numba_cuda/numba/cuda/tests/cudapy/test_extending.py +1088 -0
  332. numba_cuda/numba/cuda/tests/cudapy/test_extending_types.py +71 -0
  333. numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +265 -0
  334. numba_cuda/numba/cuda/tests/cudapy/test_flow_control.py +1433 -0
  335. numba_cuda/numba/cuda/tests/cudapy/test_forall.py +57 -0
  336. numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +34 -0
  337. numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +69 -0
  338. numba_cuda/numba/cuda/tests/cudapy/test_globals.py +62 -0
  339. numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +474 -0
  340. numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +167 -0
  341. numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +92 -0
  342. numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +39 -0
  343. numba_cuda/numba/cuda/tests/cudapy/test_inline.py +170 -0
  344. numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +255 -0
  345. numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +1219 -0
  346. numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +263 -0
  347. numba_cuda/numba/cuda/tests/cudapy/test_ir.py +598 -0
  348. numba_cuda/numba/cuda/tests/cudapy/test_ir_utils.py +276 -0
  349. numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +101 -0
  350. numba_cuda/numba/cuda/tests/cudapy/test_lang.py +68 -0
  351. numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +123 -0
  352. numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +194 -0
  353. numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +220 -0
  354. numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +173 -0
  355. numba_cuda/numba/cuda/tests/cudapy/test_make_function_to_jit_function.py +364 -0
  356. numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +47 -0
  357. numba_cuda/numba/cuda/tests/cudapy/test_math.py +842 -0
  358. numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +76 -0
  359. numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +78 -0
  360. numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +25 -0
  361. numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +145 -0
  362. numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +39 -0
  363. numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +82 -0
  364. numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +53 -0
  365. numba_cuda/numba/cuda/tests/cudapy/test_operator.py +504 -0
  366. numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +93 -0
  367. numba_cuda/numba/cuda/tests/cudapy/test_overload.py +402 -0
  368. numba_cuda/numba/cuda/tests/cudapy/test_powi.py +128 -0
  369. numba_cuda/numba/cuda/tests/cudapy/test_print.py +193 -0
  370. numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +37 -0
  371. numba_cuda/numba/cuda/tests/cudapy/test_random.py +117 -0
  372. numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +614 -0
  373. numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +130 -0
  374. numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +94 -0
  375. numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +83 -0
  376. numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +86 -0
  377. numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +40 -0
  378. numba_cuda/numba/cuda/tests/cudapy/test_sm.py +457 -0
  379. numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +233 -0
  380. numba_cuda/numba/cuda/tests/cudapy/test_ssa.py +454 -0
  381. numba_cuda/numba/cuda/tests/cudapy/test_stream_api.py +56 -0
  382. numba_cuda/numba/cuda/tests/cudapy/test_sync.py +277 -0
  383. numba_cuda/numba/cuda/tests/cudapy/test_tracing.py +200 -0
  384. numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +90 -0
  385. numba_cuda/numba/cuda/tests/cudapy/test_typeconv.py +333 -0
  386. numba_cuda/numba/cuda/tests/cudapy/test_typeinfer.py +538 -0
  387. numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +585 -0
  388. numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +42 -0
  389. numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +485 -0
  390. numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +312 -0
  391. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +23 -0
  392. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +183 -0
  393. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +40 -0
  394. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +40 -0
  395. numba_cuda/numba/cuda/tests/cudapy/test_warning.py +206 -0
  396. numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +331 -0
  397. numba_cuda/numba/cuda/tests/cudasim/__init__.py +9 -0
  398. numba_cuda/numba/cuda/tests/cudasim/support.py +9 -0
  399. numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +111 -0
  400. numba_cuda/numba/cuda/tests/data/__init__.py +2 -0
  401. numba_cuda/numba/cuda/tests/data/cta_barrier.cu +28 -0
  402. numba_cuda/numba/cuda/tests/data/cuda_include.cu +10 -0
  403. numba_cuda/numba/cuda/tests/data/error.cu +12 -0
  404. numba_cuda/numba/cuda/tests/data/include/add.cuh +8 -0
  405. numba_cuda/numba/cuda/tests/data/jitlink.cu +28 -0
  406. numba_cuda/numba/cuda/tests/data/jitlink.ptx +49 -0
  407. numba_cuda/numba/cuda/tests/data/warn.cu +12 -0
  408. numba_cuda/numba/cuda/tests/doc_examples/__init__.py +9 -0
  409. numba_cuda/numba/cuda/tests/doc_examples/ffi/__init__.py +2 -0
  410. numba_cuda/numba/cuda/tests/doc_examples/ffi/functions.cu +54 -0
  411. numba_cuda/numba/cuda/tests/doc_examples/ffi/include/mul.cuh +8 -0
  412. numba_cuda/numba/cuda/tests/doc_examples/ffi/saxpy.cu +14 -0
  413. numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +86 -0
  414. numba_cuda/numba/cuda/tests/doc_examples/test_cpointer.py +68 -0
  415. numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +81 -0
  416. numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +141 -0
  417. numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +160 -0
  418. numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +180 -0
  419. numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +119 -0
  420. numba_cuda/numba/cuda/tests/doc_examples/test_random.py +66 -0
  421. numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +80 -0
  422. numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +206 -0
  423. numba_cuda/numba/cuda/tests/doc_examples/test_ufunc.py +53 -0
  424. numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +76 -0
  425. numba_cuda/numba/cuda/tests/nocuda/__init__.py +9 -0
  426. numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +391 -0
  427. numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +48 -0
  428. numba_cuda/numba/cuda/tests/nocuda/test_import.py +63 -0
  429. numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +252 -0
  430. numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +59 -0
  431. numba_cuda/numba/cuda/tests/nrt/__init__.py +9 -0
  432. numba_cuda/numba/cuda/tests/nrt/test_nrt.py +387 -0
  433. numba_cuda/numba/cuda/tests/nrt/test_nrt_refct.py +124 -0
  434. numba_cuda/numba/cuda/tests/support.py +900 -0
  435. numba_cuda/numba/cuda/typeconv/__init__.py +4 -0
  436. numba_cuda/numba/cuda/typeconv/castgraph.py +137 -0
  437. numba_cuda/numba/cuda/typeconv/rules.py +63 -0
  438. numba_cuda/numba/cuda/typeconv/typeconv.py +121 -0
  439. numba_cuda/numba/cuda/types/__init__.py +233 -0
  440. numba_cuda/numba/cuda/types/__init__.pyi +167 -0
  441. numba_cuda/numba/cuda/types/abstract.py +9 -0
  442. numba_cuda/numba/cuda/types/common.py +9 -0
  443. numba_cuda/numba/cuda/types/containers.py +9 -0
  444. numba_cuda/numba/cuda/types/cuda_abstract.py +533 -0
  445. numba_cuda/numba/cuda/types/cuda_common.py +110 -0
  446. numba_cuda/numba/cuda/types/cuda_containers.py +971 -0
  447. numba_cuda/numba/cuda/types/cuda_function_type.py +230 -0
  448. numba_cuda/numba/cuda/types/cuda_functions.py +798 -0
  449. numba_cuda/numba/cuda/types/cuda_iterators.py +120 -0
  450. numba_cuda/numba/cuda/types/cuda_misc.py +569 -0
  451. numba_cuda/numba/cuda/types/cuda_npytypes.py +690 -0
  452. numba_cuda/numba/cuda/types/cuda_scalars.py +280 -0
  453. numba_cuda/numba/cuda/types/ext_types.py +101 -0
  454. numba_cuda/numba/cuda/types/function_type.py +11 -0
  455. numba_cuda/numba/cuda/types/functions.py +9 -0
  456. numba_cuda/numba/cuda/types/iterators.py +9 -0
  457. numba_cuda/numba/cuda/types/misc.py +9 -0
  458. numba_cuda/numba/cuda/types/npytypes.py +9 -0
  459. numba_cuda/numba/cuda/types/scalars.py +9 -0
  460. numba_cuda/numba/cuda/typing/__init__.py +19 -0
  461. numba_cuda/numba/cuda/typing/arraydecl.py +939 -0
  462. numba_cuda/numba/cuda/typing/asnumbatype.py +130 -0
  463. numba_cuda/numba/cuda/typing/bufproto.py +70 -0
  464. numba_cuda/numba/cuda/typing/builtins.py +1209 -0
  465. numba_cuda/numba/cuda/typing/cffi_utils.py +219 -0
  466. numba_cuda/numba/cuda/typing/cmathdecl.py +47 -0
  467. numba_cuda/numba/cuda/typing/collections.py +138 -0
  468. numba_cuda/numba/cuda/typing/context.py +782 -0
  469. numba_cuda/numba/cuda/typing/ctypes_utils.py +125 -0
  470. numba_cuda/numba/cuda/typing/dictdecl.py +63 -0
  471. numba_cuda/numba/cuda/typing/enumdecl.py +74 -0
  472. numba_cuda/numba/cuda/typing/listdecl.py +147 -0
  473. numba_cuda/numba/cuda/typing/mathdecl.py +158 -0
  474. numba_cuda/numba/cuda/typing/npdatetime.py +322 -0
  475. numba_cuda/numba/cuda/typing/npydecl.py +749 -0
  476. numba_cuda/numba/cuda/typing/setdecl.py +115 -0
  477. numba_cuda/numba/cuda/typing/templates.py +1446 -0
  478. numba_cuda/numba/cuda/typing/typeof.py +301 -0
  479. numba_cuda/numba/cuda/ufuncs.py +746 -0
  480. numba_cuda/numba/cuda/utils.py +724 -0
  481. numba_cuda/numba/cuda/vector_types.py +214 -0
  482. numba_cuda/numba/cuda/vectorizers.py +260 -0
  483. numba_cuda-0.21.1.dist-info/METADATA +109 -0
  484. numba_cuda-0.21.1.dist-info/RECORD +488 -0
  485. numba_cuda-0.21.1.dist-info/WHEEL +5 -0
  486. numba_cuda-0.21.1.dist-info/licenses/LICENSE +26 -0
  487. numba_cuda-0.21.1.dist-info/licenses/LICENSE.numba +24 -0
  488. numba_cuda-0.21.1.dist-info/top_level.txt +1 -0
@@ -0,0 +1,556 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
4
+ from numba.cuda.core import errors
5
+ from numba.cuda import types
6
+ from numba.cuda.typing.npydecl import (
7
+ parse_dtype,
8
+ parse_shape,
9
+ )
10
+ from numba.cuda.typing.templates import (
11
+ AttributeTemplate,
12
+ ConcreteTemplate,
13
+ AbstractTemplate,
14
+ CallableTemplate,
15
+ signature,
16
+ Registry,
17
+ )
18
+ from numba.cuda.types.ext_types import dim3
19
+ from numba import cuda
20
+
21
# Registry collecting the typing templates declared in this module; its bound
# registration methods are aliased so they can be used as short decorators.
registry = Registry()
register, register_attr, register_global = (
    registry.register,
    registry.register_attr,
    registry.register_global,
)
25
+
26
+
27
class Cuda_array_decl(CallableTemplate):
    """Common typing logic for array declarations with a literal shape.

    Subclasses provide the ``key`` naming the function being typed.
    """

    def generic(self):
        def typer(shape, dtype, alignment=None):
            # A shape is acceptable only when it is an integer literal, or a
            # tuple made up entirely of integer literals.
            if isinstance(shape, types.Integer):
                literal_shape = isinstance(shape, types.IntegerLiteral)
            elif isinstance(shape, (types.Tuple, types.UniTuple)):
                literal_shape = all(
                    isinstance(extent, types.IntegerLiteral)
                    for extent in shape
                )
            else:
                literal_shape = False

            if not literal_shape:
                return None

            if alignment is not None and not isinstance(
                alignment, (types.IntegerLiteral, types.NoneType)
            ):
                raise errors.RequireLiteralValue(
                    "alignment must be a constant integer"
                )

            # N.B. alignment plays no part in typing: it is not part of
            # types.Array. The value supplied to the array declaration is
            # handled in the lowering.

            ndim = parse_shape(shape)
            nb_dtype = parse_dtype(dtype)
            if nb_dtype is None or ndim is None:
                return None

            return types.Array(dtype=nb_dtype, ndim=ndim, layout="C")

        return typer
59
+
60
+
61
@register
class Cuda_shared_array(Cuda_array_decl):
    """Typing template for ``cuda.shared.array`` (typer from the base class)."""

    key = cuda.shared.array
64
+
65
+
66
@register
class Cuda_local_array(Cuda_array_decl):
    """Typing template for ``cuda.local.array`` (typer from the base class)."""

    key = cuda.local.array
69
+
70
+
71
@register
class Cuda_const_array_like(CallableTemplate):
    """Typing template for ``cuda.const.array_like``."""

    key = cuda.const.array_like

    def generic(self):
        def typer(ndarray):
            # The result has exactly the type of the argument.
            return ndarray

        return typer
80
+
81
+
82
@register
class Cuda_threadfence_device(ConcreteTemplate):
    """Typing for ``cuda.threadfence``: no arguments, returns none."""

    key = cuda.threadfence
    cases = [
        signature(types.none),
    ]
86
+
87
+
88
@register
class Cuda_threadfence_block(ConcreteTemplate):
    """Typing for ``cuda.threadfence_block``: no arguments, returns none."""

    key = cuda.threadfence_block
    cases = [
        signature(types.none),
    ]
92
+
93
+
94
@register
class Cuda_threadfence_system(ConcreteTemplate):
    """Typing for ``cuda.threadfence_system``: no arguments, returns none."""

    key = cuda.threadfence_system
    cases = [
        signature(types.none),
    ]
98
+
99
+
100
@register
class Cuda_syncwarp(ConcreteTemplate):
    """Typing for ``cuda.syncwarp``, callable with or without an i4 argument."""

    key = cuda.syncwarp
    cases = [
        # Zero-argument form.
        signature(types.none),
        # Form taking a single 32-bit integer argument.
        signature(types.none, types.i4),
    ]
104
+
105
+
106
@register
class Cuda_vote_sync_intrinsic(ConcreteTemplate):
    """Typing for ``cuda.vote_sync_intrinsic``.

    Accepts ``(i4, i4, b1)`` and yields an ``(i4, b1)`` tuple.
    """

    key = cuda.vote_sync_intrinsic
    cases = [
        signature(
            types.Tuple((types.i4, types.b1)),
            types.i4,
            types.i4,
            types.b1,
        ),
    ]
114
+
115
+
116
@register
class Cuda_match_any_sync(ConcreteTemplate):
    """Typing for ``cuda.match_any_sync``.

    The first argument is an i4 and the result is an i4; the second argument
    may be any of i4, i8, f4 or f8.
    """

    key = cuda.match_any_sync
    cases = [
        signature(types.i4, types.i4, value_ty)
        for value_ty in (types.i4, types.i8, types.f4, types.f8)
    ]
125
+
126
+
127
@register
class Cuda_match_all_sync(ConcreteTemplate):
    """Typing for ``cuda.match_all_sync``.

    The first argument is an i4 and the result is an ``(i4, b1)`` tuple; the
    second argument may be any of i4, i8, f4 or f8.
    """

    key = cuda.match_all_sync
    cases = [
        signature(types.Tuple((types.i4, types.b1)), types.i4, value_ty)
        for value_ty in (types.i4, types.i8, types.f4, types.f8)
    ]
136
+
137
+
138
@register
class Cuda_activemask(ConcreteTemplate):
    """Typing for ``cuda.activemask``: no arguments, returns a uint32."""

    key = cuda.activemask
    cases = [
        signature(types.uint32),
    ]
142
+
143
+
144
@register
class Cuda_lanemask_lt(ConcreteTemplate):
    """Typing for ``cuda.lanemask_lt``: no arguments, returns a uint32."""

    key = cuda.lanemask_lt
    cases = [
        signature(types.uint32),
    ]
148
+
149
+
150
@register
class Cuda_popc(ConcreteTemplate):
    """
    Supported types from `llvm.popc`
    [here](http://docs.nvidia.com/cuda/nvvm-ir-spec/index.html#bit-manipulations-intrinics)

    Each case maps an integer type to the same integer type.
    """

    key = cuda.popc
    cases = [
        signature(int_ty, int_ty)
        for int_ty in (
            types.int8,
            types.int16,
            types.int32,
            types.int64,
            types.uint8,
            types.uint16,
            types.uint32,
            types.uint64,
        )
    ]
168
+
169
+
170
@register
class Cuda_fma(ConcreteTemplate):
    """
    Supported types from `llvm.fma`
    [here](https://docs.nvidia.com/cuda/nvvm-ir-spec/index.html#standard-c-library-intrinics)

    Three operands of one floating-point type produce that same type.
    """

    key = cuda.fma
    cases = [
        signature(fp_ty, fp_ty, fp_ty, fp_ty)
        for fp_ty in (types.float32, types.float64)
    ]
182
+
183
+
184
@register
class Cuda_cbrt(ConcreteTemplate):
    """Typing for ``cuda.cbrt``: float32 -> float32 and float64 -> float64."""

    key = cuda.cbrt
    cases = [
        signature(fp_ty, fp_ty) for fp_ty in (types.float32, types.float64)
    ]
191
+
192
+
193
@register
class Cuda_brev(ConcreteTemplate):
    """Typing for ``cuda.brev``: uint32 -> uint32 and uint64 -> uint64."""

    key = cuda.brev
    cases = [
        signature(uint_ty, uint_ty)
        for uint_ty in (types.uint32, types.uint64)
    ]
200
+
201
+
202
@register
class Cuda_clz(ConcreteTemplate):
    """
    Supported types from `llvm.ctlz`
    [here](http://docs.nvidia.com/cuda/nvvm-ir-spec/index.html#bit-manipulations-intrinics)

    Each case maps an integer type to the same integer type.
    """

    key = cuda.clz
    cases = [
        signature(int_ty, int_ty)
        for int_ty in (
            types.int8,
            types.int16,
            types.int32,
            types.int64,
            types.uint8,
            types.uint16,
            types.uint32,
            types.uint64,
        )
    ]
220
+
221
+
222
@register
class Cuda_ffs(ConcreteTemplate):
    """
    Supported types from `llvm.cttz`
    [here](http://docs.nvidia.com/cuda/nvvm-ir-spec/index.html#bit-manipulations-intrinics)

    Every supported integer argument type yields a uint32 result.
    """

    key = cuda.ffs
    cases = [
        signature(types.uint32, int_ty)
        for int_ty in (
            types.int8,
            types.int16,
            types.int32,
            types.int64,
            types.uint8,
            types.uint16,
            types.uint32,
            types.uint64,
        )
    ]
240
+
241
+
242
@register
class Cuda_selp(AbstractTemplate):
    """Typing for ``cuda.selp(test, a, b)``.

    A signature is produced only when ``a`` and ``b`` have the same type and
    that type is one of the supported scalar types; the result has that type.
    """

    key = cuda.selp

    def generic(self, args, kws):
        assert not kws
        cond, lhs, rhs = args

        # per docs
        # http://docs.nvidia.com/cuda/parallel-thread-execution/index.html#comparison-and-selection-instructions-selp
        selectable = (
            types.float64,
            types.float32,
            types.int16,
            types.uint16,
            types.int32,
            types.uint32,
            types.int64,
            types.uint64,
        )

        # Both value operands must agree in type ...
        if lhs != rhs:
            return None
        # ... and that type must be supported.
        if lhs not in selectable:
            return None

        return signature(lhs, cond, lhs, lhs)
267
+
268
+
269
@register_global(float)
class Float(AbstractTemplate):
    """Typing for the builtin ``float`` applied to a single float16 value.

    Any other argument type yields no signature from this template.
    """

    def generic(self, args, kws):
        assert not kws

        # Exactly one positional argument is expected.
        (operand,) = args

        if not (operand == types.float16):
            return None

        return signature(operand, operand)
278
+
279
+
280
+ # If multiple ConcreteTemplates provide typing for a single function, then
281
+ # function resolution will pick the first compatible typing it finds even if it
282
+ # involves inserting a cast that would be considered undesirable (in this
283
+ # specific case, float16s could be cast to float32s for comparisons).
284
+ #
285
+ # To work around this, we instead use an AbstractTemplate that implements
286
+ # exactly the casting logic that we desire. The AbstractTemplate gets
287
+ # considered in preference to ConcreteTemplates during typing.
288
+ #
289
+ # This is tracked as Issue #7863 (https://github.com/numba/numba/issues/7863) -
290
+ # once this is resolved it should be possible to replace this AbstractTemplate
291
+ # with a ConcreteTemplate to simplify the logic.
292
+
293
+
294
def _gen(l_key, supported_types):
    """Build and register the typing template for one atomic operation.

    ``l_key`` becomes the template key. ``supported_types`` is the collection
    of array dtypes for which a signature is produced. The registered
    template class is returned.
    """

    @register
    class Cuda_atomic(AbstractTemplate):
        key = l_key

        def generic(self, args, kws):
            assert not kws
            # Unpacking also enforces that exactly three arguments are given.
            ary, idx, _val = args
            dty = ary.dtype

            if dty not in supported_types:
                return None

            # 1D arrays are indexed with an intp; arrays of higher rank keep
            # the index type that was passed in. Any other rank is untyped.
            if ary.ndim == 1:
                index_ty = types.intp
            elif ary.ndim > 1:
                index_ty = idx
            else:
                return None

            return signature(dty, ary, index_ty, dty)

    return Cuda_atomic
313
+
314
+
315
# Array dtypes accepted by the atomic templates, from narrowest set to widest.
unsigned_int_numba_types = (types.uint32, types.uint64)

integer_numba_types = (types.int32, types.uint32, types.int64, types.uint64)

# The two floating-point types followed by every supported integer type.
all_numba_types = (types.float64, types.float32) + integer_numba_types

# Arithmetic and min/max style atomics: floats and integers.
Cuda_atomic_add = _gen(cuda.atomic.add, all_numba_types)
Cuda_atomic_sub = _gen(cuda.atomic.sub, all_numba_types)
Cuda_atomic_max = _gen(cuda.atomic.max, all_numba_types)
Cuda_atomic_min = _gen(cuda.atomic.min, all_numba_types)
Cuda_atomic_nanmax = _gen(cuda.atomic.nanmax, all_numba_types)
Cuda_atomic_nanmin = _gen(cuda.atomic.nanmin, all_numba_types)

# Bitwise atomics: integers only.
Cuda_atomic_and = _gen(cuda.atomic.and_, integer_numba_types)
Cuda_atomic_or = _gen(cuda.atomic.or_, integer_numba_types)
Cuda_atomic_xor = _gen(cuda.atomic.xor, integer_numba_types)

# Increment / decrement: unsigned integers only.
Cuda_atomic_inc = _gen(cuda.atomic.inc, unsigned_int_numba_types)
Cuda_atomic_dec = _gen(cuda.atomic.dec, unsigned_int_numba_types)

# Exchange: integers only.
Cuda_atomic_exch = _gen(cuda.atomic.exch, integer_numba_types)
340
+
341
+
342
@register
class Cuda_atomic_compare_and_swap(AbstractTemplate):
    """Typing for ``cuda.atomic.compare_and_swap(ary, old, val)``.

    Only 1D arrays whose dtype is in ``integer_numba_types`` are typed; the
    old and new values, and the result, all take the array's dtype.
    """

    key = cuda.atomic.compare_and_swap

    def generic(self, args, kws):
        assert not kws
        # Unpacking also enforces that exactly three arguments are given.
        ary, _old, _val = args
        dty = ary.dtype

        if dty not in integer_numba_types:
            return None
        if ary.ndim != 1:
            return None

        return signature(dty, ary, dty, dty)
353
+
354
+
355
@register
class Cuda_atomic_cas(AbstractTemplate):
    """Typing for ``cuda.atomic.cas(ary, idx, old, val)``.

    The array dtype must be in ``integer_numba_types``. The old and new
    values, and the result, all take the array's dtype.
    """

    key = cuda.atomic.cas

    def generic(self, args, kws):
        assert not kws
        # Unpacking also enforces that exactly four arguments are given.
        ary, idx, _old, _val = args
        dty = ary.dtype

        if dty not in integer_numba_types:
            return None

        # 1D arrays are indexed with an intp; arrays of higher rank keep the
        # index type that was passed in. Any other rank is untyped.
        if ary.ndim == 1:
            index_ty = types.intp
        elif ary.ndim > 1:
            index_ty = idx
        else:
            return None

        return signature(dty, ary, index_ty, dty, dty)
371
+
372
+
373
@register_global(breakpoint)
class Cuda_breakpoint(ConcreteTemplate):
    """Typing for the builtin ``breakpoint``: no arguments, returns none."""

    cases = [
        signature(types.none),
    ]
376
+
377
+
378
@register
class Cuda_nanosleep(ConcreteTemplate):
    """Typing for ``cuda.nanosleep``: one uint32 argument, void result."""

    key = cuda.nanosleep

    cases = [
        signature(types.void, types.uint32),
    ]
383
+
384
+
385
@register_attr
class Dim3_attrs(AttributeTemplate):
    """Attribute typing for ``dim3`` values: ``x``, ``y`` and ``z`` are int32."""

    key = dim3

    def resolve_x(self, mod):
        return types.int32

    def resolve_y(self, mod):
        return types.int32

    def resolve_z(self, mod):
        return types.int32
397
+
398
+
399
@register_attr
class CudaSharedModuleTemplate(AttributeTemplate):
    """Attribute typing for the ``cuda.shared`` module."""

    key = types.Module(cuda.shared)

    def resolve_array(self, mod):
        # ``cuda.shared.array`` is typed by the Cuda_shared_array template.
        return types.Function(Cuda_shared_array)
405
+
406
+
407
@register_attr
class CudaConstModuleTemplate(AttributeTemplate):
    """Attribute typing for the ``cuda.const`` module."""

    key = types.Module(cuda.const)

    def resolve_array_like(self, mod):
        # ``cuda.const.array_like`` is typed by Cuda_const_array_like.
        return types.Function(Cuda_const_array_like)
413
+
414
+
415
@register_attr
class CudaLocalModuleTemplate(AttributeTemplate):
    """Attribute typing for the ``cuda.local`` module."""

    key = types.Module(cuda.local)

    def resolve_array(self, mod):
        # ``cuda.local.array`` is typed by the Cuda_local_array template.
        return types.Function(Cuda_local_array)
421
+
422
+
423
@register_attr
class CudaAtomicTemplate(AttributeTemplate):
    """Attribute typing for the ``cuda.atomic`` module.

    Each ``resolve_<name>`` method maps ``cuda.atomic.<name>`` to the function
    type wrapping the corresponding typing template.
    """

    key = types.Module(cuda.atomic)

    # -- arithmetic ---------------------------------------------------------

    def resolve_add(self, mod):
        return types.Function(Cuda_atomic_add)

    def resolve_sub(self, mod):
        return types.Function(Cuda_atomic_sub)

    def resolve_inc(self, mod):
        return types.Function(Cuda_atomic_inc)

    def resolve_dec(self, mod):
        return types.Function(Cuda_atomic_dec)

    # -- bitwise ------------------------------------------------------------

    def resolve_and_(self, mod):
        return types.Function(Cuda_atomic_and)

    def resolve_or_(self, mod):
        return types.Function(Cuda_atomic_or)

    def resolve_xor(self, mod):
        return types.Function(Cuda_atomic_xor)

    # -- minimum / maximum --------------------------------------------------

    def resolve_max(self, mod):
        return types.Function(Cuda_atomic_max)

    def resolve_min(self, mod):
        return types.Function(Cuda_atomic_min)

    def resolve_nanmax(self, mod):
        return types.Function(Cuda_atomic_nanmax)

    def resolve_nanmin(self, mod):
        return types.Function(Cuda_atomic_nanmin)

    # -- exchange and compare-and-swap --------------------------------------

    def resolve_exch(self, mod):
        return types.Function(Cuda_atomic_exch)

    def resolve_compare_and_swap(self, mod):
        return types.Function(Cuda_atomic_compare_and_swap)

    def resolve_cas(self, mod):
        return types.Function(Cuda_atomic_cas)
468
+
469
+
470
@register_attr
class CudaModuleTemplate(AttributeTemplate):
    """Attribute typing for the top-level ``cuda`` module.

    Each ``resolve_<name>`` method gives the type of ``cuda.<name>``: a module
    type for submodules, ``dim3`` or int32 for the index attributes, and a
    function type wrapping the matching typing template for functions.
    """

    key = types.Module(cuda)

    # -- submodules ---------------------------------------------------------

    def resolve_cg(self, mod):
        return types.Module(cuda.cg)

    def resolve_shared(self, mod):
        return types.Module(cuda.shared)

    def resolve_local(self, mod):
        return types.Module(cuda.local)

    def resolve_const(self, mod):
        return types.Module(cuda.const)

    def resolve_atomic(self, mod):
        return types.Module(cuda.atomic)

    # -- index and dimension attributes -------------------------------------

    def resolve_threadIdx(self, mod):
        return dim3

    def resolve_blockIdx(self, mod):
        return dim3

    def resolve_blockDim(self, mod):
        return dim3

    def resolve_gridDim(self, mod):
        return dim3

    def resolve_laneid(self, mod):
        return types.int32

    # -- bit manipulation and math functions --------------------------------

    def resolve_popc(self, mod):
        return types.Function(Cuda_popc)

    def resolve_brev(self, mod):
        return types.Function(Cuda_brev)

    def resolve_clz(self, mod):
        return types.Function(Cuda_clz)

    def resolve_ffs(self, mod):
        return types.Function(Cuda_ffs)

    def resolve_fma(self, mod):
        return types.Function(Cuda_fma)

    def resolve_cbrt(self, mod):
        return types.Function(Cuda_cbrt)

    def resolve_selp(self, mod):
        return types.Function(Cuda_selp)

    # -- fences -------------------------------------------------------------

    def resolve_threadfence(self, mod):
        return types.Function(Cuda_threadfence_device)

    def resolve_threadfence_block(self, mod):
        return types.Function(Cuda_threadfence_block)

    def resolve_threadfence_system(self, mod):
        return types.Function(Cuda_threadfence_system)

    # -- warp-level functions -----------------------------------------------

    def resolve_syncwarp(self, mod):
        return types.Function(Cuda_syncwarp)

    def resolve_vote_sync_intrinsic(self, mod):
        return types.Function(Cuda_vote_sync_intrinsic)

    def resolve_match_any_sync(self, mod):
        return types.Function(Cuda_match_any_sync)

    def resolve_match_all_sync(self, mod):
        return types.Function(Cuda_match_all_sync)

    def resolve_activemask(self, mod):
        return types.Function(Cuda_activemask)

    def resolve_lanemask_lt(self, mod):
        return types.Function(Cuda_lanemask_lt)

    # -- miscellaneous ------------------------------------------------------

    def resolve_nanosleep(self, mod):
        return types.Function(Cuda_nanosleep)
554
+
555
+
556
# Register the ``cuda`` module object itself as a global typed as a module.
register_global(
    cuda,
    types.Module(cuda),
)
@@ -0,0 +1,14 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
4
+ """CUDA Driver
5
+
6
+ - Driver API binding
7
+ - NVVM API binding
8
+ - Device array implementation
9
+
10
+ """
11
+
12
+ from numba.cuda.core import config
13
+
14
# Fail at import time when the simulator is enabled.
assert not config.ENABLE_CUDASIM, (
    "Cannot use real driver API with simulator"
)