numba-cuda 0.21.1__cp313-cp313-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (488) hide show
  1. _numba_cuda_redirector.pth +4 -0
  2. _numba_cuda_redirector.py +89 -0
  3. numba_cuda/VERSION +1 -0
  4. numba_cuda/__init__.py +6 -0
  5. numba_cuda/_version.py +11 -0
  6. numba_cuda/numba/cuda/__init__.py +70 -0
  7. numba_cuda/numba/cuda/_internal/cuda_bf16.py +16394 -0
  8. numba_cuda/numba/cuda/_internal/cuda_fp16.py +8112 -0
  9. numba_cuda/numba/cuda/api.py +577 -0
  10. numba_cuda/numba/cuda/api_util.py +76 -0
  11. numba_cuda/numba/cuda/args.py +72 -0
  12. numba_cuda/numba/cuda/bf16.py +397 -0
  13. numba_cuda/numba/cuda/cache_hints.py +287 -0
  14. numba_cuda/numba/cuda/cext/__init__.py +2 -0
  15. numba_cuda/numba/cuda/cext/_devicearray.cp313-win_amd64.pyd +0 -0
  16. numba_cuda/numba/cuda/cext/_devicearray.cpp +159 -0
  17. numba_cuda/numba/cuda/cext/_devicearray.h +29 -0
  18. numba_cuda/numba/cuda/cext/_dispatcher.cp313-win_amd64.pyd +0 -0
  19. numba_cuda/numba/cuda/cext/_dispatcher.cpp +1098 -0
  20. numba_cuda/numba/cuda/cext/_hashtable.cpp +532 -0
  21. numba_cuda/numba/cuda/cext/_hashtable.h +135 -0
  22. numba_cuda/numba/cuda/cext/_helperlib.c +71 -0
  23. numba_cuda/numba/cuda/cext/_helperlib.cp313-win_amd64.pyd +0 -0
  24. numba_cuda/numba/cuda/cext/_helpermod.c +82 -0
  25. numba_cuda/numba/cuda/cext/_pymodule.h +38 -0
  26. numba_cuda/numba/cuda/cext/_typeconv.cp313-win_amd64.pyd +0 -0
  27. numba_cuda/numba/cuda/cext/_typeconv.cpp +206 -0
  28. numba_cuda/numba/cuda/cext/_typeof.cpp +1159 -0
  29. numba_cuda/numba/cuda/cext/_typeof.h +19 -0
  30. numba_cuda/numba/cuda/cext/capsulethunk.h +111 -0
  31. numba_cuda/numba/cuda/cext/mviewbuf.c +385 -0
  32. numba_cuda/numba/cuda/cext/mviewbuf.cp313-win_amd64.pyd +0 -0
  33. numba_cuda/numba/cuda/cext/typeconv.cpp +212 -0
  34. numba_cuda/numba/cuda/cext/typeconv.hpp +101 -0
  35. numba_cuda/numba/cuda/cg.py +67 -0
  36. numba_cuda/numba/cuda/cgutils.py +1294 -0
  37. numba_cuda/numba/cuda/cloudpickle/__init__.py +21 -0
  38. numba_cuda/numba/cuda/cloudpickle/cloudpickle.py +1598 -0
  39. numba_cuda/numba/cuda/cloudpickle/cloudpickle_fast.py +17 -0
  40. numba_cuda/numba/cuda/codegen.py +541 -0
  41. numba_cuda/numba/cuda/compiler.py +1396 -0
  42. numba_cuda/numba/cuda/core/analysis.py +758 -0
  43. numba_cuda/numba/cuda/core/annotations/__init__.py +0 -0
  44. numba_cuda/numba/cuda/core/annotations/pretty_annotate.py +288 -0
  45. numba_cuda/numba/cuda/core/annotations/type_annotations.py +305 -0
  46. numba_cuda/numba/cuda/core/base.py +1332 -0
  47. numba_cuda/numba/cuda/core/boxing.py +1411 -0
  48. numba_cuda/numba/cuda/core/bytecode.py +728 -0
  49. numba_cuda/numba/cuda/core/byteflow.py +2346 -0
  50. numba_cuda/numba/cuda/core/caching.py +744 -0
  51. numba_cuda/numba/cuda/core/callconv.py +392 -0
  52. numba_cuda/numba/cuda/core/codegen.py +171 -0
  53. numba_cuda/numba/cuda/core/compiler.py +199 -0
  54. numba_cuda/numba/cuda/core/compiler_lock.py +85 -0
  55. numba_cuda/numba/cuda/core/compiler_machinery.py +497 -0
  56. numba_cuda/numba/cuda/core/config.py +650 -0
  57. numba_cuda/numba/cuda/core/consts.py +124 -0
  58. numba_cuda/numba/cuda/core/controlflow.py +989 -0
  59. numba_cuda/numba/cuda/core/entrypoints.py +57 -0
  60. numba_cuda/numba/cuda/core/environment.py +66 -0
  61. numba_cuda/numba/cuda/core/errors.py +917 -0
  62. numba_cuda/numba/cuda/core/event.py +511 -0
  63. numba_cuda/numba/cuda/core/funcdesc.py +330 -0
  64. numba_cuda/numba/cuda/core/generators.py +387 -0
  65. numba_cuda/numba/cuda/core/imputils.py +509 -0
  66. numba_cuda/numba/cuda/core/inline_closurecall.py +1787 -0
  67. numba_cuda/numba/cuda/core/interpreter.py +3617 -0
  68. numba_cuda/numba/cuda/core/ir.py +1812 -0
  69. numba_cuda/numba/cuda/core/ir_utils.py +2638 -0
  70. numba_cuda/numba/cuda/core/optional.py +129 -0
  71. numba_cuda/numba/cuda/core/options.py +262 -0
  72. numba_cuda/numba/cuda/core/postproc.py +249 -0
  73. numba_cuda/numba/cuda/core/pythonapi.py +1859 -0
  74. numba_cuda/numba/cuda/core/registry.py +46 -0
  75. numba_cuda/numba/cuda/core/removerefctpass.py +123 -0
  76. numba_cuda/numba/cuda/core/rewrites/__init__.py +26 -0
  77. numba_cuda/numba/cuda/core/rewrites/ir_print.py +91 -0
  78. numba_cuda/numba/cuda/core/rewrites/registry.py +104 -0
  79. numba_cuda/numba/cuda/core/rewrites/static_binop.py +41 -0
  80. numba_cuda/numba/cuda/core/rewrites/static_getitem.py +189 -0
  81. numba_cuda/numba/cuda/core/rewrites/static_raise.py +100 -0
  82. numba_cuda/numba/cuda/core/sigutils.py +68 -0
  83. numba_cuda/numba/cuda/core/ssa.py +498 -0
  84. numba_cuda/numba/cuda/core/targetconfig.py +330 -0
  85. numba_cuda/numba/cuda/core/tracing.py +231 -0
  86. numba_cuda/numba/cuda/core/transforms.py +956 -0
  87. numba_cuda/numba/cuda/core/typed_passes.py +867 -0
  88. numba_cuda/numba/cuda/core/typeinfer.py +1950 -0
  89. numba_cuda/numba/cuda/core/unsafe/__init__.py +0 -0
  90. numba_cuda/numba/cuda/core/unsafe/bytes.py +67 -0
  91. numba_cuda/numba/cuda/core/unsafe/eh.py +67 -0
  92. numba_cuda/numba/cuda/core/unsafe/refcount.py +98 -0
  93. numba_cuda/numba/cuda/core/untyped_passes.py +1979 -0
  94. numba_cuda/numba/cuda/cpython/builtins.py +1153 -0
  95. numba_cuda/numba/cuda/cpython/charseq.py +1218 -0
  96. numba_cuda/numba/cuda/cpython/cmathimpl.py +560 -0
  97. numba_cuda/numba/cuda/cpython/enumimpl.py +103 -0
  98. numba_cuda/numba/cuda/cpython/iterators.py +167 -0
  99. numba_cuda/numba/cuda/cpython/listobj.py +1326 -0
  100. numba_cuda/numba/cuda/cpython/mathimpl.py +499 -0
  101. numba_cuda/numba/cuda/cpython/numbers.py +1475 -0
  102. numba_cuda/numba/cuda/cpython/rangeobj.py +289 -0
  103. numba_cuda/numba/cuda/cpython/slicing.py +322 -0
  104. numba_cuda/numba/cuda/cpython/tupleobj.py +456 -0
  105. numba_cuda/numba/cuda/cpython/unicode.py +2865 -0
  106. numba_cuda/numba/cuda/cpython/unicode_support.py +1597 -0
  107. numba_cuda/numba/cuda/cpython/unsafe/__init__.py +0 -0
  108. numba_cuda/numba/cuda/cpython/unsafe/numbers.py +64 -0
  109. numba_cuda/numba/cuda/cpython/unsafe/tuple.py +92 -0
  110. numba_cuda/numba/cuda/cuda_paths.py +691 -0
  111. numba_cuda/numba/cuda/cudadecl.py +556 -0
  112. numba_cuda/numba/cuda/cudadrv/__init__.py +14 -0
  113. numba_cuda/numba/cuda/cudadrv/devicearray.py +951 -0
  114. numba_cuda/numba/cuda/cudadrv/devices.py +249 -0
  115. numba_cuda/numba/cuda/cudadrv/driver.py +3222 -0
  116. numba_cuda/numba/cuda/cudadrv/drvapi.py +435 -0
  117. numba_cuda/numba/cuda/cudadrv/dummyarray.py +558 -0
  118. numba_cuda/numba/cuda/cudadrv/enums.py +613 -0
  119. numba_cuda/numba/cuda/cudadrv/error.py +48 -0
  120. numba_cuda/numba/cuda/cudadrv/libs.py +220 -0
  121. numba_cuda/numba/cuda/cudadrv/linkable_code.py +184 -0
  122. numba_cuda/numba/cuda/cudadrv/mappings.py +14 -0
  123. numba_cuda/numba/cuda/cudadrv/ndarray.py +26 -0
  124. numba_cuda/numba/cuda/cudadrv/nvrtc.py +193 -0
  125. numba_cuda/numba/cuda/cudadrv/nvvm.py +756 -0
  126. numba_cuda/numba/cuda/cudadrv/rtapi.py +13 -0
  127. numba_cuda/numba/cuda/cudadrv/runtime.py +34 -0
  128. numba_cuda/numba/cuda/cudaimpl.py +995 -0
  129. numba_cuda/numba/cuda/cudamath.py +149 -0
  130. numba_cuda/numba/cuda/datamodel/__init__.py +7 -0
  131. numba_cuda/numba/cuda/datamodel/cuda_manager.py +66 -0
  132. numba_cuda/numba/cuda/datamodel/cuda_models.py +1446 -0
  133. numba_cuda/numba/cuda/datamodel/cuda_packer.py +224 -0
  134. numba_cuda/numba/cuda/datamodel/cuda_registry.py +22 -0
  135. numba_cuda/numba/cuda/datamodel/cuda_testing.py +153 -0
  136. numba_cuda/numba/cuda/datamodel/manager.py +11 -0
  137. numba_cuda/numba/cuda/datamodel/models.py +9 -0
  138. numba_cuda/numba/cuda/datamodel/packer.py +9 -0
  139. numba_cuda/numba/cuda/datamodel/registry.py +11 -0
  140. numba_cuda/numba/cuda/datamodel/testing.py +11 -0
  141. numba_cuda/numba/cuda/debuginfo.py +903 -0
  142. numba_cuda/numba/cuda/decorators.py +294 -0
  143. numba_cuda/numba/cuda/descriptor.py +35 -0
  144. numba_cuda/numba/cuda/device_init.py +158 -0
  145. numba_cuda/numba/cuda/deviceufunc.py +1021 -0
  146. numba_cuda/numba/cuda/dispatcher.py +2463 -0
  147. numba_cuda/numba/cuda/errors.py +72 -0
  148. numba_cuda/numba/cuda/extending.py +697 -0
  149. numba_cuda/numba/cuda/flags.py +178 -0
  150. numba_cuda/numba/cuda/fp16.py +357 -0
  151. numba_cuda/numba/cuda/include/12/cuda_bf16.h +5118 -0
  152. numba_cuda/numba/cuda/include/12/cuda_bf16.hpp +3865 -0
  153. numba_cuda/numba/cuda/include/12/cuda_fp16.h +5363 -0
  154. numba_cuda/numba/cuda/include/12/cuda_fp16.hpp +3483 -0
  155. numba_cuda/numba/cuda/include/13/cuda_bf16.h +5118 -0
  156. numba_cuda/numba/cuda/include/13/cuda_bf16.hpp +3865 -0
  157. numba_cuda/numba/cuda/include/13/cuda_fp16.h +5363 -0
  158. numba_cuda/numba/cuda/include/13/cuda_fp16.hpp +3483 -0
  159. numba_cuda/numba/cuda/initialize.py +24 -0
  160. numba_cuda/numba/cuda/intrinsic_wrapper.py +41 -0
  161. numba_cuda/numba/cuda/intrinsics.py +382 -0
  162. numba_cuda/numba/cuda/itanium_mangler.py +214 -0
  163. numba_cuda/numba/cuda/kernels/__init__.py +2 -0
  164. numba_cuda/numba/cuda/kernels/reduction.py +265 -0
  165. numba_cuda/numba/cuda/kernels/transpose.py +65 -0
  166. numba_cuda/numba/cuda/libdevice.py +3386 -0
  167. numba_cuda/numba/cuda/libdevicedecl.py +20 -0
  168. numba_cuda/numba/cuda/libdevicefuncs.py +1060 -0
  169. numba_cuda/numba/cuda/libdeviceimpl.py +88 -0
  170. numba_cuda/numba/cuda/locks.py +19 -0
  171. numba_cuda/numba/cuda/lowering.py +1951 -0
  172. numba_cuda/numba/cuda/mathimpl.py +374 -0
  173. numba_cuda/numba/cuda/memory_management/__init__.py +4 -0
  174. numba_cuda/numba/cuda/memory_management/memsys.cu +99 -0
  175. numba_cuda/numba/cuda/memory_management/memsys.cuh +22 -0
  176. numba_cuda/numba/cuda/memory_management/nrt.cu +212 -0
  177. numba_cuda/numba/cuda/memory_management/nrt.cuh +48 -0
  178. numba_cuda/numba/cuda/memory_management/nrt.py +390 -0
  179. numba_cuda/numba/cuda/memory_management/nrt_context.py +438 -0
  180. numba_cuda/numba/cuda/misc/appdirs.py +594 -0
  181. numba_cuda/numba/cuda/misc/cffiimpl.py +24 -0
  182. numba_cuda/numba/cuda/misc/coverage_support.py +43 -0
  183. numba_cuda/numba/cuda/misc/dump_style.py +41 -0
  184. numba_cuda/numba/cuda/misc/findlib.py +75 -0
  185. numba_cuda/numba/cuda/misc/firstlinefinder.py +96 -0
  186. numba_cuda/numba/cuda/misc/gdb_hook.py +240 -0
  187. numba_cuda/numba/cuda/misc/literal.py +28 -0
  188. numba_cuda/numba/cuda/misc/llvm_pass_timings.py +412 -0
  189. numba_cuda/numba/cuda/misc/special.py +94 -0
  190. numba_cuda/numba/cuda/models.py +56 -0
  191. numba_cuda/numba/cuda/np/arraymath.py +5130 -0
  192. numba_cuda/numba/cuda/np/arrayobj.py +7635 -0
  193. numba_cuda/numba/cuda/np/extensions.py +11 -0
  194. numba_cuda/numba/cuda/np/linalg.py +3087 -0
  195. numba_cuda/numba/cuda/np/math/__init__.py +0 -0
  196. numba_cuda/numba/cuda/np/math/cmathimpl.py +558 -0
  197. numba_cuda/numba/cuda/np/math/mathimpl.py +487 -0
  198. numba_cuda/numba/cuda/np/math/numbers.py +1461 -0
  199. numba_cuda/numba/cuda/np/npdatetime.py +969 -0
  200. numba_cuda/numba/cuda/np/npdatetime_helpers.py +217 -0
  201. numba_cuda/numba/cuda/np/npyfuncs.py +1808 -0
  202. numba_cuda/numba/cuda/np/npyimpl.py +1027 -0
  203. numba_cuda/numba/cuda/np/numpy_support.py +798 -0
  204. numba_cuda/numba/cuda/np/polynomial/__init__.py +4 -0
  205. numba_cuda/numba/cuda/np/polynomial/polynomial_core.py +242 -0
  206. numba_cuda/numba/cuda/np/polynomial/polynomial_functions.py +380 -0
  207. numba_cuda/numba/cuda/np/ufunc/__init__.py +4 -0
  208. numba_cuda/numba/cuda/np/ufunc/decorators.py +203 -0
  209. numba_cuda/numba/cuda/np/ufunc/sigparse.py +68 -0
  210. numba_cuda/numba/cuda/np/ufunc/ufuncbuilder.py +65 -0
  211. numba_cuda/numba/cuda/np/ufunc_db.py +1282 -0
  212. numba_cuda/numba/cuda/np/unsafe/__init__.py +0 -0
  213. numba_cuda/numba/cuda/np/unsafe/ndarray.py +84 -0
  214. numba_cuda/numba/cuda/nvvmutils.py +254 -0
  215. numba_cuda/numba/cuda/printimpl.py +126 -0
  216. numba_cuda/numba/cuda/random.py +308 -0
  217. numba_cuda/numba/cuda/reshape_funcs.cu +156 -0
  218. numba_cuda/numba/cuda/serialize.py +267 -0
  219. numba_cuda/numba/cuda/simulator/__init__.py +63 -0
  220. numba_cuda/numba/cuda/simulator/_internal/__init__.py +4 -0
  221. numba_cuda/numba/cuda/simulator/_internal/cuda_bf16.py +2 -0
  222. numba_cuda/numba/cuda/simulator/api.py +179 -0
  223. numba_cuda/numba/cuda/simulator/bf16.py +4 -0
  224. numba_cuda/numba/cuda/simulator/compiler.py +38 -0
  225. numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +11 -0
  226. numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +462 -0
  227. numba_cuda/numba/cuda/simulator/cudadrv/devices.py +122 -0
  228. numba_cuda/numba/cuda/simulator/cudadrv/driver.py +66 -0
  229. numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +7 -0
  230. numba_cuda/numba/cuda/simulator/cudadrv/dummyarray.py +7 -0
  231. numba_cuda/numba/cuda/simulator/cudadrv/error.py +10 -0
  232. numba_cuda/numba/cuda/simulator/cudadrv/libs.py +10 -0
  233. numba_cuda/numba/cuda/simulator/cudadrv/linkable_code.py +61 -0
  234. numba_cuda/numba/cuda/simulator/cudadrv/nvrtc.py +11 -0
  235. numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +32 -0
  236. numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +22 -0
  237. numba_cuda/numba/cuda/simulator/dispatcher.py +11 -0
  238. numba_cuda/numba/cuda/simulator/kernel.py +320 -0
  239. numba_cuda/numba/cuda/simulator/kernelapi.py +509 -0
  240. numba_cuda/numba/cuda/simulator/memory_management/__init__.py +4 -0
  241. numba_cuda/numba/cuda/simulator/memory_management/nrt.py +21 -0
  242. numba_cuda/numba/cuda/simulator/reduction.py +19 -0
  243. numba_cuda/numba/cuda/simulator/tests/support.py +4 -0
  244. numba_cuda/numba/cuda/simulator/vector_types.py +65 -0
  245. numba_cuda/numba/cuda/simulator_init.py +18 -0
  246. numba_cuda/numba/cuda/stubs.py +635 -0
  247. numba_cuda/numba/cuda/target.py +505 -0
  248. numba_cuda/numba/cuda/testing.py +347 -0
  249. numba_cuda/numba/cuda/tests/__init__.py +62 -0
  250. numba_cuda/numba/cuda/tests/benchmarks/__init__.py +0 -0
  251. numba_cuda/numba/cuda/tests/benchmarks/test_kernel_launch.py +119 -0
  252. numba_cuda/numba/cuda/tests/cloudpickle_main_class.py +9 -0
  253. numba_cuda/numba/cuda/tests/core/serialize_usecases.py +113 -0
  254. numba_cuda/numba/cuda/tests/core/test_itanium_mangler.py +83 -0
  255. numba_cuda/numba/cuda/tests/core/test_serialize.py +371 -0
  256. numba_cuda/numba/cuda/tests/cudadrv/__init__.py +9 -0
  257. numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +147 -0
  258. numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +161 -0
  259. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +397 -0
  260. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +24 -0
  261. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +180 -0
  262. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +313 -0
  263. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +187 -0
  264. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +621 -0
  265. numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +247 -0
  266. numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +100 -0
  267. numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +198 -0
  268. numba_cuda/numba/cuda/tests/cudadrv/test_events.py +53 -0
  269. numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +72 -0
  270. numba_cuda/numba/cuda/tests/cudadrv/test_init.py +138 -0
  271. numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +43 -0
  272. numba_cuda/numba/cuda/tests/cudadrv/test_is_fp16.py +15 -0
  273. numba_cuda/numba/cuda/tests/cudadrv/test_linkable_code.py +58 -0
  274. numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +348 -0
  275. numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +128 -0
  276. numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py +301 -0
  277. numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +174 -0
  278. numba_cuda/numba/cuda/tests/cudadrv/test_nvrtc.py +28 -0
  279. numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +185 -0
  280. numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +39 -0
  281. numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +23 -0
  282. numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +38 -0
  283. numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +48 -0
  284. numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +44 -0
  285. numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +127 -0
  286. numba_cuda/numba/cuda/tests/cudapy/__init__.py +9 -0
  287. numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +231 -0
  288. numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +50 -0
  289. numba_cuda/numba/cuda/tests/cudapy/cg_cache_usecases.py +36 -0
  290. numba_cuda/numba/cuda/tests/cudapy/complex_usecases.py +116 -0
  291. numba_cuda/numba/cuda/tests/cudapy/enum_usecases.py +59 -0
  292. numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +62 -0
  293. numba_cuda/numba/cuda/tests/cudapy/jitlink.ptx +28 -0
  294. numba_cuda/numba/cuda/tests/cudapy/overload_usecases.py +33 -0
  295. numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +104 -0
  296. numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +47 -0
  297. numba_cuda/numba/cuda/tests/cudapy/test_analysis.py +1122 -0
  298. numba_cuda/numba/cuda/tests/cudapy/test_array.py +344 -0
  299. numba_cuda/numba/cuda/tests/cudapy/test_array_alignment.py +268 -0
  300. numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +203 -0
  301. numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +63 -0
  302. numba_cuda/numba/cuda/tests/cudapy/test_array_reductions.py +360 -0
  303. numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +1815 -0
  304. numba_cuda/numba/cuda/tests/cudapy/test_bfloat16.py +599 -0
  305. numba_cuda/numba/cuda/tests/cudapy/test_bfloat16_bindings.py +377 -0
  306. numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +160 -0
  307. numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +27 -0
  308. numba_cuda/numba/cuda/tests/cudapy/test_byteflow.py +98 -0
  309. numba_cuda/numba/cuda/tests/cudapy/test_cache_hints.py +210 -0
  310. numba_cuda/numba/cuda/tests/cudapy/test_caching.py +683 -0
  311. numba_cuda/numba/cuda/tests/cudapy/test_casting.py +265 -0
  312. numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +42 -0
  313. numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +718 -0
  314. numba_cuda/numba/cuda/tests/cudapy/test_complex.py +370 -0
  315. numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +23 -0
  316. numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +142 -0
  317. numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +178 -0
  318. numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +193 -0
  319. numba_cuda/numba/cuda/tests/cudapy/test_copy_propagate.py +131 -0
  320. numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +438 -0
  321. numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +94 -0
  322. numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +101 -0
  323. numba_cuda/numba/cuda/tests/cudapy/test_debug.py +105 -0
  324. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +889 -0
  325. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo_types.py +476 -0
  326. numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +500 -0
  327. numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +820 -0
  328. numba_cuda/numba/cuda/tests/cudapy/test_enums.py +152 -0
  329. numba_cuda/numba/cuda/tests/cudapy/test_errors.py +111 -0
  330. numba_cuda/numba/cuda/tests/cudapy/test_exception.py +170 -0
  331. numba_cuda/numba/cuda/tests/cudapy/test_extending.py +1088 -0
  332. numba_cuda/numba/cuda/tests/cudapy/test_extending_types.py +71 -0
  333. numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +265 -0
  334. numba_cuda/numba/cuda/tests/cudapy/test_flow_control.py +1433 -0
  335. numba_cuda/numba/cuda/tests/cudapy/test_forall.py +57 -0
  336. numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +34 -0
  337. numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +69 -0
  338. numba_cuda/numba/cuda/tests/cudapy/test_globals.py +62 -0
  339. numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +474 -0
  340. numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +167 -0
  341. numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +92 -0
  342. numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +39 -0
  343. numba_cuda/numba/cuda/tests/cudapy/test_inline.py +170 -0
  344. numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +255 -0
  345. numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +1219 -0
  346. numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +263 -0
  347. numba_cuda/numba/cuda/tests/cudapy/test_ir.py +598 -0
  348. numba_cuda/numba/cuda/tests/cudapy/test_ir_utils.py +276 -0
  349. numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +101 -0
  350. numba_cuda/numba/cuda/tests/cudapy/test_lang.py +68 -0
  351. numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +123 -0
  352. numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +194 -0
  353. numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +220 -0
  354. numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +173 -0
  355. numba_cuda/numba/cuda/tests/cudapy/test_make_function_to_jit_function.py +364 -0
  356. numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +47 -0
  357. numba_cuda/numba/cuda/tests/cudapy/test_math.py +842 -0
  358. numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +76 -0
  359. numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +78 -0
  360. numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +25 -0
  361. numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +145 -0
  362. numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +39 -0
  363. numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +82 -0
  364. numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +53 -0
  365. numba_cuda/numba/cuda/tests/cudapy/test_operator.py +504 -0
  366. numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +93 -0
  367. numba_cuda/numba/cuda/tests/cudapy/test_overload.py +402 -0
  368. numba_cuda/numba/cuda/tests/cudapy/test_powi.py +128 -0
  369. numba_cuda/numba/cuda/tests/cudapy/test_print.py +193 -0
  370. numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +37 -0
  371. numba_cuda/numba/cuda/tests/cudapy/test_random.py +117 -0
  372. numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +614 -0
  373. numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +130 -0
  374. numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +94 -0
  375. numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +83 -0
  376. numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +86 -0
  377. numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +40 -0
  378. numba_cuda/numba/cuda/tests/cudapy/test_sm.py +457 -0
  379. numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +233 -0
  380. numba_cuda/numba/cuda/tests/cudapy/test_ssa.py +454 -0
  381. numba_cuda/numba/cuda/tests/cudapy/test_stream_api.py +56 -0
  382. numba_cuda/numba/cuda/tests/cudapy/test_sync.py +277 -0
  383. numba_cuda/numba/cuda/tests/cudapy/test_tracing.py +200 -0
  384. numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +90 -0
  385. numba_cuda/numba/cuda/tests/cudapy/test_typeconv.py +333 -0
  386. numba_cuda/numba/cuda/tests/cudapy/test_typeinfer.py +538 -0
  387. numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +585 -0
  388. numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +42 -0
  389. numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +485 -0
  390. numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +312 -0
  391. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +23 -0
  392. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +183 -0
  393. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +40 -0
  394. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +40 -0
  395. numba_cuda/numba/cuda/tests/cudapy/test_warning.py +206 -0
  396. numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +331 -0
  397. numba_cuda/numba/cuda/tests/cudasim/__init__.py +9 -0
  398. numba_cuda/numba/cuda/tests/cudasim/support.py +9 -0
  399. numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +111 -0
  400. numba_cuda/numba/cuda/tests/data/__init__.py +2 -0
  401. numba_cuda/numba/cuda/tests/data/cta_barrier.cu +28 -0
  402. numba_cuda/numba/cuda/tests/data/cuda_include.cu +10 -0
  403. numba_cuda/numba/cuda/tests/data/error.cu +12 -0
  404. numba_cuda/numba/cuda/tests/data/include/add.cuh +8 -0
  405. numba_cuda/numba/cuda/tests/data/jitlink.cu +28 -0
  406. numba_cuda/numba/cuda/tests/data/jitlink.ptx +49 -0
  407. numba_cuda/numba/cuda/tests/data/warn.cu +12 -0
  408. numba_cuda/numba/cuda/tests/doc_examples/__init__.py +9 -0
  409. numba_cuda/numba/cuda/tests/doc_examples/ffi/__init__.py +2 -0
  410. numba_cuda/numba/cuda/tests/doc_examples/ffi/functions.cu +54 -0
  411. numba_cuda/numba/cuda/tests/doc_examples/ffi/include/mul.cuh +8 -0
  412. numba_cuda/numba/cuda/tests/doc_examples/ffi/saxpy.cu +14 -0
  413. numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +86 -0
  414. numba_cuda/numba/cuda/tests/doc_examples/test_cpointer.py +68 -0
  415. numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +81 -0
  416. numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +141 -0
  417. numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +160 -0
  418. numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +180 -0
  419. numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +119 -0
  420. numba_cuda/numba/cuda/tests/doc_examples/test_random.py +66 -0
  421. numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +80 -0
  422. numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +206 -0
  423. numba_cuda/numba/cuda/tests/doc_examples/test_ufunc.py +53 -0
  424. numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +76 -0
  425. numba_cuda/numba/cuda/tests/nocuda/__init__.py +9 -0
  426. numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +391 -0
  427. numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +48 -0
  428. numba_cuda/numba/cuda/tests/nocuda/test_import.py +63 -0
  429. numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +252 -0
  430. numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +59 -0
  431. numba_cuda/numba/cuda/tests/nrt/__init__.py +9 -0
  432. numba_cuda/numba/cuda/tests/nrt/test_nrt.py +387 -0
  433. numba_cuda/numba/cuda/tests/nrt/test_nrt_refct.py +124 -0
  434. numba_cuda/numba/cuda/tests/support.py +900 -0
  435. numba_cuda/numba/cuda/typeconv/__init__.py +4 -0
  436. numba_cuda/numba/cuda/typeconv/castgraph.py +137 -0
  437. numba_cuda/numba/cuda/typeconv/rules.py +63 -0
  438. numba_cuda/numba/cuda/typeconv/typeconv.py +121 -0
  439. numba_cuda/numba/cuda/types/__init__.py +233 -0
  440. numba_cuda/numba/cuda/types/__init__.pyi +167 -0
  441. numba_cuda/numba/cuda/types/abstract.py +9 -0
  442. numba_cuda/numba/cuda/types/common.py +9 -0
  443. numba_cuda/numba/cuda/types/containers.py +9 -0
  444. numba_cuda/numba/cuda/types/cuda_abstract.py +533 -0
  445. numba_cuda/numba/cuda/types/cuda_common.py +110 -0
  446. numba_cuda/numba/cuda/types/cuda_containers.py +971 -0
  447. numba_cuda/numba/cuda/types/cuda_function_type.py +230 -0
  448. numba_cuda/numba/cuda/types/cuda_functions.py +798 -0
  449. numba_cuda/numba/cuda/types/cuda_iterators.py +120 -0
  450. numba_cuda/numba/cuda/types/cuda_misc.py +569 -0
  451. numba_cuda/numba/cuda/types/cuda_npytypes.py +690 -0
  452. numba_cuda/numba/cuda/types/cuda_scalars.py +280 -0
  453. numba_cuda/numba/cuda/types/ext_types.py +101 -0
  454. numba_cuda/numba/cuda/types/function_type.py +11 -0
  455. numba_cuda/numba/cuda/types/functions.py +9 -0
  456. numba_cuda/numba/cuda/types/iterators.py +9 -0
  457. numba_cuda/numba/cuda/types/misc.py +9 -0
  458. numba_cuda/numba/cuda/types/npytypes.py +9 -0
  459. numba_cuda/numba/cuda/types/scalars.py +9 -0
  460. numba_cuda/numba/cuda/typing/__init__.py +19 -0
  461. numba_cuda/numba/cuda/typing/arraydecl.py +939 -0
  462. numba_cuda/numba/cuda/typing/asnumbatype.py +130 -0
  463. numba_cuda/numba/cuda/typing/bufproto.py +70 -0
  464. numba_cuda/numba/cuda/typing/builtins.py +1209 -0
  465. numba_cuda/numba/cuda/typing/cffi_utils.py +219 -0
  466. numba_cuda/numba/cuda/typing/cmathdecl.py +47 -0
  467. numba_cuda/numba/cuda/typing/collections.py +138 -0
  468. numba_cuda/numba/cuda/typing/context.py +782 -0
  469. numba_cuda/numba/cuda/typing/ctypes_utils.py +125 -0
  470. numba_cuda/numba/cuda/typing/dictdecl.py +63 -0
  471. numba_cuda/numba/cuda/typing/enumdecl.py +74 -0
  472. numba_cuda/numba/cuda/typing/listdecl.py +147 -0
  473. numba_cuda/numba/cuda/typing/mathdecl.py +158 -0
  474. numba_cuda/numba/cuda/typing/npdatetime.py +322 -0
  475. numba_cuda/numba/cuda/typing/npydecl.py +749 -0
  476. numba_cuda/numba/cuda/typing/setdecl.py +115 -0
  477. numba_cuda/numba/cuda/typing/templates.py +1446 -0
  478. numba_cuda/numba/cuda/typing/typeof.py +301 -0
  479. numba_cuda/numba/cuda/ufuncs.py +746 -0
  480. numba_cuda/numba/cuda/utils.py +724 -0
  481. numba_cuda/numba/cuda/vector_types.py +214 -0
  482. numba_cuda/numba/cuda/vectorizers.py +260 -0
  483. numba_cuda-0.21.1.dist-info/METADATA +109 -0
  484. numba_cuda-0.21.1.dist-info/RECORD +488 -0
  485. numba_cuda-0.21.1.dist-info/WHEEL +5 -0
  486. numba_cuda-0.21.1.dist-info/licenses/LICENSE +26 -0
  487. numba_cuda-0.21.1.dist-info/licenses/LICENSE.numba +24 -0
  488. numba_cuda-0.21.1.dist-info/top_level.txt +1 -0
@@ -0,0 +1,214 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
4
+ # CUDA built-in Vector Types
5
+ # https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#built-in-vector-types
6
+
7
+ from typing import List, Tuple, Dict
8
+
9
+ from numba.cuda import types
10
+ from numba.cuda import cgutils
11
+ from numba.cuda.datamodel import models
12
+ from numba.cuda.core.imputils import Registry as ImplRegistry
13
+ from numba.cuda.typing.templates import ConcreteTemplate
14
+ from numba.cuda.typing.templates import Registry as TypingRegistry
15
+ from numba.cuda.typing.templates import signature
16
+ from numba.cuda import stubs
17
+ from numba.cuda.errors import CudaLoweringError
18
+ from numba.cuda.extending import make_attribute_wrapper, register_model
19
+
20
+
21
+ typing_registry = TypingRegistry()
22
+ impl_registry = ImplRegistry()
23
+
24
+ register = typing_registry.register
25
+ register_attr = typing_registry.register_attr
26
+ register_global = typing_registry.register_global
27
+ lower = impl_registry.lower
28
+
29
+
30
class VectorType(types.Type):
    """Numba type describing a CUDA built-in vector type.

    An instance records the element type, the per-element attribute names
    and the Python object that user code calls to construct a value of
    this type.
    """

    def __init__(self, name, base_type, attr_names, user_facing_object):
        # Record the description of the vector before handing the name to
        # the base class initializer.
        self._base_type = base_type
        self._attr_names = attr_names
        self._user_facing_object = user_facing_object
        super().__init__(name=name)

    @property
    def user_facing_object(self):
        """The object passed in as ``user_facing_object`` at construction."""
        return self._user_facing_object

    @property
    def num_elements(self):
        """Number of elements, i.e. the number of attribute names."""
        return len(self._attr_names)

    @property
    def attr_names(self):
        """The attribute names, one per element, as given at construction."""
        return self._attr_names

    @property
    def base_type(self):
        """The type shared by every element of the vector."""
        return self._base_type
52
+
53
+
54
def make_vector_type(
    name: str,
    base_type: types.Type,
    attr_names: Tuple[str, ...],
    user_facing_object,
) -> types.Type:
    """Build a vector type and register its data model and attributes.

    A dedicated ``VectorType`` subclass is created on every call so that
    the struct model and the attribute wrappers registered here apply to
    this vector type only.

    Parameters
    ----------
    name: str
        The name of the type.
    base_type: numba.cuda.types.Type
        The primitive type shared by all elements of the vector.
    attr_names: tuple of str
        The attribute name of each element, in order.
    user_facing_object: object
        The handle used to refer to the type inside a CUDA kernel.

    Returns
    -------
    numba.cuda.types.Type
        The instance of the freshly created vector type.
    """

    class _VectorType(VectorType):
        """Internal instantiation of VectorType."""

    class VectorTypeModel(models.StructModel):
        def __init__(self, dmm, fe_type):
            # One struct member per attribute, all of the same base type.
            fields = [(field, base_type) for field in attr_names]
            super().__init__(dmm, fe_type, fields)

    instance = _VectorType(name, base_type, attr_names, user_facing_object)

    register_model(_VectorType)(VectorTypeModel)
    # Expose every struct member under the attribute of the same name.
    for field in attr_names:
        make_attribute_wrapper(_VectorType, field, field)

    return instance
90
+
91
+
92
def enable_vector_type_ctor(
    vector_type: VectorType, overloads: List[List[types.Type]]
):
    """Create typing and lowering for a vector type constructor.

    Parameters
    ----------
    vector_type: VectorType
        The type whose constructor to type and lower.
    overloads: List of argument types
        A list containing the different overloads of the constructor. Each
        type in an argument list should either be a primitive type or a
        VectorType.
    """
    ctor = vector_type.user_facing_object

    # Typing: one concrete signature per overload, all returning vector_type.
    @register
    class CtorTemplate(ConcreteTemplate):
        key = ctor
        cases = [signature(vector_type, *arglist) for arglist in overloads]

    register_global(ctor, types.Function(CtorTemplate))

    # Lowering

    def make_lowering(fml_arg_list):
        """Meta function to create a lowering for the constructor. Flattens
        the arguments by converting each vector-typed argument into loads of
        its attributes. Such as float2 -> float2.x, float2.y.
        """

        def lowering(context, builder, sig, actual_args):
            # Flat list of the element values to assign from, in order.
            source_list = []
            for argidx, fml_arg in enumerate(fml_arg_list):
                if isinstance(fml_arg, VectorType):
                    # Unpack a vector argument into its individual elements.
                    pxy = cgutils.create_struct_proxy(fml_arg)(
                        context, builder, actual_args[argidx]
                    )
                    source_list.extend(
                        getattr(pxy, attr) for attr in fml_arg.attr_names
                    )
                else:
                    # assumed primitive type
                    source_list.append(actual_args[argidx])

            if len(source_list) != vector_type.num_elements:
                # Both halves of the message must be f-strings; a plain
                # second literal would print the braces verbatim.
                raise CudaLoweringError(
                    f"Unmatched number of source elements ({len(source_list)}) "
                    f"and target elements ({vector_type.num_elements})."
                )

            out = cgutils.create_struct_proxy(vector_type)(context, builder)

            for attr_name, source in zip(vector_type.attr_names, source_list):
                setattr(out, attr_name, source)
            return out._getvalue()

        return lowering

    # Each overload gets its own lowering bound to its formal argument list.
    for arglist in overloads:
        lowering = make_lowering(arglist)
        lower(ctor, *arglist)(lowering)
155
+
156
+
157
+ vector_types: Dict[str, VectorType] = {}
158
+
159
+
160
def build_constructor_overloads(base_type, vty_name, num_elements, arglists, l):
    """
    Collect every constructor overload of a vector type into ``arglists``.

    The argument lists are enumerated recursively: ``l`` holds the
    components chosen so far and ``num_elements`` is the number of elements
    still to be covered. Whenever nothing remains, a copy of ``l`` is
    appended to ``arglists``. ``l`` is restored to its incoming content
    before the function returns.

    Component vector types are looked up in ``vector_types`` under the name
    formed by replacing the last character of ``vty_name`` with the
    component width.
    """

    # TODO: speed up with memoization
    if num_elements == 0:
        arglists.append(list(l))
        return

    def _extend_with(component, remaining):
        # Try `component` as the next argument, enumerate all completions,
        # then undo the choice.
        l.append(component)
        build_constructor_overloads(
            base_type, vty_name, remaining, arglists, l
        )
        l.pop()

    for width in range(1, num_elements + 1):
        remaining = num_elements - width
        if width == 1:
            # A single element may come from a primitive value as well as
            # from a 1-element vector; the primitive is tried first.
            _extend_with(base_type, remaining)
        _extend_with(vector_types[f"{vty_name[:-1]}{width}"], remaining)
190
+
191
+
192
def _initialize():
    """
    Create a vector type for every stub, record it in `vector_types`, and
    then enable the constructor overloads of each recorded type.
    """
    component_names = ("x", "y", "z", "w")

    # First pass: build every vector type. The stub name is parsed as
    # "<base type name>" + one separator character + "<element count>".
    for stub in stubs._vector_type_stubs:
        type_name = stub.__name__
        scalar_type = getattr(types, type_name[:-2])
        width = int(type_name[-1])
        vector_types[type_name] = make_vector_type(
            type_name, scalar_type, component_names[:width], stub
        )

    # Second pass: constructors may take other vector types as arguments,
    # so overloads are only built once all types are known.
    for vty in vector_types.values():
        overloads = []
        build_constructor_overloads(
            vty.base_type, vty.name, vty.num_elements, overloads, []
        )
        enable_vector_type_ctor(vty, overloads)
212
+
213
+
214
+ _initialize()
@@ -0,0 +1,260 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
4
+ from numba import cuda
5
+ from numpy import array as np_array
6
+ from numba.cuda import deviceufunc
7
+ from numba.cuda.deviceufunc import (
8
+ UFuncMechanism,
9
+ GeneralizedUFunc,
10
+ GUFuncCallSteps,
11
+ )
12
+
13
+
14
class CUDAUFuncDispatcher(object):
    """
    Callable front end that dispatches to the CUDA ufunc specializations.
    """

    def __init__(self, types_to_retty_kernels, pyfunc):
        self.functions = types_to_retty_kernels
        self.__name__ = pyfunc.__name__

    def __call__(self, *args, **kws):
        """
        Run the ufunc on ``args``.

        *args: numpy arrays or DeviceArrayBase (created by cuda.to_device).
               The two kinds cannot be mixed in a single call.

        **kws:
            stream -- cuda stream; when defined, asynchronous mode is used.
            out    -- output array. Can be a numpy array or DeviceArrayBase
                      depending on the input arguments. Type must match
                      the input arguments.
        """
        return CUDAUFuncMechanism.call(self.functions, args, kws)

    def reduce(self, arg, stream=0):
        """
        Reduce the 1d array ``arg`` to a single element with this ufunc.

        Raises ``TypeError`` for an empty array; a one-element array is
        returned as its only element without launching anything.
        """
        signatures = list(self.functions.keys())
        assert len(signatures[0]) == 2, "must be a binary ufunc"
        assert arg.ndim == 1, "must use 1d array"

        length = arg.shape[0]
        # Holds references to intermediate device memory for the duration
        # of the (possibly asynchronous) reduction.
        keepalive = []

        if length == 0:
            raise TypeError("Reduction on an empty array.")
        if length == 1:  # nothing to do
            return arg[0]

        # always use a stream
        stream = stream or cuda.stream()
        with stream.auto_synchronize():
            # transfer memory to device if necessary
            already_on_device = cuda.cudadrv.devicearray.is_cuda_ndarray(arg)
            device_mem = arg if already_on_device else cuda.to_device(arg, stream)
            # do reduction
            reduced = self.__reduce(device_mem, keepalive, stream)
            # use a small buffer to store the result element
            host_buf = np_array((1,), dtype=arg.dtype)
            reduced.copy_to_host(host_buf, stream=stream)

        return host_buf[0]

    def __reduce(self, mem, gpu_mems, stream):
        """
        Recursively combine the elements of ``mem``, returning the device
        array that holds the result. Every intermediate piece is appended
        to ``gpu_mems`` to prevent freeing during async mode.
        """
        size = mem.shape[0]

        if size % 2 == 0:
            # Even length: combine the two halves, writing into the left one.
            half = size // 2
            left, right = mem.split(half)
            gpu_mems.extend((left, right))
            # execute the kernel
            self(left, right, out=left, stream=stream)
            if half > 1:
                return self.__reduce(left, gpu_mems, stream)
            return left

        # Odd length: reduce all but the last element, then fold that
        # last element into the result.
        fatcut, thincut = mem.split(size - 1)
        gpu_mems.extend((fatcut, thincut))
        # execute the kernel
        out = self.__reduce(fatcut, gpu_mems, stream)
        gpu_mems.append(out)
        return self(out, thincut, out=out, stream=stream)
88
+
89
+
90
class _CUDAGUFuncCallSteps(GUFuncCallSteps):
    """CUDA implementation of the gufunc call steps.

    Every transfer, allocation and launch is issued on the stream given by
    the ``stream`` keyword argument of the call (``0`` when absent).
    """

    __slots__ = [
        "_stream",
    ]

    def __init__(self, nin, nout, args, kwargs):
        super().__init__(nin, nout, args, kwargs)
        self._stream = kwargs.get("stream", 0)

    def is_device_array(self, obj):
        """Return whether ``obj`` is a CUDA array."""
        return cuda.is_cuda_array(obj)

    def as_device_array(self, obj):
        """Return ``obj`` as a Numba device array."""
        # Only foreign CUDA arrays go through as_cuda_array. Doing that for
        # an object that already is a Numba device array would export it as
        # a Producer and import it again as a Consumer, which by default
        # synchronizes on the array's stream (if it has one).
        if not cuda.cudadrv.devicearray.is_cuda_ndarray(obj):
            return cuda.as_cuda_array(obj)
        return obj

    def to_device(self, hostary):
        """Copy ``hostary`` to the device on the call's stream."""
        return cuda.to_device(hostary, stream=self._stream)

    def to_host(self, devary, hostary):
        """Copy ``devary`` into ``hostary`` on the call's stream."""
        return devary.copy_to_host(hostary, stream=self._stream)

    def allocate_device_array(self, shape, dtype):
        """Allocate a device array of ``shape``/``dtype`` on the call's stream."""
        return cuda.device_array(shape=shape, dtype=dtype, stream=self._stream)

    def launch_kernel(self, kernel, nelem, args):
        """Launch ``kernel`` over ``nelem`` elements on the call's stream."""
        configured = kernel.forall(nelem, stream=self._stream)
        configured(*args)
124
+
125
+
126
class CUDAGeneralizedUFunc(GeneralizedUFunc):
    """Generalized ufunc that runs on CUDA and is named after ``pyfunc``."""

    def __init__(self, kernelmap, engine, pyfunc):
        self.__name__ = pyfunc.__name__
        super().__init__(kernelmap, engine)

    @property
    def _call_steps(self):
        """The call-steps class used to drive a call of this gufunc."""
        return _CUDAGUFuncCallSteps

    def _broadcast_scalar_input(self, ary, shape):
        """Return a view of ``ary``'s data with ``shape`` and a zero stride."""
        return cuda.cudadrv.devicearray.DeviceNDArray(
            shape=shape,
            strides=(0,),
            dtype=ary.dtype,
            gpu_data=ary.gpu_data,
        )

    def _broadcast_add_axis(self, ary, newshape):
        """Return a view of ``ary``'s data with leading axes added.

        The added leading axes get a stride of 0; the strides of the
        existing axes are kept.
        """
        added_axes = len(newshape) - len(ary.shape)
        # Add 0 strides for missing dimension
        padded_strides = (0,) * added_axes + ary.strides
        return cuda.cudadrv.devicearray.DeviceNDArray(
            shape=newshape,
            strides=padded_strides,
            dtype=ary.dtype,
            gpu_data=ary.gpu_data,
        )
150
+
151
+
152
class CUDAUFuncMechanism(UFuncMechanism):
    """
    CUDA-specific hooks for the generic ufunc mechanism.
    """

    DEFAULT_STREAM = 0

    def launch(self, func, count, stream, args):
        """Launch ``func`` over ``count`` elements on ``stream``."""
        configured = func.forall(count, stream=stream)
        configured(*args)

    def is_device_array(self, obj):
        """Return whether ``obj`` is a CUDA array."""
        return cuda.is_cuda_array(obj)

    def as_device_array(self, obj):
        """Return ``obj`` as a Numba device array."""
        # Only foreign CUDA arrays go through as_cuda_array. Doing that for
        # an object that already is a Numba device array would export it as
        # a Producer and import it again as a Consumer, which by default
        # synchronizes on the array's stream (if it has one).
        if not cuda.cudadrv.devicearray.is_cuda_ndarray(obj):
            return cuda.as_cuda_array(obj)
        return obj

    def to_device(self, hostary, stream):
        """Copy ``hostary`` to the device on ``stream``."""
        return cuda.to_device(hostary, stream=stream)

    def to_host(self, devary, stream):
        """Copy ``devary`` back to the host on ``stream``."""
        return devary.copy_to_host(stream=stream)

    def allocate_device_array(self, shape, dtype, stream):
        """Allocate a device array of ``shape``/``dtype`` on ``stream``."""
        return cuda.device_array(shape=shape, dtype=dtype, stream=stream)

    def broadcast_device(self, ary, shape):
        """Return a view of ``ary``'s data broadcast to ``shape``.

        Axes that ``ary`` lacks, or whose extent differs from ``shape``,
        are given a stride of 0.
        """
        target_ndim = len(shape)

        # Axes on which the array does not already match the target shape.
        mismatched_axes = []
        for axis in range(target_ndim):
            if axis >= ary.ndim or ary.shape[axis] != shape[axis]:
                mismatched_axes.append(axis)

        # Pad the strides at the front up to the target rank.
        padding = target_ndim - len(ary.shape)
        strides = [0] * padding + list(ary.strides)

        for axis in mismatched_axes:
            strides[axis] = 0

        return cuda.cudadrv.devicearray.DeviceNDArray(
            shape=shape,
            strides=strides,
            dtype=ary.dtype,
            gpu_data=ary.gpu_data,
        )
200
+
201
+
202
+ vectorizer_stager_source = """
203
+ def __vectorized_{name}({args}, __out__):
204
+ __tid__ = __cuda__.grid(1)
205
+ if __tid__ < __out__.shape[0]:
206
+ __out__[__tid__] = __core__({argitems})
207
+ """
208
+
209
+
210
class CUDAVectorize(deviceufunc.DeviceVectorize):
    """Builder of CUDA ufuncs from a scalar Python function."""

    def _compile_core(self, sig):
        """Compile the Python function as an always-inlined device function.

        Returns the device function and the return type of its overload
        for ``sig.args``.
        """
        device_fn = cuda.jit(sig, device=True, inline="always")(self.pyfunc)
        return_type = device_fn.overloads[sig.args].signature.return_type
        return device_fn, return_type

    def _get_globals(self, corefn):
        """Return a copy of the function's globals plus the kernel helpers."""
        namespace = self.pyfunc.__globals__.copy()
        namespace["__cuda__"] = cuda
        namespace["__core__"] = corefn
        return namespace

    def _compile_kernel(self, fnobj, sig):
        """Wrap the generated kernel function with ``cuda.jit``."""
        return cuda.jit(fnobj)

    def build_ufunc(self):
        """Create the dispatcher over all compiled kernels."""
        return CUDAUFuncDispatcher(self.kernelmap, self.pyfunc)

    @property
    def _kernel_template(self):
        """Source template of the element-wise kernel."""
        return vectorizer_stager_source
229
+
230
+
231
+ # ------------------------------------------------------------------------------
232
+ # Generalized CUDA ufuncs
233
+
234
+ _gufunc_stager_source = """
235
+ def __gufunc_{name}({args}):
236
+ __tid__ = __cuda__.grid(1)
237
+ if __tid__ < {checkedarg}:
238
+ __core__({argitems})
239
+ """
240
+
241
+
242
class CUDAGUFuncVectorize(deviceufunc.DeviceGUFuncVectorize):
    """Builder of CUDA generalized ufuncs from a Python function."""

    def build_ufunc(self):
        """Create the generalized ufunc over all compiled kernels."""
        gufunc_engine = deviceufunc.GUFuncEngine(self.inputsig, self.outputsig)
        return CUDAGeneralizedUFunc(
            kernelmap=self.kernelmap,
            engine=gufunc_engine,
            pyfunc=self.pyfunc,
        )

    def _compile_kernel(self, fnobj, sig):
        """Compile the generated kernel function for ``sig``."""
        jit_for_sig = cuda.jit(sig)
        return jit_for_sig(fnobj)

    @property
    def _kernel_template(self):
        """Source template of the gufunc kernel."""
        return _gufunc_stager_source

    def _get_globals(self, sig):
        """Return a copy of the function's globals plus the kernel helpers.

        The Python function is compiled as a device function for ``sig``
        and made available to the kernel as ``__core__``.
        """
        corefn = cuda.jit(sig, device=True)(self.pyfunc)
        namespace = self.py_func.__globals__.copy()
        namespace["__cuda__"] = cuda
        namespace["__core__"] = corefn
        return namespace
@@ -0,0 +1,109 @@
1
+ Metadata-Version: 2.4
2
+ Name: numba-cuda
3
+ Version: 0.21.1
4
+ Summary: CUDA target for Numba
5
+ Author: Anaconda Inc., NVIDIA Corporation
6
+ License-Expression: BSD-2-Clause
7
+ Project-URL: Homepage, https://nvidia.github.io/numba-cuda/
8
+ Project-URL: Documentation, https://nvidia.github.io/numba-cuda/
9
+ Project-URL: Repository, https://github.com/NVIDIA/numba-cuda
10
+ Project-URL: License, https://github.com/NVIDIA/numba-cuda/blob/main/LICENSE
11
+ Project-URL: Issues, https://github.com/NVIDIA/numba-cuda/issues
12
+ Requires-Python: >=3.9
13
+ Description-Content-Type: text/markdown
14
+ License-File: LICENSE
15
+ License-File: LICENSE.numba
16
+ Requires-Dist: numba>=0.60.0
17
+ Requires-Dist: cuda-bindings<14.0.0,>=12.9.1
18
+ Requires-Dist: cuda-core<1.0.0,>=0.3.2
19
+ Provides-Extra: cu12
20
+ Requires-Dist: cuda-bindings<13.0.0,>=12.9.1; extra == "cu12"
21
+ Requires-Dist: cuda-core<1.0.0,>=0.3.0; extra == "cu12"
22
+ Requires-Dist: cuda-python==12.9.*; extra == "cu12"
23
+ Requires-Dist: nvidia-cuda-nvcc-cu12; extra == "cu12"
24
+ Requires-Dist: nvidia-cuda-runtime-cu12; extra == "cu12"
25
+ Requires-Dist: nvidia-cuda-nvrtc-cu12; extra == "cu12"
26
+ Requires-Dist: nvidia-nvjitlink-cu12; extra == "cu12"
27
+ Requires-Dist: nvidia-cuda-cccl-cu12; extra == "cu12"
28
+ Provides-Extra: cu13
29
+ Requires-Dist: cuda-bindings==13.*; extra == "cu13"
30
+ Requires-Dist: cuda-core<1.0.0,>=0.3.2; extra == "cu13"
31
+ Requires-Dist: cuda-python==13.*; extra == "cu13"
32
+ Requires-Dist: nvidia-nvvm==13.*; extra == "cu13"
33
+ Requires-Dist: nvidia-cuda-runtime==13.*; extra == "cu13"
34
+ Requires-Dist: nvidia-cuda-nvrtc==13.*; extra == "cu13"
35
+ Requires-Dist: nvidia-nvjitlink==13.*; extra == "cu13"
36
+ Requires-Dist: nvidia-cuda-cccl==13.*; extra == "cu13"
37
+ Dynamic: license-file
38
+
39
+ <div align="center"><img src="docs/source/_static/numba-green-icon-rgb.svg" width="200"/></div>
40
+
41
+ # Numba CUDA Target
42
+
43
+ The CUDA target for Numba. Please visit the [official
44
+ documentation](https://nvidia.github.io/numba-cuda) to get started!
45
+
46
+
47
+ To report issues or file feature requests, please use the [issue
48
+ tracker](https://github.com/NVIDIA/numba-cuda/issues).
49
+
50
+ To raise questions or initiate discussions, please use the [Numba Discourse
51
+ forum](https://numba.discourse.group).
52
+
53
+ ## Installation with pip or conda
54
+
55
+ Please refer to the [Installation documentation](https://nvidia.github.io/numba-cuda/user/installation.html#installation-with-a-python-package-manager).
56
+
57
+
58
+ ## Installation from source
59
+
60
+ Install as an editable install:
61
+
62
+ ```
63
+ pip install -e .
64
+ ```
65
+
66
+ If you want to manage all run-time dependencies yourself, also pass the `--no-deps` flag.
67
+
68
+ ## Running tests
69
+
70
+ Tests must be run from the `testing` folder, which contains the pytest
71
+ configuration and code to generate binaries used during the tests. The test
72
+ binaries need to be built on the system on which the tests are run, so that
73
+ they are compiled for the appropriate compute capability.
74
+
75
+ ```
76
+ cd testing
77
+ # Optionally, build test binaries and point to their location for the test suite
78
+ make -j $(nproc)
79
+ export NUMBA_CUDA_TEST_BIN_DIR=`pwd`
80
+ # Execute tests
81
+ pytest -n auto -v --dist loadscope
82
+ ```
83
+
84
+ Alternatively, you can use [pixi](https://pixi.sh/latest/installation/) to wrap all of that up for you:
85
+
86
+ ```
87
+ # run tests against CUDA 13
88
+ pixi run -e cu13 test -n auto -v --dist loadscope
89
+ ```
90
+
91
+
92
+ Testing should discover the `numba.cuda` module from the `numba_cuda` package. You
93
+ can check where the `numba.cuda` files are located by running:
94
+
95
+ ```
96
+ python -c "from numba import cuda; print(cuda.__file__)"
97
+ ```
98
+
99
+ which will show a path like:
100
+
101
+ ```
102
+ <path to numba-cuda repo>/numba_cuda/numba/cuda/__init__.py
103
+ ```
104
+
105
+ ## Contributing Guide
106
+
107
+ Review the
108
+ [CONTRIBUTING.md](https://github.com/NVIDIA/numba-cuda/blob/main/CONTRIBUTING.md)
109
+ file for information on how to contribute code and issues to the project.