numba-cuda 0.22.1__cp311-cp311-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (488) hide show
  1. _numba_cuda_redirector.pth +4 -0
  2. _numba_cuda_redirector.py +89 -0
  3. numba_cuda/VERSION +1 -0
  4. numba_cuda/__init__.py +6 -0
  5. numba_cuda/_version.py +11 -0
  6. numba_cuda/numba/cuda/__init__.py +70 -0
  7. numba_cuda/numba/cuda/_internal/cuda_bf16.py +16394 -0
  8. numba_cuda/numba/cuda/_internal/cuda_fp16.py +8112 -0
  9. numba_cuda/numba/cuda/api.py +580 -0
  10. numba_cuda/numba/cuda/api_util.py +76 -0
  11. numba_cuda/numba/cuda/args.py +72 -0
  12. numba_cuda/numba/cuda/bf16.py +397 -0
  13. numba_cuda/numba/cuda/cache_hints.py +287 -0
  14. numba_cuda/numba/cuda/cext/__init__.py +2 -0
  15. numba_cuda/numba/cuda/cext/_devicearray.cpp +159 -0
  16. numba_cuda/numba/cuda/cext/_devicearray.cpython-311-aarch64-linux-gnu.so +0 -0
  17. numba_cuda/numba/cuda/cext/_devicearray.h +29 -0
  18. numba_cuda/numba/cuda/cext/_dispatcher.cpp +1098 -0
  19. numba_cuda/numba/cuda/cext/_dispatcher.cpython-311-aarch64-linux-gnu.so +0 -0
  20. numba_cuda/numba/cuda/cext/_hashtable.cpp +532 -0
  21. numba_cuda/numba/cuda/cext/_hashtable.h +135 -0
  22. numba_cuda/numba/cuda/cext/_helperlib.c +71 -0
  23. numba_cuda/numba/cuda/cext/_helperlib.cpython-311-aarch64-linux-gnu.so +0 -0
  24. numba_cuda/numba/cuda/cext/_helpermod.c +82 -0
  25. numba_cuda/numba/cuda/cext/_pymodule.h +38 -0
  26. numba_cuda/numba/cuda/cext/_typeconv.cpp +206 -0
  27. numba_cuda/numba/cuda/cext/_typeconv.cpython-311-aarch64-linux-gnu.so +0 -0
  28. numba_cuda/numba/cuda/cext/_typeof.cpp +1159 -0
  29. numba_cuda/numba/cuda/cext/_typeof.h +19 -0
  30. numba_cuda/numba/cuda/cext/capsulethunk.h +111 -0
  31. numba_cuda/numba/cuda/cext/mviewbuf.c +385 -0
  32. numba_cuda/numba/cuda/cext/mviewbuf.cpython-311-aarch64-linux-gnu.so +0 -0
  33. numba_cuda/numba/cuda/cext/typeconv.cpp +212 -0
  34. numba_cuda/numba/cuda/cext/typeconv.hpp +101 -0
  35. numba_cuda/numba/cuda/cg.py +67 -0
  36. numba_cuda/numba/cuda/cgutils.py +1294 -0
  37. numba_cuda/numba/cuda/cloudpickle/__init__.py +21 -0
  38. numba_cuda/numba/cuda/cloudpickle/cloudpickle.py +1598 -0
  39. numba_cuda/numba/cuda/cloudpickle/cloudpickle_fast.py +17 -0
  40. numba_cuda/numba/cuda/codegen.py +541 -0
  41. numba_cuda/numba/cuda/compiler.py +1396 -0
  42. numba_cuda/numba/cuda/core/analysis.py +758 -0
  43. numba_cuda/numba/cuda/core/annotations/__init__.py +0 -0
  44. numba_cuda/numba/cuda/core/annotations/pretty_annotate.py +288 -0
  45. numba_cuda/numba/cuda/core/annotations/type_annotations.py +305 -0
  46. numba_cuda/numba/cuda/core/base.py +1332 -0
  47. numba_cuda/numba/cuda/core/boxing.py +1411 -0
  48. numba_cuda/numba/cuda/core/bytecode.py +728 -0
  49. numba_cuda/numba/cuda/core/byteflow.py +2346 -0
  50. numba_cuda/numba/cuda/core/caching.py +744 -0
  51. numba_cuda/numba/cuda/core/callconv.py +392 -0
  52. numba_cuda/numba/cuda/core/codegen.py +171 -0
  53. numba_cuda/numba/cuda/core/compiler.py +199 -0
  54. numba_cuda/numba/cuda/core/compiler_lock.py +85 -0
  55. numba_cuda/numba/cuda/core/compiler_machinery.py +497 -0
  56. numba_cuda/numba/cuda/core/config.py +650 -0
  57. numba_cuda/numba/cuda/core/consts.py +124 -0
  58. numba_cuda/numba/cuda/core/controlflow.py +989 -0
  59. numba_cuda/numba/cuda/core/cuda_errors.py +917 -0
  60. numba_cuda/numba/cuda/core/entrypoints.py +57 -0
  61. numba_cuda/numba/cuda/core/environment.py +66 -0
  62. numba_cuda/numba/cuda/core/errors.py +9 -0
  63. numba_cuda/numba/cuda/core/event.py +511 -0
  64. numba_cuda/numba/cuda/core/funcdesc.py +330 -0
  65. numba_cuda/numba/cuda/core/generators.py +387 -0
  66. numba_cuda/numba/cuda/core/imputils.py +509 -0
  67. numba_cuda/numba/cuda/core/inline_closurecall.py +1787 -0
  68. numba_cuda/numba/cuda/core/interpreter.py +3617 -0
  69. numba_cuda/numba/cuda/core/ir.py +1812 -0
  70. numba_cuda/numba/cuda/core/ir_utils.py +2638 -0
  71. numba_cuda/numba/cuda/core/optional.py +129 -0
  72. numba_cuda/numba/cuda/core/options.py +262 -0
  73. numba_cuda/numba/cuda/core/postproc.py +249 -0
  74. numba_cuda/numba/cuda/core/pythonapi.py +1859 -0
  75. numba_cuda/numba/cuda/core/registry.py +46 -0
  76. numba_cuda/numba/cuda/core/removerefctpass.py +123 -0
  77. numba_cuda/numba/cuda/core/rewrites/__init__.py +26 -0
  78. numba_cuda/numba/cuda/core/rewrites/ir_print.py +91 -0
  79. numba_cuda/numba/cuda/core/rewrites/registry.py +104 -0
  80. numba_cuda/numba/cuda/core/rewrites/static_binop.py +41 -0
  81. numba_cuda/numba/cuda/core/rewrites/static_getitem.py +189 -0
  82. numba_cuda/numba/cuda/core/rewrites/static_raise.py +100 -0
  83. numba_cuda/numba/cuda/core/sigutils.py +68 -0
  84. numba_cuda/numba/cuda/core/ssa.py +498 -0
  85. numba_cuda/numba/cuda/core/targetconfig.py +330 -0
  86. numba_cuda/numba/cuda/core/tracing.py +231 -0
  87. numba_cuda/numba/cuda/core/transforms.py +956 -0
  88. numba_cuda/numba/cuda/core/typed_passes.py +867 -0
  89. numba_cuda/numba/cuda/core/typeinfer.py +1950 -0
  90. numba_cuda/numba/cuda/core/unsafe/__init__.py +0 -0
  91. numba_cuda/numba/cuda/core/unsafe/bytes.py +67 -0
  92. numba_cuda/numba/cuda/core/unsafe/eh.py +67 -0
  93. numba_cuda/numba/cuda/core/unsafe/refcount.py +98 -0
  94. numba_cuda/numba/cuda/core/untyped_passes.py +1979 -0
  95. numba_cuda/numba/cuda/cpython/builtins.py +1153 -0
  96. numba_cuda/numba/cuda/cpython/charseq.py +1218 -0
  97. numba_cuda/numba/cuda/cpython/cmathimpl.py +560 -0
  98. numba_cuda/numba/cuda/cpython/enumimpl.py +103 -0
  99. numba_cuda/numba/cuda/cpython/iterators.py +167 -0
  100. numba_cuda/numba/cuda/cpython/listobj.py +1326 -0
  101. numba_cuda/numba/cuda/cpython/mathimpl.py +499 -0
  102. numba_cuda/numba/cuda/cpython/numbers.py +1475 -0
  103. numba_cuda/numba/cuda/cpython/rangeobj.py +289 -0
  104. numba_cuda/numba/cuda/cpython/slicing.py +322 -0
  105. numba_cuda/numba/cuda/cpython/tupleobj.py +456 -0
  106. numba_cuda/numba/cuda/cpython/unicode.py +2865 -0
  107. numba_cuda/numba/cuda/cpython/unicode_support.py +1597 -0
  108. numba_cuda/numba/cuda/cpython/unsafe/__init__.py +0 -0
  109. numba_cuda/numba/cuda/cpython/unsafe/numbers.py +64 -0
  110. numba_cuda/numba/cuda/cpython/unsafe/tuple.py +92 -0
  111. numba_cuda/numba/cuda/cuda_paths.py +691 -0
  112. numba_cuda/numba/cuda/cudadecl.py +543 -0
  113. numba_cuda/numba/cuda/cudadrv/__init__.py +14 -0
  114. numba_cuda/numba/cuda/cudadrv/devicearray.py +954 -0
  115. numba_cuda/numba/cuda/cudadrv/devices.py +249 -0
  116. numba_cuda/numba/cuda/cudadrv/driver.py +3238 -0
  117. numba_cuda/numba/cuda/cudadrv/drvapi.py +435 -0
  118. numba_cuda/numba/cuda/cudadrv/dummyarray.py +562 -0
  119. numba_cuda/numba/cuda/cudadrv/enums.py +613 -0
  120. numba_cuda/numba/cuda/cudadrv/error.py +48 -0
  121. numba_cuda/numba/cuda/cudadrv/libs.py +220 -0
  122. numba_cuda/numba/cuda/cudadrv/linkable_code.py +184 -0
  123. numba_cuda/numba/cuda/cudadrv/mappings.py +14 -0
  124. numba_cuda/numba/cuda/cudadrv/ndarray.py +26 -0
  125. numba_cuda/numba/cuda/cudadrv/nvrtc.py +193 -0
  126. numba_cuda/numba/cuda/cudadrv/nvvm.py +756 -0
  127. numba_cuda/numba/cuda/cudadrv/rtapi.py +13 -0
  128. numba_cuda/numba/cuda/cudadrv/runtime.py +34 -0
  129. numba_cuda/numba/cuda/cudaimpl.py +983 -0
  130. numba_cuda/numba/cuda/cudamath.py +149 -0
  131. numba_cuda/numba/cuda/datamodel/__init__.py +7 -0
  132. numba_cuda/numba/cuda/datamodel/cuda_manager.py +66 -0
  133. numba_cuda/numba/cuda/datamodel/cuda_models.py +1446 -0
  134. numba_cuda/numba/cuda/datamodel/cuda_packer.py +224 -0
  135. numba_cuda/numba/cuda/datamodel/cuda_registry.py +22 -0
  136. numba_cuda/numba/cuda/datamodel/cuda_testing.py +153 -0
  137. numba_cuda/numba/cuda/datamodel/manager.py +11 -0
  138. numba_cuda/numba/cuda/datamodel/models.py +9 -0
  139. numba_cuda/numba/cuda/datamodel/packer.py +9 -0
  140. numba_cuda/numba/cuda/datamodel/registry.py +11 -0
  141. numba_cuda/numba/cuda/datamodel/testing.py +11 -0
  142. numba_cuda/numba/cuda/debuginfo.py +997 -0
  143. numba_cuda/numba/cuda/decorators.py +294 -0
  144. numba_cuda/numba/cuda/descriptor.py +35 -0
  145. numba_cuda/numba/cuda/device_init.py +155 -0
  146. numba_cuda/numba/cuda/deviceufunc.py +1021 -0
  147. numba_cuda/numba/cuda/dispatcher.py +2463 -0
  148. numba_cuda/numba/cuda/errors.py +72 -0
  149. numba_cuda/numba/cuda/extending.py +697 -0
  150. numba_cuda/numba/cuda/flags.py +178 -0
  151. numba_cuda/numba/cuda/fp16.py +357 -0
  152. numba_cuda/numba/cuda/include/12/cuda_bf16.h +5118 -0
  153. numba_cuda/numba/cuda/include/12/cuda_bf16.hpp +3865 -0
  154. numba_cuda/numba/cuda/include/12/cuda_fp16.h +5363 -0
  155. numba_cuda/numba/cuda/include/12/cuda_fp16.hpp +3483 -0
  156. numba_cuda/numba/cuda/include/13/cuda_bf16.h +5118 -0
  157. numba_cuda/numba/cuda/include/13/cuda_bf16.hpp +3865 -0
  158. numba_cuda/numba/cuda/include/13/cuda_fp16.h +5363 -0
  159. numba_cuda/numba/cuda/include/13/cuda_fp16.hpp +3483 -0
  160. numba_cuda/numba/cuda/initialize.py +24 -0
  161. numba_cuda/numba/cuda/intrinsics.py +531 -0
  162. numba_cuda/numba/cuda/itanium_mangler.py +214 -0
  163. numba_cuda/numba/cuda/kernels/__init__.py +2 -0
  164. numba_cuda/numba/cuda/kernels/reduction.py +265 -0
  165. numba_cuda/numba/cuda/kernels/transpose.py +65 -0
  166. numba_cuda/numba/cuda/libdevice.py +3386 -0
  167. numba_cuda/numba/cuda/libdevicedecl.py +20 -0
  168. numba_cuda/numba/cuda/libdevicefuncs.py +1060 -0
  169. numba_cuda/numba/cuda/libdeviceimpl.py +88 -0
  170. numba_cuda/numba/cuda/locks.py +19 -0
  171. numba_cuda/numba/cuda/lowering.py +1980 -0
  172. numba_cuda/numba/cuda/mathimpl.py +374 -0
  173. numba_cuda/numba/cuda/memory_management/__init__.py +4 -0
  174. numba_cuda/numba/cuda/memory_management/memsys.cu +99 -0
  175. numba_cuda/numba/cuda/memory_management/memsys.cuh +22 -0
  176. numba_cuda/numba/cuda/memory_management/nrt.cu +212 -0
  177. numba_cuda/numba/cuda/memory_management/nrt.cuh +48 -0
  178. numba_cuda/numba/cuda/memory_management/nrt.py +390 -0
  179. numba_cuda/numba/cuda/memory_management/nrt_context.py +438 -0
  180. numba_cuda/numba/cuda/misc/appdirs.py +594 -0
  181. numba_cuda/numba/cuda/misc/cffiimpl.py +24 -0
  182. numba_cuda/numba/cuda/misc/coverage_support.py +43 -0
  183. numba_cuda/numba/cuda/misc/dump_style.py +41 -0
  184. numba_cuda/numba/cuda/misc/findlib.py +75 -0
  185. numba_cuda/numba/cuda/misc/firstlinefinder.py +96 -0
  186. numba_cuda/numba/cuda/misc/gdb_hook.py +240 -0
  187. numba_cuda/numba/cuda/misc/literal.py +28 -0
  188. numba_cuda/numba/cuda/misc/llvm_pass_timings.py +412 -0
  189. numba_cuda/numba/cuda/misc/special.py +94 -0
  190. numba_cuda/numba/cuda/models.py +56 -0
  191. numba_cuda/numba/cuda/np/arraymath.py +5130 -0
  192. numba_cuda/numba/cuda/np/arrayobj.py +7635 -0
  193. numba_cuda/numba/cuda/np/extensions.py +11 -0
  194. numba_cuda/numba/cuda/np/linalg.py +3087 -0
  195. numba_cuda/numba/cuda/np/math/__init__.py +0 -0
  196. numba_cuda/numba/cuda/np/math/cmathimpl.py +558 -0
  197. numba_cuda/numba/cuda/np/math/mathimpl.py +487 -0
  198. numba_cuda/numba/cuda/np/math/numbers.py +1461 -0
  199. numba_cuda/numba/cuda/np/npdatetime.py +969 -0
  200. numba_cuda/numba/cuda/np/npdatetime_helpers.py +217 -0
  201. numba_cuda/numba/cuda/np/npyfuncs.py +1808 -0
  202. numba_cuda/numba/cuda/np/npyimpl.py +1027 -0
  203. numba_cuda/numba/cuda/np/numpy_support.py +798 -0
  204. numba_cuda/numba/cuda/np/polynomial/__init__.py +4 -0
  205. numba_cuda/numba/cuda/np/polynomial/polynomial_core.py +242 -0
  206. numba_cuda/numba/cuda/np/polynomial/polynomial_functions.py +380 -0
  207. numba_cuda/numba/cuda/np/ufunc/__init__.py +4 -0
  208. numba_cuda/numba/cuda/np/ufunc/decorators.py +203 -0
  209. numba_cuda/numba/cuda/np/ufunc/sigparse.py +68 -0
  210. numba_cuda/numba/cuda/np/ufunc/ufuncbuilder.py +65 -0
  211. numba_cuda/numba/cuda/np/ufunc_db.py +1282 -0
  212. numba_cuda/numba/cuda/np/unsafe/__init__.py +0 -0
  213. numba_cuda/numba/cuda/np/unsafe/ndarray.py +84 -0
  214. numba_cuda/numba/cuda/nvvmutils.py +254 -0
  215. numba_cuda/numba/cuda/printimpl.py +126 -0
  216. numba_cuda/numba/cuda/random.py +308 -0
  217. numba_cuda/numba/cuda/reshape_funcs.cu +156 -0
  218. numba_cuda/numba/cuda/serialize.py +267 -0
  219. numba_cuda/numba/cuda/simulator/__init__.py +63 -0
  220. numba_cuda/numba/cuda/simulator/_internal/__init__.py +4 -0
  221. numba_cuda/numba/cuda/simulator/_internal/cuda_bf16.py +2 -0
  222. numba_cuda/numba/cuda/simulator/api.py +179 -0
  223. numba_cuda/numba/cuda/simulator/bf16.py +4 -0
  224. numba_cuda/numba/cuda/simulator/compiler.py +38 -0
  225. numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +11 -0
  226. numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +462 -0
  227. numba_cuda/numba/cuda/simulator/cudadrv/devices.py +122 -0
  228. numba_cuda/numba/cuda/simulator/cudadrv/driver.py +66 -0
  229. numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +7 -0
  230. numba_cuda/numba/cuda/simulator/cudadrv/dummyarray.py +7 -0
  231. numba_cuda/numba/cuda/simulator/cudadrv/error.py +10 -0
  232. numba_cuda/numba/cuda/simulator/cudadrv/libs.py +10 -0
  233. numba_cuda/numba/cuda/simulator/cudadrv/linkable_code.py +61 -0
  234. numba_cuda/numba/cuda/simulator/cudadrv/nvrtc.py +11 -0
  235. numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +32 -0
  236. numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +22 -0
  237. numba_cuda/numba/cuda/simulator/dispatcher.py +11 -0
  238. numba_cuda/numba/cuda/simulator/kernel.py +320 -0
  239. numba_cuda/numba/cuda/simulator/kernelapi.py +509 -0
  240. numba_cuda/numba/cuda/simulator/memory_management/__init__.py +4 -0
  241. numba_cuda/numba/cuda/simulator/memory_management/nrt.py +21 -0
  242. numba_cuda/numba/cuda/simulator/reduction.py +19 -0
  243. numba_cuda/numba/cuda/simulator/tests/support.py +4 -0
  244. numba_cuda/numba/cuda/simulator/vector_types.py +65 -0
  245. numba_cuda/numba/cuda/simulator_init.py +18 -0
  246. numba_cuda/numba/cuda/stubs.py +624 -0
  247. numba_cuda/numba/cuda/target.py +505 -0
  248. numba_cuda/numba/cuda/testing.py +347 -0
  249. numba_cuda/numba/cuda/tests/__init__.py +62 -0
  250. numba_cuda/numba/cuda/tests/benchmarks/__init__.py +0 -0
  251. numba_cuda/numba/cuda/tests/benchmarks/test_kernel_launch.py +119 -0
  252. numba_cuda/numba/cuda/tests/cloudpickle_main_class.py +9 -0
  253. numba_cuda/numba/cuda/tests/core/serialize_usecases.py +113 -0
  254. numba_cuda/numba/cuda/tests/core/test_itanium_mangler.py +83 -0
  255. numba_cuda/numba/cuda/tests/core/test_serialize.py +371 -0
  256. numba_cuda/numba/cuda/tests/cudadrv/__init__.py +9 -0
  257. numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +147 -0
  258. numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +161 -0
  259. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +397 -0
  260. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +24 -0
  261. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +180 -0
  262. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +360 -0
  263. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +191 -0
  264. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +621 -0
  265. numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +247 -0
  266. numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +100 -0
  267. numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +200 -0
  268. numba_cuda/numba/cuda/tests/cudadrv/test_events.py +53 -0
  269. numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +72 -0
  270. numba_cuda/numba/cuda/tests/cudadrv/test_init.py +138 -0
  271. numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +43 -0
  272. numba_cuda/numba/cuda/tests/cudadrv/test_is_fp16.py +15 -0
  273. numba_cuda/numba/cuda/tests/cudadrv/test_linkable_code.py +58 -0
  274. numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +348 -0
  275. numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +128 -0
  276. numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py +301 -0
  277. numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +174 -0
  278. numba_cuda/numba/cuda/tests/cudadrv/test_nvrtc.py +28 -0
  279. numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +185 -0
  280. numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +39 -0
  281. numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +23 -0
  282. numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +38 -0
  283. numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +48 -0
  284. numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +44 -0
  285. numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +127 -0
  286. numba_cuda/numba/cuda/tests/cudapy/__init__.py +9 -0
  287. numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +231 -0
  288. numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +50 -0
  289. numba_cuda/numba/cuda/tests/cudapy/cg_cache_usecases.py +36 -0
  290. numba_cuda/numba/cuda/tests/cudapy/complex_usecases.py +116 -0
  291. numba_cuda/numba/cuda/tests/cudapy/enum_usecases.py +59 -0
  292. numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +62 -0
  293. numba_cuda/numba/cuda/tests/cudapy/jitlink.ptx +28 -0
  294. numba_cuda/numba/cuda/tests/cudapy/overload_usecases.py +33 -0
  295. numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +104 -0
  296. numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +47 -0
  297. numba_cuda/numba/cuda/tests/cudapy/test_analysis.py +1122 -0
  298. numba_cuda/numba/cuda/tests/cudapy/test_array.py +344 -0
  299. numba_cuda/numba/cuda/tests/cudapy/test_array_alignment.py +268 -0
  300. numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +203 -0
  301. numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +63 -0
  302. numba_cuda/numba/cuda/tests/cudapy/test_array_reductions.py +360 -0
  303. numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +1815 -0
  304. numba_cuda/numba/cuda/tests/cudapy/test_bfloat16.py +599 -0
  305. numba_cuda/numba/cuda/tests/cudapy/test_bfloat16_bindings.py +377 -0
  306. numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +160 -0
  307. numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +27 -0
  308. numba_cuda/numba/cuda/tests/cudapy/test_byteflow.py +98 -0
  309. numba_cuda/numba/cuda/tests/cudapy/test_cache_hints.py +210 -0
  310. numba_cuda/numba/cuda/tests/cudapy/test_caching.py +683 -0
  311. numba_cuda/numba/cuda/tests/cudapy/test_casting.py +265 -0
  312. numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +42 -0
  313. numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +718 -0
  314. numba_cuda/numba/cuda/tests/cudapy/test_complex.py +370 -0
  315. numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +23 -0
  316. numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +142 -0
  317. numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +178 -0
  318. numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +193 -0
  319. numba_cuda/numba/cuda/tests/cudapy/test_copy_propagate.py +131 -0
  320. numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +438 -0
  321. numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +94 -0
  322. numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +101 -0
  323. numba_cuda/numba/cuda/tests/cudapy/test_debug.py +105 -0
  324. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +978 -0
  325. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo_types.py +476 -0
  326. numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +500 -0
  327. numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +820 -0
  328. numba_cuda/numba/cuda/tests/cudapy/test_enums.py +152 -0
  329. numba_cuda/numba/cuda/tests/cudapy/test_errors.py +111 -0
  330. numba_cuda/numba/cuda/tests/cudapy/test_exception.py +170 -0
  331. numba_cuda/numba/cuda/tests/cudapy/test_extending.py +1088 -0
  332. numba_cuda/numba/cuda/tests/cudapy/test_extending_types.py +71 -0
  333. numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +265 -0
  334. numba_cuda/numba/cuda/tests/cudapy/test_flow_control.py +1433 -0
  335. numba_cuda/numba/cuda/tests/cudapy/test_forall.py +57 -0
  336. numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +34 -0
  337. numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +69 -0
  338. numba_cuda/numba/cuda/tests/cudapy/test_globals.py +62 -0
  339. numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +474 -0
  340. numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +167 -0
  341. numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +92 -0
  342. numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +39 -0
  343. numba_cuda/numba/cuda/tests/cudapy/test_inline.py +170 -0
  344. numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +255 -0
  345. numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +1219 -0
  346. numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +263 -0
  347. numba_cuda/numba/cuda/tests/cudapy/test_ir.py +598 -0
  348. numba_cuda/numba/cuda/tests/cudapy/test_ir_utils.py +276 -0
  349. numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +101 -0
  350. numba_cuda/numba/cuda/tests/cudapy/test_lang.py +68 -0
  351. numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +123 -0
  352. numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +194 -0
  353. numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +220 -0
  354. numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +173 -0
  355. numba_cuda/numba/cuda/tests/cudapy/test_make_function_to_jit_function.py +364 -0
  356. numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +47 -0
  357. numba_cuda/numba/cuda/tests/cudapy/test_math.py +842 -0
  358. numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +76 -0
  359. numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +78 -0
  360. numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +25 -0
  361. numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +145 -0
  362. numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +39 -0
  363. numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +82 -0
  364. numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +53 -0
  365. numba_cuda/numba/cuda/tests/cudapy/test_operator.py +504 -0
  366. numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +93 -0
  367. numba_cuda/numba/cuda/tests/cudapy/test_overload.py +402 -0
  368. numba_cuda/numba/cuda/tests/cudapy/test_powi.py +128 -0
  369. numba_cuda/numba/cuda/tests/cudapy/test_print.py +193 -0
  370. numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +37 -0
  371. numba_cuda/numba/cuda/tests/cudapy/test_random.py +117 -0
  372. numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +614 -0
  373. numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +130 -0
  374. numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +94 -0
  375. numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +83 -0
  376. numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +86 -0
  377. numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +40 -0
  378. numba_cuda/numba/cuda/tests/cudapy/test_sm.py +457 -0
  379. numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +233 -0
  380. numba_cuda/numba/cuda/tests/cudapy/test_ssa.py +454 -0
  381. numba_cuda/numba/cuda/tests/cudapy/test_stream_api.py +56 -0
  382. numba_cuda/numba/cuda/tests/cudapy/test_sync.py +277 -0
  383. numba_cuda/numba/cuda/tests/cudapy/test_tracing.py +200 -0
  384. numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +90 -0
  385. numba_cuda/numba/cuda/tests/cudapy/test_typeconv.py +333 -0
  386. numba_cuda/numba/cuda/tests/cudapy/test_typeinfer.py +538 -0
  387. numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +585 -0
  388. numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +42 -0
  389. numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +485 -0
  390. numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +312 -0
  391. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +23 -0
  392. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +183 -0
  393. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +40 -0
  394. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +40 -0
  395. numba_cuda/numba/cuda/tests/cudapy/test_warning.py +206 -0
  396. numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +446 -0
  397. numba_cuda/numba/cuda/tests/cudasim/__init__.py +9 -0
  398. numba_cuda/numba/cuda/tests/cudasim/support.py +9 -0
  399. numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +111 -0
  400. numba_cuda/numba/cuda/tests/data/__init__.py +2 -0
  401. numba_cuda/numba/cuda/tests/data/cta_barrier.cu +28 -0
  402. numba_cuda/numba/cuda/tests/data/cuda_include.cu +10 -0
  403. numba_cuda/numba/cuda/tests/data/error.cu +12 -0
  404. numba_cuda/numba/cuda/tests/data/include/add.cuh +8 -0
  405. numba_cuda/numba/cuda/tests/data/jitlink.cu +28 -0
  406. numba_cuda/numba/cuda/tests/data/jitlink.ptx +49 -0
  407. numba_cuda/numba/cuda/tests/data/warn.cu +12 -0
  408. numba_cuda/numba/cuda/tests/doc_examples/__init__.py +9 -0
  409. numba_cuda/numba/cuda/tests/doc_examples/ffi/__init__.py +2 -0
  410. numba_cuda/numba/cuda/tests/doc_examples/ffi/functions.cu +54 -0
  411. numba_cuda/numba/cuda/tests/doc_examples/ffi/include/mul.cuh +8 -0
  412. numba_cuda/numba/cuda/tests/doc_examples/ffi/saxpy.cu +14 -0
  413. numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +86 -0
  414. numba_cuda/numba/cuda/tests/doc_examples/test_cpointer.py +68 -0
  415. numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +81 -0
  416. numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +141 -0
  417. numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +160 -0
  418. numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +180 -0
  419. numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +119 -0
  420. numba_cuda/numba/cuda/tests/doc_examples/test_random.py +66 -0
  421. numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +80 -0
  422. numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +206 -0
  423. numba_cuda/numba/cuda/tests/doc_examples/test_ufunc.py +53 -0
  424. numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +76 -0
  425. numba_cuda/numba/cuda/tests/nocuda/__init__.py +9 -0
  426. numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +452 -0
  427. numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +48 -0
  428. numba_cuda/numba/cuda/tests/nocuda/test_import.py +63 -0
  429. numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +252 -0
  430. numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +59 -0
  431. numba_cuda/numba/cuda/tests/nrt/__init__.py +9 -0
  432. numba_cuda/numba/cuda/tests/nrt/test_nrt.py +387 -0
  433. numba_cuda/numba/cuda/tests/nrt/test_nrt_refct.py +124 -0
  434. numba_cuda/numba/cuda/tests/support.py +900 -0
  435. numba_cuda/numba/cuda/typeconv/__init__.py +4 -0
  436. numba_cuda/numba/cuda/typeconv/castgraph.py +137 -0
  437. numba_cuda/numba/cuda/typeconv/rules.py +63 -0
  438. numba_cuda/numba/cuda/typeconv/typeconv.py +121 -0
  439. numba_cuda/numba/cuda/types/__init__.py +233 -0
  440. numba_cuda/numba/cuda/types/__init__.pyi +167 -0
  441. numba_cuda/numba/cuda/types/abstract.py +9 -0
  442. numba_cuda/numba/cuda/types/common.py +9 -0
  443. numba_cuda/numba/cuda/types/containers.py +9 -0
  444. numba_cuda/numba/cuda/types/cuda_abstract.py +533 -0
  445. numba_cuda/numba/cuda/types/cuda_common.py +110 -0
  446. numba_cuda/numba/cuda/types/cuda_containers.py +971 -0
  447. numba_cuda/numba/cuda/types/cuda_function_type.py +230 -0
  448. numba_cuda/numba/cuda/types/cuda_functions.py +798 -0
  449. numba_cuda/numba/cuda/types/cuda_iterators.py +120 -0
  450. numba_cuda/numba/cuda/types/cuda_misc.py +569 -0
  451. numba_cuda/numba/cuda/types/cuda_npytypes.py +690 -0
  452. numba_cuda/numba/cuda/types/cuda_scalars.py +280 -0
  453. numba_cuda/numba/cuda/types/ext_types.py +101 -0
  454. numba_cuda/numba/cuda/types/function_type.py +11 -0
  455. numba_cuda/numba/cuda/types/functions.py +9 -0
  456. numba_cuda/numba/cuda/types/iterators.py +9 -0
  457. numba_cuda/numba/cuda/types/misc.py +9 -0
  458. numba_cuda/numba/cuda/types/npytypes.py +9 -0
  459. numba_cuda/numba/cuda/types/scalars.py +9 -0
  460. numba_cuda/numba/cuda/typing/__init__.py +19 -0
  461. numba_cuda/numba/cuda/typing/arraydecl.py +939 -0
  462. numba_cuda/numba/cuda/typing/asnumbatype.py +130 -0
  463. numba_cuda/numba/cuda/typing/bufproto.py +70 -0
  464. numba_cuda/numba/cuda/typing/builtins.py +1209 -0
  465. numba_cuda/numba/cuda/typing/cffi_utils.py +219 -0
  466. numba_cuda/numba/cuda/typing/cmathdecl.py +47 -0
  467. numba_cuda/numba/cuda/typing/collections.py +138 -0
  468. numba_cuda/numba/cuda/typing/context.py +782 -0
  469. numba_cuda/numba/cuda/typing/ctypes_utils.py +125 -0
  470. numba_cuda/numba/cuda/typing/dictdecl.py +63 -0
  471. numba_cuda/numba/cuda/typing/enumdecl.py +74 -0
  472. numba_cuda/numba/cuda/typing/listdecl.py +147 -0
  473. numba_cuda/numba/cuda/typing/mathdecl.py +158 -0
  474. numba_cuda/numba/cuda/typing/npdatetime.py +322 -0
  475. numba_cuda/numba/cuda/typing/npydecl.py +749 -0
  476. numba_cuda/numba/cuda/typing/setdecl.py +115 -0
  477. numba_cuda/numba/cuda/typing/templates.py +1446 -0
  478. numba_cuda/numba/cuda/typing/typeof.py +301 -0
  479. numba_cuda/numba/cuda/ufuncs.py +746 -0
  480. numba_cuda/numba/cuda/utils.py +724 -0
  481. numba_cuda/numba/cuda/vector_types.py +214 -0
  482. numba_cuda/numba/cuda/vectorizers.py +260 -0
  483. numba_cuda-0.22.1.dist-info/METADATA +109 -0
  484. numba_cuda-0.22.1.dist-info/RECORD +488 -0
  485. numba_cuda-0.22.1.dist-info/WHEEL +6 -0
  486. numba_cuda-0.22.1.dist-info/licenses/LICENSE +26 -0
  487. numba_cuda-0.22.1.dist-info/licenses/LICENSE.numba +24 -0
  488. numba_cuda-0.22.1.dist-info/top_level.txt +1 -0
@@ -0,0 +1,580 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
4
+ """
5
+ API that are reported to numba.cuda
6
+ """
7
+
8
+ import contextlib
9
+ import os
10
+
11
+ import numpy as np
12
+
13
+ from .cudadrv import devicearray, devices, driver
14
+ from numba.cuda.core import config
15
+ from numba.cuda.api_util import prepare_shape_strides_dtype
16
+
17
+ # NDarray device helper
18
+
19
# Module-level aliases for helpers defined in ``cudadrv.devices``; the rest of
# this module uses these shorter names.
require_context = devices.require_context
# NOTE: ``current_context`` is simply ``devices.get_context``.
current_context = devices.get_context
gpus = devices.gpus
22
+
23
+
24
@require_context
def from_cuda_array_interface(desc, owner=None, sync=True):
    """Create a DeviceNDArray from a cuda-array-interface description.

    :param desc: The ``__cuda_array_interface__`` mapping describing the
                 buffer (``shape``, ``typestr``, ``data``, ``version`` and
                 optionally ``strides``, ``mask`` and ``stream``).
    :param owner: The owner of the underlying memory. The resulting
                  DeviceNDArray will acquire a reference from it.
    :param sync: If ``True``, the imported stream (if present) will be
                 synchronized, provided ``config.CUDA_ARRAY_INTERFACE_SYNC``
                 is also set.
    :raises NotImplementedError: if the description carries a mask.
    """
    # Mask introduced in version 1
    if 1 <= desc.get("version"):
        # Would ideally be better to detect if the mask is all valid
        if desc.get("mask") is not None:
            raise NotImplementedError("Masked arrays are not supported")

    shape, strides, dtype = prepare_shape_strides_dtype(
        desc["shape"], desc.get("strides"), desc["typestr"], order="C"
    )
    nbytes = driver.memory_size_from_info(shape, strides, dtype.itemsize)

    # Wrap the raw device address (first element of ``data``) so the driver
    # layer can treat it as a device pointer owned by ``owner``.
    devptr = driver.binding.CUdeviceptr(desc["data"][0])
    gpu_data = driver.MemoryPointer(
        current_context(), devptr, size=nbytes, owner=owner
    )

    stream_ptr = desc.get("stream", None)
    if stream_ptr is None:
        # No stream was exported: use 0, the "Numba default stream", not the
        # CUDA default stream.
        stream = 0
    else:
        stream = external_stream(stream_ptr)
        if sync and config.CUDA_ARRAY_INTERFACE_SYNC:
            stream.synchronize()

    return devicearray.DeviceNDArray(
        shape=shape,
        strides=strides,
        dtype=dtype,
        gpu_data=gpu_data,
        stream=stream,
    )
65
+
66
+
67
def as_cuda_array(obj, sync=True):
    """Create a DeviceNDArray from any object that implements
    the :ref:`cuda array interface <cuda-array-interface>`.

    A view of the underlying GPU buffer is created; no data is copied. The
    resulting DeviceNDArray will acquire a reference from `obj`.

    If ``sync`` is ``True``, then the imported stream (if present) will be
    synchronized.

    :raises TypeError: if `obj` has no ``__cuda_array_interface__`` attribute
                       (or the attribute is ``None``).
    """
    interface = getattr(obj, "__cuda_array_interface__", None)
    if interface is None:
        raise TypeError("*obj* doesn't implement the cuda array interface.")
    return from_cuda_array_interface(interface, owner=obj, sync=sync)
82
+
83
+
84
def is_cuda_array(obj):
    """Report whether `obj` defines the `__cuda_array_interface__` attribute.

    Only the presence of the attribute is checked; the validity of the
    interface is not verified.
    """
    attr_name = "__cuda_array_interface__"
    return hasattr(obj, attr_name)
90
+
91
+
92
def is_float16_supported():
    """Whether 16-bit floats are supported.

    float16 is always supported in current versions of Numba, so this
    unconditionally returns ``True``.
    """
    supported = True
    return supported
98
+
99
+
100
def is_bfloat16_supported():
    """Whether bfloat16 is supported by the current device.

    bfloat16 is only supported on devices with compute capability >= 8.0.
    The answer is read from the ``supports_bfloat16`` attribute of the device
    that belongs to the current context.
    """
    device = current_context().device
    return device.supports_bfloat16
106
+
107
+
108
@require_context
def to_device(obj, stream=0, copy=True, to=None):
    """to_device(obj, stream=0, copy=True, to=None)

    Allocate and transfer a numpy ndarray or structured scalar to the device.

    To copy host->device a numpy array::

        ary = np.arange(10)
        d_ary = cuda.to_device(ary)

    To enqueue the transfer to a stream::

        stream = cuda.stream()
        d_ary = cuda.to_device(ary, stream=stream)

    The resulting ``d_ary`` is a ``DeviceNDArray``.

    To copy device->host::

        hary = d_ary.copy_to_host()

    To copy device->host to an existing array::

        ary = np.empty(shape=d_ary.shape, dtype=d_ary.dtype)
        d_ary.copy_to_host(ary)

    To enqueue the transfer to a stream::

        hary = d_ary.copy_to_host(stream=stream)

    When ``to`` is given, no allocation happens: ``obj`` is copied into
    ``to`` (only if ``copy`` is true) and ``to`` is returned.
    """
    if to is not None:
        # Caller supplied the destination; optionally fill it and hand it back.
        if copy:
            to.copy_to_device(obj, stream=stream)
        return to

    # ``auto_device`` returns a pair; only the device array is of interest.
    devary, _ = devicearray.auto_device(
        obj, stream=stream, copy=copy, user_explicit=True
    )
    return devary
147
+
148
+
149
@require_context
def device_array(shape, dtype=np.float64, strides=None, order="C", stream=0):
    """device_array(shape, dtype=np.float64, strides=None, order='C', stream=0)

    Allocate an empty device ndarray. Similar to :meth:`numpy.empty`.
    """
    # Normalize the layout description before constructing the array.
    norm_shape, norm_strides, norm_dtype = prepare_shape_strides_dtype(
        shape, strides, dtype, order
    )
    return devicearray.DeviceNDArray(
        shape=norm_shape,
        strides=norm_strides,
        dtype=norm_dtype,
        stream=stream,
    )
161
+
162
+
163
@require_context
def managed_array(
    shape,
    dtype=np.float64,
    strides=None,
    order="C",
    stream=0,
    attach_global=True,
):
    """managed_array(shape, dtype=np.float64, strides=None, order='C', stream=0,
                     attach_global=True)

    Allocate a np.ndarray with a buffer that is managed.
    Similar to np.empty().

    Managed memory is supported on Linux / x86 and PowerPC, and is considered
    experimental on Windows and Linux / AArch64.

    :param attach_global: A flag indicating whether to attach globally. Global
                          attachment implies that the memory is accessible from
                          any stream on any device. If ``False``, attachment is
                          *host*, and memory is only accessible by devices
                          with Compute Capability 6.0 and later.
    """
    shape, strides, dtype = prepare_shape_strides_dtype(
        shape, strides, dtype, order
    )
    nbytes = driver.memory_size_from_info(shape, strides, dtype.itemsize)
    managed_mem = current_context().memallocmanaged(
        nbytes, attach_global=attach_global
    )
    # Build a plain ndarray over the managed allocation, then re-type it as a
    # ManagedNDArray and attach the device-side bookkeeping.
    host_view = np.ndarray(
        shape=shape,
        strides=strides,
        dtype=dtype,
        order=order,
        buffer=managed_mem,
    )
    result = host_view.view(type=devicearray.ManagedNDArray)
    result.device_setup(managed_mem, stream=stream)
    return result
200
+
201
+
202
@require_context
def pinned_array(shape, dtype=np.float64, strides=None, order="C"):
    """pinned_array(shape, dtype=np.float64, strides=None, order='C')

    Allocate an :class:`ndarray <numpy.ndarray>` with a buffer that is pinned
    (pagelocked). Similar to :func:`np.empty() <numpy.empty>`.
    """
    shape, strides, dtype = prepare_shape_strides_dtype(
        shape, strides, dtype, order
    )
    nbytes = driver.memory_size_from_info(shape, strides, dtype.itemsize)
    pinned_mem = current_context().memhostalloc(nbytes)
    # The returned ndarray uses the host allocation as its buffer.
    layout = dict(shape=shape, strides=strides, dtype=dtype, order=order)
    return np.ndarray(buffer=pinned_mem, **layout)
217
+
218
+
219
@require_context
def mapped_array(
    shape,
    dtype=np.float64,
    strides=None,
    order="C",
    stream=0,
    portable=False,
    wc=False,
):
    """mapped_array(shape, dtype=np.float64, strides=None, order='C', stream=0,
                    portable=False, wc=False)

    Allocate a mapped ndarray with a buffer that is pinned and mapped on
    to the device. Similar to np.empty()

    :param portable: a boolean flag to allow the allocated device memory to be
              usable in multiple devices.
    :param wc: a boolean flag to enable writecombined allocation which is faster
        to write by the host and to read by the device, but slower to
        read by the host and slower to write by the device.
    """
    shape, strides, dtype = prepare_shape_strides_dtype(
        shape, strides, dtype, order
    )
    bytesize = driver.memory_size_from_info(shape, strides, dtype.itemsize)
    # Forward ``portable`` and ``wc`` to the allocator; previously they were
    # accepted and documented but silently dropped, so they had no effect.
    buffer = current_context().memhostalloc(
        bytesize, mapped=True, portable=portable, wc=wc
    )
    npary = np.ndarray(
        shape=shape, strides=strides, dtype=dtype, order=order, buffer=buffer
    )
    # Re-type the host array as a MappedNDArray and attach device bookkeeping.
    mappedview = np.ndarray.view(npary, type=devicearray.MappedNDArray)
    mappedview.device_setup(buffer, stream=stream)
    return mappedview
252
+
253
+
254
@contextlib.contextmanager
@require_context
def open_ipc_array(handle, shape, dtype, strides=None, offset=0):
    """
    A context manager that opens a IPC *handle* (*CUipcMemHandle*) that is
    represented as a sequence of bytes (e.g. *bytes*, tuple of int)
    and represent it as an array of the given *shape*, *strides* and *dtype*.
    The *strides* can be omitted.  In that case, it is assumed to be a 1D
    C contiguous array.

    Yields a device array.

    The IPC handle is closed automatically when context manager exits,
    including when the body of the ``with`` block raises.
    """
    dtype = np.dtype(dtype)
    # compute size
    size = np.prod(shape) * dtype.itemsize
    # manually recreate the IPC mem handle
    driver_handle = driver.binding.CUipcMemHandle()
    driver_handle.reserved = handle
    # use *IpcHandle* to open the IPC memory
    ipchandle = driver.IpcHandle(None, driver_handle, size, offset=offset)
    # Open before entering ``try`` so that a failed open does not trigger a
    # close on a handle that was never opened.
    devary = ipchandle.open_array(
        current_context(), shape=shape, strides=strides, dtype=dtype
    )
    try:
        yield devary
    finally:
        # Without ``finally`` an exception raised inside the ``with`` body
        # would skip the close and leak the IPC mapping.
        ipchandle.close()
280
+
281
+
282
def synchronize():
    """Synchronize the current context.

    Returns whatever the context's ``synchronize()`` method returns.
    """
    ctx = current_context()
    return ctx.synchronize()
285
+
286
+
287
+ def _contiguous_strides_like_array(ary):
288
+ """
289
+ Given an array, compute strides for a new contiguous array of the same
290
+ shape.
291
+ """
292
+ # Don't recompute strides if the default strides will be sufficient to
293
+ # create a contiguous array.
294
+ if ary.flags["C_CONTIGUOUS"] or ary.flags["F_CONTIGUOUS"] or ary.ndim <= 1:
295
+ return None
296
+
297
+ # Otherwise, we need to compute new strides using an algorithm adapted from
298
+ # NumPy v1.17.4's PyArray_NewLikeArrayWithShape in
299
+ # core/src/multiarray/ctors.c. We permute the strides in ascending order
300
+ # then compute the stride for the dimensions with the same permutation.
301
+
302
+ # Stride permutation. E.g. a stride array (4, -2, 12) becomes
303
+ # [(1, -2), (0, 4), (2, 12)]
304
+ strideperm = [x for x in enumerate(ary.strides)]
305
+ strideperm.sort(key=lambda x: x[1])
306
+
307
+ # Compute new strides using permutation
308
+ strides = [0] * len(ary.strides)
309
+ stride = ary.dtype.itemsize
310
+ for i_perm, _ in strideperm:
311
+ strides[i_perm] = stride
312
+ stride *= ary.shape[i_perm]
313
+ return tuple(strides)
314
+
315
+
316
+ def _order_like_array(ary):
317
+ if ary.flags["F_CONTIGUOUS"] and not ary.flags["C_CONTIGUOUS"]:
318
+ return "F"
319
+ else:
320
+ return "C"
321
+
322
+
323
def device_array_like(ary, stream=0):
    """
    Call :func:`device_array() <numba.cuda.device_array>` with information from
    the array.

    Shape and dtype are taken from *ary*; strides and order are derived so
    that the new array is contiguous with a matching layout.
    """
    layout = {
        "strides": _contiguous_strides_like_array(ary),
        "order": _order_like_array(ary),
    }
    return device_array(
        shape=ary.shape, dtype=ary.dtype, stream=stream, **layout
    )
337
+
338
+
339
def mapped_array_like(ary, stream=0, portable=False, wc=False):
    """
    Call :func:`mapped_array() <numba.cuda.mapped_array>` with the information
    from the array.

    Shape and dtype are taken from *ary*; strides and order are derived so
    that the new array is contiguous with a matching layout. *stream*,
    *portable* and *wc* are passed through unchanged.
    """
    layout = {
        "strides": _contiguous_strides_like_array(ary),
        "order": _order_like_array(ary),
    }
    return mapped_array(
        shape=ary.shape,
        dtype=ary.dtype,
        stream=stream,
        portable=portable,
        wc=wc,
        **layout,
    )
355
+
356
+
357
def pinned_array_like(ary):
    """
    Call :func:`pinned_array() <numba.cuda.pinned_array>` with the information
    from the array.

    Shape and dtype are taken from *ary*; strides and order are derived so
    that the new array is contiguous with a matching layout.
    """
    layout = {
        "strides": _contiguous_strides_like_array(ary),
        "order": _order_like_array(ary),
    }
    return pinned_array(shape=ary.shape, dtype=ary.dtype, **layout)
367
+
368
+
369
+ # Stream helper
370
@require_context
def stream():
    """
    Create a CUDA stream that represents a command queue for the device.

    The stream is created on the current context.
    """
    ctx = current_context()
    return ctx.create_stream()
376
+
377
+
378
@require_context
def default_stream():
    """
    Get the default CUDA stream. CUDA semantics in general are that the default
    stream is either the legacy default stream or the per-thread default stream
    depending on which CUDA APIs are in use. In Numba, the APIs for the legacy
    default stream are always the ones in use, but an option to use APIs for
    the per-thread default stream may be provided in future.
    """
    ctx = current_context()
    return ctx.get_default_stream()
388
+
389
+
390
@require_context
def legacy_default_stream():
    """
    Get the legacy default CUDA stream of the current context.
    """
    ctx = current_context()
    return ctx.get_legacy_default_stream()
396
+
397
+
398
@require_context
def per_thread_default_stream():
    """
    Get the per-thread default CUDA stream of the current context.
    """
    ctx = current_context()
    return ctx.get_per_thread_default_stream()
404
+
405
+
406
@require_context
def external_stream(ptr):
    """Create a Numba stream object for a stream allocated outside Numba.

    :param ptr: Pointer to the external stream to wrap in a Numba Stream
    :type ptr: int
    """
    ctx = current_context()
    return ctx.create_external_stream(ptr)
414
+
415
+
416
+ # Page lock
417
@require_context
@contextlib.contextmanager
def pinned(*arylist):
    """A context manager for temporary pinning a sequence of host ndarrays."""
    # The list is never read again; it only keeps the objects returned by
    # ``mempin`` referenced for as long as the ``with`` block is running.
    pins = [
        current_context().mempin(
            ary,
            driver.host_pointer(ary),
            driver.host_memory_size(ary),
            mapped=False,
        )
        for ary in arylist
    ]
    yield
431
+
432
+
433
@require_context
@contextlib.contextmanager
def mapped(*arylist, **kws):
    """A context manager for temporarily mapping a sequence of host ndarrays.

    :param arylist: Host arrays to pin and map.
    :param stream: Optional keyword; the stream associated with the yielded
                   device arrays (defaults to ``0``).
    :raises TypeError: if any keyword other than ``stream`` is passed.

    Yields a single device array when exactly one host array is given,
    otherwise a list of device arrays in the same order as *arylist*.
    """
    # Reject every keyword except ``stream``. The previous ``assert`` let
    # unknown keywords through whenever ``stream`` was also present, and was
    # removed entirely under ``python -O``.
    if set(kws) - {"stream"}:
        raise TypeError("Only accept 'stream' as keyword.")
    stream = kws.get("stream", 0)
    pmlist = []
    devarylist = []
    try:
        # Pin inside the ``try`` so that arrays already pinned are released
        # if pinning a later array fails.
        for ary in arylist:
            pm = current_context().mempin(
                ary,
                driver.host_pointer(ary),
                driver.host_memory_size(ary),
                mapped=True,
            )
            pmlist.append(pm)
            devary = devicearray.from_array_like(
                ary, gpu_data=pm, stream=stream
            )
            devarylist.append(devary)
        if len(devarylist) == 1:
            yield devarylist[0]
        else:
            yield devarylist
    finally:
        # When exiting from `with cuda.mapped(*arrs) as mapped_arrs:`, the name
        # `mapped_arrs` stays in scope, blocking automatic unmapping based on
        # reference count. We therefore invoke the finalizer manually.
        for pm in pmlist:
            pm.free()
462
+
463
+
464
def event(timing=True):
    """
    Create a CUDA event on the current context. Timing data is only recorded
    by the event if it is created with ``timing=True``.
    """
    return current_context().create_event(timing=timing)
471
+
472
+
473
# Alias for the driver-level helper of the same name.
event_elapsed_time = driver.event_elapsed_time
474
+
475
+
476
+ # Device selection
477
+
478
+
479
def select_device(device_id):
    """
    Make the context associated with device *device_id* the current context.

    Returns a Device instance.

    Raises exception on error.
    """
    return devices.get_context(device_id).device
489
+
490
+
491
def get_current_device():
    """Get current device associated with the current thread."""
    ctx = current_context()
    return ctx.device
494
+
495
+
496
def list_devices():
    """Return a list of all detected devices (the ``devices.gpus`` object)."""
    detected = devices.gpus
    return detected
499
+
500
+
501
def close():
    """
    Explicitly clears all contexts in the current thread, and destroys all
    contexts if the current thread is the main thread.
    """
    # Imported here rather than at module level, as in the original code.
    from .memory_management import rtsys

    # The memsys object must be cleared first in case it has already been
    # used; only then are the device contexts reset.
    rtsys.close()
    devices.reset()
512
+
513
+
514
def _auto_device(ary, stream=0, copy=True):
    """Thin wrapper that forwards to ``devicearray.auto_device``."""
    options = {"stream": stream, "copy": copy}
    return devicearray.auto_device(ary, **options)
516
+
517
+
518
def detect():
    """
    Detect supported CUDA hardware and print a summary of the detected hardware.

    Returns a boolean indicating whether any supported devices were detected.
    """
    devlist = list_devices()
    print("Found %d CUDA devices" % len(devlist))
    supported_count = 0
    for dev in devlist:
        cc = dev.compute_capability
        kernel_timeout = dev.KERNEL_EXEC_TIMEOUT
        tcc = dev.TCC_DRIVER
        fp32_to_fp64_ratio = dev.SINGLE_TO_DOUBLE_PRECISION_PERF_RATIO

        # (label, value) pairs, printed in this order below the device line.
        attrs = [
            ("Compute Capability", "%d.%d" % cc),
            ("PCI Device ID", dev.PCI_DEVICE_ID),
            ("PCI Bus ID", dev.PCI_BUS_ID),
            ("UUID", dev.uuid),
            ("Watchdog", "Enabled" if kernel_timeout else "Disabled"),
        ]
        # The compute-mode line is only reported on Windows.
        if os.name == "nt":
            attrs.append(("Compute Mode", "TCC" if tcc else "WDDM"))
        attrs.append(("FP32/FP64 Performance Ratio", fp32_to_fp64_ratio))

        if cc < (3, 5):
            support = "[NOT SUPPORTED: CC < 3.5]"
        else:
            supported_count += 1
            if cc < (5, 0):
                support = "[SUPPORTED (DEPRECATED)]"
            else:
                support = "[SUPPORTED]"

        print("id %d    %20s %40s" % (dev.id, dev.name, support))
        for label, value in attrs:
            print("%40s: %s" % (label, value))

    print("Summary:")
    print("\t%d/%d devices are supported" % (supported_count, len(devlist)))
    return supported_count > 0
557
+
558
+
559
@contextlib.contextmanager
def defer_cleanup():
    """
    Temporarily disable memory deallocation.
    Use this to prevent resource deallocation breaking asynchronous execution.

    For example::

        with defer_cleanup():
            # all cleanup is deferred in here
            do_speed_critical_code()
        # cleanup can occur here

    Note: this context manager can be nested.
    """
    # Delegate to the current context's own ``defer_cleanup`` manager.
    ctx = current_context()
    with ctx.defer_cleanup():
        yield
576
+
577
+
578
# Driver profiling helpers, each wrapped with ``require_context``.
profiling = require_context(driver.profiling)
profile_start = require_context(driver.profile_start)
profile_stop = require_context(driver.profile_stop)
@@ -0,0 +1,76 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
4
+ from numba import types
5
+ from numba.core import cgutils
6
+ import numpy as np
7
+
8
+ import functools
9
+
10
+
11
def prepare_shape_strides_dtype(shape, strides, dtype, order):
    """Normalize an array layout description.

    :param shape: An integer (Python or NumPy) or an iterable of integers.
    :param strides: An integer (Python or NumPy), an iterable of integers, or
                    ``None`` / an empty sequence to have the strides computed
                    from *shape*, *dtype* and *order*.
    :param dtype: Anything accepted by ``np.dtype``.
    :param order: ``"C"`` or ``"F"``; only consulted when strides have to be
                  computed.
    :returns: ``(shape, strides, dtype)`` where *shape* and *strides* are
              tuples and *dtype* is a ``np.dtype``.
    :raises TypeError: if *shape* is a float or a floating-point array.
    """
    dtype = np.dtype(dtype)
    if isinstance(shape, (float, np.floating)):
        raise TypeError("shape must be an integer or tuple of integers")
    if isinstance(shape, np.ndarray) and np.issubdtype(
        shape.dtype, np.floating
    ):
        raise TypeError("shape must be an integer or tuple of integers")

    # NumPy integer scalars are not ``int`` instances and are not iterable,
    # so convert them explicitly instead of letting ``tuple()`` fail on them.
    if isinstance(shape, np.integer):
        shape = int(shape)
    if isinstance(shape, int):
        shape = (shape,)
    else:
        shape = tuple(shape)

    if isinstance(strides, np.integer):
        strides = int(strides)
    if isinstance(strides, int):
        strides = (strides,)
    else:
        # Convert to a tuple before testing for emptiness: truth-testing a
        # multi-element NumPy array raises ValueError.
        if strides is not None:
            strides = tuple(strides)
        if not strides:
            strides = _fill_stride_by_order(shape, dtype, order)
    return shape, strides, dtype
31
+
32
+
33
+ @functools.cache
34
+ def _fill_stride_by_order(shape, dtype, order):
35
+ ndims = len(shape)
36
+ if not ndims:
37
+ return ()
38
+ strides = [0] * ndims
39
+ if order == "C":
40
+ strides[-1] = dtype.itemsize
41
+ # -2 because we subtract one for zero-based indexing and another one
42
+ # for skipping the already-filled-in last element
43
+ for d in range(ndims - 2, -1, -1):
44
+ strides[d] = strides[d + 1] * shape[d + 1]
45
+ elif order == "F":
46
+ strides[0] = dtype.itemsize
47
+ for d in range(1, ndims):
48
+ strides[d] = strides[d - 1] * shape[d - 1]
49
+ else:
50
+ raise ValueError("must be either C/F order")
51
+ return tuple(strides)
52
+
53
+
54
def normalize_indices(context, builder, indty, inds, aryty, valty):
    """
    Convert integer indices into tuple of intp

    A single integer index is first wrapped into a one-element ``UniTuple``
    type. Every index value is then cast to ``types.intp``.

    :returns: ``(indty, indices)`` - the (possibly wrapped) index type and the
              list of cast index values.
    :raises TypeError: if *valty* differs from the array dtype, or if the
                       number of indices differs from the array's ``ndim``.
    """
    if indty in types.integer_domain:
        indty = types.UniTuple(dtype=indty, count=1)
        raw_indices = [inds]
    else:
        raw_indices = cgutils.unpack_tuple(builder, inds, count=len(indty))

    indices = []
    for elem_ty, elem_val in zip(indty, raw_indices):
        indices.append(context.cast(builder, elem_val, elem_ty, types.intp))

    # Validation happens after the casts, matching the original ordering.
    expected = aryty.dtype
    if expected != valty:
        raise TypeError("expect %s but got %s" % (expected, valty))

    if aryty.ndim != len(indty):
        raise TypeError(
            "indexing %d-D array with %d-D index" % (aryty.ndim, len(indty))
        )

    return indty, indices