numba-cuda 0.21.1__cp313-cp313-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (488) hide show
  1. _numba_cuda_redirector.pth +4 -0
  2. _numba_cuda_redirector.py +89 -0
  3. numba_cuda/VERSION +1 -0
  4. numba_cuda/__init__.py +6 -0
  5. numba_cuda/_version.py +11 -0
  6. numba_cuda/numba/cuda/__init__.py +70 -0
  7. numba_cuda/numba/cuda/_internal/cuda_bf16.py +16394 -0
  8. numba_cuda/numba/cuda/_internal/cuda_fp16.py +8112 -0
  9. numba_cuda/numba/cuda/api.py +577 -0
  10. numba_cuda/numba/cuda/api_util.py +76 -0
  11. numba_cuda/numba/cuda/args.py +72 -0
  12. numba_cuda/numba/cuda/bf16.py +397 -0
  13. numba_cuda/numba/cuda/cache_hints.py +287 -0
  14. numba_cuda/numba/cuda/cext/__init__.py +2 -0
  15. numba_cuda/numba/cuda/cext/_devicearray.cp313-win_amd64.pyd +0 -0
  16. numba_cuda/numba/cuda/cext/_devicearray.cpp +159 -0
  17. numba_cuda/numba/cuda/cext/_devicearray.h +29 -0
  18. numba_cuda/numba/cuda/cext/_dispatcher.cp313-win_amd64.pyd +0 -0
  19. numba_cuda/numba/cuda/cext/_dispatcher.cpp +1098 -0
  20. numba_cuda/numba/cuda/cext/_hashtable.cpp +532 -0
  21. numba_cuda/numba/cuda/cext/_hashtable.h +135 -0
  22. numba_cuda/numba/cuda/cext/_helperlib.c +71 -0
  23. numba_cuda/numba/cuda/cext/_helperlib.cp313-win_amd64.pyd +0 -0
  24. numba_cuda/numba/cuda/cext/_helpermod.c +82 -0
  25. numba_cuda/numba/cuda/cext/_pymodule.h +38 -0
  26. numba_cuda/numba/cuda/cext/_typeconv.cp313-win_amd64.pyd +0 -0
  27. numba_cuda/numba/cuda/cext/_typeconv.cpp +206 -0
  28. numba_cuda/numba/cuda/cext/_typeof.cpp +1159 -0
  29. numba_cuda/numba/cuda/cext/_typeof.h +19 -0
  30. numba_cuda/numba/cuda/cext/capsulethunk.h +111 -0
  31. numba_cuda/numba/cuda/cext/mviewbuf.c +385 -0
  32. numba_cuda/numba/cuda/cext/mviewbuf.cp313-win_amd64.pyd +0 -0
  33. numba_cuda/numba/cuda/cext/typeconv.cpp +212 -0
  34. numba_cuda/numba/cuda/cext/typeconv.hpp +101 -0
  35. numba_cuda/numba/cuda/cg.py +67 -0
  36. numba_cuda/numba/cuda/cgutils.py +1294 -0
  37. numba_cuda/numba/cuda/cloudpickle/__init__.py +21 -0
  38. numba_cuda/numba/cuda/cloudpickle/cloudpickle.py +1598 -0
  39. numba_cuda/numba/cuda/cloudpickle/cloudpickle_fast.py +17 -0
  40. numba_cuda/numba/cuda/codegen.py +541 -0
  41. numba_cuda/numba/cuda/compiler.py +1396 -0
  42. numba_cuda/numba/cuda/core/analysis.py +758 -0
  43. numba_cuda/numba/cuda/core/annotations/__init__.py +0 -0
  44. numba_cuda/numba/cuda/core/annotations/pretty_annotate.py +288 -0
  45. numba_cuda/numba/cuda/core/annotations/type_annotations.py +305 -0
  46. numba_cuda/numba/cuda/core/base.py +1332 -0
  47. numba_cuda/numba/cuda/core/boxing.py +1411 -0
  48. numba_cuda/numba/cuda/core/bytecode.py +728 -0
  49. numba_cuda/numba/cuda/core/byteflow.py +2346 -0
  50. numba_cuda/numba/cuda/core/caching.py +744 -0
  51. numba_cuda/numba/cuda/core/callconv.py +392 -0
  52. numba_cuda/numba/cuda/core/codegen.py +171 -0
  53. numba_cuda/numba/cuda/core/compiler.py +199 -0
  54. numba_cuda/numba/cuda/core/compiler_lock.py +85 -0
  55. numba_cuda/numba/cuda/core/compiler_machinery.py +497 -0
  56. numba_cuda/numba/cuda/core/config.py +650 -0
  57. numba_cuda/numba/cuda/core/consts.py +124 -0
  58. numba_cuda/numba/cuda/core/controlflow.py +989 -0
  59. numba_cuda/numba/cuda/core/entrypoints.py +57 -0
  60. numba_cuda/numba/cuda/core/environment.py +66 -0
  61. numba_cuda/numba/cuda/core/errors.py +917 -0
  62. numba_cuda/numba/cuda/core/event.py +511 -0
  63. numba_cuda/numba/cuda/core/funcdesc.py +330 -0
  64. numba_cuda/numba/cuda/core/generators.py +387 -0
  65. numba_cuda/numba/cuda/core/imputils.py +509 -0
  66. numba_cuda/numba/cuda/core/inline_closurecall.py +1787 -0
  67. numba_cuda/numba/cuda/core/interpreter.py +3617 -0
  68. numba_cuda/numba/cuda/core/ir.py +1812 -0
  69. numba_cuda/numba/cuda/core/ir_utils.py +2638 -0
  70. numba_cuda/numba/cuda/core/optional.py +129 -0
  71. numba_cuda/numba/cuda/core/options.py +262 -0
  72. numba_cuda/numba/cuda/core/postproc.py +249 -0
  73. numba_cuda/numba/cuda/core/pythonapi.py +1859 -0
  74. numba_cuda/numba/cuda/core/registry.py +46 -0
  75. numba_cuda/numba/cuda/core/removerefctpass.py +123 -0
  76. numba_cuda/numba/cuda/core/rewrites/__init__.py +26 -0
  77. numba_cuda/numba/cuda/core/rewrites/ir_print.py +91 -0
  78. numba_cuda/numba/cuda/core/rewrites/registry.py +104 -0
  79. numba_cuda/numba/cuda/core/rewrites/static_binop.py +41 -0
  80. numba_cuda/numba/cuda/core/rewrites/static_getitem.py +189 -0
  81. numba_cuda/numba/cuda/core/rewrites/static_raise.py +100 -0
  82. numba_cuda/numba/cuda/core/sigutils.py +68 -0
  83. numba_cuda/numba/cuda/core/ssa.py +498 -0
  84. numba_cuda/numba/cuda/core/targetconfig.py +330 -0
  85. numba_cuda/numba/cuda/core/tracing.py +231 -0
  86. numba_cuda/numba/cuda/core/transforms.py +956 -0
  87. numba_cuda/numba/cuda/core/typed_passes.py +867 -0
  88. numba_cuda/numba/cuda/core/typeinfer.py +1950 -0
  89. numba_cuda/numba/cuda/core/unsafe/__init__.py +0 -0
  90. numba_cuda/numba/cuda/core/unsafe/bytes.py +67 -0
  91. numba_cuda/numba/cuda/core/unsafe/eh.py +67 -0
  92. numba_cuda/numba/cuda/core/unsafe/refcount.py +98 -0
  93. numba_cuda/numba/cuda/core/untyped_passes.py +1979 -0
  94. numba_cuda/numba/cuda/cpython/builtins.py +1153 -0
  95. numba_cuda/numba/cuda/cpython/charseq.py +1218 -0
  96. numba_cuda/numba/cuda/cpython/cmathimpl.py +560 -0
  97. numba_cuda/numba/cuda/cpython/enumimpl.py +103 -0
  98. numba_cuda/numba/cuda/cpython/iterators.py +167 -0
  99. numba_cuda/numba/cuda/cpython/listobj.py +1326 -0
  100. numba_cuda/numba/cuda/cpython/mathimpl.py +499 -0
  101. numba_cuda/numba/cuda/cpython/numbers.py +1475 -0
  102. numba_cuda/numba/cuda/cpython/rangeobj.py +289 -0
  103. numba_cuda/numba/cuda/cpython/slicing.py +322 -0
  104. numba_cuda/numba/cuda/cpython/tupleobj.py +456 -0
  105. numba_cuda/numba/cuda/cpython/unicode.py +2865 -0
  106. numba_cuda/numba/cuda/cpython/unicode_support.py +1597 -0
  107. numba_cuda/numba/cuda/cpython/unsafe/__init__.py +0 -0
  108. numba_cuda/numba/cuda/cpython/unsafe/numbers.py +64 -0
  109. numba_cuda/numba/cuda/cpython/unsafe/tuple.py +92 -0
  110. numba_cuda/numba/cuda/cuda_paths.py +691 -0
  111. numba_cuda/numba/cuda/cudadecl.py +556 -0
  112. numba_cuda/numba/cuda/cudadrv/__init__.py +14 -0
  113. numba_cuda/numba/cuda/cudadrv/devicearray.py +951 -0
  114. numba_cuda/numba/cuda/cudadrv/devices.py +249 -0
  115. numba_cuda/numba/cuda/cudadrv/driver.py +3222 -0
  116. numba_cuda/numba/cuda/cudadrv/drvapi.py +435 -0
  117. numba_cuda/numba/cuda/cudadrv/dummyarray.py +558 -0
  118. numba_cuda/numba/cuda/cudadrv/enums.py +613 -0
  119. numba_cuda/numba/cuda/cudadrv/error.py +48 -0
  120. numba_cuda/numba/cuda/cudadrv/libs.py +220 -0
  121. numba_cuda/numba/cuda/cudadrv/linkable_code.py +184 -0
  122. numba_cuda/numba/cuda/cudadrv/mappings.py +14 -0
  123. numba_cuda/numba/cuda/cudadrv/ndarray.py +26 -0
  124. numba_cuda/numba/cuda/cudadrv/nvrtc.py +193 -0
  125. numba_cuda/numba/cuda/cudadrv/nvvm.py +756 -0
  126. numba_cuda/numba/cuda/cudadrv/rtapi.py +13 -0
  127. numba_cuda/numba/cuda/cudadrv/runtime.py +34 -0
  128. numba_cuda/numba/cuda/cudaimpl.py +995 -0
  129. numba_cuda/numba/cuda/cudamath.py +149 -0
  130. numba_cuda/numba/cuda/datamodel/__init__.py +7 -0
  131. numba_cuda/numba/cuda/datamodel/cuda_manager.py +66 -0
  132. numba_cuda/numba/cuda/datamodel/cuda_models.py +1446 -0
  133. numba_cuda/numba/cuda/datamodel/cuda_packer.py +224 -0
  134. numba_cuda/numba/cuda/datamodel/cuda_registry.py +22 -0
  135. numba_cuda/numba/cuda/datamodel/cuda_testing.py +153 -0
  136. numba_cuda/numba/cuda/datamodel/manager.py +11 -0
  137. numba_cuda/numba/cuda/datamodel/models.py +9 -0
  138. numba_cuda/numba/cuda/datamodel/packer.py +9 -0
  139. numba_cuda/numba/cuda/datamodel/registry.py +11 -0
  140. numba_cuda/numba/cuda/datamodel/testing.py +11 -0
  141. numba_cuda/numba/cuda/debuginfo.py +903 -0
  142. numba_cuda/numba/cuda/decorators.py +294 -0
  143. numba_cuda/numba/cuda/descriptor.py +35 -0
  144. numba_cuda/numba/cuda/device_init.py +158 -0
  145. numba_cuda/numba/cuda/deviceufunc.py +1021 -0
  146. numba_cuda/numba/cuda/dispatcher.py +2463 -0
  147. numba_cuda/numba/cuda/errors.py +72 -0
  148. numba_cuda/numba/cuda/extending.py +697 -0
  149. numba_cuda/numba/cuda/flags.py +178 -0
  150. numba_cuda/numba/cuda/fp16.py +357 -0
  151. numba_cuda/numba/cuda/include/12/cuda_bf16.h +5118 -0
  152. numba_cuda/numba/cuda/include/12/cuda_bf16.hpp +3865 -0
  153. numba_cuda/numba/cuda/include/12/cuda_fp16.h +5363 -0
  154. numba_cuda/numba/cuda/include/12/cuda_fp16.hpp +3483 -0
  155. numba_cuda/numba/cuda/include/13/cuda_bf16.h +5118 -0
  156. numba_cuda/numba/cuda/include/13/cuda_bf16.hpp +3865 -0
  157. numba_cuda/numba/cuda/include/13/cuda_fp16.h +5363 -0
  158. numba_cuda/numba/cuda/include/13/cuda_fp16.hpp +3483 -0
  159. numba_cuda/numba/cuda/initialize.py +24 -0
  160. numba_cuda/numba/cuda/intrinsic_wrapper.py +41 -0
  161. numba_cuda/numba/cuda/intrinsics.py +382 -0
  162. numba_cuda/numba/cuda/itanium_mangler.py +214 -0
  163. numba_cuda/numba/cuda/kernels/__init__.py +2 -0
  164. numba_cuda/numba/cuda/kernels/reduction.py +265 -0
  165. numba_cuda/numba/cuda/kernels/transpose.py +65 -0
  166. numba_cuda/numba/cuda/libdevice.py +3386 -0
  167. numba_cuda/numba/cuda/libdevicedecl.py +20 -0
  168. numba_cuda/numba/cuda/libdevicefuncs.py +1060 -0
  169. numba_cuda/numba/cuda/libdeviceimpl.py +88 -0
  170. numba_cuda/numba/cuda/locks.py +19 -0
  171. numba_cuda/numba/cuda/lowering.py +1951 -0
  172. numba_cuda/numba/cuda/mathimpl.py +374 -0
  173. numba_cuda/numba/cuda/memory_management/__init__.py +4 -0
  174. numba_cuda/numba/cuda/memory_management/memsys.cu +99 -0
  175. numba_cuda/numba/cuda/memory_management/memsys.cuh +22 -0
  176. numba_cuda/numba/cuda/memory_management/nrt.cu +212 -0
  177. numba_cuda/numba/cuda/memory_management/nrt.cuh +48 -0
  178. numba_cuda/numba/cuda/memory_management/nrt.py +390 -0
  179. numba_cuda/numba/cuda/memory_management/nrt_context.py +438 -0
  180. numba_cuda/numba/cuda/misc/appdirs.py +594 -0
  181. numba_cuda/numba/cuda/misc/cffiimpl.py +24 -0
  182. numba_cuda/numba/cuda/misc/coverage_support.py +43 -0
  183. numba_cuda/numba/cuda/misc/dump_style.py +41 -0
  184. numba_cuda/numba/cuda/misc/findlib.py +75 -0
  185. numba_cuda/numba/cuda/misc/firstlinefinder.py +96 -0
  186. numba_cuda/numba/cuda/misc/gdb_hook.py +240 -0
  187. numba_cuda/numba/cuda/misc/literal.py +28 -0
  188. numba_cuda/numba/cuda/misc/llvm_pass_timings.py +412 -0
  189. numba_cuda/numba/cuda/misc/special.py +94 -0
  190. numba_cuda/numba/cuda/models.py +56 -0
  191. numba_cuda/numba/cuda/np/arraymath.py +5130 -0
  192. numba_cuda/numba/cuda/np/arrayobj.py +7635 -0
  193. numba_cuda/numba/cuda/np/extensions.py +11 -0
  194. numba_cuda/numba/cuda/np/linalg.py +3087 -0
  195. numba_cuda/numba/cuda/np/math/__init__.py +0 -0
  196. numba_cuda/numba/cuda/np/math/cmathimpl.py +558 -0
  197. numba_cuda/numba/cuda/np/math/mathimpl.py +487 -0
  198. numba_cuda/numba/cuda/np/math/numbers.py +1461 -0
  199. numba_cuda/numba/cuda/np/npdatetime.py +969 -0
  200. numba_cuda/numba/cuda/np/npdatetime_helpers.py +217 -0
  201. numba_cuda/numba/cuda/np/npyfuncs.py +1808 -0
  202. numba_cuda/numba/cuda/np/npyimpl.py +1027 -0
  203. numba_cuda/numba/cuda/np/numpy_support.py +798 -0
  204. numba_cuda/numba/cuda/np/polynomial/__init__.py +4 -0
  205. numba_cuda/numba/cuda/np/polynomial/polynomial_core.py +242 -0
  206. numba_cuda/numba/cuda/np/polynomial/polynomial_functions.py +380 -0
  207. numba_cuda/numba/cuda/np/ufunc/__init__.py +4 -0
  208. numba_cuda/numba/cuda/np/ufunc/decorators.py +203 -0
  209. numba_cuda/numba/cuda/np/ufunc/sigparse.py +68 -0
  210. numba_cuda/numba/cuda/np/ufunc/ufuncbuilder.py +65 -0
  211. numba_cuda/numba/cuda/np/ufunc_db.py +1282 -0
  212. numba_cuda/numba/cuda/np/unsafe/__init__.py +0 -0
  213. numba_cuda/numba/cuda/np/unsafe/ndarray.py +84 -0
  214. numba_cuda/numba/cuda/nvvmutils.py +254 -0
  215. numba_cuda/numba/cuda/printimpl.py +126 -0
  216. numba_cuda/numba/cuda/random.py +308 -0
  217. numba_cuda/numba/cuda/reshape_funcs.cu +156 -0
  218. numba_cuda/numba/cuda/serialize.py +267 -0
  219. numba_cuda/numba/cuda/simulator/__init__.py +63 -0
  220. numba_cuda/numba/cuda/simulator/_internal/__init__.py +4 -0
  221. numba_cuda/numba/cuda/simulator/_internal/cuda_bf16.py +2 -0
  222. numba_cuda/numba/cuda/simulator/api.py +179 -0
  223. numba_cuda/numba/cuda/simulator/bf16.py +4 -0
  224. numba_cuda/numba/cuda/simulator/compiler.py +38 -0
  225. numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +11 -0
  226. numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +462 -0
  227. numba_cuda/numba/cuda/simulator/cudadrv/devices.py +122 -0
  228. numba_cuda/numba/cuda/simulator/cudadrv/driver.py +66 -0
  229. numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +7 -0
  230. numba_cuda/numba/cuda/simulator/cudadrv/dummyarray.py +7 -0
  231. numba_cuda/numba/cuda/simulator/cudadrv/error.py +10 -0
  232. numba_cuda/numba/cuda/simulator/cudadrv/libs.py +10 -0
  233. numba_cuda/numba/cuda/simulator/cudadrv/linkable_code.py +61 -0
  234. numba_cuda/numba/cuda/simulator/cudadrv/nvrtc.py +11 -0
  235. numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +32 -0
  236. numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +22 -0
  237. numba_cuda/numba/cuda/simulator/dispatcher.py +11 -0
  238. numba_cuda/numba/cuda/simulator/kernel.py +320 -0
  239. numba_cuda/numba/cuda/simulator/kernelapi.py +509 -0
  240. numba_cuda/numba/cuda/simulator/memory_management/__init__.py +4 -0
  241. numba_cuda/numba/cuda/simulator/memory_management/nrt.py +21 -0
  242. numba_cuda/numba/cuda/simulator/reduction.py +19 -0
  243. numba_cuda/numba/cuda/simulator/tests/support.py +4 -0
  244. numba_cuda/numba/cuda/simulator/vector_types.py +65 -0
  245. numba_cuda/numba/cuda/simulator_init.py +18 -0
  246. numba_cuda/numba/cuda/stubs.py +635 -0
  247. numba_cuda/numba/cuda/target.py +505 -0
  248. numba_cuda/numba/cuda/testing.py +347 -0
  249. numba_cuda/numba/cuda/tests/__init__.py +62 -0
  250. numba_cuda/numba/cuda/tests/benchmarks/__init__.py +0 -0
  251. numba_cuda/numba/cuda/tests/benchmarks/test_kernel_launch.py +119 -0
  252. numba_cuda/numba/cuda/tests/cloudpickle_main_class.py +9 -0
  253. numba_cuda/numba/cuda/tests/core/serialize_usecases.py +113 -0
  254. numba_cuda/numba/cuda/tests/core/test_itanium_mangler.py +83 -0
  255. numba_cuda/numba/cuda/tests/core/test_serialize.py +371 -0
  256. numba_cuda/numba/cuda/tests/cudadrv/__init__.py +9 -0
  257. numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +147 -0
  258. numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +161 -0
  259. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +397 -0
  260. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +24 -0
  261. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +180 -0
  262. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +313 -0
  263. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +187 -0
  264. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +621 -0
  265. numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +247 -0
  266. numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +100 -0
  267. numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +198 -0
  268. numba_cuda/numba/cuda/tests/cudadrv/test_events.py +53 -0
  269. numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +72 -0
  270. numba_cuda/numba/cuda/tests/cudadrv/test_init.py +138 -0
  271. numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +43 -0
  272. numba_cuda/numba/cuda/tests/cudadrv/test_is_fp16.py +15 -0
  273. numba_cuda/numba/cuda/tests/cudadrv/test_linkable_code.py +58 -0
  274. numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +348 -0
  275. numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +128 -0
  276. numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py +301 -0
  277. numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +174 -0
  278. numba_cuda/numba/cuda/tests/cudadrv/test_nvrtc.py +28 -0
  279. numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +185 -0
  280. numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +39 -0
  281. numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +23 -0
  282. numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +38 -0
  283. numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +48 -0
  284. numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +44 -0
  285. numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +127 -0
  286. numba_cuda/numba/cuda/tests/cudapy/__init__.py +9 -0
  287. numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +231 -0
  288. numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +50 -0
  289. numba_cuda/numba/cuda/tests/cudapy/cg_cache_usecases.py +36 -0
  290. numba_cuda/numba/cuda/tests/cudapy/complex_usecases.py +116 -0
  291. numba_cuda/numba/cuda/tests/cudapy/enum_usecases.py +59 -0
  292. numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +62 -0
  293. numba_cuda/numba/cuda/tests/cudapy/jitlink.ptx +28 -0
  294. numba_cuda/numba/cuda/tests/cudapy/overload_usecases.py +33 -0
  295. numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +104 -0
  296. numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +47 -0
  297. numba_cuda/numba/cuda/tests/cudapy/test_analysis.py +1122 -0
  298. numba_cuda/numba/cuda/tests/cudapy/test_array.py +344 -0
  299. numba_cuda/numba/cuda/tests/cudapy/test_array_alignment.py +268 -0
  300. numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +203 -0
  301. numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +63 -0
  302. numba_cuda/numba/cuda/tests/cudapy/test_array_reductions.py +360 -0
  303. numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +1815 -0
  304. numba_cuda/numba/cuda/tests/cudapy/test_bfloat16.py +599 -0
  305. numba_cuda/numba/cuda/tests/cudapy/test_bfloat16_bindings.py +377 -0
  306. numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +160 -0
  307. numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +27 -0
  308. numba_cuda/numba/cuda/tests/cudapy/test_byteflow.py +98 -0
  309. numba_cuda/numba/cuda/tests/cudapy/test_cache_hints.py +210 -0
  310. numba_cuda/numba/cuda/tests/cudapy/test_caching.py +683 -0
  311. numba_cuda/numba/cuda/tests/cudapy/test_casting.py +265 -0
  312. numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +42 -0
  313. numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +718 -0
  314. numba_cuda/numba/cuda/tests/cudapy/test_complex.py +370 -0
  315. numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +23 -0
  316. numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +142 -0
  317. numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +178 -0
  318. numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +193 -0
  319. numba_cuda/numba/cuda/tests/cudapy/test_copy_propagate.py +131 -0
  320. numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +438 -0
  321. numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +94 -0
  322. numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +101 -0
  323. numba_cuda/numba/cuda/tests/cudapy/test_debug.py +105 -0
  324. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +889 -0
  325. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo_types.py +476 -0
  326. numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +500 -0
  327. numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +820 -0
  328. numba_cuda/numba/cuda/tests/cudapy/test_enums.py +152 -0
  329. numba_cuda/numba/cuda/tests/cudapy/test_errors.py +111 -0
  330. numba_cuda/numba/cuda/tests/cudapy/test_exception.py +170 -0
  331. numba_cuda/numba/cuda/tests/cudapy/test_extending.py +1088 -0
  332. numba_cuda/numba/cuda/tests/cudapy/test_extending_types.py +71 -0
  333. numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +265 -0
  334. numba_cuda/numba/cuda/tests/cudapy/test_flow_control.py +1433 -0
  335. numba_cuda/numba/cuda/tests/cudapy/test_forall.py +57 -0
  336. numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +34 -0
  337. numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +69 -0
  338. numba_cuda/numba/cuda/tests/cudapy/test_globals.py +62 -0
  339. numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +474 -0
  340. numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +167 -0
  341. numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +92 -0
  342. numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +39 -0
  343. numba_cuda/numba/cuda/tests/cudapy/test_inline.py +170 -0
  344. numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +255 -0
  345. numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +1219 -0
  346. numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +263 -0
  347. numba_cuda/numba/cuda/tests/cudapy/test_ir.py +598 -0
  348. numba_cuda/numba/cuda/tests/cudapy/test_ir_utils.py +276 -0
  349. numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +101 -0
  350. numba_cuda/numba/cuda/tests/cudapy/test_lang.py +68 -0
  351. numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +123 -0
  352. numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +194 -0
  353. numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +220 -0
  354. numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +173 -0
  355. numba_cuda/numba/cuda/tests/cudapy/test_make_function_to_jit_function.py +364 -0
  356. numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +47 -0
  357. numba_cuda/numba/cuda/tests/cudapy/test_math.py +842 -0
  358. numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +76 -0
  359. numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +78 -0
  360. numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +25 -0
  361. numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +145 -0
  362. numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +39 -0
  363. numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +82 -0
  364. numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +53 -0
  365. numba_cuda/numba/cuda/tests/cudapy/test_operator.py +504 -0
  366. numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +93 -0
  367. numba_cuda/numba/cuda/tests/cudapy/test_overload.py +402 -0
  368. numba_cuda/numba/cuda/tests/cudapy/test_powi.py +128 -0
  369. numba_cuda/numba/cuda/tests/cudapy/test_print.py +193 -0
  370. numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +37 -0
  371. numba_cuda/numba/cuda/tests/cudapy/test_random.py +117 -0
  372. numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +614 -0
  373. numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +130 -0
  374. numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +94 -0
  375. numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +83 -0
  376. numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +86 -0
  377. numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +40 -0
  378. numba_cuda/numba/cuda/tests/cudapy/test_sm.py +457 -0
  379. numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +233 -0
  380. numba_cuda/numba/cuda/tests/cudapy/test_ssa.py +454 -0
  381. numba_cuda/numba/cuda/tests/cudapy/test_stream_api.py +56 -0
  382. numba_cuda/numba/cuda/tests/cudapy/test_sync.py +277 -0
  383. numba_cuda/numba/cuda/tests/cudapy/test_tracing.py +200 -0
  384. numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +90 -0
  385. numba_cuda/numba/cuda/tests/cudapy/test_typeconv.py +333 -0
  386. numba_cuda/numba/cuda/tests/cudapy/test_typeinfer.py +538 -0
  387. numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +585 -0
  388. numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +42 -0
  389. numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +485 -0
  390. numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +312 -0
  391. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +23 -0
  392. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +183 -0
  393. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +40 -0
  394. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +40 -0
  395. numba_cuda/numba/cuda/tests/cudapy/test_warning.py +206 -0
  396. numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +331 -0
  397. numba_cuda/numba/cuda/tests/cudasim/__init__.py +9 -0
  398. numba_cuda/numba/cuda/tests/cudasim/support.py +9 -0
  399. numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +111 -0
  400. numba_cuda/numba/cuda/tests/data/__init__.py +2 -0
  401. numba_cuda/numba/cuda/tests/data/cta_barrier.cu +28 -0
  402. numba_cuda/numba/cuda/tests/data/cuda_include.cu +10 -0
  403. numba_cuda/numba/cuda/tests/data/error.cu +12 -0
  404. numba_cuda/numba/cuda/tests/data/include/add.cuh +8 -0
  405. numba_cuda/numba/cuda/tests/data/jitlink.cu +28 -0
  406. numba_cuda/numba/cuda/tests/data/jitlink.ptx +49 -0
  407. numba_cuda/numba/cuda/tests/data/warn.cu +12 -0
  408. numba_cuda/numba/cuda/tests/doc_examples/__init__.py +9 -0
  409. numba_cuda/numba/cuda/tests/doc_examples/ffi/__init__.py +2 -0
  410. numba_cuda/numba/cuda/tests/doc_examples/ffi/functions.cu +54 -0
  411. numba_cuda/numba/cuda/tests/doc_examples/ffi/include/mul.cuh +8 -0
  412. numba_cuda/numba/cuda/tests/doc_examples/ffi/saxpy.cu +14 -0
  413. numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +86 -0
  414. numba_cuda/numba/cuda/tests/doc_examples/test_cpointer.py +68 -0
  415. numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +81 -0
  416. numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +141 -0
  417. numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +160 -0
  418. numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +180 -0
  419. numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +119 -0
  420. numba_cuda/numba/cuda/tests/doc_examples/test_random.py +66 -0
  421. numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +80 -0
  422. numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +206 -0
  423. numba_cuda/numba/cuda/tests/doc_examples/test_ufunc.py +53 -0
  424. numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +76 -0
  425. numba_cuda/numba/cuda/tests/nocuda/__init__.py +9 -0
  426. numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +391 -0
  427. numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +48 -0
  428. numba_cuda/numba/cuda/tests/nocuda/test_import.py +63 -0
  429. numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +252 -0
  430. numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +59 -0
  431. numba_cuda/numba/cuda/tests/nrt/__init__.py +9 -0
  432. numba_cuda/numba/cuda/tests/nrt/test_nrt.py +387 -0
  433. numba_cuda/numba/cuda/tests/nrt/test_nrt_refct.py +124 -0
  434. numba_cuda/numba/cuda/tests/support.py +900 -0
  435. numba_cuda/numba/cuda/typeconv/__init__.py +4 -0
  436. numba_cuda/numba/cuda/typeconv/castgraph.py +137 -0
  437. numba_cuda/numba/cuda/typeconv/rules.py +63 -0
  438. numba_cuda/numba/cuda/typeconv/typeconv.py +121 -0
  439. numba_cuda/numba/cuda/types/__init__.py +233 -0
  440. numba_cuda/numba/cuda/types/__init__.pyi +167 -0
  441. numba_cuda/numba/cuda/types/abstract.py +9 -0
  442. numba_cuda/numba/cuda/types/common.py +9 -0
  443. numba_cuda/numba/cuda/types/containers.py +9 -0
  444. numba_cuda/numba/cuda/types/cuda_abstract.py +533 -0
  445. numba_cuda/numba/cuda/types/cuda_common.py +110 -0
  446. numba_cuda/numba/cuda/types/cuda_containers.py +971 -0
  447. numba_cuda/numba/cuda/types/cuda_function_type.py +230 -0
  448. numba_cuda/numba/cuda/types/cuda_functions.py +798 -0
  449. numba_cuda/numba/cuda/types/cuda_iterators.py +120 -0
  450. numba_cuda/numba/cuda/types/cuda_misc.py +569 -0
  451. numba_cuda/numba/cuda/types/cuda_npytypes.py +690 -0
  452. numba_cuda/numba/cuda/types/cuda_scalars.py +280 -0
  453. numba_cuda/numba/cuda/types/ext_types.py +101 -0
  454. numba_cuda/numba/cuda/types/function_type.py +11 -0
  455. numba_cuda/numba/cuda/types/functions.py +9 -0
  456. numba_cuda/numba/cuda/types/iterators.py +9 -0
  457. numba_cuda/numba/cuda/types/misc.py +9 -0
  458. numba_cuda/numba/cuda/types/npytypes.py +9 -0
  459. numba_cuda/numba/cuda/types/scalars.py +9 -0
  460. numba_cuda/numba/cuda/typing/__init__.py +19 -0
  461. numba_cuda/numba/cuda/typing/arraydecl.py +939 -0
  462. numba_cuda/numba/cuda/typing/asnumbatype.py +130 -0
  463. numba_cuda/numba/cuda/typing/bufproto.py +70 -0
  464. numba_cuda/numba/cuda/typing/builtins.py +1209 -0
  465. numba_cuda/numba/cuda/typing/cffi_utils.py +219 -0
  466. numba_cuda/numba/cuda/typing/cmathdecl.py +47 -0
  467. numba_cuda/numba/cuda/typing/collections.py +138 -0
  468. numba_cuda/numba/cuda/typing/context.py +782 -0
  469. numba_cuda/numba/cuda/typing/ctypes_utils.py +125 -0
  470. numba_cuda/numba/cuda/typing/dictdecl.py +63 -0
  471. numba_cuda/numba/cuda/typing/enumdecl.py +74 -0
  472. numba_cuda/numba/cuda/typing/listdecl.py +147 -0
  473. numba_cuda/numba/cuda/typing/mathdecl.py +158 -0
  474. numba_cuda/numba/cuda/typing/npdatetime.py +322 -0
  475. numba_cuda/numba/cuda/typing/npydecl.py +749 -0
  476. numba_cuda/numba/cuda/typing/setdecl.py +115 -0
  477. numba_cuda/numba/cuda/typing/templates.py +1446 -0
  478. numba_cuda/numba/cuda/typing/typeof.py +301 -0
  479. numba_cuda/numba/cuda/ufuncs.py +746 -0
  480. numba_cuda/numba/cuda/utils.py +724 -0
  481. numba_cuda/numba/cuda/vector_types.py +214 -0
  482. numba_cuda/numba/cuda/vectorizers.py +260 -0
  483. numba_cuda-0.21.1.dist-info/METADATA +109 -0
  484. numba_cuda-0.21.1.dist-info/RECORD +488 -0
  485. numba_cuda-0.21.1.dist-info/WHEEL +5 -0
  486. numba_cuda-0.21.1.dist-info/licenses/LICENSE +26 -0
  487. numba_cuda-0.21.1.dist-info/licenses/LICENSE.numba +24 -0
  488. numba_cuda-0.21.1.dist-info/top_level.txt +1 -0
@@ -0,0 +1,577 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
4
+ """
5
+ API that are reported to numba.cuda
6
+ """
7
+
8
+ import contextlib
9
+ import os
10
+
11
+ import numpy as np
12
+
13
+ from .cudadrv import devicearray, devices, driver
14
+ from numba.cuda.core import config
15
+ from numba.cuda.api_util import prepare_shape_strides_dtype
16
+
17
+ # NDarray device helper
18
+
19
+ require_context = devices.require_context
20
+ current_context = devices.get_context
21
+ gpus = devices.gpus
22
+
23
+
24
def from_cuda_array_interface(desc, owner=None, sync=True):
    """Wrap the memory described by a cuda-array-interface dict in a
    DeviceNDArray.

    :param desc: The cuda-array-interface description (a mapping providing at
                 least ``version``, ``shape``, ``typestr`` and ``data``;
                 ``strides``, ``mask`` and ``stream`` are optional).
    :param owner: The owner of the underlying memory; it is handed to the
                  ``MemoryPointer`` backing the returned array.
    :param sync: If ``True``, the imported stream (if present) is
                 synchronized, provided ``config.CUDA_ARRAY_INTERFACE_SYNC``
                 is also set.
    :raises NotImplementedError: if the description carries a mask.
    """
    version = desc.get("version")
    # The mask entry was introduced in interface version 1. Ideally we would
    # detect an all-valid mask, but any mask at all is rejected for now.
    if 1 <= version and desc.get("mask") is not None:
        raise NotImplementedError("Masked arrays are not supported")

    shape, strides, dtype = prepare_shape_strides_dtype(
        desc["shape"], desc.get("strides"), desc["typestr"], order="C"
    )
    nbytes = driver.memory_size_from_info(shape, strides, dtype.itemsize)

    # The first element of the "data" entry is the device pointer value.
    devptr = driver.binding.CUdeviceptr(desc["data"][0])
    gpu_data = driver.MemoryPointer(devptr, size=nbytes, owner=owner)

    stream_ptr = desc.get("stream", None)
    if stream_ptr is None:
        # No stream was exported; 0 is passed on as the array's stream.
        stream = 0
    else:
        stream = external_stream(stream_ptr)
        if sync and config.CUDA_ARRAY_INTERFACE_SYNC:
            stream.synchronize()

    return devicearray.DeviceNDArray(
        shape=shape,
        strides=strides,
        dtype=dtype,
        gpu_data=gpu_data,
        stream=stream,
    )
62
+
63
+
64
def as_cuda_array(obj, sync=True):
    """Create a DeviceNDArray from any object that implements
    the :ref:`cuda array interface <cuda-array-interface>`.

    The result is built from ``obj.__cuda_array_interface__`` with ``obj``
    passed along as the owner of the memory, so no data is copied here.

    If ``sync`` is ``True``, then the imported stream (if present) will be
    synchronized.

    :raises TypeError: if ``obj`` has no ``__cuda_array_interface__``
                       attribute (or the attribute is ``None``).
    """
    interface = getattr(obj, "__cuda_array_interface__", None)
    if interface is None:
        raise TypeError("*obj* doesn't implement the cuda array interface.")
    return from_cuda_array_interface(interface, owner=obj, sync=sync)
79
+
80
+
81
def is_cuda_array(obj):
    """Return whether *obj* exposes a ``__cuda_array_interface__`` attribute.

    Only the presence of the attribute is checked; its contents are not
    validated.
    """
    attr_name = "__cuda_array_interface__"
    return hasattr(obj, attr_name)
87
+
88
+
89
def is_float16_supported():
    """Report whether 16-bit floats are supported.

    This unconditionally returns ``True``: float16 is always supported in
    current versions of Numba.
    """
    return True
95
+
96
+
97
def is_bfloat16_supported():
    """Report whether bfloat16 is supported by the current device.

    The answer is the ``supports_bfloat16`` attribute of the current
    context's device. bfloat16 is only supported on devices with compute
    capability >= 8.0.
    """
    device = current_context().device
    return device.supports_bfloat16
103
+
104
+
105
@require_context
def to_device(obj, stream=0, copy=True, to=None):
    """to_device(obj, stream=0, copy=True, to=None)

    Allocate and transfer a numpy ndarray or structured scalar to the device.

    When ``to`` is ``None`` the device array is obtained from
    ``devicearray.auto_device``. Otherwise ``to`` is used as the destination:
    ``obj`` is copied into it when ``copy`` is true, and ``to`` is returned.

    To copy host->device a numpy array::

        ary = np.arange(10)
        d_ary = cuda.to_device(ary)

    To enqueue the transfer to a stream::

        stream = cuda.stream()
        d_ary = cuda.to_device(ary, stream=stream)

    The resulting ``d_ary`` is a ``DeviceNDArray``.

    To copy device->host::

        hary = d_ary.copy_to_host()

    To copy device->host to an existing array::

        ary = np.empty(shape=d_ary.shape, dtype=d_ary.dtype)
        d_ary.copy_to_host(ary)

    To enqueue the transfer to a stream::

        hary = d_ary.copy_to_host(stream=stream)
    """
    if to is not None:
        # Caller supplied the destination: optionally fill it, then return it.
        if copy:
            to.copy_to_device(obj, stream=stream)
        return to

    # auto_device returns a pair; only the device array is needed here.
    devary, _ = devicearray.auto_device(
        obj, stream=stream, copy=copy, user_explicit=True
    )
    return devary
144
+
145
+
146
@require_context
def device_array(shape, dtype=np.float64, strides=None, order="C", stream=0):
    """device_array(shape, dtype=np.float64, strides=None, order='C', stream=0)

    Allocate an empty device ndarray. Similar to :meth:`numpy.empty`.

    The shape, strides and dtype are normalized with
    ``prepare_shape_strides_dtype`` before the ``DeviceNDArray`` is created.
    """
    norm_shape, norm_strides, norm_dtype = prepare_shape_strides_dtype(
        shape, strides, dtype, order
    )
    return devicearray.DeviceNDArray(
        shape=norm_shape,
        strides=norm_strides,
        dtype=norm_dtype,
        stream=stream,
    )
158
+
159
+
160
@require_context
def managed_array(
    shape,
    dtype=np.float64,
    strides=None,
    order="C",
    stream=0,
    attach_global=True,
):
    """managed_array(shape, dtype=np.float64, strides=None, order='C', stream=0,
                     attach_global=True)

    Allocate a np.ndarray with a buffer that is managed.
    Similar to np.empty().

    Managed memory is supported on Linux / x86 and PowerPC, and is considered
    experimental on Windows and Linux / AArch64.

    :param attach_global: A flag indicating whether to attach globally. Global
                          attachment implies that the memory is accessible from
                          any stream on any device. If ``False``, attachment is
                          *host*, and memory is only accessible by devices
                          with Compute Capability 6.0 and later.
    :return: A ``devicearray.ManagedNDArray`` view over the managed buffer.
    """
    shape, strides, dtype = prepare_shape_strides_dtype(
        shape, strides, dtype, order
    )
    nbytes = driver.memory_size_from_info(shape, strides, dtype.itemsize)
    managed_mem = current_context().memallocmanaged(
        nbytes, attach_global=attach_global
    )
    # Build a plain ndarray over the managed buffer, then re-type it as a
    # ManagedNDArray and attach the buffer and stream to the view.
    host_array = np.ndarray(
        shape=shape,
        strides=strides,
        dtype=dtype,
        order=order,
        buffer=managed_mem,
    )
    managed_view = host_array.view(type=devicearray.ManagedNDArray)
    managed_view.device_setup(managed_mem, stream=stream)
    return managed_view
197
+
198
+
199
@require_context
def pinned_array(shape, dtype=np.float64, strides=None, order="C"):
    """pinned_array(shape, dtype=np.float64, strides=None, order='C')

    Allocate an :class:`ndarray <numpy.ndarray>` with a buffer that is pinned
    (pagelocked). Similar to :func:`np.empty() <numpy.empty>`.

    The buffer comes from the current context's ``memhostalloc``.
    """
    shape, strides, dtype = prepare_shape_strides_dtype(
        shape, strides, dtype, order
    )
    nbytes = driver.memory_size_from_info(shape, strides, dtype.itemsize)
    pinned_mem = current_context().memhostalloc(nbytes)
    return np.ndarray(
        shape=shape,
        strides=strides,
        dtype=dtype,
        order=order,
        buffer=pinned_mem,
    )
214
+
215
+
216
@require_context
def mapped_array(
    shape,
    dtype=np.float64,
    strides=None,
    order="C",
    stream=0,
    portable=False,
    wc=False,
):
    """mapped_array(shape, dtype=np.float64, strides=None, order='C', stream=0,
                    portable=False, wc=False)

    Allocate a mapped ndarray with a buffer that is pinned and mapped on
    to the device. Similar to np.empty()

    :param portable: a boolean flag to allow the allocated device memory to be
              usable in multiple devices.
    :param wc: a boolean flag to enable writecombined allocation which is faster
        to write by the host and to read by the device, but slower to
        read by the host.
    :return: A ``devicearray.MappedNDArray`` view over the mapped buffer.
    """
    shape, strides, dtype = prepare_shape_strides_dtype(
        shape, strides, dtype, order
    )
    bytesize = driver.memory_size_from_info(shape, strides, dtype.itemsize)
    # Forward the allocation flags; previously ``portable`` and ``wc`` were
    # accepted but silently dropped, so they never reached the allocator.
    buffer = current_context().memhostalloc(
        bytesize, mapped=True, portable=portable, wc=wc
    )
    npary = np.ndarray(
        shape=shape, strides=strides, dtype=dtype, order=order, buffer=buffer
    )
    # Re-type the host array as a MappedNDArray and attach buffer and stream.
    mappedview = np.ndarray.view(npary, type=devicearray.MappedNDArray)
    mappedview.device_setup(buffer, stream=stream)
    return mappedview
249
+
250
+
251
@contextlib.contextmanager
@require_context
def open_ipc_array(handle, shape, dtype, strides=None, offset=0):
    """
    A context manager that opens a IPC *handle* (*CUipcMemHandle*) that is
    represented as a sequence of bytes (e.g. *bytes*, tuple of int)
    and represent it as an array of the given *shape*, *strides* and *dtype*.
    The *strides* can be omitted.  In that case, it is assumed to be a 1D
    C contiguous array.

    Yields a device array.

    The IPC handle is closed automatically when context manager exits,
    including when the body of the ``with`` statement raises.
    """
    dtype = np.dtype(dtype)
    # compute size
    size = np.prod(shape) * dtype.itemsize
    # manually recreate the IPC mem handle
    driver_handle = driver.binding.CUipcMemHandle()
    driver_handle.reserved = handle
    # use *IpcHandle* to open the IPC memory
    ipchandle = driver.IpcHandle(None, driver_handle, size, offset=offset)
    devary = ipchandle.open_array(
        current_context(), shape=shape, strides=strides, dtype=dtype
    )
    try:
        yield devary
    finally:
        # Close even if the caller's block raised; a bare statement after the
        # yield would be skipped in that case and leak the opened handle.
        ipchandle.close()
277
+
278
+
279
def synchronize():
    """Synchronize the current context and return the result of that call."""
    ctx = current_context()
    return ctx.synchronize()
282
+
283
+
284
+ def _contiguous_strides_like_array(ary):
285
+ """
286
+ Given an array, compute strides for a new contiguous array of the same
287
+ shape.
288
+ """
289
+ # Don't recompute strides if the default strides will be sufficient to
290
+ # create a contiguous array.
291
+ if ary.flags["C_CONTIGUOUS"] or ary.flags["F_CONTIGUOUS"] or ary.ndim <= 1:
292
+ return None
293
+
294
+ # Otherwise, we need to compute new strides using an algorithm adapted from
295
+ # NumPy v1.17.4's PyArray_NewLikeArrayWithShape in
296
+ # core/src/multiarray/ctors.c. We permute the strides in ascending order
297
+ # then compute the stride for the dimensions with the same permutation.
298
+
299
+ # Stride permutation. E.g. a stride array (4, -2, 12) becomes
300
+ # [(1, -2), (0, 4), (2, 12)]
301
+ strideperm = [x for x in enumerate(ary.strides)]
302
+ strideperm.sort(key=lambda x: x[1])
303
+
304
+ # Compute new strides using permutation
305
+ strides = [0] * len(ary.strides)
306
+ stride = ary.dtype.itemsize
307
+ for i_perm, _ in strideperm:
308
+ strides[i_perm] = stride
309
+ stride *= ary.shape[i_perm]
310
+ return tuple(strides)
311
+
312
+
313
+ def _order_like_array(ary):
314
+ if ary.flags["F_CONTIGUOUS"] and not ary.flags["C_CONTIGUOUS"]:
315
+ return "F"
316
+ else:
317
+ return "C"
318
+
319
+
320
def device_array_like(ary, stream=0):
    """
    Call :func:`device_array() <numba.cuda.device_array>` with the shape,
    dtype, strides and order derived from *ary*.
    """
    return device_array(
        shape=ary.shape,
        dtype=ary.dtype,
        strides=_contiguous_strides_like_array(ary),
        order=_order_like_array(ary),
        stream=stream,
    )
334
+
335
+
336
def mapped_array_like(ary, stream=0, portable=False, wc=False):
    """
    Call :func:`mapped_array() <numba.cuda.mapped_array>` with the shape,
    dtype, strides and order derived from *ary*; ``stream``, ``portable`` and
    ``wc`` are passed through unchanged.
    """
    return mapped_array(
        shape=ary.shape,
        dtype=ary.dtype,
        strides=_contiguous_strides_like_array(ary),
        order=_order_like_array(ary),
        stream=stream,
        portable=portable,
        wc=wc,
    )
352
+
353
+
354
def pinned_array_like(ary):
    """
    Call :func:`pinned_array() <numba.cuda.pinned_array>` with the shape,
    dtype, strides and order derived from *ary*.
    """
    return pinned_array(
        shape=ary.shape,
        dtype=ary.dtype,
        strides=_contiguous_strides_like_array(ary),
        order=_order_like_array(ary),
    )
364
+
365
+
366
+ # Stream helper
367
@require_context
def stream():
    """
    Create a CUDA stream that represents a command queue for the device.

    The stream is created on the current context.
    """
    ctx = current_context()
    return ctx.create_stream()
373
+
374
+
375
@require_context
def default_stream():
    """
    Get the default CUDA stream of the current context.

    CUDA semantics in general are that the default stream is either the
    legacy default stream or the per-thread default stream depending on which
    CUDA APIs are in use. In Numba, the APIs for the legacy default stream
    are always the ones in use, but an option to use APIs for the per-thread
    default stream may be provided in future.
    """
    ctx = current_context()
    return ctx.get_default_stream()
385
+
386
+
387
@require_context
def legacy_default_stream():
    """
    Get the legacy default CUDA stream of the current context.
    """
    ctx = current_context()
    return ctx.get_legacy_default_stream()
393
+
394
+
395
@require_context
def per_thread_default_stream():
    """
    Get the per-thread default CUDA stream of the current context.
    """
    ctx = current_context()
    return ctx.get_per_thread_default_stream()
401
+
402
+
403
@require_context
def external_stream(ptr):
    """Create a Numba stream object for a stream allocated outside Numba.

    The wrapper is created by the current context.

    :param ptr: Pointer to the external stream to wrap in a Numba Stream
    :type ptr: int
    """
    ctx = current_context()
    return ctx.create_external_stream(ptr)
411
+
412
+
413
+ # Page lock
414
@require_context
@contextlib.contextmanager
def pinned(*arylist):
    """A context manager for temporary pinning a sequence of host ndarrays.

    Each array is pinned (not mapped) through the current context's
    ``mempin``. Nothing is yielded to the ``with`` statement.
    """
    # The pin objects are only held in this local so that they stay referenced
    # while the caller's block runs.
    # NOTE(review): unpinning presumably happens when these references are
    # dropped - confirm against the driver's pinned-memory finalizer.
    pinned_refs = [
        current_context().mempin(
            ary,
            driver.host_pointer(ary),
            driver.host_memory_size(ary),
            mapped=False,
        )
        for ary in arylist
    ]
    yield
428
+
429
+
430
@require_context
@contextlib.contextmanager
def mapped(*arylist, **kws):
    """A context manager for temporarily mapping a sequence of host ndarrays.

    Each array is pinned with ``mapped=True`` and wrapped in a device array.
    A single device array is yielded when exactly one host array was given;
    otherwise a list of device arrays is yielded.

    :param arylist: The host ndarrays to map.
    :param stream: Optional keyword; the stream passed to each device array
                   (default 0). No other keyword is accepted.
    :raises TypeError: if a keyword other than ``stream`` is supplied.
    """
    # Validate explicitly rather than with ``assert``: asserts vanish under
    # ``python -O``, and the old check let unknown keywords through whenever
    # ``stream`` was also present.
    if set(kws) - {"stream"}:
        raise TypeError("Only accept 'stream' as keyword.")
    stream = kws.get("stream", 0)
    pmlist = []
    devarylist = []
    try:
        # Pin inside the ``try`` so that arrays already pinned are released
        # if pinning or wrapping a later array fails.
        for ary in arylist:
            pm = current_context().mempin(
                ary,
                driver.host_pointer(ary),
                driver.host_memory_size(ary),
                mapped=True,
            )
            pmlist.append(pm)
            devary = devicearray.from_array_like(ary, gpu_data=pm, stream=stream)
            devarylist.append(devary)
        if len(devarylist) == 1:
            yield devarylist[0]
        else:
            yield devarylist
    finally:
        # When exiting from `with cuda.mapped(*arrs) as mapped_arrs:`, the name
        # `mapped_arrs` stays in scope, blocking automatic unmapping based on
        # reference count. We therefore invoke the finalizer manually.
        for pm in pmlist:
            pm.free()
459
+
460
+
461
def event(timing=True):
    """
    Create a CUDA event on the current context.

    Timing data is only recorded by the event if it is created with
    ``timing=True``.
    """
    return current_context().create_event(timing=timing)
468
+
469
+
470
+ event_elapsed_time = driver.event_elapsed_time
471
+
472
+
473
+ # Device selection
474
+
475
+
476
def select_device(device_id):
    """
    Make the context associated with device *device_id* the current context.

    The context is obtained with ``devices.get_context(device_id)``.

    Returns a Device instance.

    Raises exception on error.
    """
    return devices.get_context(device_id).device
486
+
487
+
488
def get_current_device():
    """Get current device associated with the current thread.

    This is the ``device`` attribute of the current context.
    """
    ctx = current_context()
    return ctx.device
491
+
492
+
493
def list_devices():
    """Return a list of all detected devices (``devices.gpus``)."""
    detected = devices.gpus
    return detected
496
+
497
+
498
def close():
    """
    Explicitly clears all contexts in the current thread, and destroys all
    contexts if the current thread is the main thread.

    The runtime system object (``rtsys``) is closed first, then
    ``devices.reset()`` is called.
    """
    # Imported here rather than at module level.
    from .memory_management import rtsys

    # The memsys object must be cleared in case it has been used already.
    rtsys.close()
    devices.reset()
509
+
510
+
511
def _auto_device(ary, stream=0, copy=True):
    """Forward to ``devicearray.auto_device`` and return its result
    unchanged."""
    result = devicearray.auto_device(ary, stream=stream, copy=copy)
    return result
513
+
514
+
515
def detect():
    """
    Detect supported CUDA hardware and print a summary of the detected hardware.

    For every device a header line and a table of attributes are printed,
    followed by an overall summary.

    Returns a boolean indicating whether any supported devices were detected.
    """
    devlist = list_devices()
    print("Found %d CUDA devices" % len(devlist))
    supported_count = 0
    for dev in devlist:
        cc = dev.compute_capability
        kernel_timeout = dev.KERNEL_EXEC_TIMEOUT
        tcc = dev.TCC_DRIVER
        fp32_to_fp64_ratio = dev.SINGLE_TO_DOUBLE_PRECISION_PERF_RATIO

        attrs = [
            ("Compute Capability", "%d.%d" % cc),
            ("PCI Device ID", dev.PCI_DEVICE_ID),
            ("PCI Bus ID", dev.PCI_BUS_ID),
            ("UUID", dev.uuid),
            ("Watchdog", "Enabled" if kernel_timeout else "Disabled"),
        ]
        # The driver mode is only reported on Windows.
        if os.name == "nt":
            attrs.append(("Compute Mode", "TCC" if tcc else "WDDM"))
        attrs.append(("FP32/FP64 Performance Ratio", fp32_to_fp64_ratio))

        # Classify by compute capability: >= 5.0 is supported, 3.5 up to
        # (but excluding) 5.0 is supported but deprecated, lower is not.
        if cc >= (5, 0):
            support = "[SUPPORTED]"
            is_supported = True
        elif cc >= (3, 5):
            support = "[SUPPORTED (DEPRECATED)]"
            is_supported = True
        else:
            support = "[NOT SUPPORTED: CC < 3.5]"
            is_supported = False
        if is_supported:
            supported_count += 1

        print("id %d %20s %40s" % (dev.id, dev.name, support))
        for key, val in attrs:
            print("%40s: %s" % (key, val))

    print("Summary:")
    print("\t%d/%d devices are supported" % (supported_count, len(devlist)))
    return supported_count > 0
554
+
555
+
556
@contextlib.contextmanager
def defer_cleanup():
    """
    Temporarily disable memory deallocation.
    Use this to prevent resource deallocation breaking asynchronous execution.

    This wraps the current context's own ``defer_cleanup()`` context manager.

    For example::

        with defer_cleanup():
            # all cleanup is deferred in here
            do_speed_critical_code()
        # cleanup can occur here

    Note: this context manager can be nested.
    """
    ctx = current_context()
    with ctx.defer_cleanup():
        yield
573
+
574
+
575
+ profiling = require_context(driver.profiling)
576
+ profile_start = require_context(driver.profile_start)
577
+ profile_stop = require_context(driver.profile_stop)
@@ -0,0 +1,76 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
4
+ from numba import types
5
+ from numba.core import cgutils
6
+ import numpy as np
7
+
8
+ import functools
9
+
10
+
11
def prepare_shape_strides_dtype(shape, strides, dtype, order):
    """Normalize array construction arguments.

    :param shape: An integer (Python or NumPy integer scalar) or an iterable
                  of integers.
    :param strides: An integer (Python or NumPy integer scalar), an iterable
                    of integers, or a false value (e.g. ``None``) to have the
                    strides computed from *shape*, *dtype* and *order*.
    :param dtype: Anything accepted by ``np.dtype``.
    :param order: ``"C"`` or ``"F"``; only consulted when strides must be
                  computed.
    :return: ``(shape, strides, dtype)`` where shape and strides are tuples
             and dtype is an ``np.dtype`` instance.
    :raises TypeError: if *shape* is a float scalar or a floating-point
                       ndarray.
    """
    dtype = np.dtype(dtype)
    if isinstance(shape, (float, np.floating)):
        raise TypeError("shape must be an integer or tuple of integers")
    if isinstance(shape, np.ndarray) and np.issubdtype(
        shape.dtype, np.floating
    ):
        raise TypeError("shape must be an integer or tuple of integers")
    # NumPy integer scalars (e.g. np.int64) are not ``int`` instances and are
    # not iterable, so they must be recognised here rather than being passed
    # to ``tuple()``.
    if isinstance(shape, (int, np.integer)):
        shape = (int(shape),)
    else:
        shape = tuple(shape)
    if isinstance(strides, (int, np.integer)):
        strides = (int(strides),)
    else:
        if not strides:
            strides = _fill_stride_by_order(shape, dtype, order)
        else:
            strides = tuple(strides)
    return shape, strides, dtype
31
+
32
+
33
+ @functools.cache
34
+ def _fill_stride_by_order(shape, dtype, order):
35
+ ndims = len(shape)
36
+ if not ndims:
37
+ return ()
38
+ strides = [0] * ndims
39
+ if order == "C":
40
+ strides[-1] = dtype.itemsize
41
+ # -2 because we subtract one for zero-based indexing and another one
42
+ # for skipping the already-filled-in last element
43
+ for d in range(ndims - 2, -1, -1):
44
+ strides[d] = strides[d + 1] * shape[d + 1]
45
+ elif order == "F":
46
+ strides[0] = dtype.itemsize
47
+ for d in range(1, ndims):
48
+ strides[d] = strides[d - 1] * shape[d - 1]
49
+ else:
50
+ raise ValueError("must be either C/F order")
51
+ return tuple(strides)
52
+
53
+
54
def normalize_indices(context, builder, indty, inds, aryty, valty):
    """
    Convert integer indices into tuple of intp

    A single integer index is treated as a 1-tuple; a tuple index is unpacked
    with ``cgutils.unpack_tuple``. Every index value is then cast to
    ``types.intp``.

    :return: ``(indty, indices)`` - the (possibly tuple-wrapped) index type
             and the list of cast index values.
    :raises TypeError: if ``aryty.dtype`` differs from ``valty``, or if the
                       number of indices differs from ``aryty.ndim``.
    """
    if indty in types.integer_domain:
        # Scalar index: promote the type to a homogeneous 1-tuple.
        indty = types.UniTuple(dtype=indty, count=1)
        raw_indices = [inds]
    else:
        raw_indices = cgutils.unpack_tuple(builder, inds, count=len(indty))

    indices = [
        context.cast(builder, value, fromty, types.intp)
        for fromty, value in zip(indty, raw_indices)
    ]

    elem_ty = aryty.dtype
    if elem_ty != valty:
        raise TypeError("expect %s but got %s" % (elem_ty, valty))

    index_rank = len(indty)
    if aryty.ndim != index_rank:
        raise TypeError(
            "indexing %d-D array with %d-D index" % (aryty.ndim, index_rank)
        )

    return indty, indices