numba-cuda 0.22.1__cp311-cp311-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (488) hide show
  1. _numba_cuda_redirector.pth +4 -0
  2. _numba_cuda_redirector.py +89 -0
  3. numba_cuda/VERSION +1 -0
  4. numba_cuda/__init__.py +6 -0
  5. numba_cuda/_version.py +11 -0
  6. numba_cuda/numba/cuda/__init__.py +70 -0
  7. numba_cuda/numba/cuda/_internal/cuda_bf16.py +16394 -0
  8. numba_cuda/numba/cuda/_internal/cuda_fp16.py +8112 -0
  9. numba_cuda/numba/cuda/api.py +580 -0
  10. numba_cuda/numba/cuda/api_util.py +76 -0
  11. numba_cuda/numba/cuda/args.py +72 -0
  12. numba_cuda/numba/cuda/bf16.py +397 -0
  13. numba_cuda/numba/cuda/cache_hints.py +287 -0
  14. numba_cuda/numba/cuda/cext/__init__.py +2 -0
  15. numba_cuda/numba/cuda/cext/_devicearray.cpp +159 -0
  16. numba_cuda/numba/cuda/cext/_devicearray.cpython-311-aarch64-linux-gnu.so +0 -0
  17. numba_cuda/numba/cuda/cext/_devicearray.h +29 -0
  18. numba_cuda/numba/cuda/cext/_dispatcher.cpp +1098 -0
  19. numba_cuda/numba/cuda/cext/_dispatcher.cpython-311-aarch64-linux-gnu.so +0 -0
  20. numba_cuda/numba/cuda/cext/_hashtable.cpp +532 -0
  21. numba_cuda/numba/cuda/cext/_hashtable.h +135 -0
  22. numba_cuda/numba/cuda/cext/_helperlib.c +71 -0
  23. numba_cuda/numba/cuda/cext/_helperlib.cpython-311-aarch64-linux-gnu.so +0 -0
  24. numba_cuda/numba/cuda/cext/_helpermod.c +82 -0
  25. numba_cuda/numba/cuda/cext/_pymodule.h +38 -0
  26. numba_cuda/numba/cuda/cext/_typeconv.cpp +206 -0
  27. numba_cuda/numba/cuda/cext/_typeconv.cpython-311-aarch64-linux-gnu.so +0 -0
  28. numba_cuda/numba/cuda/cext/_typeof.cpp +1159 -0
  29. numba_cuda/numba/cuda/cext/_typeof.h +19 -0
  30. numba_cuda/numba/cuda/cext/capsulethunk.h +111 -0
  31. numba_cuda/numba/cuda/cext/mviewbuf.c +385 -0
  32. numba_cuda/numba/cuda/cext/mviewbuf.cpython-311-aarch64-linux-gnu.so +0 -0
  33. numba_cuda/numba/cuda/cext/typeconv.cpp +212 -0
  34. numba_cuda/numba/cuda/cext/typeconv.hpp +101 -0
  35. numba_cuda/numba/cuda/cg.py +67 -0
  36. numba_cuda/numba/cuda/cgutils.py +1294 -0
  37. numba_cuda/numba/cuda/cloudpickle/__init__.py +21 -0
  38. numba_cuda/numba/cuda/cloudpickle/cloudpickle.py +1598 -0
  39. numba_cuda/numba/cuda/cloudpickle/cloudpickle_fast.py +17 -0
  40. numba_cuda/numba/cuda/codegen.py +541 -0
  41. numba_cuda/numba/cuda/compiler.py +1396 -0
  42. numba_cuda/numba/cuda/core/analysis.py +758 -0
  43. numba_cuda/numba/cuda/core/annotations/__init__.py +0 -0
  44. numba_cuda/numba/cuda/core/annotations/pretty_annotate.py +288 -0
  45. numba_cuda/numba/cuda/core/annotations/type_annotations.py +305 -0
  46. numba_cuda/numba/cuda/core/base.py +1332 -0
  47. numba_cuda/numba/cuda/core/boxing.py +1411 -0
  48. numba_cuda/numba/cuda/core/bytecode.py +728 -0
  49. numba_cuda/numba/cuda/core/byteflow.py +2346 -0
  50. numba_cuda/numba/cuda/core/caching.py +744 -0
  51. numba_cuda/numba/cuda/core/callconv.py +392 -0
  52. numba_cuda/numba/cuda/core/codegen.py +171 -0
  53. numba_cuda/numba/cuda/core/compiler.py +199 -0
  54. numba_cuda/numba/cuda/core/compiler_lock.py +85 -0
  55. numba_cuda/numba/cuda/core/compiler_machinery.py +497 -0
  56. numba_cuda/numba/cuda/core/config.py +650 -0
  57. numba_cuda/numba/cuda/core/consts.py +124 -0
  58. numba_cuda/numba/cuda/core/controlflow.py +989 -0
  59. numba_cuda/numba/cuda/core/cuda_errors.py +917 -0
  60. numba_cuda/numba/cuda/core/entrypoints.py +57 -0
  61. numba_cuda/numba/cuda/core/environment.py +66 -0
  62. numba_cuda/numba/cuda/core/errors.py +9 -0
  63. numba_cuda/numba/cuda/core/event.py +511 -0
  64. numba_cuda/numba/cuda/core/funcdesc.py +330 -0
  65. numba_cuda/numba/cuda/core/generators.py +387 -0
  66. numba_cuda/numba/cuda/core/imputils.py +509 -0
  67. numba_cuda/numba/cuda/core/inline_closurecall.py +1787 -0
  68. numba_cuda/numba/cuda/core/interpreter.py +3617 -0
  69. numba_cuda/numba/cuda/core/ir.py +1812 -0
  70. numba_cuda/numba/cuda/core/ir_utils.py +2638 -0
  71. numba_cuda/numba/cuda/core/optional.py +129 -0
  72. numba_cuda/numba/cuda/core/options.py +262 -0
  73. numba_cuda/numba/cuda/core/postproc.py +249 -0
  74. numba_cuda/numba/cuda/core/pythonapi.py +1859 -0
  75. numba_cuda/numba/cuda/core/registry.py +46 -0
  76. numba_cuda/numba/cuda/core/removerefctpass.py +123 -0
  77. numba_cuda/numba/cuda/core/rewrites/__init__.py +26 -0
  78. numba_cuda/numba/cuda/core/rewrites/ir_print.py +91 -0
  79. numba_cuda/numba/cuda/core/rewrites/registry.py +104 -0
  80. numba_cuda/numba/cuda/core/rewrites/static_binop.py +41 -0
  81. numba_cuda/numba/cuda/core/rewrites/static_getitem.py +189 -0
  82. numba_cuda/numba/cuda/core/rewrites/static_raise.py +100 -0
  83. numba_cuda/numba/cuda/core/sigutils.py +68 -0
  84. numba_cuda/numba/cuda/core/ssa.py +498 -0
  85. numba_cuda/numba/cuda/core/targetconfig.py +330 -0
  86. numba_cuda/numba/cuda/core/tracing.py +231 -0
  87. numba_cuda/numba/cuda/core/transforms.py +956 -0
  88. numba_cuda/numba/cuda/core/typed_passes.py +867 -0
  89. numba_cuda/numba/cuda/core/typeinfer.py +1950 -0
  90. numba_cuda/numba/cuda/core/unsafe/__init__.py +0 -0
  91. numba_cuda/numba/cuda/core/unsafe/bytes.py +67 -0
  92. numba_cuda/numba/cuda/core/unsafe/eh.py +67 -0
  93. numba_cuda/numba/cuda/core/unsafe/refcount.py +98 -0
  94. numba_cuda/numba/cuda/core/untyped_passes.py +1979 -0
  95. numba_cuda/numba/cuda/cpython/builtins.py +1153 -0
  96. numba_cuda/numba/cuda/cpython/charseq.py +1218 -0
  97. numba_cuda/numba/cuda/cpython/cmathimpl.py +560 -0
  98. numba_cuda/numba/cuda/cpython/enumimpl.py +103 -0
  99. numba_cuda/numba/cuda/cpython/iterators.py +167 -0
  100. numba_cuda/numba/cuda/cpython/listobj.py +1326 -0
  101. numba_cuda/numba/cuda/cpython/mathimpl.py +499 -0
  102. numba_cuda/numba/cuda/cpython/numbers.py +1475 -0
  103. numba_cuda/numba/cuda/cpython/rangeobj.py +289 -0
  104. numba_cuda/numba/cuda/cpython/slicing.py +322 -0
  105. numba_cuda/numba/cuda/cpython/tupleobj.py +456 -0
  106. numba_cuda/numba/cuda/cpython/unicode.py +2865 -0
  107. numba_cuda/numba/cuda/cpython/unicode_support.py +1597 -0
  108. numba_cuda/numba/cuda/cpython/unsafe/__init__.py +0 -0
  109. numba_cuda/numba/cuda/cpython/unsafe/numbers.py +64 -0
  110. numba_cuda/numba/cuda/cpython/unsafe/tuple.py +92 -0
  111. numba_cuda/numba/cuda/cuda_paths.py +691 -0
  112. numba_cuda/numba/cuda/cudadecl.py +543 -0
  113. numba_cuda/numba/cuda/cudadrv/__init__.py +14 -0
  114. numba_cuda/numba/cuda/cudadrv/devicearray.py +954 -0
  115. numba_cuda/numba/cuda/cudadrv/devices.py +249 -0
  116. numba_cuda/numba/cuda/cudadrv/driver.py +3238 -0
  117. numba_cuda/numba/cuda/cudadrv/drvapi.py +435 -0
  118. numba_cuda/numba/cuda/cudadrv/dummyarray.py +562 -0
  119. numba_cuda/numba/cuda/cudadrv/enums.py +613 -0
  120. numba_cuda/numba/cuda/cudadrv/error.py +48 -0
  121. numba_cuda/numba/cuda/cudadrv/libs.py +220 -0
  122. numba_cuda/numba/cuda/cudadrv/linkable_code.py +184 -0
  123. numba_cuda/numba/cuda/cudadrv/mappings.py +14 -0
  124. numba_cuda/numba/cuda/cudadrv/ndarray.py +26 -0
  125. numba_cuda/numba/cuda/cudadrv/nvrtc.py +193 -0
  126. numba_cuda/numba/cuda/cudadrv/nvvm.py +756 -0
  127. numba_cuda/numba/cuda/cudadrv/rtapi.py +13 -0
  128. numba_cuda/numba/cuda/cudadrv/runtime.py +34 -0
  129. numba_cuda/numba/cuda/cudaimpl.py +983 -0
  130. numba_cuda/numba/cuda/cudamath.py +149 -0
  131. numba_cuda/numba/cuda/datamodel/__init__.py +7 -0
  132. numba_cuda/numba/cuda/datamodel/cuda_manager.py +66 -0
  133. numba_cuda/numba/cuda/datamodel/cuda_models.py +1446 -0
  134. numba_cuda/numba/cuda/datamodel/cuda_packer.py +224 -0
  135. numba_cuda/numba/cuda/datamodel/cuda_registry.py +22 -0
  136. numba_cuda/numba/cuda/datamodel/cuda_testing.py +153 -0
  137. numba_cuda/numba/cuda/datamodel/manager.py +11 -0
  138. numba_cuda/numba/cuda/datamodel/models.py +9 -0
  139. numba_cuda/numba/cuda/datamodel/packer.py +9 -0
  140. numba_cuda/numba/cuda/datamodel/registry.py +11 -0
  141. numba_cuda/numba/cuda/datamodel/testing.py +11 -0
  142. numba_cuda/numba/cuda/debuginfo.py +997 -0
  143. numba_cuda/numba/cuda/decorators.py +294 -0
  144. numba_cuda/numba/cuda/descriptor.py +35 -0
  145. numba_cuda/numba/cuda/device_init.py +155 -0
  146. numba_cuda/numba/cuda/deviceufunc.py +1021 -0
  147. numba_cuda/numba/cuda/dispatcher.py +2463 -0
  148. numba_cuda/numba/cuda/errors.py +72 -0
  149. numba_cuda/numba/cuda/extending.py +697 -0
  150. numba_cuda/numba/cuda/flags.py +178 -0
  151. numba_cuda/numba/cuda/fp16.py +357 -0
  152. numba_cuda/numba/cuda/include/12/cuda_bf16.h +5118 -0
  153. numba_cuda/numba/cuda/include/12/cuda_bf16.hpp +3865 -0
  154. numba_cuda/numba/cuda/include/12/cuda_fp16.h +5363 -0
  155. numba_cuda/numba/cuda/include/12/cuda_fp16.hpp +3483 -0
  156. numba_cuda/numba/cuda/include/13/cuda_bf16.h +5118 -0
  157. numba_cuda/numba/cuda/include/13/cuda_bf16.hpp +3865 -0
  158. numba_cuda/numba/cuda/include/13/cuda_fp16.h +5363 -0
  159. numba_cuda/numba/cuda/include/13/cuda_fp16.hpp +3483 -0
  160. numba_cuda/numba/cuda/initialize.py +24 -0
  161. numba_cuda/numba/cuda/intrinsics.py +531 -0
  162. numba_cuda/numba/cuda/itanium_mangler.py +214 -0
  163. numba_cuda/numba/cuda/kernels/__init__.py +2 -0
  164. numba_cuda/numba/cuda/kernels/reduction.py +265 -0
  165. numba_cuda/numba/cuda/kernels/transpose.py +65 -0
  166. numba_cuda/numba/cuda/libdevice.py +3386 -0
  167. numba_cuda/numba/cuda/libdevicedecl.py +20 -0
  168. numba_cuda/numba/cuda/libdevicefuncs.py +1060 -0
  169. numba_cuda/numba/cuda/libdeviceimpl.py +88 -0
  170. numba_cuda/numba/cuda/locks.py +19 -0
  171. numba_cuda/numba/cuda/lowering.py +1980 -0
  172. numba_cuda/numba/cuda/mathimpl.py +374 -0
  173. numba_cuda/numba/cuda/memory_management/__init__.py +4 -0
  174. numba_cuda/numba/cuda/memory_management/memsys.cu +99 -0
  175. numba_cuda/numba/cuda/memory_management/memsys.cuh +22 -0
  176. numba_cuda/numba/cuda/memory_management/nrt.cu +212 -0
  177. numba_cuda/numba/cuda/memory_management/nrt.cuh +48 -0
  178. numba_cuda/numba/cuda/memory_management/nrt.py +390 -0
  179. numba_cuda/numba/cuda/memory_management/nrt_context.py +438 -0
  180. numba_cuda/numba/cuda/misc/appdirs.py +594 -0
  181. numba_cuda/numba/cuda/misc/cffiimpl.py +24 -0
  182. numba_cuda/numba/cuda/misc/coverage_support.py +43 -0
  183. numba_cuda/numba/cuda/misc/dump_style.py +41 -0
  184. numba_cuda/numba/cuda/misc/findlib.py +75 -0
  185. numba_cuda/numba/cuda/misc/firstlinefinder.py +96 -0
  186. numba_cuda/numba/cuda/misc/gdb_hook.py +240 -0
  187. numba_cuda/numba/cuda/misc/literal.py +28 -0
  188. numba_cuda/numba/cuda/misc/llvm_pass_timings.py +412 -0
  189. numba_cuda/numba/cuda/misc/special.py +94 -0
  190. numba_cuda/numba/cuda/models.py +56 -0
  191. numba_cuda/numba/cuda/np/arraymath.py +5130 -0
  192. numba_cuda/numba/cuda/np/arrayobj.py +7635 -0
  193. numba_cuda/numba/cuda/np/extensions.py +11 -0
  194. numba_cuda/numba/cuda/np/linalg.py +3087 -0
  195. numba_cuda/numba/cuda/np/math/__init__.py +0 -0
  196. numba_cuda/numba/cuda/np/math/cmathimpl.py +558 -0
  197. numba_cuda/numba/cuda/np/math/mathimpl.py +487 -0
  198. numba_cuda/numba/cuda/np/math/numbers.py +1461 -0
  199. numba_cuda/numba/cuda/np/npdatetime.py +969 -0
  200. numba_cuda/numba/cuda/np/npdatetime_helpers.py +217 -0
  201. numba_cuda/numba/cuda/np/npyfuncs.py +1808 -0
  202. numba_cuda/numba/cuda/np/npyimpl.py +1027 -0
  203. numba_cuda/numba/cuda/np/numpy_support.py +798 -0
  204. numba_cuda/numba/cuda/np/polynomial/__init__.py +4 -0
  205. numba_cuda/numba/cuda/np/polynomial/polynomial_core.py +242 -0
  206. numba_cuda/numba/cuda/np/polynomial/polynomial_functions.py +380 -0
  207. numba_cuda/numba/cuda/np/ufunc/__init__.py +4 -0
  208. numba_cuda/numba/cuda/np/ufunc/decorators.py +203 -0
  209. numba_cuda/numba/cuda/np/ufunc/sigparse.py +68 -0
  210. numba_cuda/numba/cuda/np/ufunc/ufuncbuilder.py +65 -0
  211. numba_cuda/numba/cuda/np/ufunc_db.py +1282 -0
  212. numba_cuda/numba/cuda/np/unsafe/__init__.py +0 -0
  213. numba_cuda/numba/cuda/np/unsafe/ndarray.py +84 -0
  214. numba_cuda/numba/cuda/nvvmutils.py +254 -0
  215. numba_cuda/numba/cuda/printimpl.py +126 -0
  216. numba_cuda/numba/cuda/random.py +308 -0
  217. numba_cuda/numba/cuda/reshape_funcs.cu +156 -0
  218. numba_cuda/numba/cuda/serialize.py +267 -0
  219. numba_cuda/numba/cuda/simulator/__init__.py +63 -0
  220. numba_cuda/numba/cuda/simulator/_internal/__init__.py +4 -0
  221. numba_cuda/numba/cuda/simulator/_internal/cuda_bf16.py +2 -0
  222. numba_cuda/numba/cuda/simulator/api.py +179 -0
  223. numba_cuda/numba/cuda/simulator/bf16.py +4 -0
  224. numba_cuda/numba/cuda/simulator/compiler.py +38 -0
  225. numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +11 -0
  226. numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +462 -0
  227. numba_cuda/numba/cuda/simulator/cudadrv/devices.py +122 -0
  228. numba_cuda/numba/cuda/simulator/cudadrv/driver.py +66 -0
  229. numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +7 -0
  230. numba_cuda/numba/cuda/simulator/cudadrv/dummyarray.py +7 -0
  231. numba_cuda/numba/cuda/simulator/cudadrv/error.py +10 -0
  232. numba_cuda/numba/cuda/simulator/cudadrv/libs.py +10 -0
  233. numba_cuda/numba/cuda/simulator/cudadrv/linkable_code.py +61 -0
  234. numba_cuda/numba/cuda/simulator/cudadrv/nvrtc.py +11 -0
  235. numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +32 -0
  236. numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +22 -0
  237. numba_cuda/numba/cuda/simulator/dispatcher.py +11 -0
  238. numba_cuda/numba/cuda/simulator/kernel.py +320 -0
  239. numba_cuda/numba/cuda/simulator/kernelapi.py +509 -0
  240. numba_cuda/numba/cuda/simulator/memory_management/__init__.py +4 -0
  241. numba_cuda/numba/cuda/simulator/memory_management/nrt.py +21 -0
  242. numba_cuda/numba/cuda/simulator/reduction.py +19 -0
  243. numba_cuda/numba/cuda/simulator/tests/support.py +4 -0
  244. numba_cuda/numba/cuda/simulator/vector_types.py +65 -0
  245. numba_cuda/numba/cuda/simulator_init.py +18 -0
  246. numba_cuda/numba/cuda/stubs.py +624 -0
  247. numba_cuda/numba/cuda/target.py +505 -0
  248. numba_cuda/numba/cuda/testing.py +347 -0
  249. numba_cuda/numba/cuda/tests/__init__.py +62 -0
  250. numba_cuda/numba/cuda/tests/benchmarks/__init__.py +0 -0
  251. numba_cuda/numba/cuda/tests/benchmarks/test_kernel_launch.py +119 -0
  252. numba_cuda/numba/cuda/tests/cloudpickle_main_class.py +9 -0
  253. numba_cuda/numba/cuda/tests/core/serialize_usecases.py +113 -0
  254. numba_cuda/numba/cuda/tests/core/test_itanium_mangler.py +83 -0
  255. numba_cuda/numba/cuda/tests/core/test_serialize.py +371 -0
  256. numba_cuda/numba/cuda/tests/cudadrv/__init__.py +9 -0
  257. numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +147 -0
  258. numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +161 -0
  259. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +397 -0
  260. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +24 -0
  261. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +180 -0
  262. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +360 -0
  263. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +191 -0
  264. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +621 -0
  265. numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +247 -0
  266. numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +100 -0
  267. numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +200 -0
  268. numba_cuda/numba/cuda/tests/cudadrv/test_events.py +53 -0
  269. numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +72 -0
  270. numba_cuda/numba/cuda/tests/cudadrv/test_init.py +138 -0
  271. numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +43 -0
  272. numba_cuda/numba/cuda/tests/cudadrv/test_is_fp16.py +15 -0
  273. numba_cuda/numba/cuda/tests/cudadrv/test_linkable_code.py +58 -0
  274. numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +348 -0
  275. numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +128 -0
  276. numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py +301 -0
  277. numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +174 -0
  278. numba_cuda/numba/cuda/tests/cudadrv/test_nvrtc.py +28 -0
  279. numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +185 -0
  280. numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +39 -0
  281. numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +23 -0
  282. numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +38 -0
  283. numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +48 -0
  284. numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +44 -0
  285. numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +127 -0
  286. numba_cuda/numba/cuda/tests/cudapy/__init__.py +9 -0
  287. numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +231 -0
  288. numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +50 -0
  289. numba_cuda/numba/cuda/tests/cudapy/cg_cache_usecases.py +36 -0
  290. numba_cuda/numba/cuda/tests/cudapy/complex_usecases.py +116 -0
  291. numba_cuda/numba/cuda/tests/cudapy/enum_usecases.py +59 -0
  292. numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +62 -0
  293. numba_cuda/numba/cuda/tests/cudapy/jitlink.ptx +28 -0
  294. numba_cuda/numba/cuda/tests/cudapy/overload_usecases.py +33 -0
  295. numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +104 -0
  296. numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +47 -0
  297. numba_cuda/numba/cuda/tests/cudapy/test_analysis.py +1122 -0
  298. numba_cuda/numba/cuda/tests/cudapy/test_array.py +344 -0
  299. numba_cuda/numba/cuda/tests/cudapy/test_array_alignment.py +268 -0
  300. numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +203 -0
  301. numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +63 -0
  302. numba_cuda/numba/cuda/tests/cudapy/test_array_reductions.py +360 -0
  303. numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +1815 -0
  304. numba_cuda/numba/cuda/tests/cudapy/test_bfloat16.py +599 -0
  305. numba_cuda/numba/cuda/tests/cudapy/test_bfloat16_bindings.py +377 -0
  306. numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +160 -0
  307. numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +27 -0
  308. numba_cuda/numba/cuda/tests/cudapy/test_byteflow.py +98 -0
  309. numba_cuda/numba/cuda/tests/cudapy/test_cache_hints.py +210 -0
  310. numba_cuda/numba/cuda/tests/cudapy/test_caching.py +683 -0
  311. numba_cuda/numba/cuda/tests/cudapy/test_casting.py +265 -0
  312. numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +42 -0
  313. numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +718 -0
  314. numba_cuda/numba/cuda/tests/cudapy/test_complex.py +370 -0
  315. numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +23 -0
  316. numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +142 -0
  317. numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +178 -0
  318. numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +193 -0
  319. numba_cuda/numba/cuda/tests/cudapy/test_copy_propagate.py +131 -0
  320. numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +438 -0
  321. numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +94 -0
  322. numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +101 -0
  323. numba_cuda/numba/cuda/tests/cudapy/test_debug.py +105 -0
  324. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +978 -0
  325. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo_types.py +476 -0
  326. numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +500 -0
  327. numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +820 -0
  328. numba_cuda/numba/cuda/tests/cudapy/test_enums.py +152 -0
  329. numba_cuda/numba/cuda/tests/cudapy/test_errors.py +111 -0
  330. numba_cuda/numba/cuda/tests/cudapy/test_exception.py +170 -0
  331. numba_cuda/numba/cuda/tests/cudapy/test_extending.py +1088 -0
  332. numba_cuda/numba/cuda/tests/cudapy/test_extending_types.py +71 -0
  333. numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +265 -0
  334. numba_cuda/numba/cuda/tests/cudapy/test_flow_control.py +1433 -0
  335. numba_cuda/numba/cuda/tests/cudapy/test_forall.py +57 -0
  336. numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +34 -0
  337. numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +69 -0
  338. numba_cuda/numba/cuda/tests/cudapy/test_globals.py +62 -0
  339. numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +474 -0
  340. numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +167 -0
  341. numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +92 -0
  342. numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +39 -0
  343. numba_cuda/numba/cuda/tests/cudapy/test_inline.py +170 -0
  344. numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +255 -0
  345. numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +1219 -0
  346. numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +263 -0
  347. numba_cuda/numba/cuda/tests/cudapy/test_ir.py +598 -0
  348. numba_cuda/numba/cuda/tests/cudapy/test_ir_utils.py +276 -0
  349. numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +101 -0
  350. numba_cuda/numba/cuda/tests/cudapy/test_lang.py +68 -0
  351. numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +123 -0
  352. numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +194 -0
  353. numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +220 -0
  354. numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +173 -0
  355. numba_cuda/numba/cuda/tests/cudapy/test_make_function_to_jit_function.py +364 -0
  356. numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +47 -0
  357. numba_cuda/numba/cuda/tests/cudapy/test_math.py +842 -0
  358. numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +76 -0
  359. numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +78 -0
  360. numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +25 -0
  361. numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +145 -0
  362. numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +39 -0
  363. numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +82 -0
  364. numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +53 -0
  365. numba_cuda/numba/cuda/tests/cudapy/test_operator.py +504 -0
  366. numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +93 -0
  367. numba_cuda/numba/cuda/tests/cudapy/test_overload.py +402 -0
  368. numba_cuda/numba/cuda/tests/cudapy/test_powi.py +128 -0
  369. numba_cuda/numba/cuda/tests/cudapy/test_print.py +193 -0
  370. numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +37 -0
  371. numba_cuda/numba/cuda/tests/cudapy/test_random.py +117 -0
  372. numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +614 -0
  373. numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +130 -0
  374. numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +94 -0
  375. numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +83 -0
  376. numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +86 -0
  377. numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +40 -0
  378. numba_cuda/numba/cuda/tests/cudapy/test_sm.py +457 -0
  379. numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +233 -0
  380. numba_cuda/numba/cuda/tests/cudapy/test_ssa.py +454 -0
  381. numba_cuda/numba/cuda/tests/cudapy/test_stream_api.py +56 -0
  382. numba_cuda/numba/cuda/tests/cudapy/test_sync.py +277 -0
  383. numba_cuda/numba/cuda/tests/cudapy/test_tracing.py +200 -0
  384. numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +90 -0
  385. numba_cuda/numba/cuda/tests/cudapy/test_typeconv.py +333 -0
  386. numba_cuda/numba/cuda/tests/cudapy/test_typeinfer.py +538 -0
  387. numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +585 -0
  388. numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +42 -0
  389. numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +485 -0
  390. numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +312 -0
  391. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +23 -0
  392. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +183 -0
  393. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +40 -0
  394. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +40 -0
  395. numba_cuda/numba/cuda/tests/cudapy/test_warning.py +206 -0
  396. numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +446 -0
  397. numba_cuda/numba/cuda/tests/cudasim/__init__.py +9 -0
  398. numba_cuda/numba/cuda/tests/cudasim/support.py +9 -0
  399. numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +111 -0
  400. numba_cuda/numba/cuda/tests/data/__init__.py +2 -0
  401. numba_cuda/numba/cuda/tests/data/cta_barrier.cu +28 -0
  402. numba_cuda/numba/cuda/tests/data/cuda_include.cu +10 -0
  403. numba_cuda/numba/cuda/tests/data/error.cu +12 -0
  404. numba_cuda/numba/cuda/tests/data/include/add.cuh +8 -0
  405. numba_cuda/numba/cuda/tests/data/jitlink.cu +28 -0
  406. numba_cuda/numba/cuda/tests/data/jitlink.ptx +49 -0
  407. numba_cuda/numba/cuda/tests/data/warn.cu +12 -0
  408. numba_cuda/numba/cuda/tests/doc_examples/__init__.py +9 -0
  409. numba_cuda/numba/cuda/tests/doc_examples/ffi/__init__.py +2 -0
  410. numba_cuda/numba/cuda/tests/doc_examples/ffi/functions.cu +54 -0
  411. numba_cuda/numba/cuda/tests/doc_examples/ffi/include/mul.cuh +8 -0
  412. numba_cuda/numba/cuda/tests/doc_examples/ffi/saxpy.cu +14 -0
  413. numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +86 -0
  414. numba_cuda/numba/cuda/tests/doc_examples/test_cpointer.py +68 -0
  415. numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +81 -0
  416. numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +141 -0
  417. numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +160 -0
  418. numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +180 -0
  419. numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +119 -0
  420. numba_cuda/numba/cuda/tests/doc_examples/test_random.py +66 -0
  421. numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +80 -0
  422. numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +206 -0
  423. numba_cuda/numba/cuda/tests/doc_examples/test_ufunc.py +53 -0
  424. numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +76 -0
  425. numba_cuda/numba/cuda/tests/nocuda/__init__.py +9 -0
  426. numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +452 -0
  427. numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +48 -0
  428. numba_cuda/numba/cuda/tests/nocuda/test_import.py +63 -0
  429. numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +252 -0
  430. numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +59 -0
  431. numba_cuda/numba/cuda/tests/nrt/__init__.py +9 -0
  432. numba_cuda/numba/cuda/tests/nrt/test_nrt.py +387 -0
  433. numba_cuda/numba/cuda/tests/nrt/test_nrt_refct.py +124 -0
  434. numba_cuda/numba/cuda/tests/support.py +900 -0
  435. numba_cuda/numba/cuda/typeconv/__init__.py +4 -0
  436. numba_cuda/numba/cuda/typeconv/castgraph.py +137 -0
  437. numba_cuda/numba/cuda/typeconv/rules.py +63 -0
  438. numba_cuda/numba/cuda/typeconv/typeconv.py +121 -0
  439. numba_cuda/numba/cuda/types/__init__.py +233 -0
  440. numba_cuda/numba/cuda/types/__init__.pyi +167 -0
  441. numba_cuda/numba/cuda/types/abstract.py +9 -0
  442. numba_cuda/numba/cuda/types/common.py +9 -0
  443. numba_cuda/numba/cuda/types/containers.py +9 -0
  444. numba_cuda/numba/cuda/types/cuda_abstract.py +533 -0
  445. numba_cuda/numba/cuda/types/cuda_common.py +110 -0
  446. numba_cuda/numba/cuda/types/cuda_containers.py +971 -0
  447. numba_cuda/numba/cuda/types/cuda_function_type.py +230 -0
  448. numba_cuda/numba/cuda/types/cuda_functions.py +798 -0
  449. numba_cuda/numba/cuda/types/cuda_iterators.py +120 -0
  450. numba_cuda/numba/cuda/types/cuda_misc.py +569 -0
  451. numba_cuda/numba/cuda/types/cuda_npytypes.py +690 -0
  452. numba_cuda/numba/cuda/types/cuda_scalars.py +280 -0
  453. numba_cuda/numba/cuda/types/ext_types.py +101 -0
  454. numba_cuda/numba/cuda/types/function_type.py +11 -0
  455. numba_cuda/numba/cuda/types/functions.py +9 -0
  456. numba_cuda/numba/cuda/types/iterators.py +9 -0
  457. numba_cuda/numba/cuda/types/misc.py +9 -0
  458. numba_cuda/numba/cuda/types/npytypes.py +9 -0
  459. numba_cuda/numba/cuda/types/scalars.py +9 -0
  460. numba_cuda/numba/cuda/typing/__init__.py +19 -0
  461. numba_cuda/numba/cuda/typing/arraydecl.py +939 -0
  462. numba_cuda/numba/cuda/typing/asnumbatype.py +130 -0
  463. numba_cuda/numba/cuda/typing/bufproto.py +70 -0
  464. numba_cuda/numba/cuda/typing/builtins.py +1209 -0
  465. numba_cuda/numba/cuda/typing/cffi_utils.py +219 -0
  466. numba_cuda/numba/cuda/typing/cmathdecl.py +47 -0
  467. numba_cuda/numba/cuda/typing/collections.py +138 -0
  468. numba_cuda/numba/cuda/typing/context.py +782 -0
  469. numba_cuda/numba/cuda/typing/ctypes_utils.py +125 -0
  470. numba_cuda/numba/cuda/typing/dictdecl.py +63 -0
  471. numba_cuda/numba/cuda/typing/enumdecl.py +74 -0
  472. numba_cuda/numba/cuda/typing/listdecl.py +147 -0
  473. numba_cuda/numba/cuda/typing/mathdecl.py +158 -0
  474. numba_cuda/numba/cuda/typing/npdatetime.py +322 -0
  475. numba_cuda/numba/cuda/typing/npydecl.py +749 -0
  476. numba_cuda/numba/cuda/typing/setdecl.py +115 -0
  477. numba_cuda/numba/cuda/typing/templates.py +1446 -0
  478. numba_cuda/numba/cuda/typing/typeof.py +301 -0
  479. numba_cuda/numba/cuda/ufuncs.py +746 -0
  480. numba_cuda/numba/cuda/utils.py +724 -0
  481. numba_cuda/numba/cuda/vector_types.py +214 -0
  482. numba_cuda/numba/cuda/vectorizers.py +260 -0
  483. numba_cuda-0.22.1.dist-info/METADATA +109 -0
  484. numba_cuda-0.22.1.dist-info/RECORD +488 -0
  485. numba_cuda-0.22.1.dist-info/WHEEL +6 -0
  486. numba_cuda-0.22.1.dist-info/licenses/LICENSE +26 -0
  487. numba_cuda-0.22.1.dist-info/licenses/LICENSE.numba +24 -0
  488. numba_cuda-0.22.1.dist-info/top_level.txt +1 -0
@@ -0,0 +1,438 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
4
+ import numpy as np
5
+
6
+ from numba.cuda import vectorize, guvectorize
7
+ from numba import cuda
8
+ from numba.cuda.testing import unittest, CUDATestCase, ForeignArray
9
+ from numba.cuda.testing import skip_on_cudasim, skip_if_external_memmgr
10
+ from numba.cuda.tests.support import linux_only, override_config
11
+ from unittest.mock import call, patch
12
+
13
+
14
@skip_on_cudasim("CUDA Array Interface is not supported in the simulator")
class TestCudaArrayInterface(CUDATestCase):
    """Tests for producing and consuming ``__cuda_array_interface__``.

    The tests cover wrapping foreign objects with ``cuda.as_cuda_array``,
    passing them to kernels / ufuncs / gufuncs, views and strides, and the
    stream entry of the interface together with the synchronization that
    happens (or does not happen) when an array is imported or launched on.
    """

    def assertPointersEqual(self, a, b):
        """Assert that ``a`` and ``b`` report the same device pointer value."""
        self.assertEqual(
            a.device_ctypes_pointer.value, b.device_ctypes_pointer.value
        )

    def test_as_cuda_array(self):
        """A wrapped foreign array aliases the original device buffer."""
        h_arr = np.arange(10)
        self.assertFalse(cuda.is_cuda_array(h_arr))
        d_arr = cuda.to_device(h_arr)
        self.assertTrue(cuda.is_cuda_array(d_arr))
        my_arr = ForeignArray(d_arr)
        self.assertTrue(cuda.is_cuda_array(my_arr))
        wrapped = cuda.as_cuda_array(my_arr)
        self.assertTrue(cuda.is_cuda_array(wrapped))
        # Their values must equal the original array
        np.testing.assert_array_equal(wrapped.copy_to_host(), h_arr)
        np.testing.assert_array_equal(d_arr.copy_to_host(), h_arr)
        # d_arr and wrapped must be the same buffer
        self.assertPointersEqual(wrapped, d_arr)

    def get_stream_value(self, stream):
        """Return the raw value of ``stream``'s handle for comparisons."""
        return stream.handle.value

    @skip_if_external_memmgr("Ownership not relevant with external memmgr")
    def test_ownership(self):
        """Memory is only queued for deallocation after the last reference
        (original or converted array) is dropped."""
        # Get the deallocation queue
        ctx = cuda.current_context()
        deallocs = ctx.memory_manager.deallocations
        # Flush all deallocations
        deallocs.clear()
        self.assertEqual(len(deallocs), 0)
        # Make new device array
        d_arr = cuda.to_device(np.arange(100))
        # Convert it
        cvted = cuda.as_cuda_array(d_arr)
        # Drop reference to the original object such that
        # only `cvted` has a reference to it.
        del d_arr
        # There shouldn't be any new deallocations
        self.assertEqual(len(deallocs), 0)
        # Try to access the memory and verify its content
        np.testing.assert_equal(cvted.copy_to_host(), np.arange(100))
        # Drop last reference to the memory
        del cvted
        self.assertEqual(len(deallocs), 1)
        # Flush
        deallocs.clear()

    def test_kernel_arg(self):
        """A kernel writing through a wrapped array mutates the original."""
        h_arr = np.arange(10)
        d_arr = cuda.to_device(h_arr)
        my_arr = ForeignArray(d_arr)
        wrapped = cuda.as_cuda_array(my_arr)

        @cuda.jit
        def mutate(arr, val):
            i = cuda.grid(1)
            if i >= len(arr):
                return
            arr[i] += val

        val = 7
        mutate.forall(wrapped.size)(wrapped, val)

        np.testing.assert_array_equal(wrapped.copy_to_host(), h_arr + val)
        np.testing.assert_array_equal(d_arr.copy_to_host(), h_arr + val)

    def test_ufunc_arg(self):
        """Foreign arrays are accepted as ufunc inputs and as ``out``."""

        @vectorize(["f8(f8, f8)"], target="cuda")
        def vadd(a, b):
            return a + b

        # Case 1: use custom array as argument
        h_arr = np.random.random(10)
        arr = ForeignArray(cuda.to_device(h_arr))
        val = 6
        out = vadd(arr, val)
        np.testing.assert_array_equal(out.copy_to_host(), h_arr + val)

        # Case 2: use custom array as return
        out = ForeignArray(cuda.device_array(h_arr.shape))
        returned = vadd(h_arr, val, out=out)
        np.testing.assert_array_equal(returned.copy_to_host(), h_arr + val)

    def test_gufunc_arg(self):
        """Foreign arrays are accepted as gufunc inputs and as ``out``."""

        @guvectorize(["(f8, f8, f8[:])"], "(),()->()", target="cuda")
        def vadd(inp, val, out):
            out[0] = inp + val

        # Case 1: use custom array as argument
        h_arr = np.random.random(10)
        arr = ForeignArray(cuda.to_device(h_arr))
        val = np.float64(7)
        out = vadd(arr, val)
        np.testing.assert_array_equal(out.copy_to_host(), h_arr + val)

        # Case 2: use custom array as return
        out = ForeignArray(cuda.device_array(h_arr.shape))
        returned = vadd(h_arr, val, out=out)
        np.testing.assert_array_equal(returned.copy_to_host(), h_arr + val)
        # The returned array must alias the array wrapped by `out`
        self.assertPointersEqual(returned, out._arr)

    def test_array_views(self):
        """Views created via array interface support:
        - Slices
        - Strided slices

        for both reads (``__getitem__``) and writes (``__setitem__``).
        """
        h_arr = np.random.random(10)
        c_arr = cuda.to_device(h_arr)

        arr = cuda.as_cuda_array(c_arr)

        # __getitem__ interface accesses expected data

        # Direct views
        np.testing.assert_array_equal(arr.copy_to_host(), h_arr)
        np.testing.assert_array_equal(arr[:].copy_to_host(), h_arr)

        # Slicing
        np.testing.assert_array_equal(arr[:5].copy_to_host(), h_arr[:5])

        # Strided view
        np.testing.assert_array_equal(arr[::2].copy_to_host(), h_arr[::2])

        # View of strided array
        arr_strided = cuda.as_cuda_array(c_arr[::2])
        np.testing.assert_array_equal(arr_strided.copy_to_host(), h_arr[::2])

        # A strided-view-of-array and view-of-strided-array have the same
        # shape, strides, itemsize, and alloc_size
        self.assertEqual(arr[::2].shape, arr_strided.shape)
        self.assertEqual(arr[::2].strides, arr_strided.strides)
        self.assertEqual(arr[::2].dtype.itemsize, arr_strided.dtype.itemsize)
        self.assertEqual(arr[::2].alloc_size, arr_strided.alloc_size)
        self.assertEqual(
            arr[::2].nbytes, arr_strided.size * arr_strided.dtype.itemsize
        )

        # __setitem__ interface propagates into external array

        # Writes to a slice
        arr[:5] = np.pi
        np.testing.assert_array_equal(
            c_arr.copy_to_host(), np.concatenate((np.full(5, np.pi), h_arr[5:]))
        )

        # Writes to a slice from a view
        arr[:5] = arr[5:]
        np.testing.assert_array_equal(
            c_arr.copy_to_host(), np.concatenate((h_arr[5:], h_arr[5:]))
        )

        # Writes through a view
        arr[:] = cuda.to_device(h_arr)
        np.testing.assert_array_equal(c_arr.copy_to_host(), h_arr)

        # Writes to a strided slice
        arr[::2] = np.pi
        np.testing.assert_array_equal(
            c_arr.copy_to_host()[::2],
            np.full(5, np.pi),
        )
        np.testing.assert_array_equal(c_arr.copy_to_host()[1::2], h_arr[1::2])

    def test_negative_strided_issue(self):
        """A reversed device view reports the same offset, shape and strides
        as the equivalent reversed NumPy view."""
        # issue #3705
        h_arr = np.random.random(10)
        c_arr = cuda.to_device(h_arr)

        def base_offset(orig, sliced):
            # Distance between the data pointers of two interface dicts
            return sliced["data"][0] - orig["data"][0]

        h_ai = h_arr.__array_interface__
        c_ai = c_arr.__cuda_array_interface__

        h_ai_sliced = h_arr[::-1].__array_interface__
        c_ai_sliced = c_arr[::-1].__cuda_array_interface__

        # Check data offset is correct
        self.assertEqual(
            base_offset(h_ai, h_ai_sliced),
            base_offset(c_ai, c_ai_sliced),
        )
        # Check shape and strides are correct
        self.assertEqual(h_ai_sliced["shape"], c_ai_sliced["shape"])
        self.assertEqual(h_ai_sliced["strides"], c_ai_sliced["strides"])

    def test_negative_strided_copy_to_host(self):
        """Copying a negatively-strided view to the host is rejected."""
        # issue #3705
        h_arr = np.random.random(10)
        c_arr = cuda.to_device(h_arr)
        sliced = c_arr[::-1]
        with self.assertRaises(NotImplementedError) as raises:
            sliced.copy_to_host()
        expected_msg = "D->H copy not implemented for negative strides"
        self.assertIn(expected_msg, str(raises.exception))

    def test_masked_array(self):
        """An interface dict carrying a ``mask`` entry is rejected."""
        h_arr = np.random.random(10)
        h_mask = np.random.randint(2, size=10, dtype="bool")
        c_arr = cuda.to_device(h_arr)
        c_mask = cuda.to_device(h_mask)

        # Manually create a masked CUDA Array Interface dictionary
        masked_cuda_array_interface = c_arr.__cuda_array_interface__.copy()
        masked_cuda_array_interface["mask"] = c_mask

        with self.assertRaises(NotImplementedError) as raises:
            cuda.from_cuda_array_interface(masked_cuda_array_interface)
        expected_msg = "Masked arrays are not supported"
        self.assertIn(expected_msg, str(raises.exception))

    def test_zero_size_array(self):
        """A zero-size array exposes a null data pointer and can still be
        passed to a kernel."""
        # for #4175
        c_arr = cuda.device_array(0)
        self.assertEqual(c_arr.__cuda_array_interface__["data"][0], 0)

        @cuda.jit
        def add_one(arr):
            x = cuda.grid(1)
            N = arr.shape[0]
            if x < N:
                arr[x] += 1

        d_arr = ForeignArray(c_arr)
        add_one[1, 10](d_arr)  # this should pass

    def test_strides(self):
        """``strides`` is ``None`` for a C-contiguous array and set
        otherwise."""
        # for #4175
        # First, test C-contiguous array
        c_arr = cuda.device_array((2, 3, 4))
        self.assertIsNone(c_arr.__cuda_array_interface__["strides"])

        # Second, test non C-contiguous array
        c_arr = c_arr[:, 1, :]
        self.assertIsNotNone(c_arr.__cuda_array_interface__["strides"])

    def test_consuming_strides(self):
        """Importing works with both ``None`` and explicit strides."""
        hostarray = np.arange(10).reshape(2, 5)
        devarray = cuda.to_device(hostarray)
        face = devarray.__cuda_array_interface__
        self.assertIsNone(face["strides"])
        got = cuda.from_cuda_array_interface(face).copy_to_host()
        np.testing.assert_array_equal(got, hostarray)
        self.assertTrue(got.flags["C_CONTIGUOUS"])
        # Try non-NULL strides
        face["strides"] = hostarray.strides
        self.assertIsNotNone(face["strides"])
        got = cuda.from_cuda_array_interface(face).copy_to_host()
        np.testing.assert_array_equal(got, hostarray)
        self.assertTrue(got.flags["C_CONTIGUOUS"])

    def test_produce_no_stream(self):
        """Arrays created without a stream export ``stream`` as ``None``."""
        c_arr = cuda.device_array(10)
        self.assertIsNone(c_arr.__cuda_array_interface__["stream"])

        mapped_arr = cuda.mapped_array(10)
        self.assertIsNone(mapped_arr.__cuda_array_interface__["stream"])

    @linux_only
    def test_produce_managed_no_stream(self):
        """Managed arrays created without a stream export ``None``."""
        managed_arr = cuda.managed_array(10)
        self.assertIsNone(managed_arr.__cuda_array_interface__["stream"])

    def test_produce_stream(self):
        """Arrays created on a stream export that stream's handle value."""
        s = cuda.stream()
        c_arr = cuda.device_array(10, stream=s)
        cai_stream = c_arr.__cuda_array_interface__["stream"]
        stream_value = self.get_stream_value(s)
        self.assertEqual(stream_value, cai_stream)

        s = cuda.stream()
        mapped_arr = cuda.mapped_array(10, stream=s)
        cai_stream = mapped_arr.__cuda_array_interface__["stream"]
        stream_value = self.get_stream_value(s)
        self.assertEqual(stream_value, cai_stream)

    @linux_only
    def test_produce_managed_stream(self):
        """Managed arrays created on a stream export its handle value."""
        s = cuda.stream()
        managed_arr = cuda.managed_array(10, stream=s)
        cai_stream = managed_arr.__cuda_array_interface__["stream"]
        stream_value = self.get_stream_value(s)
        self.assertEqual(stream_value, cai_stream)

    def test_consume_no_stream(self):
        """Importing a stream-less foreign array yields stream ``0``."""
        # Create a foreign array with no stream
        f_arr = ForeignArray(cuda.device_array(10))

        # Ensure that the imported array has no default stream
        c_arr = cuda.as_cuda_array(f_arr)
        self.assertEqual(c_arr.stream, 0)

    def test_consume_stream(self):
        """Importing a foreign array with a stream keeps that stream."""
        # Create a foreign array with a stream
        s = cuda.stream()
        f_arr = ForeignArray(cuda.device_array(10, stream=s))

        # Ensure that an imported array has the stream as its default stream
        c_arr = cuda.as_cuda_array(f_arr)
        self.assertTrue(c_arr.stream.external)
        stream_value = self.get_stream_value(s)
        imported_stream_value = self.get_stream_value(c_arr.stream)
        self.assertEqual(stream_value, imported_stream_value)

    def test_consume_no_sync(self):
        """Importing a stream-less foreign array does not synchronize."""
        # Create a foreign array with no stream
        f_arr = ForeignArray(cuda.device_array(10))

        with patch.object(
            cuda.cudadrv.driver.Stream, "synchronize", return_value=None
        ) as mock_sync:
            cuda.as_cuda_array(f_arr)

        # Ensure the synchronize method of a stream was not called
        mock_sync.assert_not_called()

    def test_consume_sync(self):
        """Importing a foreign array with a stream synchronizes once."""
        # Create a foreign array with a stream
        s = cuda.stream()
        f_arr = ForeignArray(cuda.device_array(10, stream=s))

        with patch.object(
            cuda.cudadrv.driver.Stream, "synchronize", return_value=None
        ) as mock_sync:
            cuda.as_cuda_array(f_arr)

        # Ensure the synchronize method of a stream was called
        mock_sync.assert_called_once_with()

    def test_consume_sync_disabled(self):
        """With sync disabled in the config, importing does not synchronize."""
        # Create a foreign array with a stream
        s = cuda.stream()
        f_arr = ForeignArray(cuda.device_array(10, stream=s))

        # Disable sync only for the duration of the check. The test suite
        # should generally be run with sync enabled, so the override is
        # scoped to this `with` block instead of being set globally.
        with override_config("CUDA_ARRAY_INTERFACE_SYNC", False):
            with patch.object(
                cuda.cudadrv.driver.Stream, "synchronize", return_value=None
            ) as mock_sync:
                cuda.as_cuda_array(f_arr)

            # Ensure the synchronize method of a stream was not called
            mock_sync.assert_not_called()

    def test_launch_no_sync(self):
        """Launching on a stream-less foreign array does not synchronize."""
        # Create a foreign array with no stream
        f_arr = ForeignArray(cuda.device_array(10))

        @cuda.jit
        def f(x):
            pass

        with patch.object(
            cuda.cudadrv.driver.Stream, "synchronize", return_value=None
        ) as mock_sync:
            f[1, 1](f_arr)

        # Ensure the synchronize method of a stream was not called
        mock_sync.assert_not_called()

    def test_launch_sync(self):
        """Launching on a foreign array with a stream synchronizes once."""
        # Create a foreign array with a stream
        s = cuda.stream()
        f_arr = ForeignArray(cuda.device_array(10, stream=s))

        @cuda.jit
        def f(x):
            pass

        with patch.object(
            cuda.cudadrv.driver.Stream, "synchronize", return_value=None
        ) as mock_sync:
            f[1, 1](f_arr)

        # Ensure the synchronize method of a stream was called
        mock_sync.assert_called_once_with()

    def test_launch_sync_two_streams(self):
        """Launching on two foreign arrays with streams synchronizes twice."""
        # Create two foreign arrays with streams
        s1 = cuda.stream()
        s2 = cuda.stream()
        f_arr1 = ForeignArray(cuda.device_array(10, stream=s1))
        f_arr2 = ForeignArray(cuda.device_array(10, stream=s2))

        @cuda.jit
        def f(x, y):
            pass

        with patch.object(
            cuda.cudadrv.driver.Stream, "synchronize", return_value=None
        ) as mock_sync:
            f[1, 1](f_arr1, f_arr2)

        # Ensure that synchronize was called twice. assert_has_calls alone
        # would also accept additional calls, so pin the exact count too.
        mock_sync.assert_has_calls([call(), call()])
        self.assertEqual(mock_sync.call_count, 2)

    def test_launch_sync_disabled(self):
        """With sync disabled in the config, launching does not synchronize."""
        # Create two foreign arrays with streams
        s1 = cuda.stream()
        s2 = cuda.stream()
        f_arr1 = ForeignArray(cuda.device_array(10, stream=s1))
        f_arr2 = ForeignArray(cuda.device_array(10, stream=s2))

        with override_config("CUDA_ARRAY_INTERFACE_SYNC", False):

            @cuda.jit
            def f(x, y):
                pass

            with patch.object(
                cuda.cudadrv.driver.Stream, "synchronize", return_value=None
            ) as mock_sync:
                f[1, 1](f_arr1, f_arr2)

            # Ensure that synchronize was not called
            mock_sync.assert_not_called()
435
+
436
+
437
if __name__ == "__main__":
    # Run this module's tests when the file is executed as a script.
    unittest.main()
@@ -0,0 +1,94 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
4
+ from numba import cuda
5
+ import numpy as np
6
+ from numba.cuda.testing import CUDATestCase
7
+ from numba.cuda.tests.support import override_config
8
+ import unittest
9
+
10
+
11
class TestCudaJitNoTypes(CUDATestCase):
    """
    Tests the jit decorator with no types provided.
    """

    def test_device_array(self):
        """A kernel jitted without a signature copies between device arrays."""

        @cuda.jit
        def foo(x, y):
            i = cuda.grid(1)
            y[i] = x[i]

        x = np.arange(10)
        y = np.empty_like(x)

        dx = cuda.to_device(x)
        dy = cuda.to_device(y)

        # One thread per block, ten blocks: one thread per element
        foo[10, 1](dx, dy)

        dy.copy_to_host(y)

        # Reports the mismatching elements on failure, unlike a bare
        # assertTrue(np.all(...))
        np.testing.assert_array_equal(x, y)

    def test_device_jit(self):
        """Device functions without signatures can be composed in a kernel,
        including one taking a tuple argument."""

        @cuda.jit(device=True)
        def mapper(args):
            a, b, c = args
            return a + b + c

        @cuda.jit(device=True)
        def reducer(a, b):
            return a + b

        @cuda.jit
        def driver(A, B):
            i = cuda.grid(1)
            if i < B.size:
                args = A[i], A[i] + B[i], B[i]
                B[i] = reducer(mapper(args), 1)

        A = np.arange(100, dtype=np.float32)
        B = np.arange(100, dtype=np.float32)

        # Keep pristine copies: the kernel overwrites B in place
        Acopy = A.copy()
        Bcopy = B.copy()

        driver[1, 100](A, B)

        # mapper sums A + (A + B) + B; reducer adds 1
        np.testing.assert_allclose(Acopy + Acopy + Bcopy + Bcopy + 1, B)

    def test_device_jit_2(self):
        """A device function can be called with both an array element and a
        literal from a kernel launched on an explicit stream."""

        @cuda.jit(device=True)
        def inner(arg):
            return arg + 1

        @cuda.jit
        def outer(argin, argout):
            argout[0] = inner(argin[0]) + inner(2)

        a = np.zeros(1)
        b = np.zeros(1)

        stream = cuda.stream()
        d_a = cuda.to_device(a, stream)
        d_b = cuda.to_device(b, stream)

        outer[1, 1, stream](d_a, d_b)

        d_b.copy_to_host(b, stream)
        # The copy was issued on `stream`; wait for it before reading `b` so
        # the assertion does not depend on the copy having completed
        # implicitly.
        stream.synchronize()

        self.assertEqual(b[0], (a[0] + 1) + (2 + 1))

    def test_jit_debug_simulator(self):
        """The jit decorator accepts ``debug`` when the simulator is enabled."""
        # Ensure that the jit decorator accepts the debug kwarg when the
        # simulator is in use - see Issue #6615.
        with override_config("ENABLE_CUDASIM", 1):

            @cuda.jit(debug=True, opt=False)
            def f(x):
                pass
91
+
92
+
93
if __name__ == "__main__":
    # Run this module's tests when the file is executed as a script.
    unittest.main()
@@ -0,0 +1,101 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
4
+ import numpy as np
5
+
6
+ from numba import cuda, vectorize, guvectorize
7
+ from numba.cuda.np.numpy_support import from_dtype
8
+ from numba.cuda.testing import CUDATestCase, skip_on_cudasim
9
+ import unittest
10
+
11
+
12
class TestCudaDateTime(CUDATestCase):
    """Tests for ``datetime64`` / ``timedelta64`` values in CUDA kernels,
    ufuncs, gufuncs and device array views."""

    def test_basic_datetime_kernel(self):
        """Element-wise datetime subtraction in a kernel yields timedeltas."""

        @cuda.jit
        def foo(start, end, delta):
            for i in range(cuda.grid(1), delta.size, cuda.gridsize(1)):
                delta[i] = end[i] - start[i]

        arr1 = np.arange("2005-02", "2006-02", dtype="datetime64[D]")
        arr2 = arr1 + np.random.randint(0, 10000, arr1.size)
        delta = np.zeros_like(arr1, dtype="timedelta64[D]")

        foo[1, 32](arr1, arr2, delta)

        self.assertPreciseEqual(delta, arr2 - arr1)

    def test_scalar_datetime_kernel(self):
        """Datetime and timedelta scalars can be passed as kernel arguments."""

        @cuda.jit
        def foo(dates, target, delta, matches, outdelta):
            for i in range(cuda.grid(1), matches.size, cuda.gridsize(1)):
                matches[i] = dates[i] == target
                outdelta[i] = dates[i] - delta

        arr1 = np.arange("2005-02", "2006-02", dtype="datetime64[D]")
        target = arr1[5]  # datetime
        delta = arr1[6] - arr1[5]  # timedelta
        matches = np.zeros_like(arr1, dtype=np.bool_)
        outdelta = np.zeros_like(arr1, dtype="datetime64[D]")

        foo[1, 32](arr1, target, delta, matches, outdelta)
        # nonzero() returns one index array per dimension; take the only one
        # and compare plain Python ints so that a mismatch is reported as a
        # normal assertion failure rather than an ambiguous-truth error.
        where = matches.nonzero()[0]

        self.assertEqual(where.tolist(), [5])
        self.assertPreciseEqual(outdelta, arr1 - delta)

    @skip_on_cudasim("ufunc API unsupported in the simulator")
    def test_ufunc(self):
        """A CUDA ufunc can subtract datetimes."""
        datetime_t = from_dtype(np.dtype("datetime64[D]"))

        @vectorize([(datetime_t, datetime_t)], target="cuda")
        def timediff(start, end):
            return end - start

        arr1 = np.arange("2005-02", "2006-02", dtype="datetime64[D]")
        arr2 = arr1 + np.random.randint(0, 10000, arr1.size)

        delta = timediff(arr1, arr2)

        self.assertPreciseEqual(delta, arr2 - arr1)

    @skip_on_cudasim("ufunc API unsupported in the simulator")
    def test_gufunc(self):
        """A CUDA gufunc can subtract datetimes into a timedelta output."""
        datetime_t = from_dtype(np.dtype("datetime64[D]"))
        timedelta_t = from_dtype(np.dtype("timedelta64[D]"))

        @guvectorize(
            [(datetime_t, datetime_t, timedelta_t[:])],
            "(),()->()",
            target="cuda",
        )
        def timediff(start, end, out):
            out[0] = end - start

        arr1 = np.arange("2005-02", "2006-02", dtype="datetime64[D]")
        arr2 = arr1 + np.random.randint(0, 10000, arr1.size)

        delta = timediff(arr1, arr2)

        self.assertPreciseEqual(delta, arr2 - arr1)

    @skip_on_cudasim("no .copy_to_host() in the simulator")
    def test_datetime_view_as_int64(self):
        """Viewing a datetime device array as int64 matches NumPy's view and
        keeps the same ``gpu_data``."""
        arr = np.arange("2005-02", "2006-02", dtype="datetime64[D]")
        darr = cuda.to_device(arr)
        viewed = darr.view(np.int64)
        self.assertPreciseEqual(arr.view(np.int64), viewed.copy_to_host())
        self.assertEqual(viewed.gpu_data, darr.gpu_data)

    @skip_on_cudasim("no .copy_to_host() in the simulator")
    def test_timedelta_view_as_int64(self):
        """Viewing a timedelta device array as int64 matches NumPy's view and
        keeps the same ``gpu_data``."""
        arr = np.arange("2005-02", "2006-02", dtype="datetime64[D]")
        # Subtracting two datetime arrays produces a timedelta array
        arr = arr - (arr - 1)
        self.assertEqual(arr.dtype, np.dtype("timedelta64[D]"))
        darr = cuda.to_device(arr)
        viewed = darr.view(np.int64)
        self.assertPreciseEqual(arr.view(np.int64), viewed.copy_to_host())
        self.assertEqual(viewed.gpu_data, darr.gpu_data)
98
+
99
+
100
if __name__ == "__main__":
    # Run this module's tests when the file is executed as a script.
    unittest.main()