numba-cuda 0.22.0__cp312-cp312-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of numba-cuda might be problematic.

Files changed (487):
  1. _numba_cuda_redirector.pth +4 -0
  2. _numba_cuda_redirector.py +89 -0
  3. numba_cuda/VERSION +1 -0
  4. numba_cuda/__init__.py +6 -0
  5. numba_cuda/_version.py +11 -0
  6. numba_cuda/numba/cuda/__init__.py +70 -0
  7. numba_cuda/numba/cuda/_internal/cuda_bf16.py +16394 -0
  8. numba_cuda/numba/cuda/_internal/cuda_fp16.py +8112 -0
  9. numba_cuda/numba/cuda/api.py +580 -0
  10. numba_cuda/numba/cuda/api_util.py +76 -0
  11. numba_cuda/numba/cuda/args.py +72 -0
  12. numba_cuda/numba/cuda/bf16.py +397 -0
  13. numba_cuda/numba/cuda/cache_hints.py +287 -0
  14. numba_cuda/numba/cuda/cext/__init__.py +2 -0
  15. numba_cuda/numba/cuda/cext/_devicearray.cpp +159 -0
  16. numba_cuda/numba/cuda/cext/_devicearray.cpython-312-aarch64-linux-gnu.so +0 -0
  17. numba_cuda/numba/cuda/cext/_devicearray.h +29 -0
  18. numba_cuda/numba/cuda/cext/_dispatcher.cpp +1098 -0
  19. numba_cuda/numba/cuda/cext/_dispatcher.cpython-312-aarch64-linux-gnu.so +0 -0
  20. numba_cuda/numba/cuda/cext/_hashtable.cpp +532 -0
  21. numba_cuda/numba/cuda/cext/_hashtable.h +135 -0
  22. numba_cuda/numba/cuda/cext/_helperlib.c +71 -0
  23. numba_cuda/numba/cuda/cext/_helperlib.cpython-312-aarch64-linux-gnu.so +0 -0
  24. numba_cuda/numba/cuda/cext/_helpermod.c +82 -0
  25. numba_cuda/numba/cuda/cext/_pymodule.h +38 -0
  26. numba_cuda/numba/cuda/cext/_typeconv.cpp +206 -0
  27. numba_cuda/numba/cuda/cext/_typeconv.cpython-312-aarch64-linux-gnu.so +0 -0
  28. numba_cuda/numba/cuda/cext/_typeof.cpp +1159 -0
  29. numba_cuda/numba/cuda/cext/_typeof.h +19 -0
  30. numba_cuda/numba/cuda/cext/capsulethunk.h +111 -0
  31. numba_cuda/numba/cuda/cext/mviewbuf.c +385 -0
  32. numba_cuda/numba/cuda/cext/mviewbuf.cpython-312-aarch64-linux-gnu.so +0 -0
  33. numba_cuda/numba/cuda/cext/typeconv.cpp +212 -0
  34. numba_cuda/numba/cuda/cext/typeconv.hpp +101 -0
  35. numba_cuda/numba/cuda/cg.py +67 -0
  36. numba_cuda/numba/cuda/cgutils.py +1294 -0
  37. numba_cuda/numba/cuda/cloudpickle/__init__.py +21 -0
  38. numba_cuda/numba/cuda/cloudpickle/cloudpickle.py +1598 -0
  39. numba_cuda/numba/cuda/cloudpickle/cloudpickle_fast.py +17 -0
  40. numba_cuda/numba/cuda/codegen.py +541 -0
  41. numba_cuda/numba/cuda/compiler.py +1396 -0
  42. numba_cuda/numba/cuda/core/analysis.py +758 -0
  43. numba_cuda/numba/cuda/core/annotations/__init__.py +0 -0
  44. numba_cuda/numba/cuda/core/annotations/pretty_annotate.py +288 -0
  45. numba_cuda/numba/cuda/core/annotations/type_annotations.py +305 -0
  46. numba_cuda/numba/cuda/core/base.py +1332 -0
  47. numba_cuda/numba/cuda/core/boxing.py +1411 -0
  48. numba_cuda/numba/cuda/core/bytecode.py +728 -0
  49. numba_cuda/numba/cuda/core/byteflow.py +2346 -0
  50. numba_cuda/numba/cuda/core/caching.py +744 -0
  51. numba_cuda/numba/cuda/core/callconv.py +392 -0
  52. numba_cuda/numba/cuda/core/codegen.py +171 -0
  53. numba_cuda/numba/cuda/core/compiler.py +199 -0
  54. numba_cuda/numba/cuda/core/compiler_lock.py +85 -0
  55. numba_cuda/numba/cuda/core/compiler_machinery.py +497 -0
  56. numba_cuda/numba/cuda/core/config.py +650 -0
  57. numba_cuda/numba/cuda/core/consts.py +124 -0
  58. numba_cuda/numba/cuda/core/controlflow.py +989 -0
  59. numba_cuda/numba/cuda/core/entrypoints.py +57 -0
  60. numba_cuda/numba/cuda/core/environment.py +66 -0
  61. numba_cuda/numba/cuda/core/errors.py +917 -0
  62. numba_cuda/numba/cuda/core/event.py +511 -0
  63. numba_cuda/numba/cuda/core/funcdesc.py +330 -0
  64. numba_cuda/numba/cuda/core/generators.py +387 -0
  65. numba_cuda/numba/cuda/core/imputils.py +509 -0
  66. numba_cuda/numba/cuda/core/inline_closurecall.py +1787 -0
  67. numba_cuda/numba/cuda/core/interpreter.py +3617 -0
  68. numba_cuda/numba/cuda/core/ir.py +1812 -0
  69. numba_cuda/numba/cuda/core/ir_utils.py +2638 -0
  70. numba_cuda/numba/cuda/core/optional.py +129 -0
  71. numba_cuda/numba/cuda/core/options.py +262 -0
  72. numba_cuda/numba/cuda/core/postproc.py +249 -0
  73. numba_cuda/numba/cuda/core/pythonapi.py +1859 -0
  74. numba_cuda/numba/cuda/core/registry.py +46 -0
  75. numba_cuda/numba/cuda/core/removerefctpass.py +123 -0
  76. numba_cuda/numba/cuda/core/rewrites/__init__.py +26 -0
  77. numba_cuda/numba/cuda/core/rewrites/ir_print.py +91 -0
  78. numba_cuda/numba/cuda/core/rewrites/registry.py +104 -0
  79. numba_cuda/numba/cuda/core/rewrites/static_binop.py +41 -0
  80. numba_cuda/numba/cuda/core/rewrites/static_getitem.py +189 -0
  81. numba_cuda/numba/cuda/core/rewrites/static_raise.py +100 -0
  82. numba_cuda/numba/cuda/core/sigutils.py +68 -0
  83. numba_cuda/numba/cuda/core/ssa.py +498 -0
  84. numba_cuda/numba/cuda/core/targetconfig.py +330 -0
  85. numba_cuda/numba/cuda/core/tracing.py +231 -0
  86. numba_cuda/numba/cuda/core/transforms.py +956 -0
  87. numba_cuda/numba/cuda/core/typed_passes.py +867 -0
  88. numba_cuda/numba/cuda/core/typeinfer.py +1950 -0
  89. numba_cuda/numba/cuda/core/unsafe/__init__.py +0 -0
  90. numba_cuda/numba/cuda/core/unsafe/bytes.py +67 -0
  91. numba_cuda/numba/cuda/core/unsafe/eh.py +67 -0
  92. numba_cuda/numba/cuda/core/unsafe/refcount.py +98 -0
  93. numba_cuda/numba/cuda/core/untyped_passes.py +1979 -0
  94. numba_cuda/numba/cuda/cpython/builtins.py +1153 -0
  95. numba_cuda/numba/cuda/cpython/charseq.py +1218 -0
  96. numba_cuda/numba/cuda/cpython/cmathimpl.py +560 -0
  97. numba_cuda/numba/cuda/cpython/enumimpl.py +103 -0
  98. numba_cuda/numba/cuda/cpython/iterators.py +167 -0
  99. numba_cuda/numba/cuda/cpython/listobj.py +1326 -0
  100. numba_cuda/numba/cuda/cpython/mathimpl.py +499 -0
  101. numba_cuda/numba/cuda/cpython/numbers.py +1475 -0
  102. numba_cuda/numba/cuda/cpython/rangeobj.py +289 -0
  103. numba_cuda/numba/cuda/cpython/slicing.py +322 -0
  104. numba_cuda/numba/cuda/cpython/tupleobj.py +456 -0
  105. numba_cuda/numba/cuda/cpython/unicode.py +2865 -0
  106. numba_cuda/numba/cuda/cpython/unicode_support.py +1597 -0
  107. numba_cuda/numba/cuda/cpython/unsafe/__init__.py +0 -0
  108. numba_cuda/numba/cuda/cpython/unsafe/numbers.py +64 -0
  109. numba_cuda/numba/cuda/cpython/unsafe/tuple.py +92 -0
  110. numba_cuda/numba/cuda/cuda_paths.py +691 -0
  111. numba_cuda/numba/cuda/cudadecl.py +543 -0
  112. numba_cuda/numba/cuda/cudadrv/__init__.py +14 -0
  113. numba_cuda/numba/cuda/cudadrv/devicearray.py +954 -0
  114. numba_cuda/numba/cuda/cudadrv/devices.py +249 -0
  115. numba_cuda/numba/cuda/cudadrv/driver.py +3238 -0
  116. numba_cuda/numba/cuda/cudadrv/drvapi.py +435 -0
  117. numba_cuda/numba/cuda/cudadrv/dummyarray.py +562 -0
  118. numba_cuda/numba/cuda/cudadrv/enums.py +613 -0
  119. numba_cuda/numba/cuda/cudadrv/error.py +48 -0
  120. numba_cuda/numba/cuda/cudadrv/libs.py +220 -0
  121. numba_cuda/numba/cuda/cudadrv/linkable_code.py +184 -0
  122. numba_cuda/numba/cuda/cudadrv/mappings.py +14 -0
  123. numba_cuda/numba/cuda/cudadrv/ndarray.py +26 -0
  124. numba_cuda/numba/cuda/cudadrv/nvrtc.py +193 -0
  125. numba_cuda/numba/cuda/cudadrv/nvvm.py +756 -0
  126. numba_cuda/numba/cuda/cudadrv/rtapi.py +13 -0
  127. numba_cuda/numba/cuda/cudadrv/runtime.py +34 -0
  128. numba_cuda/numba/cuda/cudaimpl.py +983 -0
  129. numba_cuda/numba/cuda/cudamath.py +149 -0
  130. numba_cuda/numba/cuda/datamodel/__init__.py +7 -0
  131. numba_cuda/numba/cuda/datamodel/cuda_manager.py +66 -0
  132. numba_cuda/numba/cuda/datamodel/cuda_models.py +1446 -0
  133. numba_cuda/numba/cuda/datamodel/cuda_packer.py +224 -0
  134. numba_cuda/numba/cuda/datamodel/cuda_registry.py +22 -0
  135. numba_cuda/numba/cuda/datamodel/cuda_testing.py +153 -0
  136. numba_cuda/numba/cuda/datamodel/manager.py +11 -0
  137. numba_cuda/numba/cuda/datamodel/models.py +9 -0
  138. numba_cuda/numba/cuda/datamodel/packer.py +9 -0
  139. numba_cuda/numba/cuda/datamodel/registry.py +11 -0
  140. numba_cuda/numba/cuda/datamodel/testing.py +11 -0
  141. numba_cuda/numba/cuda/debuginfo.py +997 -0
  142. numba_cuda/numba/cuda/decorators.py +294 -0
  143. numba_cuda/numba/cuda/descriptor.py +35 -0
  144. numba_cuda/numba/cuda/device_init.py +155 -0
  145. numba_cuda/numba/cuda/deviceufunc.py +1021 -0
  146. numba_cuda/numba/cuda/dispatcher.py +2463 -0
  147. numba_cuda/numba/cuda/errors.py +72 -0
  148. numba_cuda/numba/cuda/extending.py +697 -0
  149. numba_cuda/numba/cuda/flags.py +178 -0
  150. numba_cuda/numba/cuda/fp16.py +357 -0
  151. numba_cuda/numba/cuda/include/12/cuda_bf16.h +5118 -0
  152. numba_cuda/numba/cuda/include/12/cuda_bf16.hpp +3865 -0
  153. numba_cuda/numba/cuda/include/12/cuda_fp16.h +5363 -0
  154. numba_cuda/numba/cuda/include/12/cuda_fp16.hpp +3483 -0
  155. numba_cuda/numba/cuda/include/13/cuda_bf16.h +5118 -0
  156. numba_cuda/numba/cuda/include/13/cuda_bf16.hpp +3865 -0
  157. numba_cuda/numba/cuda/include/13/cuda_fp16.h +5363 -0
  158. numba_cuda/numba/cuda/include/13/cuda_fp16.hpp +3483 -0
  159. numba_cuda/numba/cuda/initialize.py +24 -0
  160. numba_cuda/numba/cuda/intrinsics.py +531 -0
  161. numba_cuda/numba/cuda/itanium_mangler.py +214 -0
  162. numba_cuda/numba/cuda/kernels/__init__.py +2 -0
  163. numba_cuda/numba/cuda/kernels/reduction.py +265 -0
  164. numba_cuda/numba/cuda/kernels/transpose.py +65 -0
  165. numba_cuda/numba/cuda/libdevice.py +3386 -0
  166. numba_cuda/numba/cuda/libdevicedecl.py +20 -0
  167. numba_cuda/numba/cuda/libdevicefuncs.py +1060 -0
  168. numba_cuda/numba/cuda/libdeviceimpl.py +88 -0
  169. numba_cuda/numba/cuda/locks.py +19 -0
  170. numba_cuda/numba/cuda/lowering.py +1980 -0
  171. numba_cuda/numba/cuda/mathimpl.py +374 -0
  172. numba_cuda/numba/cuda/memory_management/__init__.py +4 -0
  173. numba_cuda/numba/cuda/memory_management/memsys.cu +99 -0
  174. numba_cuda/numba/cuda/memory_management/memsys.cuh +22 -0
  175. numba_cuda/numba/cuda/memory_management/nrt.cu +212 -0
  176. numba_cuda/numba/cuda/memory_management/nrt.cuh +48 -0
  177. numba_cuda/numba/cuda/memory_management/nrt.py +390 -0
  178. numba_cuda/numba/cuda/memory_management/nrt_context.py +438 -0
  179. numba_cuda/numba/cuda/misc/appdirs.py +594 -0
  180. numba_cuda/numba/cuda/misc/cffiimpl.py +24 -0
  181. numba_cuda/numba/cuda/misc/coverage_support.py +43 -0
  182. numba_cuda/numba/cuda/misc/dump_style.py +41 -0
  183. numba_cuda/numba/cuda/misc/findlib.py +75 -0
  184. numba_cuda/numba/cuda/misc/firstlinefinder.py +96 -0
  185. numba_cuda/numba/cuda/misc/gdb_hook.py +240 -0
  186. numba_cuda/numba/cuda/misc/literal.py +28 -0
  187. numba_cuda/numba/cuda/misc/llvm_pass_timings.py +412 -0
  188. numba_cuda/numba/cuda/misc/special.py +94 -0
  189. numba_cuda/numba/cuda/models.py +56 -0
  190. numba_cuda/numba/cuda/np/arraymath.py +5130 -0
  191. numba_cuda/numba/cuda/np/arrayobj.py +7635 -0
  192. numba_cuda/numba/cuda/np/extensions.py +11 -0
  193. numba_cuda/numba/cuda/np/linalg.py +3087 -0
  194. numba_cuda/numba/cuda/np/math/__init__.py +0 -0
  195. numba_cuda/numba/cuda/np/math/cmathimpl.py +558 -0
  196. numba_cuda/numba/cuda/np/math/mathimpl.py +487 -0
  197. numba_cuda/numba/cuda/np/math/numbers.py +1461 -0
  198. numba_cuda/numba/cuda/np/npdatetime.py +969 -0
  199. numba_cuda/numba/cuda/np/npdatetime_helpers.py +217 -0
  200. numba_cuda/numba/cuda/np/npyfuncs.py +1808 -0
  201. numba_cuda/numba/cuda/np/npyimpl.py +1027 -0
  202. numba_cuda/numba/cuda/np/numpy_support.py +798 -0
  203. numba_cuda/numba/cuda/np/polynomial/__init__.py +4 -0
  204. numba_cuda/numba/cuda/np/polynomial/polynomial_core.py +242 -0
  205. numba_cuda/numba/cuda/np/polynomial/polynomial_functions.py +380 -0
  206. numba_cuda/numba/cuda/np/ufunc/__init__.py +4 -0
  207. numba_cuda/numba/cuda/np/ufunc/decorators.py +203 -0
  208. numba_cuda/numba/cuda/np/ufunc/sigparse.py +68 -0
  209. numba_cuda/numba/cuda/np/ufunc/ufuncbuilder.py +65 -0
  210. numba_cuda/numba/cuda/np/ufunc_db.py +1282 -0
  211. numba_cuda/numba/cuda/np/unsafe/__init__.py +0 -0
  212. numba_cuda/numba/cuda/np/unsafe/ndarray.py +84 -0
  213. numba_cuda/numba/cuda/nvvmutils.py +254 -0
  214. numba_cuda/numba/cuda/printimpl.py +126 -0
  215. numba_cuda/numba/cuda/random.py +308 -0
  216. numba_cuda/numba/cuda/reshape_funcs.cu +156 -0
  217. numba_cuda/numba/cuda/serialize.py +267 -0
  218. numba_cuda/numba/cuda/simulator/__init__.py +63 -0
  219. numba_cuda/numba/cuda/simulator/_internal/__init__.py +4 -0
  220. numba_cuda/numba/cuda/simulator/_internal/cuda_bf16.py +2 -0
  221. numba_cuda/numba/cuda/simulator/api.py +179 -0
  222. numba_cuda/numba/cuda/simulator/bf16.py +4 -0
  223. numba_cuda/numba/cuda/simulator/compiler.py +38 -0
  224. numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +11 -0
  225. numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +462 -0
  226. numba_cuda/numba/cuda/simulator/cudadrv/devices.py +122 -0
  227. numba_cuda/numba/cuda/simulator/cudadrv/driver.py +66 -0
  228. numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +7 -0
  229. numba_cuda/numba/cuda/simulator/cudadrv/dummyarray.py +7 -0
  230. numba_cuda/numba/cuda/simulator/cudadrv/error.py +10 -0
  231. numba_cuda/numba/cuda/simulator/cudadrv/libs.py +10 -0
  232. numba_cuda/numba/cuda/simulator/cudadrv/linkable_code.py +61 -0
  233. numba_cuda/numba/cuda/simulator/cudadrv/nvrtc.py +11 -0
  234. numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +32 -0
  235. numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +22 -0
  236. numba_cuda/numba/cuda/simulator/dispatcher.py +11 -0
  237. numba_cuda/numba/cuda/simulator/kernel.py +320 -0
  238. numba_cuda/numba/cuda/simulator/kernelapi.py +509 -0
  239. numba_cuda/numba/cuda/simulator/memory_management/__init__.py +4 -0
  240. numba_cuda/numba/cuda/simulator/memory_management/nrt.py +21 -0
  241. numba_cuda/numba/cuda/simulator/reduction.py +19 -0
  242. numba_cuda/numba/cuda/simulator/tests/support.py +4 -0
  243. numba_cuda/numba/cuda/simulator/vector_types.py +65 -0
  244. numba_cuda/numba/cuda/simulator_init.py +18 -0
  245. numba_cuda/numba/cuda/stubs.py +624 -0
  246. numba_cuda/numba/cuda/target.py +505 -0
  247. numba_cuda/numba/cuda/testing.py +347 -0
  248. numba_cuda/numba/cuda/tests/__init__.py +62 -0
  249. numba_cuda/numba/cuda/tests/benchmarks/__init__.py +0 -0
  250. numba_cuda/numba/cuda/tests/benchmarks/test_kernel_launch.py +119 -0
  251. numba_cuda/numba/cuda/tests/cloudpickle_main_class.py +9 -0
  252. numba_cuda/numba/cuda/tests/core/serialize_usecases.py +113 -0
  253. numba_cuda/numba/cuda/tests/core/test_itanium_mangler.py +83 -0
  254. numba_cuda/numba/cuda/tests/core/test_serialize.py +371 -0
  255. numba_cuda/numba/cuda/tests/cudadrv/__init__.py +9 -0
  256. numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +147 -0
  257. numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +161 -0
  258. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +397 -0
  259. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +24 -0
  260. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +180 -0
  261. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +313 -0
  262. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +191 -0
  263. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +621 -0
  264. numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +247 -0
  265. numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +100 -0
  266. numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +200 -0
  267. numba_cuda/numba/cuda/tests/cudadrv/test_events.py +53 -0
  268. numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +72 -0
  269. numba_cuda/numba/cuda/tests/cudadrv/test_init.py +138 -0
  270. numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +43 -0
  271. numba_cuda/numba/cuda/tests/cudadrv/test_is_fp16.py +15 -0
  272. numba_cuda/numba/cuda/tests/cudadrv/test_linkable_code.py +58 -0
  273. numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +348 -0
  274. numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +128 -0
  275. numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py +301 -0
  276. numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +174 -0
  277. numba_cuda/numba/cuda/tests/cudadrv/test_nvrtc.py +28 -0
  278. numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +185 -0
  279. numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +39 -0
  280. numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +23 -0
  281. numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +38 -0
  282. numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +48 -0
  283. numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +44 -0
  284. numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +127 -0
  285. numba_cuda/numba/cuda/tests/cudapy/__init__.py +9 -0
  286. numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +231 -0
  287. numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +50 -0
  288. numba_cuda/numba/cuda/tests/cudapy/cg_cache_usecases.py +36 -0
  289. numba_cuda/numba/cuda/tests/cudapy/complex_usecases.py +116 -0
  290. numba_cuda/numba/cuda/tests/cudapy/enum_usecases.py +59 -0
  291. numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +62 -0
  292. numba_cuda/numba/cuda/tests/cudapy/jitlink.ptx +28 -0
  293. numba_cuda/numba/cuda/tests/cudapy/overload_usecases.py +33 -0
  294. numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +104 -0
  295. numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +47 -0
  296. numba_cuda/numba/cuda/tests/cudapy/test_analysis.py +1122 -0
  297. numba_cuda/numba/cuda/tests/cudapy/test_array.py +344 -0
  298. numba_cuda/numba/cuda/tests/cudapy/test_array_alignment.py +268 -0
  299. numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +203 -0
  300. numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +63 -0
  301. numba_cuda/numba/cuda/tests/cudapy/test_array_reductions.py +360 -0
  302. numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +1815 -0
  303. numba_cuda/numba/cuda/tests/cudapy/test_bfloat16.py +599 -0
  304. numba_cuda/numba/cuda/tests/cudapy/test_bfloat16_bindings.py +377 -0
  305. numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +160 -0
  306. numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +27 -0
  307. numba_cuda/numba/cuda/tests/cudapy/test_byteflow.py +98 -0
  308. numba_cuda/numba/cuda/tests/cudapy/test_cache_hints.py +210 -0
  309. numba_cuda/numba/cuda/tests/cudapy/test_caching.py +683 -0
  310. numba_cuda/numba/cuda/tests/cudapy/test_casting.py +265 -0
  311. numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +42 -0
  312. numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +718 -0
  313. numba_cuda/numba/cuda/tests/cudapy/test_complex.py +370 -0
  314. numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +23 -0
  315. numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +142 -0
  316. numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +178 -0
  317. numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +193 -0
  318. numba_cuda/numba/cuda/tests/cudapy/test_copy_propagate.py +131 -0
  319. numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +438 -0
  320. numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +94 -0
  321. numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +101 -0
  322. numba_cuda/numba/cuda/tests/cudapy/test_debug.py +105 -0
  323. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +978 -0
  324. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo_types.py +476 -0
  325. numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +500 -0
  326. numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +820 -0
  327. numba_cuda/numba/cuda/tests/cudapy/test_enums.py +152 -0
  328. numba_cuda/numba/cuda/tests/cudapy/test_errors.py +111 -0
  329. numba_cuda/numba/cuda/tests/cudapy/test_exception.py +170 -0
  330. numba_cuda/numba/cuda/tests/cudapy/test_extending.py +1088 -0
  331. numba_cuda/numba/cuda/tests/cudapy/test_extending_types.py +71 -0
  332. numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +265 -0
  333. numba_cuda/numba/cuda/tests/cudapy/test_flow_control.py +1433 -0
  334. numba_cuda/numba/cuda/tests/cudapy/test_forall.py +57 -0
  335. numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +34 -0
  336. numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +69 -0
  337. numba_cuda/numba/cuda/tests/cudapy/test_globals.py +62 -0
  338. numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +474 -0
  339. numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +167 -0
  340. numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +92 -0
  341. numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +39 -0
  342. numba_cuda/numba/cuda/tests/cudapy/test_inline.py +170 -0
  343. numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +255 -0
  344. numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +1219 -0
  345. numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +263 -0
  346. numba_cuda/numba/cuda/tests/cudapy/test_ir.py +598 -0
  347. numba_cuda/numba/cuda/tests/cudapy/test_ir_utils.py +276 -0
  348. numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +101 -0
  349. numba_cuda/numba/cuda/tests/cudapy/test_lang.py +68 -0
  350. numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +123 -0
  351. numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +194 -0
  352. numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +220 -0
  353. numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +173 -0
  354. numba_cuda/numba/cuda/tests/cudapy/test_make_function_to_jit_function.py +364 -0
  355. numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +47 -0
  356. numba_cuda/numba/cuda/tests/cudapy/test_math.py +842 -0
  357. numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +76 -0
  358. numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +78 -0
  359. numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +25 -0
  360. numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +145 -0
  361. numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +39 -0
  362. numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +82 -0
  363. numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +53 -0
  364. numba_cuda/numba/cuda/tests/cudapy/test_operator.py +504 -0
  365. numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +93 -0
  366. numba_cuda/numba/cuda/tests/cudapy/test_overload.py +402 -0
  367. numba_cuda/numba/cuda/tests/cudapy/test_powi.py +128 -0
  368. numba_cuda/numba/cuda/tests/cudapy/test_print.py +193 -0
  369. numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +37 -0
  370. numba_cuda/numba/cuda/tests/cudapy/test_random.py +117 -0
  371. numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +614 -0
  372. numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +130 -0
  373. numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +94 -0
  374. numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +83 -0
  375. numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +86 -0
  376. numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +40 -0
  377. numba_cuda/numba/cuda/tests/cudapy/test_sm.py +457 -0
  378. numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +233 -0
  379. numba_cuda/numba/cuda/tests/cudapy/test_ssa.py +454 -0
  380. numba_cuda/numba/cuda/tests/cudapy/test_stream_api.py +56 -0
  381. numba_cuda/numba/cuda/tests/cudapy/test_sync.py +277 -0
  382. numba_cuda/numba/cuda/tests/cudapy/test_tracing.py +200 -0
  383. numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +90 -0
  384. numba_cuda/numba/cuda/tests/cudapy/test_typeconv.py +333 -0
  385. numba_cuda/numba/cuda/tests/cudapy/test_typeinfer.py +538 -0
  386. numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +585 -0
  387. numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +42 -0
  388. numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +485 -0
  389. numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +312 -0
  390. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +23 -0
  391. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +183 -0
  392. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +40 -0
  393. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +40 -0
  394. numba_cuda/numba/cuda/tests/cudapy/test_warning.py +206 -0
  395. numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +446 -0
  396. numba_cuda/numba/cuda/tests/cudasim/__init__.py +9 -0
  397. numba_cuda/numba/cuda/tests/cudasim/support.py +9 -0
  398. numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +111 -0
  399. numba_cuda/numba/cuda/tests/data/__init__.py +2 -0
  400. numba_cuda/numba/cuda/tests/data/cta_barrier.cu +28 -0
  401. numba_cuda/numba/cuda/tests/data/cuda_include.cu +10 -0
  402. numba_cuda/numba/cuda/tests/data/error.cu +12 -0
  403. numba_cuda/numba/cuda/tests/data/include/add.cuh +8 -0
  404. numba_cuda/numba/cuda/tests/data/jitlink.cu +28 -0
  405. numba_cuda/numba/cuda/tests/data/jitlink.ptx +49 -0
  406. numba_cuda/numba/cuda/tests/data/warn.cu +12 -0
  407. numba_cuda/numba/cuda/tests/doc_examples/__init__.py +9 -0
  408. numba_cuda/numba/cuda/tests/doc_examples/ffi/__init__.py +2 -0
  409. numba_cuda/numba/cuda/tests/doc_examples/ffi/functions.cu +54 -0
  410. numba_cuda/numba/cuda/tests/doc_examples/ffi/include/mul.cuh +8 -0
  411. numba_cuda/numba/cuda/tests/doc_examples/ffi/saxpy.cu +14 -0
  412. numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +86 -0
  413. numba_cuda/numba/cuda/tests/doc_examples/test_cpointer.py +68 -0
  414. numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +81 -0
  415. numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +141 -0
  416. numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +160 -0
  417. numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +180 -0
  418. numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +119 -0
  419. numba_cuda/numba/cuda/tests/doc_examples/test_random.py +66 -0
  420. numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +80 -0
  421. numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +206 -0
  422. numba_cuda/numba/cuda/tests/doc_examples/test_ufunc.py +53 -0
  423. numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +76 -0
  424. numba_cuda/numba/cuda/tests/nocuda/__init__.py +9 -0
  425. numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +452 -0
  426. numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +48 -0
  427. numba_cuda/numba/cuda/tests/nocuda/test_import.py +63 -0
  428. numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +252 -0
  429. numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +59 -0
  430. numba_cuda/numba/cuda/tests/nrt/__init__.py +9 -0
  431. numba_cuda/numba/cuda/tests/nrt/test_nrt.py +387 -0
  432. numba_cuda/numba/cuda/tests/nrt/test_nrt_refct.py +124 -0
  433. numba_cuda/numba/cuda/tests/support.py +900 -0
  434. numba_cuda/numba/cuda/typeconv/__init__.py +4 -0
  435. numba_cuda/numba/cuda/typeconv/castgraph.py +137 -0
  436. numba_cuda/numba/cuda/typeconv/rules.py +63 -0
  437. numba_cuda/numba/cuda/typeconv/typeconv.py +121 -0
  438. numba_cuda/numba/cuda/types/__init__.py +233 -0
  439. numba_cuda/numba/cuda/types/__init__.pyi +167 -0
  440. numba_cuda/numba/cuda/types/abstract.py +9 -0
  441. numba_cuda/numba/cuda/types/common.py +9 -0
  442. numba_cuda/numba/cuda/types/containers.py +9 -0
  443. numba_cuda/numba/cuda/types/cuda_abstract.py +533 -0
  444. numba_cuda/numba/cuda/types/cuda_common.py +110 -0
  445. numba_cuda/numba/cuda/types/cuda_containers.py +971 -0
  446. numba_cuda/numba/cuda/types/cuda_function_type.py +230 -0
  447. numba_cuda/numba/cuda/types/cuda_functions.py +798 -0
  448. numba_cuda/numba/cuda/types/cuda_iterators.py +120 -0
  449. numba_cuda/numba/cuda/types/cuda_misc.py +569 -0
  450. numba_cuda/numba/cuda/types/cuda_npytypes.py +690 -0
  451. numba_cuda/numba/cuda/types/cuda_scalars.py +280 -0
  452. numba_cuda/numba/cuda/types/ext_types.py +101 -0
  453. numba_cuda/numba/cuda/types/function_type.py +11 -0
  454. numba_cuda/numba/cuda/types/functions.py +9 -0
  455. numba_cuda/numba/cuda/types/iterators.py +9 -0
  456. numba_cuda/numba/cuda/types/misc.py +9 -0
  457. numba_cuda/numba/cuda/types/npytypes.py +9 -0
  458. numba_cuda/numba/cuda/types/scalars.py +9 -0
  459. numba_cuda/numba/cuda/typing/__init__.py +19 -0
  460. numba_cuda/numba/cuda/typing/arraydecl.py +939 -0
  461. numba_cuda/numba/cuda/typing/asnumbatype.py +130 -0
  462. numba_cuda/numba/cuda/typing/bufproto.py +70 -0
  463. numba_cuda/numba/cuda/typing/builtins.py +1209 -0
  464. numba_cuda/numba/cuda/typing/cffi_utils.py +219 -0
  465. numba_cuda/numba/cuda/typing/cmathdecl.py +47 -0
  466. numba_cuda/numba/cuda/typing/collections.py +138 -0
  467. numba_cuda/numba/cuda/typing/context.py +782 -0
  468. numba_cuda/numba/cuda/typing/ctypes_utils.py +125 -0
  469. numba_cuda/numba/cuda/typing/dictdecl.py +63 -0
  470. numba_cuda/numba/cuda/typing/enumdecl.py +74 -0
  471. numba_cuda/numba/cuda/typing/listdecl.py +147 -0
  472. numba_cuda/numba/cuda/typing/mathdecl.py +158 -0
  473. numba_cuda/numba/cuda/typing/npdatetime.py +322 -0
  474. numba_cuda/numba/cuda/typing/npydecl.py +749 -0
  475. numba_cuda/numba/cuda/typing/setdecl.py +115 -0
  476. numba_cuda/numba/cuda/typing/templates.py +1446 -0
  477. numba_cuda/numba/cuda/typing/typeof.py +301 -0
  478. numba_cuda/numba/cuda/ufuncs.py +746 -0
  479. numba_cuda/numba/cuda/utils.py +724 -0
  480. numba_cuda/numba/cuda/vector_types.py +214 -0
  481. numba_cuda/numba/cuda/vectorizers.py +260 -0
  482. numba_cuda-0.22.0.dist-info/METADATA +109 -0
  483. numba_cuda-0.22.0.dist-info/RECORD +487 -0
  484. numba_cuda-0.22.0.dist-info/WHEEL +6 -0
  485. numba_cuda-0.22.0.dist-info/licenses/LICENSE +26 -0
  486. numba_cuda-0.22.0.dist-info/licenses/LICENSE.numba +24 -0
  487. numba_cuda-0.22.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,344 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
4
+ import numpy as np
5
+
6
+ from numba.cuda.testing import unittest, CUDATestCase
7
+ from numba.cuda.testing import skip_on_cudasim, skip_unless_cudasim
8
+ from numba import cuda
9
+ from numba.cuda import config
10
+
11
+
12
# Constructors exercised by the ``*_array_like`` tests. With the CUDA simulator
# enabled only the device and pinned variants are listed; otherwise the mapped
# variant is covered as well.
ARRAY_LIKE_FUNCTIONS = (
    (cuda.device_array_like, cuda.pinned_array_like)
    if config.ENABLE_CUDASIM
    else (
        cuda.device_array_like,
        cuda.mapped_array_like,
        cuda.pinned_array_like,
    )
)
20
+
21
+
22
def array_reshape1d(arr, newshape, got):
    """Reshape ``arr`` to ``newshape`` and copy the 1-D result into ``got``."""
    reshaped = arr.reshape(newshape)
    length = reshaped.shape[0]
    for idx in range(length):
        got[idx] = reshaped[idx]
26
+
27
+
28
def array_reshape2d(arr, newshape, got):
    """Reshape ``arr`` to ``newshape`` and copy the 2-D result into ``got``."""
    reshaped = arr.reshape(newshape)
    rows = reshaped.shape[0]
    for r in range(rows):
        cols = reshaped.shape[1]
        for c in range(cols):
            got[r, c] = reshaped[r, c]
33
+
34
+
35
def array_reshape3d(arr, newshape, got):
    """Reshape ``arr`` to ``newshape`` and copy the 3-D result into ``got``."""
    reshaped = arr.reshape(newshape)
    planes = reshaped.shape[0]
    for p in range(planes):
        rows = reshaped.shape[1]
        for r in range(rows):
            cols = reshaped.shape[2]
            for c in range(cols):
                got[p, r, c] = reshaped[p, r, c]
41
+
42
+
43
def array_reshape(arr, newshape):
    """Return ``arr`` reshaped to ``newshape`` (reference implementation)."""
    reshaped = arr.reshape(newshape)
    return reshaped
45
+
46
+
47
class TestCudaArray(CUDATestCase):
    """Tests for creating, copying, mirroring and reshaping CUDA arrays."""

    def test_gpu_array_zero_length(self):
        # A zero-length array must keep its shape and size when copied to the
        # device and back.
        x = np.arange(0)
        dx = cuda.to_device(x)
        hx = dx.copy_to_host()
        self.assertEqual(x.shape, dx.shape)
        self.assertEqual(x.size, dx.size)
        self.assertEqual(x.shape, hx.shape)
        self.assertEqual(x.size, hx.size)

    def test_null_shape(self):
        # Arrays created with an empty shape report an empty shape tuple.
        null_shape = ()
        shape1 = cuda.device_array(()).shape
        shape2 = cuda.device_array_like(np.ndarray(())).shape
        self.assertEqual(shape1, null_shape)
        self.assertEqual(shape2, null_shape)

    def test_gpu_array_strided(self):
        @cuda.jit("void(double[:])")
        def kernel(x):
            i = cuda.grid(1)
            if i < x.shape[0]:
                x[i] = i

        x = np.arange(10, dtype=np.double)
        # Reinterpret x's buffer as bytes, then view nine doubles that start
        # four bytes into that buffer.
        y = np.ndarray(shape=10 * 8, buffer=x, dtype=np.byte)
        z = np.ndarray(9, buffer=y[4:-4], dtype=np.double)
        kernel[10, 10](z)
        self.assertTrue(np.allclose(z, list(range(9))))

    def test_gpu_array_interleaved(self):
        # NOTE(review): this kernel is never launched. It is retained so the
        # test keeps compiling it for the explicit signature as before.
        @cuda.jit("void(double[:], double[:])")
        def copykernel(x, y):
            i = cuda.grid(1)
            if i < x.shape[0]:
                x[i] = i
                y[i] = i

        x = np.arange(10, dtype=np.double)
        y = x[:-1:2]  # every other element: a non-contiguous view

        # Automatic transfer of a non-contiguous array must be rejected.
        with self.assertRaises(
            ValueError,
            msg=(
                "Should raise exception complaining the "
                "contiguous-ness of the array."
            ),
        ):
            cuda.devicearray.auto_device(y)
        # TODO: decide whether interleaved views (y = x[:-1:2], z = x[1::2])
        # should be supported, in which case copykernel[1, n](y, x) ought to
        # be launched and its results checked here.

    def test_auto_device_const(self):
        # auto_device accepts a plain scalar; the value survives a round trip.
        d, _ = cuda.devicearray.auto_device(2)
        self.assertTrue(np.all(d.copy_to_host() == np.array(2)))

    def _test_array_like_same(self, like_func, array):
        """
        Tests of *_array_like where shape, strides, dtype, and flags should
        all be equal.
        """
        array_like = like_func(array)
        self.assertEqual(array.shape, array_like.shape)
        self.assertEqual(array.strides, array_like.strides)
        self.assertEqual(array.dtype, array_like.dtype)
        self.assertEqual(
            array.flags["C_CONTIGUOUS"], array_like.flags["C_CONTIGUOUS"]
        )
        self.assertEqual(
            array.flags["F_CONTIGUOUS"], array_like.flags["F_CONTIGUOUS"]
        )

    def _assert_like_same_for_all(self, array):
        """Run _test_array_like_same once per entry in ARRAY_LIKE_FUNCTIONS."""
        for like_func in ARRAY_LIKE_FUNCTIONS:
            with self.subTest(like_func=like_func):
                self._test_array_like_same(like_func, array)

    def test_array_like_1d(self):
        self._assert_like_same_for_all(cuda.device_array(10, order="C"))

    def test_array_like_2d(self):
        self._assert_like_same_for_all(
            cuda.device_array((10, 12), order="C")
        )

    def test_array_like_2d_transpose(self):
        # NOTE(review): despite the name, no transpose is applied, so this
        # currently repeats test_array_like_2d - confirm the intent.
        self._assert_like_same_for_all(
            cuda.device_array((10, 12), order="C")
        )

    def test_array_like_3d(self):
        self._assert_like_same_for_all(
            cuda.device_array((10, 12, 14), order="C")
        )

    def test_array_like_1d_f(self):
        self._assert_like_same_for_all(cuda.device_array(10, order="F"))

    def test_array_like_2d_f(self):
        self._assert_like_same_for_all(
            cuda.device_array((10, 12), order="F")
        )

    def test_array_like_2d_f_transpose(self):
        # NOTE(review): despite the name, no transpose is applied, so this
        # currently repeats test_array_like_2d_f - confirm the intent.
        self._assert_like_same_for_all(
            cuda.device_array((10, 12), order="F")
        )

    def test_array_like_3d_f(self):
        self._assert_like_same_for_all(
            cuda.device_array((10, 12, 14), order="F")
        )

    def _test_array_like_view(self, like_func, view, d_view):
        """
        Tests of device_array_like where the original array is a view - the
        strides should not be equal because a contiguous array is expected.
        """
        nb_like = like_func(d_view)
        self.assertEqual(d_view.shape, nb_like.shape)
        self.assertEqual(d_view.dtype, nb_like.dtype)

        # Use NumPy as a reference for the expected strides
        np_like = np.zeros_like(view)
        self.assertEqual(nb_like.strides, np_like.strides)
        self.assertEqual(
            nb_like.flags["C_CONTIGUOUS"], np_like.flags["C_CONTIGUOUS"]
        )
        self.assertEqual(
            nb_like.flags["F_CONTIGUOUS"], np_like.flags["F_CONTIGUOUS"]
        )

    def _assert_like_view_for_all(self, view, d_view):
        """Run _test_array_like_view once per entry in ARRAY_LIKE_FUNCTIONS."""
        for like_func in ARRAY_LIKE_FUNCTIONS:
            with self.subTest(like_func=like_func):
                self._test_array_like_view(like_func, view, d_view)

    def test_array_like_1d_view(self):
        shape = 10
        view = np.zeros(shape)[::2]
        d_view = cuda.device_array(shape)[::2]
        self._assert_like_view_for_all(view, d_view)

    def test_array_like_1d_view_f(self):
        shape = 10
        view = np.zeros(shape, order="F")[::2]
        d_view = cuda.device_array(shape, order="F")[::2]
        self._assert_like_view_for_all(view, d_view)

    def test_array_like_2d_view(self):
        shape = (10, 12)
        view = np.zeros(shape)[::2, ::2]
        d_view = cuda.device_array(shape)[::2, ::2]
        self._assert_like_view_for_all(view, d_view)

    def test_array_like_2d_view_f(self):
        shape = (10, 12)
        view = np.zeros(shape, order="F")[::2, ::2]
        d_view = cuda.device_array(shape, order="F")[::2, ::2]
        self._assert_like_view_for_all(view, d_view)

    @skip_on_cudasim("Numba and NumPy stride semantics differ for transpose")
    def test_array_like_2d_view_transpose_device(self):
        shape = (10, 12)
        d_view = cuda.device_array(shape)[::2, ::2].T
        for like_func in ARRAY_LIKE_FUNCTIONS:
            with self.subTest(like_func=like_func):
                # This is a special case (see issue #4974) because creating the
                # transpose creates a new contiguous allocation with different
                # strides. In this case, rather than comparing against NumPy,
                # we can only compare against expected values.
                like = like_func(d_view)
                self.assertEqual(d_view.shape, like.shape)
                self.assertEqual(d_view.dtype, like.dtype)
                self.assertEqual((40, 8), like.strides)
                self.assertTrue(like.flags["C_CONTIGUOUS"])
                self.assertFalse(like.flags["F_CONTIGUOUS"])

    @skip_unless_cudasim(
        "Numba and NumPy stride semantics differ for transpose"
    )
    def test_array_like_2d_view_transpose_simulator(self):
        shape = (10, 12)
        view = np.zeros(shape)[::2, ::2].T
        d_view = cuda.device_array(shape)[::2, ::2].T
        for like_func in ARRAY_LIKE_FUNCTIONS:
            with self.subTest(like_func=like_func):
                # On the simulator, the transpose has different strides to on a
                # CUDA device (See issue #4974). Here we can compare strides
                # against NumPy as a reference.
                np_like = np.zeros_like(view)
                nb_like = like_func(d_view)
                self.assertEqual(d_view.shape, nb_like.shape)
                self.assertEqual(d_view.dtype, nb_like.dtype)
                self.assertEqual(np_like.strides, nb_like.strides)
                self.assertEqual(
                    np_like.flags["C_CONTIGUOUS"],
                    nb_like.flags["C_CONTIGUOUS"],
                )
                self.assertEqual(
                    np_like.flags["F_CONTIGUOUS"],
                    nb_like.flags["F_CONTIGUOUS"],
                )

    def test_array_like_2d_view_f_transpose(self):
        shape = (10, 12)
        view = np.zeros(shape, order="F")[::2, ::2].T
        d_view = cuda.device_array(shape, order="F")[::2, ::2].T
        self._assert_like_view_for_all(view, d_view)

    @skip_on_cudasim("Kernel overloads not created in the simulator")
    def test_issue_4628(self):
        # CUDA Device arrays were reported as always being typed with 'A' order
        # so launching the kernel with a host array and then a device array
        # resulted in two overloads being compiled - one for 'C' order from
        # the host array, and one for 'A' order from the device array. With the
        # resolution of this issue, the order of the device array is also 'C',
        # so after the kernel launches there should only be one overload of
        # the function.
        @cuda.jit
        def func(A, out):
            i = cuda.grid(1)
            out[i] = A[i] * 2

        n = 128
        a = np.ones((n,))
        d_a = cuda.to_device(a)
        result = np.zeros((n,))

        func[1, 128](a, result)
        func[1, 128](d_a, result)

        self.assertEqual(1, len(func.overloads))

    def test_array_reshape(self):
        def check(pyfunc, kernelfunc, arr, shape):
            # Compare the kernel's reshaped copy against the host reference.
            kernel = cuda.jit(kernelfunc)
            expected = pyfunc(arr, shape)
            got = np.zeros(expected.shape, dtype=arr.dtype)
            kernel[1, 1](arr, shape, got)
            self.assertPreciseEqual(got, expected)

        # 0-sized arrays
        def check_empty(arr):
            check(array_reshape, array_reshape1d, arr, 0)
            check(array_reshape, array_reshape1d, arr, (0,))
            check(array_reshape, array_reshape3d, arr, (1, 0, 2))

        # C-contiguous
        arr = np.arange(24)
        check(array_reshape, array_reshape1d, arr, (24,))
        check(array_reshape, array_reshape2d, arr, (4, 6))
        check(array_reshape, array_reshape2d, arr, (8, 3))
        check(array_reshape, array_reshape3d, arr, (8, 1, 3))

        arr = np.arange(24).reshape((1, 8, 1, 1, 3, 1))
        check(array_reshape, array_reshape1d, arr, (24,))
        check(array_reshape, array_reshape2d, arr, (4, 6))
        check(array_reshape, array_reshape2d, arr, (8, 3))
        check(array_reshape, array_reshape3d, arr, (8, 1, 3))

        # Test negative shape value
        arr = np.arange(25).reshape(5, 5)
        check(array_reshape, array_reshape1d, arr, -1)
        check(array_reshape, array_reshape1d, arr, (-1,))
        check(array_reshape, array_reshape2d, arr, (-1, 5))
        check(array_reshape, array_reshape3d, arr, (5, -1, 5))
        check(array_reshape, array_reshape3d, arr, (5, 5, -1))

        arr = np.array([])
        check_empty(arr)
341
+
342
+
343
# Run the tests in this module when it is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
@@ -0,0 +1,268 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
4
+ import re
5
+ import itertools
6
+ import numpy as np
7
+ from numba import cuda
8
+ from numba.cuda import HAS_NUMBA
9
+
10
+ if HAS_NUMBA:
11
+ from numba.core.errors import TypingError
12
+ else:
13
+ from numba.cuda.core.errors import TypingError
14
+ from numba.cuda.testing import (
15
+ CUDATestCase,
16
+ skip_on_cudasim,
17
+ skip_unless_cudasim,
18
+ )
19
+ import unittest
20
+
21
+
22
# Set to true if you want to see dots printed for each subtest.
NOISY = False


# The alignment of the local and shared memory arrays is verified by searching
# the LLVM IR of the generated kernel with the regexes below.

# Shared memory example:
#   @"_cudapy_smem_38" = addrspace(3) global [1 x i8] undef, align 16
SMEM_PATTERN = re.compile(r'^@"_cudapy_smem_\d+".*?align (\d+)', re.MULTILINE)

# Local memory example:
#   %"_cudapy_lmem" = alloca [1 x i8], align 64
LMEM_PATTERN = re.compile(r'^\s*%"_cudapy_lmem".*?align (\d+)', re.MULTILINE)


DTYPES = [np.uint8, np.uint32, np.uint64]

# Field layouts of the record dtypes appended to DTYPES below.
_RECORD_FIELDS = (
    [("a", np.uint8), ("b", np.int32), ("c", np.float64)],
    [("a", np.uint32), ("b", np.uint8)],
    [
        ("a", np.uint8),
        ("b", np.int32),
        ("c", np.float64),
        ("d", np.complex64),
        ("e", (np.uint8, 5)),
    ],
)

# Add in some record dtypes with and without alignment.
for align in (True, False):
    DTYPES.extend(np.dtype(fields, align=align) for fields in _RECORD_FIELDS)
75
+
76
+ # N.B. We name the test class TestArrayAddressAlignment to avoid name conflict
77
+ # with the test_alignment.TestArrayAlignment class.
78
+
79
+
80
@skip_on_cudasim("Array alignment not supported on cudasim")
class TestArrayAddressAlignment(CUDATestCase):
    """
    Test cuda.local.array and cuda.shared.array support for an alignment
    keyword argument.
    """

    def test_array_alignment_1d(self):
        shapes = (1, 8, 50)
        alignments = (None, 16, 64, 256)
        array_types = [(0, "local"), (1, "shared")]
        self._do_test(array_types, shapes, DTYPES, alignments)

    def test_array_alignment_2d(self):
        shapes = ((2, 3),)
        alignments = (None, 16, 64, 256)
        array_types = [(0, "local"), (1, "shared")]
        self._do_test(array_types, shapes, DTYPES, alignments)

    def test_array_alignment_3d(self):
        shapes = ((2, 3, 4), (1, 4, 5))
        alignments = (None, 16, 64, 256)
        array_types = [(0, "local"), (1, "shared")]
        self._do_test(array_types, shapes, DTYPES, alignments)

    def _do_test(self, array_types, shapes, dtypes, alignments):
        """
        Compile and launch one kernel per combination of the arguments and
        check the alignment recorded in its LLVM IR and, when an alignment
        was requested, the address the kernel reported.

        ``array_types`` holds ``(which, name)`` pairs where ``which`` is 0 for
        a local array and anything else for a shared array.
        """
        items = itertools.product(array_types, shapes, dtypes, alignments)

        for (which, array_type), shape, dtype, alignment in items:
            with self.subTest(
                array_type=array_type,
                shape=shape,
                dtype=dtype,
                alignment=alignment,
            ):

                @cuda.jit
                def f(loc, shrd, which):
                    i = cuda.grid(1)
                    if which == 0:
                        local_array = cuda.local.array(
                            shape=shape,
                            dtype=dtype,
                            alignment=alignment,
                        )
                        if i == 0:
                            loc[0] = local_array.ctypes.data
                    else:
                        shared_array = cuda.shared.array(
                            shape=shape,
                            dtype=dtype,
                            alignment=alignment,
                        )
                        if i == 0:
                            shrd[0] = shared_array.ctypes.data

                loc = np.zeros(1, dtype=np.uint64)
                shrd = np.zeros(1, dtype=np.uint64)
                f[1, 1](loc, shrd, which)

                kernel = f.overloads[f.signatures[0]]
                llvm_ir = kernel.inspect_llvm()

                is_local = which == 0
                pattern = LMEM_PATTERN if is_local else SMEM_PATTERN
                found = pattern.findall(llvm_ir)

                if alignment is None:
                    if is_local:
                        # Local memory shouldn't have any alignment information
                        # when no alignment is specified.
                        self.assertEqual(len(found), 0)
                    else:
                        # Shared memory should at least have a power-of-two
                        # alignment when no alignment is specified.
                        self.assertEqual(len(found), 1)

                        # Bound to a separate name so the loop variable that
                        # the kernel closes over is never rebound.
                        default_alignment = int(found[0])
                        # Verify alignment is a positive power of two (zero
                        # would otherwise satisfy the bit test).
                        self.assertTrue(
                            default_alignment > 0
                            and default_alignment & (default_alignment - 1)
                            == 0
                        )
                else:
                    # Verify alignment is in the LLVM IR.
                    self.assertEqual(len(found), 1)
                    actual_alignment = int(found[0])
                    self.assertEqual(alignment, actual_alignment)

                    # Also verify that the address of the array is aligned.
                    # If this fails, the problem is likely with NVVM.
                    address = loc[0] if is_local else shrd[0]
                    # Convert to a Python int first: mixing a np.uint64 scalar
                    # with a Python int can promote to float64 on NumPy
                    # versions without NEP 50 and lose the low address bits.
                    alignment_mod = int(address) % alignment
                    self.assertEqual(alignment_mod, 0)

                if NOISY:
                    print(".", end="", flush=True)

    def test_invalid_aligments(self):
        shapes = (1, 50)
        dtypes = (np.uint8, np.uint64)
        invalid_alignment_values = (-1, 0, 3, 17, 33)
        invalid_alignment_types = ("1.0", "1", "foo", 1.0, 1.5, 3.2)
        alignments = invalid_alignment_values + invalid_alignment_types
        array_types = [(0, "local"), (1, "shared")]

        # Use regex pattern to match error message, handling potential ANSI
        # color codes which appear on CI.
        expected_invalid_type_error_regex = (
            r"RequireLiteralValue:.*alignment must be a constant integer"
        )

        items = itertools.product(array_types, shapes, dtypes, alignments)

        for (which, array_type), shape, dtype, alignment in items:
            with self.subTest(
                array_type=array_type,
                shape=shape,
                dtype=dtype,
                alignment=alignment,
            ):
                if which == 0:

                    @cuda.jit
                    def f(dest_array):
                        i = cuda.grid(1)
                        local_array = cuda.local.array(
                            shape=shape,
                            dtype=dtype,
                            alignment=alignment,
                        )
                        if i == 0:
                            dest_array[0] = local_array.ctypes.data
                else:

                    @cuda.jit
                    def f(dest_array):
                        i = cuda.grid(1)
                        shared_array = cuda.shared.array(
                            shape=shape,
                            dtype=dtype,
                            alignment=alignment,
                        )
                        if i == 0:
                            dest_array[0] = shared_array.ctypes.data

                array = np.zeros(1, dtype=np.uint64)

                # The type of error we expect differs between an invalid value
                # that is still an int, and an invalid type.
                if isinstance(alignment, int):
                    with self.assertRaisesRegex(
                        ValueError, r"Alignment must be.*"
                    ):
                        f[1, 1](array)
                else:
                    with self.assertRaisesRegex(
                        TypingError, expected_invalid_type_error_regex
                    ):
                        f[1, 1](array)

                if NOISY:
                    print(".", end="", flush=True)
246
+
247
+
248
@skip_unless_cudasim("Only check for alignment unsupported in the simulator")
class TestCudasimUnsupportedAlignment(CUDATestCase):
    """Requesting an array alignment in the simulator raises RuntimeError."""

    def _assert_launch_unsupported(self, kernel):
        """Launch ``kernel`` on one thread and expect the cudasim error."""
        with self.assertRaisesRegex(RuntimeError, "not supported in cudasim"):
            kernel[1, 1]()

    def test_local_unsupported(self):
        @cuda.jit
        def local_kernel():
            cuda.local.array(1, dtype=np.uint8, alignment=16)

        self._assert_launch_unsupported(local_kernel)

    def test_shared_unsupported(self):
        @cuda.jit
        def shared_kernel():
            cuda.shared.array(1, dtype=np.uint8, alignment=16)

        self._assert_launch_unsupported(shared_kernel)
265
+
266
+
267
# Run the tests in this module when it is executed directly as a script.
if __name__ == "__main__":
    unittest.main()