numba-cuda 0.22.0__cp313-cp313-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (487) hide show
  1. _numba_cuda_redirector.pth +4 -0
  2. _numba_cuda_redirector.py +89 -0
  3. numba_cuda/VERSION +1 -0
  4. numba_cuda/__init__.py +6 -0
  5. numba_cuda/_version.py +11 -0
  6. numba_cuda/numba/cuda/__init__.py +70 -0
  7. numba_cuda/numba/cuda/_internal/cuda_bf16.py +16394 -0
  8. numba_cuda/numba/cuda/_internal/cuda_fp16.py +8112 -0
  9. numba_cuda/numba/cuda/api.py +580 -0
  10. numba_cuda/numba/cuda/api_util.py +76 -0
  11. numba_cuda/numba/cuda/args.py +72 -0
  12. numba_cuda/numba/cuda/bf16.py +397 -0
  13. numba_cuda/numba/cuda/cache_hints.py +287 -0
  14. numba_cuda/numba/cuda/cext/__init__.py +2 -0
  15. numba_cuda/numba/cuda/cext/_devicearray.cpp +159 -0
  16. numba_cuda/numba/cuda/cext/_devicearray.cpython-313-aarch64-linux-gnu.so +0 -0
  17. numba_cuda/numba/cuda/cext/_devicearray.h +29 -0
  18. numba_cuda/numba/cuda/cext/_dispatcher.cpp +1098 -0
  19. numba_cuda/numba/cuda/cext/_dispatcher.cpython-313-aarch64-linux-gnu.so +0 -0
  20. numba_cuda/numba/cuda/cext/_hashtable.cpp +532 -0
  21. numba_cuda/numba/cuda/cext/_hashtable.h +135 -0
  22. numba_cuda/numba/cuda/cext/_helperlib.c +71 -0
  23. numba_cuda/numba/cuda/cext/_helperlib.cpython-313-aarch64-linux-gnu.so +0 -0
  24. numba_cuda/numba/cuda/cext/_helpermod.c +82 -0
  25. numba_cuda/numba/cuda/cext/_pymodule.h +38 -0
  26. numba_cuda/numba/cuda/cext/_typeconv.cpp +206 -0
  27. numba_cuda/numba/cuda/cext/_typeconv.cpython-313-aarch64-linux-gnu.so +0 -0
  28. numba_cuda/numba/cuda/cext/_typeof.cpp +1159 -0
  29. numba_cuda/numba/cuda/cext/_typeof.h +19 -0
  30. numba_cuda/numba/cuda/cext/capsulethunk.h +111 -0
  31. numba_cuda/numba/cuda/cext/mviewbuf.c +385 -0
  32. numba_cuda/numba/cuda/cext/mviewbuf.cpython-313-aarch64-linux-gnu.so +0 -0
  33. numba_cuda/numba/cuda/cext/typeconv.cpp +212 -0
  34. numba_cuda/numba/cuda/cext/typeconv.hpp +101 -0
  35. numba_cuda/numba/cuda/cg.py +67 -0
  36. numba_cuda/numba/cuda/cgutils.py +1294 -0
  37. numba_cuda/numba/cuda/cloudpickle/__init__.py +21 -0
  38. numba_cuda/numba/cuda/cloudpickle/cloudpickle.py +1598 -0
  39. numba_cuda/numba/cuda/cloudpickle/cloudpickle_fast.py +17 -0
  40. numba_cuda/numba/cuda/codegen.py +541 -0
  41. numba_cuda/numba/cuda/compiler.py +1396 -0
  42. numba_cuda/numba/cuda/core/analysis.py +758 -0
  43. numba_cuda/numba/cuda/core/annotations/__init__.py +0 -0
  44. numba_cuda/numba/cuda/core/annotations/pretty_annotate.py +288 -0
  45. numba_cuda/numba/cuda/core/annotations/type_annotations.py +305 -0
  46. numba_cuda/numba/cuda/core/base.py +1332 -0
  47. numba_cuda/numba/cuda/core/boxing.py +1411 -0
  48. numba_cuda/numba/cuda/core/bytecode.py +728 -0
  49. numba_cuda/numba/cuda/core/byteflow.py +2346 -0
  50. numba_cuda/numba/cuda/core/caching.py +744 -0
  51. numba_cuda/numba/cuda/core/callconv.py +392 -0
  52. numba_cuda/numba/cuda/core/codegen.py +171 -0
  53. numba_cuda/numba/cuda/core/compiler.py +199 -0
  54. numba_cuda/numba/cuda/core/compiler_lock.py +85 -0
  55. numba_cuda/numba/cuda/core/compiler_machinery.py +497 -0
  56. numba_cuda/numba/cuda/core/config.py +650 -0
  57. numba_cuda/numba/cuda/core/consts.py +124 -0
  58. numba_cuda/numba/cuda/core/controlflow.py +989 -0
  59. numba_cuda/numba/cuda/core/entrypoints.py +57 -0
  60. numba_cuda/numba/cuda/core/environment.py +66 -0
  61. numba_cuda/numba/cuda/core/errors.py +917 -0
  62. numba_cuda/numba/cuda/core/event.py +511 -0
  63. numba_cuda/numba/cuda/core/funcdesc.py +330 -0
  64. numba_cuda/numba/cuda/core/generators.py +387 -0
  65. numba_cuda/numba/cuda/core/imputils.py +509 -0
  66. numba_cuda/numba/cuda/core/inline_closurecall.py +1787 -0
  67. numba_cuda/numba/cuda/core/interpreter.py +3617 -0
  68. numba_cuda/numba/cuda/core/ir.py +1812 -0
  69. numba_cuda/numba/cuda/core/ir_utils.py +2638 -0
  70. numba_cuda/numba/cuda/core/optional.py +129 -0
  71. numba_cuda/numba/cuda/core/options.py +262 -0
  72. numba_cuda/numba/cuda/core/postproc.py +249 -0
  73. numba_cuda/numba/cuda/core/pythonapi.py +1859 -0
  74. numba_cuda/numba/cuda/core/registry.py +46 -0
  75. numba_cuda/numba/cuda/core/removerefctpass.py +123 -0
  76. numba_cuda/numba/cuda/core/rewrites/__init__.py +26 -0
  77. numba_cuda/numba/cuda/core/rewrites/ir_print.py +91 -0
  78. numba_cuda/numba/cuda/core/rewrites/registry.py +104 -0
  79. numba_cuda/numba/cuda/core/rewrites/static_binop.py +41 -0
  80. numba_cuda/numba/cuda/core/rewrites/static_getitem.py +189 -0
  81. numba_cuda/numba/cuda/core/rewrites/static_raise.py +100 -0
  82. numba_cuda/numba/cuda/core/sigutils.py +68 -0
  83. numba_cuda/numba/cuda/core/ssa.py +498 -0
  84. numba_cuda/numba/cuda/core/targetconfig.py +330 -0
  85. numba_cuda/numba/cuda/core/tracing.py +231 -0
  86. numba_cuda/numba/cuda/core/transforms.py +956 -0
  87. numba_cuda/numba/cuda/core/typed_passes.py +867 -0
  88. numba_cuda/numba/cuda/core/typeinfer.py +1950 -0
  89. numba_cuda/numba/cuda/core/unsafe/__init__.py +0 -0
  90. numba_cuda/numba/cuda/core/unsafe/bytes.py +67 -0
  91. numba_cuda/numba/cuda/core/unsafe/eh.py +67 -0
  92. numba_cuda/numba/cuda/core/unsafe/refcount.py +98 -0
  93. numba_cuda/numba/cuda/core/untyped_passes.py +1979 -0
  94. numba_cuda/numba/cuda/cpython/builtins.py +1153 -0
  95. numba_cuda/numba/cuda/cpython/charseq.py +1218 -0
  96. numba_cuda/numba/cuda/cpython/cmathimpl.py +560 -0
  97. numba_cuda/numba/cuda/cpython/enumimpl.py +103 -0
  98. numba_cuda/numba/cuda/cpython/iterators.py +167 -0
  99. numba_cuda/numba/cuda/cpython/listobj.py +1326 -0
  100. numba_cuda/numba/cuda/cpython/mathimpl.py +499 -0
  101. numba_cuda/numba/cuda/cpython/numbers.py +1475 -0
  102. numba_cuda/numba/cuda/cpython/rangeobj.py +289 -0
  103. numba_cuda/numba/cuda/cpython/slicing.py +322 -0
  104. numba_cuda/numba/cuda/cpython/tupleobj.py +456 -0
  105. numba_cuda/numba/cuda/cpython/unicode.py +2865 -0
  106. numba_cuda/numba/cuda/cpython/unicode_support.py +1597 -0
  107. numba_cuda/numba/cuda/cpython/unsafe/__init__.py +0 -0
  108. numba_cuda/numba/cuda/cpython/unsafe/numbers.py +64 -0
  109. numba_cuda/numba/cuda/cpython/unsafe/tuple.py +92 -0
  110. numba_cuda/numba/cuda/cuda_paths.py +691 -0
  111. numba_cuda/numba/cuda/cudadecl.py +543 -0
  112. numba_cuda/numba/cuda/cudadrv/__init__.py +14 -0
  113. numba_cuda/numba/cuda/cudadrv/devicearray.py +954 -0
  114. numba_cuda/numba/cuda/cudadrv/devices.py +249 -0
  115. numba_cuda/numba/cuda/cudadrv/driver.py +3238 -0
  116. numba_cuda/numba/cuda/cudadrv/drvapi.py +435 -0
  117. numba_cuda/numba/cuda/cudadrv/dummyarray.py +562 -0
  118. numba_cuda/numba/cuda/cudadrv/enums.py +613 -0
  119. numba_cuda/numba/cuda/cudadrv/error.py +48 -0
  120. numba_cuda/numba/cuda/cudadrv/libs.py +220 -0
  121. numba_cuda/numba/cuda/cudadrv/linkable_code.py +184 -0
  122. numba_cuda/numba/cuda/cudadrv/mappings.py +14 -0
  123. numba_cuda/numba/cuda/cudadrv/ndarray.py +26 -0
  124. numba_cuda/numba/cuda/cudadrv/nvrtc.py +193 -0
  125. numba_cuda/numba/cuda/cudadrv/nvvm.py +756 -0
  126. numba_cuda/numba/cuda/cudadrv/rtapi.py +13 -0
  127. numba_cuda/numba/cuda/cudadrv/runtime.py +34 -0
  128. numba_cuda/numba/cuda/cudaimpl.py +983 -0
  129. numba_cuda/numba/cuda/cudamath.py +149 -0
  130. numba_cuda/numba/cuda/datamodel/__init__.py +7 -0
  131. numba_cuda/numba/cuda/datamodel/cuda_manager.py +66 -0
  132. numba_cuda/numba/cuda/datamodel/cuda_models.py +1446 -0
  133. numba_cuda/numba/cuda/datamodel/cuda_packer.py +224 -0
  134. numba_cuda/numba/cuda/datamodel/cuda_registry.py +22 -0
  135. numba_cuda/numba/cuda/datamodel/cuda_testing.py +153 -0
  136. numba_cuda/numba/cuda/datamodel/manager.py +11 -0
  137. numba_cuda/numba/cuda/datamodel/models.py +9 -0
  138. numba_cuda/numba/cuda/datamodel/packer.py +9 -0
  139. numba_cuda/numba/cuda/datamodel/registry.py +11 -0
  140. numba_cuda/numba/cuda/datamodel/testing.py +11 -0
  141. numba_cuda/numba/cuda/debuginfo.py +997 -0
  142. numba_cuda/numba/cuda/decorators.py +294 -0
  143. numba_cuda/numba/cuda/descriptor.py +35 -0
  144. numba_cuda/numba/cuda/device_init.py +155 -0
  145. numba_cuda/numba/cuda/deviceufunc.py +1021 -0
  146. numba_cuda/numba/cuda/dispatcher.py +2463 -0
  147. numba_cuda/numba/cuda/errors.py +72 -0
  148. numba_cuda/numba/cuda/extending.py +697 -0
  149. numba_cuda/numba/cuda/flags.py +178 -0
  150. numba_cuda/numba/cuda/fp16.py +357 -0
  151. numba_cuda/numba/cuda/include/12/cuda_bf16.h +5118 -0
  152. numba_cuda/numba/cuda/include/12/cuda_bf16.hpp +3865 -0
  153. numba_cuda/numba/cuda/include/12/cuda_fp16.h +5363 -0
  154. numba_cuda/numba/cuda/include/12/cuda_fp16.hpp +3483 -0
  155. numba_cuda/numba/cuda/include/13/cuda_bf16.h +5118 -0
  156. numba_cuda/numba/cuda/include/13/cuda_bf16.hpp +3865 -0
  157. numba_cuda/numba/cuda/include/13/cuda_fp16.h +5363 -0
  158. numba_cuda/numba/cuda/include/13/cuda_fp16.hpp +3483 -0
  159. numba_cuda/numba/cuda/initialize.py +24 -0
  160. numba_cuda/numba/cuda/intrinsics.py +531 -0
  161. numba_cuda/numba/cuda/itanium_mangler.py +214 -0
  162. numba_cuda/numba/cuda/kernels/__init__.py +2 -0
  163. numba_cuda/numba/cuda/kernels/reduction.py +265 -0
  164. numba_cuda/numba/cuda/kernels/transpose.py +65 -0
  165. numba_cuda/numba/cuda/libdevice.py +3386 -0
  166. numba_cuda/numba/cuda/libdevicedecl.py +20 -0
  167. numba_cuda/numba/cuda/libdevicefuncs.py +1060 -0
  168. numba_cuda/numba/cuda/libdeviceimpl.py +88 -0
  169. numba_cuda/numba/cuda/locks.py +19 -0
  170. numba_cuda/numba/cuda/lowering.py +1980 -0
  171. numba_cuda/numba/cuda/mathimpl.py +374 -0
  172. numba_cuda/numba/cuda/memory_management/__init__.py +4 -0
  173. numba_cuda/numba/cuda/memory_management/memsys.cu +99 -0
  174. numba_cuda/numba/cuda/memory_management/memsys.cuh +22 -0
  175. numba_cuda/numba/cuda/memory_management/nrt.cu +212 -0
  176. numba_cuda/numba/cuda/memory_management/nrt.cuh +48 -0
  177. numba_cuda/numba/cuda/memory_management/nrt.py +390 -0
  178. numba_cuda/numba/cuda/memory_management/nrt_context.py +438 -0
  179. numba_cuda/numba/cuda/misc/appdirs.py +594 -0
  180. numba_cuda/numba/cuda/misc/cffiimpl.py +24 -0
  181. numba_cuda/numba/cuda/misc/coverage_support.py +43 -0
  182. numba_cuda/numba/cuda/misc/dump_style.py +41 -0
  183. numba_cuda/numba/cuda/misc/findlib.py +75 -0
  184. numba_cuda/numba/cuda/misc/firstlinefinder.py +96 -0
  185. numba_cuda/numba/cuda/misc/gdb_hook.py +240 -0
  186. numba_cuda/numba/cuda/misc/literal.py +28 -0
  187. numba_cuda/numba/cuda/misc/llvm_pass_timings.py +412 -0
  188. numba_cuda/numba/cuda/misc/special.py +94 -0
  189. numba_cuda/numba/cuda/models.py +56 -0
  190. numba_cuda/numba/cuda/np/arraymath.py +5130 -0
  191. numba_cuda/numba/cuda/np/arrayobj.py +7635 -0
  192. numba_cuda/numba/cuda/np/extensions.py +11 -0
  193. numba_cuda/numba/cuda/np/linalg.py +3087 -0
  194. numba_cuda/numba/cuda/np/math/__init__.py +0 -0
  195. numba_cuda/numba/cuda/np/math/cmathimpl.py +558 -0
  196. numba_cuda/numba/cuda/np/math/mathimpl.py +487 -0
  197. numba_cuda/numba/cuda/np/math/numbers.py +1461 -0
  198. numba_cuda/numba/cuda/np/npdatetime.py +969 -0
  199. numba_cuda/numba/cuda/np/npdatetime_helpers.py +217 -0
  200. numba_cuda/numba/cuda/np/npyfuncs.py +1808 -0
  201. numba_cuda/numba/cuda/np/npyimpl.py +1027 -0
  202. numba_cuda/numba/cuda/np/numpy_support.py +798 -0
  203. numba_cuda/numba/cuda/np/polynomial/__init__.py +4 -0
  204. numba_cuda/numba/cuda/np/polynomial/polynomial_core.py +242 -0
  205. numba_cuda/numba/cuda/np/polynomial/polynomial_functions.py +380 -0
  206. numba_cuda/numba/cuda/np/ufunc/__init__.py +4 -0
  207. numba_cuda/numba/cuda/np/ufunc/decorators.py +203 -0
  208. numba_cuda/numba/cuda/np/ufunc/sigparse.py +68 -0
  209. numba_cuda/numba/cuda/np/ufunc/ufuncbuilder.py +65 -0
  210. numba_cuda/numba/cuda/np/ufunc_db.py +1282 -0
  211. numba_cuda/numba/cuda/np/unsafe/__init__.py +0 -0
  212. numba_cuda/numba/cuda/np/unsafe/ndarray.py +84 -0
  213. numba_cuda/numba/cuda/nvvmutils.py +254 -0
  214. numba_cuda/numba/cuda/printimpl.py +126 -0
  215. numba_cuda/numba/cuda/random.py +308 -0
  216. numba_cuda/numba/cuda/reshape_funcs.cu +156 -0
  217. numba_cuda/numba/cuda/serialize.py +267 -0
  218. numba_cuda/numba/cuda/simulator/__init__.py +63 -0
  219. numba_cuda/numba/cuda/simulator/_internal/__init__.py +4 -0
  220. numba_cuda/numba/cuda/simulator/_internal/cuda_bf16.py +2 -0
  221. numba_cuda/numba/cuda/simulator/api.py +179 -0
  222. numba_cuda/numba/cuda/simulator/bf16.py +4 -0
  223. numba_cuda/numba/cuda/simulator/compiler.py +38 -0
  224. numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +11 -0
  225. numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +462 -0
  226. numba_cuda/numba/cuda/simulator/cudadrv/devices.py +122 -0
  227. numba_cuda/numba/cuda/simulator/cudadrv/driver.py +66 -0
  228. numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +7 -0
  229. numba_cuda/numba/cuda/simulator/cudadrv/dummyarray.py +7 -0
  230. numba_cuda/numba/cuda/simulator/cudadrv/error.py +10 -0
  231. numba_cuda/numba/cuda/simulator/cudadrv/libs.py +10 -0
  232. numba_cuda/numba/cuda/simulator/cudadrv/linkable_code.py +61 -0
  233. numba_cuda/numba/cuda/simulator/cudadrv/nvrtc.py +11 -0
  234. numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +32 -0
  235. numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +22 -0
  236. numba_cuda/numba/cuda/simulator/dispatcher.py +11 -0
  237. numba_cuda/numba/cuda/simulator/kernel.py +320 -0
  238. numba_cuda/numba/cuda/simulator/kernelapi.py +509 -0
  239. numba_cuda/numba/cuda/simulator/memory_management/__init__.py +4 -0
  240. numba_cuda/numba/cuda/simulator/memory_management/nrt.py +21 -0
  241. numba_cuda/numba/cuda/simulator/reduction.py +19 -0
  242. numba_cuda/numba/cuda/simulator/tests/support.py +4 -0
  243. numba_cuda/numba/cuda/simulator/vector_types.py +65 -0
  244. numba_cuda/numba/cuda/simulator_init.py +18 -0
  245. numba_cuda/numba/cuda/stubs.py +624 -0
  246. numba_cuda/numba/cuda/target.py +505 -0
  247. numba_cuda/numba/cuda/testing.py +347 -0
  248. numba_cuda/numba/cuda/tests/__init__.py +62 -0
  249. numba_cuda/numba/cuda/tests/benchmarks/__init__.py +0 -0
  250. numba_cuda/numba/cuda/tests/benchmarks/test_kernel_launch.py +119 -0
  251. numba_cuda/numba/cuda/tests/cloudpickle_main_class.py +9 -0
  252. numba_cuda/numba/cuda/tests/core/serialize_usecases.py +113 -0
  253. numba_cuda/numba/cuda/tests/core/test_itanium_mangler.py +83 -0
  254. numba_cuda/numba/cuda/tests/core/test_serialize.py +371 -0
  255. numba_cuda/numba/cuda/tests/cudadrv/__init__.py +9 -0
  256. numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +147 -0
  257. numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +161 -0
  258. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +397 -0
  259. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +24 -0
  260. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +180 -0
  261. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +313 -0
  262. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +191 -0
  263. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +621 -0
  264. numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +247 -0
  265. numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +100 -0
  266. numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +200 -0
  267. numba_cuda/numba/cuda/tests/cudadrv/test_events.py +53 -0
  268. numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +72 -0
  269. numba_cuda/numba/cuda/tests/cudadrv/test_init.py +138 -0
  270. numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +43 -0
  271. numba_cuda/numba/cuda/tests/cudadrv/test_is_fp16.py +15 -0
  272. numba_cuda/numba/cuda/tests/cudadrv/test_linkable_code.py +58 -0
  273. numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +348 -0
  274. numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +128 -0
  275. numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py +301 -0
  276. numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +174 -0
  277. numba_cuda/numba/cuda/tests/cudadrv/test_nvrtc.py +28 -0
  278. numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +185 -0
  279. numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +39 -0
  280. numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +23 -0
  281. numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +38 -0
  282. numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +48 -0
  283. numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +44 -0
  284. numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +127 -0
  285. numba_cuda/numba/cuda/tests/cudapy/__init__.py +9 -0
  286. numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +231 -0
  287. numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +50 -0
  288. numba_cuda/numba/cuda/tests/cudapy/cg_cache_usecases.py +36 -0
  289. numba_cuda/numba/cuda/tests/cudapy/complex_usecases.py +116 -0
  290. numba_cuda/numba/cuda/tests/cudapy/enum_usecases.py +59 -0
  291. numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +62 -0
  292. numba_cuda/numba/cuda/tests/cudapy/jitlink.ptx +28 -0
  293. numba_cuda/numba/cuda/tests/cudapy/overload_usecases.py +33 -0
  294. numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +104 -0
  295. numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +47 -0
  296. numba_cuda/numba/cuda/tests/cudapy/test_analysis.py +1122 -0
  297. numba_cuda/numba/cuda/tests/cudapy/test_array.py +344 -0
  298. numba_cuda/numba/cuda/tests/cudapy/test_array_alignment.py +268 -0
  299. numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +203 -0
  300. numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +63 -0
  301. numba_cuda/numba/cuda/tests/cudapy/test_array_reductions.py +360 -0
  302. numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +1815 -0
  303. numba_cuda/numba/cuda/tests/cudapy/test_bfloat16.py +599 -0
  304. numba_cuda/numba/cuda/tests/cudapy/test_bfloat16_bindings.py +377 -0
  305. numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +160 -0
  306. numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +27 -0
  307. numba_cuda/numba/cuda/tests/cudapy/test_byteflow.py +98 -0
  308. numba_cuda/numba/cuda/tests/cudapy/test_cache_hints.py +210 -0
  309. numba_cuda/numba/cuda/tests/cudapy/test_caching.py +683 -0
  310. numba_cuda/numba/cuda/tests/cudapy/test_casting.py +265 -0
  311. numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +42 -0
  312. numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +718 -0
  313. numba_cuda/numba/cuda/tests/cudapy/test_complex.py +370 -0
  314. numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +23 -0
  315. numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +142 -0
  316. numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +178 -0
  317. numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +193 -0
  318. numba_cuda/numba/cuda/tests/cudapy/test_copy_propagate.py +131 -0
  319. numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +438 -0
  320. numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +94 -0
  321. numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +101 -0
  322. numba_cuda/numba/cuda/tests/cudapy/test_debug.py +105 -0
  323. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +978 -0
  324. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo_types.py +476 -0
  325. numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +500 -0
  326. numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +820 -0
  327. numba_cuda/numba/cuda/tests/cudapy/test_enums.py +152 -0
  328. numba_cuda/numba/cuda/tests/cudapy/test_errors.py +111 -0
  329. numba_cuda/numba/cuda/tests/cudapy/test_exception.py +170 -0
  330. numba_cuda/numba/cuda/tests/cudapy/test_extending.py +1088 -0
  331. numba_cuda/numba/cuda/tests/cudapy/test_extending_types.py +71 -0
  332. numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +265 -0
  333. numba_cuda/numba/cuda/tests/cudapy/test_flow_control.py +1433 -0
  334. numba_cuda/numba/cuda/tests/cudapy/test_forall.py +57 -0
  335. numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +34 -0
  336. numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +69 -0
  337. numba_cuda/numba/cuda/tests/cudapy/test_globals.py +62 -0
  338. numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +474 -0
  339. numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +167 -0
  340. numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +92 -0
  341. numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +39 -0
  342. numba_cuda/numba/cuda/tests/cudapy/test_inline.py +170 -0
  343. numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +255 -0
  344. numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +1219 -0
  345. numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +263 -0
  346. numba_cuda/numba/cuda/tests/cudapy/test_ir.py +598 -0
  347. numba_cuda/numba/cuda/tests/cudapy/test_ir_utils.py +276 -0
  348. numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +101 -0
  349. numba_cuda/numba/cuda/tests/cudapy/test_lang.py +68 -0
  350. numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +123 -0
  351. numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +194 -0
  352. numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +220 -0
  353. numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +173 -0
  354. numba_cuda/numba/cuda/tests/cudapy/test_make_function_to_jit_function.py +364 -0
  355. numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +47 -0
  356. numba_cuda/numba/cuda/tests/cudapy/test_math.py +842 -0
  357. numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +76 -0
  358. numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +78 -0
  359. numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +25 -0
  360. numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +145 -0
  361. numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +39 -0
  362. numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +82 -0
  363. numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +53 -0
  364. numba_cuda/numba/cuda/tests/cudapy/test_operator.py +504 -0
  365. numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +93 -0
  366. numba_cuda/numba/cuda/tests/cudapy/test_overload.py +402 -0
  367. numba_cuda/numba/cuda/tests/cudapy/test_powi.py +128 -0
  368. numba_cuda/numba/cuda/tests/cudapy/test_print.py +193 -0
  369. numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +37 -0
  370. numba_cuda/numba/cuda/tests/cudapy/test_random.py +117 -0
  371. numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +614 -0
  372. numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +130 -0
  373. numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +94 -0
  374. numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +83 -0
  375. numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +86 -0
  376. numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +40 -0
  377. numba_cuda/numba/cuda/tests/cudapy/test_sm.py +457 -0
  378. numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +233 -0
  379. numba_cuda/numba/cuda/tests/cudapy/test_ssa.py +454 -0
  380. numba_cuda/numba/cuda/tests/cudapy/test_stream_api.py +56 -0
  381. numba_cuda/numba/cuda/tests/cudapy/test_sync.py +277 -0
  382. numba_cuda/numba/cuda/tests/cudapy/test_tracing.py +200 -0
  383. numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +90 -0
  384. numba_cuda/numba/cuda/tests/cudapy/test_typeconv.py +333 -0
  385. numba_cuda/numba/cuda/tests/cudapy/test_typeinfer.py +538 -0
  386. numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +585 -0
  387. numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +42 -0
  388. numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +485 -0
  389. numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +312 -0
  390. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +23 -0
  391. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +183 -0
  392. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +40 -0
  393. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +40 -0
  394. numba_cuda/numba/cuda/tests/cudapy/test_warning.py +206 -0
  395. numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +446 -0
  396. numba_cuda/numba/cuda/tests/cudasim/__init__.py +9 -0
  397. numba_cuda/numba/cuda/tests/cudasim/support.py +9 -0
  398. numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +111 -0
  399. numba_cuda/numba/cuda/tests/data/__init__.py +2 -0
  400. numba_cuda/numba/cuda/tests/data/cta_barrier.cu +28 -0
  401. numba_cuda/numba/cuda/tests/data/cuda_include.cu +10 -0
  402. numba_cuda/numba/cuda/tests/data/error.cu +12 -0
  403. numba_cuda/numba/cuda/tests/data/include/add.cuh +8 -0
  404. numba_cuda/numba/cuda/tests/data/jitlink.cu +28 -0
  405. numba_cuda/numba/cuda/tests/data/jitlink.ptx +49 -0
  406. numba_cuda/numba/cuda/tests/data/warn.cu +12 -0
  407. numba_cuda/numba/cuda/tests/doc_examples/__init__.py +9 -0
  408. numba_cuda/numba/cuda/tests/doc_examples/ffi/__init__.py +2 -0
  409. numba_cuda/numba/cuda/tests/doc_examples/ffi/functions.cu +54 -0
  410. numba_cuda/numba/cuda/tests/doc_examples/ffi/include/mul.cuh +8 -0
  411. numba_cuda/numba/cuda/tests/doc_examples/ffi/saxpy.cu +14 -0
  412. numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +86 -0
  413. numba_cuda/numba/cuda/tests/doc_examples/test_cpointer.py +68 -0
  414. numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +81 -0
  415. numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +141 -0
  416. numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +160 -0
  417. numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +180 -0
  418. numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +119 -0
  419. numba_cuda/numba/cuda/tests/doc_examples/test_random.py +66 -0
  420. numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +80 -0
  421. numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +206 -0
  422. numba_cuda/numba/cuda/tests/doc_examples/test_ufunc.py +53 -0
  423. numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +76 -0
  424. numba_cuda/numba/cuda/tests/nocuda/__init__.py +9 -0
  425. numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +452 -0
  426. numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +48 -0
  427. numba_cuda/numba/cuda/tests/nocuda/test_import.py +63 -0
  428. numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +252 -0
  429. numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +59 -0
  430. numba_cuda/numba/cuda/tests/nrt/__init__.py +9 -0
  431. numba_cuda/numba/cuda/tests/nrt/test_nrt.py +387 -0
  432. numba_cuda/numba/cuda/tests/nrt/test_nrt_refct.py +124 -0
  433. numba_cuda/numba/cuda/tests/support.py +900 -0
  434. numba_cuda/numba/cuda/typeconv/__init__.py +4 -0
  435. numba_cuda/numba/cuda/typeconv/castgraph.py +137 -0
  436. numba_cuda/numba/cuda/typeconv/rules.py +63 -0
  437. numba_cuda/numba/cuda/typeconv/typeconv.py +121 -0
  438. numba_cuda/numba/cuda/types/__init__.py +233 -0
  439. numba_cuda/numba/cuda/types/__init__.pyi +167 -0
  440. numba_cuda/numba/cuda/types/abstract.py +9 -0
  441. numba_cuda/numba/cuda/types/common.py +9 -0
  442. numba_cuda/numba/cuda/types/containers.py +9 -0
  443. numba_cuda/numba/cuda/types/cuda_abstract.py +533 -0
  444. numba_cuda/numba/cuda/types/cuda_common.py +110 -0
  445. numba_cuda/numba/cuda/types/cuda_containers.py +971 -0
  446. numba_cuda/numba/cuda/types/cuda_function_type.py +230 -0
  447. numba_cuda/numba/cuda/types/cuda_functions.py +798 -0
  448. numba_cuda/numba/cuda/types/cuda_iterators.py +120 -0
  449. numba_cuda/numba/cuda/types/cuda_misc.py +569 -0
  450. numba_cuda/numba/cuda/types/cuda_npytypes.py +690 -0
  451. numba_cuda/numba/cuda/types/cuda_scalars.py +280 -0
  452. numba_cuda/numba/cuda/types/ext_types.py +101 -0
  453. numba_cuda/numba/cuda/types/function_type.py +11 -0
  454. numba_cuda/numba/cuda/types/functions.py +9 -0
  455. numba_cuda/numba/cuda/types/iterators.py +9 -0
  456. numba_cuda/numba/cuda/types/misc.py +9 -0
  457. numba_cuda/numba/cuda/types/npytypes.py +9 -0
  458. numba_cuda/numba/cuda/types/scalars.py +9 -0
  459. numba_cuda/numba/cuda/typing/__init__.py +19 -0
  460. numba_cuda/numba/cuda/typing/arraydecl.py +939 -0
  461. numba_cuda/numba/cuda/typing/asnumbatype.py +130 -0
  462. numba_cuda/numba/cuda/typing/bufproto.py +70 -0
  463. numba_cuda/numba/cuda/typing/builtins.py +1209 -0
  464. numba_cuda/numba/cuda/typing/cffi_utils.py +219 -0
  465. numba_cuda/numba/cuda/typing/cmathdecl.py +47 -0
  466. numba_cuda/numba/cuda/typing/collections.py +138 -0
  467. numba_cuda/numba/cuda/typing/context.py +782 -0
  468. numba_cuda/numba/cuda/typing/ctypes_utils.py +125 -0
  469. numba_cuda/numba/cuda/typing/dictdecl.py +63 -0
  470. numba_cuda/numba/cuda/typing/enumdecl.py +74 -0
  471. numba_cuda/numba/cuda/typing/listdecl.py +147 -0
  472. numba_cuda/numba/cuda/typing/mathdecl.py +158 -0
  473. numba_cuda/numba/cuda/typing/npdatetime.py +322 -0
  474. numba_cuda/numba/cuda/typing/npydecl.py +749 -0
  475. numba_cuda/numba/cuda/typing/setdecl.py +115 -0
  476. numba_cuda/numba/cuda/typing/templates.py +1446 -0
  477. numba_cuda/numba/cuda/typing/typeof.py +301 -0
  478. numba_cuda/numba/cuda/ufuncs.py +746 -0
  479. numba_cuda/numba/cuda/utils.py +724 -0
  480. numba_cuda/numba/cuda/vector_types.py +214 -0
  481. numba_cuda/numba/cuda/vectorizers.py +260 -0
  482. numba_cuda-0.22.0.dist-info/METADATA +109 -0
  483. numba_cuda-0.22.0.dist-info/RECORD +487 -0
  484. numba_cuda-0.22.0.dist-info/WHEEL +6 -0
  485. numba_cuda-0.22.0.dist-info/licenses/LICENSE +26 -0
  486. numba_cuda-0.22.0.dist-info/licenses/LICENSE.numba +24 -0
  487. numba_cuda-0.22.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,457 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
4
+ from numba import cuda
5
+ from numba.cuda import int32, float64, void
6
+ from numba.cuda import HAS_NUMBA
7
+
8
+ if HAS_NUMBA:
9
+ from numba.core.errors import TypingError as NumbaTypingError
10
+ from numba.cuda.core.errors import TypingError
11
+ from numba.cuda import types
12
+ from numba.cuda.testing import unittest, CUDATestCase, skip_on_cudasim
13
+
14
+ import numpy as np
15
+ from numba.cuda.np import numpy_support as nps
16
+
17
+ from .extensions_usecases import struct_model_type, MyStruct
18
+
19
+ recordwith2darray = np.dtype([("i", np.int32), ("j", np.float32, (3, 2))])
20
+
21
+
22
class TestSharedMemoryIssue(CUDATestCase):
    """Regression tests for individually reported shared memory issues."""

    def test_issue_953_sm_linkage_conflict(self):
        """Launch a kernel whose device callee also declares a shared array."""

        @cuda.jit(device=True)
        def inner():
            inner_arr = cuda.shared.array(1, dtype=int32)  # noqa: F841

        @cuda.jit
        def outer():
            outer_arr = cuda.shared.array(1, dtype=int32)  # noqa: F841
            inner()

        outer[1, 1]()

    def _shared_array_size(self, shape, dtype, out_dtype):
        """Return ``.size`` of a shared array as reported by a kernel.

        A single-thread kernel creates a shared array of ``shape`` and
        ``dtype`` and stores its ``size`` in a one-element host array of
        ``out_dtype``; the stored element is returned.
        """

        @cuda.jit
        def report_size(out):
            arr = cuda.shared.array(shape, dtype=dtype)
            out[0] = arr.size

        out = np.zeros(1, dtype=out_dtype)
        report_size[1, 1](out)
        return out[0]

    def _check_shared_array_size(self, shape, expected):
        """Assert that an int32 shared array of ``shape`` has ``expected`` size."""
        size = self._shared_array_size(shape, int32, np.int32)
        self.assertEqual(size, expected)

    def test_issue_1051_shared_size_broken_1d(self):
        self._check_shared_array_size(2, 2)

    def test_issue_1051_shared_size_broken_2d(self):
        self._check_shared_array_size((2, 3), 6)

    def test_issue_1051_shared_size_broken_3d(self):
        self._check_shared_array_size((2, 3, 4), 24)

    def _check_shared_array_size_fp16(self, shape, expected, ty):
        """Assert the size of a shared array whose dtype is ``ty``.

        The size is written to a float16 result array.
        """
        size = self._shared_array_size(shape, ty, np.float16)
        self.assertEqual(size, expected)

    def test_issue_fp16_support(self):
        # Both the Numba type and the NumPy type are accepted as the dtype.
        for ty in (types.float16, np.float16):
            self._check_shared_array_size_fp16(2, 2, ty)

    def test_issue_2393(self):
        """
        Regression test for a misaligned-address error, apparently caused by
        NVVM not knowing the alignment of the shared arrays (although it
        should have used the natural alignment of the element type).

        The test only launches the kernel and synchronizes; it makes no
        assertion on the output values.
        """
        num_weights = 2
        num_blocks = 48
        examples_per_block = 4
        threads_per_block = 1

        @cuda.jit
        def costs_func(d_block_costs):
            s_features = cuda.shared.array(
                (examples_per_block, num_weights), float64
            )
            s_initialcost = cuda.shared.array(7, float64)  # Bug

            tid = cuda.threadIdx.x

            prediction = 0
            for j in range(num_weights):
                prediction += s_features[tid, j]

            d_block_costs[0] = s_initialcost[0] + prediction

        host_costs = np.zeros(num_blocks, dtype=np.float64)
        device_costs = cuda.to_device(host_costs)

        costs_func[num_blocks, threads_per_block](device_costs)

        cuda.synchronize()
99
+
100
+
101
class TestSharedMemory(CUDATestCase):
    """Tests for static and dynamic shared memory arrays in CUDA kernels."""

    def _test_shared(self, arr):
        """Copy ``arr`` through shared memory and check the round trip.

        All threads in a block collaborate to load values into a shared
        array; after synchronization the first thread of each block writes
        the block's chunk to the output. This exercises loading and storing
        the dtype of ``arr`` with shared memory.
        """
        nelem = len(arr)
        nthreads = 16
        # Floor division keeps the block count an exact integer rather than
        # going through a float.
        nblocks = nelem // nthreads
        dt = nps.from_dtype(arr.dtype)

        @cuda.jit
        def use_sm_chunk_copy(x, y):
            sm = cuda.shared.array(nthreads, dtype=dt)

            tx = cuda.threadIdx.x
            bx = cuda.blockIdx.x
            bd = cuda.blockDim.x

            # Load this block's chunk into shared
            i = bx * bd + tx
            if i < len(x):
                sm[tx] = x[i]

            cuda.syncthreads()

            # One thread per block writes this block's chunk
            if tx == 0:
                for j in range(nthreads):
                    y[bd * bx + j] = sm[j]

        d_result = cuda.device_array_like(arr)
        use_sm_chunk_copy[nblocks, nthreads](arr, d_result)
        host_result = d_result.copy_to_host()
        np.testing.assert_array_equal(arr, host_result)

    def test_shared_recarray(self):
        """Round-trip a record array with a nested 2D field."""
        arr = np.recarray(128, dtype=recordwith2darray)
        for x in range(len(arr)):
            arr[x].i = x
            j = np.arange(3 * 2, dtype=np.float32)
            arr[x].j = j.reshape(3, 2) * x

        self._test_shared(arr)

    def test_shared_bool(self):
        """Round-trip a boolean array."""
        arr = np.random.randint(2, size=(1024,), dtype=np.bool_)
        self._test_shared(arr)

    def _test_dynshared_slice(self, func, arr, expected):
        """Launch ``func`` on ``arr`` and compare ``arr`` with ``expected``.

        Checks that slices of shared memory are correct. (See Bug #5073 -
        prior to the addition of these tests and the corresponding fix,
        slices of dynamic shared arrays all aliased each other.)
        """
        # Dynamic shared memory size in bytes: one element per element of arr.
        nshared = arr.size * arr.dtype.itemsize
        func[1, 1, 0, nshared](arr)
        np.testing.assert_array_equal(expected, arr)

    def test_dynshared_slice_write(self):
        # Test writing values into disjoint slices of dynamic shared memory
        @cuda.jit
        def slice_write(x):
            dynsmem = cuda.shared.array(0, dtype=int32)
            sm1 = dynsmem[0:1]
            sm2 = dynsmem[1:2]

            sm1[0] = 1
            sm2[0] = 2
            x[0] = dynsmem[0]
            x[1] = dynsmem[1]

        arr = np.zeros(2, dtype=np.int32)
        expected = np.array([1, 2], dtype=np.int32)
        self._test_dynshared_slice(slice_write, arr, expected)

    def test_dynshared_slice_read(self):
        # Test reading values from disjoint slices of dynamic shared memory
        @cuda.jit
        def slice_read(x):
            dynsmem = cuda.shared.array(0, dtype=int32)
            sm1 = dynsmem[0:1]
            sm2 = dynsmem[1:2]

            dynsmem[0] = 1
            dynsmem[1] = 2
            x[0] = sm1[0]
            x[1] = sm2[0]

        arr = np.zeros(2, dtype=np.int32)
        expected = np.array([1, 2], dtype=np.int32)
        self._test_dynshared_slice(slice_read, arr, expected)

    def test_dynshared_slice_diff_sizes(self):
        # Test reading values from disjoint slices of dynamic shared memory
        # with different sizes
        @cuda.jit
        def slice_diff_sizes(x):
            dynsmem = cuda.shared.array(0, dtype=int32)
            sm1 = dynsmem[0:1]
            sm2 = dynsmem[1:3]

            dynsmem[0] = 1
            dynsmem[1] = 2
            dynsmem[2] = 3
            x[0] = sm1[0]
            x[1] = sm2[0]
            x[2] = sm2[1]

        arr = np.zeros(3, dtype=np.int32)
        expected = np.array([1, 2, 3], dtype=np.int32)
        self._test_dynshared_slice(slice_diff_sizes, arr, expected)

    def test_dynshared_slice_overlap(self):
        # Test reading values from overlapping slices of dynamic shared memory
        @cuda.jit
        def slice_overlap(x):
            dynsmem = cuda.shared.array(0, dtype=int32)
            sm1 = dynsmem[0:2]
            sm2 = dynsmem[1:4]

            dynsmem[0] = 1
            dynsmem[1] = 2
            dynsmem[2] = 3
            dynsmem[3] = 4
            x[0] = sm1[0]
            x[1] = sm1[1]
            x[2] = sm2[0]
            x[3] = sm2[1]
            x[4] = sm2[2]

        arr = np.zeros(5, dtype=np.int32)
        expected = np.array([1, 2, 2, 3, 4], dtype=np.int32)
        self._test_dynshared_slice(slice_overlap, arr, expected)

    def test_dynshared_slice_gaps(self):
        # Test writing values to slices of dynamic shared memory doesn't write
        # outside the slice
        @cuda.jit
        def slice_gaps(x):
            dynsmem = cuda.shared.array(0, dtype=int32)
            sm1 = dynsmem[1:3]
            sm2 = dynsmem[4:6]

            # Initial values for dynamic shared memory, some to be overwritten
            dynsmem[0] = 99
            dynsmem[1] = 99
            dynsmem[2] = 99
            dynsmem[3] = 99
            dynsmem[4] = 99
            dynsmem[5] = 99
            dynsmem[6] = 99

            sm1[0] = 1
            sm1[1] = 2
            sm2[0] = 3
            sm2[1] = 4

            x[0] = dynsmem[0]
            x[1] = dynsmem[1]
            x[2] = dynsmem[2]
            x[3] = dynsmem[3]
            x[4] = dynsmem[4]
            x[5] = dynsmem[5]
            x[6] = dynsmem[6]

        arr = np.zeros(7, dtype=np.int32)
        expected = np.array([99, 1, 2, 99, 3, 4, 99], dtype=np.int32)
        self._test_dynshared_slice(slice_gaps, arr, expected)

    def test_dynshared_slice_write_backwards(self):
        # Test writing values into disjoint slices of dynamic shared memory
        # with negative steps
        @cuda.jit
        def slice_write_backwards(x):
            dynsmem = cuda.shared.array(0, dtype=int32)
            sm1 = dynsmem[1::-1]
            sm2 = dynsmem[3:1:-1]

            sm1[0] = 1
            sm1[1] = 2
            sm2[0] = 3
            sm2[1] = 4
            x[0] = dynsmem[0]
            x[1] = dynsmem[1]
            x[2] = dynsmem[2]
            x[3] = dynsmem[3]

        arr = np.zeros(4, dtype=np.int32)
        expected = np.array([2, 1, 4, 3], dtype=np.int32)
        self._test_dynshared_slice(slice_write_backwards, arr, expected)

    def test_dynshared_slice_nonunit_stride(self):
        # Test writing values into slice of dynamic shared memory with
        # non-unit stride
        @cuda.jit
        def slice_nonunit_stride(x):
            dynsmem = cuda.shared.array(0, dtype=int32)
            sm1 = dynsmem[::2]

            # Initial values for dynamic shared memory, some to be overwritten
            dynsmem[0] = 99
            dynsmem[1] = 99
            dynsmem[2] = 99
            dynsmem[3] = 99
            dynsmem[4] = 99
            dynsmem[5] = 99

            sm1[0] = 1
            sm1[1] = 2
            sm1[2] = 3

            x[0] = dynsmem[0]
            x[1] = dynsmem[1]
            x[2] = dynsmem[2]
            x[3] = dynsmem[3]
            x[4] = dynsmem[4]
            x[5] = dynsmem[5]

        arr = np.zeros(6, dtype=np.int32)
        expected = np.array([1, 99, 2, 99, 3, 99], dtype=np.int32)
        self._test_dynshared_slice(slice_nonunit_stride, arr, expected)

    def test_dynshared_slice_nonunit_reverse_stride(self):
        # Test writing values into slice of dynamic shared memory with
        # reverse non-unit stride
        @cuda.jit
        def slice_nonunit_reverse_stride(x):
            dynsmem = cuda.shared.array(0, dtype=int32)
            sm1 = dynsmem[-1::-2]

            # Initial values for dynamic shared memory, some to be overwritten
            dynsmem[0] = 99
            dynsmem[1] = 99
            dynsmem[2] = 99
            dynsmem[3] = 99
            dynsmem[4] = 99
            dynsmem[5] = 99

            sm1[0] = 1
            sm1[1] = 2
            sm1[2] = 3

            x[0] = dynsmem[0]
            x[1] = dynsmem[1]
            x[2] = dynsmem[2]
            x[3] = dynsmem[3]
            x[4] = dynsmem[4]
            x[5] = dynsmem[5]

        arr = np.zeros(6, dtype=np.int32)
        expected = np.array([99, 3, 99, 2, 99, 1], dtype=np.int32)
        self._test_dynshared_slice(slice_nonunit_reverse_stride, arr, expected)

    def test_issue_5073(self):
        # An example with which Bug #5073 (slices of dynamic shared memory all
        # alias) was discovered. The kernel uses all threads in the block to
        # load values into slices of dynamic shared memory. One thread per
        # block then writes the loaded values back to a global array after
        # syncthreads().

        arr = np.arange(1024)
        nelem = len(arr)
        nthreads = 16
        # Integer (floor) division: these are counts, not floats.
        nblocks = nelem // nthreads
        dt = nps.from_dtype(arr.dtype)
        nshared = nthreads * arr.dtype.itemsize
        chunksize = nthreads // 2

        @cuda.jit
        def sm_slice_copy(x, y, chunksize):
            dynsmem = cuda.shared.array(0, dtype=dt)
            sm1 = dynsmem[0:chunksize]
            sm2 = dynsmem[chunksize : chunksize * 2]

            tx = cuda.threadIdx.x
            bx = cuda.blockIdx.x
            bd = cuda.blockDim.x

            # load this block's chunk into shared
            i = bx * bd + tx
            if i < len(x):
                if tx < chunksize:
                    sm1[tx] = x[i]
                else:
                    sm2[tx - chunksize] = x[i]

            cuda.syncthreads()

            # one thread per block writes this block's chunk
            if tx == 0:
                for j in range(chunksize):
                    y[bd * bx + j] = sm1[j]
                    y[bd * bx + j + chunksize] = sm2[j]

        d_result = cuda.device_array_like(arr)
        sm_slice_copy[nblocks, nthreads, 0, nshared](arr, d_result, chunksize)
        host_result = d_result.copy_to_host()
        np.testing.assert_array_equal(arr, host_result)

    @skip_on_cudasim("Can't check typing in simulator")
    def test_invalid_array_type(self):
        """Unsupported and malformed dtypes must be rejected at compile time."""
        rgx = ".*Cannot infer the type of variable 'arr'.*"

        def unsupported_type():
            arr = cuda.shared.array(10, dtype=np.dtype("O"))  # noqa: F841

        with self.assertRaisesRegex(TypingError, rgx):
            cuda.jit(void())(unsupported_type)

        rgx = ".*Invalid NumPy dtype specified: 'int33'.*"

        def invalid_string_type():
            arr = cuda.shared.array(10, dtype="int33")  # noqa: F841

        # The invalid-string case is expected to raise Numba's own
        # TypingError when Numba is installed, and numba.cuda's otherwise.
        # NumbaTypingError is only bound when HAS_NUMBA is true; the
        # conditional expression never evaluates it otherwise.
        invalid_dtype_error = NumbaTypingError if HAS_NUMBA else TypingError
        with self.assertRaisesRegex(invalid_dtype_error, rgx):
            cuda.jit(void())(invalid_string_type)

    @skip_on_cudasim("Struct model array unsupported in simulator")
    def test_struct_model_type_static(self):
        """Store and load struct-model values in a static shared array.

        Each thread writes ``MyStruct(i, 2 * i)`` at index ``i`` and, after a
        barrier, reads the element at the mirrored index.
        """
        nthreads = 64

        @cuda.jit(void(int32[::1], int32[::1]))
        def write_then_reverse_read_static(outx, outy):
            # Test creation
            arr = cuda.shared.array(nthreads, dtype=struct_model_type)

            i = cuda.grid(1)
            ri = nthreads - i - 1

            # For the launch below (one block of nthreads threads, outputs of
            # length nthreads) every thread satisfies this guard, so the
            # barrier is reached by the whole block.
            if i < len(outx) and i < len(outy):
                # Test set to arr
                obj = MyStruct(int32(i), int32(i * 2))
                arr[i] = obj

                cuda.syncthreads()
                # Test get from arr
                outx[i] = arr[ri].x
                outy[i] = arr[ri].y

        arrx = np.zeros((nthreads,), dtype="int32")
        arry = np.zeros((nthreads,), dtype="int32")

        write_then_reverse_read_static[1, nthreads](arrx, arry)

        for i, x in enumerate(arrx):
            self.assertEqual(x, nthreads - i - 1)
        for i, y in enumerate(arry):
            self.assertEqual(y, (nthreads - i - 1) * 2)

    # The method was previously named without the ``test`` prefix, so test
    # runners never collected it. Keep the old name as an alias for anything
    # that referenced it directly; the alias itself is not collected.
    struct_model_type_static = test_struct_model_type_static
454
+
455
+
456
+ if __name__ == "__main__":
457
+ unittest.main()
@@ -0,0 +1,233 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
4
+ import numpy as np
5
+ from numba import cuda
6
+ from numba.cuda import float32, int32, void
7
+ from numba.cuda import HAS_NUMBA
8
+
9
+ if HAS_NUMBA:
10
+ from numba.core.errors import TypingError
11
+ else:
12
+ from numba.cuda.core.errors import TypingError
13
+ from numba.cuda.testing import unittest, CUDATestCase
14
+ from numba.cuda.testing import skip_on_cudasim
15
+ from .extensions_usecases import struct_model_type
16
+
17
+ GLOBAL_CONSTANT = 5
18
+ GLOBAL_CONSTANT_2 = 6
19
+ GLOBAL_CONSTANT_TUPLE = 5, 6
20
+
21
+
22
def udt_global_constants(A):
    """Kernel body: shared array whose shape is a module-level int constant."""
    smem = cuda.shared.array(shape=GLOBAL_CONSTANT, dtype=float32)
    idx = cuda.grid(1)
    A[idx] = smem[idx]
26
+
27
+
28
def udt_global_build_tuple(A):
    """Kernel body: shared array whose shape is a tuple built from globals."""
    smem = cuda.shared.array(
        shape=(GLOBAL_CONSTANT, GLOBAL_CONSTANT_2), dtype=float32
    )
    row, col = cuda.grid(2)
    A[row, col] = smem[row, col]
34
+
35
+
36
def udt_global_build_list(A):
    """Kernel body: shared array whose shape is a list built from globals."""
    smem = cuda.shared.array(
        shape=[GLOBAL_CONSTANT, GLOBAL_CONSTANT_2], dtype=float32
    )
    row, col = cuda.grid(2)
    A[row, col] = smem[row, col]
42
+
43
+
44
def udt_global_constant_tuple(A):
    """Kernel body: shared array whose shape is a module-level tuple."""
    smem = cuda.shared.array(shape=GLOBAL_CONSTANT_TUPLE, dtype=float32)
    row, col = cuda.grid(2)
    A[row, col] = smem[row, col]
48
+
49
+
50
def udt_invalid_1(A):
    """Kernel body: shared array whose scalar shape is read from ``A``."""
    smem = cuda.shared.array(shape=A[0], dtype=float32)
    idx = cuda.grid(1)
    A[idx] = smem[idx]
54
+
55
+
56
def udt_invalid_2(A):
    """Kernel body: tuple shape containing ``A[0]``, indexed in 2D."""
    smem = cuda.shared.array(shape=(1, A[0]), dtype=float32)
    row, col = cuda.grid(2)
    A[row, col] = smem[row, col]
60
+
61
+
62
def udt_invalid_3(A):
    """Kernel body: tuple shape containing ``A[0]``, with ``A`` indexed in 1D."""
    smem = cuda.shared.array(shape=(1, A[0]), dtype=float32)
    idx = cuda.grid(1)
    A[idx] = smem[idx, 0]
66
+
67
+
68
class TestSharedMemoryCreation(CUDATestCase):
    """Tests for the shape and dtype arguments of ``cuda.shared.array``."""

    def getarg(self):
        """Return a one-element float32 array used as a 1D kernel argument."""
        return np.array(100, dtype=np.float32, ndmin=1)

    def getarg2(self):
        """Return the 1D argument reshaped to ``(1, 1)`` for 2D kernels."""
        return self.getarg().reshape(1, 1)

    def _assert_array_call_rejected(self, pyfunc, sig, expected_call):
        """Compile ``pyfunc`` with ``sig`` and check the typing failure.

        Compilation must raise ``TypingError`` whose message names
        ``shared.array`` as the unresolved function and reports
        ``expected_call`` as the signature that was attempted.
        """
        with self.assertRaises(TypingError) as raises:
            cuda.jit(sig)(pyfunc)

        message = str(raises.exception)
        self.assertIn(
            "No implementation of function Function(<function shared.array",
            message,
        )
        self.assertIn("found for signature:\n \n " + expected_call, message)

    def test_global_constants(self):
        kernel = cuda.jit((float32[:],))(udt_global_constants)
        kernel[1, 1](self.getarg())

    def test_global_build_tuple(self):
        kernel = cuda.jit((float32[:, :],))(udt_global_build_tuple)
        kernel[1, 1](self.getarg2())

    @skip_on_cudasim("Simulator does not prohibit lists for shared array shape")
    def test_global_build_list(self):
        # A list is not accepted as the shape
        self._assert_array_call_rejected(
            udt_global_build_list,
            (float32[:, :],),
            ">>> array(shape=list(int64)<iv=[5, 6]>, dtype=class(float32)",
        )

    def test_global_constant_tuple(self):
        kernel = cuda.jit((float32[:, :],))(udt_global_constant_tuple)
        kernel[1, 1](self.getarg2())

    @skip_on_cudasim("Can't check for constants in simulator")
    def test_invalid_1(self):
        # Scalar shape cannot be a floating point value
        self._assert_array_call_rejected(
            udt_invalid_1,
            (float32[:],),
            ">>> array(shape=float32, dtype=class(float32))",
        )

    @skip_on_cudasim("Can't check for constants in simulator")
    def test_invalid_2(self):
        # Tuple shape cannot contain a non-integer element (here, indexing the
        # 2D argument yields a 1D float32 array)
        self._assert_array_call_rejected(
            udt_invalid_2,
            (float32[:, :],),
            ">>> array(shape=Tuple(Literal[int](1), "
            "array(float32, 1d, A)), dtype=class(float32))",
        )

    @skip_on_cudasim("Can't check for constants in simulator")
    def test_invalid_3(self):
        # Scalar shape must be literal
        self._assert_array_call_rejected(
            udt_invalid_1,
            (int32[:],),
            ">>> array(shape=int32, dtype=class(float32))",
        )

    @skip_on_cudasim("Can't check for constants in simulator")
    def test_invalid_4(self):
        # Tuple shape must contain only literals
        self._assert_array_call_rejected(
            udt_invalid_3,
            (int32[:],),
            ">>> array(shape=Tuple(Literal[int](1), int32), "
            "dtype=class(float32))",
        )

    def check_dtype(self, f, dtype):
        """Assert that variable ``s`` in the first overload of ``f`` has ``dtype``.

        The kernel under test must bind its shared array to a local named
        ``s``, because the type is looked up by that name in the typemap.
        """
        first_overload = next(iter(f.overloads.values()))
        typemap = first_overload._type_annotation.typemap
        self.assertEqual(typemap["s"].dtype, dtype)

    @skip_on_cudasim("Can't check typing in simulator")
    def test_numba_dtype(self):
        # Check that Numba types can be used as the dtype of a shared array
        @cuda.jit(void(int32[::1]))
        def f(x):
            s = cuda.shared.array(10, dtype=int32)
            s[0] = x[0]
            x[0] = s[0]

        self.check_dtype(f, int32)

    @skip_on_cudasim("Can't check typing in simulator")
    def test_numpy_dtype(self):
        # Check that NumPy types can be used as the dtype of a shared array
        @cuda.jit(void(int32[::1]))
        def f(x):
            s = cuda.shared.array(10, dtype=np.int32)
            s[0] = x[0]
            x[0] = s[0]

        self.check_dtype(f, int32)

    @skip_on_cudasim("Can't check typing in simulator")
    def test_string_dtype(self):
        # Check that strings can be used to specify the dtype of a shared array
        @cuda.jit(void(int32[::1]))
        def f(x):
            s = cuda.shared.array(10, dtype="int32")
            s[0] = x[0]
            x[0] = s[0]

        self.check_dtype(f, int32)

    @skip_on_cudasim("Can't check typing in simulator")
    def test_invalid_string_dtype(self):
        # Check that strings of invalid dtypes cause a typing error
        pattern = ".*Invalid NumPy dtype specified: 'int33'.*"
        with self.assertRaisesRegex(TypingError, pattern):
            # An explicit signature makes the decorator compile eagerly, so
            # the error is raised while ``f`` is being defined.
            @cuda.jit(void(int32[::1]))
            def f(x):
                s = cuda.shared.array(10, dtype="int33")
                s[0] = x[0]
                x[0] = s[0]

    @skip_on_cudasim("Can't check typing in simulator")
    def test_type_with_struct_data_model(self):
        # Check that a type with a struct data model can be used as the dtype
        @cuda.jit(void(struct_model_type[::1]))
        def f(x):
            s = cuda.shared.array(10, dtype=struct_model_type)
            s[0] = x[0]
            x[0] = s[0]

        self.check_dtype(f, struct_model_type)
230
+
231
+
232
+ if __name__ == "__main__":
233
+ unittest.main()