numba-cuda 0.21.1__cp313-cp313-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (488) hide show
  1. _numba_cuda_redirector.pth +4 -0
  2. _numba_cuda_redirector.py +89 -0
  3. numba_cuda/VERSION +1 -0
  4. numba_cuda/__init__.py +6 -0
  5. numba_cuda/_version.py +11 -0
  6. numba_cuda/numba/cuda/__init__.py +70 -0
  7. numba_cuda/numba/cuda/_internal/cuda_bf16.py +16394 -0
  8. numba_cuda/numba/cuda/_internal/cuda_fp16.py +8112 -0
  9. numba_cuda/numba/cuda/api.py +577 -0
  10. numba_cuda/numba/cuda/api_util.py +76 -0
  11. numba_cuda/numba/cuda/args.py +72 -0
  12. numba_cuda/numba/cuda/bf16.py +397 -0
  13. numba_cuda/numba/cuda/cache_hints.py +287 -0
  14. numba_cuda/numba/cuda/cext/__init__.py +2 -0
  15. numba_cuda/numba/cuda/cext/_devicearray.cp313-win_amd64.pyd +0 -0
  16. numba_cuda/numba/cuda/cext/_devicearray.cpp +159 -0
  17. numba_cuda/numba/cuda/cext/_devicearray.h +29 -0
  18. numba_cuda/numba/cuda/cext/_dispatcher.cp313-win_amd64.pyd +0 -0
  19. numba_cuda/numba/cuda/cext/_dispatcher.cpp +1098 -0
  20. numba_cuda/numba/cuda/cext/_hashtable.cpp +532 -0
  21. numba_cuda/numba/cuda/cext/_hashtable.h +135 -0
  22. numba_cuda/numba/cuda/cext/_helperlib.c +71 -0
  23. numba_cuda/numba/cuda/cext/_helperlib.cp313-win_amd64.pyd +0 -0
  24. numba_cuda/numba/cuda/cext/_helpermod.c +82 -0
  25. numba_cuda/numba/cuda/cext/_pymodule.h +38 -0
  26. numba_cuda/numba/cuda/cext/_typeconv.cp313-win_amd64.pyd +0 -0
  27. numba_cuda/numba/cuda/cext/_typeconv.cpp +206 -0
  28. numba_cuda/numba/cuda/cext/_typeof.cpp +1159 -0
  29. numba_cuda/numba/cuda/cext/_typeof.h +19 -0
  30. numba_cuda/numba/cuda/cext/capsulethunk.h +111 -0
  31. numba_cuda/numba/cuda/cext/mviewbuf.c +385 -0
  32. numba_cuda/numba/cuda/cext/mviewbuf.cp313-win_amd64.pyd +0 -0
  33. numba_cuda/numba/cuda/cext/typeconv.cpp +212 -0
  34. numba_cuda/numba/cuda/cext/typeconv.hpp +101 -0
  35. numba_cuda/numba/cuda/cg.py +67 -0
  36. numba_cuda/numba/cuda/cgutils.py +1294 -0
  37. numba_cuda/numba/cuda/cloudpickle/__init__.py +21 -0
  38. numba_cuda/numba/cuda/cloudpickle/cloudpickle.py +1598 -0
  39. numba_cuda/numba/cuda/cloudpickle/cloudpickle_fast.py +17 -0
  40. numba_cuda/numba/cuda/codegen.py +541 -0
  41. numba_cuda/numba/cuda/compiler.py +1396 -0
  42. numba_cuda/numba/cuda/core/analysis.py +758 -0
  43. numba_cuda/numba/cuda/core/annotations/__init__.py +0 -0
  44. numba_cuda/numba/cuda/core/annotations/pretty_annotate.py +288 -0
  45. numba_cuda/numba/cuda/core/annotations/type_annotations.py +305 -0
  46. numba_cuda/numba/cuda/core/base.py +1332 -0
  47. numba_cuda/numba/cuda/core/boxing.py +1411 -0
  48. numba_cuda/numba/cuda/core/bytecode.py +728 -0
  49. numba_cuda/numba/cuda/core/byteflow.py +2346 -0
  50. numba_cuda/numba/cuda/core/caching.py +744 -0
  51. numba_cuda/numba/cuda/core/callconv.py +392 -0
  52. numba_cuda/numba/cuda/core/codegen.py +171 -0
  53. numba_cuda/numba/cuda/core/compiler.py +199 -0
  54. numba_cuda/numba/cuda/core/compiler_lock.py +85 -0
  55. numba_cuda/numba/cuda/core/compiler_machinery.py +497 -0
  56. numba_cuda/numba/cuda/core/config.py +650 -0
  57. numba_cuda/numba/cuda/core/consts.py +124 -0
  58. numba_cuda/numba/cuda/core/controlflow.py +989 -0
  59. numba_cuda/numba/cuda/core/entrypoints.py +57 -0
  60. numba_cuda/numba/cuda/core/environment.py +66 -0
  61. numba_cuda/numba/cuda/core/errors.py +917 -0
  62. numba_cuda/numba/cuda/core/event.py +511 -0
  63. numba_cuda/numba/cuda/core/funcdesc.py +330 -0
  64. numba_cuda/numba/cuda/core/generators.py +387 -0
  65. numba_cuda/numba/cuda/core/imputils.py +509 -0
  66. numba_cuda/numba/cuda/core/inline_closurecall.py +1787 -0
  67. numba_cuda/numba/cuda/core/interpreter.py +3617 -0
  68. numba_cuda/numba/cuda/core/ir.py +1812 -0
  69. numba_cuda/numba/cuda/core/ir_utils.py +2638 -0
  70. numba_cuda/numba/cuda/core/optional.py +129 -0
  71. numba_cuda/numba/cuda/core/options.py +262 -0
  72. numba_cuda/numba/cuda/core/postproc.py +249 -0
  73. numba_cuda/numba/cuda/core/pythonapi.py +1859 -0
  74. numba_cuda/numba/cuda/core/registry.py +46 -0
  75. numba_cuda/numba/cuda/core/removerefctpass.py +123 -0
  76. numba_cuda/numba/cuda/core/rewrites/__init__.py +26 -0
  77. numba_cuda/numba/cuda/core/rewrites/ir_print.py +91 -0
  78. numba_cuda/numba/cuda/core/rewrites/registry.py +104 -0
  79. numba_cuda/numba/cuda/core/rewrites/static_binop.py +41 -0
  80. numba_cuda/numba/cuda/core/rewrites/static_getitem.py +189 -0
  81. numba_cuda/numba/cuda/core/rewrites/static_raise.py +100 -0
  82. numba_cuda/numba/cuda/core/sigutils.py +68 -0
  83. numba_cuda/numba/cuda/core/ssa.py +498 -0
  84. numba_cuda/numba/cuda/core/targetconfig.py +330 -0
  85. numba_cuda/numba/cuda/core/tracing.py +231 -0
  86. numba_cuda/numba/cuda/core/transforms.py +956 -0
  87. numba_cuda/numba/cuda/core/typed_passes.py +867 -0
  88. numba_cuda/numba/cuda/core/typeinfer.py +1950 -0
  89. numba_cuda/numba/cuda/core/unsafe/__init__.py +0 -0
  90. numba_cuda/numba/cuda/core/unsafe/bytes.py +67 -0
  91. numba_cuda/numba/cuda/core/unsafe/eh.py +67 -0
  92. numba_cuda/numba/cuda/core/unsafe/refcount.py +98 -0
  93. numba_cuda/numba/cuda/core/untyped_passes.py +1979 -0
  94. numba_cuda/numba/cuda/cpython/builtins.py +1153 -0
  95. numba_cuda/numba/cuda/cpython/charseq.py +1218 -0
  96. numba_cuda/numba/cuda/cpython/cmathimpl.py +560 -0
  97. numba_cuda/numba/cuda/cpython/enumimpl.py +103 -0
  98. numba_cuda/numba/cuda/cpython/iterators.py +167 -0
  99. numba_cuda/numba/cuda/cpython/listobj.py +1326 -0
  100. numba_cuda/numba/cuda/cpython/mathimpl.py +499 -0
  101. numba_cuda/numba/cuda/cpython/numbers.py +1475 -0
  102. numba_cuda/numba/cuda/cpython/rangeobj.py +289 -0
  103. numba_cuda/numba/cuda/cpython/slicing.py +322 -0
  104. numba_cuda/numba/cuda/cpython/tupleobj.py +456 -0
  105. numba_cuda/numba/cuda/cpython/unicode.py +2865 -0
  106. numba_cuda/numba/cuda/cpython/unicode_support.py +1597 -0
  107. numba_cuda/numba/cuda/cpython/unsafe/__init__.py +0 -0
  108. numba_cuda/numba/cuda/cpython/unsafe/numbers.py +64 -0
  109. numba_cuda/numba/cuda/cpython/unsafe/tuple.py +92 -0
  110. numba_cuda/numba/cuda/cuda_paths.py +691 -0
  111. numba_cuda/numba/cuda/cudadecl.py +556 -0
  112. numba_cuda/numba/cuda/cudadrv/__init__.py +14 -0
  113. numba_cuda/numba/cuda/cudadrv/devicearray.py +951 -0
  114. numba_cuda/numba/cuda/cudadrv/devices.py +249 -0
  115. numba_cuda/numba/cuda/cudadrv/driver.py +3222 -0
  116. numba_cuda/numba/cuda/cudadrv/drvapi.py +435 -0
  117. numba_cuda/numba/cuda/cudadrv/dummyarray.py +558 -0
  118. numba_cuda/numba/cuda/cudadrv/enums.py +613 -0
  119. numba_cuda/numba/cuda/cudadrv/error.py +48 -0
  120. numba_cuda/numba/cuda/cudadrv/libs.py +220 -0
  121. numba_cuda/numba/cuda/cudadrv/linkable_code.py +184 -0
  122. numba_cuda/numba/cuda/cudadrv/mappings.py +14 -0
  123. numba_cuda/numba/cuda/cudadrv/ndarray.py +26 -0
  124. numba_cuda/numba/cuda/cudadrv/nvrtc.py +193 -0
  125. numba_cuda/numba/cuda/cudadrv/nvvm.py +756 -0
  126. numba_cuda/numba/cuda/cudadrv/rtapi.py +13 -0
  127. numba_cuda/numba/cuda/cudadrv/runtime.py +34 -0
  128. numba_cuda/numba/cuda/cudaimpl.py +995 -0
  129. numba_cuda/numba/cuda/cudamath.py +149 -0
  130. numba_cuda/numba/cuda/datamodel/__init__.py +7 -0
  131. numba_cuda/numba/cuda/datamodel/cuda_manager.py +66 -0
  132. numba_cuda/numba/cuda/datamodel/cuda_models.py +1446 -0
  133. numba_cuda/numba/cuda/datamodel/cuda_packer.py +224 -0
  134. numba_cuda/numba/cuda/datamodel/cuda_registry.py +22 -0
  135. numba_cuda/numba/cuda/datamodel/cuda_testing.py +153 -0
  136. numba_cuda/numba/cuda/datamodel/manager.py +11 -0
  137. numba_cuda/numba/cuda/datamodel/models.py +9 -0
  138. numba_cuda/numba/cuda/datamodel/packer.py +9 -0
  139. numba_cuda/numba/cuda/datamodel/registry.py +11 -0
  140. numba_cuda/numba/cuda/datamodel/testing.py +11 -0
  141. numba_cuda/numba/cuda/debuginfo.py +903 -0
  142. numba_cuda/numba/cuda/decorators.py +294 -0
  143. numba_cuda/numba/cuda/descriptor.py +35 -0
  144. numba_cuda/numba/cuda/device_init.py +158 -0
  145. numba_cuda/numba/cuda/deviceufunc.py +1021 -0
  146. numba_cuda/numba/cuda/dispatcher.py +2463 -0
  147. numba_cuda/numba/cuda/errors.py +72 -0
  148. numba_cuda/numba/cuda/extending.py +697 -0
  149. numba_cuda/numba/cuda/flags.py +178 -0
  150. numba_cuda/numba/cuda/fp16.py +357 -0
  151. numba_cuda/numba/cuda/include/12/cuda_bf16.h +5118 -0
  152. numba_cuda/numba/cuda/include/12/cuda_bf16.hpp +3865 -0
  153. numba_cuda/numba/cuda/include/12/cuda_fp16.h +5363 -0
  154. numba_cuda/numba/cuda/include/12/cuda_fp16.hpp +3483 -0
  155. numba_cuda/numba/cuda/include/13/cuda_bf16.h +5118 -0
  156. numba_cuda/numba/cuda/include/13/cuda_bf16.hpp +3865 -0
  157. numba_cuda/numba/cuda/include/13/cuda_fp16.h +5363 -0
  158. numba_cuda/numba/cuda/include/13/cuda_fp16.hpp +3483 -0
  159. numba_cuda/numba/cuda/initialize.py +24 -0
  160. numba_cuda/numba/cuda/intrinsic_wrapper.py +41 -0
  161. numba_cuda/numba/cuda/intrinsics.py +382 -0
  162. numba_cuda/numba/cuda/itanium_mangler.py +214 -0
  163. numba_cuda/numba/cuda/kernels/__init__.py +2 -0
  164. numba_cuda/numba/cuda/kernels/reduction.py +265 -0
  165. numba_cuda/numba/cuda/kernels/transpose.py +65 -0
  166. numba_cuda/numba/cuda/libdevice.py +3386 -0
  167. numba_cuda/numba/cuda/libdevicedecl.py +20 -0
  168. numba_cuda/numba/cuda/libdevicefuncs.py +1060 -0
  169. numba_cuda/numba/cuda/libdeviceimpl.py +88 -0
  170. numba_cuda/numba/cuda/locks.py +19 -0
  171. numba_cuda/numba/cuda/lowering.py +1951 -0
  172. numba_cuda/numba/cuda/mathimpl.py +374 -0
  173. numba_cuda/numba/cuda/memory_management/__init__.py +4 -0
  174. numba_cuda/numba/cuda/memory_management/memsys.cu +99 -0
  175. numba_cuda/numba/cuda/memory_management/memsys.cuh +22 -0
  176. numba_cuda/numba/cuda/memory_management/nrt.cu +212 -0
  177. numba_cuda/numba/cuda/memory_management/nrt.cuh +48 -0
  178. numba_cuda/numba/cuda/memory_management/nrt.py +390 -0
  179. numba_cuda/numba/cuda/memory_management/nrt_context.py +438 -0
  180. numba_cuda/numba/cuda/misc/appdirs.py +594 -0
  181. numba_cuda/numba/cuda/misc/cffiimpl.py +24 -0
  182. numba_cuda/numba/cuda/misc/coverage_support.py +43 -0
  183. numba_cuda/numba/cuda/misc/dump_style.py +41 -0
  184. numba_cuda/numba/cuda/misc/findlib.py +75 -0
  185. numba_cuda/numba/cuda/misc/firstlinefinder.py +96 -0
  186. numba_cuda/numba/cuda/misc/gdb_hook.py +240 -0
  187. numba_cuda/numba/cuda/misc/literal.py +28 -0
  188. numba_cuda/numba/cuda/misc/llvm_pass_timings.py +412 -0
  189. numba_cuda/numba/cuda/misc/special.py +94 -0
  190. numba_cuda/numba/cuda/models.py +56 -0
  191. numba_cuda/numba/cuda/np/arraymath.py +5130 -0
  192. numba_cuda/numba/cuda/np/arrayobj.py +7635 -0
  193. numba_cuda/numba/cuda/np/extensions.py +11 -0
  194. numba_cuda/numba/cuda/np/linalg.py +3087 -0
  195. numba_cuda/numba/cuda/np/math/__init__.py +0 -0
  196. numba_cuda/numba/cuda/np/math/cmathimpl.py +558 -0
  197. numba_cuda/numba/cuda/np/math/mathimpl.py +487 -0
  198. numba_cuda/numba/cuda/np/math/numbers.py +1461 -0
  199. numba_cuda/numba/cuda/np/npdatetime.py +969 -0
  200. numba_cuda/numba/cuda/np/npdatetime_helpers.py +217 -0
  201. numba_cuda/numba/cuda/np/npyfuncs.py +1808 -0
  202. numba_cuda/numba/cuda/np/npyimpl.py +1027 -0
  203. numba_cuda/numba/cuda/np/numpy_support.py +798 -0
  204. numba_cuda/numba/cuda/np/polynomial/__init__.py +4 -0
  205. numba_cuda/numba/cuda/np/polynomial/polynomial_core.py +242 -0
  206. numba_cuda/numba/cuda/np/polynomial/polynomial_functions.py +380 -0
  207. numba_cuda/numba/cuda/np/ufunc/__init__.py +4 -0
  208. numba_cuda/numba/cuda/np/ufunc/decorators.py +203 -0
  209. numba_cuda/numba/cuda/np/ufunc/sigparse.py +68 -0
  210. numba_cuda/numba/cuda/np/ufunc/ufuncbuilder.py +65 -0
  211. numba_cuda/numba/cuda/np/ufunc_db.py +1282 -0
  212. numba_cuda/numba/cuda/np/unsafe/__init__.py +0 -0
  213. numba_cuda/numba/cuda/np/unsafe/ndarray.py +84 -0
  214. numba_cuda/numba/cuda/nvvmutils.py +254 -0
  215. numba_cuda/numba/cuda/printimpl.py +126 -0
  216. numba_cuda/numba/cuda/random.py +308 -0
  217. numba_cuda/numba/cuda/reshape_funcs.cu +156 -0
  218. numba_cuda/numba/cuda/serialize.py +267 -0
  219. numba_cuda/numba/cuda/simulator/__init__.py +63 -0
  220. numba_cuda/numba/cuda/simulator/_internal/__init__.py +4 -0
  221. numba_cuda/numba/cuda/simulator/_internal/cuda_bf16.py +2 -0
  222. numba_cuda/numba/cuda/simulator/api.py +179 -0
  223. numba_cuda/numba/cuda/simulator/bf16.py +4 -0
  224. numba_cuda/numba/cuda/simulator/compiler.py +38 -0
  225. numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +11 -0
  226. numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +462 -0
  227. numba_cuda/numba/cuda/simulator/cudadrv/devices.py +122 -0
  228. numba_cuda/numba/cuda/simulator/cudadrv/driver.py +66 -0
  229. numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +7 -0
  230. numba_cuda/numba/cuda/simulator/cudadrv/dummyarray.py +7 -0
  231. numba_cuda/numba/cuda/simulator/cudadrv/error.py +10 -0
  232. numba_cuda/numba/cuda/simulator/cudadrv/libs.py +10 -0
  233. numba_cuda/numba/cuda/simulator/cudadrv/linkable_code.py +61 -0
  234. numba_cuda/numba/cuda/simulator/cudadrv/nvrtc.py +11 -0
  235. numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +32 -0
  236. numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +22 -0
  237. numba_cuda/numba/cuda/simulator/dispatcher.py +11 -0
  238. numba_cuda/numba/cuda/simulator/kernel.py +320 -0
  239. numba_cuda/numba/cuda/simulator/kernelapi.py +509 -0
  240. numba_cuda/numba/cuda/simulator/memory_management/__init__.py +4 -0
  241. numba_cuda/numba/cuda/simulator/memory_management/nrt.py +21 -0
  242. numba_cuda/numba/cuda/simulator/reduction.py +19 -0
  243. numba_cuda/numba/cuda/simulator/tests/support.py +4 -0
  244. numba_cuda/numba/cuda/simulator/vector_types.py +65 -0
  245. numba_cuda/numba/cuda/simulator_init.py +18 -0
  246. numba_cuda/numba/cuda/stubs.py +635 -0
  247. numba_cuda/numba/cuda/target.py +505 -0
  248. numba_cuda/numba/cuda/testing.py +347 -0
  249. numba_cuda/numba/cuda/tests/__init__.py +62 -0
  250. numba_cuda/numba/cuda/tests/benchmarks/__init__.py +0 -0
  251. numba_cuda/numba/cuda/tests/benchmarks/test_kernel_launch.py +119 -0
  252. numba_cuda/numba/cuda/tests/cloudpickle_main_class.py +9 -0
  253. numba_cuda/numba/cuda/tests/core/serialize_usecases.py +113 -0
  254. numba_cuda/numba/cuda/tests/core/test_itanium_mangler.py +83 -0
  255. numba_cuda/numba/cuda/tests/core/test_serialize.py +371 -0
  256. numba_cuda/numba/cuda/tests/cudadrv/__init__.py +9 -0
  257. numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +147 -0
  258. numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +161 -0
  259. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +397 -0
  260. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +24 -0
  261. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +180 -0
  262. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +313 -0
  263. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +187 -0
  264. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +621 -0
  265. numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +247 -0
  266. numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +100 -0
  267. numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +198 -0
  268. numba_cuda/numba/cuda/tests/cudadrv/test_events.py +53 -0
  269. numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +72 -0
  270. numba_cuda/numba/cuda/tests/cudadrv/test_init.py +138 -0
  271. numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +43 -0
  272. numba_cuda/numba/cuda/tests/cudadrv/test_is_fp16.py +15 -0
  273. numba_cuda/numba/cuda/tests/cudadrv/test_linkable_code.py +58 -0
  274. numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +348 -0
  275. numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +128 -0
  276. numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py +301 -0
  277. numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +174 -0
  278. numba_cuda/numba/cuda/tests/cudadrv/test_nvrtc.py +28 -0
  279. numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +185 -0
  280. numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +39 -0
  281. numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +23 -0
  282. numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +38 -0
  283. numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +48 -0
  284. numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +44 -0
  285. numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +127 -0
  286. numba_cuda/numba/cuda/tests/cudapy/__init__.py +9 -0
  287. numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +231 -0
  288. numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +50 -0
  289. numba_cuda/numba/cuda/tests/cudapy/cg_cache_usecases.py +36 -0
  290. numba_cuda/numba/cuda/tests/cudapy/complex_usecases.py +116 -0
  291. numba_cuda/numba/cuda/tests/cudapy/enum_usecases.py +59 -0
  292. numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +62 -0
  293. numba_cuda/numba/cuda/tests/cudapy/jitlink.ptx +28 -0
  294. numba_cuda/numba/cuda/tests/cudapy/overload_usecases.py +33 -0
  295. numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +104 -0
  296. numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +47 -0
  297. numba_cuda/numba/cuda/tests/cudapy/test_analysis.py +1122 -0
  298. numba_cuda/numba/cuda/tests/cudapy/test_array.py +344 -0
  299. numba_cuda/numba/cuda/tests/cudapy/test_array_alignment.py +268 -0
  300. numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +203 -0
  301. numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +63 -0
  302. numba_cuda/numba/cuda/tests/cudapy/test_array_reductions.py +360 -0
  303. numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +1815 -0
  304. numba_cuda/numba/cuda/tests/cudapy/test_bfloat16.py +599 -0
  305. numba_cuda/numba/cuda/tests/cudapy/test_bfloat16_bindings.py +377 -0
  306. numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +160 -0
  307. numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +27 -0
  308. numba_cuda/numba/cuda/tests/cudapy/test_byteflow.py +98 -0
  309. numba_cuda/numba/cuda/tests/cudapy/test_cache_hints.py +210 -0
  310. numba_cuda/numba/cuda/tests/cudapy/test_caching.py +683 -0
  311. numba_cuda/numba/cuda/tests/cudapy/test_casting.py +265 -0
  312. numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +42 -0
  313. numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +718 -0
  314. numba_cuda/numba/cuda/tests/cudapy/test_complex.py +370 -0
  315. numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +23 -0
  316. numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +142 -0
  317. numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +178 -0
  318. numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +193 -0
  319. numba_cuda/numba/cuda/tests/cudapy/test_copy_propagate.py +131 -0
  320. numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +438 -0
  321. numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +94 -0
  322. numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +101 -0
  323. numba_cuda/numba/cuda/tests/cudapy/test_debug.py +105 -0
  324. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +889 -0
  325. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo_types.py +476 -0
  326. numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +500 -0
  327. numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +820 -0
  328. numba_cuda/numba/cuda/tests/cudapy/test_enums.py +152 -0
  329. numba_cuda/numba/cuda/tests/cudapy/test_errors.py +111 -0
  330. numba_cuda/numba/cuda/tests/cudapy/test_exception.py +170 -0
  331. numba_cuda/numba/cuda/tests/cudapy/test_extending.py +1088 -0
  332. numba_cuda/numba/cuda/tests/cudapy/test_extending_types.py +71 -0
  333. numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +265 -0
  334. numba_cuda/numba/cuda/tests/cudapy/test_flow_control.py +1433 -0
  335. numba_cuda/numba/cuda/tests/cudapy/test_forall.py +57 -0
  336. numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +34 -0
  337. numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +69 -0
  338. numba_cuda/numba/cuda/tests/cudapy/test_globals.py +62 -0
  339. numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +474 -0
  340. numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +167 -0
  341. numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +92 -0
  342. numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +39 -0
  343. numba_cuda/numba/cuda/tests/cudapy/test_inline.py +170 -0
  344. numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +255 -0
  345. numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +1219 -0
  346. numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +263 -0
  347. numba_cuda/numba/cuda/tests/cudapy/test_ir.py +598 -0
  348. numba_cuda/numba/cuda/tests/cudapy/test_ir_utils.py +276 -0
  349. numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +101 -0
  350. numba_cuda/numba/cuda/tests/cudapy/test_lang.py +68 -0
  351. numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +123 -0
  352. numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +194 -0
  353. numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +220 -0
  354. numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +173 -0
  355. numba_cuda/numba/cuda/tests/cudapy/test_make_function_to_jit_function.py +364 -0
  356. numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +47 -0
  357. numba_cuda/numba/cuda/tests/cudapy/test_math.py +842 -0
  358. numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +76 -0
  359. numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +78 -0
  360. numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +25 -0
  361. numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +145 -0
  362. numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +39 -0
  363. numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +82 -0
  364. numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +53 -0
  365. numba_cuda/numba/cuda/tests/cudapy/test_operator.py +504 -0
  366. numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +93 -0
  367. numba_cuda/numba/cuda/tests/cudapy/test_overload.py +402 -0
  368. numba_cuda/numba/cuda/tests/cudapy/test_powi.py +128 -0
  369. numba_cuda/numba/cuda/tests/cudapy/test_print.py +193 -0
  370. numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +37 -0
  371. numba_cuda/numba/cuda/tests/cudapy/test_random.py +117 -0
  372. numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +614 -0
  373. numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +130 -0
  374. numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +94 -0
  375. numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +83 -0
  376. numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +86 -0
  377. numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +40 -0
  378. numba_cuda/numba/cuda/tests/cudapy/test_sm.py +457 -0
  379. numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +233 -0
  380. numba_cuda/numba/cuda/tests/cudapy/test_ssa.py +454 -0
  381. numba_cuda/numba/cuda/tests/cudapy/test_stream_api.py +56 -0
  382. numba_cuda/numba/cuda/tests/cudapy/test_sync.py +277 -0
  383. numba_cuda/numba/cuda/tests/cudapy/test_tracing.py +200 -0
  384. numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +90 -0
  385. numba_cuda/numba/cuda/tests/cudapy/test_typeconv.py +333 -0
  386. numba_cuda/numba/cuda/tests/cudapy/test_typeinfer.py +538 -0
  387. numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +585 -0
  388. numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +42 -0
  389. numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +485 -0
  390. numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +312 -0
  391. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +23 -0
  392. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +183 -0
  393. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +40 -0
  394. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +40 -0
  395. numba_cuda/numba/cuda/tests/cudapy/test_warning.py +206 -0
  396. numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +331 -0
  397. numba_cuda/numba/cuda/tests/cudasim/__init__.py +9 -0
  398. numba_cuda/numba/cuda/tests/cudasim/support.py +9 -0
  399. numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +111 -0
  400. numba_cuda/numba/cuda/tests/data/__init__.py +2 -0
  401. numba_cuda/numba/cuda/tests/data/cta_barrier.cu +28 -0
  402. numba_cuda/numba/cuda/tests/data/cuda_include.cu +10 -0
  403. numba_cuda/numba/cuda/tests/data/error.cu +12 -0
  404. numba_cuda/numba/cuda/tests/data/include/add.cuh +8 -0
  405. numba_cuda/numba/cuda/tests/data/jitlink.cu +28 -0
  406. numba_cuda/numba/cuda/tests/data/jitlink.ptx +49 -0
  407. numba_cuda/numba/cuda/tests/data/warn.cu +12 -0
  408. numba_cuda/numba/cuda/tests/doc_examples/__init__.py +9 -0
  409. numba_cuda/numba/cuda/tests/doc_examples/ffi/__init__.py +2 -0
  410. numba_cuda/numba/cuda/tests/doc_examples/ffi/functions.cu +54 -0
  411. numba_cuda/numba/cuda/tests/doc_examples/ffi/include/mul.cuh +8 -0
  412. numba_cuda/numba/cuda/tests/doc_examples/ffi/saxpy.cu +14 -0
  413. numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +86 -0
  414. numba_cuda/numba/cuda/tests/doc_examples/test_cpointer.py +68 -0
  415. numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +81 -0
  416. numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +141 -0
  417. numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +160 -0
  418. numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +180 -0
  419. numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +119 -0
  420. numba_cuda/numba/cuda/tests/doc_examples/test_random.py +66 -0
  421. numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +80 -0
  422. numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +206 -0
  423. numba_cuda/numba/cuda/tests/doc_examples/test_ufunc.py +53 -0
  424. numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +76 -0
  425. numba_cuda/numba/cuda/tests/nocuda/__init__.py +9 -0
  426. numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +391 -0
  427. numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +48 -0
  428. numba_cuda/numba/cuda/tests/nocuda/test_import.py +63 -0
  429. numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +252 -0
  430. numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +59 -0
  431. numba_cuda/numba/cuda/tests/nrt/__init__.py +9 -0
  432. numba_cuda/numba/cuda/tests/nrt/test_nrt.py +387 -0
  433. numba_cuda/numba/cuda/tests/nrt/test_nrt_refct.py +124 -0
  434. numba_cuda/numba/cuda/tests/support.py +900 -0
  435. numba_cuda/numba/cuda/typeconv/__init__.py +4 -0
  436. numba_cuda/numba/cuda/typeconv/castgraph.py +137 -0
  437. numba_cuda/numba/cuda/typeconv/rules.py +63 -0
  438. numba_cuda/numba/cuda/typeconv/typeconv.py +121 -0
  439. numba_cuda/numba/cuda/types/__init__.py +233 -0
  440. numba_cuda/numba/cuda/types/__init__.pyi +167 -0
  441. numba_cuda/numba/cuda/types/abstract.py +9 -0
  442. numba_cuda/numba/cuda/types/common.py +9 -0
  443. numba_cuda/numba/cuda/types/containers.py +9 -0
  444. numba_cuda/numba/cuda/types/cuda_abstract.py +533 -0
  445. numba_cuda/numba/cuda/types/cuda_common.py +110 -0
  446. numba_cuda/numba/cuda/types/cuda_containers.py +971 -0
  447. numba_cuda/numba/cuda/types/cuda_function_type.py +230 -0
  448. numba_cuda/numba/cuda/types/cuda_functions.py +798 -0
  449. numba_cuda/numba/cuda/types/cuda_iterators.py +120 -0
  450. numba_cuda/numba/cuda/types/cuda_misc.py +569 -0
  451. numba_cuda/numba/cuda/types/cuda_npytypes.py +690 -0
  452. numba_cuda/numba/cuda/types/cuda_scalars.py +280 -0
  453. numba_cuda/numba/cuda/types/ext_types.py +101 -0
  454. numba_cuda/numba/cuda/types/function_type.py +11 -0
  455. numba_cuda/numba/cuda/types/functions.py +9 -0
  456. numba_cuda/numba/cuda/types/iterators.py +9 -0
  457. numba_cuda/numba/cuda/types/misc.py +9 -0
  458. numba_cuda/numba/cuda/types/npytypes.py +9 -0
  459. numba_cuda/numba/cuda/types/scalars.py +9 -0
  460. numba_cuda/numba/cuda/typing/__init__.py +19 -0
  461. numba_cuda/numba/cuda/typing/arraydecl.py +939 -0
  462. numba_cuda/numba/cuda/typing/asnumbatype.py +130 -0
  463. numba_cuda/numba/cuda/typing/bufproto.py +70 -0
  464. numba_cuda/numba/cuda/typing/builtins.py +1209 -0
  465. numba_cuda/numba/cuda/typing/cffi_utils.py +219 -0
  466. numba_cuda/numba/cuda/typing/cmathdecl.py +47 -0
  467. numba_cuda/numba/cuda/typing/collections.py +138 -0
  468. numba_cuda/numba/cuda/typing/context.py +782 -0
  469. numba_cuda/numba/cuda/typing/ctypes_utils.py +125 -0
  470. numba_cuda/numba/cuda/typing/dictdecl.py +63 -0
  471. numba_cuda/numba/cuda/typing/enumdecl.py +74 -0
  472. numba_cuda/numba/cuda/typing/listdecl.py +147 -0
  473. numba_cuda/numba/cuda/typing/mathdecl.py +158 -0
  474. numba_cuda/numba/cuda/typing/npdatetime.py +322 -0
  475. numba_cuda/numba/cuda/typing/npydecl.py +749 -0
  476. numba_cuda/numba/cuda/typing/setdecl.py +115 -0
  477. numba_cuda/numba/cuda/typing/templates.py +1446 -0
  478. numba_cuda/numba/cuda/typing/typeof.py +301 -0
  479. numba_cuda/numba/cuda/ufuncs.py +746 -0
  480. numba_cuda/numba/cuda/utils.py +724 -0
  481. numba_cuda/numba/cuda/vector_types.py +214 -0
  482. numba_cuda/numba/cuda/vectorizers.py +260 -0
  483. numba_cuda-0.21.1.dist-info/METADATA +109 -0
  484. numba_cuda-0.21.1.dist-info/RECORD +488 -0
  485. numba_cuda-0.21.1.dist-info/WHEEL +5 -0
  486. numba_cuda-0.21.1.dist-info/licenses/LICENSE +26 -0
  487. numba_cuda-0.21.1.dist-info/licenses/LICENSE.numba +24 -0
  488. numba_cuda-0.21.1.dist-info/top_level.txt +1 -0
@@ -0,0 +1,621 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
4
+ import itertools
5
+ import numpy as np
6
+ from numba.cuda.cudadrv import devicearray
7
+ from numba import cuda
8
+ from numba.cuda.testing import unittest, CUDATestCase
9
+ from numba.cuda.testing import skip_on_cudasim
10
+ from numba.cuda.tests.support import IS_NUMPY_2
11
+
12
+
13
class TestCudaNDArray(CUDATestCase):
    """Tests of device arrays created with ``cuda.device_array`` and
    ``cuda.to_device``: construction, host/device copies, views,
    transposes, contiguity flags and the layout used for typing."""

    def test_device_array_interface(self):
        """Device arrays of several origins pass the interface check."""
        dary = cuda.device_array(shape=100)
        devicearray.verify_cuda_ndarray_interface(dary)

        ary = np.empty(100)
        dary = cuda.to_device(ary)
        devicearray.verify_cuda_ndarray_interface(dary)

        # Zero-dimensional host array
        ary = np.asarray(1.234)
        dary = cuda.to_device(ary)
        self.assertEqual(dary.ndim, 0)
        devicearray.verify_cuda_ndarray_interface(dary)

    def test_device_array_from_readonly(self):
        """A read-only host array can be copied to the device and back."""
        ary = np.arange(100, dtype=np.float32)
        # Make the array readonly
        ary.flags.writeable = False
        self.assertFalse(ary.flags.writeable)
        # Ensure that we can copy the readonly array
        dary = cuda.to_device(ary)
        retr = dary.copy_to_host()
        np.testing.assert_array_equal(retr, ary)

    def test_devicearray_dtype(self):
        """A dtype given as a string is reported as the NumPy dtype."""
        dary = cuda.device_array(shape=(100,), dtype="f4")
        self.assertEqual(dary.dtype, np.dtype("f4"))

    def test_devicearray_no_copy(self):
        """``to_device(..., copy=False)`` completes without raising."""
        array = np.arange(100, dtype=np.float32)
        cuda.to_device(array, copy=False)

    def test_devicearray_shape(self):
        """The device array reports the same shape as its host source."""
        ary = np.arange(2 * 3 * 4).reshape(2, 3, 4)
        dary = cuda.to_device(ary)
        self.assertEqual(ary.shape, dary.shape)
        self.assertEqual(ary.shape[1:], dary.shape[1:])

    def test_device_array_float(self):
        """A scalar float shape is rejected."""
        # Ensure that a float shape raises a TypeError
        with self.assertRaises(TypeError):
            cuda.device_array(shape=1.23)
        with self.assertRaises(TypeError):
            cuda.device_array(shape=np.float64(1.23))
        with self.assertRaises(TypeError):
            cuda.device_array(shape=np.array(1.23))

    def test_device_array_float_vectors(self):
        """Sequence shapes containing floats are rejected."""
        # Ensure that np.array, list or tuple inputs with
        # non-ints raise a TypeError
        with self.assertRaises(TypeError):
            cuda.device_array(shape=np.array([1.1]))
        with self.assertRaises(TypeError):
            cuda.device_array(shape=[1.1])
        with self.assertRaises(TypeError):
            cuda.device_array(shape=(1.1,))
        with self.assertRaises(TypeError):
            cuda.device_array(shape=np.array([1.1, 2.2]))
        with self.assertRaises(TypeError):
            cuda.device_array(shape=[1.1, 2.2])
        with self.assertRaises(TypeError):
            cuda.device_array(shape=(1.1, 2.2))

    def test_device_array_vectors(self):
        """Integer shapes given as an ndarray or a list are accepted."""
        # Ensure that np.array or list of inputs with
        # ints still work.
        # np.bool_ is used rather than np.bool: the latter alias does not
        # exist in NumPy 1.24-1.26, whereas np.bool_ is available on every
        # NumPy release.
        dary = cuda.device_array(shape=np.array([10, 10]), dtype=np.bool_)
        self.assertEqual(dary.shape, (10, 10))
        dary = cuda.device_array(shape=[10, 10], dtype=np.bool_)
        self.assertEqual(dary.shape, (10, 10))

    def test_devicearray(self):
        """Data survives a host -> device -> host round trip."""
        array = np.arange(100, dtype=np.int32)
        original = array.copy()
        gpumem = cuda.to_device(array)
        # Clobber the host buffer so the copy back is what restores it
        array[:] = 0
        gpumem.copy_to_host(array)

        np.testing.assert_array_equal(array, original)

    def test_stream_bind(self):
        """An array created on a stream reports that stream, as does the
        result of ``bind``."""
        stream = cuda.stream()
        with stream.auto_synchronize():
            arr = cuda.device_array((3, 3), dtype=np.float64, stream=stream)
            self.assertEqual(arr.bind(stream).stream, stream)
            self.assertEqual(arr.stream, stream)

    def test_len_1d(self):
        """``len`` of a 1D device array matches NumPy."""
        ary = np.empty((3,))
        dary = cuda.device_array(3)
        self.assertEqual(len(ary), len(dary))

    def test_len_2d(self):
        """``len`` of a 2D device array matches NumPy."""
        ary = np.empty((3, 5))
        dary = cuda.device_array((3, 5))
        self.assertEqual(len(ary), len(dary))

    def test_len_3d(self):
        """``len`` of a 3D device array matches NumPy."""
        ary = np.empty((3, 5, 7))
        dary = cuda.device_array((3, 5, 7))
        self.assertEqual(len(ary), len(dary))

    def test_devicearray_partition(self):
        """The two halves returned by ``split`` copy back to the matching
        halves of the host array."""
        N = 100
        array = np.arange(N, dtype=np.int32)
        original = array.copy()
        gpumem = cuda.to_device(array)
        left, right = gpumem.split(N // 2)

        array[:] = 0

        # Sanity check: the host buffer really was cleared
        np.testing.assert_array_equal(array, np.zeros_like(array))

        right.copy_to_host(array[N // 2 :])
        left.copy_to_host(array[: N // 2])

        np.testing.assert_array_equal(array, original)

    def test_devicearray_replace(self):
        """``to_device(..., to=existing)`` overwrites the existing array."""
        N = 100
        array = np.arange(N, dtype=np.int32)
        original = array.copy()
        gpumem = cuda.to_device(array)
        cuda.to_device(array * 2, to=gpumem)
        gpumem.copy_to_host(array)
        np.testing.assert_array_equal(array, original * 2)

    @skip_on_cudasim("This works in the simulator")
    def test_devicearray_transpose_wrongdim(self):
        """Transposing a 3D device array without axes is not implemented."""
        gpumem = cuda.to_device(np.array(np.arange(12)).reshape(3, 4, 1))

        with self.assertRaises(NotImplementedError) as e:
            np.transpose(gpumem)

        self.assertEqual(
            "transposing a non-2D DeviceNDArray isn't supported",
            str(e.exception),
        )

    def test_devicearray_transpose_identity(self):
        """An identity permutation of axes leaves the data unchanged."""
        # any-shape identities should work
        original = np.array(np.arange(24)).reshape(3, 4, 2)
        array = np.transpose(
            cuda.to_device(original), axes=(0, 1, 2)
        ).copy_to_host()
        np.testing.assert_array_equal(array, original)

    def test_devicearray_transpose_duplicatedaxis(self):
        """A repeated axis in ``axes`` raises ValueError."""
        gpumem = cuda.to_device(np.array(np.arange(12)).reshape(3, 4))

        with self.assertRaises(ValueError) as e:
            np.transpose(gpumem, axes=(0, 0))

        self.assertIn(
            str(e.exception),
            container=[
                "invalid axes list (0, 0)",  # GPU
                "repeated axis in transpose",  # sim
            ],
        )

    def test_devicearray_transpose_wrongaxis(self):
        """An out-of-range axis in ``axes`` raises ValueError."""
        gpumem = cuda.to_device(np.array(np.arange(12)).reshape(3, 4))

        with self.assertRaises(ValueError) as e:
            np.transpose(gpumem, axes=(0, 2))

        self.assertIn(
            str(e.exception),
            container=[
                "invalid axes list (0, 2)",  # GPU
                "invalid axis for this array",
                "axis 2 is out of bounds for array of dimension 2",  # sim
            ],
        )

    def test_devicearray_view_ok(self):
        """Viewing as a wider dtype matches the NumPy view of the host
        array."""
        original = np.array(np.arange(12), dtype="i2").reshape(3, 4)
        array = cuda.to_device(original)
        for dtype in ("i4", "u4", "i8", "f8"):
            with self.subTest(dtype=dtype):
                np.testing.assert_array_equal(
                    array.view(dtype).copy_to_host(), original.view(dtype)
                )

    def test_devicearray_view_ok_not_c_contig(self):
        """A same-size dtype view works on a strided slice."""
        original = np.array(np.arange(32), dtype="i2").reshape(4, 8)
        array = cuda.to_device(original)[:, ::2]
        original = original[:, ::2]
        np.testing.assert_array_equal(
            array.view("u2").copy_to_host(), original.view("u2")
        )

    def test_devicearray_view_bad_not_c_contig(self):
        """A different-size dtype view of a strided slice raises
        ValueError mentioning contiguity."""
        original = np.array(np.arange(32), dtype="i2").reshape(4, 8)
        array = cuda.to_device(original)[:, ::2]
        with self.assertRaises(ValueError) as e:
            array.view("i4")

        msg = str(e.exception)
        self.assertIn("To change to a dtype of a different size,", msg)

        # Either of two wordings of the contiguity requirement is accepted
        contiguous_pre_np123 = "the array must be C-contiguous" in msg
        contiguous_post_np123 = "the last axis must be contiguous" in msg
        self.assertTrue(
            contiguous_pre_np123 or contiguous_post_np123,
            "Expected message to mention contiguity",
        )

    def test_devicearray_view_bad_itemsize(self):
        """A wider dtype that does not divide the last axis's byte size
        raises ValueError."""
        original = np.array(np.arange(12), dtype="i2").reshape(4, 3)
        array = cuda.to_device(original)
        with self.assertRaises(ValueError) as e:
            array.view("i4")
        self.assertEqual(
            "When changing to a larger dtype,"
            " its size must be a divisor of the total size in bytes"
            " of the last axis of the array.",
            str(e.exception),
        )

    def test_devicearray_transpose_ok(self):
        """``np.transpose`` of a 2D device array matches the host ``.T``."""
        original = np.array(np.arange(12)).reshape(3, 4)
        array = np.transpose(cuda.to_device(original)).copy_to_host()
        np.testing.assert_array_equal(array, original.T)

    def test_devicearray_transpose_T(self):
        """The ``.T`` attribute of a 2D device array matches the host
        ``.T``."""
        original = np.array(np.arange(12)).reshape(3, 4)
        array = cuda.to_device(original).T.copy_to_host()
        np.testing.assert_array_equal(array, original.T)

    def test_devicearray_contiguous_slice(self):
        """Copying into a non-contiguous device slice raises ValueError."""
        # memcpys are dumb ranges of bytes, so trying to
        # copy to a non-contiguous range shouldn't work!
        a = np.arange(25).reshape(5, 5, order="F")
        s = np.full(fill_value=5, shape=(5,))

        d = cuda.to_device(a)
        a[2] = s

        # d is in F-order (not C-order), so d[2] is not contiguous
        # (40-byte strides). This means we can't memcpy to it!
        with self.assertRaises(ValueError) as e:
            d[2].copy_to_device(s)
        self.assertEqual(devicearray.errmsg_contiguous_buffer, str(e.exception))

        # if d[2].copy_to_device(s), then this would pass:
        # self.assertTrue((a == d.copy_to_host()).all())

    def _test_devicearray_contiguous_host_copy(self, a_c, a_f):
        """
        Checks host->device memcpys

        ``a_c`` must be C-contiguous and ``a_f`` F-contiguous; every
        pairing of the two as (initial device contents, host source) is
        copied, and the result is compared with both inputs.
        """
        self.assertTrue(a_c.flags.c_contiguous)
        self.assertTrue(a_f.flags.f_contiguous)

        for original, copy in [
            (a_f, a_f),
            (a_f, a_c),
            (a_c, a_f),
            (a_c, a_c),
        ]:
            # Label the pairing so a failure identifies which one broke
            src_order = "C" if original.flags.c_contiguous else "F"
            dst_order = "C" if copy.flags.c_contiguous else "F"
            msg = f"{src_order} => {dst_order}"

            d = cuda.to_device(original)
            d.copy_to_device(copy)
            np.testing.assert_array_equal(d.copy_to_host(), a_c, err_msg=msg)
            np.testing.assert_array_equal(d.copy_to_host(), a_f, err_msg=msg)

    def test_devicearray_contiguous_copy_host_3d(self):
        """Host copies across C and F orderings for a 3D array."""
        a_c = np.arange(5 * 5 * 5).reshape(5, 5, 5)
        a_f = np.array(a_c, order="F")
        self._test_devicearray_contiguous_host_copy(a_c, a_f)

    def test_devicearray_contiguous_copy_host_1d(self):
        """Host copies across C and F orderings for a 1D array."""
        a_c = np.arange(5)
        a_f = np.array(a_c, order="F")
        self._test_devicearray_contiguous_host_copy(a_c, a_f)

    def test_devicearray_contiguous_copy_device(self):
        """Device-to-device copies require matching strides."""
        a_c = np.arange(5 * 5 * 5).reshape(5, 5, 5)
        a_f = np.array(a_c, order="F")
        self.assertTrue(a_c.flags.c_contiguous)
        self.assertTrue(a_f.flags.f_contiguous)

        d = cuda.to_device(a_c)

        with self.assertRaises(ValueError) as e:
            d.copy_to_device(cuda.to_device(a_f))
        self.assertEqual(
            f"incompatible strides: {a_c.strides} vs. {a_f.strides}",
            str(e.exception),
        )

        d.copy_to_device(cuda.to_device(a_c))
        np.testing.assert_array_equal(d.copy_to_host(), a_c)

        d = cuda.to_device(a_f)

        with self.assertRaises(ValueError) as e:
            d.copy_to_device(cuda.to_device(a_c))
        self.assertEqual(
            f"incompatible strides: {a_f.strides} vs. {a_c.strides}",
            str(e.exception),
        )

        d.copy_to_device(cuda.to_device(a_f))
        np.testing.assert_array_equal(d.copy_to_host(), a_f)

    def test_devicearray_broadcast_host_copy(self):
        """Broadcast host arrays round-trip through the device, including
        across different core orderings."""
        broadsize = 4
        coreshape = (2, 3)
        coresize = np.prod(coreshape)
        core_c = np.arange(coresize).reshape(coreshape, order="C")
        core_f = np.arange(coresize).reshape(coreshape, order="F")
        for dim in range(len(coreshape)):
            # Insert a new axis at position ``dim`` and broadcast along it
            newindex = (slice(None),) * dim + (np.newaxis,)
            broadshape = coreshape[:dim] + (broadsize,) + coreshape[dim:]
            broad_c = np.broadcast_to(core_c[newindex], broadshape)
            broad_f = np.broadcast_to(core_f[newindex], broadshape)
            dbroad_c = cuda.to_device(broad_c)
            dbroad_f = cuda.to_device(broad_f)
            np.testing.assert_array_equal(dbroad_c.copy_to_host(), broad_c)
            np.testing.assert_array_equal(dbroad_f.copy_to_host(), broad_f)
            # Also test copying across different core orderings
            dbroad_c.copy_to_device(broad_f)
            dbroad_f.copy_to_device(broad_c)
            np.testing.assert_array_equal(dbroad_c.copy_to_host(), broad_f)
            np.testing.assert_array_equal(dbroad_f.copy_to_host(), broad_c)

    def test_devicearray_contiguous_host_strided(self):
        """A strided host array can be copied into a device array."""
        a_c = np.arange(10)
        d = cuda.to_device(a_c)
        arr = np.arange(20)[::2]
        d.copy_to_device(arr)
        np.testing.assert_array_equal(d.copy_to_host(), arr)

    def test_devicearray_contiguous_device_strided(self):
        """A strided device array is rejected as a copy source."""
        d = cuda.to_device(np.arange(20))
        arr = np.arange(20)

        with self.assertRaises(ValueError) as e:
            d.copy_to_device(cuda.to_device(arr)[::2])
        self.assertEqual(devicearray.errmsg_contiguous_buffer, str(e.exception))

    @skip_on_cudasim("DeviceNDArray class not present in simulator")
    def test_devicearray_relaxed_strides(self):
        # From the reproducer in Issue #6824.

        # Construct a device array that is contiguous even though
        # the strides for the first axis (800) are not equal to
        # the strides * size (10 * 8 = 80) for the previous axis,
        # because the first axis size is 1.
        arr = devicearray.DeviceNDArray((1, 10), (800, 8), np.float64)

        # Ensure we still believe the array to be contiguous because
        # strides checking is relaxed.
        self.assertTrue(arr.flags["C_CONTIGUOUS"])
        self.assertTrue(arr.flags["F_CONTIGUOUS"])

    def test_c_f_contiguity_matches_numpy(self):
        # From the reproducer in Issue #4943.

        shapes = ((1, 4), (4, 1))
        orders = ("C", "F")

        for shape, order in itertools.product(shapes, orders):
            # subTest names the failing shape/order combination
            with self.subTest(shape=shape, order=order):
                arr = np.ndarray(shape, order=order)
                d_arr = cuda.to_device(arr)
                self.assertEqual(
                    arr.flags["C_CONTIGUOUS"], d_arr.flags["C_CONTIGUOUS"]
                )
                self.assertEqual(
                    arr.flags["F_CONTIGUOUS"], d_arr.flags["F_CONTIGUOUS"]
                )

    @skip_on_cudasim("Typing not done in the simulator")
    def test_devicearray_typing_order_simple_c(self):
        # C-order 1D array
        a = np.zeros(10, order="C")
        d = cuda.to_device(a)
        self.assertEqual(d._numba_type_.layout, "C")

    @skip_on_cudasim("Typing not done in the simulator")
    def test_devicearray_typing_order_simple_f(self):
        # F-order array that is also C layout.
        a = np.zeros(10, order="F")
        d = cuda.to_device(a)
        self.assertEqual(d._numba_type_.layout, "C")

    @skip_on_cudasim("Typing not done in the simulator")
    def test_devicearray_typing_order_2d_c(self):
        # C-order 2D array
        a = np.zeros((2, 10), order="C")
        d = cuda.to_device(a)
        self.assertEqual(d._numba_type_.layout, "C")

    @skip_on_cudasim("Typing not done in the simulator")
    def test_devicearray_typing_order_2d_f(self):
        # F-order array that can only be F layout
        a = np.zeros((2, 10), order="F")
        d = cuda.to_device(a)
        self.assertEqual(d._numba_type_.layout, "F")

    @skip_on_cudasim("Typing not done in the simulator")
    def test_devicearray_typing_order_noncontig_slice_c(self):
        # Non-contiguous slice of C-order array
        a = np.zeros((5, 5), order="C")
        d = cuda.to_device(a)[:, 2]
        self.assertEqual(d._numba_type_.layout, "A")

    @skip_on_cudasim("Typing not done in the simulator")
    def test_devicearray_typing_order_noncontig_slice_f(self):
        # Non-contiguous slice of F-order array
        a = np.zeros((5, 5), order="F")
        d = cuda.to_device(a)[2, :]
        self.assertEqual(d._numba_type_.layout, "A")

    @skip_on_cudasim("Typing not done in the simulator")
    def test_devicearray_typing_order_contig_slice_c(self):
        # Contiguous slice of C-order array
        a = np.zeros((5, 5), order="C")
        d = cuda.to_device(a)[2, :]
        self.assertEqual(d._numba_type_.layout, "C")

    @skip_on_cudasim("Typing not done in the simulator")
    def test_devicearray_typing_order_contig_slice_f(self):
        # Contiguous slice of F-order array - is both C- and F-contiguous, so
        # types as 'C' layout
        a = np.zeros((5, 5), order="F")
        d = cuda.to_device(a)[:, 2]
        self.assertEqual(d._numba_type_.layout, "C")

    @skip_on_cudasim("Typing not done in the simulator")
    def test_devicearray_typing_order_broadcasted(self):
        # Broadcasted array, similar to that used for passing scalars to ufuncs
        a = np.broadcast_to(np.array([1]), (10,))
        d = cuda.to_device(a)
        self.assertEqual(d._numba_type_.layout, "A")

    def test_bug6697(self):
        """``np.asarray`` of a device array keeps the device array's
        dtype."""
        ary = np.arange(10, dtype=np.int16)
        dary = cuda.to_device(ary)
        got = np.asarray(dary)
        self.assertEqual(got.dtype, dary.dtype)

    @skip_on_cudasim("DeviceNDArray class not present in simulator")
    def test_issue_8477(self):
        # Ensure that we can copy a zero-length device array to a zero-length
        # host array when the strides of the device and host arrays differ -
        # this should be possible because the strides are irrelevant when the
        # length is zero. For more info see
        # https://github.com/numba/numba/issues/8477.

        # Create a device array with shape (0,) and strides (8,)
        dev_array = devicearray.DeviceNDArray(
            shape=(0,), strides=(8,), dtype=np.int8
        )

        # Create a host array with shape (0,) and strides (0,)
        host_array = np.ndarray(shape=(0,), strides=(0,), dtype=np.int8)

        # Sanity check for this test - ensure our destination has the strides
        # we expect, because strides can be ignored in some cases by the
        # ndarray constructor - checking here ensures that we haven't failed to
        # account for unexpected behaviour across different versions of NumPy
        self.assertEqual(host_array.strides, (0,))

        # Ensure that the copy succeeds in both directions
        dev_array.copy_to_host(host_array)
        dev_array.copy_to_device(host_array)

        # Ensure that a device-to-device copy also succeeds when the strides
        # differ - one way of doing this is to copy the host array across and
        # use that for copies in both directions.
        dev_array_from_host = cuda.to_device(host_array)
        self.assertEqual(dev_array_from_host.shape, (0,))
        self.assertEqual(dev_array_from_host.strides, (0,))

        dev_array.copy_to_device(dev_array_from_host)
        dev_array_from_host.copy_to_device(dev_array)
497
+
498
+
499
class TestArrayMethod(CUDATestCase):
    """Tests of the __array__() method via np.array"""

    def test_np_array(self):
        """``np.array`` of a device array equals ``copy_to_host``."""
        dev_array = cuda.to_device(np.asarray([1.0, 2.0, 3.0]))
        host_array = np.array(dev_array)
        np.testing.assert_equal(dev_array.copy_to_host(), host_array)

    def test_np_array_dtype(self):
        """A ``dtype`` passed to ``np.array`` is applied to the result."""
        dtype = np.int32
        dev_array = cuda.to_device(np.asarray([1.0, 2.0, 3.0]))
        host_array = np.array(dev_array, dtype=dtype)
        np.testing.assert_equal(
            host_array, dev_array.copy_to_host().astype(dtype)
        )

    @skip_on_cudasim("Simulator does not use __array__()")
    @unittest.skipUnless(IS_NUMPY_2, "NumPy 1.x does not pass copy kwarg")
    def test_np_array_copy_false(self):
        """``copy=False`` is refused with a ValueError."""
        dev_array = cuda.to_device(np.asarray([1.0, 2.0, 3.0]))
        with self.assertRaisesRegex(ValueError, "`copy=False` is not"):
            np.array(dev_array, copy=False)

    @skip_on_cudasim("Simulator does not use __array__()")
    @unittest.skipUnless(IS_NUMPY_2, "NumPy 1.x does not pass copy kwarg")
    def test_np_array_copy_true(self):
        """An explicit ``copy=True`` yields the same data as
        ``copy_to_host``."""
        dev_array = cuda.to_device(np.asarray([1.0, 2.0, 3.0]))
        # Pass copy=True explicitly so this test covers the case its name
        # describes instead of repeating test_np_array verbatim.
        host_array = np.array(dev_array, copy=True)
        np.testing.assert_equal(dev_array.copy_to_host(), host_array)
528
+
529
+
530
class TestRecarray(CUDATestCase):
    """Passing a NumPy record array to a kernel and reading its fields by
    attribute."""

    def test_recarray(self):
        """Each field read inside the kernel matches the host values."""
        # From issue #4111
        field_spec = [
            ("value1", np.int64),
            ("value2", np.float64),
        ]
        records = np.recarray((16,), dtype=field_spec)
        records.value1 = np.arange(records.size, dtype=np.int64)
        records.value2 = np.arange(records.size, dtype=np.float64) / 100

        want_ints = records.value1
        want_floats = records.value2

        def copy_fields(x, out1, out2):
            # One thread per record; surplus threads do nothing
            i = cuda.grid(1)
            if i < x.size:
                out1[i] = x.value1[i]
                out2[i] = x.value2[i]

        got_ints = np.zeros_like(want_ints)
        got_floats = np.zeros_like(want_floats)

        # Single block with one thread for every record
        kernel = cuda.jit(copy_fields)
        kernel[1, records.size](records, got_ints, got_floats)

        np.testing.assert_array_equal(want_ints, got_ints)
        np.testing.assert_array_equal(want_floats, got_floats)
558
+
559
+
560
class TestCoreContiguous(CUDATestCase):
    """Checks that ``devicearray.is_contiguous`` agrees with the
    ``C_CONTIGUOUS`` flag of ``devicearray.array_core`` for device arrays
    and strided host views."""

    def _test_against_array_core(self, view):
        """Assert both contiguity checks give the same answer for *view*."""
        reported = devicearray.is_contiguous(view)
        core_flag = devicearray.array_core(view).flags["C_CONTIGUOUS"]
        self.assertEqual(reported, core_flag)

    # -- C-ordered device arrays ------------------------------------------

    def test_device_array_like_1d(self):
        self._test_against_array_core(cuda.device_array(10, order="C"))

    def test_device_array_like_2d(self):
        self._test_against_array_core(cuda.device_array((10, 12), order="C"))

    def test_device_array_like_2d_transpose(self):
        device_c = cuda.device_array((10, 12), order="C")
        self._test_against_array_core(device_c.T)

    def test_device_array_like_3d(self):
        self._test_against_array_core(
            cuda.device_array((10, 12, 14), order="C")
        )

    # -- F-ordered device arrays ------------------------------------------

    def test_device_array_like_1d_f(self):
        self._test_against_array_core(cuda.device_array(10, order="F"))

    def test_device_array_like_2d_f(self):
        self._test_against_array_core(cuda.device_array((10, 12), order="F"))

    def test_device_array_like_2d_f_transpose(self):
        device_f = cuda.device_array((10, 12), order="F")
        self._test_against_array_core(device_f.T)

    def test_device_array_like_3d_f(self):
        self._test_against_array_core(
            cuda.device_array((10, 12, 14), order="F")
        )

    # -- Strided host views -----------------------------------------------

    def test_1d_view(self):
        every_other = np.zeros(10)[::2]
        self._test_against_array_core(every_other)

    def test_1d_view_f(self):
        every_other = np.zeros(10, order="F")[::2]
        self._test_against_array_core(every_other)

    def test_2d_view(self):
        every_other = np.zeros((10, 12))[::2, ::2]
        self._test_against_array_core(every_other)

    def test_2d_view_f(self):
        every_other = np.zeros((10, 12), order="F")[::2, ::2]
        self._test_against_array_core(every_other)
618
+
619
+
620
# Run this module's tests when the file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()