numba-cuda 0.22.0__cp313-cp313-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (487) hide show
  1. _numba_cuda_redirector.pth +4 -0
  2. _numba_cuda_redirector.py +89 -0
  3. numba_cuda/VERSION +1 -0
  4. numba_cuda/__init__.py +6 -0
  5. numba_cuda/_version.py +11 -0
  6. numba_cuda/numba/cuda/__init__.py +70 -0
  7. numba_cuda/numba/cuda/_internal/cuda_bf16.py +16394 -0
  8. numba_cuda/numba/cuda/_internal/cuda_fp16.py +8112 -0
  9. numba_cuda/numba/cuda/api.py +580 -0
  10. numba_cuda/numba/cuda/api_util.py +76 -0
  11. numba_cuda/numba/cuda/args.py +72 -0
  12. numba_cuda/numba/cuda/bf16.py +397 -0
  13. numba_cuda/numba/cuda/cache_hints.py +287 -0
  14. numba_cuda/numba/cuda/cext/__init__.py +2 -0
  15. numba_cuda/numba/cuda/cext/_devicearray.cpp +159 -0
  16. numba_cuda/numba/cuda/cext/_devicearray.cpython-313-aarch64-linux-gnu.so +0 -0
  17. numba_cuda/numba/cuda/cext/_devicearray.h +29 -0
  18. numba_cuda/numba/cuda/cext/_dispatcher.cpp +1098 -0
  19. numba_cuda/numba/cuda/cext/_dispatcher.cpython-313-aarch64-linux-gnu.so +0 -0
  20. numba_cuda/numba/cuda/cext/_hashtable.cpp +532 -0
  21. numba_cuda/numba/cuda/cext/_hashtable.h +135 -0
  22. numba_cuda/numba/cuda/cext/_helperlib.c +71 -0
  23. numba_cuda/numba/cuda/cext/_helperlib.cpython-313-aarch64-linux-gnu.so +0 -0
  24. numba_cuda/numba/cuda/cext/_helpermod.c +82 -0
  25. numba_cuda/numba/cuda/cext/_pymodule.h +38 -0
  26. numba_cuda/numba/cuda/cext/_typeconv.cpp +206 -0
  27. numba_cuda/numba/cuda/cext/_typeconv.cpython-313-aarch64-linux-gnu.so +0 -0
  28. numba_cuda/numba/cuda/cext/_typeof.cpp +1159 -0
  29. numba_cuda/numba/cuda/cext/_typeof.h +19 -0
  30. numba_cuda/numba/cuda/cext/capsulethunk.h +111 -0
  31. numba_cuda/numba/cuda/cext/mviewbuf.c +385 -0
  32. numba_cuda/numba/cuda/cext/mviewbuf.cpython-313-aarch64-linux-gnu.so +0 -0
  33. numba_cuda/numba/cuda/cext/typeconv.cpp +212 -0
  34. numba_cuda/numba/cuda/cext/typeconv.hpp +101 -0
  35. numba_cuda/numba/cuda/cg.py +67 -0
  36. numba_cuda/numba/cuda/cgutils.py +1294 -0
  37. numba_cuda/numba/cuda/cloudpickle/__init__.py +21 -0
  38. numba_cuda/numba/cuda/cloudpickle/cloudpickle.py +1598 -0
  39. numba_cuda/numba/cuda/cloudpickle/cloudpickle_fast.py +17 -0
  40. numba_cuda/numba/cuda/codegen.py +541 -0
  41. numba_cuda/numba/cuda/compiler.py +1396 -0
  42. numba_cuda/numba/cuda/core/analysis.py +758 -0
  43. numba_cuda/numba/cuda/core/annotations/__init__.py +0 -0
  44. numba_cuda/numba/cuda/core/annotations/pretty_annotate.py +288 -0
  45. numba_cuda/numba/cuda/core/annotations/type_annotations.py +305 -0
  46. numba_cuda/numba/cuda/core/base.py +1332 -0
  47. numba_cuda/numba/cuda/core/boxing.py +1411 -0
  48. numba_cuda/numba/cuda/core/bytecode.py +728 -0
  49. numba_cuda/numba/cuda/core/byteflow.py +2346 -0
  50. numba_cuda/numba/cuda/core/caching.py +744 -0
  51. numba_cuda/numba/cuda/core/callconv.py +392 -0
  52. numba_cuda/numba/cuda/core/codegen.py +171 -0
  53. numba_cuda/numba/cuda/core/compiler.py +199 -0
  54. numba_cuda/numba/cuda/core/compiler_lock.py +85 -0
  55. numba_cuda/numba/cuda/core/compiler_machinery.py +497 -0
  56. numba_cuda/numba/cuda/core/config.py +650 -0
  57. numba_cuda/numba/cuda/core/consts.py +124 -0
  58. numba_cuda/numba/cuda/core/controlflow.py +989 -0
  59. numba_cuda/numba/cuda/core/entrypoints.py +57 -0
  60. numba_cuda/numba/cuda/core/environment.py +66 -0
  61. numba_cuda/numba/cuda/core/errors.py +917 -0
  62. numba_cuda/numba/cuda/core/event.py +511 -0
  63. numba_cuda/numba/cuda/core/funcdesc.py +330 -0
  64. numba_cuda/numba/cuda/core/generators.py +387 -0
  65. numba_cuda/numba/cuda/core/imputils.py +509 -0
  66. numba_cuda/numba/cuda/core/inline_closurecall.py +1787 -0
  67. numba_cuda/numba/cuda/core/interpreter.py +3617 -0
  68. numba_cuda/numba/cuda/core/ir.py +1812 -0
  69. numba_cuda/numba/cuda/core/ir_utils.py +2638 -0
  70. numba_cuda/numba/cuda/core/optional.py +129 -0
  71. numba_cuda/numba/cuda/core/options.py +262 -0
  72. numba_cuda/numba/cuda/core/postproc.py +249 -0
  73. numba_cuda/numba/cuda/core/pythonapi.py +1859 -0
  74. numba_cuda/numba/cuda/core/registry.py +46 -0
  75. numba_cuda/numba/cuda/core/removerefctpass.py +123 -0
  76. numba_cuda/numba/cuda/core/rewrites/__init__.py +26 -0
  77. numba_cuda/numba/cuda/core/rewrites/ir_print.py +91 -0
  78. numba_cuda/numba/cuda/core/rewrites/registry.py +104 -0
  79. numba_cuda/numba/cuda/core/rewrites/static_binop.py +41 -0
  80. numba_cuda/numba/cuda/core/rewrites/static_getitem.py +189 -0
  81. numba_cuda/numba/cuda/core/rewrites/static_raise.py +100 -0
  82. numba_cuda/numba/cuda/core/sigutils.py +68 -0
  83. numba_cuda/numba/cuda/core/ssa.py +498 -0
  84. numba_cuda/numba/cuda/core/targetconfig.py +330 -0
  85. numba_cuda/numba/cuda/core/tracing.py +231 -0
  86. numba_cuda/numba/cuda/core/transforms.py +956 -0
  87. numba_cuda/numba/cuda/core/typed_passes.py +867 -0
  88. numba_cuda/numba/cuda/core/typeinfer.py +1950 -0
  89. numba_cuda/numba/cuda/core/unsafe/__init__.py +0 -0
  90. numba_cuda/numba/cuda/core/unsafe/bytes.py +67 -0
  91. numba_cuda/numba/cuda/core/unsafe/eh.py +67 -0
  92. numba_cuda/numba/cuda/core/unsafe/refcount.py +98 -0
  93. numba_cuda/numba/cuda/core/untyped_passes.py +1979 -0
  94. numba_cuda/numba/cuda/cpython/builtins.py +1153 -0
  95. numba_cuda/numba/cuda/cpython/charseq.py +1218 -0
  96. numba_cuda/numba/cuda/cpython/cmathimpl.py +560 -0
  97. numba_cuda/numba/cuda/cpython/enumimpl.py +103 -0
  98. numba_cuda/numba/cuda/cpython/iterators.py +167 -0
  99. numba_cuda/numba/cuda/cpython/listobj.py +1326 -0
  100. numba_cuda/numba/cuda/cpython/mathimpl.py +499 -0
  101. numba_cuda/numba/cuda/cpython/numbers.py +1475 -0
  102. numba_cuda/numba/cuda/cpython/rangeobj.py +289 -0
  103. numba_cuda/numba/cuda/cpython/slicing.py +322 -0
  104. numba_cuda/numba/cuda/cpython/tupleobj.py +456 -0
  105. numba_cuda/numba/cuda/cpython/unicode.py +2865 -0
  106. numba_cuda/numba/cuda/cpython/unicode_support.py +1597 -0
  107. numba_cuda/numba/cuda/cpython/unsafe/__init__.py +0 -0
  108. numba_cuda/numba/cuda/cpython/unsafe/numbers.py +64 -0
  109. numba_cuda/numba/cuda/cpython/unsafe/tuple.py +92 -0
  110. numba_cuda/numba/cuda/cuda_paths.py +691 -0
  111. numba_cuda/numba/cuda/cudadecl.py +543 -0
  112. numba_cuda/numba/cuda/cudadrv/__init__.py +14 -0
  113. numba_cuda/numba/cuda/cudadrv/devicearray.py +954 -0
  114. numba_cuda/numba/cuda/cudadrv/devices.py +249 -0
  115. numba_cuda/numba/cuda/cudadrv/driver.py +3238 -0
  116. numba_cuda/numba/cuda/cudadrv/drvapi.py +435 -0
  117. numba_cuda/numba/cuda/cudadrv/dummyarray.py +562 -0
  118. numba_cuda/numba/cuda/cudadrv/enums.py +613 -0
  119. numba_cuda/numba/cuda/cudadrv/error.py +48 -0
  120. numba_cuda/numba/cuda/cudadrv/libs.py +220 -0
  121. numba_cuda/numba/cuda/cudadrv/linkable_code.py +184 -0
  122. numba_cuda/numba/cuda/cudadrv/mappings.py +14 -0
  123. numba_cuda/numba/cuda/cudadrv/ndarray.py +26 -0
  124. numba_cuda/numba/cuda/cudadrv/nvrtc.py +193 -0
  125. numba_cuda/numba/cuda/cudadrv/nvvm.py +756 -0
  126. numba_cuda/numba/cuda/cudadrv/rtapi.py +13 -0
  127. numba_cuda/numba/cuda/cudadrv/runtime.py +34 -0
  128. numba_cuda/numba/cuda/cudaimpl.py +983 -0
  129. numba_cuda/numba/cuda/cudamath.py +149 -0
  130. numba_cuda/numba/cuda/datamodel/__init__.py +7 -0
  131. numba_cuda/numba/cuda/datamodel/cuda_manager.py +66 -0
  132. numba_cuda/numba/cuda/datamodel/cuda_models.py +1446 -0
  133. numba_cuda/numba/cuda/datamodel/cuda_packer.py +224 -0
  134. numba_cuda/numba/cuda/datamodel/cuda_registry.py +22 -0
  135. numba_cuda/numba/cuda/datamodel/cuda_testing.py +153 -0
  136. numba_cuda/numba/cuda/datamodel/manager.py +11 -0
  137. numba_cuda/numba/cuda/datamodel/models.py +9 -0
  138. numba_cuda/numba/cuda/datamodel/packer.py +9 -0
  139. numba_cuda/numba/cuda/datamodel/registry.py +11 -0
  140. numba_cuda/numba/cuda/datamodel/testing.py +11 -0
  141. numba_cuda/numba/cuda/debuginfo.py +997 -0
  142. numba_cuda/numba/cuda/decorators.py +294 -0
  143. numba_cuda/numba/cuda/descriptor.py +35 -0
  144. numba_cuda/numba/cuda/device_init.py +155 -0
  145. numba_cuda/numba/cuda/deviceufunc.py +1021 -0
  146. numba_cuda/numba/cuda/dispatcher.py +2463 -0
  147. numba_cuda/numba/cuda/errors.py +72 -0
  148. numba_cuda/numba/cuda/extending.py +697 -0
  149. numba_cuda/numba/cuda/flags.py +178 -0
  150. numba_cuda/numba/cuda/fp16.py +357 -0
  151. numba_cuda/numba/cuda/include/12/cuda_bf16.h +5118 -0
  152. numba_cuda/numba/cuda/include/12/cuda_bf16.hpp +3865 -0
  153. numba_cuda/numba/cuda/include/12/cuda_fp16.h +5363 -0
  154. numba_cuda/numba/cuda/include/12/cuda_fp16.hpp +3483 -0
  155. numba_cuda/numba/cuda/include/13/cuda_bf16.h +5118 -0
  156. numba_cuda/numba/cuda/include/13/cuda_bf16.hpp +3865 -0
  157. numba_cuda/numba/cuda/include/13/cuda_fp16.h +5363 -0
  158. numba_cuda/numba/cuda/include/13/cuda_fp16.hpp +3483 -0
  159. numba_cuda/numba/cuda/initialize.py +24 -0
  160. numba_cuda/numba/cuda/intrinsics.py +531 -0
  161. numba_cuda/numba/cuda/itanium_mangler.py +214 -0
  162. numba_cuda/numba/cuda/kernels/__init__.py +2 -0
  163. numba_cuda/numba/cuda/kernels/reduction.py +265 -0
  164. numba_cuda/numba/cuda/kernels/transpose.py +65 -0
  165. numba_cuda/numba/cuda/libdevice.py +3386 -0
  166. numba_cuda/numba/cuda/libdevicedecl.py +20 -0
  167. numba_cuda/numba/cuda/libdevicefuncs.py +1060 -0
  168. numba_cuda/numba/cuda/libdeviceimpl.py +88 -0
  169. numba_cuda/numba/cuda/locks.py +19 -0
  170. numba_cuda/numba/cuda/lowering.py +1980 -0
  171. numba_cuda/numba/cuda/mathimpl.py +374 -0
  172. numba_cuda/numba/cuda/memory_management/__init__.py +4 -0
  173. numba_cuda/numba/cuda/memory_management/memsys.cu +99 -0
  174. numba_cuda/numba/cuda/memory_management/memsys.cuh +22 -0
  175. numba_cuda/numba/cuda/memory_management/nrt.cu +212 -0
  176. numba_cuda/numba/cuda/memory_management/nrt.cuh +48 -0
  177. numba_cuda/numba/cuda/memory_management/nrt.py +390 -0
  178. numba_cuda/numba/cuda/memory_management/nrt_context.py +438 -0
  179. numba_cuda/numba/cuda/misc/appdirs.py +594 -0
  180. numba_cuda/numba/cuda/misc/cffiimpl.py +24 -0
  181. numba_cuda/numba/cuda/misc/coverage_support.py +43 -0
  182. numba_cuda/numba/cuda/misc/dump_style.py +41 -0
  183. numba_cuda/numba/cuda/misc/findlib.py +75 -0
  184. numba_cuda/numba/cuda/misc/firstlinefinder.py +96 -0
  185. numba_cuda/numba/cuda/misc/gdb_hook.py +240 -0
  186. numba_cuda/numba/cuda/misc/literal.py +28 -0
  187. numba_cuda/numba/cuda/misc/llvm_pass_timings.py +412 -0
  188. numba_cuda/numba/cuda/misc/special.py +94 -0
  189. numba_cuda/numba/cuda/models.py +56 -0
  190. numba_cuda/numba/cuda/np/arraymath.py +5130 -0
  191. numba_cuda/numba/cuda/np/arrayobj.py +7635 -0
  192. numba_cuda/numba/cuda/np/extensions.py +11 -0
  193. numba_cuda/numba/cuda/np/linalg.py +3087 -0
  194. numba_cuda/numba/cuda/np/math/__init__.py +0 -0
  195. numba_cuda/numba/cuda/np/math/cmathimpl.py +558 -0
  196. numba_cuda/numba/cuda/np/math/mathimpl.py +487 -0
  197. numba_cuda/numba/cuda/np/math/numbers.py +1461 -0
  198. numba_cuda/numba/cuda/np/npdatetime.py +969 -0
  199. numba_cuda/numba/cuda/np/npdatetime_helpers.py +217 -0
  200. numba_cuda/numba/cuda/np/npyfuncs.py +1808 -0
  201. numba_cuda/numba/cuda/np/npyimpl.py +1027 -0
  202. numba_cuda/numba/cuda/np/numpy_support.py +798 -0
  203. numba_cuda/numba/cuda/np/polynomial/__init__.py +4 -0
  204. numba_cuda/numba/cuda/np/polynomial/polynomial_core.py +242 -0
  205. numba_cuda/numba/cuda/np/polynomial/polynomial_functions.py +380 -0
  206. numba_cuda/numba/cuda/np/ufunc/__init__.py +4 -0
  207. numba_cuda/numba/cuda/np/ufunc/decorators.py +203 -0
  208. numba_cuda/numba/cuda/np/ufunc/sigparse.py +68 -0
  209. numba_cuda/numba/cuda/np/ufunc/ufuncbuilder.py +65 -0
  210. numba_cuda/numba/cuda/np/ufunc_db.py +1282 -0
  211. numba_cuda/numba/cuda/np/unsafe/__init__.py +0 -0
  212. numba_cuda/numba/cuda/np/unsafe/ndarray.py +84 -0
  213. numba_cuda/numba/cuda/nvvmutils.py +254 -0
  214. numba_cuda/numba/cuda/printimpl.py +126 -0
  215. numba_cuda/numba/cuda/random.py +308 -0
  216. numba_cuda/numba/cuda/reshape_funcs.cu +156 -0
  217. numba_cuda/numba/cuda/serialize.py +267 -0
  218. numba_cuda/numba/cuda/simulator/__init__.py +63 -0
  219. numba_cuda/numba/cuda/simulator/_internal/__init__.py +4 -0
  220. numba_cuda/numba/cuda/simulator/_internal/cuda_bf16.py +2 -0
  221. numba_cuda/numba/cuda/simulator/api.py +179 -0
  222. numba_cuda/numba/cuda/simulator/bf16.py +4 -0
  223. numba_cuda/numba/cuda/simulator/compiler.py +38 -0
  224. numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +11 -0
  225. numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +462 -0
  226. numba_cuda/numba/cuda/simulator/cudadrv/devices.py +122 -0
  227. numba_cuda/numba/cuda/simulator/cudadrv/driver.py +66 -0
  228. numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +7 -0
  229. numba_cuda/numba/cuda/simulator/cudadrv/dummyarray.py +7 -0
  230. numba_cuda/numba/cuda/simulator/cudadrv/error.py +10 -0
  231. numba_cuda/numba/cuda/simulator/cudadrv/libs.py +10 -0
  232. numba_cuda/numba/cuda/simulator/cudadrv/linkable_code.py +61 -0
  233. numba_cuda/numba/cuda/simulator/cudadrv/nvrtc.py +11 -0
  234. numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +32 -0
  235. numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +22 -0
  236. numba_cuda/numba/cuda/simulator/dispatcher.py +11 -0
  237. numba_cuda/numba/cuda/simulator/kernel.py +320 -0
  238. numba_cuda/numba/cuda/simulator/kernelapi.py +509 -0
  239. numba_cuda/numba/cuda/simulator/memory_management/__init__.py +4 -0
  240. numba_cuda/numba/cuda/simulator/memory_management/nrt.py +21 -0
  241. numba_cuda/numba/cuda/simulator/reduction.py +19 -0
  242. numba_cuda/numba/cuda/simulator/tests/support.py +4 -0
  243. numba_cuda/numba/cuda/simulator/vector_types.py +65 -0
  244. numba_cuda/numba/cuda/simulator_init.py +18 -0
  245. numba_cuda/numba/cuda/stubs.py +624 -0
  246. numba_cuda/numba/cuda/target.py +505 -0
  247. numba_cuda/numba/cuda/testing.py +347 -0
  248. numba_cuda/numba/cuda/tests/__init__.py +62 -0
  249. numba_cuda/numba/cuda/tests/benchmarks/__init__.py +0 -0
  250. numba_cuda/numba/cuda/tests/benchmarks/test_kernel_launch.py +119 -0
  251. numba_cuda/numba/cuda/tests/cloudpickle_main_class.py +9 -0
  252. numba_cuda/numba/cuda/tests/core/serialize_usecases.py +113 -0
  253. numba_cuda/numba/cuda/tests/core/test_itanium_mangler.py +83 -0
  254. numba_cuda/numba/cuda/tests/core/test_serialize.py +371 -0
  255. numba_cuda/numba/cuda/tests/cudadrv/__init__.py +9 -0
  256. numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +147 -0
  257. numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +161 -0
  258. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +397 -0
  259. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +24 -0
  260. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +180 -0
  261. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +313 -0
  262. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +191 -0
  263. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +621 -0
  264. numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +247 -0
  265. numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +100 -0
  266. numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +200 -0
  267. numba_cuda/numba/cuda/tests/cudadrv/test_events.py +53 -0
  268. numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +72 -0
  269. numba_cuda/numba/cuda/tests/cudadrv/test_init.py +138 -0
  270. numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +43 -0
  271. numba_cuda/numba/cuda/tests/cudadrv/test_is_fp16.py +15 -0
  272. numba_cuda/numba/cuda/tests/cudadrv/test_linkable_code.py +58 -0
  273. numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +348 -0
  274. numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +128 -0
  275. numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py +301 -0
  276. numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +174 -0
  277. numba_cuda/numba/cuda/tests/cudadrv/test_nvrtc.py +28 -0
  278. numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +185 -0
  279. numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +39 -0
  280. numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +23 -0
  281. numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +38 -0
  282. numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +48 -0
  283. numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +44 -0
  284. numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +127 -0
  285. numba_cuda/numba/cuda/tests/cudapy/__init__.py +9 -0
  286. numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +231 -0
  287. numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +50 -0
  288. numba_cuda/numba/cuda/tests/cudapy/cg_cache_usecases.py +36 -0
  289. numba_cuda/numba/cuda/tests/cudapy/complex_usecases.py +116 -0
  290. numba_cuda/numba/cuda/tests/cudapy/enum_usecases.py +59 -0
  291. numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +62 -0
  292. numba_cuda/numba/cuda/tests/cudapy/jitlink.ptx +28 -0
  293. numba_cuda/numba/cuda/tests/cudapy/overload_usecases.py +33 -0
  294. numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +104 -0
  295. numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +47 -0
  296. numba_cuda/numba/cuda/tests/cudapy/test_analysis.py +1122 -0
  297. numba_cuda/numba/cuda/tests/cudapy/test_array.py +344 -0
  298. numba_cuda/numba/cuda/tests/cudapy/test_array_alignment.py +268 -0
  299. numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +203 -0
  300. numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +63 -0
  301. numba_cuda/numba/cuda/tests/cudapy/test_array_reductions.py +360 -0
  302. numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +1815 -0
  303. numba_cuda/numba/cuda/tests/cudapy/test_bfloat16.py +599 -0
  304. numba_cuda/numba/cuda/tests/cudapy/test_bfloat16_bindings.py +377 -0
  305. numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +160 -0
  306. numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +27 -0
  307. numba_cuda/numba/cuda/tests/cudapy/test_byteflow.py +98 -0
  308. numba_cuda/numba/cuda/tests/cudapy/test_cache_hints.py +210 -0
  309. numba_cuda/numba/cuda/tests/cudapy/test_caching.py +683 -0
  310. numba_cuda/numba/cuda/tests/cudapy/test_casting.py +265 -0
  311. numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +42 -0
  312. numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +718 -0
  313. numba_cuda/numba/cuda/tests/cudapy/test_complex.py +370 -0
  314. numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +23 -0
  315. numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +142 -0
  316. numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +178 -0
  317. numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +193 -0
  318. numba_cuda/numba/cuda/tests/cudapy/test_copy_propagate.py +131 -0
  319. numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +438 -0
  320. numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +94 -0
  321. numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +101 -0
  322. numba_cuda/numba/cuda/tests/cudapy/test_debug.py +105 -0
  323. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +978 -0
  324. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo_types.py +476 -0
  325. numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +500 -0
  326. numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +820 -0
  327. numba_cuda/numba/cuda/tests/cudapy/test_enums.py +152 -0
  328. numba_cuda/numba/cuda/tests/cudapy/test_errors.py +111 -0
  329. numba_cuda/numba/cuda/tests/cudapy/test_exception.py +170 -0
  330. numba_cuda/numba/cuda/tests/cudapy/test_extending.py +1088 -0
  331. numba_cuda/numba/cuda/tests/cudapy/test_extending_types.py +71 -0
  332. numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +265 -0
  333. numba_cuda/numba/cuda/tests/cudapy/test_flow_control.py +1433 -0
  334. numba_cuda/numba/cuda/tests/cudapy/test_forall.py +57 -0
  335. numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +34 -0
  336. numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +69 -0
  337. numba_cuda/numba/cuda/tests/cudapy/test_globals.py +62 -0
  338. numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +474 -0
  339. numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +167 -0
  340. numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +92 -0
  341. numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +39 -0
  342. numba_cuda/numba/cuda/tests/cudapy/test_inline.py +170 -0
  343. numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +255 -0
  344. numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +1219 -0
  345. numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +263 -0
  346. numba_cuda/numba/cuda/tests/cudapy/test_ir.py +598 -0
  347. numba_cuda/numba/cuda/tests/cudapy/test_ir_utils.py +276 -0
  348. numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +101 -0
  349. numba_cuda/numba/cuda/tests/cudapy/test_lang.py +68 -0
  350. numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +123 -0
  351. numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +194 -0
  352. numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +220 -0
  353. numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +173 -0
  354. numba_cuda/numba/cuda/tests/cudapy/test_make_function_to_jit_function.py +364 -0
  355. numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +47 -0
  356. numba_cuda/numba/cuda/tests/cudapy/test_math.py +842 -0
  357. numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +76 -0
  358. numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +78 -0
  359. numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +25 -0
  360. numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +145 -0
  361. numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +39 -0
  362. numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +82 -0
  363. numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +53 -0
  364. numba_cuda/numba/cuda/tests/cudapy/test_operator.py +504 -0
  365. numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +93 -0
  366. numba_cuda/numba/cuda/tests/cudapy/test_overload.py +402 -0
  367. numba_cuda/numba/cuda/tests/cudapy/test_powi.py +128 -0
  368. numba_cuda/numba/cuda/tests/cudapy/test_print.py +193 -0
  369. numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +37 -0
  370. numba_cuda/numba/cuda/tests/cudapy/test_random.py +117 -0
  371. numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +614 -0
  372. numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +130 -0
  373. numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +94 -0
  374. numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +83 -0
  375. numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +86 -0
  376. numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +40 -0
  377. numba_cuda/numba/cuda/tests/cudapy/test_sm.py +457 -0
  378. numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +233 -0
  379. numba_cuda/numba/cuda/tests/cudapy/test_ssa.py +454 -0
  380. numba_cuda/numba/cuda/tests/cudapy/test_stream_api.py +56 -0
  381. numba_cuda/numba/cuda/tests/cudapy/test_sync.py +277 -0
  382. numba_cuda/numba/cuda/tests/cudapy/test_tracing.py +200 -0
  383. numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +90 -0
  384. numba_cuda/numba/cuda/tests/cudapy/test_typeconv.py +333 -0
  385. numba_cuda/numba/cuda/tests/cudapy/test_typeinfer.py +538 -0
  386. numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +585 -0
  387. numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +42 -0
  388. numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +485 -0
  389. numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +312 -0
  390. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +23 -0
  391. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +183 -0
  392. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +40 -0
  393. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +40 -0
  394. numba_cuda/numba/cuda/tests/cudapy/test_warning.py +206 -0
  395. numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +446 -0
  396. numba_cuda/numba/cuda/tests/cudasim/__init__.py +9 -0
  397. numba_cuda/numba/cuda/tests/cudasim/support.py +9 -0
  398. numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +111 -0
  399. numba_cuda/numba/cuda/tests/data/__init__.py +2 -0
  400. numba_cuda/numba/cuda/tests/data/cta_barrier.cu +28 -0
  401. numba_cuda/numba/cuda/tests/data/cuda_include.cu +10 -0
  402. numba_cuda/numba/cuda/tests/data/error.cu +12 -0
  403. numba_cuda/numba/cuda/tests/data/include/add.cuh +8 -0
  404. numba_cuda/numba/cuda/tests/data/jitlink.cu +28 -0
  405. numba_cuda/numba/cuda/tests/data/jitlink.ptx +49 -0
  406. numba_cuda/numba/cuda/tests/data/warn.cu +12 -0
  407. numba_cuda/numba/cuda/tests/doc_examples/__init__.py +9 -0
  408. numba_cuda/numba/cuda/tests/doc_examples/ffi/__init__.py +2 -0
  409. numba_cuda/numba/cuda/tests/doc_examples/ffi/functions.cu +54 -0
  410. numba_cuda/numba/cuda/tests/doc_examples/ffi/include/mul.cuh +8 -0
  411. numba_cuda/numba/cuda/tests/doc_examples/ffi/saxpy.cu +14 -0
  412. numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +86 -0
  413. numba_cuda/numba/cuda/tests/doc_examples/test_cpointer.py +68 -0
  414. numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +81 -0
  415. numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +141 -0
  416. numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +160 -0
  417. numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +180 -0
  418. numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +119 -0
  419. numba_cuda/numba/cuda/tests/doc_examples/test_random.py +66 -0
  420. numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +80 -0
  421. numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +206 -0
  422. numba_cuda/numba/cuda/tests/doc_examples/test_ufunc.py +53 -0
  423. numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +76 -0
  424. numba_cuda/numba/cuda/tests/nocuda/__init__.py +9 -0
  425. numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +452 -0
  426. numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +48 -0
  427. numba_cuda/numba/cuda/tests/nocuda/test_import.py +63 -0
  428. numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +252 -0
  429. numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +59 -0
  430. numba_cuda/numba/cuda/tests/nrt/__init__.py +9 -0
  431. numba_cuda/numba/cuda/tests/nrt/test_nrt.py +387 -0
  432. numba_cuda/numba/cuda/tests/nrt/test_nrt_refct.py +124 -0
  433. numba_cuda/numba/cuda/tests/support.py +900 -0
  434. numba_cuda/numba/cuda/typeconv/__init__.py +4 -0
  435. numba_cuda/numba/cuda/typeconv/castgraph.py +137 -0
  436. numba_cuda/numba/cuda/typeconv/rules.py +63 -0
  437. numba_cuda/numba/cuda/typeconv/typeconv.py +121 -0
  438. numba_cuda/numba/cuda/types/__init__.py +233 -0
  439. numba_cuda/numba/cuda/types/__init__.pyi +167 -0
  440. numba_cuda/numba/cuda/types/abstract.py +9 -0
  441. numba_cuda/numba/cuda/types/common.py +9 -0
  442. numba_cuda/numba/cuda/types/containers.py +9 -0
  443. numba_cuda/numba/cuda/types/cuda_abstract.py +533 -0
  444. numba_cuda/numba/cuda/types/cuda_common.py +110 -0
  445. numba_cuda/numba/cuda/types/cuda_containers.py +971 -0
  446. numba_cuda/numba/cuda/types/cuda_function_type.py +230 -0
  447. numba_cuda/numba/cuda/types/cuda_functions.py +798 -0
  448. numba_cuda/numba/cuda/types/cuda_iterators.py +120 -0
  449. numba_cuda/numba/cuda/types/cuda_misc.py +569 -0
  450. numba_cuda/numba/cuda/types/cuda_npytypes.py +690 -0
  451. numba_cuda/numba/cuda/types/cuda_scalars.py +280 -0
  452. numba_cuda/numba/cuda/types/ext_types.py +101 -0
  453. numba_cuda/numba/cuda/types/function_type.py +11 -0
  454. numba_cuda/numba/cuda/types/functions.py +9 -0
  455. numba_cuda/numba/cuda/types/iterators.py +9 -0
  456. numba_cuda/numba/cuda/types/misc.py +9 -0
  457. numba_cuda/numba/cuda/types/npytypes.py +9 -0
  458. numba_cuda/numba/cuda/types/scalars.py +9 -0
  459. numba_cuda/numba/cuda/typing/__init__.py +19 -0
  460. numba_cuda/numba/cuda/typing/arraydecl.py +939 -0
  461. numba_cuda/numba/cuda/typing/asnumbatype.py +130 -0
  462. numba_cuda/numba/cuda/typing/bufproto.py +70 -0
  463. numba_cuda/numba/cuda/typing/builtins.py +1209 -0
  464. numba_cuda/numba/cuda/typing/cffi_utils.py +219 -0
  465. numba_cuda/numba/cuda/typing/cmathdecl.py +47 -0
  466. numba_cuda/numba/cuda/typing/collections.py +138 -0
  467. numba_cuda/numba/cuda/typing/context.py +782 -0
  468. numba_cuda/numba/cuda/typing/ctypes_utils.py +125 -0
  469. numba_cuda/numba/cuda/typing/dictdecl.py +63 -0
  470. numba_cuda/numba/cuda/typing/enumdecl.py +74 -0
  471. numba_cuda/numba/cuda/typing/listdecl.py +147 -0
  472. numba_cuda/numba/cuda/typing/mathdecl.py +158 -0
  473. numba_cuda/numba/cuda/typing/npdatetime.py +322 -0
  474. numba_cuda/numba/cuda/typing/npydecl.py +749 -0
  475. numba_cuda/numba/cuda/typing/setdecl.py +115 -0
  476. numba_cuda/numba/cuda/typing/templates.py +1446 -0
  477. numba_cuda/numba/cuda/typing/typeof.py +301 -0
  478. numba_cuda/numba/cuda/ufuncs.py +746 -0
  479. numba_cuda/numba/cuda/utils.py +724 -0
  480. numba_cuda/numba/cuda/vector_types.py +214 -0
  481. numba_cuda/numba/cuda/vectorizers.py +260 -0
  482. numba_cuda-0.22.0.dist-info/METADATA +109 -0
  483. numba_cuda-0.22.0.dist-info/RECORD +487 -0
  484. numba_cuda-0.22.0.dist-info/WHEEL +6 -0
  485. numba_cuda-0.22.0.dist-info/licenses/LICENSE +26 -0
  486. numba_cuda-0.22.0.dist-info/licenses/LICENSE.numba +24 -0
  487. numba_cuda-0.22.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,683 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
4
+ import multiprocessing
5
+ import os
6
+ import shutil
7
+ import unittest
8
+ import warnings
9
+ import sys
10
+ import stat
11
+ import subprocess
12
+
13
+ from numba import cuda
14
+ from numba.cuda.core.errors import NumbaWarning
15
+ from numba.cuda.testing import (
16
+ CUDATestCase,
17
+ skip_on_cudasim,
18
+ skip_unless_cc_60,
19
+ skip_if_cudadevrt_missing,
20
+ test_data_dir,
21
+ skip_on_standalone_numba_cuda,
22
+ )
23
+ from numba.cuda.tests.support import (
24
+ TestCase,
25
+ temp_directory,
26
+ import_dynamic,
27
+ )
28
+
29
+
30
class BaseCacheTest(TestCase):
    """Common fixture for tests that exercise the on-disk cache.

    Each test copies ``usecases_file`` into a fresh temporary directory
    under the name ``modname`` and puts that directory at the front of
    ``sys.path`` so the copy can be imported.
    """

    # Path of the use-cases source file that is copied for every test
    usecases_file = None
    # Name the copy is imported under; must not clash with another module
    modname = None

    def setUp(self):
        """Create the temporary directory and install the module copy."""
        workdir = temp_directory("test_cache")
        self.tempdir = workdir
        sys.path.insert(0, workdir)
        self.modfile = os.path.join(workdir, self.modname + ".py")
        self.cache_dir = os.path.join(workdir, "__pycache__")
        shutil.copy(self.usecases_file, self.modfile)
        # The copy must be readable and writable by the owner.
        os.chmod(self.modfile, stat.S_IREAD | stat.S_IWRITE)
        self.maxDiff = None

    def tearDown(self):
        """Forget the imported module and undo the ``sys.path`` change."""
        sys.modules.pop(self.modname, None)
        sys.path.remove(self.tempdir)

    def import_module(self):
        """Import and return a fresh instance of the test module.

        Jitted functions in the new module start from scratch and will load
        overloads from the on-disk cache when possible.
        """
        previous = sys.modules.pop(self.modname, None)
        if previous is not None:
            # Remove the cached bytecode of the previously imported module.
            try:
                os.unlink(previous.__cached__)
            except FileNotFoundError:
                pass
        module = import_dynamic(self.modname)
        self.assertEqual(module.__file__.rstrip("co"), self.modfile)
        return module

    def cache_contents(self):
        """Return the names in the cache directory, excluding bytecode.

        An empty list is returned when the cache directory does not exist.
        """
        try:
            entries = os.listdir(self.cache_dir)
        except FileNotFoundError:
            return []
        return [
            name for name in entries if not name.endswith((".pyc", ".pyo"))
        ]

    def get_cache_mtimes(self):
        """Map each cache file name to its modification time."""
        return {
            name: os.path.getmtime(os.path.join(self.cache_dir, name))
            for name in sorted(self.cache_contents())
        }

    def check_pycache(self, n):
        """Assert that the cache directory holds exactly ``n`` cache files."""
        found = self.cache_contents()
        self.assertEqual(len(found), n, found)

    def dummy_test(self):
        """Intentionally empty test method."""
        pass
88
+
89
+
90
class DispatcherCacheUsecasesTest(BaseCacheTest):
    """Cache fixture bound to ``cache_usecases.py``.

    Adds helpers for running the copied module's ``self_test`` in a child
    interpreter and for checking a dispatcher's cache hit/miss counters.
    """

    here = os.path.dirname(__file__)
    usecases_file = os.path.join(here, "cache_usecases.py")
    modname = "dispatcher_caching_test_fodder"

    def run_in_separate_process(self, *, envvars=None):
        """Import the test module in a new interpreter and run ``self_test``.

        Cached functions can be run from a distinct process. This also
        stresses issue #1603: an uncached function calling a cached function
        shouldn't fail compiling.

        Parameters
        ----------
        envvars : mapping, optional
            Extra environment variables laid over a copy of ``os.environ``
            for the child process. ``None`` (the default) adds nothing.

        Raises
        ------
        AssertionError
            If the child process exits with a non-zero return code; the
            message carries the child's stdout and stderr.
        """
        code = """if 1:
            import sys

            sys.path.insert(0, %(tempdir)r)
            mod = __import__(%(modname)r)
            mod.self_test()
            """ % dict(tempdir=self.tempdir, modname=self.modname)

        subp_env = os.environ.copy()
        # A ``None`` default replaces the former shared ``{}`` default so
        # that no mutable object is reused between calls.
        if envvars is not None:
            subp_env.update(envvars)
        popen = subprocess.Popen(
            [sys.executable, "-c", code],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            env=subp_env,
        )
        out, err = popen.communicate()
        if popen.returncode != 0:
            raise AssertionError(
                "process failed with code %s: \n"
                "stdout follows\n%s\n"
                "stderr follows\n%s\n"
                % (popen.returncode, out.decode(), err.decode()),
            )

    def check_hits(self, func, hits, misses=None):
        """Assert the total cache hits (and optionally misses) of ``func``.

        The totals are summed over the per-key counters in ``func.stats``.
        ``misses`` is only checked when it is not ``None``.
        """
        st = func.stats
        self.assertEqual(sum(st.cache_hits.values()), hits, st.cache_hits)
        if misses is not None:
            self.assertEqual(
                sum(st.cache_misses.values()), misses, st.cache_misses
            )
131
+
132
+
133
def check_access_is_preventable():
    """Report whether ``chmod 500`` really blocks writes for this user.

    A user with elevated rights (e.g. root) can usually still write into a
    directory whose mode forbids it. Tests that depend on functioning access
    prevention are skipped based on the value returned here.

    Returns ``True`` when a write into the restricted directory raises
    ``PermissionError`` and ``False`` when the write succeeds.
    """
    probe_dir = os.path.join(temp_directory("test_cache"), "writable_test")
    os.mkdir(probe_dir)

    # First confirm that an ordinary write works.
    with open(os.path.join(probe_dir, "write_ok"), "wt") as handle:
        handle.write("check1")

    # Then restrict the directory and try again.
    os.chmod(probe_dir, 0o500)
    try:
        with open(os.path.join(probe_dir, "write_forbidden"), "wt") as handle:
            handle.write("check2")
    except PermissionError:
        # The write was refused, so `chmod 500` works for this user.
        preventable = True
    else:
        # The write went through: access prevention is not possible.
        preventable = False
    finally:
        # Restore write access so the directory can be deleted.
        os.chmod(probe_dir, 0o775)
        shutil.rmtree(probe_dir)
    return preventable
162
+
163
+
164
# Probe once at import time; tests decorated with `skip_bad_access` are
# skipped when writes into a restricted directory cannot be prevented.
_access_msg = "Cannot create a directory to which writes are preventable"
_access_preventable = check_access_is_preventable()
skip_bad_access = unittest.skipUnless(_access_preventable, _access_msg)
167
+
168
+
169
@skip_on_cudasim("Simulator does not implement caching")
class CUDACachingTest(DispatcherCacheUsecasesTest):
    """Caching tests for CUDA-jitted functions in ``cache_usecases.py``."""

    here = os.path.dirname(__file__)
    usecases_file = os.path.join(here, "cache_usecases.py")
    modname = "cuda_caching_test_fodder"

    def setUp(self):
        # Run the cache fixture setup first, then the CUDATestCase setup.
        DispatcherCacheUsecasesTest.setUp(self)
        CUDATestCase.setUp(self)

    def tearDown(self):
        # Tear down in the reverse order of setUp.
        CUDATestCase.tearDown(self)
        DispatcherCacheUsecasesTest.tearDown(self)

    def test_caching(self):
        self.check_pycache(0)
        mod = self.import_module()
        # Importing alone must not create any cache file.
        self.check_pycache(0)

        add = mod.add_usecase
        self.assertPreciseEqual(add(2, 3), 6)
        self.check_pycache(2)  # 1 index, 1 data
        self.assertPreciseEqual(add(2.5, 3), 6.5)
        self.check_pycache(3)  # 1 index, 2 data
        self.check_hits(add.func, 0, 2)

        aligned = mod.record_return_aligned
        aligned_rec = aligned(mod.aligned_arr, 1)
        self.assertPreciseEqual(tuple(aligned_rec), (2, 43.5))

        packed = mod.record_return_packed
        packed_rec = packed(mod.packed_arr, 1)
        self.assertPreciseEqual(tuple(packed_rec), (2, 43.5))
        self.check_pycache(6)  # 2 index, 4 data
        self.check_hits(packed.func, 0, 2)

        # The cached code must also run fine from another process
        self.run_in_separate_process()

    def test_no_caching(self):
        mod = self.import_module()

        uncached_add = mod.add_nocache_usecase
        self.assertPreciseEqual(uncached_add(2, 3), 6)
        self.check_pycache(0)

    def test_many_locals(self):
        # Declaring many local arrays creates a very large LLVM IR, which
        # cannot be pickled due to the level of recursion it requires to
        # pickle. This test ensures that kernels with many locals (and
        # therefore large IR) can be cached. See Issue #8373:
        # https://github.com/numba/numba/issues/8373
        self.check_pycache(0)
        mod = self.import_module()
        kernel = mod.many_locals
        kernel[1, 1]()
        self.check_pycache(2)  # 1 index, 1 data

    def test_closure(self):
        mod = self.import_module()

        # (attribute name, expected result of calling it with 3)
        expectations = (
            ("closure1", 6),  # 3 + 3 = 6
            ("closure2", 8),  # 3 + 5 = 8
            ("closure3", 10),  # 3 + 7 = 10
            ("closure4", 12),  # 3 + 9 = 12
        )

        with warnings.catch_warnings():
            # Any NumbaWarning raised while using the closures is an error.
            warnings.simplefilter("error", NumbaWarning)

            for attr, expected in expectations:
                closure = getattr(mod, attr)
                self.assertPreciseEqual(closure(3), expected)
            self.check_pycache(5)  # 1 nbi, 4 nbc

    def test_cache_reuse(self):
        mod = self.import_module()
        mod.add_usecase(2, 3)
        mod.add_usecase(2.5, 3.5)
        mod.outer_uncached(2, 3)
        mod.outer(2, 3)
        mod.record_return_packed(mod.packed_arr, 0)
        mod.record_return_aligned(mod.aligned_arr, 1)
        mod.simple_usecase_caller(2)
        reference_mtimes = self.get_cache_mtimes()
        # Two signatures compiled
        self.check_hits(mod.add_usecase.func, 0, 2)

        reloaded = self.import_module()
        self.assertIsNot(mod, reloaded)
        add = reloaded.add_usecase
        add(2, 3)
        self.check_hits(add.func, 1, 0)
        add(2.5, 3.5)
        self.check_hits(add.func, 2, 0)

        # Re-use must leave the cache files untouched
        self.assertEqual(self.get_cache_mtimes(), reference_mtimes)

        self.run_in_separate_process()
        self.assertEqual(self.get_cache_mtimes(), reference_mtimes)

    def test_cache_invalidate(self):
        mod = self.import_module()
        add = mod.add_usecase
        self.assertPreciseEqual(add(2, 3), 6)

        # Appending to the module source should change the functions' results
        with open(self.modfile, "a") as source:
            source.write("\nZ = 10\n")

        mod = self.import_module()
        add = mod.add_usecase
        self.assertPreciseEqual(add(2, 3), 15)

    def test_recompile(self):
        # An explicit call to recompile() should overwrite the cache
        mod = self.import_module()
        add = mod.add_usecase
        self.assertPreciseEqual(add(2, 3), 6)

        mod = self.import_module()
        add = mod.add_usecase
        mod.Z = 10
        self.assertPreciseEqual(add(2, 3), 6)
        add.func.recompile()
        self.assertPreciseEqual(add(2, 3), 15)

        # The freshly recompiled version is re-used from other imports
        mod = self.import_module()
        add = mod.add_usecase
        self.assertPreciseEqual(add(2, 3), 15)

    def test_same_names(self):
        # Functions with the same name should still disambiguate
        mod = self.import_module()
        first = mod.renamed_function1
        self.assertPreciseEqual(first(2), 4)
        second = mod.renamed_function2
        self.assertPreciseEqual(second(2), 8)

    def _test_pycache_fallback(self):
        """
        With a disabled __pycache__, test there is a working fallback
        (e.g. on the user-wide cache dir)
        """
        mod = self.import_module()
        add = mod.add_usecase
        # Remove this function's cache files at the end, to avoid
        # accumulation across test calls.
        self.addCleanup(
            shutil.rmtree, add.func.stats.cache_path, ignore_errors=True
        )

        self.assertPreciseEqual(add(2, 3), 6)
        # It's a cache miss since the file was copied to a new temp location
        self.check_hits(add.func, 0, 1)

        # Test re-use
        reloaded = self.import_module()
        add = reloaded.add_usecase
        self.assertPreciseEqual(add(2, 3), 6)
        self.check_hits(add.func, 1, 0)

        # The __pycache__ is empty (otherwise the test's preconditions
        # wouldn't be met)
        self.check_pycache(0)

    @skip_bad_access
    @unittest.skipIf(
        os.name == "nt", "cannot easily make a directory read-only on Windows"
    )
    def test_non_creatable_pycache(self):
        # Make it impossible to create the __pycache__ directory
        saved_mode = os.stat(self.tempdir).st_mode
        os.chmod(self.tempdir, 0o500)
        self.addCleanup(os.chmod, self.tempdir, saved_mode)

        self._test_pycache_fallback()

    @skip_bad_access
    @unittest.skipIf(
        os.name == "nt", "cannot easily make a directory read-only on Windows"
    )
    def test_non_writable_pycache(self):
        # Make it impossible to write to the __pycache__ directory
        pycache_dir = os.path.join(self.tempdir, "__pycache__")
        os.mkdir(pycache_dir)
        saved_mode = os.stat(pycache_dir).st_mode
        os.chmod(pycache_dir, 0o500)
        self.addCleanup(os.chmod, pycache_dir, saved_mode)

        self._test_pycache_fallback()

    def test_cannot_cache_linking_libraries(self):
        # Requesting caching together with a linked file must be rejected
        # when the function is declared.
        ptx_path = str(test_data_dir / "jitlink.ptx")
        expected_msg = "Cannot pickle CUDACodeLibrary with linking files"
        with self.assertRaisesRegex(RuntimeError, expected_msg):

            @cuda.jit("void()", cache=True, link=[ptx_path])
            def f():
                pass
370
+
371
+
372
@skip_on_cudasim("Simulator does not implement caching")
class CUDACooperativeGroupTest(DispatcherCacheUsecasesTest):
    """Caching test for a kernel that uses cooperative groups.

    See Issue #9432: https://github.com/numba/numba/issues/9432
    If a cached function using CG sync was the first thing to compile,
    the compile would fail.
    """

    here = os.path.dirname(__file__)
    usecases_file = os.path.join(here, "cg_cache_usecases.py")
    modname = "cuda_cooperative_caching_test_fodder"

    def setUp(self):
        # Run the cache fixture setup first, then the CUDATestCase setup.
        DispatcherCacheUsecasesTest.setUp(self)
        CUDATestCase.setUp(self)

    def tearDown(self):
        # Tear down in the reverse order of setUp.
        CUDATestCase.tearDown(self)
        DispatcherCacheUsecasesTest.tearDown(self)

    @skip_unless_cc_60
    @skip_if_cudadevrt_missing
    def test_cache_cg(self):
        # Functions using cooperative groups should be cacheable. See Issue
        # #8888: https://github.com/numba/numba/issues/8888
        self.check_pycache(0)
        module = self.import_module()
        # Importing alone must not create any cache file.
        self.check_pycache(0)

        module.cg_usecase(0)
        self.check_pycache(2)  # 1 index, 1 data

        # The cached code must also run fine from another process
        self.run_in_separate_process()
403
+
404
+
405
@skip_on_cudasim("Simulator does not implement caching")
class CUDAAndCPUCachingTest(DispatcherCacheUsecasesTest):
    """Caching tests for a function jitted for both CPU and CUDA targets."""

    here = os.path.dirname(__file__)
    usecases_file = os.path.join(here, "cache_with_cpu_usecases.py")
    modname = "cuda_and_cpu_caching_test_fodder"

    def setUp(self):
        # Run the cache fixture setup first, then the CUDATestCase setup.
        DispatcherCacheUsecasesTest.setUp(self)
        CUDATestCase.setUp(self)

    def tearDown(self):
        # Tear down in the reverse order of setUp.
        CUDATestCase.tearDown(self)
        DispatcherCacheUsecasesTest.tearDown(self)

    @skip_on_standalone_numba_cuda
    def test_cpu_and_cuda_targets(self):
        # The same function jitted for CPU and CUDA targets should maintain
        # separate caches for each target.
        self.check_pycache(0)
        module = self.import_module()
        self.check_pycache(0)

        cpu_fn = module.assign_cpu
        cuda_fn = module.assign_cuda
        self.assertPreciseEqual(cpu_fn(5), 5)
        self.check_pycache(2)  # 1 index, 1 data
        self.assertPreciseEqual(cuda_fn(5), 5)
        self.check_pycache(3)  # 1 index, 2 data

        self.check_hits(cpu_fn.func, 0, 1)
        self.check_hits(cuda_fn.func, 0, 1)

        self.assertPreciseEqual(cpu_fn(5.5), 5.5)
        self.check_pycache(4)  # 1 index, 3 data
        self.assertPreciseEqual(cuda_fn(5.5), 5.5)
        self.check_pycache(5)  # 1 index, 4 data

        self.check_hits(cpu_fn.func, 0, 2)
        self.check_hits(cuda_fn.func, 0, 2)

    @skip_on_standalone_numba_cuda
    def test_cpu_and_cuda_reuse(self):
        # Existing cache files for the CPU and CUDA targets are reused.
        module = self.import_module()
        module.assign_cpu(5)
        module.assign_cpu(5.5)
        module.assign_cuda(5)
        module.assign_cuda(5.5)

        reference_mtimes = self.get_cache_mtimes()

        # Two signatures compiled
        self.check_hits(module.assign_cpu.func, 0, 2)
        self.check_hits(module.assign_cuda.func, 0, 2)

        reloaded = self.import_module()
        self.assertIsNot(module, reloaded)
        cpu_fn = reloaded.assign_cpu
        cuda_fn = reloaded.assign_cuda

        cpu_fn(2)
        self.check_hits(cpu_fn.func, 1, 0)
        cpu_fn(2.5)
        self.check_hits(cpu_fn.func, 2, 0)
        cuda_fn(2)
        self.check_hits(cuda_fn.func, 1, 0)
        cuda_fn(2.5)
        self.check_hits(cuda_fn.func, 2, 0)

        # Re-use must leave the cache files untouched
        self.assertEqual(self.get_cache_mtimes(), reference_mtimes)

        self.run_in_separate_process()
        self.assertEqual(self.get_cache_mtimes(), reference_mtimes)
479
+
480
+
481
def get_different_cc_gpus():
    """Find two GPUs whose Compute Capabilities differ.

    Returns a tuple ``(first_gpu, other_gpu)`` where ``first_gpu`` is
    ``cuda.gpus[0]`` and ``other_gpu`` is the first later GPU with a
    different Compute Capability, or ``None`` if there is no such GPU.
    """

    def compute_capability_of(device):
        # Query the capability while the device's context is active.
        with device:
            return cuda.current_context().device.compute_capability

    reference_gpu = cuda.gpus[0]
    reference_cc = compute_capability_of(reference_gpu)

    for candidate in cuda.gpus[1:]:
        if compute_capability_of(candidate) != reference_cc:
            return (reference_gpu, candidate)

    return None
496
+
497
+
498
@skip_on_cudasim("Simulator does not implement caching")
class TestMultiCCCaching(DispatcherCacheUsecasesTest):
    """Caching test across two GPUs with different Compute Capabilities."""

    here = os.path.dirname(__file__)
    usecases_file = os.path.join(here, "cache_usecases.py")
    modname = "cuda_multi_cc_caching_test_fodder"

    def setUp(self):
        # Run the cache fixture setup first, then the CUDATestCase setup.
        DispatcherCacheUsecasesTest.setUp(self)
        CUDATestCase.setUp(self)

    def tearDown(self):
        # Tear down in the reverse order of setUp.
        CUDATestCase.tearDown(self)
        DispatcherCacheUsecasesTest.tearDown(self)

    def test_cache(self):
        gpus = get_different_cc_gpus()
        if not gpus:
            self.skipTest("Need two different CCs for multi-CC cache test")
        first_gpu, second_gpu = gpus

        self.check_pycache(0)
        mod = self.import_module()
        self.check_pycache(0)

        # Step 1. Populate the cache with the first GPU
        with first_gpu:
            add = mod.add_usecase
            self.assertPreciseEqual(add(2, 3), 6)
            self.check_pycache(2)  # 1 index, 1 data
            self.assertPreciseEqual(add(2.5, 3), 6.5)
            self.check_pycache(3)  # 1 index, 2 data
            self.check_hits(add.func, 0, 2)

            aligned = mod.record_return_aligned
            record = aligned(mod.aligned_arr, 1)
            self.assertPreciseEqual(tuple(record), (2, 43.5))

            packed = mod.record_return_packed
            record = packed(mod.packed_arr, 1)
            self.assertPreciseEqual(tuple(record), (2, 43.5))
            self.check_pycache(6)  # 2 index, 4 data
            self.check_hits(packed.func, 0, 2)

        # Step 2. Run with the second GPU - under present behaviour this
        # doesn't further populate the cache.
        with second_gpu:
            add = mod.add_usecase
            self.assertPreciseEqual(add(2, 3), 6)
            self.check_pycache(6)  # cache unchanged
            self.assertPreciseEqual(add(2.5, 3), 6.5)
            self.check_pycache(6)  # cache unchanged
            self.check_hits(add.func, 0, 2)

            aligned = mod.record_return_aligned
            record = aligned(mod.aligned_arr, 1)
            self.assertPreciseEqual(tuple(record), (2, 43.5))

            packed = mod.record_return_packed
            record = packed(mod.packed_arr, 1)
            self.assertPreciseEqual(tuple(record), (2, 43.5))
            self.check_pycache(6)  # cache unchanged
            self.check_hits(packed.func, 0, 2)

        # Step 3. Run in a separate module with the second GPU - this
        # populates the cache for the second CC.
        mod2 = self.import_module()
        self.assertIsNot(mod, mod2)

        with second_gpu:
            add = mod2.add_usecase
            self.assertPreciseEqual(add(2, 3), 6)
            self.check_pycache(7)  # 2 index, 5 data
            self.assertPreciseEqual(add(2.5, 3), 6.5)
            self.check_pycache(8)  # 2 index, 6 data
            self.check_hits(add.func, 0, 2)

            # The input arrays are still taken from the first module.
            aligned = mod2.record_return_aligned
            record = aligned(mod.aligned_arr, 1)
            self.assertPreciseEqual(tuple(record), (2, 43.5))

            packed = mod2.record_return_packed
            record = packed(mod.packed_arr, 1)
            self.assertPreciseEqual(tuple(record), (2, 43.5))
            self.check_pycache(10)  # 2 index, 8 data
            self.check_hits(packed.func, 0, 2)

        # The following steps check that we can use the NVVM IR loaded from
        # the cache to generate PTX for a different compute capability to the
        # cached cubin's CC. To check this, we create another module that
        # loads the cached version containing a cubin for GPU 1. There will
        # be no cubin for GPU 0, so when we try to use it the PTX must be
        # generated.

        mod3 = self.import_module()
        self.assertIsNot(mod, mod3)

        # Step 4. Run with GPU 1 and get a cache hit, loading the cache
        # created during Step 3.
        with second_gpu:
            add = mod3.add_usecase
            self.assertPreciseEqual(add(2, 3), 6)
            self.assertPreciseEqual(add(2.5, 3), 6.5)

            aligned = mod3.record_return_aligned
            record = aligned(mod.aligned_arr, 1)
            self.assertPreciseEqual(tuple(record), (2, 43.5))

            packed = mod3.record_return_packed
            record = packed(mod.packed_arr, 1)
            self.assertPreciseEqual(tuple(record), (2, 43.5))

        # Step 5. Run with GPU 0 using the module from Step 4, to force PTX
        # generation from cached NVVM IR.
        with first_gpu:
            add = mod3.add_usecase
            self.assertPreciseEqual(add(2, 3), 6)
            self.assertPreciseEqual(add(2.5, 3), 6.5)

            aligned = mod3.record_return_aligned
            record = aligned(mod.aligned_arr, 1)
            self.assertPreciseEqual(tuple(record), (2, 43.5))

            packed = mod3.record_return_packed
            record = packed(mod.packed_arr, 1)
            self.assertPreciseEqual(tuple(record), (2, 43.5))
621
+
622
+
623
def child_initializer():
    """Initializer for worker processes in a multiprocessing pool.

    Switches off the occupancy and implicit copy warnings by setting the
    corresponding config values to 0.
    """
    from numba.cuda.core import config

    for option in ("CUDA_LOW_OCCUPANCY_WARNINGS", "CUDA_WARN_ON_IMPLICIT_COPY"):
        setattr(config, option, 0)
630
+
631
+
632
@skip_on_cudasim("Simulator does not implement caching")
class TestMultiprocessCache(DispatcherCacheUsecasesTest):
    """Caching test that uses the cache from several processes at once."""

    here = os.path.dirname(__file__)
    usecases_file = os.path.join(here, "cache_usecases.py")
    modname = "cuda_mp_caching_test_fodder"

    def setUp(self):
        # Run the cache fixture setup first, then the CUDATestCase setup.
        DispatcherCacheUsecasesTest.setUp(self)
        CUDATestCase.setUp(self)

    def tearDown(self):
        # Tear down in the reverse order of setUp.
        CUDATestCase.tearDown(self)
        DispatcherCacheUsecasesTest.tearDown(self)

    def test_multiprocessing(self):
        # Check caching works from multiple processes at once (#2028)
        mod = self.import_module()
        # Calling a pure Python caller of the JIT-compiled function is
        # necessary to reproduce the issue.
        f = mod.simple_usecase_caller
        n = 3
        # `get_context` is always available on Python 3, so no fallback to
        # the bare `multiprocessing` module is needed.
        ctx = multiprocessing.get_context("spawn")

        pool = ctx.Pool(n, child_initializer)

        try:
            res = sum(pool.imap(f, range(n)))
        finally:
            # Stop accepting work, then wait for the workers to exit so that
            # no child process outlives the test.
            pool.close()
            pool.join()
        self.assertEqual(res, n * (n - 1) // 2)
665
+
666
+
667
@skip_on_cudasim("Simulator does not implement the CUDACodeLibrary")
class TestCUDACodeLibrary(CUDATestCase):
    """Tests of miscellaneous CUDACodeLibrary behaviour that we wish to
    check explicitly."""

    def test_cannot_serialize_unfinalized(self):
        # The CUDA codegen fails to import under the simulator, so it cannot
        # be imported at the top level
        from numba.cuda.codegen import CUDACodeLibrary

        # Usually a CodeLibrary requires a real CodeGen, but since we don't
        # interact with it, a plain object will do
        library = CUDACodeLibrary(object(), "library")
        with self.assertRaisesRegex(RuntimeError, "Cannot pickle unfinalized"):
            library._reduce_states()