numba-cuda 0.22.0__cp313-cp313-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (487)
  1. _numba_cuda_redirector.pth +4 -0
  2. _numba_cuda_redirector.py +89 -0
  3. numba_cuda/VERSION +1 -0
  4. numba_cuda/__init__.py +6 -0
  5. numba_cuda/_version.py +11 -0
  6. numba_cuda/numba/cuda/__init__.py +70 -0
  7. numba_cuda/numba/cuda/_internal/cuda_bf16.py +16394 -0
  8. numba_cuda/numba/cuda/_internal/cuda_fp16.py +8112 -0
  9. numba_cuda/numba/cuda/api.py +580 -0
  10. numba_cuda/numba/cuda/api_util.py +76 -0
  11. numba_cuda/numba/cuda/args.py +72 -0
  12. numba_cuda/numba/cuda/bf16.py +397 -0
  13. numba_cuda/numba/cuda/cache_hints.py +287 -0
  14. numba_cuda/numba/cuda/cext/__init__.py +2 -0
  15. numba_cuda/numba/cuda/cext/_devicearray.cpp +159 -0
  16. numba_cuda/numba/cuda/cext/_devicearray.cpython-313-aarch64-linux-gnu.so +0 -0
  17. numba_cuda/numba/cuda/cext/_devicearray.h +29 -0
  18. numba_cuda/numba/cuda/cext/_dispatcher.cpp +1098 -0
  19. numba_cuda/numba/cuda/cext/_dispatcher.cpython-313-aarch64-linux-gnu.so +0 -0
  20. numba_cuda/numba/cuda/cext/_hashtable.cpp +532 -0
  21. numba_cuda/numba/cuda/cext/_hashtable.h +135 -0
  22. numba_cuda/numba/cuda/cext/_helperlib.c +71 -0
  23. numba_cuda/numba/cuda/cext/_helperlib.cpython-313-aarch64-linux-gnu.so +0 -0
  24. numba_cuda/numba/cuda/cext/_helpermod.c +82 -0
  25. numba_cuda/numba/cuda/cext/_pymodule.h +38 -0
  26. numba_cuda/numba/cuda/cext/_typeconv.cpp +206 -0
  27. numba_cuda/numba/cuda/cext/_typeconv.cpython-313-aarch64-linux-gnu.so +0 -0
  28. numba_cuda/numba/cuda/cext/_typeof.cpp +1159 -0
  29. numba_cuda/numba/cuda/cext/_typeof.h +19 -0
  30. numba_cuda/numba/cuda/cext/capsulethunk.h +111 -0
  31. numba_cuda/numba/cuda/cext/mviewbuf.c +385 -0
  32. numba_cuda/numba/cuda/cext/mviewbuf.cpython-313-aarch64-linux-gnu.so +0 -0
  33. numba_cuda/numba/cuda/cext/typeconv.cpp +212 -0
  34. numba_cuda/numba/cuda/cext/typeconv.hpp +101 -0
  35. numba_cuda/numba/cuda/cg.py +67 -0
  36. numba_cuda/numba/cuda/cgutils.py +1294 -0
  37. numba_cuda/numba/cuda/cloudpickle/__init__.py +21 -0
  38. numba_cuda/numba/cuda/cloudpickle/cloudpickle.py +1598 -0
  39. numba_cuda/numba/cuda/cloudpickle/cloudpickle_fast.py +17 -0
  40. numba_cuda/numba/cuda/codegen.py +541 -0
  41. numba_cuda/numba/cuda/compiler.py +1396 -0
  42. numba_cuda/numba/cuda/core/analysis.py +758 -0
  43. numba_cuda/numba/cuda/core/annotations/__init__.py +0 -0
  44. numba_cuda/numba/cuda/core/annotations/pretty_annotate.py +288 -0
  45. numba_cuda/numba/cuda/core/annotations/type_annotations.py +305 -0
  46. numba_cuda/numba/cuda/core/base.py +1332 -0
  47. numba_cuda/numba/cuda/core/boxing.py +1411 -0
  48. numba_cuda/numba/cuda/core/bytecode.py +728 -0
  49. numba_cuda/numba/cuda/core/byteflow.py +2346 -0
  50. numba_cuda/numba/cuda/core/caching.py +744 -0
  51. numba_cuda/numba/cuda/core/callconv.py +392 -0
  52. numba_cuda/numba/cuda/core/codegen.py +171 -0
  53. numba_cuda/numba/cuda/core/compiler.py +199 -0
  54. numba_cuda/numba/cuda/core/compiler_lock.py +85 -0
  55. numba_cuda/numba/cuda/core/compiler_machinery.py +497 -0
  56. numba_cuda/numba/cuda/core/config.py +650 -0
  57. numba_cuda/numba/cuda/core/consts.py +124 -0
  58. numba_cuda/numba/cuda/core/controlflow.py +989 -0
  59. numba_cuda/numba/cuda/core/entrypoints.py +57 -0
  60. numba_cuda/numba/cuda/core/environment.py +66 -0
  61. numba_cuda/numba/cuda/core/errors.py +917 -0
  62. numba_cuda/numba/cuda/core/event.py +511 -0
  63. numba_cuda/numba/cuda/core/funcdesc.py +330 -0
  64. numba_cuda/numba/cuda/core/generators.py +387 -0
  65. numba_cuda/numba/cuda/core/imputils.py +509 -0
  66. numba_cuda/numba/cuda/core/inline_closurecall.py +1787 -0
  67. numba_cuda/numba/cuda/core/interpreter.py +3617 -0
  68. numba_cuda/numba/cuda/core/ir.py +1812 -0
  69. numba_cuda/numba/cuda/core/ir_utils.py +2638 -0
  70. numba_cuda/numba/cuda/core/optional.py +129 -0
  71. numba_cuda/numba/cuda/core/options.py +262 -0
  72. numba_cuda/numba/cuda/core/postproc.py +249 -0
  73. numba_cuda/numba/cuda/core/pythonapi.py +1859 -0
  74. numba_cuda/numba/cuda/core/registry.py +46 -0
  75. numba_cuda/numba/cuda/core/removerefctpass.py +123 -0
  76. numba_cuda/numba/cuda/core/rewrites/__init__.py +26 -0
  77. numba_cuda/numba/cuda/core/rewrites/ir_print.py +91 -0
  78. numba_cuda/numba/cuda/core/rewrites/registry.py +104 -0
  79. numba_cuda/numba/cuda/core/rewrites/static_binop.py +41 -0
  80. numba_cuda/numba/cuda/core/rewrites/static_getitem.py +189 -0
  81. numba_cuda/numba/cuda/core/rewrites/static_raise.py +100 -0
  82. numba_cuda/numba/cuda/core/sigutils.py +68 -0
  83. numba_cuda/numba/cuda/core/ssa.py +498 -0
  84. numba_cuda/numba/cuda/core/targetconfig.py +330 -0
  85. numba_cuda/numba/cuda/core/tracing.py +231 -0
  86. numba_cuda/numba/cuda/core/transforms.py +956 -0
  87. numba_cuda/numba/cuda/core/typed_passes.py +867 -0
  88. numba_cuda/numba/cuda/core/typeinfer.py +1950 -0
  89. numba_cuda/numba/cuda/core/unsafe/__init__.py +0 -0
  90. numba_cuda/numba/cuda/core/unsafe/bytes.py +67 -0
  91. numba_cuda/numba/cuda/core/unsafe/eh.py +67 -0
  92. numba_cuda/numba/cuda/core/unsafe/refcount.py +98 -0
  93. numba_cuda/numba/cuda/core/untyped_passes.py +1979 -0
  94. numba_cuda/numba/cuda/cpython/builtins.py +1153 -0
  95. numba_cuda/numba/cuda/cpython/charseq.py +1218 -0
  96. numba_cuda/numba/cuda/cpython/cmathimpl.py +560 -0
  97. numba_cuda/numba/cuda/cpython/enumimpl.py +103 -0
  98. numba_cuda/numba/cuda/cpython/iterators.py +167 -0
  99. numba_cuda/numba/cuda/cpython/listobj.py +1326 -0
  100. numba_cuda/numba/cuda/cpython/mathimpl.py +499 -0
  101. numba_cuda/numba/cuda/cpython/numbers.py +1475 -0
  102. numba_cuda/numba/cuda/cpython/rangeobj.py +289 -0
  103. numba_cuda/numba/cuda/cpython/slicing.py +322 -0
  104. numba_cuda/numba/cuda/cpython/tupleobj.py +456 -0
  105. numba_cuda/numba/cuda/cpython/unicode.py +2865 -0
  106. numba_cuda/numba/cuda/cpython/unicode_support.py +1597 -0
  107. numba_cuda/numba/cuda/cpython/unsafe/__init__.py +0 -0
  108. numba_cuda/numba/cuda/cpython/unsafe/numbers.py +64 -0
  109. numba_cuda/numba/cuda/cpython/unsafe/tuple.py +92 -0
  110. numba_cuda/numba/cuda/cuda_paths.py +691 -0
  111. numba_cuda/numba/cuda/cudadecl.py +543 -0
  112. numba_cuda/numba/cuda/cudadrv/__init__.py +14 -0
  113. numba_cuda/numba/cuda/cudadrv/devicearray.py +954 -0
  114. numba_cuda/numba/cuda/cudadrv/devices.py +249 -0
  115. numba_cuda/numba/cuda/cudadrv/driver.py +3238 -0
  116. numba_cuda/numba/cuda/cudadrv/drvapi.py +435 -0
  117. numba_cuda/numba/cuda/cudadrv/dummyarray.py +562 -0
  118. numba_cuda/numba/cuda/cudadrv/enums.py +613 -0
  119. numba_cuda/numba/cuda/cudadrv/error.py +48 -0
  120. numba_cuda/numba/cuda/cudadrv/libs.py +220 -0
  121. numba_cuda/numba/cuda/cudadrv/linkable_code.py +184 -0
  122. numba_cuda/numba/cuda/cudadrv/mappings.py +14 -0
  123. numba_cuda/numba/cuda/cudadrv/ndarray.py +26 -0
  124. numba_cuda/numba/cuda/cudadrv/nvrtc.py +193 -0
  125. numba_cuda/numba/cuda/cudadrv/nvvm.py +756 -0
  126. numba_cuda/numba/cuda/cudadrv/rtapi.py +13 -0
  127. numba_cuda/numba/cuda/cudadrv/runtime.py +34 -0
  128. numba_cuda/numba/cuda/cudaimpl.py +983 -0
  129. numba_cuda/numba/cuda/cudamath.py +149 -0
  130. numba_cuda/numba/cuda/datamodel/__init__.py +7 -0
  131. numba_cuda/numba/cuda/datamodel/cuda_manager.py +66 -0
  132. numba_cuda/numba/cuda/datamodel/cuda_models.py +1446 -0
  133. numba_cuda/numba/cuda/datamodel/cuda_packer.py +224 -0
  134. numba_cuda/numba/cuda/datamodel/cuda_registry.py +22 -0
  135. numba_cuda/numba/cuda/datamodel/cuda_testing.py +153 -0
  136. numba_cuda/numba/cuda/datamodel/manager.py +11 -0
  137. numba_cuda/numba/cuda/datamodel/models.py +9 -0
  138. numba_cuda/numba/cuda/datamodel/packer.py +9 -0
  139. numba_cuda/numba/cuda/datamodel/registry.py +11 -0
  140. numba_cuda/numba/cuda/datamodel/testing.py +11 -0
  141. numba_cuda/numba/cuda/debuginfo.py +997 -0
  142. numba_cuda/numba/cuda/decorators.py +294 -0
  143. numba_cuda/numba/cuda/descriptor.py +35 -0
  144. numba_cuda/numba/cuda/device_init.py +155 -0
  145. numba_cuda/numba/cuda/deviceufunc.py +1021 -0
  146. numba_cuda/numba/cuda/dispatcher.py +2463 -0
  147. numba_cuda/numba/cuda/errors.py +72 -0
  148. numba_cuda/numba/cuda/extending.py +697 -0
  149. numba_cuda/numba/cuda/flags.py +178 -0
  150. numba_cuda/numba/cuda/fp16.py +357 -0
  151. numba_cuda/numba/cuda/include/12/cuda_bf16.h +5118 -0
  152. numba_cuda/numba/cuda/include/12/cuda_bf16.hpp +3865 -0
  153. numba_cuda/numba/cuda/include/12/cuda_fp16.h +5363 -0
  154. numba_cuda/numba/cuda/include/12/cuda_fp16.hpp +3483 -0
  155. numba_cuda/numba/cuda/include/13/cuda_bf16.h +5118 -0
  156. numba_cuda/numba/cuda/include/13/cuda_bf16.hpp +3865 -0
  157. numba_cuda/numba/cuda/include/13/cuda_fp16.h +5363 -0
  158. numba_cuda/numba/cuda/include/13/cuda_fp16.hpp +3483 -0
  159. numba_cuda/numba/cuda/initialize.py +24 -0
  160. numba_cuda/numba/cuda/intrinsics.py +531 -0
  161. numba_cuda/numba/cuda/itanium_mangler.py +214 -0
  162. numba_cuda/numba/cuda/kernels/__init__.py +2 -0
  163. numba_cuda/numba/cuda/kernels/reduction.py +265 -0
  164. numba_cuda/numba/cuda/kernels/transpose.py +65 -0
  165. numba_cuda/numba/cuda/libdevice.py +3386 -0
  166. numba_cuda/numba/cuda/libdevicedecl.py +20 -0
  167. numba_cuda/numba/cuda/libdevicefuncs.py +1060 -0
  168. numba_cuda/numba/cuda/libdeviceimpl.py +88 -0
  169. numba_cuda/numba/cuda/locks.py +19 -0
  170. numba_cuda/numba/cuda/lowering.py +1980 -0
  171. numba_cuda/numba/cuda/mathimpl.py +374 -0
  172. numba_cuda/numba/cuda/memory_management/__init__.py +4 -0
  173. numba_cuda/numba/cuda/memory_management/memsys.cu +99 -0
  174. numba_cuda/numba/cuda/memory_management/memsys.cuh +22 -0
  175. numba_cuda/numba/cuda/memory_management/nrt.cu +212 -0
  176. numba_cuda/numba/cuda/memory_management/nrt.cuh +48 -0
  177. numba_cuda/numba/cuda/memory_management/nrt.py +390 -0
  178. numba_cuda/numba/cuda/memory_management/nrt_context.py +438 -0
  179. numba_cuda/numba/cuda/misc/appdirs.py +594 -0
  180. numba_cuda/numba/cuda/misc/cffiimpl.py +24 -0
  181. numba_cuda/numba/cuda/misc/coverage_support.py +43 -0
  182. numba_cuda/numba/cuda/misc/dump_style.py +41 -0
  183. numba_cuda/numba/cuda/misc/findlib.py +75 -0
  184. numba_cuda/numba/cuda/misc/firstlinefinder.py +96 -0
  185. numba_cuda/numba/cuda/misc/gdb_hook.py +240 -0
  186. numba_cuda/numba/cuda/misc/literal.py +28 -0
  187. numba_cuda/numba/cuda/misc/llvm_pass_timings.py +412 -0
  188. numba_cuda/numba/cuda/misc/special.py +94 -0
  189. numba_cuda/numba/cuda/models.py +56 -0
  190. numba_cuda/numba/cuda/np/arraymath.py +5130 -0
  191. numba_cuda/numba/cuda/np/arrayobj.py +7635 -0
  192. numba_cuda/numba/cuda/np/extensions.py +11 -0
  193. numba_cuda/numba/cuda/np/linalg.py +3087 -0
  194. numba_cuda/numba/cuda/np/math/__init__.py +0 -0
  195. numba_cuda/numba/cuda/np/math/cmathimpl.py +558 -0
  196. numba_cuda/numba/cuda/np/math/mathimpl.py +487 -0
  197. numba_cuda/numba/cuda/np/math/numbers.py +1461 -0
  198. numba_cuda/numba/cuda/np/npdatetime.py +969 -0
  199. numba_cuda/numba/cuda/np/npdatetime_helpers.py +217 -0
  200. numba_cuda/numba/cuda/np/npyfuncs.py +1808 -0
  201. numba_cuda/numba/cuda/np/npyimpl.py +1027 -0
  202. numba_cuda/numba/cuda/np/numpy_support.py +798 -0
  203. numba_cuda/numba/cuda/np/polynomial/__init__.py +4 -0
  204. numba_cuda/numba/cuda/np/polynomial/polynomial_core.py +242 -0
  205. numba_cuda/numba/cuda/np/polynomial/polynomial_functions.py +380 -0
  206. numba_cuda/numba/cuda/np/ufunc/__init__.py +4 -0
  207. numba_cuda/numba/cuda/np/ufunc/decorators.py +203 -0
  208. numba_cuda/numba/cuda/np/ufunc/sigparse.py +68 -0
  209. numba_cuda/numba/cuda/np/ufunc/ufuncbuilder.py +65 -0
  210. numba_cuda/numba/cuda/np/ufunc_db.py +1282 -0
  211. numba_cuda/numba/cuda/np/unsafe/__init__.py +0 -0
  212. numba_cuda/numba/cuda/np/unsafe/ndarray.py +84 -0
  213. numba_cuda/numba/cuda/nvvmutils.py +254 -0
  214. numba_cuda/numba/cuda/printimpl.py +126 -0
  215. numba_cuda/numba/cuda/random.py +308 -0
  216. numba_cuda/numba/cuda/reshape_funcs.cu +156 -0
  217. numba_cuda/numba/cuda/serialize.py +267 -0
  218. numba_cuda/numba/cuda/simulator/__init__.py +63 -0
  219. numba_cuda/numba/cuda/simulator/_internal/__init__.py +4 -0
  220. numba_cuda/numba/cuda/simulator/_internal/cuda_bf16.py +2 -0
  221. numba_cuda/numba/cuda/simulator/api.py +179 -0
  222. numba_cuda/numba/cuda/simulator/bf16.py +4 -0
  223. numba_cuda/numba/cuda/simulator/compiler.py +38 -0
  224. numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +11 -0
  225. numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +462 -0
  226. numba_cuda/numba/cuda/simulator/cudadrv/devices.py +122 -0
  227. numba_cuda/numba/cuda/simulator/cudadrv/driver.py +66 -0
  228. numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +7 -0
  229. numba_cuda/numba/cuda/simulator/cudadrv/dummyarray.py +7 -0
  230. numba_cuda/numba/cuda/simulator/cudadrv/error.py +10 -0
  231. numba_cuda/numba/cuda/simulator/cudadrv/libs.py +10 -0
  232. numba_cuda/numba/cuda/simulator/cudadrv/linkable_code.py +61 -0
  233. numba_cuda/numba/cuda/simulator/cudadrv/nvrtc.py +11 -0
  234. numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +32 -0
  235. numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +22 -0
  236. numba_cuda/numba/cuda/simulator/dispatcher.py +11 -0
  237. numba_cuda/numba/cuda/simulator/kernel.py +320 -0
  238. numba_cuda/numba/cuda/simulator/kernelapi.py +509 -0
  239. numba_cuda/numba/cuda/simulator/memory_management/__init__.py +4 -0
  240. numba_cuda/numba/cuda/simulator/memory_management/nrt.py +21 -0
  241. numba_cuda/numba/cuda/simulator/reduction.py +19 -0
  242. numba_cuda/numba/cuda/simulator/tests/support.py +4 -0
  243. numba_cuda/numba/cuda/simulator/vector_types.py +65 -0
  244. numba_cuda/numba/cuda/simulator_init.py +18 -0
  245. numba_cuda/numba/cuda/stubs.py +624 -0
  246. numba_cuda/numba/cuda/target.py +505 -0
  247. numba_cuda/numba/cuda/testing.py +347 -0
  248. numba_cuda/numba/cuda/tests/__init__.py +62 -0
  249. numba_cuda/numba/cuda/tests/benchmarks/__init__.py +0 -0
  250. numba_cuda/numba/cuda/tests/benchmarks/test_kernel_launch.py +119 -0
  251. numba_cuda/numba/cuda/tests/cloudpickle_main_class.py +9 -0
  252. numba_cuda/numba/cuda/tests/core/serialize_usecases.py +113 -0
  253. numba_cuda/numba/cuda/tests/core/test_itanium_mangler.py +83 -0
  254. numba_cuda/numba/cuda/tests/core/test_serialize.py +371 -0
  255. numba_cuda/numba/cuda/tests/cudadrv/__init__.py +9 -0
  256. numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +147 -0
  257. numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +161 -0
  258. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +397 -0
  259. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +24 -0
  260. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +180 -0
  261. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +313 -0
  262. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +191 -0
  263. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +621 -0
  264. numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +247 -0
  265. numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +100 -0
  266. numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +200 -0
  267. numba_cuda/numba/cuda/tests/cudadrv/test_events.py +53 -0
  268. numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +72 -0
  269. numba_cuda/numba/cuda/tests/cudadrv/test_init.py +138 -0
  270. numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +43 -0
  271. numba_cuda/numba/cuda/tests/cudadrv/test_is_fp16.py +15 -0
  272. numba_cuda/numba/cuda/tests/cudadrv/test_linkable_code.py +58 -0
  273. numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +348 -0
  274. numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +128 -0
  275. numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py +301 -0
  276. numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +174 -0
  277. numba_cuda/numba/cuda/tests/cudadrv/test_nvrtc.py +28 -0
  278. numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +185 -0
  279. numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +39 -0
  280. numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +23 -0
  281. numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +38 -0
  282. numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +48 -0
  283. numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +44 -0
  284. numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +127 -0
  285. numba_cuda/numba/cuda/tests/cudapy/__init__.py +9 -0
  286. numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +231 -0
  287. numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +50 -0
  288. numba_cuda/numba/cuda/tests/cudapy/cg_cache_usecases.py +36 -0
  289. numba_cuda/numba/cuda/tests/cudapy/complex_usecases.py +116 -0
  290. numba_cuda/numba/cuda/tests/cudapy/enum_usecases.py +59 -0
  291. numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +62 -0
  292. numba_cuda/numba/cuda/tests/cudapy/jitlink.ptx +28 -0
  293. numba_cuda/numba/cuda/tests/cudapy/overload_usecases.py +33 -0
  294. numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +104 -0
  295. numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +47 -0
  296. numba_cuda/numba/cuda/tests/cudapy/test_analysis.py +1122 -0
  297. numba_cuda/numba/cuda/tests/cudapy/test_array.py +344 -0
  298. numba_cuda/numba/cuda/tests/cudapy/test_array_alignment.py +268 -0
  299. numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +203 -0
  300. numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +63 -0
  301. numba_cuda/numba/cuda/tests/cudapy/test_array_reductions.py +360 -0
  302. numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +1815 -0
  303. numba_cuda/numba/cuda/tests/cudapy/test_bfloat16.py +599 -0
  304. numba_cuda/numba/cuda/tests/cudapy/test_bfloat16_bindings.py +377 -0
  305. numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +160 -0
  306. numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +27 -0
  307. numba_cuda/numba/cuda/tests/cudapy/test_byteflow.py +98 -0
  308. numba_cuda/numba/cuda/tests/cudapy/test_cache_hints.py +210 -0
  309. numba_cuda/numba/cuda/tests/cudapy/test_caching.py +683 -0
  310. numba_cuda/numba/cuda/tests/cudapy/test_casting.py +265 -0
  311. numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +42 -0
  312. numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +718 -0
  313. numba_cuda/numba/cuda/tests/cudapy/test_complex.py +370 -0
  314. numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +23 -0
  315. numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +142 -0
  316. numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +178 -0
  317. numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +193 -0
  318. numba_cuda/numba/cuda/tests/cudapy/test_copy_propagate.py +131 -0
  319. numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +438 -0
  320. numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +94 -0
  321. numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +101 -0
  322. numba_cuda/numba/cuda/tests/cudapy/test_debug.py +105 -0
  323. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +978 -0
  324. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo_types.py +476 -0
  325. numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +500 -0
  326. numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +820 -0
  327. numba_cuda/numba/cuda/tests/cudapy/test_enums.py +152 -0
  328. numba_cuda/numba/cuda/tests/cudapy/test_errors.py +111 -0
  329. numba_cuda/numba/cuda/tests/cudapy/test_exception.py +170 -0
  330. numba_cuda/numba/cuda/tests/cudapy/test_extending.py +1088 -0
  331. numba_cuda/numba/cuda/tests/cudapy/test_extending_types.py +71 -0
  332. numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +265 -0
  333. numba_cuda/numba/cuda/tests/cudapy/test_flow_control.py +1433 -0
  334. numba_cuda/numba/cuda/tests/cudapy/test_forall.py +57 -0
  335. numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +34 -0
  336. numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +69 -0
  337. numba_cuda/numba/cuda/tests/cudapy/test_globals.py +62 -0
  338. numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +474 -0
  339. numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +167 -0
  340. numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +92 -0
  341. numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +39 -0
  342. numba_cuda/numba/cuda/tests/cudapy/test_inline.py +170 -0
  343. numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +255 -0
  344. numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +1219 -0
  345. numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +263 -0
  346. numba_cuda/numba/cuda/tests/cudapy/test_ir.py +598 -0
  347. numba_cuda/numba/cuda/tests/cudapy/test_ir_utils.py +276 -0
  348. numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +101 -0
  349. numba_cuda/numba/cuda/tests/cudapy/test_lang.py +68 -0
  350. numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +123 -0
  351. numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +194 -0
  352. numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +220 -0
  353. numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +173 -0
  354. numba_cuda/numba/cuda/tests/cudapy/test_make_function_to_jit_function.py +364 -0
  355. numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +47 -0
  356. numba_cuda/numba/cuda/tests/cudapy/test_math.py +842 -0
  357. numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +76 -0
  358. numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +78 -0
  359. numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +25 -0
  360. numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +145 -0
  361. numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +39 -0
  362. numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +82 -0
  363. numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +53 -0
  364. numba_cuda/numba/cuda/tests/cudapy/test_operator.py +504 -0
  365. numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +93 -0
  366. numba_cuda/numba/cuda/tests/cudapy/test_overload.py +402 -0
  367. numba_cuda/numba/cuda/tests/cudapy/test_powi.py +128 -0
  368. numba_cuda/numba/cuda/tests/cudapy/test_print.py +193 -0
  369. numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +37 -0
  370. numba_cuda/numba/cuda/tests/cudapy/test_random.py +117 -0
  371. numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +614 -0
  372. numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +130 -0
  373. numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +94 -0
  374. numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +83 -0
  375. numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +86 -0
  376. numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +40 -0
  377. numba_cuda/numba/cuda/tests/cudapy/test_sm.py +457 -0
  378. numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +233 -0
  379. numba_cuda/numba/cuda/tests/cudapy/test_ssa.py +454 -0
  380. numba_cuda/numba/cuda/tests/cudapy/test_stream_api.py +56 -0
  381. numba_cuda/numba/cuda/tests/cudapy/test_sync.py +277 -0
  382. numba_cuda/numba/cuda/tests/cudapy/test_tracing.py +200 -0
  383. numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +90 -0
  384. numba_cuda/numba/cuda/tests/cudapy/test_typeconv.py +333 -0
  385. numba_cuda/numba/cuda/tests/cudapy/test_typeinfer.py +538 -0
  386. numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +585 -0
  387. numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +42 -0
  388. numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +485 -0
  389. numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +312 -0
  390. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +23 -0
  391. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +183 -0
  392. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +40 -0
  393. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +40 -0
  394. numba_cuda/numba/cuda/tests/cudapy/test_warning.py +206 -0
  395. numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +446 -0
  396. numba_cuda/numba/cuda/tests/cudasim/__init__.py +9 -0
  397. numba_cuda/numba/cuda/tests/cudasim/support.py +9 -0
  398. numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +111 -0
  399. numba_cuda/numba/cuda/tests/data/__init__.py +2 -0
  400. numba_cuda/numba/cuda/tests/data/cta_barrier.cu +28 -0
  401. numba_cuda/numba/cuda/tests/data/cuda_include.cu +10 -0
  402. numba_cuda/numba/cuda/tests/data/error.cu +12 -0
  403. numba_cuda/numba/cuda/tests/data/include/add.cuh +8 -0
  404. numba_cuda/numba/cuda/tests/data/jitlink.cu +28 -0
  405. numba_cuda/numba/cuda/tests/data/jitlink.ptx +49 -0
  406. numba_cuda/numba/cuda/tests/data/warn.cu +12 -0
  407. numba_cuda/numba/cuda/tests/doc_examples/__init__.py +9 -0
  408. numba_cuda/numba/cuda/tests/doc_examples/ffi/__init__.py +2 -0
  409. numba_cuda/numba/cuda/tests/doc_examples/ffi/functions.cu +54 -0
  410. numba_cuda/numba/cuda/tests/doc_examples/ffi/include/mul.cuh +8 -0
  411. numba_cuda/numba/cuda/tests/doc_examples/ffi/saxpy.cu +14 -0
  412. numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +86 -0
  413. numba_cuda/numba/cuda/tests/doc_examples/test_cpointer.py +68 -0
  414. numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +81 -0
  415. numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +141 -0
  416. numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +160 -0
  417. numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +180 -0
  418. numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +119 -0
  419. numba_cuda/numba/cuda/tests/doc_examples/test_random.py +66 -0
  420. numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +80 -0
  421. numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +206 -0
  422. numba_cuda/numba/cuda/tests/doc_examples/test_ufunc.py +53 -0
  423. numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +76 -0
  424. numba_cuda/numba/cuda/tests/nocuda/__init__.py +9 -0
  425. numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +452 -0
  426. numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +48 -0
  427. numba_cuda/numba/cuda/tests/nocuda/test_import.py +63 -0
  428. numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +252 -0
  429. numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +59 -0
  430. numba_cuda/numba/cuda/tests/nrt/__init__.py +9 -0
  431. numba_cuda/numba/cuda/tests/nrt/test_nrt.py +387 -0
  432. numba_cuda/numba/cuda/tests/nrt/test_nrt_refct.py +124 -0
  433. numba_cuda/numba/cuda/tests/support.py +900 -0
  434. numba_cuda/numba/cuda/typeconv/__init__.py +4 -0
  435. numba_cuda/numba/cuda/typeconv/castgraph.py +137 -0
  436. numba_cuda/numba/cuda/typeconv/rules.py +63 -0
  437. numba_cuda/numba/cuda/typeconv/typeconv.py +121 -0
  438. numba_cuda/numba/cuda/types/__init__.py +233 -0
  439. numba_cuda/numba/cuda/types/__init__.pyi +167 -0
  440. numba_cuda/numba/cuda/types/abstract.py +9 -0
  441. numba_cuda/numba/cuda/types/common.py +9 -0
  442. numba_cuda/numba/cuda/types/containers.py +9 -0
  443. numba_cuda/numba/cuda/types/cuda_abstract.py +533 -0
  444. numba_cuda/numba/cuda/types/cuda_common.py +110 -0
  445. numba_cuda/numba/cuda/types/cuda_containers.py +971 -0
  446. numba_cuda/numba/cuda/types/cuda_function_type.py +230 -0
  447. numba_cuda/numba/cuda/types/cuda_functions.py +798 -0
  448. numba_cuda/numba/cuda/types/cuda_iterators.py +120 -0
  449. numba_cuda/numba/cuda/types/cuda_misc.py +569 -0
  450. numba_cuda/numba/cuda/types/cuda_npytypes.py +690 -0
  451. numba_cuda/numba/cuda/types/cuda_scalars.py +280 -0
  452. numba_cuda/numba/cuda/types/ext_types.py +101 -0
  453. numba_cuda/numba/cuda/types/function_type.py +11 -0
  454. numba_cuda/numba/cuda/types/functions.py +9 -0
  455. numba_cuda/numba/cuda/types/iterators.py +9 -0
  456. numba_cuda/numba/cuda/types/misc.py +9 -0
  457. numba_cuda/numba/cuda/types/npytypes.py +9 -0
  458. numba_cuda/numba/cuda/types/scalars.py +9 -0
  459. numba_cuda/numba/cuda/typing/__init__.py +19 -0
  460. numba_cuda/numba/cuda/typing/arraydecl.py +939 -0
  461. numba_cuda/numba/cuda/typing/asnumbatype.py +130 -0
  462. numba_cuda/numba/cuda/typing/bufproto.py +70 -0
  463. numba_cuda/numba/cuda/typing/builtins.py +1209 -0
  464. numba_cuda/numba/cuda/typing/cffi_utils.py +219 -0
  465. numba_cuda/numba/cuda/typing/cmathdecl.py +47 -0
  466. numba_cuda/numba/cuda/typing/collections.py +138 -0
  467. numba_cuda/numba/cuda/typing/context.py +782 -0
  468. numba_cuda/numba/cuda/typing/ctypes_utils.py +125 -0
  469. numba_cuda/numba/cuda/typing/dictdecl.py +63 -0
  470. numba_cuda/numba/cuda/typing/enumdecl.py +74 -0
  471. numba_cuda/numba/cuda/typing/listdecl.py +147 -0
  472. numba_cuda/numba/cuda/typing/mathdecl.py +158 -0
  473. numba_cuda/numba/cuda/typing/npdatetime.py +322 -0
  474. numba_cuda/numba/cuda/typing/npydecl.py +749 -0
  475. numba_cuda/numba/cuda/typing/setdecl.py +115 -0
  476. numba_cuda/numba/cuda/typing/templates.py +1446 -0
  477. numba_cuda/numba/cuda/typing/typeof.py +301 -0
  478. numba_cuda/numba/cuda/ufuncs.py +746 -0
  479. numba_cuda/numba/cuda/utils.py +724 -0
  480. numba_cuda/numba/cuda/vector_types.py +214 -0
  481. numba_cuda/numba/cuda/vectorizers.py +260 -0
  482. numba_cuda-0.22.0.dist-info/METADATA +109 -0
  483. numba_cuda-0.22.0.dist-info/RECORD +487 -0
  484. numba_cuda-0.22.0.dist-info/WHEEL +6 -0
  485. numba_cuda-0.22.0.dist-info/licenses/LICENSE +26 -0
  486. numba_cuda-0.22.0.dist-info/licenses/LICENSE.numba +24 -0
  487. numba_cuda-0.22.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,758 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
4
+ from collections import namedtuple, defaultdict
5
+ from numba.cuda import types
6
+ from numba.cuda.core import ir
7
+ from numba.cuda.core import errors
8
+ from numba.cuda.core import consts
9
+ import operator
10
+ from functools import reduce
11
+
12
+ from .controlflow import CFGraph
13
+ from numba.cuda.misc import special
14
+
15
+ #
16
+ # Analysis related to variable lifetime
17
+ #
18
+
19
# Return type of compute_use_defs(): two dicts keyed by block offset.
_use_defs_result = namedtuple("use_defs_result", "usemap,defmap")

# Extension point: other packages that introduce new IR node types register
# a callable here so their uses/defs are accounted for.
# format: {type: function(stmt, use_set, def_set)}
ir_extension_usedefs = {}


def compute_use_defs(blocks):
    """
    Compute, per block, the variable names it uses and the names it defines.

    ``blocks`` maps a block offset to an IR block exposing ``body``.

    Returns a ``use_defs_result`` namedtuple whose ``usemap`` and ``defmap``
    fields are ``{block offset -> set of variable names}``.

    Statements whose exact type is registered in ``ir_extension_usedefs``
    are delegated entirely to the registered callable. Otherwise:

    * the target of an assignment is recorded as defined, unless the target
      name also appears on the right-hand side of that same assignment;
    * every variable listed by the statement is recorded as used, unless the
      block has already defined it at that point.
    """
    usemap = {}
    defmap = {}

    for label, block in blocks.items():
        used = usemap[label] = set()
        defined = defmap[label] = set()

        for inst in block.body:
            kind = type(inst)
            if kind in ir_extension_usedefs:
                # Foreign node: its handler updates both sets itself.
                ir_extension_usedefs[kind](inst, used, defined)
                continue

            if isinstance(inst, ir.Assign):
                value = inst.value
                if isinstance(value, ir.Inst):
                    rhs_names = {v.name for v in value.list_vars()}
                elif isinstance(value, ir.Var):
                    rhs_names = {value.name}
                elif isinstance(
                    value, (ir.Arg, ir.Const, ir.Global, ir.FreeVar)
                ):
                    # These value kinds reference no variables.
                    rhs_names = ()
                else:
                    raise AssertionError("unreachable", type(value))

                # A self-referencing assignment is not a fresh definition.
                target_name = inst.target.name
                if target_name not in rhs_names:
                    defined.add(target_name)

            # Names defined locally (so far) are not uses of incoming values.
            used.update(
                v.name for v in inst.list_vars() if v.name not in defined
            )

    return _use_defs_result(usemap=usemap, defmap=defmap)
62
+
63
+
64
def compute_live_map(cfg, blocks, var_use_map, var_def_map):
    """
    Compute the variables that must be alive at the ENTRY of each block.

    Two fix-point passes are run, each repeating until the sizes of all the
    per-block sets stop changing:

    1. a forward pass collecting, per block, every name used or defined in
       the block or in any block that can flow into it;
    2. a backward pass pushing each block's live names into its
       predecessors, restricted to names that reach the predecessor and
       are not defined by the predecessor itself.

    ``cfg`` must provide ``successors(offset)`` and ``predecessors(offset)``
    yielding ``(offset, data)`` pairs. Returns ``{block offset -> set}``.
    """

    def snapshot(mapping):
        """Sizes of all the sets; equal snapshots mean no set grew."""
        return tuple(len(members) for members in mapping.values())

    def iterate_until_stable(step, mapping):
        """Apply ``step`` to ``mapping`` at least once, until no set grows."""
        current = snapshot(mapping)
        while True:
            step(mapping)
            previous, current = current, snapshot(mapping)
            if previous == current:
                break

    def propagate_reach(reach):
        """Forward pass: names used/defined here flow to all successors."""
        for label in var_def_map:
            touched_here = var_def_map[label] | var_use_map[label]
            reach[label] |= touched_here
            for successor, _ in cfg.successors(label):
                reach[successor] |= reach[label]

    def propagate_liveness(live):
        """Backward pass: push live names into predecessors."""
        for label in live:
            needed = live[label]
            for predecessor, _ in cfg.predecessors(label):
                # Only names that actually reach the predecessor ...
                visible = needed & def_reach_map[predecessor]
                # ... and that the predecessor does not define itself.
                live[predecessor] |= visible - var_def_map[predecessor]

    # Seed with a private copy of each block's own uses.
    live_map = {label: set(var_use_map[label]) for label in blocks.keys()}

    def_reach_map = defaultdict(set)
    iterate_until_stable(propagate_reach, def_reach_map)
    iterate_until_stable(propagate_liveness, live_map)
    return live_map
115
+
116
+
117
# Return type of compute_dead_maps().
_dead_maps_result = namedtuple("dead_maps_result", "internal,escaping,combined")


def compute_dead_maps(cfg, blocks, live_map, var_def_map):
    """
    Compute where each variable's lifetime ends.

    ``live_map`` maps a block offset to the variables alive at the ENTRY of
    that block; ``var_def_map`` maps a block offset to the variables the
    block defines.

    Returns a ``dead_maps_result`` namedtuple of three
    ``{block offset -> set of variable names}`` mappings:

    * ``internal``: variables that die inside the block;
    * ``escaping``: variables that leave a predecessor alive but are
      neither live in nor defined by this block;
    * ``combined``: union of the two above, for every block.

    Raises ``RuntimeError`` if some live variable is covered by none of the
    dead maps while the CFG reports at least one exit point.
    """
    dead_on_arrival = defaultdict(set)  # escaping: delete at successor start
    dead_inside = defaultdict(set)  # internal: delete within the block
    dead_at_exit = defaultdict(set)  # delete after the function exits

    for label, block in blocks.items():
        # Everything alive at some point in this block: incoming + local defs.
        live_here = live_map[label] | var_def_map[label]

        live_in_successors = {
            successor: live_map[successor]
            for successor, _ in cfg.successors(label)
        }
        terminator_names = {v.name for v in block.terminator.list_vars()}

        # Names that must survive to the end of the block: whatever any
        # successor needs, plus whatever the terminator itself reads.
        kept = set()
        for successor_live in live_in_successors.values():
            kept = kept | successor_live
        kept |= terminator_names

        doomed = live_here - kept
        dead_inside[label] = doomed

        leaving = live_here - doomed
        for successor, successor_live in live_in_successors.items():
            # The successor deletes escaped names it neither needs nor
            # redefines.
            wanted = successor_live | var_def_map[successor]
            dead_on_arrival[successor] |= leaving - wanted

        if not live_in_successors:
            # Exit block: only the terminator's operands remain.
            dead_at_exit[label] = terminator_names

    def union_all(mapping):
        merged = set()
        for group in mapping.values():
            merged = merged | group
        return merged

    # Sanity check: every live variable should die somewhere.
    every_live_name = union_all(live_map)
    accounted = (
        union_all(dead_inside)
        | union_all(dead_on_arrival)
        | union_all(dead_at_exit)
    )
    unaccounted = every_live_name - accounted
    # Without any exit point the check cannot be performed, so it is skipped.
    if unaccounted and cfg.exit_points():
        raise RuntimeError(
            "liveness info missing for vars: {0}".format(unaccounted)
        )

    combined = {
        label: dead_inside[label] | dead_on_arrival[label] for label in blocks
    }

    return _dead_maps_result(
        internal=dead_inside,
        escaping=dead_on_arrival,
        combined=combined,
    )
195
+
196
+
197
def compute_live_variables(cfg, blocks, var_def_map, var_dead_map):
    """
    Compute the variables actually available at the entry of each block.

    ``var_def_map`` and ``var_dead_map`` give, per block offset, the
    variables defined and deleted by that block respectively.

    This is a top-down dataflow (what IS available on entry), as opposed to
    compute_live_map() which works bottom-up (what MUST be available).

    Returns a ``defaultdict(set)`` mapping block offset to the set of
    variable names available on entry. A block without predecessors keeps
    an empty set.
    """
    entry_vars = defaultdict(set)

    def sizes():
        return tuple(len(names) for names in entry_vars.values())

    before = sizes()
    # Loops in the graph mean one sweep is not enough; repeat until no
    # per-block set grows any further.
    while True:
        for label in blocks:
            # available on entry + defined here - deleted here
            surviving = (
                entry_vars[label] | var_def_map[label]
            ) - var_dead_map[label]
            for successor, _ in cfg.successors(label):
                entry_vars[successor] |= surviving

        after = sizes()
        if after == before:
            break
        before = after

    return entry_vars
236
+
237
+
238
+ #
239
+ # Analysis related to controlflow
240
+ #
241
+
242
+
243
def compute_cfg_from_blocks(blocks):
    """
    Build and process a ``CFGraph`` from a ``{offset -> block}`` mapping.

    Every block offset becomes a node, every target reported by a block's
    terminator becomes an edge, and the smallest offset is used as the
    entry point. The processed graph is returned.
    """
    graph = CFGraph()

    # All nodes must exist before any edge is added.
    for label in blocks:
        graph.add_node(label)

    for label, block in blocks.items():
        for destination in block.terminator.get_targets():
            graph.add_edge(label, destination)

    graph.set_entry_point(min(blocks))
    graph.process()
    return graph
256
+
257
+
258
def find_top_level_loops(cfg):
    """
    Generate the top-level loops of a control-flow graph.

    A loop is top-level when its header is not a member (body, entry or
    exit node, the loop's own header excluded) of any loop in ``cfg``.
    Each yielded loop has been passed through ``_fix_loop_exit``.
    """
    nested_members = set()
    # Gather every node that belongs to some loop, ignoring each loop's own
    # header so that a loop does not disqualify itself.
    for candidate in cfg.loops().values():
        members = set(candidate.body)
        members.update(candidate.entries)
        members.update(candidate.exits)
        members.discard(candidate.header)
        nested_members.update(members)

    for candidate in cfg.loops().values():
        if candidate.header in nested_members:
            # Header sits inside another loop: not top-level.
            continue
        yield _fix_loop_exit(cfg, candidate)
272
+
273
+
274
def _fix_loop_exit(cfg, loop):
    """
    Fixes loop.exits for Py3.8+ bytecode CFG changes (``break`` inside
    loops).

    The exits are narrowed to those exit nodes that are post-dominators of
    every exit node. If any such common exit remains, the other exit nodes
    are moved into the loop body and an updated loop is returned; otherwise
    the loop is returned unchanged.
    """
    postdoms = cfg.post_dominators()

    # Intersect the exits with the post-dominator set of each exit node.
    shared = loop.exits
    for node in loop.exits:
        shared = shared & postdoms[node]

    if not shared:
        return loop

    # Exit nodes that are not common to all exits become body nodes.
    demoted = loop.exits - shared
    return loop._replace(exits=shared, body=loop.body | demoted)
292
+
293
+
294
def rewrite_semantic_constants(func_ir, called_args):
    """
    This rewrites values known to be constant by their semantics as ir.Const
    nodes, this is to give branch pruning the best chance possible of killing
    branches. An example might be rewriting len(tuple) as the literal length.

    func_ir is the IR (mutated in place)
    called_args are the actual arguments with which the function is called

    Two patterns are rewritten:

    * ``<arg>.ndim`` where the argument's type is a ``types.Array``;
    * ``len(<x>)`` where ``<x>`` is either a function argument typed as a
      ``types.BaseTuple`` or a ``typed_getitem`` expression whose dtype is
      a ``types.BaseTuple``.
    """
    from numba.cuda.core.ir_utils import get_definition, guard

    DEBUG = 0

    if DEBUG > 1:
        print(
            ("rewrite_semantic_constants: " + func_ir.func_id.func_name).center(
                80, "-"
            )
        )
        print("before".center(80, "*"))
        func_ir.dump()

    def rewrite_statement(stmt, old_val, new_val):
        """
        Rewrites the stmt as a ir.Const new_val and fixes up the entries in
        func_ir._definitions (``old_val`` is the expression being replaced).
        """
        stmt.value = ir.Const(new_val, stmt.loc)
        defns = func_ir._definitions[stmt.target.name]
        defns[defns.index(old_val)] = stmt.value

    def rewrite_array_ndim(stmt, val):
        # rewrite Array.ndim as const(ndim)
        if getattr(val, "op", None) != "getattr" or val.attr != "ndim":
            return
        arg_def = guard(get_definition, func_ir, val.value)
        if isinstance(arg_def, ir.Arg):
            argty = called_args[arg_def.index]
            if isinstance(argty, types.Array):
                rewrite_statement(stmt, val, argty.ndim)

    def rewrite_tuple_len(stmt, val):
        # rewrite len(tuple) as const(len(tuple))
        if getattr(val, "op", None) != "call":
            return
        func = guard(get_definition, func_ir, val.func)
        if not (
            isinstance(func, ir.Global)
            and getattr(func, "value", None) is len
        ):
            return
        if len(val.args) != 1:
            # e.g. ``len(*x)`` carries no positional args in the call
            # expression; there is nothing to resolve statically, so leave
            # the call untouched instead of failing on the unpack below.
            return
        (arg,) = val.args
        arg_def = guard(get_definition, func_ir, arg)
        if isinstance(arg_def, ir.Arg):
            argty = called_args[arg_def.index]
            if isinstance(argty, types.BaseTuple):
                rewrite_statement(stmt, val, argty.count)
        elif isinstance(arg_def, ir.Expr) and arg_def.op == "typed_getitem":
            argty = arg_def.dtype
            if isinstance(argty, types.BaseTuple):
                rewrite_statement(stmt, val, argty.count)

    for blk in func_ir.blocks.values():
        for stmt in blk.body:
            if isinstance(stmt, ir.Assign):
                val = stmt.value
                if isinstance(val, ir.Expr):
                    # Both helpers receive the original expression; each
                    # acts only on its own ``op`` so at most one rewrites.
                    rewrite_array_ndim(stmt, val)
                    rewrite_tuple_len(stmt, val)

    if DEBUG > 1:
        print("after".center(80, "*"))
        func_ir.dump()
        print("-" * 80)
371
+
372
+
373
def find_literally_calls(func_ir, argtypes):
    """Scan the given IR for `numba.literally` calls on function arguments.

    If an argument marked that way has not been typed as a literal, a
    `ForceLiteralArg` exception is raised, carrying all the marked argument
    positions and the location of the first call that marked the offending
    argument.

    Parameters
    ----------

    func_ir : numba.ir.FunctionIR

    argtypes : Sequence[numba.types.Type]
        The argument types.
    """
    from numba.cuda.core import ir_utils

    literal_positions = set()
    first_seen_at = {}

    # Pass 1: collect argument positions passed to ``literally``.
    for block in func_ir.blocks.values():
        for call in block.find_exprs(op="call"):
            callee_def = ir_utils.guard(
                ir_utils.get_definition, func_ir, call.func
            )
            if isinstance(callee_def, (ir.Global, ir.FreeVar)):
                callee = callee_def.value
            else:
                callee = ir_utils.guard(
                    ir_utils.resolve_func_from_module, func_ir, callee_def
                )
            if callee is not special.literally:
                continue

            [operand] = call.args
            operand_def = func_ir.get_definition(operand)
            if not isinstance(operand_def, ir.Arg):
                continue
            position = operand_def.index
            literal_positions.add(position)
            first_seen_at.setdefault(position, call.loc)

    # Pass 2: signal the dispatcher to force literal typing where needed.
    for position in literal_positions:
        argty = argtypes[position]
        unresolved_initial = (
            isinstance(argty, types.InitialValue)
            and argty.initial_value is None
        )
        if unresolved_initial or not isinstance(
            argty, (types.Literal, types.InitialValue)
        ):
            raise errors.ForceLiteralArg(
                literal_positions, loc=first_seen_at[position]
            )
422
+
423
+
424
# Extension point: {statement type: function(stmt, use_alloca_vars)}.
ir_extension_use_alloca = {}


def must_use_alloca(blocks):
    """
    Collect the variables that must be stack allocated with alloca.

    Every statement of every block is looked up by its exact type in
    ``ir_extension_use_alloca``; when a handler is registered it is called
    with the statement and the result set, which it may extend. Statements
    without a registered handler contribute nothing.

    Per the original note, parfors are currently the only IR node type
    expected to register here.

    Returns the set filled in by the handlers.
    """
    stack_allocated = set()

    for block in blocks.values():
        for inst in block.body:
            kind = type(inst)
            if kind in ir_extension_use_alloca:
                ir_extension_use_alloca[kind](inst, stack_allocated)

    return stack_allocated
448
+
449
+
450
# Used to describe a nullified condition in dead branch pruning
nullified = namedtuple("nullified", "condition, taken_br, rewrite_stmt")


def dead_branch_prune(func_ir, called_args):
    """
    Removes dead branches based on constant inference from function args.
    This directly mutates the IR.

    func_ir is the IR
    called_args are the actual arguments with which the function is called

    Outline:

    1. Find every block ending in an ``ir.Branch`` whose predicate is a
       ``bool(...)`` call.
    2. If the condition is a binop with both operands resolvable (argument
       types or constants), or is itself a constant node, evaluate it and
       replace the branch with an ``ir.Jump`` to the surviving target.
    3. Rewrite pruned binop conditions into ``ir.Const`` nodes and keep
       ``func_ir._definitions`` in sync.
    4. Recompute the CFG, fix up phi nodes whose incoming edges changed,
       delete dead blocks and refresh ``func_ir._consts``.
    """
    from numba.cuda.core.ir_utils import (
        get_definition,
        guard,
        find_const,
        GuardException,
    )

    DEBUG = 0

    def find_branches(func_ir):
        # find *all* branches
        branches = []
        for blk in func_ir.blocks.values():
            branch_or_jump = blk.body[-1]
            if isinstance(branch_or_jump, ir.Branch):
                branch = branch_or_jump
                pred = guard(get_definition, func_ir, branch.cond.name)
                if pred is not None and getattr(pred, "op", None) == "call":
                    function = guard(get_definition, func_ir, pred.func)
                    if (
                        function is not None
                        and isinstance(function, ir.Global)
                        and function.value is bool
                    ):
                        condition = guard(get_definition, func_ir, pred.args[0])
                        if condition is not None:
                            branches.append((branch, condition, blk))
        return branches

    def do_prune(branch, take_truebr, blk):
        # The branch is passed in explicitly rather than being picked up
        # from whatever the enclosing loop variable happens to be bound to.
        keep = branch.truebr if take_truebr else branch.falsebr
        # replace the branch with a direct jump
        jmp = ir.Jump(keep, loc=branch.loc)
        blk.body[-1] = jmp
        return 1 if keep == branch.truebr else 0

    def prune_by_type(branch, condition, blk, *conds):
        # this prunes a given branch and fixes up the IR
        # at least one needs to be a NoneType
        lhs_cond, rhs_cond = conds
        lhs_none = isinstance(lhs_cond, types.NoneType)
        rhs_none = isinstance(rhs_cond, types.NoneType)
        if lhs_none or rhs_none:
            try:
                take_truebr = condition.fn(lhs_cond, rhs_cond)
            except Exception:
                # The comparison could not be evaluated statically; this is
                # a deliberate best-effort, so leave the branch alone.
                return False, None
            if DEBUG > 0:
                kill = branch.falsebr if take_truebr else branch.truebr
                print(
                    "Pruning %s" % kill,
                    branch,
                    lhs_cond,
                    rhs_cond,
                    condition.fn,
                )
            taken = do_prune(branch, take_truebr, blk)
            return True, taken
        return False, None

    def prune_by_value(branch, condition, blk, *conds):
        lhs_cond, rhs_cond = conds
        try:
            take_truebr = condition.fn(lhs_cond, rhs_cond)
        except Exception:
            # Best-effort evaluation, see prune_by_type.
            return False, None
        if DEBUG > 0:
            kill = branch.falsebr if take_truebr else branch.truebr
            print("Pruning %s" % kill, branch, lhs_cond, rhs_cond, condition.fn)
        do_prune(branch, take_truebr, blk)
        # It is not safe to rewrite the predicate to a nominal value based on
        # which branch is taken, the rewritten const predicate needs to
        # hold the actual computed const value as something else may refer to
        # it!
        return True, take_truebr

    def prune_by_predicate(branch, pred, blk):
        try:
            # Just to prevent accidents, whilst already guarded, ensure this
            # is an ir.Const
            if not isinstance(pred, (ir.Const, ir.FreeVar, ir.Global)):
                raise TypeError("Expected constant Numba IR node")
            take_truebr = bool(pred.value)
        except TypeError:
            return False, None
        if DEBUG > 0:
            kill = branch.falsebr if take_truebr else branch.truebr
            print("Pruning %s" % kill, branch, pred)
        taken = do_prune(branch, take_truebr, blk)
        return True, taken

    class Unknown(object):
        # Sentinel type: "could not be resolved to a constant".
        pass

    def resolve_input_arg_const(input_arg_idx):
        """
        Resolves an input arg to a constant (if possible)
        """
        input_arg_ty = called_args[input_arg_idx]

        # comparing to None?
        if isinstance(input_arg_ty, types.NoneType):
            return input_arg_ty

        # is it a kwarg default
        if isinstance(input_arg_ty, types.Omitted):
            val = input_arg_ty.value
            if isinstance(val, types.NoneType):
                return val
            elif val is None:
                return types.NoneType("none")

        # literal type, return the type itself so comparisons like `x == None`
        # still work as e.g. x = types.int64 will never be None/NoneType so
        # the branch can still be pruned
        return getattr(input_arg_ty, "literal_type", Unknown())

    if DEBUG > 1:
        print("before".center(80, "-"))
        # Called bare, as in rewrite_semantic_constants, so that no stray
        # return value of dump() is printed.
        func_ir.dump()

    # Remember every phi node together with its block label and assignment
    # so they can be fixed up once the CFG has changed.
    phi2lbl = dict()
    phi2asgn = dict()
    for lbl, blk in func_ir.blocks.items():
        for stmt in blk.body:
            if isinstance(stmt, ir.Assign):
                if isinstance(stmt.value, ir.Expr) and stmt.value.op == "phi":
                    phi2lbl[stmt.value] = lbl
                    phi2asgn[stmt.value] = stmt

    # This looks for branches where:
    # at least one arg of the condition is in input args and const
    # at least one an arg of the condition is a const
    # if the condition is met it will replace the branch with a jump
    branch_info = find_branches(func_ir)
    # stores conditions that have no impact post prune
    nullified_conditions = []

    for branch, condition, blk in branch_info:
        const_conds = []
        if isinstance(condition, ir.Expr) and condition.op == "binop":
            prune = prune_by_value
            for arg in [condition.lhs, condition.rhs]:
                resolved_const = Unknown()
                arg_def = guard(get_definition, func_ir, arg)
                if isinstance(arg_def, ir.Arg):
                    # it's an e.g. literal argument to the function
                    resolved_const = resolve_input_arg_const(arg_def.index)
                    prune = prune_by_type
                else:
                    # it's some const argument to the function, cannot use guard
                    # here as the const itself may be None
                    try:
                        resolved_const = find_const(func_ir, arg)
                        if resolved_const is None:
                            resolved_const = types.NoneType("none")
                    except GuardException:
                        pass

                if not isinstance(resolved_const, Unknown):
                    const_conds.append(resolved_const)

            # lhs/rhs are consts
            if len(const_conds) == 2:
                # prune the branch, switch the branch for an unconditional jump
                prune_stat, taken = prune(branch, condition, blk, *const_conds)
                if prune_stat:
                    # add the condition to the list of nullified conditions
                    nullified_conditions.append(
                        nullified(condition, taken, True)
                    )
        else:
            # see if this is a branch on a constant value predicate
            resolved_const = Unknown()
            try:
                pred_call = get_definition(func_ir, branch.cond)
                resolved_const = find_const(func_ir, pred_call.args[0])
                if resolved_const is None:
                    resolved_const = types.NoneType("none")
            except GuardException:
                pass

            if not isinstance(resolved_const, Unknown):
                prune_stat, taken = prune_by_predicate(branch, condition, blk)
                if prune_stat:
                    # add the condition to the list of nullified conditions
                    nullified_conditions.append(
                        nullified(condition, taken, False)
                    )

    # 'ERE BE DRAGONS...
    # It is the evaluation of the condition expression that often trips up type
    # inference, so ideally it would be removed as it is effectively rendered
    # dead by the unconditional jump if a branch was pruned. However, there may
    # be references to the condition that exist in multiple places (e.g. dels)
    # and we cannot run DCE here as typing has not taken place to give enough
    # information to run DCE safely. Upshot of all this is the condition gets
    # rewritten below into a benign const that typing will be happy with and DCE
    # can remove it and its reference post typing when it is safe to do so
    # (if desired). It is required that the const is assigned a value that
    # indicates the branch taken as its mutated value would be read in the case
    # of object mode fall back in place of the condition itself. For
    # completeness the func_ir._definitions and ._consts are also updated to
    # make the IR state self consistent.

    deadcond = [x.condition for x in nullified_conditions]
    for _, cond, blk in branch_info:
        if cond in deadcond:
            for x in blk.body:
                if isinstance(x, ir.Assign) and x.value is cond:
                    # rewrite the condition as a true/false bit
                    nullified_info = nullified_conditions[deadcond.index(cond)]
                    # only do a rewrite of conditions, predicates need to retain
                    # their value as they may be used later.
                    if nullified_info.rewrite_stmt:
                        branch_bit = nullified_info.taken_br
                        x.value = ir.Const(branch_bit, loc=x.loc)
                        # update the specific definition to the new const
                        defns = func_ir._definitions[x.target.name]
                        repl_idx = defns.index(cond)
                        defns[repl_idx] = x.value

    # Check post dominators of dead nodes from in the original CFG for use of
    # vars that are being removed in the dead blocks which might be referred to
    # by phi nodes.
    #
    # Multiple things to fix up:
    #
    # 1. Cases like:
    #
    #        A        A
    #        |\       |
    #        | B  --> B
    #        |/       |
    #        C        C
    #
    # i.e. the branch is dead but the block is still alive. In this case CFG
    # simplification will fuse A-B-C and any phi in C can be updated as an
    # direct assignment from the last assigned version in the dominators of the
    # fused block.
    #
    # 2. Cases like:
    #
    #        A        A
    #       / \       |
    #      B   C  --> B
    #       \ /       |
    #        D        D
    #
    # i.e. the block C is dead. In this case the phis in D need updating to
    # reflect the collapse of the phi condition. This should result in a direct
    # assignment of the surviving version in B to the LHS of the phi in D.

    new_cfg = compute_cfg_from_blocks(func_ir.blocks)
    dead_blocks = new_cfg.dead_nodes()

    # for all phis that are still in live blocks.
    for phi, lbl in phi2lbl.items():
        if lbl in dead_blocks:
            continue
        new_incoming = [x[0] for x in new_cfg.predecessors(lbl)]
        if set(new_incoming) != set(phi.incoming_blocks):
            # Something has changed in the CFG...
            if len(new_incoming) == 1:
                # There's now just one incoming. Replace the PHI node by a
                # direct assignment
                idx = phi.incoming_blocks.index(new_incoming[0])
                phi2asgn[phi].value = phi.incoming_values[idx]
            else:
                # There's more than one incoming still, then look through the
                # incoming and remove dead
                ic_val_tmp = []
                ic_blk_tmp = []
                for ic_val, ic_blk in zip(
                    phi.incoming_values, phi.incoming_blocks
                ):
                    if ic_blk in dead_blocks:
                        continue
                    else:
                        ic_val_tmp.append(ic_val)
                        ic_blk_tmp.append(ic_blk)
                # Mutate the existing lists in place so other references to
                # them observe the update.
                phi.incoming_values.clear()
                phi.incoming_values.extend(ic_val_tmp)
                phi.incoming_blocks.clear()
                phi.incoming_blocks.extend(ic_blk_tmp)

    # Remove dead blocks, this is safe as it relies on the CFG only.
    for dead in dead_blocks:
        del func_ir.blocks[dead]

    # if conditions were nullified then consts were rewritten, update
    if nullified_conditions:
        func_ir._consts = consts.ConstantInference(func_ir)

    if DEBUG > 1:
        print("after".center(80, "-"))
        func_ir.dump()