numba-cuda 0.21.1__cp313-cp313-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (488) hide show
  1. _numba_cuda_redirector.pth +4 -0
  2. _numba_cuda_redirector.py +89 -0
  3. numba_cuda/VERSION +1 -0
  4. numba_cuda/__init__.py +6 -0
  5. numba_cuda/_version.py +11 -0
  6. numba_cuda/numba/cuda/__init__.py +70 -0
  7. numba_cuda/numba/cuda/_internal/cuda_bf16.py +16394 -0
  8. numba_cuda/numba/cuda/_internal/cuda_fp16.py +8112 -0
  9. numba_cuda/numba/cuda/api.py +577 -0
  10. numba_cuda/numba/cuda/api_util.py +76 -0
  11. numba_cuda/numba/cuda/args.py +72 -0
  12. numba_cuda/numba/cuda/bf16.py +397 -0
  13. numba_cuda/numba/cuda/cache_hints.py +287 -0
  14. numba_cuda/numba/cuda/cext/__init__.py +2 -0
  15. numba_cuda/numba/cuda/cext/_devicearray.cp313-win_amd64.pyd +0 -0
  16. numba_cuda/numba/cuda/cext/_devicearray.cpp +159 -0
  17. numba_cuda/numba/cuda/cext/_devicearray.h +29 -0
  18. numba_cuda/numba/cuda/cext/_dispatcher.cp313-win_amd64.pyd +0 -0
  19. numba_cuda/numba/cuda/cext/_dispatcher.cpp +1098 -0
  20. numba_cuda/numba/cuda/cext/_hashtable.cpp +532 -0
  21. numba_cuda/numba/cuda/cext/_hashtable.h +135 -0
  22. numba_cuda/numba/cuda/cext/_helperlib.c +71 -0
  23. numba_cuda/numba/cuda/cext/_helperlib.cp313-win_amd64.pyd +0 -0
  24. numba_cuda/numba/cuda/cext/_helpermod.c +82 -0
  25. numba_cuda/numba/cuda/cext/_pymodule.h +38 -0
  26. numba_cuda/numba/cuda/cext/_typeconv.cp313-win_amd64.pyd +0 -0
  27. numba_cuda/numba/cuda/cext/_typeconv.cpp +206 -0
  28. numba_cuda/numba/cuda/cext/_typeof.cpp +1159 -0
  29. numba_cuda/numba/cuda/cext/_typeof.h +19 -0
  30. numba_cuda/numba/cuda/cext/capsulethunk.h +111 -0
  31. numba_cuda/numba/cuda/cext/mviewbuf.c +385 -0
  32. numba_cuda/numba/cuda/cext/mviewbuf.cp313-win_amd64.pyd +0 -0
  33. numba_cuda/numba/cuda/cext/typeconv.cpp +212 -0
  34. numba_cuda/numba/cuda/cext/typeconv.hpp +101 -0
  35. numba_cuda/numba/cuda/cg.py +67 -0
  36. numba_cuda/numba/cuda/cgutils.py +1294 -0
  37. numba_cuda/numba/cuda/cloudpickle/__init__.py +21 -0
  38. numba_cuda/numba/cuda/cloudpickle/cloudpickle.py +1598 -0
  39. numba_cuda/numba/cuda/cloudpickle/cloudpickle_fast.py +17 -0
  40. numba_cuda/numba/cuda/codegen.py +541 -0
  41. numba_cuda/numba/cuda/compiler.py +1396 -0
  42. numba_cuda/numba/cuda/core/analysis.py +758 -0
  43. numba_cuda/numba/cuda/core/annotations/__init__.py +0 -0
  44. numba_cuda/numba/cuda/core/annotations/pretty_annotate.py +288 -0
  45. numba_cuda/numba/cuda/core/annotations/type_annotations.py +305 -0
  46. numba_cuda/numba/cuda/core/base.py +1332 -0
  47. numba_cuda/numba/cuda/core/boxing.py +1411 -0
  48. numba_cuda/numba/cuda/core/bytecode.py +728 -0
  49. numba_cuda/numba/cuda/core/byteflow.py +2346 -0
  50. numba_cuda/numba/cuda/core/caching.py +744 -0
  51. numba_cuda/numba/cuda/core/callconv.py +392 -0
  52. numba_cuda/numba/cuda/core/codegen.py +171 -0
  53. numba_cuda/numba/cuda/core/compiler.py +199 -0
  54. numba_cuda/numba/cuda/core/compiler_lock.py +85 -0
  55. numba_cuda/numba/cuda/core/compiler_machinery.py +497 -0
  56. numba_cuda/numba/cuda/core/config.py +650 -0
  57. numba_cuda/numba/cuda/core/consts.py +124 -0
  58. numba_cuda/numba/cuda/core/controlflow.py +989 -0
  59. numba_cuda/numba/cuda/core/entrypoints.py +57 -0
  60. numba_cuda/numba/cuda/core/environment.py +66 -0
  61. numba_cuda/numba/cuda/core/errors.py +917 -0
  62. numba_cuda/numba/cuda/core/event.py +511 -0
  63. numba_cuda/numba/cuda/core/funcdesc.py +330 -0
  64. numba_cuda/numba/cuda/core/generators.py +387 -0
  65. numba_cuda/numba/cuda/core/imputils.py +509 -0
  66. numba_cuda/numba/cuda/core/inline_closurecall.py +1787 -0
  67. numba_cuda/numba/cuda/core/interpreter.py +3617 -0
  68. numba_cuda/numba/cuda/core/ir.py +1812 -0
  69. numba_cuda/numba/cuda/core/ir_utils.py +2638 -0
  70. numba_cuda/numba/cuda/core/optional.py +129 -0
  71. numba_cuda/numba/cuda/core/options.py +262 -0
  72. numba_cuda/numba/cuda/core/postproc.py +249 -0
  73. numba_cuda/numba/cuda/core/pythonapi.py +1859 -0
  74. numba_cuda/numba/cuda/core/registry.py +46 -0
  75. numba_cuda/numba/cuda/core/removerefctpass.py +123 -0
  76. numba_cuda/numba/cuda/core/rewrites/__init__.py +26 -0
  77. numba_cuda/numba/cuda/core/rewrites/ir_print.py +91 -0
  78. numba_cuda/numba/cuda/core/rewrites/registry.py +104 -0
  79. numba_cuda/numba/cuda/core/rewrites/static_binop.py +41 -0
  80. numba_cuda/numba/cuda/core/rewrites/static_getitem.py +189 -0
  81. numba_cuda/numba/cuda/core/rewrites/static_raise.py +100 -0
  82. numba_cuda/numba/cuda/core/sigutils.py +68 -0
  83. numba_cuda/numba/cuda/core/ssa.py +498 -0
  84. numba_cuda/numba/cuda/core/targetconfig.py +330 -0
  85. numba_cuda/numba/cuda/core/tracing.py +231 -0
  86. numba_cuda/numba/cuda/core/transforms.py +956 -0
  87. numba_cuda/numba/cuda/core/typed_passes.py +867 -0
  88. numba_cuda/numba/cuda/core/typeinfer.py +1950 -0
  89. numba_cuda/numba/cuda/core/unsafe/__init__.py +0 -0
  90. numba_cuda/numba/cuda/core/unsafe/bytes.py +67 -0
  91. numba_cuda/numba/cuda/core/unsafe/eh.py +67 -0
  92. numba_cuda/numba/cuda/core/unsafe/refcount.py +98 -0
  93. numba_cuda/numba/cuda/core/untyped_passes.py +1979 -0
  94. numba_cuda/numba/cuda/cpython/builtins.py +1153 -0
  95. numba_cuda/numba/cuda/cpython/charseq.py +1218 -0
  96. numba_cuda/numba/cuda/cpython/cmathimpl.py +560 -0
  97. numba_cuda/numba/cuda/cpython/enumimpl.py +103 -0
  98. numba_cuda/numba/cuda/cpython/iterators.py +167 -0
  99. numba_cuda/numba/cuda/cpython/listobj.py +1326 -0
  100. numba_cuda/numba/cuda/cpython/mathimpl.py +499 -0
  101. numba_cuda/numba/cuda/cpython/numbers.py +1475 -0
  102. numba_cuda/numba/cuda/cpython/rangeobj.py +289 -0
  103. numba_cuda/numba/cuda/cpython/slicing.py +322 -0
  104. numba_cuda/numba/cuda/cpython/tupleobj.py +456 -0
  105. numba_cuda/numba/cuda/cpython/unicode.py +2865 -0
  106. numba_cuda/numba/cuda/cpython/unicode_support.py +1597 -0
  107. numba_cuda/numba/cuda/cpython/unsafe/__init__.py +0 -0
  108. numba_cuda/numba/cuda/cpython/unsafe/numbers.py +64 -0
  109. numba_cuda/numba/cuda/cpython/unsafe/tuple.py +92 -0
  110. numba_cuda/numba/cuda/cuda_paths.py +691 -0
  111. numba_cuda/numba/cuda/cudadecl.py +556 -0
  112. numba_cuda/numba/cuda/cudadrv/__init__.py +14 -0
  113. numba_cuda/numba/cuda/cudadrv/devicearray.py +951 -0
  114. numba_cuda/numba/cuda/cudadrv/devices.py +249 -0
  115. numba_cuda/numba/cuda/cudadrv/driver.py +3222 -0
  116. numba_cuda/numba/cuda/cudadrv/drvapi.py +435 -0
  117. numba_cuda/numba/cuda/cudadrv/dummyarray.py +558 -0
  118. numba_cuda/numba/cuda/cudadrv/enums.py +613 -0
  119. numba_cuda/numba/cuda/cudadrv/error.py +48 -0
  120. numba_cuda/numba/cuda/cudadrv/libs.py +220 -0
  121. numba_cuda/numba/cuda/cudadrv/linkable_code.py +184 -0
  122. numba_cuda/numba/cuda/cudadrv/mappings.py +14 -0
  123. numba_cuda/numba/cuda/cudadrv/ndarray.py +26 -0
  124. numba_cuda/numba/cuda/cudadrv/nvrtc.py +193 -0
  125. numba_cuda/numba/cuda/cudadrv/nvvm.py +756 -0
  126. numba_cuda/numba/cuda/cudadrv/rtapi.py +13 -0
  127. numba_cuda/numba/cuda/cudadrv/runtime.py +34 -0
  128. numba_cuda/numba/cuda/cudaimpl.py +995 -0
  129. numba_cuda/numba/cuda/cudamath.py +149 -0
  130. numba_cuda/numba/cuda/datamodel/__init__.py +7 -0
  131. numba_cuda/numba/cuda/datamodel/cuda_manager.py +66 -0
  132. numba_cuda/numba/cuda/datamodel/cuda_models.py +1446 -0
  133. numba_cuda/numba/cuda/datamodel/cuda_packer.py +224 -0
  134. numba_cuda/numba/cuda/datamodel/cuda_registry.py +22 -0
  135. numba_cuda/numba/cuda/datamodel/cuda_testing.py +153 -0
  136. numba_cuda/numba/cuda/datamodel/manager.py +11 -0
  137. numba_cuda/numba/cuda/datamodel/models.py +9 -0
  138. numba_cuda/numba/cuda/datamodel/packer.py +9 -0
  139. numba_cuda/numba/cuda/datamodel/registry.py +11 -0
  140. numba_cuda/numba/cuda/datamodel/testing.py +11 -0
  141. numba_cuda/numba/cuda/debuginfo.py +903 -0
  142. numba_cuda/numba/cuda/decorators.py +294 -0
  143. numba_cuda/numba/cuda/descriptor.py +35 -0
  144. numba_cuda/numba/cuda/device_init.py +158 -0
  145. numba_cuda/numba/cuda/deviceufunc.py +1021 -0
  146. numba_cuda/numba/cuda/dispatcher.py +2463 -0
  147. numba_cuda/numba/cuda/errors.py +72 -0
  148. numba_cuda/numba/cuda/extending.py +697 -0
  149. numba_cuda/numba/cuda/flags.py +178 -0
  150. numba_cuda/numba/cuda/fp16.py +357 -0
  151. numba_cuda/numba/cuda/include/12/cuda_bf16.h +5118 -0
  152. numba_cuda/numba/cuda/include/12/cuda_bf16.hpp +3865 -0
  153. numba_cuda/numba/cuda/include/12/cuda_fp16.h +5363 -0
  154. numba_cuda/numba/cuda/include/12/cuda_fp16.hpp +3483 -0
  155. numba_cuda/numba/cuda/include/13/cuda_bf16.h +5118 -0
  156. numba_cuda/numba/cuda/include/13/cuda_bf16.hpp +3865 -0
  157. numba_cuda/numba/cuda/include/13/cuda_fp16.h +5363 -0
  158. numba_cuda/numba/cuda/include/13/cuda_fp16.hpp +3483 -0
  159. numba_cuda/numba/cuda/initialize.py +24 -0
  160. numba_cuda/numba/cuda/intrinsic_wrapper.py +41 -0
  161. numba_cuda/numba/cuda/intrinsics.py +382 -0
  162. numba_cuda/numba/cuda/itanium_mangler.py +214 -0
  163. numba_cuda/numba/cuda/kernels/__init__.py +2 -0
  164. numba_cuda/numba/cuda/kernels/reduction.py +265 -0
  165. numba_cuda/numba/cuda/kernels/transpose.py +65 -0
  166. numba_cuda/numba/cuda/libdevice.py +3386 -0
  167. numba_cuda/numba/cuda/libdevicedecl.py +20 -0
  168. numba_cuda/numba/cuda/libdevicefuncs.py +1060 -0
  169. numba_cuda/numba/cuda/libdeviceimpl.py +88 -0
  170. numba_cuda/numba/cuda/locks.py +19 -0
  171. numba_cuda/numba/cuda/lowering.py +1951 -0
  172. numba_cuda/numba/cuda/mathimpl.py +374 -0
  173. numba_cuda/numba/cuda/memory_management/__init__.py +4 -0
  174. numba_cuda/numba/cuda/memory_management/memsys.cu +99 -0
  175. numba_cuda/numba/cuda/memory_management/memsys.cuh +22 -0
  176. numba_cuda/numba/cuda/memory_management/nrt.cu +212 -0
  177. numba_cuda/numba/cuda/memory_management/nrt.cuh +48 -0
  178. numba_cuda/numba/cuda/memory_management/nrt.py +390 -0
  179. numba_cuda/numba/cuda/memory_management/nrt_context.py +438 -0
  180. numba_cuda/numba/cuda/misc/appdirs.py +594 -0
  181. numba_cuda/numba/cuda/misc/cffiimpl.py +24 -0
  182. numba_cuda/numba/cuda/misc/coverage_support.py +43 -0
  183. numba_cuda/numba/cuda/misc/dump_style.py +41 -0
  184. numba_cuda/numba/cuda/misc/findlib.py +75 -0
  185. numba_cuda/numba/cuda/misc/firstlinefinder.py +96 -0
  186. numba_cuda/numba/cuda/misc/gdb_hook.py +240 -0
  187. numba_cuda/numba/cuda/misc/literal.py +28 -0
  188. numba_cuda/numba/cuda/misc/llvm_pass_timings.py +412 -0
  189. numba_cuda/numba/cuda/misc/special.py +94 -0
  190. numba_cuda/numba/cuda/models.py +56 -0
  191. numba_cuda/numba/cuda/np/arraymath.py +5130 -0
  192. numba_cuda/numba/cuda/np/arrayobj.py +7635 -0
  193. numba_cuda/numba/cuda/np/extensions.py +11 -0
  194. numba_cuda/numba/cuda/np/linalg.py +3087 -0
  195. numba_cuda/numba/cuda/np/math/__init__.py +0 -0
  196. numba_cuda/numba/cuda/np/math/cmathimpl.py +558 -0
  197. numba_cuda/numba/cuda/np/math/mathimpl.py +487 -0
  198. numba_cuda/numba/cuda/np/math/numbers.py +1461 -0
  199. numba_cuda/numba/cuda/np/npdatetime.py +969 -0
  200. numba_cuda/numba/cuda/np/npdatetime_helpers.py +217 -0
  201. numba_cuda/numba/cuda/np/npyfuncs.py +1808 -0
  202. numba_cuda/numba/cuda/np/npyimpl.py +1027 -0
  203. numba_cuda/numba/cuda/np/numpy_support.py +798 -0
  204. numba_cuda/numba/cuda/np/polynomial/__init__.py +4 -0
  205. numba_cuda/numba/cuda/np/polynomial/polynomial_core.py +242 -0
  206. numba_cuda/numba/cuda/np/polynomial/polynomial_functions.py +380 -0
  207. numba_cuda/numba/cuda/np/ufunc/__init__.py +4 -0
  208. numba_cuda/numba/cuda/np/ufunc/decorators.py +203 -0
  209. numba_cuda/numba/cuda/np/ufunc/sigparse.py +68 -0
  210. numba_cuda/numba/cuda/np/ufunc/ufuncbuilder.py +65 -0
  211. numba_cuda/numba/cuda/np/ufunc_db.py +1282 -0
  212. numba_cuda/numba/cuda/np/unsafe/__init__.py +0 -0
  213. numba_cuda/numba/cuda/np/unsafe/ndarray.py +84 -0
  214. numba_cuda/numba/cuda/nvvmutils.py +254 -0
  215. numba_cuda/numba/cuda/printimpl.py +126 -0
  216. numba_cuda/numba/cuda/random.py +308 -0
  217. numba_cuda/numba/cuda/reshape_funcs.cu +156 -0
  218. numba_cuda/numba/cuda/serialize.py +267 -0
  219. numba_cuda/numba/cuda/simulator/__init__.py +63 -0
  220. numba_cuda/numba/cuda/simulator/_internal/__init__.py +4 -0
  221. numba_cuda/numba/cuda/simulator/_internal/cuda_bf16.py +2 -0
  222. numba_cuda/numba/cuda/simulator/api.py +179 -0
  223. numba_cuda/numba/cuda/simulator/bf16.py +4 -0
  224. numba_cuda/numba/cuda/simulator/compiler.py +38 -0
  225. numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +11 -0
  226. numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +462 -0
  227. numba_cuda/numba/cuda/simulator/cudadrv/devices.py +122 -0
  228. numba_cuda/numba/cuda/simulator/cudadrv/driver.py +66 -0
  229. numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +7 -0
  230. numba_cuda/numba/cuda/simulator/cudadrv/dummyarray.py +7 -0
  231. numba_cuda/numba/cuda/simulator/cudadrv/error.py +10 -0
  232. numba_cuda/numba/cuda/simulator/cudadrv/libs.py +10 -0
  233. numba_cuda/numba/cuda/simulator/cudadrv/linkable_code.py +61 -0
  234. numba_cuda/numba/cuda/simulator/cudadrv/nvrtc.py +11 -0
  235. numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +32 -0
  236. numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +22 -0
  237. numba_cuda/numba/cuda/simulator/dispatcher.py +11 -0
  238. numba_cuda/numba/cuda/simulator/kernel.py +320 -0
  239. numba_cuda/numba/cuda/simulator/kernelapi.py +509 -0
  240. numba_cuda/numba/cuda/simulator/memory_management/__init__.py +4 -0
  241. numba_cuda/numba/cuda/simulator/memory_management/nrt.py +21 -0
  242. numba_cuda/numba/cuda/simulator/reduction.py +19 -0
  243. numba_cuda/numba/cuda/simulator/tests/support.py +4 -0
  244. numba_cuda/numba/cuda/simulator/vector_types.py +65 -0
  245. numba_cuda/numba/cuda/simulator_init.py +18 -0
  246. numba_cuda/numba/cuda/stubs.py +635 -0
  247. numba_cuda/numba/cuda/target.py +505 -0
  248. numba_cuda/numba/cuda/testing.py +347 -0
  249. numba_cuda/numba/cuda/tests/__init__.py +62 -0
  250. numba_cuda/numba/cuda/tests/benchmarks/__init__.py +0 -0
  251. numba_cuda/numba/cuda/tests/benchmarks/test_kernel_launch.py +119 -0
  252. numba_cuda/numba/cuda/tests/cloudpickle_main_class.py +9 -0
  253. numba_cuda/numba/cuda/tests/core/serialize_usecases.py +113 -0
  254. numba_cuda/numba/cuda/tests/core/test_itanium_mangler.py +83 -0
  255. numba_cuda/numba/cuda/tests/core/test_serialize.py +371 -0
  256. numba_cuda/numba/cuda/tests/cudadrv/__init__.py +9 -0
  257. numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +147 -0
  258. numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +161 -0
  259. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +397 -0
  260. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +24 -0
  261. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +180 -0
  262. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +313 -0
  263. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +187 -0
  264. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +621 -0
  265. numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +247 -0
  266. numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +100 -0
  267. numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +198 -0
  268. numba_cuda/numba/cuda/tests/cudadrv/test_events.py +53 -0
  269. numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +72 -0
  270. numba_cuda/numba/cuda/tests/cudadrv/test_init.py +138 -0
  271. numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +43 -0
  272. numba_cuda/numba/cuda/tests/cudadrv/test_is_fp16.py +15 -0
  273. numba_cuda/numba/cuda/tests/cudadrv/test_linkable_code.py +58 -0
  274. numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +348 -0
  275. numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +128 -0
  276. numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py +301 -0
  277. numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +174 -0
  278. numba_cuda/numba/cuda/tests/cudadrv/test_nvrtc.py +28 -0
  279. numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +185 -0
  280. numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +39 -0
  281. numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +23 -0
  282. numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +38 -0
  283. numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +48 -0
  284. numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +44 -0
  285. numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +127 -0
  286. numba_cuda/numba/cuda/tests/cudapy/__init__.py +9 -0
  287. numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +231 -0
  288. numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +50 -0
  289. numba_cuda/numba/cuda/tests/cudapy/cg_cache_usecases.py +36 -0
  290. numba_cuda/numba/cuda/tests/cudapy/complex_usecases.py +116 -0
  291. numba_cuda/numba/cuda/tests/cudapy/enum_usecases.py +59 -0
  292. numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +62 -0
  293. numba_cuda/numba/cuda/tests/cudapy/jitlink.ptx +28 -0
  294. numba_cuda/numba/cuda/tests/cudapy/overload_usecases.py +33 -0
  295. numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +104 -0
  296. numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +47 -0
  297. numba_cuda/numba/cuda/tests/cudapy/test_analysis.py +1122 -0
  298. numba_cuda/numba/cuda/tests/cudapy/test_array.py +344 -0
  299. numba_cuda/numba/cuda/tests/cudapy/test_array_alignment.py +268 -0
  300. numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +203 -0
  301. numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +63 -0
  302. numba_cuda/numba/cuda/tests/cudapy/test_array_reductions.py +360 -0
  303. numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +1815 -0
  304. numba_cuda/numba/cuda/tests/cudapy/test_bfloat16.py +599 -0
  305. numba_cuda/numba/cuda/tests/cudapy/test_bfloat16_bindings.py +377 -0
  306. numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +160 -0
  307. numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +27 -0
  308. numba_cuda/numba/cuda/tests/cudapy/test_byteflow.py +98 -0
  309. numba_cuda/numba/cuda/tests/cudapy/test_cache_hints.py +210 -0
  310. numba_cuda/numba/cuda/tests/cudapy/test_caching.py +683 -0
  311. numba_cuda/numba/cuda/tests/cudapy/test_casting.py +265 -0
  312. numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +42 -0
  313. numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +718 -0
  314. numba_cuda/numba/cuda/tests/cudapy/test_complex.py +370 -0
  315. numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +23 -0
  316. numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +142 -0
  317. numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +178 -0
  318. numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +193 -0
  319. numba_cuda/numba/cuda/tests/cudapy/test_copy_propagate.py +131 -0
  320. numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +438 -0
  321. numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +94 -0
  322. numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +101 -0
  323. numba_cuda/numba/cuda/tests/cudapy/test_debug.py +105 -0
  324. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +889 -0
  325. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo_types.py +476 -0
  326. numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +500 -0
  327. numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +820 -0
  328. numba_cuda/numba/cuda/tests/cudapy/test_enums.py +152 -0
  329. numba_cuda/numba/cuda/tests/cudapy/test_errors.py +111 -0
  330. numba_cuda/numba/cuda/tests/cudapy/test_exception.py +170 -0
  331. numba_cuda/numba/cuda/tests/cudapy/test_extending.py +1088 -0
  332. numba_cuda/numba/cuda/tests/cudapy/test_extending_types.py +71 -0
  333. numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +265 -0
  334. numba_cuda/numba/cuda/tests/cudapy/test_flow_control.py +1433 -0
  335. numba_cuda/numba/cuda/tests/cudapy/test_forall.py +57 -0
  336. numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +34 -0
  337. numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +69 -0
  338. numba_cuda/numba/cuda/tests/cudapy/test_globals.py +62 -0
  339. numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +474 -0
  340. numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +167 -0
  341. numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +92 -0
  342. numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +39 -0
  343. numba_cuda/numba/cuda/tests/cudapy/test_inline.py +170 -0
  344. numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +255 -0
  345. numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +1219 -0
  346. numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +263 -0
  347. numba_cuda/numba/cuda/tests/cudapy/test_ir.py +598 -0
  348. numba_cuda/numba/cuda/tests/cudapy/test_ir_utils.py +276 -0
  349. numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +101 -0
  350. numba_cuda/numba/cuda/tests/cudapy/test_lang.py +68 -0
  351. numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +123 -0
  352. numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +194 -0
  353. numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +220 -0
  354. numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +173 -0
  355. numba_cuda/numba/cuda/tests/cudapy/test_make_function_to_jit_function.py +364 -0
  356. numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +47 -0
  357. numba_cuda/numba/cuda/tests/cudapy/test_math.py +842 -0
  358. numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +76 -0
  359. numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +78 -0
  360. numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +25 -0
  361. numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +145 -0
  362. numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +39 -0
  363. numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +82 -0
  364. numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +53 -0
  365. numba_cuda/numba/cuda/tests/cudapy/test_operator.py +504 -0
  366. numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +93 -0
  367. numba_cuda/numba/cuda/tests/cudapy/test_overload.py +402 -0
  368. numba_cuda/numba/cuda/tests/cudapy/test_powi.py +128 -0
  369. numba_cuda/numba/cuda/tests/cudapy/test_print.py +193 -0
  370. numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +37 -0
  371. numba_cuda/numba/cuda/tests/cudapy/test_random.py +117 -0
  372. numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +614 -0
  373. numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +130 -0
  374. numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +94 -0
  375. numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +83 -0
  376. numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +86 -0
  377. numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +40 -0
  378. numba_cuda/numba/cuda/tests/cudapy/test_sm.py +457 -0
  379. numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +233 -0
  380. numba_cuda/numba/cuda/tests/cudapy/test_ssa.py +454 -0
  381. numba_cuda/numba/cuda/tests/cudapy/test_stream_api.py +56 -0
  382. numba_cuda/numba/cuda/tests/cudapy/test_sync.py +277 -0
  383. numba_cuda/numba/cuda/tests/cudapy/test_tracing.py +200 -0
  384. numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +90 -0
  385. numba_cuda/numba/cuda/tests/cudapy/test_typeconv.py +333 -0
  386. numba_cuda/numba/cuda/tests/cudapy/test_typeinfer.py +538 -0
  387. numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +585 -0
  388. numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +42 -0
  389. numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +485 -0
  390. numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +312 -0
  391. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +23 -0
  392. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +183 -0
  393. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +40 -0
  394. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +40 -0
  395. numba_cuda/numba/cuda/tests/cudapy/test_warning.py +206 -0
  396. numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +331 -0
  397. numba_cuda/numba/cuda/tests/cudasim/__init__.py +9 -0
  398. numba_cuda/numba/cuda/tests/cudasim/support.py +9 -0
  399. numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +111 -0
  400. numba_cuda/numba/cuda/tests/data/__init__.py +2 -0
  401. numba_cuda/numba/cuda/tests/data/cta_barrier.cu +28 -0
  402. numba_cuda/numba/cuda/tests/data/cuda_include.cu +10 -0
  403. numba_cuda/numba/cuda/tests/data/error.cu +12 -0
  404. numba_cuda/numba/cuda/tests/data/include/add.cuh +8 -0
  405. numba_cuda/numba/cuda/tests/data/jitlink.cu +28 -0
  406. numba_cuda/numba/cuda/tests/data/jitlink.ptx +49 -0
  407. numba_cuda/numba/cuda/tests/data/warn.cu +12 -0
  408. numba_cuda/numba/cuda/tests/doc_examples/__init__.py +9 -0
  409. numba_cuda/numba/cuda/tests/doc_examples/ffi/__init__.py +2 -0
  410. numba_cuda/numba/cuda/tests/doc_examples/ffi/functions.cu +54 -0
  411. numba_cuda/numba/cuda/tests/doc_examples/ffi/include/mul.cuh +8 -0
  412. numba_cuda/numba/cuda/tests/doc_examples/ffi/saxpy.cu +14 -0
  413. numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +86 -0
  414. numba_cuda/numba/cuda/tests/doc_examples/test_cpointer.py +68 -0
  415. numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +81 -0
  416. numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +141 -0
  417. numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +160 -0
  418. numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +180 -0
  419. numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +119 -0
  420. numba_cuda/numba/cuda/tests/doc_examples/test_random.py +66 -0
  421. numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +80 -0
  422. numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +206 -0
  423. numba_cuda/numba/cuda/tests/doc_examples/test_ufunc.py +53 -0
  424. numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +76 -0
  425. numba_cuda/numba/cuda/tests/nocuda/__init__.py +9 -0
  426. numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +391 -0
  427. numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +48 -0
  428. numba_cuda/numba/cuda/tests/nocuda/test_import.py +63 -0
  429. numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +252 -0
  430. numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +59 -0
  431. numba_cuda/numba/cuda/tests/nrt/__init__.py +9 -0
  432. numba_cuda/numba/cuda/tests/nrt/test_nrt.py +387 -0
  433. numba_cuda/numba/cuda/tests/nrt/test_nrt_refct.py +124 -0
  434. numba_cuda/numba/cuda/tests/support.py +900 -0
  435. numba_cuda/numba/cuda/typeconv/__init__.py +4 -0
  436. numba_cuda/numba/cuda/typeconv/castgraph.py +137 -0
  437. numba_cuda/numba/cuda/typeconv/rules.py +63 -0
  438. numba_cuda/numba/cuda/typeconv/typeconv.py +121 -0
  439. numba_cuda/numba/cuda/types/__init__.py +233 -0
  440. numba_cuda/numba/cuda/types/__init__.pyi +167 -0
  441. numba_cuda/numba/cuda/types/abstract.py +9 -0
  442. numba_cuda/numba/cuda/types/common.py +9 -0
  443. numba_cuda/numba/cuda/types/containers.py +9 -0
  444. numba_cuda/numba/cuda/types/cuda_abstract.py +533 -0
  445. numba_cuda/numba/cuda/types/cuda_common.py +110 -0
  446. numba_cuda/numba/cuda/types/cuda_containers.py +971 -0
  447. numba_cuda/numba/cuda/types/cuda_function_type.py +230 -0
  448. numba_cuda/numba/cuda/types/cuda_functions.py +798 -0
  449. numba_cuda/numba/cuda/types/cuda_iterators.py +120 -0
  450. numba_cuda/numba/cuda/types/cuda_misc.py +569 -0
  451. numba_cuda/numba/cuda/types/cuda_npytypes.py +690 -0
  452. numba_cuda/numba/cuda/types/cuda_scalars.py +280 -0
  453. numba_cuda/numba/cuda/types/ext_types.py +101 -0
  454. numba_cuda/numba/cuda/types/function_type.py +11 -0
  455. numba_cuda/numba/cuda/types/functions.py +9 -0
  456. numba_cuda/numba/cuda/types/iterators.py +9 -0
  457. numba_cuda/numba/cuda/types/misc.py +9 -0
  458. numba_cuda/numba/cuda/types/npytypes.py +9 -0
  459. numba_cuda/numba/cuda/types/scalars.py +9 -0
  460. numba_cuda/numba/cuda/typing/__init__.py +19 -0
  461. numba_cuda/numba/cuda/typing/arraydecl.py +939 -0
  462. numba_cuda/numba/cuda/typing/asnumbatype.py +130 -0
  463. numba_cuda/numba/cuda/typing/bufproto.py +70 -0
  464. numba_cuda/numba/cuda/typing/builtins.py +1209 -0
  465. numba_cuda/numba/cuda/typing/cffi_utils.py +219 -0
  466. numba_cuda/numba/cuda/typing/cmathdecl.py +47 -0
  467. numba_cuda/numba/cuda/typing/collections.py +138 -0
  468. numba_cuda/numba/cuda/typing/context.py +782 -0
  469. numba_cuda/numba/cuda/typing/ctypes_utils.py +125 -0
  470. numba_cuda/numba/cuda/typing/dictdecl.py +63 -0
  471. numba_cuda/numba/cuda/typing/enumdecl.py +74 -0
  472. numba_cuda/numba/cuda/typing/listdecl.py +147 -0
  473. numba_cuda/numba/cuda/typing/mathdecl.py +158 -0
  474. numba_cuda/numba/cuda/typing/npdatetime.py +322 -0
  475. numba_cuda/numba/cuda/typing/npydecl.py +749 -0
  476. numba_cuda/numba/cuda/typing/setdecl.py +115 -0
  477. numba_cuda/numba/cuda/typing/templates.py +1446 -0
  478. numba_cuda/numba/cuda/typing/typeof.py +301 -0
  479. numba_cuda/numba/cuda/ufuncs.py +746 -0
  480. numba_cuda/numba/cuda/utils.py +724 -0
  481. numba_cuda/numba/cuda/vector_types.py +214 -0
  482. numba_cuda/numba/cuda/vectorizers.py +260 -0
  483. numba_cuda-0.21.1.dist-info/METADATA +109 -0
  484. numba_cuda-0.21.1.dist-info/RECORD +488 -0
  485. numba_cuda-0.21.1.dist-info/WHEEL +5 -0
  486. numba_cuda-0.21.1.dist-info/licenses/LICENSE +26 -0
  487. numba_cuda-0.21.1.dist-info/licenses/LICENSE.numba +24 -0
  488. numba_cuda-0.21.1.dist-info/top_level.txt +1 -0
@@ -0,0 +1,867 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
4
+ import abc
5
+ from contextlib import contextmanager
6
+ from collections import defaultdict, namedtuple
7
+ from copy import copy
8
+ import warnings
9
+
10
+ from numba.cuda.core import typeinfer
11
+ from numba.cuda.core import (
12
+ errors,
13
+ )
14
+ from numba.cuda.core import ir
15
+ from numba.cuda import typing, types, lowering
16
+ from numba.cuda.core.compiler_machinery import (
17
+ FunctionPass,
18
+ LoweringPass,
19
+ AnalysisPass,
20
+ register_pass,
21
+ )
22
+ from numba.cuda.core.annotations import type_annotations
23
+ from numba.cuda.core.ir_utils import (
24
+ raise_on_unsupported_feature,
25
+ warn_deprecated,
26
+ check_and_legalize_ir,
27
+ guard,
28
+ dead_code_elimination,
29
+ simplify_CFG,
30
+ get_definition,
31
+ compute_cfg_from_blocks,
32
+ is_operator_or_getitem,
33
+ )
34
+
35
+ from numba.cuda.core import postproc, rewrites, funcdesc, config
36
+
37
+
38
+ try:
39
+ # llvmlite < 0.45
40
+ from llvmlite.binding import passmanagers
41
+ except ImportError:
42
+ # llvmlite >= 0.45
43
+ from llvmlite.binding import newpassmanagers as passmanagers
44
+
45
# Record returned by ``type_inference_stage``: the inferred type map, the
# unified return type, the per-call signatures and any collected typing errors.
_TypingResults = namedtuple(
    "_TypingResults",
    ("typemap", "return_type", "calltypes", "typing_errors"),
)
55
+
56
+
57
@contextmanager
def fallback_context(state, msg):
    """
    Guard a compilation step that may signal a fallback to object mode.

    Any ``Exception`` raised inside the ``with`` body is always re-raised.
    When ``state.status.can_fallback`` is true, a ``NumbaWarning`` describing
    the failure (including ``msg`` and the exception text) is emitted first,
    attributed to the file and first line of the function being compiled.
    """
    try:
        yield
    except Exception as exc:
        if not state.status.can_fallback:
            raise

        # Drop the traceback so no frame references stay attached to the
        # exception object.
        exc = exc.with_traceback(None)
        # The warning carries the body of the error message for the case of
        # falling back from nopython mode to object mode.
        lift_suffix = "" if state.flags.enable_looplift else "OUT"
        warning_text = (
            "Compilation is falling back to object mode "
            f"WITH{lift_suffix} looplifting enabled because {msg} due to: {exc}"
        )
        warnings.warn_explicit(
            warning_text,
            errors.NumbaWarning,
            state.func_id.filename,
            state.func_id.firstlineno,
        )
        raise
81
+
82
+
83
def type_inference_stage(
    typingctx,
    targetctx,
    interp,
    args,
    return_type,
    locals=None,
    raise_errors=True,
):
    """
    Run type inference over ``interp`` and return a ``_TypingResults``.

    ``args`` supplies one type per function argument; a ``TypeError`` is
    raised when its length differs from ``interp.arg_count``.
    ``return_type`` (when not None) and ``locals`` (a name -> type mapping)
    seed the inferer. ``raise_errors`` is forwarded to the propagate and
    unify steps; whatever ``propagate`` returns is stored as the
    ``typing_errors`` field of the result.
    """
    local_types = {} if locals is None else locals
    if len(args) != interp.arg_count:
        raise TypeError("Mismatch number of argument types")

    warning_fixer = errors.WarningsFixer(errors.NumbaWarning)
    inferer = typeinfer.TypeInferer(typingctx, interp, warning_fixer)
    callstack_ctx = typingctx.callstack.register(
        targetctx.target, inferer, interp.func_id, args
    )

    # Two nested contexts: callstack setup/teardown, then warning flushing.
    with callstack_ctx, warning_fixer:
        # Argument types
        for position, (arg_name, arg_type) in enumerate(
            zip(interp.arg_names, args)
        ):
            inferer.seed_argument(arg_name, position, arg_type)

        # Return type, when the caller pinned one
        if return_type is not None:
            inferer.seed_return(return_type)

        # Explicitly typed locals
        for var_name, var_type in local_types.items():
            inferer.seed_type(var_name, var_type)

        inferer.build_constraint()
        # Errors are returned (not only raised) to support partial typing.
        errs = inferer.propagate(raise_errors=raise_errors)
        typemap, restype, calltypes = inferer.unify(raise_errors=raise_errors)

    return _TypingResults(typemap, restype, calltypes, errs)
122
+
123
+
124
class BaseTypeInference(FunctionPass):
    """
    Shared implementation of the type inference passes.

    Subclasses choose, through ``_raise_errors``, whether typing problems
    are raised or merely recorded on the compiler state.
    """

    # Forwarded to ``type_inference_stage``; also gates the errors raised
    # while legalizing the return type.
    _raise_errors = True

    def __init__(self):
        FunctionPass.__init__(self)

    def run_pass(self, state):
        """
        Type inference and legalization
        """
        inference_msg = 'Function "%s" failed type inference' % (
            state.func_id.func_name,
        )
        with fallback_context(state, inference_msg):
            # Type inference
            results = type_inference_stage(
                state.typingctx,
                state.targetctx,
                state.func_ir,
                state.args,
                state.return_type,
                state.locals,
                raise_errors=self._raise_errors,
            )
            typemap, return_type, calltypes, errs = results
            state.typemap = typemap
            # keep the errors around in case of partial typing
            state.typing_errors = errs
            if self._raise_errors:
                state.return_type = return_type
            state.calltypes = calltypes

        def legalize_return_type(return_type, interp, targetctx):
            """
            Only accept array return type iff it is passed into the function.
            Reject function object return types if in nopython mode.
            """
            if not targetctx.enable_nrt and isinstance(
                return_type, types.Array
            ):
                # Scan the IR for every returned variable, every cast
                # expression and every variable bound to an argument.
                returned_names = []
                cast_exprs = {}
                arg_names = set()
                for blk in interp.blocks.values():
                    for inst in blk.body:
                        if isinstance(inst, ir.Return):
                            returned_names.append(inst.value.name)
                        elif isinstance(inst, ir.Assign):
                            rhs = inst.value
                            if isinstance(rhs, ir.Expr) and rhs.op == "cast":
                                cast_exprs[inst.target.name] = rhs
                            elif isinstance(rhs, ir.Arg):
                                arg_names.add(inst.target.name)

                assert returned_names, "No return statements?"

                for name in returned_names:
                    cast = cast_exprs.get(name)
                    from_argument = (
                        cast is not None and cast.value.name in arg_names
                    )
                    if not from_argument and self._raise_errors:
                        msg = (
                            "Only accept returning of array passed into "
                            "the function as argument"
                        )
                        raise errors.NumbaTypeError(msg)

            elif isinstance(return_type, (types.Function, types.Phantom)):
                if self._raise_errors:
                    msg = "Can't return function object ({}) in nopython mode"
                    raise errors.NumbaTypeError(msg.format(return_type))

        return_msg = 'Function "%s" has invalid return type' % (
            state.func_id.func_name,
        )
        with fallback_context(state, return_msg):
            legalize_return_type(
                state.return_type, state.func_ir, state.targetctx
            )
        return True
208
+
209
+
210
@register_pass(mutates_CFG=True, analysis_only=False)
class NopythonTypeInference(BaseTypeInference):
    """Type inference pass that keeps the inherited ``_raise_errors`` value."""

    _name = "nopython_type_inference"
213
+
214
+
215
@register_pass(mutates_CFG=True, analysis_only=False)
class PartialTypeInference(BaseTypeInference):
    """Type inference pass with ``_raise_errors`` switched off."""

    _name = "partial_type_inference"
    # Overrides the base class default of True.
    _raise_errors = False
219
+
220
+
221
@register_pass(mutates_CFG=False, analysis_only=False)
class AnnotateTypes(AnalysisPass):
    """Builds ``state.type_annotation`` from the typed IR."""

    _name = "annotate_types"

    def __init__(self):
        AnalysisPass.__init__(self)

    def get_analysis_usage(self, AU):
        """Declare ``IRLegalization`` as a required pass."""
        AU.add_required(IRLegalization)

    def run_pass(self, state):
        """
        Create type annotation after type inference
        """
        # The annotation is built from a copy of the IR.
        ir_copy = state.func_ir.copy()
        annotation = type_annotations.TypeAnnotation(
            func_ir=ir_copy,
            typemap=state.typemap,
            calltypes=state.calltypes,
            lifted=state.lifted,
            lifted_from=state.lifted_from,
            args=state.args,
            return_type=state.return_type,
            html_output=config.HTML,
        )
        state.type_annotation = annotation

        if config.ANNOTATE:
            banner = "ANNOTATION".center(80, "-")
            print(banner)
            print(annotation)
            print("=" * 80)

        if config.HTML:
            # config.HTML doubles as the output path of the HTML report.
            with open(config.HTML, "w") as html_file:
                annotation.html_annotate(html_file)

        return False
256
+
257
+
258
@register_pass(mutates_CFG=True, analysis_only=False)
class NopythonRewrites(FunctionPass):
    """Applies the rewrites registered for the "after-inference" stage."""

    _name = "nopython_rewrites"

    def __init__(self):
        FunctionPass.__init__(self)

    def run_pass(self, state):
        """
        Perform any intermediate representation rewrites after type
        inference.
        """
        # Several of the rewrites make assumptions about, or depend on, a
        # rather particular IR state with respect to ir.Del nodes. For that
        # reason the Dels are inserted before the rewrites run and removed
        # again once they are done.

        # An IR and complete type information are prerequisites.
        assert state.func_ir
        for required in ("typemap", "calltypes"):
            assert isinstance(getattr(state, required, None), dict)

        template = (
            "Internal error in post-inference rewriting "
            "pass encountered during compilation of "
            'function "%s"'
        )
        msg = template % (state.func_id.func_name,)

        post_processor = postproc.PostProcessor(state.func_ir)
        post_processor.run(True)
        with fallback_context(state, msg):
            rewrites.rewrite_registry.apply("after-inference", state)
        post_processor.remove_dels()
        return True
291
+
292
+
293
class BaseNativeLowering(abc.ABC, LoweringPass):
    """The base class for a lowering pass. The lowering functionality must be
    specified in inheriting classes by providing an appropriate lowering class
    implementation in the overridden `lowering_class` property."""

    _name = None

    def __init__(self):
        LoweringPass.__init__(self)

    @property
    @abc.abstractmethod
    def lowering_class(self):
        """Returns the class that performs the lowering of the IR describing the
        function that is the target of the current compilation."""
        pass

    def run_pass(self, state):
        """
        Lower ``state.func_ir`` and store a ``_LowerResult`` in ``state["cr"]``.

        Also records the refprune statistics delta and the library's recorded
        LLVM pass timings in ``state.metadata``.
        """
        if state.library is None:
            codegen = state.targetctx.codegen()
            state.library = codegen.create_library(state.func_id.func_qualname)
            # Object caching is switched on immediately so the library can
            # be serialized later on.
            state.library.enable_object_caching()

        library = state.library
        targetctx = state.targetctx
        func_ir = state.func_ir
        typemap = state.typemap
        restype = state.return_type
        calltypes = state.calltypes
        flags = state.flags
        metadata = state.metadata
        # Snapshot taken before lowering; the difference is stored below.
        stats_before = passmanagers.dump_refprune_stats()

        msg = "Function %s failed at nopython mode lowering" % (
            state.func_id.func_name,
        )
        with fallback_context(state, msg):
            # Lowering
            describe = (
                funcdesc.PythonFunctionDescriptor.from_specialized_function
            )
            fndesc = describe(
                func_ir,
                typemap,
                restype,
                calltypes,
                mangler=targetctx.mangler,
                inline=flags.forceinline,
                noalias=flags.noalias,
                abi_tags=[flags.get_mangle_string()],
            )

            with targetctx.push_code_library(library):
                lower = self.lowering_class(
                    targetctx, library, fndesc, func_ir, metadata=metadata
                )
                lower.lower()
                if not flags.no_cpython_wrapper:
                    lower.create_cpython_wrapper(flags.release_gil)

                if not flags.no_cfunc_wrapper:
                    # The cfunc wrapper is skipped when unsupported
                    # argument or return types are involved.
                    has_unsupported_arg = any(
                        isinstance(t, (types.Omitted, types.Generator))
                        for t in state.args
                    )
                    has_unsupported_ret = isinstance(
                        restype, (types.Optional, types.Generator)
                    )
                    if not has_unsupported_arg and not has_unsupported_ret:
                        lower.create_cfunc_wrapper()

                env = lower.env
                call_helper = lower.call_helper
                del lower

            from numba.cuda.compiler import _LowerResult  # TODO: move this

            cfunc = None
            if not flags.no_compile:
                # Prepare for execution
                # Insert native function for use by other jitted-functions.
                # We also register its library to allow for inlining.
                cfunc = targetctx.get_executable(library, fndesc, env)
                targetctx.insert_user_function(cfunc, fndesc, [library])
            state["cr"] = _LowerResult(
                fndesc, call_helper, cfunc=cfunc, env=env
            )

            # capture pruning stats
            stats_after = passmanagers.dump_refprune_stats()
            metadata["prune_stats"] = stats_after - stats_before

            # Save the LLVM pass timings
            metadata["llvm_pass_timings"] = library.recorded_timings
        return True
395
+
396
+
397
@register_pass(mutates_CFG=True, analysis_only=False)
class NativeLowering(BaseNativeLowering):
    """Lowering pass for a native function IR described solely in terms of
    Numba's standard `numba.cuda.core.ir` nodes."""

    _name = "native_lowering"

    @property
    def lowering_class(self):
        """The lowering implementation used by this pass: ``lowering.Lower``."""
        return lowering.Lower
407
+
408
+
409
@register_pass(mutates_CFG=False, analysis_only=True)
class NoPythonSupportedFeatureValidation(AnalysisPass):
    """NoPython Mode check: Validates the IR to ensure that features in use are
    in a form that is supported"""

    _name = "nopython_supported_feature_validation"

    def __init__(self):
        AnalysisPass.__init__(self)

    def run_pass(self, state):
        """Run the unsupported-feature and deprecation checks on the typed IR."""
        func_ir, typemap = state.func_ir, state.typemap
        raise_on_unsupported_feature(func_ir, typemap)
        warn_deprecated(func_ir, typemap)
        return False
423
+
424
+
425
@register_pass(mutates_CFG=False, analysis_only=True)
class IRLegalization(AnalysisPass):
    """Runs ``check_and_legalize_ir`` over the function IR."""

    _name = "ir_legalization"

    def __init__(self):
        AnalysisPass.__init__(self)

    def run_pass(self, state):
        """Check and legalize ``state.func_ir`` using the compilation flags."""
        # NOTE: this call has to be the final step, since it both checks and
        # fixes invalid IR!
        check_and_legalize_ir(state.func_ir, flags=state.flags)
        return True
436
+
437
+
438
@register_pass(mutates_CFG=True, analysis_only=False)
class NoPythonBackend(LoweringPass):
    """Packages the lowering output into the final compile result."""

    _name = "nopython_backend"

    def __init__(self):
        LoweringPass.__init__(self)

    def run_pass(self, state):
        """
        Back-end: Generate LLVM IR from Numba IR, compile to machine code
        """
        # The lowering pass left its result under the "cr" key; it is
        # replaced below by the full compile result.
        lower_result = state["cr"]
        sig = typing.signature(state.return_type, *state.args)

        from numba.cuda.compiler import cuda_compile_result

        state.cr = cuda_compile_result(
            typing_context=state.typingctx,
            target_context=state.targetctx,
            entry_point=lower_result.cfunc,
            typing_error=state.status.fail_reason,
            type_annotation=state.type_annotation,
            library=state.library,
            call_helper=lower_result.call_helper,
            signature=sig,
            objectmode=False,
            lifted=state.lifted,
            fndesc=lower_result.fndesc,
            environment=lower_result.env,
            metadata=state.metadata,
            reload_init=state.reload_init,
        )
        return True
471
+
472
+
473
@register_pass(mutates_CFG=True, analysis_only=False)
class InlineOverloads(FunctionPass):
    """
    This pass will inline a function wrapped by the numba.cuda.extending.overload
    decorator directly into the site of its call depending on the value set in
    the 'inline' kwarg to the decorator.

    This is a typed pass. CFG simplification and DCE are performed on
    completion.
    """

    _name = "inline_overloads"

    def __init__(self):
        FunctionPass.__init__(self)

    # Flip to True to print the IR before and after inlining.
    _DEBUG = False

    def run_pass(self, state):
        """Run inlining of overloads"""
        if self._DEBUG:
            print("before overload inline".center(80, "-"))
            print(state.func_id.unique_name)
            print(state.func_ir.dump())
            print("".center(80, "-"))
        from numba.cuda.core.inline_closurecall import (
            InlineWorker,
            callee_ir_validator,
        )

        inline_worker = InlineWorker(
            state.typingctx,
            state.targetctx,
            state.locals,
            state.pipeline,
            state.flags,
            callee_ir_validator,
            state.typemap,
            state.calltypes,
        )
        modified = False
        work_list = list(state.func_ir.blocks.items())
        # use a work list, look for candidate expressions on the right-hand
        # side of assignments and hand them to `self._do_work_expr`, which
        # decides whether to inline.
        while work_list:
            label, block = work_list.pop()
            for i, instr in enumerate(block.body):
                # TO-DO: other statements (setitem)
                if not isinstance(instr, ir.Assign):
                    continue
                expr = instr.value
                if not isinstance(expr, ir.Expr):
                    continue
                if guard(
                    self._do_work_expr,
                    state,
                    work_list,
                    block,
                    i,
                    expr,
                    inline_worker,
                ):
                    modified = True
                    break  # because block structure changed

        if self._DEBUG:
            print("after overload inline".center(80, "-"))
            print(state.func_id.unique_name)
            print(state.func_ir.dump())
            print("".center(80, "-"))

        if modified:
            # Remove dead blocks, this is safe as it relies on the CFG only.
            cfg = compute_cfg_from_blocks(state.func_ir.blocks)
            for dead in cfg.dead_nodes():
                del state.func_ir.blocks[dead]
            # clean up blocks
            dead_code_elimination(state.func_ir, typemap=state.typemap)
            # clean up unconditional branches that appear due to inlined
            # functions introducing blocks
            state.func_ir.blocks = simplify_CFG(state.func_ir.blocks)

        if self._DEBUG:
            print("after overload inline DCE".center(80, "-"))
            print(state.func_id.unique_name)
            print(state.func_ir.dump())
            print("".center(80, "-"))
        return True

    def _get_attr_info(self, state, expr):
        """
        Collect inlining information for a ``getattr`` expression.

        Returns ``(templates, sig, arg_typs, is_method)`` or None when no
        getattr template matches or the template represents a method.
        """
        recv_type = state.typemap[expr.value.name]
        recv_type = types.unliteral(recv_type)
        matched = state.typingctx.find_matching_getattr_template(
            recv_type,
            expr.attr,
        )
        if not matched:
            return None

        template = matched["template"]
        if getattr(template, "is_method", False):
            # The attribute template is representing a method.
            # Don't inline the getattr.
            return None

        templates = [template]
        sig = typing.signature(matched["return_type"], recv_type)
        arg_typs = sig.args
        is_method = False

        return templates, sig, arg_typs, is_method

    def _get_callable_info(self, state, expr):
        """
        Collect inlining information for a call or operator/getitem expression.

        Returns ``(templates, sig, arg_typs, is_method)`` or None when the
        expression is not a candidate (no definition, closure, untyped
        function, or no recorded call signature).
        """

        def get_func_type(state, expr):
            func_ty = None
            if expr.op == "call":
                # check this is a known and typed function
                try:
                    func_ty = state.typemap[expr.func.name]
                except KeyError:
                    # e.g. Calls to CUDA Intrinsic have no mapped type
                    # so KeyError
                    return None
                if not hasattr(func_ty, "get_call_type"):
                    return None

            elif is_operator_or_getitem(expr):
                func_ty = state.typingctx.resolve_value_type(expr.fn)
            else:
                return None

            return func_ty

        if expr.op == "call":
            # try and get a definition for the call, this isn't always
            # possible as it might be a eval(str)/part generated
            # awaiting update etc. (parfors)
            to_inline = None
            try:
                to_inline = state.func_ir.get_definition(expr.func)
            except Exception:
                return None

            # do not handle closure inlining here, another pass deals with that
            if getattr(to_inline, "op", False) == "make_function":
                return None

        func_ty = get_func_type(state, expr)
        if func_ty is None:
            return None

        sig = state.calltypes[expr]
        if not sig:
            return None

        templates, arg_typs, is_method = None, None, False
        if getattr(func_ty, "template", None) is not None:
            # @overload_method
            is_method = True
            templates = [func_ty.template]
            arg_typs = (func_ty.template.this,) + sig.args
        else:
            # @overload case
            templates = getattr(func_ty, "templates", None)
            arg_typs = sig.args

        return templates, sig, arg_typs, is_method

    def _do_work_expr(self, state, work_list, block, i, expr, inline_worker):
        """
        Try to inline ``expr``, statement ``i`` of ``block``.

        Returns True when the expression was inlined, False otherwise.
        """

        def select_template(templates, args):
            if templates is None:
                return None

            impl = None
            for template in templates:
                inline_type = getattr(template, "_inline", None)
                if inline_type is None:
                    # inline not defined
                    continue
                if args not in template._inline_overloads:
                    # skip overloads not matching signature
                    continue
                if inline_type.is_never_inline:
                    continue
                try:
                    impl = template._overload_func(*args)
                except Exception:
                    # this template cannot provide an implementation
                    continue
                if impl is not None:
                    break
            else:
                # no template produced an implementation
                return None

            return template, inline_type, impl

        inlinee_info = None
        if expr.op == "getattr":
            inlinee_info = self._get_attr_info(state, expr)
        else:
            inlinee_info = self._get_callable_info(state, expr)

        if not inlinee_info:
            return False

        templates, sig, arg_typs, is_method = inlinee_info
        inlinee = select_template(templates, arg_typs)
        if inlinee is None:
            return False
        template, inlinee_type, impl = inlinee

        return self._run_inliner(
            state,
            inlinee_type,
            sig,
            template,
            arg_typs,
            expr,
            i,
            impl,
            block,
            work_list,
            is_method,
            inline_worker,
        )

    def _run_inliner(
        self,
        state,
        inline_type,
        sig,
        template,
        arg_typs,
        expr,
        i,
        impl,
        block,
        work_list,
        is_method,
        inline_worker,
    ):
        """
        Decide whether to inline and, if so, splice the callee IR in.

        Inlining always happens for an "always inline" option; otherwise the
        option's cost model is consulted. Returns True when the IR was
        inlined, False otherwise. Blocks created by the inlining are pushed
        onto ``work_list`` (when it is not None) for further processing.
        """
        do_inline = True
        if not inline_type.is_always_inline:
            from numba.cuda.typing.templates import _inline_info

            caller_inline_info = _inline_info(
                state.func_ir, state.typemap, state.calltypes, sig
            )

            # must be a cost-model function, run the function
            iinfo = template._inline_overloads[arg_typs]["iinfo"]
            if inline_type.has_cost_model:
                do_inline = inline_type.value(expr, caller_inline_info, iinfo)
            else:
                # The previous `assert "unreachable"` tested a non-empty
                # string, which is always truthy and could never fail.
                raise AssertionError("unreachable")

        if not do_inline:
            return False

        if is_method:
            if not self._add_method_self_arg(state, expr):
                return False
        arg_typs = template._inline_overloads[arg_typs]["folded_args"]
        iinfo = template._inline_overloads[arg_typs]["iinfo"]
        freevars = iinfo.func_ir.func_id.func.__code__.co_freevars
        _, _, _, new_blocks = inline_worker.inline_ir(
            state.func_ir,
            block,
            i,
            iinfo.func_ir,
            freevars,
            arg_typs=arg_typs,
        )
        if work_list is not None:
            work_list.extend(new_blocks)
        return True

    def _add_method_self_arg(self, state, expr):
        """
        Prepend the method receiver to ``expr.args``.

        Returns False when no definition for ``expr.func`` can be found,
        True after the argument list was modified in place.
        """
        func_def = guard(get_definition, state.func_ir, expr.func)
        if func_def is None:
            return False
        expr.args.insert(0, func_def.value)
        return True
756
+
757
+
758
@register_pass(mutates_CFG=False, analysis_only=False)
class DeadCodeElimination(FunctionPass):
    """
    Does dead code elimination
    """

    _name = "dead_code_elimination"

    def __init__(self):
        FunctionPass.__init__(self)

    def run_pass(self, state):
        """Run ``dead_code_elimination`` on the function IR with its type map."""
        func_ir, typemap = state.func_ir, state.typemap
        dead_code_elimination(func_ir, typemap)
        return True
772
+
773
+
774
@register_pass(mutates_CFG=False, analysis_only=False)
class PreLowerStripPhis(FunctionPass):
    """Remove phi nodes (ir.Expr.phi) introduced by SSA.

    This is needed before Lowering because the phi nodes in Numba IR do not
    match the semantics of phi nodes in LLVM IR. In Numba IR, phi nodes may
    expand into multiple LLVM instructions.
    """

    _name = "strip_phis"

    def __init__(self):
        FunctionPass.__init__(self)

    def run_pass(self, state):
        """Strip the phi nodes, refresh IR metadata and rebuild generator types."""
        state.func_ir = self._strip_phi_nodes(state.func_ir)

        # The post-processor is run again (without emitting Dels) so the
        # metadata reflects the rewritten blocks.
        postproc.PostProcessor(state.func_ir).run(emit_dels=False)

        # Ensure we are not in objectmode generator
        generator_info = state.func_ir.generator_info
        if generator_info is not None and state.typemap is not None:
            # Rebuild generator type
            # TODO: move this into PostProcessor
            old_gentype = state.return_type
            state_types = [
                state.typemap[name] for name in generator_info.state_vars
            ]
            state.return_type = types.Generator(
                gen_func=old_gentype.gen_func,
                yield_type=old_gentype.yield_type,
                arg_types=old_gentype.arg_types,
                state_types=state_types,
                has_finalizer=old_gentype.has_finalizer,
            )
        return True

    def _strip_phi_nodes(self, func_ir):
        """Strip Phi nodes from ``func_ir``

        Every phi assignment is removed and replaced by plain assignments in
        its incoming blocks. Each of those is placed right after the last
        assignment to the incoming variable in that block, or at the start
        of the block when there is none.
        """
        # incoming block label -> [(phi target, incoming value), ...]
        exporters = defaultdict(list)
        phi_assigns = set()
        # Collect every phi and the values its incoming blocks must export.
        for block in func_ir.blocks.values():
            for assign in block.find_insts(ir.Assign):
                value = assign.value
                if not isinstance(value, ir.Expr) or value.op != "phi":
                    continue
                phi_assigns.add(assign)
                incoming = zip(value.incoming_blocks, value.incoming_values)
                for src_label, src_value in incoming:
                    exporters[src_label].append((assign.target, src_value))

        # Build replacement blocks holding the exporting assignments.
        newblocks = {}
        for label, block in func_ir.blocks.items():
            newblk = copy(block)
            newblocks[label] = newblk

            # drop the phi assignments
            newblk.body = [
                stmt for stmt in block.body if stmt not in phi_assigns
            ]

            # add the exporting assignments
            for target, rhs in exporters[label]:
                if rhs is ir.UNDEFINED:
                    # An undefined incoming value becomes a NULL initializer
                    # whose location is set to what will eventually
                    # materialize as the prologue.
                    rhs = ir.Expr.null(loc=func_ir.loc)

                export = ir.Assign(target=target, value=rhs, loc=rhs.loc)
                # Locate the last assignment to rhs within this block.
                anchor = None
                for stmt in newblk.find_insts(ir.Assign):
                    if stmt.target == rhs:
                        anchor = stmt
                if anchor is not None:
                    newblk.insert_after(export, anchor)
                else:
                    newblk.prepend(export)

        func_ir.blocks = newblocks
        return func_ir