numba-cuda 0.22.0__cp313-cp313-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (487) hide show
  1. _numba_cuda_redirector.pth +4 -0
  2. _numba_cuda_redirector.py +89 -0
  3. numba_cuda/VERSION +1 -0
  4. numba_cuda/__init__.py +6 -0
  5. numba_cuda/_version.py +11 -0
  6. numba_cuda/numba/cuda/__init__.py +70 -0
  7. numba_cuda/numba/cuda/_internal/cuda_bf16.py +16394 -0
  8. numba_cuda/numba/cuda/_internal/cuda_fp16.py +8112 -0
  9. numba_cuda/numba/cuda/api.py +580 -0
  10. numba_cuda/numba/cuda/api_util.py +76 -0
  11. numba_cuda/numba/cuda/args.py +72 -0
  12. numba_cuda/numba/cuda/bf16.py +397 -0
  13. numba_cuda/numba/cuda/cache_hints.py +287 -0
  14. numba_cuda/numba/cuda/cext/__init__.py +2 -0
  15. numba_cuda/numba/cuda/cext/_devicearray.cpp +159 -0
  16. numba_cuda/numba/cuda/cext/_devicearray.cpython-313-aarch64-linux-gnu.so +0 -0
  17. numba_cuda/numba/cuda/cext/_devicearray.h +29 -0
  18. numba_cuda/numba/cuda/cext/_dispatcher.cpp +1098 -0
  19. numba_cuda/numba/cuda/cext/_dispatcher.cpython-313-aarch64-linux-gnu.so +0 -0
  20. numba_cuda/numba/cuda/cext/_hashtable.cpp +532 -0
  21. numba_cuda/numba/cuda/cext/_hashtable.h +135 -0
  22. numba_cuda/numba/cuda/cext/_helperlib.c +71 -0
  23. numba_cuda/numba/cuda/cext/_helperlib.cpython-313-aarch64-linux-gnu.so +0 -0
  24. numba_cuda/numba/cuda/cext/_helpermod.c +82 -0
  25. numba_cuda/numba/cuda/cext/_pymodule.h +38 -0
  26. numba_cuda/numba/cuda/cext/_typeconv.cpp +206 -0
  27. numba_cuda/numba/cuda/cext/_typeconv.cpython-313-aarch64-linux-gnu.so +0 -0
  28. numba_cuda/numba/cuda/cext/_typeof.cpp +1159 -0
  29. numba_cuda/numba/cuda/cext/_typeof.h +19 -0
  30. numba_cuda/numba/cuda/cext/capsulethunk.h +111 -0
  31. numba_cuda/numba/cuda/cext/mviewbuf.c +385 -0
  32. numba_cuda/numba/cuda/cext/mviewbuf.cpython-313-aarch64-linux-gnu.so +0 -0
  33. numba_cuda/numba/cuda/cext/typeconv.cpp +212 -0
  34. numba_cuda/numba/cuda/cext/typeconv.hpp +101 -0
  35. numba_cuda/numba/cuda/cg.py +67 -0
  36. numba_cuda/numba/cuda/cgutils.py +1294 -0
  37. numba_cuda/numba/cuda/cloudpickle/__init__.py +21 -0
  38. numba_cuda/numba/cuda/cloudpickle/cloudpickle.py +1598 -0
  39. numba_cuda/numba/cuda/cloudpickle/cloudpickle_fast.py +17 -0
  40. numba_cuda/numba/cuda/codegen.py +541 -0
  41. numba_cuda/numba/cuda/compiler.py +1396 -0
  42. numba_cuda/numba/cuda/core/analysis.py +758 -0
  43. numba_cuda/numba/cuda/core/annotations/__init__.py +0 -0
  44. numba_cuda/numba/cuda/core/annotations/pretty_annotate.py +288 -0
  45. numba_cuda/numba/cuda/core/annotations/type_annotations.py +305 -0
  46. numba_cuda/numba/cuda/core/base.py +1332 -0
  47. numba_cuda/numba/cuda/core/boxing.py +1411 -0
  48. numba_cuda/numba/cuda/core/bytecode.py +728 -0
  49. numba_cuda/numba/cuda/core/byteflow.py +2346 -0
  50. numba_cuda/numba/cuda/core/caching.py +744 -0
  51. numba_cuda/numba/cuda/core/callconv.py +392 -0
  52. numba_cuda/numba/cuda/core/codegen.py +171 -0
  53. numba_cuda/numba/cuda/core/compiler.py +199 -0
  54. numba_cuda/numba/cuda/core/compiler_lock.py +85 -0
  55. numba_cuda/numba/cuda/core/compiler_machinery.py +497 -0
  56. numba_cuda/numba/cuda/core/config.py +650 -0
  57. numba_cuda/numba/cuda/core/consts.py +124 -0
  58. numba_cuda/numba/cuda/core/controlflow.py +989 -0
  59. numba_cuda/numba/cuda/core/entrypoints.py +57 -0
  60. numba_cuda/numba/cuda/core/environment.py +66 -0
  61. numba_cuda/numba/cuda/core/errors.py +917 -0
  62. numba_cuda/numba/cuda/core/event.py +511 -0
  63. numba_cuda/numba/cuda/core/funcdesc.py +330 -0
  64. numba_cuda/numba/cuda/core/generators.py +387 -0
  65. numba_cuda/numba/cuda/core/imputils.py +509 -0
  66. numba_cuda/numba/cuda/core/inline_closurecall.py +1787 -0
  67. numba_cuda/numba/cuda/core/interpreter.py +3617 -0
  68. numba_cuda/numba/cuda/core/ir.py +1812 -0
  69. numba_cuda/numba/cuda/core/ir_utils.py +2638 -0
  70. numba_cuda/numba/cuda/core/optional.py +129 -0
  71. numba_cuda/numba/cuda/core/options.py +262 -0
  72. numba_cuda/numba/cuda/core/postproc.py +249 -0
  73. numba_cuda/numba/cuda/core/pythonapi.py +1859 -0
  74. numba_cuda/numba/cuda/core/registry.py +46 -0
  75. numba_cuda/numba/cuda/core/removerefctpass.py +123 -0
  76. numba_cuda/numba/cuda/core/rewrites/__init__.py +26 -0
  77. numba_cuda/numba/cuda/core/rewrites/ir_print.py +91 -0
  78. numba_cuda/numba/cuda/core/rewrites/registry.py +104 -0
  79. numba_cuda/numba/cuda/core/rewrites/static_binop.py +41 -0
  80. numba_cuda/numba/cuda/core/rewrites/static_getitem.py +189 -0
  81. numba_cuda/numba/cuda/core/rewrites/static_raise.py +100 -0
  82. numba_cuda/numba/cuda/core/sigutils.py +68 -0
  83. numba_cuda/numba/cuda/core/ssa.py +498 -0
  84. numba_cuda/numba/cuda/core/targetconfig.py +330 -0
  85. numba_cuda/numba/cuda/core/tracing.py +231 -0
  86. numba_cuda/numba/cuda/core/transforms.py +956 -0
  87. numba_cuda/numba/cuda/core/typed_passes.py +867 -0
  88. numba_cuda/numba/cuda/core/typeinfer.py +1950 -0
  89. numba_cuda/numba/cuda/core/unsafe/__init__.py +0 -0
  90. numba_cuda/numba/cuda/core/unsafe/bytes.py +67 -0
  91. numba_cuda/numba/cuda/core/unsafe/eh.py +67 -0
  92. numba_cuda/numba/cuda/core/unsafe/refcount.py +98 -0
  93. numba_cuda/numba/cuda/core/untyped_passes.py +1979 -0
  94. numba_cuda/numba/cuda/cpython/builtins.py +1153 -0
  95. numba_cuda/numba/cuda/cpython/charseq.py +1218 -0
  96. numba_cuda/numba/cuda/cpython/cmathimpl.py +560 -0
  97. numba_cuda/numba/cuda/cpython/enumimpl.py +103 -0
  98. numba_cuda/numba/cuda/cpython/iterators.py +167 -0
  99. numba_cuda/numba/cuda/cpython/listobj.py +1326 -0
  100. numba_cuda/numba/cuda/cpython/mathimpl.py +499 -0
  101. numba_cuda/numba/cuda/cpython/numbers.py +1475 -0
  102. numba_cuda/numba/cuda/cpython/rangeobj.py +289 -0
  103. numba_cuda/numba/cuda/cpython/slicing.py +322 -0
  104. numba_cuda/numba/cuda/cpython/tupleobj.py +456 -0
  105. numba_cuda/numba/cuda/cpython/unicode.py +2865 -0
  106. numba_cuda/numba/cuda/cpython/unicode_support.py +1597 -0
  107. numba_cuda/numba/cuda/cpython/unsafe/__init__.py +0 -0
  108. numba_cuda/numba/cuda/cpython/unsafe/numbers.py +64 -0
  109. numba_cuda/numba/cuda/cpython/unsafe/tuple.py +92 -0
  110. numba_cuda/numba/cuda/cuda_paths.py +691 -0
  111. numba_cuda/numba/cuda/cudadecl.py +543 -0
  112. numba_cuda/numba/cuda/cudadrv/__init__.py +14 -0
  113. numba_cuda/numba/cuda/cudadrv/devicearray.py +954 -0
  114. numba_cuda/numba/cuda/cudadrv/devices.py +249 -0
  115. numba_cuda/numba/cuda/cudadrv/driver.py +3238 -0
  116. numba_cuda/numba/cuda/cudadrv/drvapi.py +435 -0
  117. numba_cuda/numba/cuda/cudadrv/dummyarray.py +562 -0
  118. numba_cuda/numba/cuda/cudadrv/enums.py +613 -0
  119. numba_cuda/numba/cuda/cudadrv/error.py +48 -0
  120. numba_cuda/numba/cuda/cudadrv/libs.py +220 -0
  121. numba_cuda/numba/cuda/cudadrv/linkable_code.py +184 -0
  122. numba_cuda/numba/cuda/cudadrv/mappings.py +14 -0
  123. numba_cuda/numba/cuda/cudadrv/ndarray.py +26 -0
  124. numba_cuda/numba/cuda/cudadrv/nvrtc.py +193 -0
  125. numba_cuda/numba/cuda/cudadrv/nvvm.py +756 -0
  126. numba_cuda/numba/cuda/cudadrv/rtapi.py +13 -0
  127. numba_cuda/numba/cuda/cudadrv/runtime.py +34 -0
  128. numba_cuda/numba/cuda/cudaimpl.py +983 -0
  129. numba_cuda/numba/cuda/cudamath.py +149 -0
  130. numba_cuda/numba/cuda/datamodel/__init__.py +7 -0
  131. numba_cuda/numba/cuda/datamodel/cuda_manager.py +66 -0
  132. numba_cuda/numba/cuda/datamodel/cuda_models.py +1446 -0
  133. numba_cuda/numba/cuda/datamodel/cuda_packer.py +224 -0
  134. numba_cuda/numba/cuda/datamodel/cuda_registry.py +22 -0
  135. numba_cuda/numba/cuda/datamodel/cuda_testing.py +153 -0
  136. numba_cuda/numba/cuda/datamodel/manager.py +11 -0
  137. numba_cuda/numba/cuda/datamodel/models.py +9 -0
  138. numba_cuda/numba/cuda/datamodel/packer.py +9 -0
  139. numba_cuda/numba/cuda/datamodel/registry.py +11 -0
  140. numba_cuda/numba/cuda/datamodel/testing.py +11 -0
  141. numba_cuda/numba/cuda/debuginfo.py +997 -0
  142. numba_cuda/numba/cuda/decorators.py +294 -0
  143. numba_cuda/numba/cuda/descriptor.py +35 -0
  144. numba_cuda/numba/cuda/device_init.py +155 -0
  145. numba_cuda/numba/cuda/deviceufunc.py +1021 -0
  146. numba_cuda/numba/cuda/dispatcher.py +2463 -0
  147. numba_cuda/numba/cuda/errors.py +72 -0
  148. numba_cuda/numba/cuda/extending.py +697 -0
  149. numba_cuda/numba/cuda/flags.py +178 -0
  150. numba_cuda/numba/cuda/fp16.py +357 -0
  151. numba_cuda/numba/cuda/include/12/cuda_bf16.h +5118 -0
  152. numba_cuda/numba/cuda/include/12/cuda_bf16.hpp +3865 -0
  153. numba_cuda/numba/cuda/include/12/cuda_fp16.h +5363 -0
  154. numba_cuda/numba/cuda/include/12/cuda_fp16.hpp +3483 -0
  155. numba_cuda/numba/cuda/include/13/cuda_bf16.h +5118 -0
  156. numba_cuda/numba/cuda/include/13/cuda_bf16.hpp +3865 -0
  157. numba_cuda/numba/cuda/include/13/cuda_fp16.h +5363 -0
  158. numba_cuda/numba/cuda/include/13/cuda_fp16.hpp +3483 -0
  159. numba_cuda/numba/cuda/initialize.py +24 -0
  160. numba_cuda/numba/cuda/intrinsics.py +531 -0
  161. numba_cuda/numba/cuda/itanium_mangler.py +214 -0
  162. numba_cuda/numba/cuda/kernels/__init__.py +2 -0
  163. numba_cuda/numba/cuda/kernels/reduction.py +265 -0
  164. numba_cuda/numba/cuda/kernels/transpose.py +65 -0
  165. numba_cuda/numba/cuda/libdevice.py +3386 -0
  166. numba_cuda/numba/cuda/libdevicedecl.py +20 -0
  167. numba_cuda/numba/cuda/libdevicefuncs.py +1060 -0
  168. numba_cuda/numba/cuda/libdeviceimpl.py +88 -0
  169. numba_cuda/numba/cuda/locks.py +19 -0
  170. numba_cuda/numba/cuda/lowering.py +1980 -0
  171. numba_cuda/numba/cuda/mathimpl.py +374 -0
  172. numba_cuda/numba/cuda/memory_management/__init__.py +4 -0
  173. numba_cuda/numba/cuda/memory_management/memsys.cu +99 -0
  174. numba_cuda/numba/cuda/memory_management/memsys.cuh +22 -0
  175. numba_cuda/numba/cuda/memory_management/nrt.cu +212 -0
  176. numba_cuda/numba/cuda/memory_management/nrt.cuh +48 -0
  177. numba_cuda/numba/cuda/memory_management/nrt.py +390 -0
  178. numba_cuda/numba/cuda/memory_management/nrt_context.py +438 -0
  179. numba_cuda/numba/cuda/misc/appdirs.py +594 -0
  180. numba_cuda/numba/cuda/misc/cffiimpl.py +24 -0
  181. numba_cuda/numba/cuda/misc/coverage_support.py +43 -0
  182. numba_cuda/numba/cuda/misc/dump_style.py +41 -0
  183. numba_cuda/numba/cuda/misc/findlib.py +75 -0
  184. numba_cuda/numba/cuda/misc/firstlinefinder.py +96 -0
  185. numba_cuda/numba/cuda/misc/gdb_hook.py +240 -0
  186. numba_cuda/numba/cuda/misc/literal.py +28 -0
  187. numba_cuda/numba/cuda/misc/llvm_pass_timings.py +412 -0
  188. numba_cuda/numba/cuda/misc/special.py +94 -0
  189. numba_cuda/numba/cuda/models.py +56 -0
  190. numba_cuda/numba/cuda/np/arraymath.py +5130 -0
  191. numba_cuda/numba/cuda/np/arrayobj.py +7635 -0
  192. numba_cuda/numba/cuda/np/extensions.py +11 -0
  193. numba_cuda/numba/cuda/np/linalg.py +3087 -0
  194. numba_cuda/numba/cuda/np/math/__init__.py +0 -0
  195. numba_cuda/numba/cuda/np/math/cmathimpl.py +558 -0
  196. numba_cuda/numba/cuda/np/math/mathimpl.py +487 -0
  197. numba_cuda/numba/cuda/np/math/numbers.py +1461 -0
  198. numba_cuda/numba/cuda/np/npdatetime.py +969 -0
  199. numba_cuda/numba/cuda/np/npdatetime_helpers.py +217 -0
  200. numba_cuda/numba/cuda/np/npyfuncs.py +1808 -0
  201. numba_cuda/numba/cuda/np/npyimpl.py +1027 -0
  202. numba_cuda/numba/cuda/np/numpy_support.py +798 -0
  203. numba_cuda/numba/cuda/np/polynomial/__init__.py +4 -0
  204. numba_cuda/numba/cuda/np/polynomial/polynomial_core.py +242 -0
  205. numba_cuda/numba/cuda/np/polynomial/polynomial_functions.py +380 -0
  206. numba_cuda/numba/cuda/np/ufunc/__init__.py +4 -0
  207. numba_cuda/numba/cuda/np/ufunc/decorators.py +203 -0
  208. numba_cuda/numba/cuda/np/ufunc/sigparse.py +68 -0
  209. numba_cuda/numba/cuda/np/ufunc/ufuncbuilder.py +65 -0
  210. numba_cuda/numba/cuda/np/ufunc_db.py +1282 -0
  211. numba_cuda/numba/cuda/np/unsafe/__init__.py +0 -0
  212. numba_cuda/numba/cuda/np/unsafe/ndarray.py +84 -0
  213. numba_cuda/numba/cuda/nvvmutils.py +254 -0
  214. numba_cuda/numba/cuda/printimpl.py +126 -0
  215. numba_cuda/numba/cuda/random.py +308 -0
  216. numba_cuda/numba/cuda/reshape_funcs.cu +156 -0
  217. numba_cuda/numba/cuda/serialize.py +267 -0
  218. numba_cuda/numba/cuda/simulator/__init__.py +63 -0
  219. numba_cuda/numba/cuda/simulator/_internal/__init__.py +4 -0
  220. numba_cuda/numba/cuda/simulator/_internal/cuda_bf16.py +2 -0
  221. numba_cuda/numba/cuda/simulator/api.py +179 -0
  222. numba_cuda/numba/cuda/simulator/bf16.py +4 -0
  223. numba_cuda/numba/cuda/simulator/compiler.py +38 -0
  224. numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +11 -0
  225. numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +462 -0
  226. numba_cuda/numba/cuda/simulator/cudadrv/devices.py +122 -0
  227. numba_cuda/numba/cuda/simulator/cudadrv/driver.py +66 -0
  228. numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +7 -0
  229. numba_cuda/numba/cuda/simulator/cudadrv/dummyarray.py +7 -0
  230. numba_cuda/numba/cuda/simulator/cudadrv/error.py +10 -0
  231. numba_cuda/numba/cuda/simulator/cudadrv/libs.py +10 -0
  232. numba_cuda/numba/cuda/simulator/cudadrv/linkable_code.py +61 -0
  233. numba_cuda/numba/cuda/simulator/cudadrv/nvrtc.py +11 -0
  234. numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +32 -0
  235. numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +22 -0
  236. numba_cuda/numba/cuda/simulator/dispatcher.py +11 -0
  237. numba_cuda/numba/cuda/simulator/kernel.py +320 -0
  238. numba_cuda/numba/cuda/simulator/kernelapi.py +509 -0
  239. numba_cuda/numba/cuda/simulator/memory_management/__init__.py +4 -0
  240. numba_cuda/numba/cuda/simulator/memory_management/nrt.py +21 -0
  241. numba_cuda/numba/cuda/simulator/reduction.py +19 -0
  242. numba_cuda/numba/cuda/simulator/tests/support.py +4 -0
  243. numba_cuda/numba/cuda/simulator/vector_types.py +65 -0
  244. numba_cuda/numba/cuda/simulator_init.py +18 -0
  245. numba_cuda/numba/cuda/stubs.py +624 -0
  246. numba_cuda/numba/cuda/target.py +505 -0
  247. numba_cuda/numba/cuda/testing.py +347 -0
  248. numba_cuda/numba/cuda/tests/__init__.py +62 -0
  249. numba_cuda/numba/cuda/tests/benchmarks/__init__.py +0 -0
  250. numba_cuda/numba/cuda/tests/benchmarks/test_kernel_launch.py +119 -0
  251. numba_cuda/numba/cuda/tests/cloudpickle_main_class.py +9 -0
  252. numba_cuda/numba/cuda/tests/core/serialize_usecases.py +113 -0
  253. numba_cuda/numba/cuda/tests/core/test_itanium_mangler.py +83 -0
  254. numba_cuda/numba/cuda/tests/core/test_serialize.py +371 -0
  255. numba_cuda/numba/cuda/tests/cudadrv/__init__.py +9 -0
  256. numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +147 -0
  257. numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +161 -0
  258. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +397 -0
  259. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +24 -0
  260. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +180 -0
  261. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +313 -0
  262. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +191 -0
  263. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +621 -0
  264. numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +247 -0
  265. numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +100 -0
  266. numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +200 -0
  267. numba_cuda/numba/cuda/tests/cudadrv/test_events.py +53 -0
  268. numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +72 -0
  269. numba_cuda/numba/cuda/tests/cudadrv/test_init.py +138 -0
  270. numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +43 -0
  271. numba_cuda/numba/cuda/tests/cudadrv/test_is_fp16.py +15 -0
  272. numba_cuda/numba/cuda/tests/cudadrv/test_linkable_code.py +58 -0
  273. numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +348 -0
  274. numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +128 -0
  275. numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py +301 -0
  276. numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +174 -0
  277. numba_cuda/numba/cuda/tests/cudadrv/test_nvrtc.py +28 -0
  278. numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +185 -0
  279. numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +39 -0
  280. numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +23 -0
  281. numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +38 -0
  282. numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +48 -0
  283. numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +44 -0
  284. numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +127 -0
  285. numba_cuda/numba/cuda/tests/cudapy/__init__.py +9 -0
  286. numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +231 -0
  287. numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +50 -0
  288. numba_cuda/numba/cuda/tests/cudapy/cg_cache_usecases.py +36 -0
  289. numba_cuda/numba/cuda/tests/cudapy/complex_usecases.py +116 -0
  290. numba_cuda/numba/cuda/tests/cudapy/enum_usecases.py +59 -0
  291. numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +62 -0
  292. numba_cuda/numba/cuda/tests/cudapy/jitlink.ptx +28 -0
  293. numba_cuda/numba/cuda/tests/cudapy/overload_usecases.py +33 -0
  294. numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +104 -0
  295. numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +47 -0
  296. numba_cuda/numba/cuda/tests/cudapy/test_analysis.py +1122 -0
  297. numba_cuda/numba/cuda/tests/cudapy/test_array.py +344 -0
  298. numba_cuda/numba/cuda/tests/cudapy/test_array_alignment.py +268 -0
  299. numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +203 -0
  300. numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +63 -0
  301. numba_cuda/numba/cuda/tests/cudapy/test_array_reductions.py +360 -0
  302. numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +1815 -0
  303. numba_cuda/numba/cuda/tests/cudapy/test_bfloat16.py +599 -0
  304. numba_cuda/numba/cuda/tests/cudapy/test_bfloat16_bindings.py +377 -0
  305. numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +160 -0
  306. numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +27 -0
  307. numba_cuda/numba/cuda/tests/cudapy/test_byteflow.py +98 -0
  308. numba_cuda/numba/cuda/tests/cudapy/test_cache_hints.py +210 -0
  309. numba_cuda/numba/cuda/tests/cudapy/test_caching.py +683 -0
  310. numba_cuda/numba/cuda/tests/cudapy/test_casting.py +265 -0
  311. numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +42 -0
  312. numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +718 -0
  313. numba_cuda/numba/cuda/tests/cudapy/test_complex.py +370 -0
  314. numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +23 -0
  315. numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +142 -0
  316. numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +178 -0
  317. numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +193 -0
  318. numba_cuda/numba/cuda/tests/cudapy/test_copy_propagate.py +131 -0
  319. numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +438 -0
  320. numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +94 -0
  321. numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +101 -0
  322. numba_cuda/numba/cuda/tests/cudapy/test_debug.py +105 -0
  323. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +978 -0
  324. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo_types.py +476 -0
  325. numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +500 -0
  326. numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +820 -0
  327. numba_cuda/numba/cuda/tests/cudapy/test_enums.py +152 -0
  328. numba_cuda/numba/cuda/tests/cudapy/test_errors.py +111 -0
  329. numba_cuda/numba/cuda/tests/cudapy/test_exception.py +170 -0
  330. numba_cuda/numba/cuda/tests/cudapy/test_extending.py +1088 -0
  331. numba_cuda/numba/cuda/tests/cudapy/test_extending_types.py +71 -0
  332. numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +265 -0
  333. numba_cuda/numba/cuda/tests/cudapy/test_flow_control.py +1433 -0
  334. numba_cuda/numba/cuda/tests/cudapy/test_forall.py +57 -0
  335. numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +34 -0
  336. numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +69 -0
  337. numba_cuda/numba/cuda/tests/cudapy/test_globals.py +62 -0
  338. numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +474 -0
  339. numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +167 -0
  340. numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +92 -0
  341. numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +39 -0
  342. numba_cuda/numba/cuda/tests/cudapy/test_inline.py +170 -0
  343. numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +255 -0
  344. numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +1219 -0
  345. numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +263 -0
  346. numba_cuda/numba/cuda/tests/cudapy/test_ir.py +598 -0
  347. numba_cuda/numba/cuda/tests/cudapy/test_ir_utils.py +276 -0
  348. numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +101 -0
  349. numba_cuda/numba/cuda/tests/cudapy/test_lang.py +68 -0
  350. numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +123 -0
  351. numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +194 -0
  352. numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +220 -0
  353. numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +173 -0
  354. numba_cuda/numba/cuda/tests/cudapy/test_make_function_to_jit_function.py +364 -0
  355. numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +47 -0
  356. numba_cuda/numba/cuda/tests/cudapy/test_math.py +842 -0
  357. numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +76 -0
  358. numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +78 -0
  359. numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +25 -0
  360. numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +145 -0
  361. numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +39 -0
  362. numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +82 -0
  363. numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +53 -0
  364. numba_cuda/numba/cuda/tests/cudapy/test_operator.py +504 -0
  365. numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +93 -0
  366. numba_cuda/numba/cuda/tests/cudapy/test_overload.py +402 -0
  367. numba_cuda/numba/cuda/tests/cudapy/test_powi.py +128 -0
  368. numba_cuda/numba/cuda/tests/cudapy/test_print.py +193 -0
  369. numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +37 -0
  370. numba_cuda/numba/cuda/tests/cudapy/test_random.py +117 -0
  371. numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +614 -0
  372. numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +130 -0
  373. numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +94 -0
  374. numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +83 -0
  375. numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +86 -0
  376. numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +40 -0
  377. numba_cuda/numba/cuda/tests/cudapy/test_sm.py +457 -0
  378. numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +233 -0
  379. numba_cuda/numba/cuda/tests/cudapy/test_ssa.py +454 -0
  380. numba_cuda/numba/cuda/tests/cudapy/test_stream_api.py +56 -0
  381. numba_cuda/numba/cuda/tests/cudapy/test_sync.py +277 -0
  382. numba_cuda/numba/cuda/tests/cudapy/test_tracing.py +200 -0
  383. numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +90 -0
  384. numba_cuda/numba/cuda/tests/cudapy/test_typeconv.py +333 -0
  385. numba_cuda/numba/cuda/tests/cudapy/test_typeinfer.py +538 -0
  386. numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +585 -0
  387. numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +42 -0
  388. numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +485 -0
  389. numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +312 -0
  390. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +23 -0
  391. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +183 -0
  392. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +40 -0
  393. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +40 -0
  394. numba_cuda/numba/cuda/tests/cudapy/test_warning.py +206 -0
  395. numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +446 -0
  396. numba_cuda/numba/cuda/tests/cudasim/__init__.py +9 -0
  397. numba_cuda/numba/cuda/tests/cudasim/support.py +9 -0
  398. numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +111 -0
  399. numba_cuda/numba/cuda/tests/data/__init__.py +2 -0
  400. numba_cuda/numba/cuda/tests/data/cta_barrier.cu +28 -0
  401. numba_cuda/numba/cuda/tests/data/cuda_include.cu +10 -0
  402. numba_cuda/numba/cuda/tests/data/error.cu +12 -0
  403. numba_cuda/numba/cuda/tests/data/include/add.cuh +8 -0
  404. numba_cuda/numba/cuda/tests/data/jitlink.cu +28 -0
  405. numba_cuda/numba/cuda/tests/data/jitlink.ptx +49 -0
  406. numba_cuda/numba/cuda/tests/data/warn.cu +12 -0
  407. numba_cuda/numba/cuda/tests/doc_examples/__init__.py +9 -0
  408. numba_cuda/numba/cuda/tests/doc_examples/ffi/__init__.py +2 -0
  409. numba_cuda/numba/cuda/tests/doc_examples/ffi/functions.cu +54 -0
  410. numba_cuda/numba/cuda/tests/doc_examples/ffi/include/mul.cuh +8 -0
  411. numba_cuda/numba/cuda/tests/doc_examples/ffi/saxpy.cu +14 -0
  412. numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +86 -0
  413. numba_cuda/numba/cuda/tests/doc_examples/test_cpointer.py +68 -0
  414. numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +81 -0
  415. numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +141 -0
  416. numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +160 -0
  417. numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +180 -0
  418. numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +119 -0
  419. numba_cuda/numba/cuda/tests/doc_examples/test_random.py +66 -0
  420. numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +80 -0
  421. numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +206 -0
  422. numba_cuda/numba/cuda/tests/doc_examples/test_ufunc.py +53 -0
  423. numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +76 -0
  424. numba_cuda/numba/cuda/tests/nocuda/__init__.py +9 -0
  425. numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +452 -0
  426. numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +48 -0
  427. numba_cuda/numba/cuda/tests/nocuda/test_import.py +63 -0
  428. numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +252 -0
  429. numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +59 -0
  430. numba_cuda/numba/cuda/tests/nrt/__init__.py +9 -0
  431. numba_cuda/numba/cuda/tests/nrt/test_nrt.py +387 -0
  432. numba_cuda/numba/cuda/tests/nrt/test_nrt_refct.py +124 -0
  433. numba_cuda/numba/cuda/tests/support.py +900 -0
  434. numba_cuda/numba/cuda/typeconv/__init__.py +4 -0
  435. numba_cuda/numba/cuda/typeconv/castgraph.py +137 -0
  436. numba_cuda/numba/cuda/typeconv/rules.py +63 -0
  437. numba_cuda/numba/cuda/typeconv/typeconv.py +121 -0
  438. numba_cuda/numba/cuda/types/__init__.py +233 -0
  439. numba_cuda/numba/cuda/types/__init__.pyi +167 -0
  440. numba_cuda/numba/cuda/types/abstract.py +9 -0
  441. numba_cuda/numba/cuda/types/common.py +9 -0
  442. numba_cuda/numba/cuda/types/containers.py +9 -0
  443. numba_cuda/numba/cuda/types/cuda_abstract.py +533 -0
  444. numba_cuda/numba/cuda/types/cuda_common.py +110 -0
  445. numba_cuda/numba/cuda/types/cuda_containers.py +971 -0
  446. numba_cuda/numba/cuda/types/cuda_function_type.py +230 -0
  447. numba_cuda/numba/cuda/types/cuda_functions.py +798 -0
  448. numba_cuda/numba/cuda/types/cuda_iterators.py +120 -0
  449. numba_cuda/numba/cuda/types/cuda_misc.py +569 -0
  450. numba_cuda/numba/cuda/types/cuda_npytypes.py +690 -0
  451. numba_cuda/numba/cuda/types/cuda_scalars.py +280 -0
  452. numba_cuda/numba/cuda/types/ext_types.py +101 -0
  453. numba_cuda/numba/cuda/types/function_type.py +11 -0
  454. numba_cuda/numba/cuda/types/functions.py +9 -0
  455. numba_cuda/numba/cuda/types/iterators.py +9 -0
  456. numba_cuda/numba/cuda/types/misc.py +9 -0
  457. numba_cuda/numba/cuda/types/npytypes.py +9 -0
  458. numba_cuda/numba/cuda/types/scalars.py +9 -0
  459. numba_cuda/numba/cuda/typing/__init__.py +19 -0
  460. numba_cuda/numba/cuda/typing/arraydecl.py +939 -0
  461. numba_cuda/numba/cuda/typing/asnumbatype.py +130 -0
  462. numba_cuda/numba/cuda/typing/bufproto.py +70 -0
  463. numba_cuda/numba/cuda/typing/builtins.py +1209 -0
  464. numba_cuda/numba/cuda/typing/cffi_utils.py +219 -0
  465. numba_cuda/numba/cuda/typing/cmathdecl.py +47 -0
  466. numba_cuda/numba/cuda/typing/collections.py +138 -0
  467. numba_cuda/numba/cuda/typing/context.py +782 -0
  468. numba_cuda/numba/cuda/typing/ctypes_utils.py +125 -0
  469. numba_cuda/numba/cuda/typing/dictdecl.py +63 -0
  470. numba_cuda/numba/cuda/typing/enumdecl.py +74 -0
  471. numba_cuda/numba/cuda/typing/listdecl.py +147 -0
  472. numba_cuda/numba/cuda/typing/mathdecl.py +158 -0
  473. numba_cuda/numba/cuda/typing/npdatetime.py +322 -0
  474. numba_cuda/numba/cuda/typing/npydecl.py +749 -0
  475. numba_cuda/numba/cuda/typing/setdecl.py +115 -0
  476. numba_cuda/numba/cuda/typing/templates.py +1446 -0
  477. numba_cuda/numba/cuda/typing/typeof.py +301 -0
  478. numba_cuda/numba/cuda/ufuncs.py +746 -0
  479. numba_cuda/numba/cuda/utils.py +724 -0
  480. numba_cuda/numba/cuda/vector_types.py +214 -0
  481. numba_cuda/numba/cuda/vectorizers.py +260 -0
  482. numba_cuda-0.22.0.dist-info/METADATA +109 -0
  483. numba_cuda-0.22.0.dist-info/RECORD +487 -0
  484. numba_cuda-0.22.0.dist-info/WHEEL +6 -0
  485. numba_cuda-0.22.0.dist-info/licenses/LICENSE +26 -0
  486. numba_cuda-0.22.0.dist-info/licenses/LICENSE.numba +24 -0
  487. numba_cuda-0.22.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,1332 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
4
+ from collections import defaultdict
5
+ import copy
6
+ import importlib
7
+ import sys
8
+ from itertools import permutations, takewhile
9
+ from contextlib import contextmanager
10
+ from functools import cached_property
11
+
12
+ from llvmlite import ir as llvmir
13
+ from llvmlite.ir import Constant
14
+
15
+ from numba.cuda.core import imputils, targetconfig, funcdesc
16
+ from numba.cuda import cgutils, debuginfo, types, utils, datamodel, config
17
+ from numba.cuda.core import errors
18
+ from numba.cuda.core.compiler_lock import global_compiler_lock
19
+ from numba.cuda.core.pythonapi import PythonAPI
20
+ from numba.cuda.core.imputils import (
21
+ user_function,
22
+ user_generator,
23
+ builtin_registry,
24
+ impl_ret_borrowed,
25
+ RegistryLoader,
26
+ )
27
+
28
# LLVM pointer-to-8-bit-integer type, used as the generic pointer type.
GENERIC_POINTER = llvmir.PointerType(llvmir.IntType(8))
# Aliases of the same LLVM type object under other names.
PYOBJECT = GENERIC_POINTER
void_ptr = GENERIC_POINTER
31
+
32
+
33
class OverloadSelector(object):
    """
    Registry mapping formal signatures to values, able to resolve an actual
    signature to the most specific compatible entry.

    A "signature" is a tuple of type classes or type instances. Among all
    compatible formal signatures, the least generic one is selected.
    """

    def __init__(self):
        # Registered (formal signature, value) pairs, in insertion order.
        self.versions = []
        # Memoized lookups: actual signature -> selected value.
        self._cache = {}

    def find(self, sig):
        """
        Return the value selected for the actual signature *sig*, consulting
        the memo first and filling it on a miss.
        """
        hit = self._cache.get(sig)
        if hit is not None:
            return hit
        hit = self._find(sig)
        self._cache[sig] = hit
        return hit

    def _find(self, sig):
        """
        Uncached lookup. Raises NumbaNotImplementedError when no registered
        signature is compatible with *sig*.
        """
        compatible = self._select_compatible(sig)
        if not compatible:
            raise errors.NumbaNotImplementedError(f"{self}, {sig}")
        return compatible[self._best_signature(compatible)]

    def _select_compatible(self, sig):
        """
        Build a dict {formal signature: value} of every registered entry
        whose formal signature matches *sig*.
        """
        return {
            formal: value
            for formal, value in self.versions
            if self._match_arglist(formal, sig)
        }

    def _best_signature(self, candidates):
        """
        Pick the least generic signature among *candidates*. Raises
        NumbaTypeError if several candidates tie for the lowest score.
        """
        ordered, genericity = self._sort_signatures(candidates)
        if len(ordered) > 1:
            lowest = genericity[ordered[0]]
            tied = list(takewhile(lambda s: genericity[s] == lowest, ordered))
            if len(tied) > 1:
                report = ["{n} ambiguous signatures".format(n=len(tied))]
                report.extend(
                    "{0} => {1}".format(s, candidates[s]) for s in tied
                )
                raise errors.NumbaTypeError("\n".join(report))
        return ordered[0]

    def _sort_signatures(self, candidates):
        """
        Order the candidate signatures from least to most generic.

        Returns a 2-tuple ``(ordered, genericity)`` where *ordered* is the
        sorted list of signatures and *genericity* maps a signature to its
        score.
        """
        genericity = defaultdict(int)
        # A signature earns one point for each other candidate it accepts.
        for this, other in permutations(candidates.keys(), r=2):
            if self._match_arglist(formal_args=this, actual_args=other):
                genericity[this] += 1
        ordered = sorted(candidates.keys(), key=genericity.__getitem__)
        return ordered, genericity

    def _match_arglist(self, formal_args, actual_args):
        """
        Tell whether *formal_args* accepts *actual_args*: same length (after
        expanding a trailing VarArg) and every position matching.
        """
        if formal_args and isinstance(formal_args[-1], types.VarArg):
            # Repeat the VarArg's dtype to cover the remaining actual args.
            extra = len(actual_args) - len(formal_args) + 1
            formal_args = formal_args[:-1] + (formal_args[-1].dtype,) * extra

        if len(formal_args) != len(actual_args):
            return False

        return all(
            self._match(formal, actual)
            for formal, actual in zip(formal_args, actual_args)
        )

    def _match(self, formal, actual):
        """
        Return True when a single formal argument accepts *actual*;
        otherwise fall through and return None.
        """
        # Exact equality, or the catch-all ``types.Any``.
        if formal == actual or types.Any == formal:
            return True
        if isinstance(formal, type) and issubclass(formal, types.Type):
            # Formal is a type class: accept subclasses ...
            if isinstance(actual, type) and issubclass(actual, formal):
                return True
            # ... and instances of it.
            if isinstance(actual, formal):
                return True
        return None

    def append(self, value, sig):
        """
        Register *value* under the formal signature *sig* (a tuple) and
        drop all memoized lookups.
        """
        assert isinstance(sig, tuple), (value, sig)
        self.versions.append((sig, value))
        self._cache.clear()
152
+
153
+
154
+ class BaseContext(object):
155
+ """
156
+
157
+ Notes on Structure
158
+ ------------------
159
+
160
+ Most objects are lowered as plain-old-data structure in the generated
161
+ llvm. They are passed around by reference (a pointer to the structure).
162
+ Only POD structure can live across function boundaries by copying the
163
+ data.
164
+ """
165
+
166
+ # True if the target requires strict alignment
167
+ # Causes exception to be raised if the record members are not aligned.
168
+ strict_alignment = False
169
+
170
+ # Force powi implementation as math.pow call
171
+ implement_powi_as_math_call = False
172
+ implement_pow_as_math_call = False
173
+
174
+ # Emit Debug info
175
+ enable_debuginfo = False
176
+ DIBuilder = debuginfo.DIBuilder
177
+
178
+ # Bound checking
179
@property
def enable_boundscheck(self):
    """
    Bounds-checking switch. A non-None ``config.BOUNDSCHECK`` takes
    precedence over the value stored on this context.
    """
    forced = config.BOUNDSCHECK
    return self._boundscheck if forced is None else forced


@enable_boundscheck.setter
def enable_boundscheck(self, value):
    # Only the per-context value is updated; the config override is untouched.
    self._boundscheck = value
188
+
189
+ # NRT
190
+ enable_nrt = False
191
+
192
+ # Auto parallelization
193
+ auto_parallel = False
194
+
195
+ # PYCC
196
+ aot_mode = False
197
+
198
+ # Error model for various operations (only FP exceptions currently)
199
+ error_model = None
200
+
201
+ # Whether dynamic globals (CPU runtime addresses) are allowed
202
+ allow_dynamic_globals = False
203
+
204
+ # Fast math flags
205
+ fastmath = False
206
+
207
+ # python execution environment
208
+ environment = None
209
+
210
+ # the function descriptor
211
+ fndesc = None
212
+
213
def __init__(self, typing_context, target):
    """
    Initialise the context's registries and bookkeeping state, then call
    ``self.init()`` so subclasses can add their own setup.

    Parameters
    ----------
    typing_context :
        Stored as ``self.typing_context``.
    target :
        Stored as ``self.target_name``.
    """
    # ``import importlib`` alone does not guarantee that the
    # ``importlib.util`` submodule has been loaded; import it explicitly so
    # the ``find_spec`` call below cannot fail with AttributeError.
    import importlib.util

    self.address_size = utils.MACHINE_BITS
    self.typing_context = typing_context
    self.target_name = target

    if importlib.util.find_spec("numba"):
        from numba.core.target_extension import CUDA

        # Used only in Numba's target_extension implementation.
        # Numba-CUDA has the target_extension implementation removed, and
        # references to it hardcoded to values specific to the CUDA target.
        self.target = CUDA

    # A mapping of installed registries to their loaders
    self._registries = {}
    # Declarations loaded from registries and other sources
    self._defns = defaultdict(OverloadSelector)
    self._getattrs = defaultdict(OverloadSelector)
    self._setattrs = defaultdict(OverloadSelector)
    self._casts = OverloadSelector()
    self._get_constants = OverloadSelector()
    # Other declarations
    self._generators = {}
    self.special_ops = {}
    self.cached_internal_func = {}
    self._pid = None
    self._codelib_stack = []

    # Backing value for the ``enable_boundscheck`` property.
    self._boundscheck = False

    self.data_model_manager = datamodel.default_manager

    # Subclass hook; runs last so all attributes above already exist.
    self.init()
247
+
248
def init(self):
    """
    Hook for subclasses to perform extra initialisation; does nothing here.
    """
    return None
252
+
253
def refresh(self):
    """
    Pull in new declarations from the known registries, then refresh the
    typing context as well.
    """
    # Target-specific registries first.
    self.load_additional_registries()

    # The builtin registry is installed afterwards because some of the
    # "additional" registries write their implementations into it; the
    # other order would miss them.
    self.install_registry(builtin_registry)

    # @overload declarations can affect typing, so refresh that too.
    self.typing_context.refresh()
270
+
271
def load_additional_registries(self):
    """
    Hook for subclasses to load target-specific registries; does nothing
    here.
    """
    return None
275
+
276
def mangler(self, name, types, *, abi_tags=(), uid=None):
    """
    Mangle *name* by delegating to ``funcdesc.default_mangler`` with the
    same arguments.
    """
    mangled = funcdesc.default_mangler(
        name, types, abi_tags=abi_tags, uid=uid
    )
    return mangled
281
+
282
def get_env_name(self, fndesc):
    """Return the environment name for the FunctionDescriptor *fndesc*.

    Callers should go through this method rather than reading
    ``fndesc.env_name`` directly, so that a target context can override it
    to mangle the symbol as its ABI requires. This base version returns the
    name unchanged.
    """
    env_name = fndesc.env_name
    return env_name
289
+
290
def declare_env_global(self, module, envname):
    """Return the module-level global that holds the Environment pointer.

    If *module* has no global called *envname*, one is created with
    "common" linkage and a null initializer.

    Parameters
    ----------
    module :
        The LLVM Module
    envname : str
        The name of the global variable.
    """
    known_globals = module.globals
    if envname not in known_globals:
        env_gv = llvmir.GlobalVariable(
            module, cgutils.voidptr_t, name=envname
        )
        env_gv.linkage = "common"
        env_gv.initializer = cgutils.get_null_value(env_gv.type.pointee)

    # Always look the global up by name, whether or not it was just created.
    return module.globals[envname]
310
+
311
def get_arg_packer(self, fe_args):
    """Build a ``datamodel.ArgPacker`` for *fe_args* with this context's
    data model manager."""
    manager = self.data_model_manager
    return datamodel.ArgPacker(manager, fe_args)
313
+
314
def get_data_packer(self, fe_types):
    """Build a ``datamodel.DataPacker`` for *fe_types* with this context's
    data model manager."""
    manager = self.data_model_manager
    return datamodel.DataPacker(manager, fe_types)
316
+
317
@property
def target_data(self):
    """Not provided by the base context; always raises
    NotImplementedError."""
    raise NotImplementedError
320
+
321
@cached_property
def nonconst_module_attrs(self):
    """
    Module attributes that must not be treated as constants. The base
    context treats every module attribute as constant, so this is an empty
    tuple.
    """
    return ()
327
+
328
@cached_property
def nrt(self):
    """An ``NRTContext`` built from this context and ``self.enable_nrt``,
    created on first access."""
    # Imported here rather than at module level.
    from numba.cuda.memory_management.nrt_context import NRTContext

    nrt_enabled = self.enable_nrt
    return NRTContext(self, nrt_enabled)
333
+
334
def subtarget(self, **kws):
    """
    Return a shallow copy of this context with the attributes named in
    *kws* overridden.

    Raises NameError for a keyword that is not an existing attribute.
    """
    clone = copy.copy(self)
    for option, new_value in kws.items():
        if not hasattr(clone, option):
            raise NameError("unknown option {0!r}".format(option))
        setattr(clone, option, new_value)
    same_codegen = clone.codegen() is self.codegen()
    if not same_codegen:
        # Compiled functions cannot be shared across different codegens.
        clone.cached_internal_func = {}
    return clone
344
+
345
def install_registry(self, registry):
    """
    Install the function, attribute, cast and constant implementations
    newly registered in *registry* (an imputils.Registry instance).
    """
    # One loader per registry; create it on first use.
    if registry not in self._registries:
        self._registries[registry] = RegistryLoader(registry)
    loader = self._registries[registry]

    self.insert_func_defn(loader.new_registrations("functions"))
    self._insert_getattr_defn(loader.new_registrations("getattrs"))
    self._insert_setattr_defn(loader.new_registrations("setattrs"))
    self._insert_cast_defn(loader.new_registrations("casts"))
    self._insert_get_constant_defn(loader.new_registrations("constants"))
360
+
361
def install_external_registry(self, registry):
    """
    Install only the third-party registrations found in a shared registry
    such as Numba's builtin_registry, leaving out anything that comes from
    the numba.* namespace.

    This lets third-party implementations living in the shared
    builtin_registry be picked up without also pulling in Numba's
    CPU-specific implementations.

    Note: getattr/setattr entries are filtered on the ``__module__`` of the
    TYPE in the signature, not of the implementation, because the
    @lower_getattr/@lower_setattr decorators always set
    impl.__module__ = "numba.*" regardless of where they are called from.
    """

    def outside_numba(obj):
        """True unless ``obj.__module__`` starts with "numba."."""
        try:
            inside = obj.__module__.startswith("numba.")
        except AttributeError:
            # No usable __module__: treat as external.
            return True
        return not inside

    def sig_type_outside_numba(sig):
        """Apply ``outside_numba`` to the first type of *sig*."""
        try:
            return sig and outside_numba(sig[0])
        except (AttributeError, IndexError):
            return True

    if registry not in self._registries:
        self._registries[registry] = RegistryLoader(registry)
    loader = self._registries[registry]

    # Collect every filtered list before inserting anything.
    kept_funcs = [
        (impl, func, sig)
        for impl, func, sig in loader.new_registrations("functions")
        if outside_numba(impl)
    ]
    kept_getattrs = [
        (impl, attr, sig)
        for impl, attr, sig in loader.new_registrations("getattrs")
        if sig_type_outside_numba(sig)
    ]
    kept_setattrs = [
        (impl, attr, sig)
        for impl, attr, sig in loader.new_registrations("setattrs")
        if sig_type_outside_numba(sig)
    ]
    kept_casts = [
        (impl, sig)
        for impl, sig in loader.new_registrations("casts")
        if outside_numba(impl)
    ]
    kept_constants = [
        (impl, sig)
        for impl, sig in loader.new_registrations("constants")
        if outside_numba(impl)
    ]

    self.insert_func_defn(kept_funcs)
    self._insert_getattr_defn(kept_getattrs)
    self._insert_setattr_defn(kept_setattrs)
    self._insert_cast_defn(kept_casts)
    self._insert_get_constant_defn(kept_constants)
427
+
428
def insert_func_defn(self, defns):
    """Register each ``(impl, func, sig)`` triple of *defns* under the
    overload selector for *func*."""
    for implementation, target_func, formal_sig in defns:
        selector = self._defns[target_func]
        selector.append(implementation, formal_sig)
431
+
432
+ def _insert_getattr_defn(self, defns):
433
+ for impl, attr, sig in defns:
434
+ self._getattrs[attr].append(impl, sig)
435
+
436
+ def _insert_setattr_defn(self, defns):
437
+ for impl, attr, sig in defns:
438
+ self._setattrs[attr].append(impl, sig)
439
+
440
+ def _insert_cast_defn(self, defns):
441
+ for impl, sig in defns:
442
+ self._casts.append(impl, sig)
443
+
444
+ def _insert_get_constant_defn(self, defns):
445
+ for impl, sig in defns:
446
+ self._get_constants.append(impl, sig)
447
+
448
def insert_user_function(self, func, fndesc, libs=()):
    """Wrap *fndesc* and *libs* with ``user_function`` and register the
    result for *func* under the wrapper's own signature."""
    wrapper = user_function(fndesc, libs)
    selector = self._defns[func]
    selector.append(wrapper, wrapper.signature)
451
+
452
def insert_generator(self, genty, gendesc, libs=()):
    """Record the descriptor and the ``user_generator`` implementation for
    the generator type *genty*."""
    assert isinstance(genty, types.Generator)
    generator_impl = user_generator(gendesc, libs)
    self._generators[genty] = (gendesc, generator_impl)
456
+
457
def remove_user_function(self, func):
    """
    Forget every definition registered for the user function *func*.
    A KeyError is raised when *func* has no entry.
    """
    registry = self._defns
    del registry[func]
463
+
464
def get_external_function_type(self, fndesc):
    """Build the LLVM function type for *fndesc*, using the argument
    representation for the parameters and for the return type."""
    llvm_params = [self.get_argument_type(t) for t in fndesc.argtypes]
    # The return type is used as-is, not wrapped in a pointer.
    llvm_ret = self.get_argument_type(fndesc.restype)
    return llvmir.FunctionType(llvm_ret, llvm_params)
470
+
471
def declare_function(self, module, fndesc):
    """Get or insert the function described by *fndesc* in *module*,
    decorate it through the calling convention, and return it."""
    conv = self.call_conv
    fnty = conv.get_function_type(fndesc.restype, fndesc.argtypes)
    fn = cgutils.get_or_insert_function(module, fnty, fndesc.mangled_name)
    conv.decorate_function(
        fn, fndesc.args, fndesc.argtypes, noalias=fndesc.noalias
    )
    if fndesc.inline:
        attrs = fn.attributes
        attrs.add("alwaysinline")
        # alwaysinline overrides optnone, so drop the conflicting attributes.
        attrs.discard("noinline")
        attrs.discard("optnone")
    return fn
483
+
484
def declare_external_function(self, module, fndesc):
    """Get or insert a declaration for the external function *fndesc* in
    *module* and name its arguments after ``fndesc.args``."""
    fnty = self.get_external_function_type(fndesc)
    fn = cgutils.get_or_insert_function(module, fnty, fndesc.mangled_name)
    assert fn.is_declaration
    for arg_name, llvm_arg in zip(fndesc.args, fn.args):
        llvm_arg.name = "arg.%s" % arg_name
    return fn
491
+
492
def insert_const_string(self, mod, string):
    """
    Store the str *string* in module *mod* as a NUL-terminated UTF-8
    constant and return it bitcast to the generic pointer type.
    """
    encoded = string.encode("utf-8") + b"\x00"
    global_name = ".const.%s" % string
    payload = cgutils.make_bytearray(encoded)
    gv = self.insert_unique_const(mod, global_name, payload)
    return Constant.bitcast(gv, GENERIC_POINTER)
501
+
502
def insert_const_bytes(self, mod, bytes, name=None):
    """
    Store the `bytes` object *bytes* in module *mod* as a constant and
    return it bitcast to the generic pointer type. The global is named
    from *name* when that is truthy, otherwise from ``hash(bytes)``.
    """
    suffix = name or hash(bytes)
    global_name = ".bytes.%s" % (suffix,)
    payload = cgutils.make_bytearray(bytes)
    gv = self.insert_unique_const(mod, global_name, payload)
    return Constant.bitcast(gv, GENERIC_POINTER)
511
+
512
def insert_unique_const(self, mod, name, val):
    """
    Return the global called *name* in module *mod*, creating it as a
    constant with LLVM value *val* when it does not exist yet.
    """
    try:
        return mod.get_global(name)
    except KeyError:
        # Not present yet: define it now.
        return cgutils.global_constant(mod, name, val)
523
+
524
def get_argument_type(self, ty):
    """Return the argument type reported by the data model of *ty*."""
    model = self.data_model_manager[ty]
    return model.get_argument_type()
526
+
527
def get_return_type(self, ty):
    """Return the return type reported by the data model of *ty*."""
    model = self.data_model_manager[ty]
    return model.get_return_type()
529
+
530
def get_data_type(self, ty):
    """
    Return the LLVM data representation of the Numba type *ty*, i.e. the
    form that is safe for storage. Record data are stored as byte array.

    The result is a llvmlite.ir.Type object, or None if the type is an
    opaque pointer (???).
    """
    model = self.data_model_manager[ty]
    return model.get_data_type()
539
+
540
def get_value_type(self, ty):
    """Return the value type reported by the data model of *ty*."""
    model = self.data_model_manager[ty]
    return model.get_value_type()
542
+
543
def pack_value(self, builder, ty, value, ptr, align=None):
    """
    Convert *value* to its data representation and store it at *ptr*.
    *align*, when given, is the guaranteed alignment of *ptr* (the standard
    ABI alignment is assumed by default).
    """
    model = self.data_model_manager[ty]
    as_stored = model.as_data(builder, value)
    builder.store(as_stored, ptr, align=align)
551
+
552
def unpack_value(self, builder, ty, ptr, align=None):
    """
    Load a value of type *ty* from the storage at *ptr*.
    *align*, when given, is the guaranteed alignment of *ptr* (the standard
    ABI alignment is assumed by default).
    """
    model = self.data_model_manager[ty]
    loaded = model.load_from_data_pointer(builder, ptr, align)
    return loaded
560
+
561
def get_constant_generic(self, builder, ty, val):
    """
    Produce the LLVM constant for value *val* of Numba type *ty*.

    Any NotImplementedError raised while finding or running the constant
    implementation is replaced by one naming the type.
    """
    lookup_key = (ty,)
    try:
        make_constant = self._get_constants.find(lookup_key)
        return make_constant(self, builder, ty, val)
    except NotImplementedError:
        raise NotImplementedError(
            "Cannot lower constant of type '%s'" % (ty,)
        )
572
+
573
def get_constant(self, ty, val):
    """
    Builder-less variant of get_constant_generic().
    Works only for simple types.
    """
    # HACK: builder=None keeps the historical get_constant() API working.
    no_builder = None
    return self.get_constant_generic(no_builder, ty, val)
580
+
581
def get_constant_undef(self, ty):
    """Return an undefined LLVM constant of the value type of *ty*."""
    llvm_type = self.get_value_type(ty)
    return Constant(llvm_type, llvmir.Undefined)
584
+
585
def get_constant_null(self, ty):
    """Return an LLVM constant of the value type of *ty* built from
    ``None``."""
    llvm_type = self.get_value_type(ty)
    return Constant(llvm_type, None)
588
+
589
def get_function(self, fn, sig, _firstcall=True):
    """
    Look up the implementation of function *fn* for signature *sig*.
    The return value is a callable with the signature (builder, args).

    Raises NotImplementedError when nothing is found, even after one
    refresh of the context.
    """
    assert sig is not None
    sig = sig.as_function()
    # Callable types supply their own lookup key; anything else is its own.
    key = fn.get_impl_key(sig) if isinstance(fn, types.Callable) else fn
    overloads = self._defns[key]

    try:
        return _wrap_impl(overloads.find(sig.args), self, sig)
    except errors.NumbaNotImplementedError:
        pass
    if isinstance(fn, types.Type):
        # A type instance: fall back to a definition for its type class.
        try:
            return self.get_function(type(fn), sig)
        except NotImplementedError:
            # Keep going so the final error names the type instance.
            pass

    # On the first attempt, refresh the context to load new registries and
    # retry exactly once.
    if _firstcall:
        self.refresh()
        return self.get_function(fn, sig, _firstcall=False)

    raise NotImplementedError(
        "No definition for lowering %s%s" % (key, sig)
    )
624
+
625
def get_generator_desc(self, genty):
    """Return the descriptor stored for the generator type *genty*."""
    entry = self._generators[genty]
    return entry[0]
628
+
629
def get_generator_impl(self, genty):
    """Return the implementation stored for the generator type *genty*,
    after registering its ``libs`` (if any) for linking."""
    entry = self._generators[genty]
    implementation = entry[1]
    self.add_linking_libs(getattr(implementation, "libs", ()))
    return implementation
634
+
635
def get_bound_function(self, builder, obj, ty):
    """Return *obj* unchanged after asserting its LLVM type is the value
    type of *ty*."""
    expected = self.get_value_type(ty)
    assert expected == obj.type
    return obj
638
+
639
def get_getattr(self, typ, attr):
    """
    Find the getattr() implementation for type *typ* and attribute *attr*.
    The return value is a callable with the signature
    (context, builder, typ, val, attr), or None when no implementation is
    needed.

    Raises NotImplementedError if neither a specific nor a generic
    implementation is registered.
    """
    const_attr = (typ, attr) not in self.nonconst_module_attrs
    is_module = isinstance(typ, types.Module)
    if is_module and const_attr:
        # Module-level globals treated as constants.
        # XXX We shouldn't have to retype this
        attrty = self.typing_context.resolve_module_constants(typ, attr)
        if attrty is None or isinstance(attrty, types.Dummy):
            # Dummies (functions, modules...) need no implementation here;
            # they are dealt with later.
            return None

        pyval = getattr(typ.pymod, attr)

        def imp(context, builder, typ, val, attr):
            llval = self.get_constant_generic(builder, attrty, pyval)
            return impl_ret_borrowed(context, builder, attrty, llval)

        return imp

    # Try the attribute-specific implementations first, then the generic
    # ones registered under None.
    for registry_key in (attr, None):
        try:
            return self._getattrs[registry_key].find((typ,))
        except errors.NumbaNotImplementedError:
            continue

    raise NotImplementedError(
        "No definition for lowering %s.%s" % (typ, attr)
    )
681
+
682
def get_setattr(self, attr, sig):
    """
    Find the setattr() implementation for attribute *attr* and the
    two-argument signature *sig*.
    The return value is a callable with the signature (builder, args).

    Raises NotImplementedError if neither a specific nor a generic
    implementation is registered.
    """
    assert len(sig.args) == 2
    typ = sig.args[0]
    valty = sig.args[1]

    def wrap_setattr(impl):
        # Bind the context, signature and attribute name into the callable.
        def wrapped(builder, args):
            return impl(self, builder, sig, args, attr)

        return wrapped

    # Attribute-specific implementations first, then the generic ones
    # registered under None.
    for registry_key in (attr, None):
        try:
            found = self._setattrs[registry_key].find((typ, valty))
            return wrap_setattr(found)
        except errors.NumbaNotImplementedError:
            continue

    raise NotImplementedError(
        "No definition for lowering %s.%s = %s" % (typ, attr, valty)
    )
714
+
715
def get_argument_value(self, builder, ty, val):
    """
    Convert *val* from its argument representation to the local value
    representation.
    """
    model = self.data_model_manager[ty]
    return model.from_argument(builder, val)
720
+
721
def get_returned_value(self, builder, ty, val):
    """
    Convert *val* from its return-value representation to the local value
    representation.
    """
    model = self.data_model_manager[ty]
    return model.from_return(builder, val)
726
+
727
def get_return_value(self, builder, ty, val):
    """
    Convert *val* from the local value representation to the return type
    representation.
    """
    model = self.data_model_manager[ty]
    return model.as_return(builder, val)
732
+
733
def get_value_as_argument(self, builder, ty, val):
    """Convert *val* from the local value representation to the argument
    type representation."""
    model = self.data_model_manager[ty]
    return model.as_argument(builder, val)
736
+
737
def get_value_as_data(self, builder, ty, val):
    """Convert *val* to its data representation via the data model of
    *ty*."""
    model = self.data_model_manager[ty]
    return model.as_data(builder, val)
739
+
740
def get_data_as_value(self, builder, ty, val):
    """Convert *val* from its data representation via the data model of
    *ty*."""
    model = self.data_model_manager[ty]
    return model.from_data(builder, val)
742
+
743
def pair_first(self, builder, val, ty):
    """
    Return the ``first`` member of the heterogeneous pair *val*.
    """
    helper = self.make_helper(builder, ty, val)
    return helper.first
749
+
750
def pair_second(self, builder, val, ty):
    """
    Return the ``second`` member of the heterogeneous pair *val*.
    """
    helper = self.make_helper(builder, ty, val)
    return helper.second
756
+
757
def cast(self, builder, val, fromty, toty):
    """
    Convert *val* from type *fromty* to type *toty*.
    These are the implicit conversions that arise from the granularity of
    the Numba type system, or from lax Python semantics.

    Raises NumbaNotImplementedError when no cast is registered.
    """
    # An undefined variable becomes the null constant of the target type.
    if fromty is types._undef_var:
        return self.get_constant_null(toty)
    # Nothing to do for identical types or an ``Any`` target.
    if fromty == toty or toty == types.Any:
        return val
    try:
        caster = self._casts.find((fromty, toty))
        return caster(self, builder, fromty, toty, val)
    except errors.NumbaNotImplementedError:
        raise errors.NumbaNotImplementedError(
            "Cannot cast %s to %s: %s" % (fromty, toty, val)
        )
775
+
776
def generic_compare(self, builder, key, argtypes, args):
    """
    Compare two LLVM values of the given Numba types with the comparison
    *key* (e.g. '=='). Both values are first cast to a common safe
    conversion type.
    """
    left_ty, right_ty = argtypes
    left, right = args
    typing = self.typing_context
    common = typing.unify_types(left_ty, right_ty)
    assert common is not None
    left_cast = self.cast(builder, left, left_ty, common)
    right_cast = self.cast(builder, right, right_ty, common)
    fnty = typing.resolve_value_type(key)
    # Both operands now share the unified type, so the signature is
    # homogeneous.
    cmpsig = fnty.get_call_type(self.typing_context, (common, common), {})
    cmpfunc = self.get_function(fnty, cmpsig)
    self.add_linking_libs(getattr(cmpfunc, "libs", ()))
    return cmpfunc(builder, (left_cast, right_cast))
794
+
795
def make_optional_none(self, builder, valtype):
    """Build an ``Optional(valtype)`` value whose valid bit is false."""
    optional_ty = types.Optional(valtype)
    helper = self.make_helper(builder, optional_ty)
    helper.valid = cgutils.false_bit
    return helper._getvalue()
799
+
800
def make_optional_value(self, builder, valtype, value):
    """Build an ``Optional(valtype)`` value holding *value* with its valid
    bit set to true."""
    optional_ty = types.Optional(valtype)
    helper = self.make_helper(builder, optional_ty)
    helper.valid = cgutils.true_bit
    helper.data = value
    return helper._getvalue()
805
+
806
def is_true(self, builder, typ, val):
    """
    Compute the truth value of *val*, a value of Numba type *typ*, by
    calling the implementation resolved for ``bool``.
    """
    typing = self.typing_context
    bool_fnty = typing.resolve_value_type(bool)
    bool_sig = bool_fnty.get_call_type(self.typing_context, (typ,), {})
    truth_impl = self.get_function(bool_fnty, bool_sig)
    return truth_impl(builder, (val,))
814
+
815
def get_c_value(self, builder, typ, name, dllimport=False):
    """
    Return the global value with the C-accessible *name* and the given
    LLVM type, adding it to the module if it is not there yet.
    If *dllimport* is true, the symbol will be marked as imported
    from a DLL (necessary for AOT compilation under Windows).
    """
    module = builder.function.module
    try:
        return module.globals[name]
    except KeyError:
        pass

    gv = cgutils.add_global_variable(module, typ, name)
    # Only a freshly added global is ever marked as a DLL import.
    if dllimport and self.aot_mode and sys.platform == "win32":
        gv.storage_class = "dllimport"
    return gv
830
+
831
def call_external_function(self, builder, callee, argtys, args):
    """Convert each argument to its argument representation and emit a
    call to *callee*, returning the call's result."""
    converted = []
    for arg_type, arg_value in zip(argtys, args):
        converted.append(
            self.get_value_as_argument(builder, arg_type, arg_value)
        )
    return builder.call(callee, converted)
838
+
839
def get_function_pointer_type(self, typ):
    """Return the data type reported by the data model of *typ*."""
    model = self.data_model_manager[typ]
    return model.get_data_type()
841
+
842
def call_function_pointer(self, builder, funcptr, args, cconv=None):
    """Emit a call through *funcptr* with *args*, forwarding *cconv* to the
    builder."""
    call_result = builder.call(funcptr, args, cconv=cconv)
    return call_result
844
+
845
def print_string(self, builder, text):
    """Emit a call to ``puts`` with *text* as its single argument."""
    puts_type = llvmir.FunctionType(llvmir.IntType(32), [GENERIC_POINTER])
    puts_fn = cgutils.get_or_insert_function(
        builder.module, puts_type, "puts"
    )
    return builder.call(puts_fn, [text])
851
+
852
def debug_print(self, builder, text):
    """Insert ``str(text)`` as a constant string and print it with
    ``print_string``."""
    message = self.insert_const_string(builder.module, str(text))
    self.print_string(builder, message)
856
+
857
def printf(self, builder, format_string, *args):
    """Emit a call to the variadic ``printf`` with *format_string* and
    *args*. A str format is first inserted as a constant string; any other
    value is passed as-is."""
    mod = builder.module
    cstr = (
        self.insert_const_string(mod, format_string)
        if isinstance(format_string, str)
        else format_string
    )
    printf_type = llvmir.FunctionType(
        llvmir.IntType(32), (GENERIC_POINTER,), var_arg=True
    )
    printf_fn = cgutils.get_or_insert_function(mod, printf_type, "printf")
    return builder.call(printf_fn, (cstr, *args))
868
+
869
def get_struct_type(self, struct):
    """
    Build the LLVM literal struct type for the Structure class *struct*,
    one member per entry of ``struct._fields``.
    """
    member_types = []
    for _, field_type in struct._fields:
        member_types.append(self.get_value_type(field_type))
    return llvmir.LiteralStructType(member_types)
875
+
876
def get_dummy_value(self):
    """Return a constant of the dummy type built from ``None``."""
    dummy_type = self.get_dummy_type()
    return Constant(dummy_type, None)
878
+
879
def get_dummy_type(self):
    """Return the LLVM type used for dummy values (the generic pointer)."""
    dummy_type = GENERIC_POINTER
    return dummy_type
881
+
882
def _compile_subroutine_no_cache(
    self, builder, impl, sig, locals={}, flags=None
):
    """
    Compile *impl* for *sig* so it can be used inside a nopython function,
    without emitting any code that calls it. Returns the compile result.

    Note this context's flags are not inherited: when *flags* is None a
    fresh ``Flags`` object is used, carrying over only ``nrt`` from the
    top of the config stack.
    """
    # Imported here rather than at module level.
    from numba.cuda import compiler
    from numba.cuda.flags import Flags

    with global_compiler_lock:
        library = self.codegen().create_library(impl.__name__)
        if flags is None:
            config_stack = targetconfig.ConfigStack()
            flags = Flags()
            if config_stack:
                active = config_stack.top()
                if active.is_set("nrt") and active.nrt:
                    flags.nrt = True

        # Applied to caller-supplied flags as well as to fresh ones.
        for option in ("no_compile", "no_cpython_wrapper", "no_cfunc_wrapper"):
            setattr(flags, option, True)

        cres = compiler.compile_internal(
            self.typing_context,
            self,
            library,
            impl,
            sig.args,
            sig.return_type,
            flags,
            locals=locals,
        )

        # Allow inlining the function inside callers.
        self.active_code_library.add_linking_library(cres.library)
        return cres
925
+
926
def compile_subroutine(
    self, builder, impl, sig, locals={}, flags=None, caching=True
):
    """
    Compile the function *impl* for the given *sig* (in nopython mode).
    Return an instance of CompileResult.

    When *caching* is true, an existing entry of *.cached_internal_func*
    is reused. A freshly compiled result is stored there in either case.
    """
    cache_key = (impl.__code__, sig, type(self.error_model))
    cached = None
    if caching:
        if impl.__closure__:
            # XXX This obviously won't work if a cell's value is
            # unhashable.
            cache_key += tuple(
                cell.cell_contents for cell in impl.__closure__
            )
        cached = self.cached_internal_func.get(cache_key)
    if cached is None:
        self.cached_internal_func[cache_key] = (
            self._compile_subroutine_no_cache(
                builder, impl, sig, locals=locals, flags=flags
            )
        )

    cres = self.cached_internal_func[cache_key]
    # Allow inlining the function inside callers.
    self.active_code_library.add_linking_library(cres.library)
    return cres
955
+
956
def compile_internal(self, builder, impl, sig, args, locals={}):
    """
    Compile *impl* for *sig* through compile_subroutine(), then emit a
    call to the compiled function with *args* and return the result of
    that call.
    """
    fndesc = self.compile_subroutine(builder, impl, sig, locals).fndesc
    return self.call_internal(builder, fndesc, sig, args)
963
+
964
def call_internal(self, builder, fndesc, sig, args):
    """
    Emit a call to the internally compiled function described by
    *fndesc*, passing *args*.

    If the call reports an error status, that status is propagated
    through the calling convention.  The returned value is passed through
    ``imputils.fix_returning_optional`` before being handed back.
    """
    call_status, retval = self.call_internal_no_propagate(
        builder, fndesc, sig, args
    )

    with cgutils.if_unlikely(builder, call_status.is_error):
        self.call_conv.return_status_propagate(builder, call_status)

    return imputils.fix_returning_optional(
        self, builder, sig, call_status, retval
    )
977
+
978
def call_internal_no_propagate(self, builder, fndesc, sig, args):
    """
    Like `.call_internal()`, except that the return status is neither
    checked nor propagated: the ``(status, result)`` pair is handed back
    to the caller as is.
    """
    # Declare the generated function in the builder's module, then call it
    callee = self.declare_function(builder.module, fndesc)
    call_status, retval = self.call_conv.call_function(
        builder, callee, sig.return_type, sig.args, args
    )
    return call_status, retval
989
+
990
def call_unresolved(self, builder, name, sig, args):
    """
    Emit a call to a symbol called *name* that is not resolved yet.

    Note: this is used for recursive calls.

    Consider mutual recursion::

        @njit
        def foo(): ...  # calls bar()


        @njit
        def bar(): ...  # calls foo()


        foo()

    When foo() is called, bar() is completely compiled (codegen'ed and
    loaded) before foo() is.  MCJIT's eager compilation does not allow
    loading a module containing declare-only functions (which bar() would
    need for foo()), so call_unresolved injects a global variable that the
    "linker" can update even after MCJIT has loaded the module.  The
    linker allocates space for that global variable before the bar()
    module is loaded; once the foo() module is defined later on, it
    updates bar()'s reference to foo().

    The legacy lazy JIT and the new ORC JIT would accept a declare-only
    function in a module as long as it is defined by the time it is first
    used.
    """
    # Insert an unresolved reference to the function being called.
    fnty = self.call_conv.get_function_type(sig.return_type, sig.args)
    callee = self.codegen().insert_unresolved_ref(builder, fnty, name)

    # Normal call sequence
    call_status, retval = self.call_conv.call_function(
        builder, callee, sig.return_type, sig.args, args
    )
    with cgutils.if_unlikely(builder, call_status.is_error):
        self.call_conv.return_status_propagate(builder, call_status)

    return imputils.fix_returning_optional(
        self, builder, sig, call_status, retval
    )
1034
+
1035
def get_executable(self, func, fndesc, env):
    """
    Not implemented here: always raises ``NotImplementedError``.
    """
    raise NotImplementedError()
1037
+
1038
def get_python_api(self, builder):
    """
    Return a new ``PythonAPI`` object bound to this context and *builder*.
    """
    python_api = PythonAPI(self, builder)
    return python_api
1040
+
1041
def sentry_record_alignment(self, rectyp, attr):
    """
    Check that field *attr* of record type *rectyp* is aligned, raising
    ``errors.NumbaTypeError`` when its offset is not a multiple of the ABI
    alignment of its data type.

    Nothing is checked unless ``self.strict_alignment`` is true.
    Assumes offset starts from a properly aligned location.
    """
    if not self.strict_alignment:
        return

    field_offset = rectyp.offset(attr)
    field_type = rectyp.typeof(attr)
    if isinstance(field_type, types.NestedArray):
        # For a NestedArray the alignment that matters is the one of the
        # array's element type, not of the array structure itself
        field_type = field_type.dtype

    required = self.get_abi_alignment(self.get_data_type(field_type))
    if field_offset % required:
        template = "{rec}.{attr} of type {type} is not aligned"
        raise errors.NumbaTypeError(
            template.format(rec=rectyp, attr=attr, type=field_type)
        )
1059
+
1060
def get_helper_class(self, typ, kind="value"):
    """
    Return the struct proxy class that ``cgutils.create_struct_proxy``
    builds for *typ* and *kind*.
    """
    # XXX handle all types: complex, array, etc.
    # XXX should it be a method on the model instead? this would allow a
    # default kind...
    proxy_cls = cgutils.create_struct_proxy(typ, kind)
    return proxy_cls
1067
+
1068
def _make_helper(self, builder, typ, value=None, ref=None, kind="value"):
    """
    Instantiate the helper class of *typ* / *kind* for this context and
    *builder*, forwarding *value* and *ref* as keyword arguments.
    """
    return self.get_helper_class(typ, kind)(
        self, builder, value=value, ref=ref
    )
1071
+
1072
def make_helper(self, builder, typ, value=None, ref=None):
    """
    Return a helper object giving access to the members of *typ* for the
    given *value* or *ref*.  The helper is of the "value" kind.
    """
    value_helper = self._make_helper(builder, typ, value, ref, kind="value")
    return value_helper
1078
+
1079
def make_data_helper(self, builder, typ, ref=None):
    """
    Same as make_helper(), except that the value is treated as stored in
    memory instead of being a live value (the "data" kind of helper).
    """
    data_helper = self._make_helper(builder, typ, ref=ref, kind="data")
    return data_helper
1085
+
1086
def make_array(self, typ):
    """
    Return the result of ``numba.cuda.np.arrayobj.make_array`` for *typ*.

    The module is imported inside the method.
    """
    from numba.cuda.np import arrayobj as _arrayobj

    return _arrayobj.make_array(typ)
1090
+
1091
def populate_array(self, arr, **kwargs):
    """
    Populate the array structure *arr*, forwarding every keyword argument
    to ``numba.cuda.np.arrayobj.populate_array``.
    """
    from numba.cuda.np import arrayobj as _arrayobj

    return _arrayobj.populate_array(arr, **kwargs)
1098
+
1099
def make_complex(self, builder, typ, value=None):
    """
    Return a helper object for accessing the members of a complex number
    of type *typ*.  *typ* must be an instance of ``types.Complex``.
    """
    assert isinstance(typ, types.Complex), typ
    complex_helper = self.make_helper(builder, typ, value)
    return complex_helper
1105
+
1106
def make_tuple(self, builder, typ, values):
    """
    Build a tuple of type *typ* holding *values*.

    Starting from an undefined constant of *typ*, each value is inserted
    at its position with ``builder.insert_value``.
    """
    aggregate = self.get_constant_undef(typ)
    for position, element in enumerate(values):
        aggregate = builder.insert_value(aggregate, element, position)
    return aggregate
1114
+
1115
def make_constant_array(self, builder, typ, ary):
    """
    Create an array structure reifying the given constant array.
    A low-level contiguous array constant is created in the LLVM IR.

    Two strategies are used for the data pointer:

    * When ``self.allow_dynamic_globals`` is true and either the layout
      of *typ* is not found in ``"FC"`` or ``ary.nbytes`` exceeds
      ``size_limit`` (10**6), the data is not copied: the address of the
      array's own buffer and ``id(ary)`` are embedded through
      add_dynamic_addr(), the latter becoming the ``parent`` of the
      array structure.
    * Otherwise *ary* is flattened in the order given by ``typ.layout``
      and its bytes are emitted as a global constant; ``parent`` is None.

    In both cases shape, strides and itemsize are taken from *ary* and
    ``meminfo`` is None.  Returns the value of the populated array
    structure.
    """
    datatype = self.get_data_type(typ.dtype)
    # don't freeze ary of non-contig or bigger than 1MB
    size_limit = 10**6

    if self.allow_dynamic_globals and (
        typ.layout not in "FC" or ary.nbytes > size_limit
    ):
        # get pointer from the ary
        dataptr = ary.ctypes.data
        data = self.add_dynamic_addr(
            builder, dataptr, info=str(type(dataptr))
        )
        # The address of the Python object itself is recorded as the
        # parent of the array structure (see populate_array() call below).
        rt_addr = self.add_dynamic_addr(
            builder, id(ary), info=str(type(ary))
        )
    else:
        # Handle data: reify the flattened array in "C" or "F" order as a
        # global array of bytes.
        flat = ary.flatten(order=typ.layout)
        # Note: we use `bytearray(flat.data)` instead of `bytearray(flat)` to
        # workaround issue #1850 which is due to numpy issue #3147
        consts = cgutils.create_constant_array(
            llvmir.IntType(8), bytearray(flat.data)
        )
        data = cgutils.global_constant(builder, ".const.array.data", consts)
        # Ensure correct data alignment (issue #1933)
        data.align = self.get_abi_alignment(datatype)
        # No reference to parent ndarray
        rt_addr = None

    # Handle shape
    llintp = self.get_value_type(types.intp)
    shapevals = [self.get_constant(types.intp, s) for s in ary.shape]
    cshape = cgutils.create_constant_array(llintp, shapevals)

    # Handle strides
    stridevals = [self.get_constant(types.intp, s) for s in ary.strides]
    cstrides = cgutils.create_constant_array(llintp, stridevals)

    # Create array structure
    cary = self.make_array(typ)(self, builder)

    intp_itemsize = self.get_constant(types.intp, ary.dtype.itemsize)
    self.populate_array(
        cary,
        # The data pointer is cast to the pointer type of the structure's
        # data member.
        data=builder.bitcast(data, cary.data.type),
        shape=cshape,
        strides=cstrides,
        itemsize=intp_itemsize,
        parent=rt_addr,
        meminfo=None,
    )

    return cary._getvalue()
1174
+
1175
def add_dynamic_addr(self, builder, intaddr, info):
    """
    Return the dynamic address *intaddr* as a void pointer `i8*`.

    Internally, a global variable holding the address is added to the
    builder's module to inform the lowerer that dynamic addresses are in
    use.  Caching will be disabled.  The value handed back is a load from
    that global variable.  *info* is accepted but not used here.
    """
    assert self.allow_dynamic_globals, "dyn globals disabled in this target"
    assert isinstance(intaddr, int), "dyn addr not of int type"

    module = builder.module
    voidptr_ty = self.get_value_type(types.voidptr)
    pointer_const = self.get_constant(types.uintp, intaddr).inttoptr(
        voidptr_ty
    )

    # The address value is embedded in the symbol to keep the name unique
    global_name = "numba.dynamic.globals.{:x}".format(intaddr)
    global_var = cgutils.add_global_variable(module, voidptr_ty, global_name)

    # linkonce linkage lets this GV merge with others of the same name,
    # and keeps optimizations from assuming its value.
    global_var.linkage = "linkonce"
    global_var.initializer = pointer_const
    return builder.load(global_var)
1195
+
1196
def get_abi_sizeof(self, ty):
    """
    Return the ABI size of the LLVM type *ty*, computed against this
    context's ``target_data``.
    """
    assert isinstance(ty, llvmir.Type), "Expected LLVM type"
    size_of = ty.get_abi_size
    return size_of(self.target_data)
1202
+
1203
def get_abi_alignment(self, ty):
    """
    Return the ABI alignment of the LLVM type *ty*, computed against this
    context's ``target_data``.
    """
    assert isinstance(ty, llvmir.Type), "Expected LLVM type"
    alignment_of = ty.get_abi_alignment
    return alignment_of(self.target_data)
1209
+
1210
def get_preferred_array_alignment(context, ty):
    """
    Return the preferred array alignment for Numba type *ty*.

    The value is the constant 32, whatever *ty* is.
    """
    # AVX prefers 32-byte alignment
    preferred_alignment = 32
    return preferred_alignment
1216
+
1217
def post_lowering(self, mod, library):
    """
    Hook for target specific post-lowering transformations.

    This implementation does nothing.
    """
    return None
1219
+
1220
def create_module(self, name):
    """Create a LLVM module called *name*.

    This default implementation always raises ``NotImplementedError``;
    subclasses should implement this method.
    """
    raise NotImplementedError()
1228
+
1229
@property
def active_code_library(self):
    """The active code library: the last entry of ``_codelib_stack``."""
    stack = self._codelib_stack
    return stack[-1]
1233
+
1234
@contextmanager
def push_code_library(self, lib):
    """
    Context manager making *lib* the active code library of the context.

    *lib* is appended to ``_codelib_stack`` on entry and the last entry
    of the stack is removed again on exit, including when the body
    raises.
    """
    self._codelib_stack.append(lib)
    try:
        yield
    finally:
        del self._codelib_stack[-1]
1242
+
1243
def add_linking_libs(self, libs):
    """
    Register every library of the iterable *libs* as a linking library of
    the *active_code_library*.
    """
    target = self.active_code_library
    for entry in libs:
        target.add_linking_library(entry)
1248
+
1249
def get_ufunc_info(self, ufunc_key):
    """Look up the ufunc implementation for a given ufunc object.

    This default implementation always raises ``NotImplementedError``.
    Subclasses may raise ``KeyError`` to signal that the given
    ``ufunc_key`` is not available.

    Parameters
    ----------
    ufunc_key : NumPy ufunc

    Returns
    -------
    res : dict[str, callable]
        A mapping of a NumPy ufunc type signature to a lower-level
        implementation.
    """
    message = f"{self} does not support ufunc"
    raise NotImplementedError(message)
1267
+
1268
+
1269
class _wrap_impl(object):
    """
    A wrapper object to call an implementation function with some predefined
    (context, signature) arguments.
    The wrapper also forwards attribute queries, which is important.
    """

    def __init__(self, imp, context, sig):
        # ``_callable`` is the deferred ``loc`` adapter; calling it yields
        # the function that is actually invoked by __call__.
        self._callable = _wrap_missing_loc(imp)
        self._imp = self._callable()
        self._context = context
        self._sig = sig

    def __call__(self, builder, args, loc=None):
        """
        Call the implementation with the stored context and signature,
        then register its ``libs`` (if any) as linking libraries of the
        context.  Returns whatever the implementation returned.
        """
        res = self._imp(self._context, builder, self._sig, args, loc=loc)
        # ``libs`` is resolved through __getattr__, i.e. on the
        # implementation; default to no libraries when it has none.
        self._context.add_linking_libs(getattr(self, "libs", ()))
        return res

    def __getattr__(self, item):
        """
        Forward lookups of attributes missing on the wrapper to the
        implementation.

        Raises AttributeError when the wrapper has no implementation yet.
        """
        # __getattr__ only runs after normal lookup failed.  Reading
        # ``self._imp`` here while it is not set (e.g. on an instance made
        # by ``__new__`` during copying/unpickling) would re-enter
        # __getattr__ without bound, so go through the instance dict.
        try:
            imp = self.__dict__["_imp"]
        except KeyError:
            raise AttributeError(item) from None
        return getattr(imp, item)

    def __repr__(self):
        return "<wrapped %s>" % repr(self._callable)
1292
+
1293
+
1294
def _has_loc(fn):
    """Return whether the signature of *fn* has a parameter named ``loc``."""
    parameters = utils.pysignature(fn).parameters
    return "loc" in parameters
1298
+
1299
+
1300
class _wrap_missing_loc(object):
    """
    Deferred adapter for implementation functions that do not take a
    ``loc`` keyword argument.

    The instance only stores the function; calling the instance returns
    either the function itself or a wrapper that discards ``loc``.
    """

    def __init__(self, fn):
        self.func = fn  # store this to help with debug

    def __call__(self):
        """Wrap function for missing ``loc`` keyword argument.
        Otherwise, return the original *fn*.
        """
        fn = self.func
        if _has_loc(fn):
            return fn

        def wrapper(*args, **kwargs):
            # Drop the unused loc.  A default is given so that calling
            # the wrapper without ``loc`` does not raise KeyError.
            kwargs.pop("loc", None)
            return fn(*args, **kwargs)

        # Copy the following attributes from the wrapped.
        # Following similar implementation as functools.wraps but
        # ignore attributes if not available (i.e fix py2.7)
        for attr in ("__name__", "libs"):
            try:
                val = getattr(fn, attr)
            except AttributeError:
                pass
            else:
                setattr(wrapper, attr, val)

        return wrapper

    def __repr__(self):
        # One-tuple operand: a bare tuple-valued ``func`` would otherwise
        # be unpacked by the % operator.
        return "<wrapped %s>" % (self.func,)