numba-cuda 0.22.0__cp312-cp312-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of numba-cuda might be problematic. Click here for more details.

Files changed (487)
  1. _numba_cuda_redirector.pth +4 -0
  2. _numba_cuda_redirector.py +89 -0
  3. numba_cuda/VERSION +1 -0
  4. numba_cuda/__init__.py +6 -0
  5. numba_cuda/_version.py +11 -0
  6. numba_cuda/numba/cuda/__init__.py +70 -0
  7. numba_cuda/numba/cuda/_internal/cuda_bf16.py +16394 -0
  8. numba_cuda/numba/cuda/_internal/cuda_fp16.py +8112 -0
  9. numba_cuda/numba/cuda/api.py +580 -0
  10. numba_cuda/numba/cuda/api_util.py +76 -0
  11. numba_cuda/numba/cuda/args.py +72 -0
  12. numba_cuda/numba/cuda/bf16.py +397 -0
  13. numba_cuda/numba/cuda/cache_hints.py +287 -0
  14. numba_cuda/numba/cuda/cext/__init__.py +2 -0
  15. numba_cuda/numba/cuda/cext/_devicearray.cpp +159 -0
  16. numba_cuda/numba/cuda/cext/_devicearray.cpython-312-aarch64-linux-gnu.so +0 -0
  17. numba_cuda/numba/cuda/cext/_devicearray.h +29 -0
  18. numba_cuda/numba/cuda/cext/_dispatcher.cpp +1098 -0
  19. numba_cuda/numba/cuda/cext/_dispatcher.cpython-312-aarch64-linux-gnu.so +0 -0
  20. numba_cuda/numba/cuda/cext/_hashtable.cpp +532 -0
  21. numba_cuda/numba/cuda/cext/_hashtable.h +135 -0
  22. numba_cuda/numba/cuda/cext/_helperlib.c +71 -0
  23. numba_cuda/numba/cuda/cext/_helperlib.cpython-312-aarch64-linux-gnu.so +0 -0
  24. numba_cuda/numba/cuda/cext/_helpermod.c +82 -0
  25. numba_cuda/numba/cuda/cext/_pymodule.h +38 -0
  26. numba_cuda/numba/cuda/cext/_typeconv.cpp +206 -0
  27. numba_cuda/numba/cuda/cext/_typeconv.cpython-312-aarch64-linux-gnu.so +0 -0
  28. numba_cuda/numba/cuda/cext/_typeof.cpp +1159 -0
  29. numba_cuda/numba/cuda/cext/_typeof.h +19 -0
  30. numba_cuda/numba/cuda/cext/capsulethunk.h +111 -0
  31. numba_cuda/numba/cuda/cext/mviewbuf.c +385 -0
  32. numba_cuda/numba/cuda/cext/mviewbuf.cpython-312-aarch64-linux-gnu.so +0 -0
  33. numba_cuda/numba/cuda/cext/typeconv.cpp +212 -0
  34. numba_cuda/numba/cuda/cext/typeconv.hpp +101 -0
  35. numba_cuda/numba/cuda/cg.py +67 -0
  36. numba_cuda/numba/cuda/cgutils.py +1294 -0
  37. numba_cuda/numba/cuda/cloudpickle/__init__.py +21 -0
  38. numba_cuda/numba/cuda/cloudpickle/cloudpickle.py +1598 -0
  39. numba_cuda/numba/cuda/cloudpickle/cloudpickle_fast.py +17 -0
  40. numba_cuda/numba/cuda/codegen.py +541 -0
  41. numba_cuda/numba/cuda/compiler.py +1396 -0
  42. numba_cuda/numba/cuda/core/analysis.py +758 -0
  43. numba_cuda/numba/cuda/core/annotations/__init__.py +0 -0
  44. numba_cuda/numba/cuda/core/annotations/pretty_annotate.py +288 -0
  45. numba_cuda/numba/cuda/core/annotations/type_annotations.py +305 -0
  46. numba_cuda/numba/cuda/core/base.py +1332 -0
  47. numba_cuda/numba/cuda/core/boxing.py +1411 -0
  48. numba_cuda/numba/cuda/core/bytecode.py +728 -0
  49. numba_cuda/numba/cuda/core/byteflow.py +2346 -0
  50. numba_cuda/numba/cuda/core/caching.py +744 -0
  51. numba_cuda/numba/cuda/core/callconv.py +392 -0
  52. numba_cuda/numba/cuda/core/codegen.py +171 -0
  53. numba_cuda/numba/cuda/core/compiler.py +199 -0
  54. numba_cuda/numba/cuda/core/compiler_lock.py +85 -0
  55. numba_cuda/numba/cuda/core/compiler_machinery.py +497 -0
  56. numba_cuda/numba/cuda/core/config.py +650 -0
  57. numba_cuda/numba/cuda/core/consts.py +124 -0
  58. numba_cuda/numba/cuda/core/controlflow.py +989 -0
  59. numba_cuda/numba/cuda/core/entrypoints.py +57 -0
  60. numba_cuda/numba/cuda/core/environment.py +66 -0
  61. numba_cuda/numba/cuda/core/errors.py +917 -0
  62. numba_cuda/numba/cuda/core/event.py +511 -0
  63. numba_cuda/numba/cuda/core/funcdesc.py +330 -0
  64. numba_cuda/numba/cuda/core/generators.py +387 -0
  65. numba_cuda/numba/cuda/core/imputils.py +509 -0
  66. numba_cuda/numba/cuda/core/inline_closurecall.py +1787 -0
  67. numba_cuda/numba/cuda/core/interpreter.py +3617 -0
  68. numba_cuda/numba/cuda/core/ir.py +1812 -0
  69. numba_cuda/numba/cuda/core/ir_utils.py +2638 -0
  70. numba_cuda/numba/cuda/core/optional.py +129 -0
  71. numba_cuda/numba/cuda/core/options.py +262 -0
  72. numba_cuda/numba/cuda/core/postproc.py +249 -0
  73. numba_cuda/numba/cuda/core/pythonapi.py +1859 -0
  74. numba_cuda/numba/cuda/core/registry.py +46 -0
  75. numba_cuda/numba/cuda/core/removerefctpass.py +123 -0
  76. numba_cuda/numba/cuda/core/rewrites/__init__.py +26 -0
  77. numba_cuda/numba/cuda/core/rewrites/ir_print.py +91 -0
  78. numba_cuda/numba/cuda/core/rewrites/registry.py +104 -0
  79. numba_cuda/numba/cuda/core/rewrites/static_binop.py +41 -0
  80. numba_cuda/numba/cuda/core/rewrites/static_getitem.py +189 -0
  81. numba_cuda/numba/cuda/core/rewrites/static_raise.py +100 -0
  82. numba_cuda/numba/cuda/core/sigutils.py +68 -0
  83. numba_cuda/numba/cuda/core/ssa.py +498 -0
  84. numba_cuda/numba/cuda/core/targetconfig.py +330 -0
  85. numba_cuda/numba/cuda/core/tracing.py +231 -0
  86. numba_cuda/numba/cuda/core/transforms.py +956 -0
  87. numba_cuda/numba/cuda/core/typed_passes.py +867 -0
  88. numba_cuda/numba/cuda/core/typeinfer.py +1950 -0
  89. numba_cuda/numba/cuda/core/unsafe/__init__.py +0 -0
  90. numba_cuda/numba/cuda/core/unsafe/bytes.py +67 -0
  91. numba_cuda/numba/cuda/core/unsafe/eh.py +67 -0
  92. numba_cuda/numba/cuda/core/unsafe/refcount.py +98 -0
  93. numba_cuda/numba/cuda/core/untyped_passes.py +1979 -0
  94. numba_cuda/numba/cuda/cpython/builtins.py +1153 -0
  95. numba_cuda/numba/cuda/cpython/charseq.py +1218 -0
  96. numba_cuda/numba/cuda/cpython/cmathimpl.py +560 -0
  97. numba_cuda/numba/cuda/cpython/enumimpl.py +103 -0
  98. numba_cuda/numba/cuda/cpython/iterators.py +167 -0
  99. numba_cuda/numba/cuda/cpython/listobj.py +1326 -0
  100. numba_cuda/numba/cuda/cpython/mathimpl.py +499 -0
  101. numba_cuda/numba/cuda/cpython/numbers.py +1475 -0
  102. numba_cuda/numba/cuda/cpython/rangeobj.py +289 -0
  103. numba_cuda/numba/cuda/cpython/slicing.py +322 -0
  104. numba_cuda/numba/cuda/cpython/tupleobj.py +456 -0
  105. numba_cuda/numba/cuda/cpython/unicode.py +2865 -0
  106. numba_cuda/numba/cuda/cpython/unicode_support.py +1597 -0
  107. numba_cuda/numba/cuda/cpython/unsafe/__init__.py +0 -0
  108. numba_cuda/numba/cuda/cpython/unsafe/numbers.py +64 -0
  109. numba_cuda/numba/cuda/cpython/unsafe/tuple.py +92 -0
  110. numba_cuda/numba/cuda/cuda_paths.py +691 -0
  111. numba_cuda/numba/cuda/cudadecl.py +543 -0
  112. numba_cuda/numba/cuda/cudadrv/__init__.py +14 -0
  113. numba_cuda/numba/cuda/cudadrv/devicearray.py +954 -0
  114. numba_cuda/numba/cuda/cudadrv/devices.py +249 -0
  115. numba_cuda/numba/cuda/cudadrv/driver.py +3238 -0
  116. numba_cuda/numba/cuda/cudadrv/drvapi.py +435 -0
  117. numba_cuda/numba/cuda/cudadrv/dummyarray.py +562 -0
  118. numba_cuda/numba/cuda/cudadrv/enums.py +613 -0
  119. numba_cuda/numba/cuda/cudadrv/error.py +48 -0
  120. numba_cuda/numba/cuda/cudadrv/libs.py +220 -0
  121. numba_cuda/numba/cuda/cudadrv/linkable_code.py +184 -0
  122. numba_cuda/numba/cuda/cudadrv/mappings.py +14 -0
  123. numba_cuda/numba/cuda/cudadrv/ndarray.py +26 -0
  124. numba_cuda/numba/cuda/cudadrv/nvrtc.py +193 -0
  125. numba_cuda/numba/cuda/cudadrv/nvvm.py +756 -0
  126. numba_cuda/numba/cuda/cudadrv/rtapi.py +13 -0
  127. numba_cuda/numba/cuda/cudadrv/runtime.py +34 -0
  128. numba_cuda/numba/cuda/cudaimpl.py +983 -0
  129. numba_cuda/numba/cuda/cudamath.py +149 -0
  130. numba_cuda/numba/cuda/datamodel/__init__.py +7 -0
  131. numba_cuda/numba/cuda/datamodel/cuda_manager.py +66 -0
  132. numba_cuda/numba/cuda/datamodel/cuda_models.py +1446 -0
  133. numba_cuda/numba/cuda/datamodel/cuda_packer.py +224 -0
  134. numba_cuda/numba/cuda/datamodel/cuda_registry.py +22 -0
  135. numba_cuda/numba/cuda/datamodel/cuda_testing.py +153 -0
  136. numba_cuda/numba/cuda/datamodel/manager.py +11 -0
  137. numba_cuda/numba/cuda/datamodel/models.py +9 -0
  138. numba_cuda/numba/cuda/datamodel/packer.py +9 -0
  139. numba_cuda/numba/cuda/datamodel/registry.py +11 -0
  140. numba_cuda/numba/cuda/datamodel/testing.py +11 -0
  141. numba_cuda/numba/cuda/debuginfo.py +997 -0
  142. numba_cuda/numba/cuda/decorators.py +294 -0
  143. numba_cuda/numba/cuda/descriptor.py +35 -0
  144. numba_cuda/numba/cuda/device_init.py +155 -0
  145. numba_cuda/numba/cuda/deviceufunc.py +1021 -0
  146. numba_cuda/numba/cuda/dispatcher.py +2463 -0
  147. numba_cuda/numba/cuda/errors.py +72 -0
  148. numba_cuda/numba/cuda/extending.py +697 -0
  149. numba_cuda/numba/cuda/flags.py +178 -0
  150. numba_cuda/numba/cuda/fp16.py +357 -0
  151. numba_cuda/numba/cuda/include/12/cuda_bf16.h +5118 -0
  152. numba_cuda/numba/cuda/include/12/cuda_bf16.hpp +3865 -0
  153. numba_cuda/numba/cuda/include/12/cuda_fp16.h +5363 -0
  154. numba_cuda/numba/cuda/include/12/cuda_fp16.hpp +3483 -0
  155. numba_cuda/numba/cuda/include/13/cuda_bf16.h +5118 -0
  156. numba_cuda/numba/cuda/include/13/cuda_bf16.hpp +3865 -0
  157. numba_cuda/numba/cuda/include/13/cuda_fp16.h +5363 -0
  158. numba_cuda/numba/cuda/include/13/cuda_fp16.hpp +3483 -0
  159. numba_cuda/numba/cuda/initialize.py +24 -0
  160. numba_cuda/numba/cuda/intrinsics.py +531 -0
  161. numba_cuda/numba/cuda/itanium_mangler.py +214 -0
  162. numba_cuda/numba/cuda/kernels/__init__.py +2 -0
  163. numba_cuda/numba/cuda/kernels/reduction.py +265 -0
  164. numba_cuda/numba/cuda/kernels/transpose.py +65 -0
  165. numba_cuda/numba/cuda/libdevice.py +3386 -0
  166. numba_cuda/numba/cuda/libdevicedecl.py +20 -0
  167. numba_cuda/numba/cuda/libdevicefuncs.py +1060 -0
  168. numba_cuda/numba/cuda/libdeviceimpl.py +88 -0
  169. numba_cuda/numba/cuda/locks.py +19 -0
  170. numba_cuda/numba/cuda/lowering.py +1980 -0
  171. numba_cuda/numba/cuda/mathimpl.py +374 -0
  172. numba_cuda/numba/cuda/memory_management/__init__.py +4 -0
  173. numba_cuda/numba/cuda/memory_management/memsys.cu +99 -0
  174. numba_cuda/numba/cuda/memory_management/memsys.cuh +22 -0
  175. numba_cuda/numba/cuda/memory_management/nrt.cu +212 -0
  176. numba_cuda/numba/cuda/memory_management/nrt.cuh +48 -0
  177. numba_cuda/numba/cuda/memory_management/nrt.py +390 -0
  178. numba_cuda/numba/cuda/memory_management/nrt_context.py +438 -0
  179. numba_cuda/numba/cuda/misc/appdirs.py +594 -0
  180. numba_cuda/numba/cuda/misc/cffiimpl.py +24 -0
  181. numba_cuda/numba/cuda/misc/coverage_support.py +43 -0
  182. numba_cuda/numba/cuda/misc/dump_style.py +41 -0
  183. numba_cuda/numba/cuda/misc/findlib.py +75 -0
  184. numba_cuda/numba/cuda/misc/firstlinefinder.py +96 -0
  185. numba_cuda/numba/cuda/misc/gdb_hook.py +240 -0
  186. numba_cuda/numba/cuda/misc/literal.py +28 -0
  187. numba_cuda/numba/cuda/misc/llvm_pass_timings.py +412 -0
  188. numba_cuda/numba/cuda/misc/special.py +94 -0
  189. numba_cuda/numba/cuda/models.py +56 -0
  190. numba_cuda/numba/cuda/np/arraymath.py +5130 -0
  191. numba_cuda/numba/cuda/np/arrayobj.py +7635 -0
  192. numba_cuda/numba/cuda/np/extensions.py +11 -0
  193. numba_cuda/numba/cuda/np/linalg.py +3087 -0
  194. numba_cuda/numba/cuda/np/math/__init__.py +0 -0
  195. numba_cuda/numba/cuda/np/math/cmathimpl.py +558 -0
  196. numba_cuda/numba/cuda/np/math/mathimpl.py +487 -0
  197. numba_cuda/numba/cuda/np/math/numbers.py +1461 -0
  198. numba_cuda/numba/cuda/np/npdatetime.py +969 -0
  199. numba_cuda/numba/cuda/np/npdatetime_helpers.py +217 -0
  200. numba_cuda/numba/cuda/np/npyfuncs.py +1808 -0
  201. numba_cuda/numba/cuda/np/npyimpl.py +1027 -0
  202. numba_cuda/numba/cuda/np/numpy_support.py +798 -0
  203. numba_cuda/numba/cuda/np/polynomial/__init__.py +4 -0
  204. numba_cuda/numba/cuda/np/polynomial/polynomial_core.py +242 -0
  205. numba_cuda/numba/cuda/np/polynomial/polynomial_functions.py +380 -0
  206. numba_cuda/numba/cuda/np/ufunc/__init__.py +4 -0
  207. numba_cuda/numba/cuda/np/ufunc/decorators.py +203 -0
  208. numba_cuda/numba/cuda/np/ufunc/sigparse.py +68 -0
  209. numba_cuda/numba/cuda/np/ufunc/ufuncbuilder.py +65 -0
  210. numba_cuda/numba/cuda/np/ufunc_db.py +1282 -0
  211. numba_cuda/numba/cuda/np/unsafe/__init__.py +0 -0
  212. numba_cuda/numba/cuda/np/unsafe/ndarray.py +84 -0
  213. numba_cuda/numba/cuda/nvvmutils.py +254 -0
  214. numba_cuda/numba/cuda/printimpl.py +126 -0
  215. numba_cuda/numba/cuda/random.py +308 -0
  216. numba_cuda/numba/cuda/reshape_funcs.cu +156 -0
  217. numba_cuda/numba/cuda/serialize.py +267 -0
  218. numba_cuda/numba/cuda/simulator/__init__.py +63 -0
  219. numba_cuda/numba/cuda/simulator/_internal/__init__.py +4 -0
  220. numba_cuda/numba/cuda/simulator/_internal/cuda_bf16.py +2 -0
  221. numba_cuda/numba/cuda/simulator/api.py +179 -0
  222. numba_cuda/numba/cuda/simulator/bf16.py +4 -0
  223. numba_cuda/numba/cuda/simulator/compiler.py +38 -0
  224. numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +11 -0
  225. numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +462 -0
  226. numba_cuda/numba/cuda/simulator/cudadrv/devices.py +122 -0
  227. numba_cuda/numba/cuda/simulator/cudadrv/driver.py +66 -0
  228. numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +7 -0
  229. numba_cuda/numba/cuda/simulator/cudadrv/dummyarray.py +7 -0
  230. numba_cuda/numba/cuda/simulator/cudadrv/error.py +10 -0
  231. numba_cuda/numba/cuda/simulator/cudadrv/libs.py +10 -0
  232. numba_cuda/numba/cuda/simulator/cudadrv/linkable_code.py +61 -0
  233. numba_cuda/numba/cuda/simulator/cudadrv/nvrtc.py +11 -0
  234. numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +32 -0
  235. numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +22 -0
  236. numba_cuda/numba/cuda/simulator/dispatcher.py +11 -0
  237. numba_cuda/numba/cuda/simulator/kernel.py +320 -0
  238. numba_cuda/numba/cuda/simulator/kernelapi.py +509 -0
  239. numba_cuda/numba/cuda/simulator/memory_management/__init__.py +4 -0
  240. numba_cuda/numba/cuda/simulator/memory_management/nrt.py +21 -0
  241. numba_cuda/numba/cuda/simulator/reduction.py +19 -0
  242. numba_cuda/numba/cuda/simulator/tests/support.py +4 -0
  243. numba_cuda/numba/cuda/simulator/vector_types.py +65 -0
  244. numba_cuda/numba/cuda/simulator_init.py +18 -0
  245. numba_cuda/numba/cuda/stubs.py +624 -0
  246. numba_cuda/numba/cuda/target.py +505 -0
  247. numba_cuda/numba/cuda/testing.py +347 -0
  248. numba_cuda/numba/cuda/tests/__init__.py +62 -0
  249. numba_cuda/numba/cuda/tests/benchmarks/__init__.py +0 -0
  250. numba_cuda/numba/cuda/tests/benchmarks/test_kernel_launch.py +119 -0
  251. numba_cuda/numba/cuda/tests/cloudpickle_main_class.py +9 -0
  252. numba_cuda/numba/cuda/tests/core/serialize_usecases.py +113 -0
  253. numba_cuda/numba/cuda/tests/core/test_itanium_mangler.py +83 -0
  254. numba_cuda/numba/cuda/tests/core/test_serialize.py +371 -0
  255. numba_cuda/numba/cuda/tests/cudadrv/__init__.py +9 -0
  256. numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +147 -0
  257. numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +161 -0
  258. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +397 -0
  259. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +24 -0
  260. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +180 -0
  261. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +313 -0
  262. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +191 -0
  263. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +621 -0
  264. numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +247 -0
  265. numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +100 -0
  266. numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +200 -0
  267. numba_cuda/numba/cuda/tests/cudadrv/test_events.py +53 -0
  268. numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +72 -0
  269. numba_cuda/numba/cuda/tests/cudadrv/test_init.py +138 -0
  270. numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +43 -0
  271. numba_cuda/numba/cuda/tests/cudadrv/test_is_fp16.py +15 -0
  272. numba_cuda/numba/cuda/tests/cudadrv/test_linkable_code.py +58 -0
  273. numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +348 -0
  274. numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +128 -0
  275. numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py +301 -0
  276. numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +174 -0
  277. numba_cuda/numba/cuda/tests/cudadrv/test_nvrtc.py +28 -0
  278. numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +185 -0
  279. numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +39 -0
  280. numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +23 -0
  281. numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +38 -0
  282. numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +48 -0
  283. numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +44 -0
  284. numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +127 -0
  285. numba_cuda/numba/cuda/tests/cudapy/__init__.py +9 -0
  286. numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +231 -0
  287. numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +50 -0
  288. numba_cuda/numba/cuda/tests/cudapy/cg_cache_usecases.py +36 -0
  289. numba_cuda/numba/cuda/tests/cudapy/complex_usecases.py +116 -0
  290. numba_cuda/numba/cuda/tests/cudapy/enum_usecases.py +59 -0
  291. numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +62 -0
  292. numba_cuda/numba/cuda/tests/cudapy/jitlink.ptx +28 -0
  293. numba_cuda/numba/cuda/tests/cudapy/overload_usecases.py +33 -0
  294. numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +104 -0
  295. numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +47 -0
  296. numba_cuda/numba/cuda/tests/cudapy/test_analysis.py +1122 -0
  297. numba_cuda/numba/cuda/tests/cudapy/test_array.py +344 -0
  298. numba_cuda/numba/cuda/tests/cudapy/test_array_alignment.py +268 -0
  299. numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +203 -0
  300. numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +63 -0
  301. numba_cuda/numba/cuda/tests/cudapy/test_array_reductions.py +360 -0
  302. numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +1815 -0
  303. numba_cuda/numba/cuda/tests/cudapy/test_bfloat16.py +599 -0
  304. numba_cuda/numba/cuda/tests/cudapy/test_bfloat16_bindings.py +377 -0
  305. numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +160 -0
  306. numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +27 -0
  307. numba_cuda/numba/cuda/tests/cudapy/test_byteflow.py +98 -0
  308. numba_cuda/numba/cuda/tests/cudapy/test_cache_hints.py +210 -0
  309. numba_cuda/numba/cuda/tests/cudapy/test_caching.py +683 -0
  310. numba_cuda/numba/cuda/tests/cudapy/test_casting.py +265 -0
  311. numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +42 -0
  312. numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +718 -0
  313. numba_cuda/numba/cuda/tests/cudapy/test_complex.py +370 -0
  314. numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +23 -0
  315. numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +142 -0
  316. numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +178 -0
  317. numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +193 -0
  318. numba_cuda/numba/cuda/tests/cudapy/test_copy_propagate.py +131 -0
  319. numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +438 -0
  320. numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +94 -0
  321. numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +101 -0
  322. numba_cuda/numba/cuda/tests/cudapy/test_debug.py +105 -0
  323. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +978 -0
  324. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo_types.py +476 -0
  325. numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +500 -0
  326. numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +820 -0
  327. numba_cuda/numba/cuda/tests/cudapy/test_enums.py +152 -0
  328. numba_cuda/numba/cuda/tests/cudapy/test_errors.py +111 -0
  329. numba_cuda/numba/cuda/tests/cudapy/test_exception.py +170 -0
  330. numba_cuda/numba/cuda/tests/cudapy/test_extending.py +1088 -0
  331. numba_cuda/numba/cuda/tests/cudapy/test_extending_types.py +71 -0
  332. numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +265 -0
  333. numba_cuda/numba/cuda/tests/cudapy/test_flow_control.py +1433 -0
  334. numba_cuda/numba/cuda/tests/cudapy/test_forall.py +57 -0
  335. numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +34 -0
  336. numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +69 -0
  337. numba_cuda/numba/cuda/tests/cudapy/test_globals.py +62 -0
  338. numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +474 -0
  339. numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +167 -0
  340. numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +92 -0
  341. numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +39 -0
  342. numba_cuda/numba/cuda/tests/cudapy/test_inline.py +170 -0
  343. numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +255 -0
  344. numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +1219 -0
  345. numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +263 -0
  346. numba_cuda/numba/cuda/tests/cudapy/test_ir.py +598 -0
  347. numba_cuda/numba/cuda/tests/cudapy/test_ir_utils.py +276 -0
  348. numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +101 -0
  349. numba_cuda/numba/cuda/tests/cudapy/test_lang.py +68 -0
  350. numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +123 -0
  351. numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +194 -0
  352. numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +220 -0
  353. numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +173 -0
  354. numba_cuda/numba/cuda/tests/cudapy/test_make_function_to_jit_function.py +364 -0
  355. numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +47 -0
  356. numba_cuda/numba/cuda/tests/cudapy/test_math.py +842 -0
  357. numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +76 -0
  358. numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +78 -0
  359. numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +25 -0
  360. numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +145 -0
  361. numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +39 -0
  362. numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +82 -0
  363. numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +53 -0
  364. numba_cuda/numba/cuda/tests/cudapy/test_operator.py +504 -0
  365. numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +93 -0
  366. numba_cuda/numba/cuda/tests/cudapy/test_overload.py +402 -0
  367. numba_cuda/numba/cuda/tests/cudapy/test_powi.py +128 -0
  368. numba_cuda/numba/cuda/tests/cudapy/test_print.py +193 -0
  369. numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +37 -0
  370. numba_cuda/numba/cuda/tests/cudapy/test_random.py +117 -0
  371. numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +614 -0
  372. numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +130 -0
  373. numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +94 -0
  374. numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +83 -0
  375. numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +86 -0
  376. numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +40 -0
  377. numba_cuda/numba/cuda/tests/cudapy/test_sm.py +457 -0
  378. numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +233 -0
  379. numba_cuda/numba/cuda/tests/cudapy/test_ssa.py +454 -0
  380. numba_cuda/numba/cuda/tests/cudapy/test_stream_api.py +56 -0
  381. numba_cuda/numba/cuda/tests/cudapy/test_sync.py +277 -0
  382. numba_cuda/numba/cuda/tests/cudapy/test_tracing.py +200 -0
  383. numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +90 -0
  384. numba_cuda/numba/cuda/tests/cudapy/test_typeconv.py +333 -0
  385. numba_cuda/numba/cuda/tests/cudapy/test_typeinfer.py +538 -0
  386. numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +585 -0
  387. numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +42 -0
  388. numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +485 -0
  389. numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +312 -0
  390. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +23 -0
  391. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +183 -0
  392. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +40 -0
  393. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +40 -0
  394. numba_cuda/numba/cuda/tests/cudapy/test_warning.py +206 -0
  395. numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +446 -0
  396. numba_cuda/numba/cuda/tests/cudasim/__init__.py +9 -0
  397. numba_cuda/numba/cuda/tests/cudasim/support.py +9 -0
  398. numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +111 -0
  399. numba_cuda/numba/cuda/tests/data/__init__.py +2 -0
  400. numba_cuda/numba/cuda/tests/data/cta_barrier.cu +28 -0
  401. numba_cuda/numba/cuda/tests/data/cuda_include.cu +10 -0
  402. numba_cuda/numba/cuda/tests/data/error.cu +12 -0
  403. numba_cuda/numba/cuda/tests/data/include/add.cuh +8 -0
  404. numba_cuda/numba/cuda/tests/data/jitlink.cu +28 -0
  405. numba_cuda/numba/cuda/tests/data/jitlink.ptx +49 -0
  406. numba_cuda/numba/cuda/tests/data/warn.cu +12 -0
  407. numba_cuda/numba/cuda/tests/doc_examples/__init__.py +9 -0
  408. numba_cuda/numba/cuda/tests/doc_examples/ffi/__init__.py +2 -0
  409. numba_cuda/numba/cuda/tests/doc_examples/ffi/functions.cu +54 -0
  410. numba_cuda/numba/cuda/tests/doc_examples/ffi/include/mul.cuh +8 -0
  411. numba_cuda/numba/cuda/tests/doc_examples/ffi/saxpy.cu +14 -0
  412. numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +86 -0
  413. numba_cuda/numba/cuda/tests/doc_examples/test_cpointer.py +68 -0
  414. numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +81 -0
  415. numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +141 -0
  416. numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +160 -0
  417. numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +180 -0
  418. numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +119 -0
  419. numba_cuda/numba/cuda/tests/doc_examples/test_random.py +66 -0
  420. numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +80 -0
  421. numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +206 -0
  422. numba_cuda/numba/cuda/tests/doc_examples/test_ufunc.py +53 -0
  423. numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +76 -0
  424. numba_cuda/numba/cuda/tests/nocuda/__init__.py +9 -0
  425. numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +452 -0
  426. numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +48 -0
  427. numba_cuda/numba/cuda/tests/nocuda/test_import.py +63 -0
  428. numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +252 -0
  429. numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +59 -0
  430. numba_cuda/numba/cuda/tests/nrt/__init__.py +9 -0
  431. numba_cuda/numba/cuda/tests/nrt/test_nrt.py +387 -0
  432. numba_cuda/numba/cuda/tests/nrt/test_nrt_refct.py +124 -0
  433. numba_cuda/numba/cuda/tests/support.py +900 -0
  434. numba_cuda/numba/cuda/typeconv/__init__.py +4 -0
  435. numba_cuda/numba/cuda/typeconv/castgraph.py +137 -0
  436. numba_cuda/numba/cuda/typeconv/rules.py +63 -0
  437. numba_cuda/numba/cuda/typeconv/typeconv.py +121 -0
  438. numba_cuda/numba/cuda/types/__init__.py +233 -0
  439. numba_cuda/numba/cuda/types/__init__.pyi +167 -0
  440. numba_cuda/numba/cuda/types/abstract.py +9 -0
  441. numba_cuda/numba/cuda/types/common.py +9 -0
  442. numba_cuda/numba/cuda/types/containers.py +9 -0
  443. numba_cuda/numba/cuda/types/cuda_abstract.py +533 -0
  444. numba_cuda/numba/cuda/types/cuda_common.py +110 -0
  445. numba_cuda/numba/cuda/types/cuda_containers.py +971 -0
  446. numba_cuda/numba/cuda/types/cuda_function_type.py +230 -0
  447. numba_cuda/numba/cuda/types/cuda_functions.py +798 -0
  448. numba_cuda/numba/cuda/types/cuda_iterators.py +120 -0
  449. numba_cuda/numba/cuda/types/cuda_misc.py +569 -0
  450. numba_cuda/numba/cuda/types/cuda_npytypes.py +690 -0
  451. numba_cuda/numba/cuda/types/cuda_scalars.py +280 -0
  452. numba_cuda/numba/cuda/types/ext_types.py +101 -0
  453. numba_cuda/numba/cuda/types/function_type.py +11 -0
  454. numba_cuda/numba/cuda/types/functions.py +9 -0
  455. numba_cuda/numba/cuda/types/iterators.py +9 -0
  456. numba_cuda/numba/cuda/types/misc.py +9 -0
  457. numba_cuda/numba/cuda/types/npytypes.py +9 -0
  458. numba_cuda/numba/cuda/types/scalars.py +9 -0
  459. numba_cuda/numba/cuda/typing/__init__.py +19 -0
  460. numba_cuda/numba/cuda/typing/arraydecl.py +939 -0
  461. numba_cuda/numba/cuda/typing/asnumbatype.py +130 -0
  462. numba_cuda/numba/cuda/typing/bufproto.py +70 -0
  463. numba_cuda/numba/cuda/typing/builtins.py +1209 -0
  464. numba_cuda/numba/cuda/typing/cffi_utils.py +219 -0
  465. numba_cuda/numba/cuda/typing/cmathdecl.py +47 -0
  466. numba_cuda/numba/cuda/typing/collections.py +138 -0
  467. numba_cuda/numba/cuda/typing/context.py +782 -0
  468. numba_cuda/numba/cuda/typing/ctypes_utils.py +125 -0
  469. numba_cuda/numba/cuda/typing/dictdecl.py +63 -0
  470. numba_cuda/numba/cuda/typing/enumdecl.py +74 -0
  471. numba_cuda/numba/cuda/typing/listdecl.py +147 -0
  472. numba_cuda/numba/cuda/typing/mathdecl.py +158 -0
  473. numba_cuda/numba/cuda/typing/npdatetime.py +322 -0
  474. numba_cuda/numba/cuda/typing/npydecl.py +749 -0
  475. numba_cuda/numba/cuda/typing/setdecl.py +115 -0
  476. numba_cuda/numba/cuda/typing/templates.py +1446 -0
  477. numba_cuda/numba/cuda/typing/typeof.py +301 -0
  478. numba_cuda/numba/cuda/ufuncs.py +746 -0
  479. numba_cuda/numba/cuda/utils.py +724 -0
  480. numba_cuda/numba/cuda/vector_types.py +214 -0
  481. numba_cuda/numba/cuda/vectorizers.py +260 -0
  482. numba_cuda-0.22.0.dist-info/METADATA +109 -0
  483. numba_cuda-0.22.0.dist-info/RECORD +487 -0
  484. numba_cuda-0.22.0.dist-info/WHEEL +6 -0
  485. numba_cuda-0.22.0.dist-info/licenses/LICENSE +26 -0
  486. numba_cuda-0.22.0.dist-info/licenses/LICENSE.numba +24 -0
  487. numba_cuda-0.22.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,505 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
4
+ import re
5
+ from functools import cached_property
6
+ import llvmlite.binding as ll
7
+ from llvmlite import ir
8
+ import warnings
9
+ import importlib.util
10
+ import numpy as np
11
+
12
+ from numba.cuda import types
13
+ from numba.cuda import HAS_NUMBA
14
+ from numba.cuda.core.compiler_lock import global_compiler_lock
15
+ from numba.cuda.core.errors import NumbaWarning
16
+ from numba.cuda.core.base import BaseContext
17
+ from numba.cuda.typing import cmathdecl
18
+ from numba.cuda import datamodel
19
+
20
+ from .cudadrv import nvvm
21
+ from numba.cuda import (
22
+ cgutils,
23
+ itanium_mangler,
24
+ compiler,
25
+ codegen,
26
+ ufuncs,
27
+ typing,
28
+ )
29
+ from numba.cuda.debuginfo import CUDADIBuilder
30
+ from numba.cuda.flags import CUDAFlags
31
+ from numba.cuda.models import cuda_data_manager
32
+ from numba.cuda.core.callconv import BaseCallConv, MinimalCallConv
33
+ from numba.cuda.core import config, targetconfig
34
+
35
+
36
+ # -----------------------------------------------------------------------------
37
+ # Typing
38
+
39
+
40
class CUDATypingContext(typing.BaseContext):
    """
    Typing context for the CUDA target.

    Installs the CUDA typing registries and customises two pieces of the base
    behaviour: non-CUDA Numba dispatchers encountered as values are wrapped in
    a CUDA device dispatcher, and a ``TypeError`` raised while converting from
    an ``IntEnumMember`` is reported as "no conversion possible".
    """

    def load_additional_registries(self):
        """
        Install the typing registries used when typing CUDA code.
        """
        from . import (
            cudadecl,
            cudamath,
            fp16,
            bf16,
            libdevicedecl,
            vector_types,
        )
        from numba.cuda.typing import enumdecl, cffi_utils, npydecl

        self.install_registry(cudadecl.registry)
        self.install_registry(cffi_utils.registry)
        self.install_registry(cudamath.registry)
        self.install_registry(cmathdecl.registry)
        self.install_registry(libdevicedecl.registry)
        self.install_registry(npydecl.registry)
        self.install_registry(enumdecl.registry)
        self.install_registry(vector_types.typing_registry)
        self.install_registry(fp16.typing_registry)
        self.install_registry(bf16.typing_registry)

    def resolve_value_type(self, val):
        """
        Resolve the Numba type of *val*.

        When Numba is available and *val* is a Numba dispatcher that is not a
        CUDA dispatcher, it is first replaced by a CUDA device dispatcher
        built from the same Python function; the result is cached on *val* so
        the wrapper is created only once. Raises ``ValueError`` if such a
        dispatcher has compilation disabled.
        """
        # treat other dispatcher object as another device function
        from numba.cuda.dispatcher import CUDADispatcher

        if HAS_NUMBA:
            # Import from Numba only once it is known to be available;
            # importing ahead of this check would defeat the guard.
            from numba.core.dispatcher import Dispatcher

            if isinstance(val, Dispatcher) and not isinstance(
                val, CUDADispatcher
            ):
                try:
                    # use cached device function
                    # NOTE: inside this class the attribute name is mangled
                    # to ``_CUDATypingContext__dispatcher``; the read here and
                    # the write below must stay in the class body to agree.
                    val = val.__dispatcher
                except AttributeError:
                    if not val._can_compile:
                        raise ValueError(
                            "using cpu function on device "
                            "but its compilation is disabled"
                        )
                    targetoptions = val.targetoptions.copy()
                    targetoptions["device"] = True
                    targetoptions["debug"] = targetoptions.get("debug", False)
                    targetoptions["opt"] = targetoptions.get("opt", True)
                    disp = CUDADispatcher(val.py_func, targetoptions)
                    # cache the device function for future use and to avoid
                    # duplicated copy of the same function.
                    val.__dispatcher = disp
                    val = disp

        # continue with parent logic
        return super().resolve_value_type(val)

    def can_convert(self, fromty, toty):
        """
        Check whether conversion is possible from *fromty* to *toty*.
        If successful, return a numba.cuda.typeconv.Conversion instance;
        otherwise None is returned.
        """

        # This implementation works around the issue addressed in Numba PR
        # #10047, "Fix IntEnumMember.can_convert_to() when no conversions
        # found", https://github.com/numba/numba/pull/10047.
        #
        # This should be gated on the version of Numba that the fix is
        # incorporated into, and eventually removed when the minimum supported
        # Numba version includes the fix.

        try:
            return super().can_convert(fromty, toty)
        except TypeError:
            if isinstance(fromty, types.IntEnumMember):
                # IntEnumMember fails to correctly handle impossible
                # conversions - in this scenario the correct thing to do is to
                # return None to signal that the conversion was not possible
                return None
            else:
                # Any failure involving conversion from a non-IntEnumMember is
                # almost certainly a real and separate issue
                raise
121
+
122
+
123
+ # -----------------------------------------------------------------------------
124
+ # Implementation
125
+
126
+
127
# NOTE: despite the name, this case-insensitive pattern matches every
# character that is NOT an ASCII letter or digit (negated character class).
# NOTE(review): no use of this pattern is visible in this module - confirm
# whether external code still relies on it.
VALID_CHARS = re.compile(r"[^a-z0-9]", re.I)
128
+
129
+
130
class CUDATargetContext(BaseContext):
    """
    Target (lowering) context for the CUDA target.

    Installs the implementation registries, provides helpers that place
    constant arrays and strings in the NVVM constant address space, and
    overrides internal subroutine compilation to use ``CUDAFlags``.
    """

    implement_powi_as_math_call = True
    strict_alignment = True

    def __init__(self, typingctx, target="cuda"):
        super().__init__(typingctx, target)
        # CUDA-specific data models take part in a chain with the defaults.
        self.data_model_manager = cuda_data_manager.chain(
            datamodel.default_manager
        )

    @property
    def enable_nrt(self):
        """Value of ``config.CUDA_ENABLE_NRT``; False when it is not set."""
        return getattr(config, "CUDA_ENABLE_NRT", False)

    @property
    def DIBuilder(self):
        """Debug-info builder class used by this target."""
        return CUDADIBuilder

    @property
    def enable_boundscheck(self):
        # Unconditionally disabled
        return False

    # Overrides
    def create_module(self, name):
        """Create an empty module called *name* via the internal codegen."""
        return self._internal_codegen._create_empty_module(name)

    def init(self):
        """Create the internal codegen and reset the cached target data."""
        self._internal_codegen = codegen.JITCUDACodegen("numba.cuda.jit")
        self._target_data = None

    def load_additional_registries(self):
        """
        Install the implementation registries used when lowering CUDA code.

        If ``numba.core.imputils`` can be located, the externally-defined
        implementations from its ``builtin_registry`` are installed as well.
        """
        # side effect of import needed for numba.cuda.cpython.*, the builtins
        # registry is updated at import time.
        from numba.cuda.cpython import (
            numbers,
            slicing,
            iterators,
            listobj,
            unicode,
            charseq,
            cmathimpl,
            mathimpl,
            tupleobj,
            rangeobj,
            enumimpl,
        )
        from numba.cuda.cpython import builtins as cpython_builtins
        from numba.cuda.core import optional  # noqa: F401
        from numba.cuda.misc import cffiimpl
        from numba.cuda.np import arrayobj, npdatetime, polynomial, arraymath

        from . import (
            cudaimpl,
            fp16,
            printimpl,
            libdeviceimpl,
            mathimpl as cuda_mathimpl,
            vector_types,
            bf16,
        )

        # fix for #8940
        from numba.cuda.np.unsafe import ndarray  # noqa F401

        self.install_registry(cudaimpl.registry)
        self.install_registry(cffiimpl.registry)
        self.install_registry(printimpl.registry)
        self.install_registry(libdeviceimpl.registry)
        self.install_registry(cmathimpl.registry)
        self.install_registry(mathimpl.registry)
        self.install_registry(numbers.registry)
        self.install_registry(optional.registry)
        self.install_registry(cuda_mathimpl.registry)
        self.install_registry(vector_types.impl_registry)
        self.install_registry(fp16.target_registry)
        self.install_registry(bf16.target_registry)
        self.install_registry(slicing.registry)
        self.install_registry(iterators.registry)
        self.install_registry(listobj.registry)
        self.install_registry(unicode.registry)
        self.install_registry(charseq.registry)
        self.install_registry(tupleobj.registry)
        self.install_registry(rangeobj.registry)
        self.install_registry(enumimpl.registry)
        self.install_registry(cpython_builtins.registry)

        # install np registries
        self.install_registry(polynomial.registry)
        self.install_registry(npdatetime.registry)
        self.install_registry(arrayobj.registry)
        self.install_registry(arraymath.registry)

        # Install only implementations that are defined outside of numba (i.e.,
        # in third-party extensions) from Numba's builtin_registry.
        try:
            imputils_spec = importlib.util.find_spec("numba.core.imputils")
        except ModuleNotFoundError as exc:
            # find_spec() imports the parent package of a dotted name, so a
            # missing ``numba`` / ``numba.core`` raises instead of returning
            # None. Treat exactly that case as "not available" and let any
            # unrelated import failure propagate.
            if exc.name not in ("numba", "numba.core", "numba.core.imputils"):
                raise
            imputils_spec = None

        if imputils_spec is not None:
            from numba.core.imputils import builtin_registry

            self.install_external_registry(builtin_registry)

    def codegen(self):
        """Return the internal codegen object created by ``init()``."""
        return self._internal_codegen

    @property
    def target_data(self):
        """
        Target data built from the NVVM data layout; created on first access
        and cached afterwards.
        """
        if self._target_data is None:
            self._target_data = ll.create_target_data(nvvm.NVVM().data_layout)
        return self._target_data

    def build_list(self, builder, list_type, items):
        """
        Build a list from the Numba *list_type* and its initial *items*.
        """
        from numba.cuda.cpython import listobj

        return listobj.build_list(self, builder, list_type, items)

    @cached_property
    def nonconst_module_attrs(self):
        """
        Some CUDA intrinsics are at the module level, but cannot be treated as
        constants, because they are loaded from a special register in the PTX.
        These include threadIdx, blockDim, etc.
        """
        from numba import cuda

        nonconsts = (
            "threadIdx",
            "blockDim",
            "blockIdx",
            "gridDim",
            "laneid",
            "warpsize",
        )
        return tuple((types.Module(cuda), nc) for nc in nonconsts)

    @cached_property
    def call_conv(self):
        """Calling convention object for this context (``CUDACallConv``)."""
        return CUDACallConv(self)

    def mangler(self, name, argtypes, *, abi_tags=(), uid=None):
        """Mangle *name* and *argtypes* using the Itanium mangler."""
        return itanium_mangler.mangle(
            name, argtypes, abi_tags=abi_tags, uid=uid
        )

    def make_constant_array(self, builder, aryty, arr):
        """
        Unlike the parent version, this stores the array contents in a global
        in the constant addrspace and returns an array structure whose data
        pointer is that global cast to the generic address space.
        """

        # Ensure we have a contiguous buffer with non-negative strides. Views
        # with negative strides must be materialized so that the constant
        # bytes and the data pointer/strides are consistent.
        if any(s < 0 for s in arr.strides) or not (
            arr.flags.c_contiguous or arr.flags.f_contiguous
        ):
            arr = np.ascontiguousarray(arr)

        lmod = builder.module

        # One i8 constant per byte of the array, in the array's memory order.
        constvals = [
            self.get_constant(types.byte, i) for i in arr.tobytes(order="A")
        ]
        constaryty = ir.ArrayType(ir.IntType(8), len(constvals))
        constary = ir.Constant(constaryty, constvals)

        addrspace = nvvm.ADDRSPACE_CONSTANT
        gv = cgutils.add_global_variable(
            lmod, constary.type, "_cudapy_cmem", addrspace=addrspace
        )
        gv.linkage = "internal"
        gv.global_constant = True
        gv.initializer = constary

        # Preserve the underlying alignment: round the element's ABI size up
        # to the next power of two.
        lldtype = self.get_data_type(aryty.dtype)
        align = self.get_abi_sizeof(lldtype)
        gv.align = 2 ** (align - 1).bit_length()

        # Convert to generic address-space
        ptrty = ir.PointerType(ir.IntType(8))
        genptr = builder.addrspacecast(gv, ptrty, "generic")

        # Create array object
        ary = self.make_array(aryty)(self, builder)
        kshape = [self.get_constant(types.intp, s) for s in arr.shape]
        kstrides = [self.get_constant(types.intp, s) for s in arr.strides]
        self.populate_array(
            ary,
            data=builder.bitcast(genptr, ary.data.type),
            shape=kshape,
            strides=kstrides,
            itemsize=ary.itemsize,
            parent=ary.parent,
            meminfo=None,
        )

        return ary._getvalue()

    def insert_const_string(self, mod, string):
        """
        Unlike the parent version, this returns a pointer in the constant
        addrspace.

        The string is stored UTF-8 encoded with a trailing NUL byte. A global
        with the same mangled name already present in *mod* is reused.
        """
        text = cgutils.make_bytearray(string.encode("utf-8") + b"\x00")
        name = "$".join(
            ["__conststring__", itanium_mangler.mangle_identifier(string)]
        )
        # Try to reuse existing global
        gv = mod.globals.get(name)
        if gv is None:
            # Not defined yet
            gv = cgutils.add_global_variable(
                mod, text.type, name, addrspace=nvvm.ADDRSPACE_CONSTANT
            )
            gv.linkage = "internal"
            gv.global_constant = True
            gv.initializer = text

        # Cast to a i8* pointer
        charty = gv.type.pointee.element
        return gv.bitcast(charty.as_pointer(nvvm.ADDRSPACE_CONSTANT))

    def insert_string_const_addrspace(self, builder, string):
        """
        Insert a constant string in the constant addresspace and return a
        generic i8 pointer to the data.

        This function attempts to deduplicate.
        """
        lmod = builder.module
        gv = self.insert_const_string(lmod, string)
        charptrty = ir.PointerType(ir.IntType(8))
        return builder.addrspacecast(gv, charptrty, "generic")

    def optimize_function(self, func):
        """
        Intentionally a no-op.

        This hook is meant to run O1 function passes on *func*, but the
        function pass pipeline is currently skipped for this target.
        """

    def get_ufunc_info(self, ufunc_key):
        """Look up ufunc information for *ufunc_key* in the CUDA ufunc db."""
        return ufuncs.get_ufunc_info(ufunc_key)

    def _compile_subroutine_no_cache(
        self, builder, impl, sig, locals=None, flags=None
    ):
        # Overrides numba.core.base.BaseContext._compile_subroutine_no_cache().
        # Modified to use flags from the context stack if they are not provided
        # (pending a fix in Numba upstream).
        #
        # NOTE: a *flags* object supplied by the caller is modified in place
        # below (the no_compile / no_*_wrapper attributes are set on it).

        if locals is None:
            locals = {}

        with global_compiler_lock:
            # Local name chosen so the module-level ``codegen`` import is not
            # shadowed inside this method.
            cg = self.codegen()
            library = cg.create_library(impl.__name__)
            if flags is None:
                cstk = targetconfig.ConfigStack()
                if cstk:
                    flags = cstk.top().copy()
                else:
                    msg = "There should always be a context stack; none found."
                    warnings.warn(msg, NumbaWarning)
                    flags = CUDAFlags()

            flags.no_compile = True
            flags.no_cpython_wrapper = True
            flags.no_cfunc_wrapper = True

            cres = compiler.compile_internal(
                self.typing_context,
                self,
                library,
                impl,
                sig.args,
                sig.return_type,
                flags,
                locals=locals,
            )

            # Allow inlining the function inside callers
            self.active_code_library.add_linking_library(cres.library)
            return cres
425
+
426
+
427
class CUDACallConv(MinimalCallConv):
    """Minimal calling convention with CUDA-specific argument naming."""

    def decorate_function(self, fn, args, fe_argtypes, noalias=False):
        """
        Assign names to the arguments of *fn*.

        The user-visible arguments are named from *args* as-is, and the first
        LLVM argument of *fn* is named ``.ret``. *noalias* must be false.
        """
        assert not noalias
        packer = self._get_arg_packer(fe_argtypes)
        # Argument names deliberately carry no "arg." prefix, so that the
        # nvvm compiler can track debug info of arguments more accurately.
        llvm_args = self.get_arguments(fn)
        packer.assign_names(llvm_args, args)
        return_slot = fn.args[0]
        return_slot.name = ".ret"
438
+
439
+
440
class CUDACABICallConv(BaseCallConv):
    """
    Calling convention intended to match the CUDA C/C++ ABI. Functions using
    it have the signature:

        <Python return type> (<Python arguments>)

    Exceptions cannot be used with this convention.
    """

    def _make_call_helper(self, builder):
        # A call helper exists to report exceptions back to Python; since
        # that is not supported here, there is nothing to create.
        return None

    def return_value(self, builder, retval):
        """Emit a plain ``ret`` of *retval*."""
        return builder.ret(retval)

    def return_user_exc(
        self, builder, exc, exc_args=None, loc=None, func_name=None
    ):
        """Always raises: exceptions are not supported in this convention."""
        raise NotImplementedError(
            "Python exceptions are unsupported in the CUDA C/C++ ABI"
        )

    def return_status_propagate(self, builder, status):
        """Always raises: there is no return status in this convention."""
        raise NotImplementedError(
            "Return status is unsupported in the CUDA C/C++ ABI"
        )

    def get_function_type(self, restype, argtypes):
        """
        Return the LLVM IR function type for *restype* and *argtypes*.
        """
        packer = self._get_arg_packer(argtypes)
        llvm_argtypes = list(packer.argument_types)
        llvm_restype = self.get_return_type(restype)
        return ir.FunctionType(llvm_restype, llvm_argtypes)

    def decorate_function(self, fn, args, fe_argtypes, noalias=False):
        """
        Name the arguments of *fn*, prefixing each name in *args* with
        ``arg.``. *noalias* must be false.
        """
        assert not noalias
        packer = self._get_arg_packer(fe_argtypes)
        llvm_args = self.get_arguments(fn)
        prefixed_names = ["arg." + argname for argname in args]
        packer.assign_names(llvm_args, prefixed_names)

    def get_arguments(self, func):
        """
        Return the Python-level arguments of LLVM *func*; in this convention
        they are simply all of its arguments.
        """
        return func.args

    def call_function(self, builder, callee, resty, argtys, args):
        """
        Emit a call to the Numba-compiled *callee* and return a
        ``(status, value)`` pair, where the status is always ``None``.
        """
        packer = self._get_arg_packer(argtys)
        lowered_args = packer.as_arguments(builder, args)
        raw_result = builder.call(callee, lowered_args)
        out = self.context.get_returned_value(builder, resty, raw_result)
        # There is no status to report: a C ABI has neither exceptions nor a
        # distinct None value.
        return None, out

    def get_return_type(self, ty):
        """Return the data model's return type for the Numba type *ty*."""
        model = self.context.data_model_manager[ty]
        return model.get_return_type()