numba-cuda 0.19.0__py3-none-any.whl → 0.20.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of numba-cuda might be problematic. Click here for more details.

Files changed (353) hide show
  1. _numba_cuda_redirector.pth +3 -0
  2. _numba_cuda_redirector.py +3 -0
  3. numba_cuda/VERSION +1 -1
  4. numba_cuda/__init__.py +2 -1
  5. numba_cuda/_version.py +2 -13
  6. numba_cuda/numba/cuda/__init__.py +4 -1
  7. numba_cuda/numba/cuda/_internal/cuda_bf16.py +12708 -1469
  8. numba_cuda/numba/cuda/_internal/cuda_fp16.py +2656 -8769
  9. numba_cuda/numba/cuda/api.py +9 -1
  10. numba_cuda/numba/cuda/api_util.py +3 -0
  11. numba_cuda/numba/cuda/args.py +3 -0
  12. numba_cuda/numba/cuda/bf16.py +288 -2
  13. numba_cuda/numba/cuda/cg.py +3 -0
  14. numba_cuda/numba/cuda/cgutils.py +5 -2
  15. numba_cuda/numba/cuda/cloudpickle/__init__.py +21 -0
  16. numba_cuda/numba/cuda/cloudpickle/cloudpickle.py +1598 -0
  17. numba_cuda/numba/cuda/cloudpickle/cloudpickle_fast.py +17 -0
  18. numba_cuda/numba/cuda/codegen.py +4 -1
  19. numba_cuda/numba/cuda/compiler.py +376 -30
  20. numba_cuda/numba/cuda/core/analysis.py +319 -0
  21. numba_cuda/numba/cuda/core/annotations/__init__.py +0 -0
  22. numba_cuda/numba/cuda/core/annotations/type_annotations.py +304 -0
  23. numba_cuda/numba/cuda/core/base.py +1289 -0
  24. numba_cuda/numba/cuda/core/bytecode.py +727 -0
  25. numba_cuda/numba/cuda/core/caching.py +5 -2
  26. numba_cuda/numba/cuda/core/callconv.py +3 -0
  27. numba_cuda/numba/cuda/core/codegen.py +3 -0
  28. numba_cuda/numba/cuda/core/compiler.py +9 -14
  29. numba_cuda/numba/cuda/core/compiler_machinery.py +497 -0
  30. numba_cuda/numba/cuda/core/config.py +747 -0
  31. numba_cuda/numba/cuda/core/consts.py +124 -0
  32. numba_cuda/numba/cuda/core/cpu.py +370 -0
  33. numba_cuda/numba/cuda/core/environment.py +68 -0
  34. numba_cuda/numba/cuda/core/event.py +511 -0
  35. numba_cuda/numba/cuda/core/funcdesc.py +330 -0
  36. numba_cuda/numba/cuda/core/inline_closurecall.py +1889 -0
  37. numba_cuda/numba/cuda/core/interpreter.py +52 -27
  38. numba_cuda/numba/cuda/core/ir_utils.py +17 -29
  39. numba_cuda/numba/cuda/core/options.py +262 -0
  40. numba_cuda/numba/cuda/core/postproc.py +249 -0
  41. numba_cuda/numba/cuda/core/pythonapi.py +1868 -0
  42. numba_cuda/numba/cuda/core/rewrites/__init__.py +26 -0
  43. numba_cuda/numba/cuda/core/rewrites/ir_print.py +90 -0
  44. numba_cuda/numba/cuda/core/rewrites/registry.py +104 -0
  45. numba_cuda/numba/cuda/core/rewrites/static_binop.py +40 -0
  46. numba_cuda/numba/cuda/core/rewrites/static_getitem.py +187 -0
  47. numba_cuda/numba/cuda/core/rewrites/static_raise.py +98 -0
  48. numba_cuda/numba/cuda/core/sigutils.py +3 -0
  49. numba_cuda/numba/cuda/core/ssa.py +496 -0
  50. numba_cuda/numba/cuda/core/targetconfig.py +329 -0
  51. numba_cuda/numba/cuda/core/tracing.py +231 -0
  52. numba_cuda/numba/cuda/core/transforms.py +952 -0
  53. numba_cuda/numba/cuda/core/typed_passes.py +741 -7
  54. numba_cuda/numba/cuda/core/typeinfer.py +1948 -0
  55. numba_cuda/numba/cuda/core/unsafe/__init__.py +0 -0
  56. numba_cuda/numba/cuda/core/unsafe/bytes.py +67 -0
  57. numba_cuda/numba/cuda/core/unsafe/eh.py +66 -0
  58. numba_cuda/numba/cuda/core/unsafe/refcount.py +98 -0
  59. numba_cuda/numba/cuda/core/untyped_passes.py +1983 -0
  60. numba_cuda/numba/cuda/cpython/cmathimpl.py +560 -0
  61. numba_cuda/numba/cuda/cpython/mathimpl.py +499 -0
  62. numba_cuda/numba/cuda/cpython/numbers.py +1474 -0
  63. numba_cuda/numba/cuda/cuda_paths.py +425 -246
  64. numba_cuda/numba/cuda/cudadecl.py +4 -1
  65. numba_cuda/numba/cuda/cudadrv/__init__.py +4 -1
  66. numba_cuda/numba/cuda/cudadrv/devicearray.py +5 -1
  67. numba_cuda/numba/cuda/cudadrv/devices.py +3 -0
  68. numba_cuda/numba/cuda/cudadrv/driver.py +14 -140
  69. numba_cuda/numba/cuda/cudadrv/drvapi.py +3 -0
  70. numba_cuda/numba/cuda/cudadrv/dummyarray.py +114 -24
  71. numba_cuda/numba/cuda/cudadrv/enums.py +3 -0
  72. numba_cuda/numba/cuda/cudadrv/error.py +4 -0
  73. numba_cuda/numba/cuda/cudadrv/libs.py +8 -5
  74. numba_cuda/numba/cuda/cudadrv/linkable_code.py +3 -0
  75. numba_cuda/numba/cuda/cudadrv/mappings.py +4 -1
  76. numba_cuda/numba/cuda/cudadrv/ndarray.py +3 -0
  77. numba_cuda/numba/cuda/cudadrv/nvrtc.py +22 -8
  78. numba_cuda/numba/cuda/cudadrv/nvvm.py +4 -4
  79. numba_cuda/numba/cuda/cudadrv/rtapi.py +3 -0
  80. numba_cuda/numba/cuda/cudadrv/runtime.py +4 -1
  81. numba_cuda/numba/cuda/cudaimpl.py +8 -1
  82. numba_cuda/numba/cuda/cudamath.py +3 -0
  83. numba_cuda/numba/cuda/debuginfo.py +88 -2
  84. numba_cuda/numba/cuda/decorators.py +6 -3
  85. numba_cuda/numba/cuda/descriptor.py +6 -4
  86. numba_cuda/numba/cuda/device_init.py +3 -0
  87. numba_cuda/numba/cuda/deviceufunc.py +69 -2
  88. numba_cuda/numba/cuda/dispatcher.py +21 -39
  89. numba_cuda/numba/cuda/errors.py +10 -0
  90. numba_cuda/numba/cuda/extending.py +3 -0
  91. numba_cuda/numba/cuda/flags.py +143 -1
  92. numba_cuda/numba/cuda/fp16.py +3 -2
  93. numba_cuda/numba/cuda/include/13/cuda_bf16.h +5118 -0
  94. numba_cuda/numba/cuda/include/13/cuda_bf16.hpp +3865 -0
  95. numba_cuda/numba/cuda/include/13/cuda_fp16.h +5363 -0
  96. numba_cuda/numba/cuda/include/13/cuda_fp16.hpp +3483 -0
  97. numba_cuda/numba/cuda/initialize.py +4 -0
  98. numba_cuda/numba/cuda/intrinsic_wrapper.py +3 -0
  99. numba_cuda/numba/cuda/intrinsics.py +3 -0
  100. numba_cuda/numba/cuda/itanium_mangler.py +3 -0
  101. numba_cuda/numba/cuda/kernels/__init__.py +2 -0
  102. numba_cuda/numba/cuda/kernels/reduction.py +3 -0
  103. numba_cuda/numba/cuda/kernels/transpose.py +3 -0
  104. numba_cuda/numba/cuda/libdevice.py +4 -0
  105. numba_cuda/numba/cuda/libdevicedecl.py +3 -0
  106. numba_cuda/numba/cuda/libdevicefuncs.py +3 -0
  107. numba_cuda/numba/cuda/libdeviceimpl.py +3 -0
  108. numba_cuda/numba/cuda/locks.py +3 -0
  109. numba_cuda/numba/cuda/lowering.py +59 -159
  110. numba_cuda/numba/cuda/mathimpl.py +5 -1
  111. numba_cuda/numba/cuda/memory_management/__init__.py +3 -0
  112. numba_cuda/numba/cuda/memory_management/memsys.cu +5 -0
  113. numba_cuda/numba/cuda/memory_management/memsys.cuh +5 -0
  114. numba_cuda/numba/cuda/memory_management/nrt.cu +5 -0
  115. numba_cuda/numba/cuda/memory_management/nrt.cuh +5 -0
  116. numba_cuda/numba/cuda/memory_management/nrt.py +48 -18
  117. numba_cuda/numba/cuda/misc/findlib.py +75 -0
  118. numba_cuda/numba/cuda/models.py +12 -1
  119. numba_cuda/numba/cuda/np/npdatetime_helpers.py +217 -0
  120. numba_cuda/numba/cuda/np/npyfuncs.py +1807 -0
  121. numba_cuda/numba/cuda/np/numpy_support.py +553 -0
  122. numba_cuda/numba/cuda/np/ufunc/ufuncbuilder.py +59 -0
  123. numba_cuda/numba/cuda/nvvmutils.py +4 -1
  124. numba_cuda/numba/cuda/printimpl.py +15 -1
  125. numba_cuda/numba/cuda/random.py +4 -1
  126. numba_cuda/numba/cuda/reshape_funcs.cu +5 -0
  127. numba_cuda/numba/cuda/serialize.py +4 -1
  128. numba_cuda/numba/cuda/simulator/__init__.py +4 -1
  129. numba_cuda/numba/cuda/simulator/_internal/__init__.py +3 -0
  130. numba_cuda/numba/cuda/simulator/_internal/cuda_bf16.py +2 -0
  131. numba_cuda/numba/cuda/simulator/api.py +4 -1
  132. numba_cuda/numba/cuda/simulator/bf16.py +3 -0
  133. numba_cuda/numba/cuda/simulator/compiler.py +7 -0
  134. numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +3 -0
  135. numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +4 -1
  136. numba_cuda/numba/cuda/simulator/cudadrv/devices.py +3 -0
  137. numba_cuda/numba/cuda/simulator/cudadrv/driver.py +3 -0
  138. numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +3 -0
  139. numba_cuda/numba/cuda/simulator/cudadrv/dummyarray.py +3 -0
  140. numba_cuda/numba/cuda/simulator/cudadrv/error.py +4 -0
  141. numba_cuda/numba/cuda/simulator/cudadrv/libs.py +4 -0
  142. numba_cuda/numba/cuda/simulator/cudadrv/linkable_code.py +4 -0
  143. numba_cuda/numba/cuda/simulator/cudadrv/nvrtc.py +3 -0
  144. numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +3 -0
  145. numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +3 -0
  146. numba_cuda/numba/cuda/simulator/dispatcher.py +4 -0
  147. numba_cuda/numba/cuda/simulator/kernel.py +3 -0
  148. numba_cuda/numba/cuda/simulator/kernelapi.py +4 -1
  149. numba_cuda/numba/cuda/simulator/memory_management/__init__.py +3 -0
  150. numba_cuda/numba/cuda/simulator/memory_management/nrt.py +17 -2
  151. numba_cuda/numba/cuda/simulator/reduction.py +3 -0
  152. numba_cuda/numba/cuda/simulator/vector_types.py +3 -0
  153. numba_cuda/numba/cuda/simulator_init.py +3 -0
  154. numba_cuda/numba/cuda/stubs.py +3 -0
  155. numba_cuda/numba/cuda/target.py +38 -17
  156. numba_cuda/numba/cuda/testing.py +7 -19
  157. numba_cuda/numba/cuda/tests/__init__.py +4 -1
  158. numba_cuda/numba/cuda/tests/cloudpickle_main_class.py +9 -0
  159. numba_cuda/numba/cuda/tests/complex_usecases.py +3 -0
  160. numba_cuda/numba/cuda/tests/core/serialize_usecases.py +3 -0
  161. numba_cuda/numba/cuda/tests/core/test_itanium_mangler.py +3 -0
  162. numba_cuda/numba/cuda/tests/core/test_serialize.py +7 -4
  163. numba_cuda/numba/cuda/tests/cudadrv/__init__.py +3 -0
  164. numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +3 -0
  165. numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +3 -0
  166. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +3 -0
  167. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +3 -0
  168. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +4 -1
  169. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +3 -0
  170. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_libraries.py +4 -1
  171. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +3 -0
  172. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +3 -0
  173. numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +4 -1
  174. numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +9 -3
  175. numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +4 -1
  176. numba_cuda/numba/cuda/tests/cudadrv/test_events.py +3 -0
  177. numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +3 -0
  178. numba_cuda/numba/cuda/tests/cudadrv/test_init.py +3 -0
  179. numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +3 -0
  180. numba_cuda/numba/cuda/tests/cudadrv/test_is_fp16.py +3 -0
  181. numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +21 -2
  182. numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +3 -0
  183. numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py +5 -1
  184. numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +4 -1
  185. numba_cuda/numba/cuda/tests/cudadrv/test_nvrtc.py +3 -0
  186. numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +3 -0
  187. numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +3 -0
  188. numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +3 -0
  189. numba_cuda/numba/cuda/tests/cudadrv/test_ptds.py +4 -1
  190. numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +3 -0
  191. numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +3 -0
  192. numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +3 -0
  193. numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +3 -0
  194. numba_cuda/numba/cuda/tests/cudapy/__init__.py +3 -0
  195. numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +3 -0
  196. numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +3 -0
  197. numba_cuda/numba/cuda/tests/cudapy/cg_cache_usecases.py +3 -0
  198. numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +4 -1
  199. numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +3 -0
  200. numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +3 -0
  201. numba_cuda/numba/cuda/tests/cudapy/test_array.py +5 -1
  202. numba_cuda/numba/cuda/tests/cudapy/test_array_alignment.py +3 -0
  203. numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +3 -0
  204. numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +3 -0
  205. numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +4 -1
  206. numba_cuda/numba/cuda/tests/cudapy/test_bfloat16.py +542 -2
  207. numba_cuda/numba/cuda/tests/cudapy/test_bfloat16_bindings.py +84 -1
  208. numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +3 -0
  209. numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +3 -0
  210. numba_cuda/numba/cuda/tests/cudapy/test_caching.py +4 -3
  211. numba_cuda/numba/cuda/tests/cudapy/test_casting.py +3 -0
  212. numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +3 -0
  213. numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +3 -0
  214. numba_cuda/numba/cuda/tests/cudapy/test_complex.py +4 -1
  215. numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +3 -0
  216. numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +3 -0
  217. numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +4 -1
  218. numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +5 -3
  219. numba_cuda/numba/cuda/tests/cudapy/test_copy_propagate.py +130 -0
  220. numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +3 -0
  221. numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +3 -0
  222. numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +4 -1
  223. numba_cuda/numba/cuda/tests/cudapy/test_debug.py +4 -1
  224. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +314 -3
  225. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo_types.py +4 -1
  226. numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +3 -0
  227. numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +4 -1
  228. numba_cuda/numba/cuda/tests/cudapy/test_enums.py +3 -0
  229. numba_cuda/numba/cuda/tests/cudapy/test_errors.py +4 -1
  230. numba_cuda/numba/cuda/tests/cudapy/test_exception.py +4 -1
  231. numba_cuda/numba/cuda/tests/cudapy/test_extending.py +5 -1
  232. numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +3 -0
  233. numba_cuda/numba/cuda/tests/cudapy/test_forall.py +3 -0
  234. numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +3 -0
  235. numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +3 -0
  236. numba_cuda/numba/cuda/tests/cudapy/test_globals.py +3 -0
  237. numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +3 -0
  238. numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +3 -0
  239. numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +3 -0
  240. numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +3 -0
  241. numba_cuda/numba/cuda/tests/cudapy/test_inline.py +21 -8
  242. numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +3 -0
  243. numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +3 -0
  244. numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +3 -0
  245. numba_cuda/numba/cuda/tests/cudapy/test_ir_utils.py +13 -37
  246. numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +3 -0
  247. numba_cuda/numba/cuda/tests/cudapy/test_lang.py +3 -0
  248. numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +4 -1
  249. numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +3 -0
  250. numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +3 -0
  251. numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +3 -0
  252. numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +3 -0
  253. numba_cuda/numba/cuda/tests/cudapy/test_math.py +4 -1
  254. numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +4 -1
  255. numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +3 -0
  256. numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +3 -0
  257. numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +3 -0
  258. numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +3 -0
  259. numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +3 -0
  260. numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +3 -0
  261. numba_cuda/numba/cuda/tests/cudapy/test_operator.py +4 -1
  262. numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +3 -0
  263. numba_cuda/numba/cuda/tests/cudapy/test_overload.py +3 -0
  264. numba_cuda/numba/cuda/tests/cudapy/test_powi.py +3 -0
  265. numba_cuda/numba/cuda/tests/cudapy/test_print.py +23 -0
  266. numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +3 -0
  267. numba_cuda/numba/cuda/tests/cudapy/test_random.py +3 -0
  268. numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +4 -1
  269. numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +3 -0
  270. numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +4 -1
  271. numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +3 -0
  272. numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +4 -1
  273. numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +3 -0
  274. numba_cuda/numba/cuda/tests/cudapy/test_sm.py +4 -1
  275. numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +3 -0
  276. numba_cuda/numba/cuda/tests/cudapy/test_ssa.py +453 -0
  277. numba_cuda/numba/cuda/tests/cudapy/test_stream_api.py +3 -0
  278. numba_cuda/numba/cuda/tests/cudapy/test_sync.py +4 -1
  279. numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +3 -0
  280. numba_cuda/numba/cuda/tests/cudapy/test_typeinfer.py +538 -0
  281. numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +266 -2
  282. numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +4 -1
  283. numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +4 -1
  284. numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +3 -0
  285. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +3 -0
  286. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +115 -6
  287. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +3 -0
  288. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +3 -0
  289. numba_cuda/numba/cuda/tests/cudapy/test_warning.py +4 -1
  290. numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +4 -1
  291. numba_cuda/numba/cuda/tests/cudasim/__init__.py +3 -0
  292. numba_cuda/numba/cuda/tests/cudasim/support.py +3 -0
  293. numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +3 -0
  294. numba_cuda/numba/cuda/tests/data/__init__.py +2 -0
  295. numba_cuda/numba/cuda/tests/data/cta_barrier.cu +5 -0
  296. numba_cuda/numba/cuda/tests/data/cuda_include.cu +5 -0
  297. numba_cuda/numba/cuda/tests/data/error.cu +5 -0
  298. numba_cuda/numba/cuda/tests/data/include/add.cuh +5 -0
  299. numba_cuda/numba/cuda/tests/data/jitlink.cu +5 -0
  300. numba_cuda/numba/cuda/tests/data/warn.cu +5 -0
  301. numba_cuda/numba/cuda/tests/doc_examples/__init__.py +3 -0
  302. numba_cuda/numba/cuda/tests/doc_examples/ffi/__init__.py +2 -0
  303. numba_cuda/numba/cuda/tests/doc_examples/ffi/functions.cu +5 -0
  304. numba_cuda/numba/cuda/tests/doc_examples/ffi/include/mul.cuh +5 -0
  305. numba_cuda/numba/cuda/tests/doc_examples/ffi/saxpy.cu +5 -0
  306. numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +3 -2
  307. numba_cuda/numba/cuda/tests/doc_examples/test_cpointer.py +3 -0
  308. numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +3 -0
  309. numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +6 -2
  310. numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +3 -2
  311. numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +3 -0
  312. numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +3 -0
  313. numba_cuda/numba/cuda/tests/doc_examples/test_random.py +3 -0
  314. numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +3 -0
  315. numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +3 -2
  316. numba_cuda/numba/cuda/tests/doc_examples/test_ufunc.py +3 -0
  317. numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +3 -0
  318. numba_cuda/numba/cuda/tests/enum_usecases.py +3 -0
  319. numba_cuda/numba/cuda/tests/nocuda/__init__.py +3 -0
  320. numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +3 -0
  321. numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +3 -0
  322. numba_cuda/numba/cuda/tests/nocuda/test_import.py +6 -1
  323. numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +27 -12
  324. numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +3 -0
  325. numba_cuda/numba/cuda/tests/nrt/__init__.py +3 -0
  326. numba_cuda/numba/cuda/tests/nrt/test_nrt.py +5 -1
  327. numba_cuda/numba/cuda/tests/nrt/test_nrt_refct.py +3 -0
  328. numba_cuda/numba/cuda/tests/support.py +58 -15
  329. numba_cuda/numba/cuda/tests/test_binary_generation/Makefile +3 -0
  330. numba_cuda/numba/cuda/tests/test_binary_generation/generate_raw_ltoir.py +2 -1
  331. numba_cuda/numba/cuda/tests/test_binary_generation/nrt_extern.cu +5 -0
  332. numba_cuda/numba/cuda/tests/test_binary_generation/test_device_functions.cu +5 -0
  333. numba_cuda/numba/cuda/tests/test_binary_generation/undefined_extern.cu +5 -0
  334. numba_cuda/numba/cuda/tests/test_tracing.py +200 -0
  335. numba_cuda/numba/cuda/types.py +59 -0
  336. numba_cuda/numba/cuda/typing/__init__.py +12 -1
  337. numba_cuda/numba/cuda/typing/cffi_utils.py +55 -0
  338. numba_cuda/numba/cuda/typing/context.py +751 -0
  339. numba_cuda/numba/cuda/typing/enumdecl.py +74 -0
  340. numba_cuda/numba/cuda/typing/npydecl.py +658 -0
  341. numba_cuda/numba/cuda/typing/templates.py +10 -14
  342. numba_cuda/numba/cuda/ufuncs.py +6 -3
  343. numba_cuda/numba/cuda/utils.py +9 -112
  344. numba_cuda/numba/cuda/vector_types.py +3 -0
  345. numba_cuda/numba/cuda/vectorizers.py +3 -0
  346. {numba_cuda-0.19.0.dist-info → numba_cuda-0.20.0.dist-info}/METADATA +6 -2
  347. numba_cuda-0.20.0.dist-info/RECORD +357 -0
  348. {numba_cuda-0.19.0.dist-info → numba_cuda-0.20.0.dist-info}/licenses/LICENSE +1 -0
  349. numba_cuda-0.20.0.dist-info/licenses/LICENSE.numba +24 -0
  350. numba_cuda/numba/cuda/tests/cudadrv/test_mvc.py +0 -57
  351. numba_cuda-0.19.0.dist-info/RECORD +0 -301
  352. {numba_cuda-0.19.0.dist-info → numba_cuda-0.20.0.dist-info}/WHEEL +0 -0
  353. {numba_cuda-0.19.0.dist-info → numba_cuda-0.20.0.dist-info}/top_level.txt +0 -0
@@ -1,3 +1,6 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
1
4
  import builtins
2
5
  import collections
3
6
  import dis
@@ -5,28 +8,29 @@ import operator
5
8
  import logging
6
9
  import textwrap
7
10
 
8
- from numba.core import errors, ir, config
11
+ from numba.core import errors, ir
12
+ from numba.cuda.core import config
13
+ from numba.cuda.errors import UnsupportedBytecodeError
9
14
  from numba.core.errors import (
10
15
  NotDefinedError,
11
- UnsupportedBytecodeError,
12
16
  error_extras,
13
17
  )
14
18
  from numba.cuda.core import ir_utils
15
- from numba.core.utils import (
19
+ from numba.cuda.utils import (
16
20
  PYVERSION,
17
21
  BINOPS_TO_OPERATORS,
18
22
  INPLACE_BINOPS_TO_OPERATORS,
19
- _lazy_pformat,
20
23
  )
24
+ from numba.cuda.utils import _lazy_pformat
21
25
  from numba.core.byteflow import Flow, AdaptDFA, AdaptCFA, BlockKind
22
- from numba.core.unsafe import eh
26
+ from numba.cuda.core.unsafe import eh
23
27
  from numba.cpython.unsafe.tuple import unpack_single_tuple
24
28
 
25
29
 
26
30
  if PYVERSION in ((3, 12), (3, 13)):
27
31
  # Operands for CALL_INTRINSIC_1
28
32
  from numba.core.byteflow import CALL_INTRINSIC_1_Operand as ci1op
29
- elif PYVERSION in ((3, 10), (3, 11)):
33
+ elif PYVERSION in ((3, 9), (3, 10), (3, 11)):
30
34
  pass
31
35
  else:
32
36
  raise NotImplementedError(PYVERSION)
@@ -1392,7 +1396,7 @@ class Interpreter(object):
1392
1396
  if entry.start < self.last_active_offset
1393
1397
  ]
1394
1398
  )
1395
- elif PYVERSION in ((3, 10), (3, 11)):
1399
+ elif PYVERSION in ((3, 9), (3, 10), (3, 11)):
1396
1400
  pass
1397
1401
  else:
1398
1402
  raise NotImplementedError(PYVERSION)
@@ -1407,7 +1411,10 @@ class Interpreter(object):
1407
1411
  if PYVERSION in ((3, 11), (3, 12), (3, 13)):
1408
1412
  # Insert end of try markers
1409
1413
  self._end_try_blocks()
1410
- elif PYVERSION in ((3, 10),):
1414
+ elif PYVERSION in (
1415
+ (3, 9),
1416
+ (3, 10),
1417
+ ):
1411
1418
  pass
1412
1419
  else:
1413
1420
  raise NotImplementedError(PYVERSION)
@@ -1431,7 +1438,7 @@ class Interpreter(object):
1431
1438
  peepholes = []
1432
1439
  if PYVERSION in ((3, 11), (3, 12), (3, 13)):
1433
1440
  peepholes.append(peep_hole_split_at_pop_block)
1434
- if PYVERSION in ((3, 10), (3, 11), (3, 12), (3, 13)):
1441
+ if PYVERSION in ((3, 9), (3, 10), (3, 11), (3, 12), (3, 13)):
1435
1442
  peepholes.append(peep_hole_list_to_tuple)
1436
1443
  peepholes.append(peep_hole_delete_with_exit)
1437
1444
  if PYVERSION in ((3, 10), (3, 11), (3, 12), (3, 13)):
@@ -1589,7 +1596,10 @@ class Interpreter(object):
1589
1596
  if newtryblk is not None:
1590
1597
  if newtryblk is not tryblk:
1591
1598
  self._insert_try_block_begin()
1592
- elif PYVERSION in ((3, 10),):
1599
+ elif PYVERSION in (
1600
+ (3, 9),
1601
+ (3, 10),
1602
+ ):
1593
1603
  while self.syntax_blocks:
1594
1604
  if offset >= self.syntax_blocks[-1].exit:
1595
1605
  self.syntax_blocks.pop()
@@ -1826,7 +1836,10 @@ class Interpreter(object):
1826
1836
  if inst.offset >= top.exit:
1827
1837
  self.current_block.append(ir.PopBlock(loc=self.loc))
1828
1838
  self.syntax_blocks.pop()
1829
- elif PYVERSION in ((3, 10),):
1839
+ elif PYVERSION in (
1840
+ (3, 9),
1841
+ (3, 10),
1842
+ ):
1830
1843
  pass
1831
1844
  else:
1832
1845
  raise NotImplementedError(PYVERSION)
@@ -2023,7 +2036,7 @@ class Interpreter(object):
2023
2036
  target = self.get(container)
2024
2037
  expr = ir.Expr.getitem(target, index=index, loc=self.loc)
2025
2038
  self.store(expr, res)
2026
- elif PYVERSION in ((3, 10), (3, 11)):
2039
+ elif PYVERSION in ((3, 9), (3, 10), (3, 11)):
2027
2040
  pass
2028
2041
  else:
2029
2042
  raise NotImplementedError(PYVERSION)
@@ -2049,7 +2062,7 @@ class Interpreter(object):
2049
2062
  target=target, index=index, value=value, loc=self.loc
2050
2063
  )
2051
2064
  self.current_block.append(stmt)
2052
- elif PYVERSION in ((3, 10), (3, 11)):
2065
+ elif PYVERSION in ((3, 9), (3, 10), (3, 11)):
2053
2066
  pass
2054
2067
  else:
2055
2068
  raise NotImplementedError(PYVERSION)
@@ -2342,7 +2355,7 @@ class Interpreter(object):
2342
2355
  dstname = self.code_locals[oparg2]
2343
2356
  self.store(value=self.get(value2), name=dstname)
2344
2357
 
2345
- elif PYVERSION in ((3, 10), (3, 11), (3, 12)):
2358
+ elif PYVERSION in ((3, 9), (3, 10), (3, 11), (3, 12)):
2346
2359
  pass
2347
2360
  else:
2348
2361
  raise NotImplementedError(PYVERSION)
@@ -2360,7 +2373,7 @@ class Interpreter(object):
2360
2373
  undef = ir.Expr.undef(loc=self.loc)
2361
2374
  self.store(undef, name=res)
2362
2375
 
2363
- elif PYVERSION in ((3, 10), (3, 11)):
2376
+ elif PYVERSION in ((3, 9), (3, 10), (3, 11)):
2364
2377
  pass
2365
2378
  else:
2366
2379
  raise NotImplementedError(PYVERSION)
@@ -2400,7 +2413,7 @@ class Interpreter(object):
2400
2413
  item = self.get(item)
2401
2414
  if PYVERSION in ((3, 12), (3, 13)):
2402
2415
  attr = self.code_names[inst.arg >> 1]
2403
- elif PYVERSION in ((3, 10), (3, 11)):
2416
+ elif PYVERSION in ((3, 9), (3, 10), (3, 11)):
2404
2417
  attr = self.code_names[inst.arg]
2405
2418
  else:
2406
2419
  raise NotImplementedError(PYVERSION)
@@ -2436,7 +2449,10 @@ class Interpreter(object):
2436
2449
  value = self.get_global_value(name)
2437
2450
  gl = ir.Global(name, value, loc=self.loc)
2438
2451
  self.store(gl, res)
2439
- elif PYVERSION in ((3, 10),):
2452
+ elif PYVERSION in (
2453
+ (3, 9),
2454
+ (3, 10),
2455
+ ):
2440
2456
 
2441
2457
  def op_LOAD_GLOBAL(self, inst, res):
2442
2458
  name = self.code_names[inst.arg]
@@ -2464,7 +2480,10 @@ class Interpreter(object):
2464
2480
  value = self.get_closure_value(idx)
2465
2481
  gl = ir.FreeVar(idx, name, value, loc=self.loc)
2466
2482
  self.store(gl, res)
2467
- elif PYVERSION in ((3, 10),):
2483
+ elif PYVERSION in (
2484
+ (3, 9),
2485
+ (3, 10),
2486
+ ):
2468
2487
 
2469
2488
  def op_LOAD_DEREF(self, inst, res):
2470
2489
  n_cellvars = len(self.code_cellvars)
@@ -2491,7 +2510,10 @@ class Interpreter(object):
2491
2510
  name = self.func_id.func.__code__._varname_from_oparg(inst.arg)
2492
2511
  value = self.get(value)
2493
2512
  self.store(value=value, name=name)
2494
- elif PYVERSION in ((3, 10),):
2513
+ elif PYVERSION in (
2514
+ (3, 9),
2515
+ (3, 10),
2516
+ ):
2495
2517
 
2496
2518
  def op_STORE_DEREF(self, inst, value):
2497
2519
  n_cellvars = len(self.code_cellvars)
@@ -2543,7 +2565,7 @@ class Interpreter(object):
2543
2565
  if ex.target == end
2544
2566
  ]
2545
2567
  )
2546
- elif PYVERSION in ((3, 10), (3, 11)):
2568
+ elif PYVERSION in ((3, 9), (3, 10), (3, 11)):
2547
2569
  pass
2548
2570
  else:
2549
2571
  raise NotImplementedError(PYVERSION)
@@ -3104,7 +3126,7 @@ class Interpreter(object):
3104
3126
  self.store(ir.Expr.cast(self.get(retval), loc=self.loc), castval)
3105
3127
  ret = ir.Return(self.get(castval), loc=self.loc)
3106
3128
  self.current_block.append(ret)
3107
- elif PYVERSION in ((3, 10), (3, 11)):
3129
+ elif PYVERSION in ((3, 9), (3, 10), (3, 11)):
3108
3130
  pass
3109
3131
  else:
3110
3132
  raise NotImplementedError(PYVERSION)
@@ -3114,7 +3136,7 @@ class Interpreter(object):
3114
3136
  def op_TO_BOOL(self, inst, val, res):
3115
3137
  self.store(self.get(val), res) # TODO: just a lazy hack
3116
3138
 
3117
- elif PYVERSION in ((3, 10), (3, 11), (3, 12)):
3139
+ elif PYVERSION in ((3, 9), (3, 10), (3, 11), (3, 12)):
3118
3140
  pass
3119
3141
  else:
3120
3142
  raise NotImplementedError(PYVERSION)
@@ -3125,7 +3147,7 @@ class Interpreter(object):
3125
3147
  # TODO: fifth lowest bit now indicates a forced version to bool.
3126
3148
  elif PYVERSION in ((3, 12),):
3127
3149
  op = dis.cmp_op[inst.arg >> 4]
3128
- elif PYVERSION in ((3, 10), (3, 11)):
3150
+ elif PYVERSION in ((3, 9), (3, 10), (3, 11)):
3129
3151
  op = dis.cmp_op[inst.arg]
3130
3152
  else:
3131
3153
  raise NotImplementedError(PYVERSION)
@@ -3256,7 +3278,7 @@ class Interpreter(object):
3256
3278
 
3257
3279
  def op_POP_JUMP_IF_NOT_NONE(self, inst, pred):
3258
3280
  self._jump_if_none(inst, pred, False)
3259
- elif PYVERSION in ((3, 10), (3, 11)):
3281
+ elif PYVERSION in ((3, 9), (3, 10), (3, 11)):
3260
3282
  pass
3261
3283
  else:
3262
3284
  raise NotImplementedError(PYVERSION)
@@ -3426,7 +3448,10 @@ class Interpreter(object):
3426
3448
  assert 0, "unreachable"
3427
3449
  self.store(gl, res)
3428
3450
 
3429
- elif PYVERSION in ((3, 10),):
3451
+ elif PYVERSION in (
3452
+ (3, 9),
3453
+ (3, 10),
3454
+ ):
3430
3455
 
3431
3456
  def op_LOAD_CLOSURE(self, inst, res):
3432
3457
  n_cellvars = len(self.code_cellvars)
@@ -3576,7 +3601,7 @@ class Interpreter(object):
3576
3601
  return
3577
3602
  else:
3578
3603
  raise NotImplementedError(operand)
3579
- elif PYVERSION in ((3, 10), (3, 11)):
3604
+ elif PYVERSION in ((3, 9), (3, 10), (3, 11)):
3580
3605
  pass
3581
3606
  else:
3582
3607
  raise NotImplementedError(PYVERSION)
@@ -3586,7 +3611,7 @@ if PYVERSION in ((3, 12), (3, 13)):
3586
3611
 
3587
3612
  class INTRINSIC_STOPITERATION_ERROR(AssertionError):
3588
3613
  pass
3589
- elif PYVERSION in ((3, 10), (3, 11)):
3614
+ elif PYVERSION in ((3, 9), (3, 10), (3, 11)):
3590
3615
  pass
3591
3616
  else:
3592
3617
  raise NotImplementedError(PYVERSION)
@@ -1,7 +1,6 @@
1
- #
2
- # Copyright (c) 2017 Intel Corporation
1
+ # SPDX-FileCopyrightText: Copyright (c) 2017 Intel Corporation
2
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3
3
  # SPDX-License-Identifier: BSD-2-Clause
4
- #
5
4
 
6
5
  import numpy
7
6
  import math
@@ -12,7 +11,9 @@ import warnings
12
11
 
13
12
  import numba
14
13
  from numba.core.extending import _Intrinsic
15
- from numba.core import types, typing, ir, analysis, postproc, rewrites, config
14
+ from numba.core import types, ir, analysis
15
+ from numba.cuda import typing
16
+ from numba.cuda.core import postproc, rewrites, config
16
17
  from numba.core.typing.templates import signature
17
18
  from numba.core.analysis import (
18
19
  compute_live_map,
@@ -249,12 +250,7 @@ def mk_range_block(typemap, start, stop, step, calltypes, scope, loc):
249
250
  range_call_assign = ir.Assign(range_call, range_call_var, loc)
250
251
  # iter_var = getiter(range_call_var)
251
252
  iter_call = ir.Expr.getiter(range_call_var, loc)
252
- if config.USE_LEGACY_TYPE_SYSTEM:
253
- calltype_sig = signature(
254
- types.range_iter64_type, types.range_state64_type
255
- )
256
- else:
257
- calltype_sig = signature(types.range_iter_type, types.range_state_type)
253
+ calltype_sig = signature(types.range_iter64_type, types.range_state64_type)
258
254
  calltypes[iter_call] = calltype_sig
259
255
  iter_var = ir.Var(scope, mk_unique_var("$iter_var"), loc)
260
256
  typemap[iter_var.name] = types.iterators.RangeIteratorType(types.intp)
@@ -333,10 +329,7 @@ def mk_loop_header(typemap, phi_var, calltypes, scope, loc):
333
329
  types.intp, types.boolean
334
330
  )
335
331
  iternext_call = ir.Expr.iternext(phi_var, loc)
336
- if config.USE_LEGACY_TYPE_SYSTEM:
337
- range_iter_type = types.range_iter64_type
338
- else:
339
- range_iter_type = types.range_iter_type
332
+ range_iter_type = types.range_iter64_type
340
333
  calltypes[iternext_call] = signature(
341
334
  types.containers.Pair(types.intp, types.boolean), range_iter_type
342
335
  )
@@ -813,8 +806,6 @@ def has_no_side_effect(rhs, lives, call_table):
813
806
  """Returns True if this expression has no side effects that
814
807
  would prevent re-ordering.
815
808
  """
816
- from numba.parfors import array_analysis, parfor
817
- from numba.misc.special import prange
818
809
 
819
810
  if isinstance(rhs, ir.Expr) and rhs.op == "call":
820
811
  func_name = rhs.func.name
@@ -827,11 +818,7 @@ def has_no_side_effect(rhs, lives, call_table):
827
818
  or call_list == ["stencil", numba]
828
819
  or call_list == ["log", numpy]
829
820
  or call_list == ["dtype", numpy]
830
- or call_list == [array_analysis.wrap_index]
831
- or call_list == [prange]
832
- or call_list == ["prange", numba]
833
821
  or call_list == ["pndindex", numba]
834
- or call_list == [parfor.internal_prange]
835
822
  or call_list == ["ceil", math]
836
823
  or call_list == [max]
837
824
  or call_list == [int]
@@ -1894,7 +1881,7 @@ def compile_to_numba_ir(
1894
1881
  if typingctx and other typing inputs are available and update typemap and
1895
1882
  calltypes.
1896
1883
  """
1897
- from numba.core import typed_passes
1884
+ from numba.cuda.core import typed_passes
1898
1885
 
1899
1886
  # mk_func can be actual function or make_function node, or a njit function
1900
1887
  if hasattr(mk_func, "code"):
@@ -1976,7 +1963,8 @@ def get_ir_of_code(glbls, fcode):
1976
1963
  fcode, func_env, func_arg, func_clo, glbls
1977
1964
  )
1978
1965
 
1979
- from numba.core import compiler
1966
+ from numba.cuda import compiler
1967
+ from numba.cuda.core.compiler import StateDict
1980
1968
 
1981
1969
  ir = compiler.run_frontend(f)
1982
1970
 
@@ -1985,7 +1973,7 @@ def get_ir_of_code(glbls, fcode):
1985
1973
  # for example, Raise nodes need to become StaticRaise before type inference
1986
1974
  class DummyPipeline(object):
1987
1975
  def __init__(self, f_ir):
1988
- self.state = compiler.StateDict()
1976
+ self.state = StateDict()
1989
1977
  self.state.typingctx = None
1990
1978
  self.state.targetctx = None
1991
1979
  self.state.args = None
@@ -1998,10 +1986,10 @@ def get_ir_of_code(glbls, fcode):
1998
1986
  rewrites.rewrite_registry.apply("before-inference", state)
1999
1987
  # call inline pass to handle cases like stencils and comprehensions
2000
1988
  swapped = {} # TODO: get this from diagnostics store
2001
- import numba.core.inline_closurecall
1989
+ from numba.cuda.core.inline_closurecall import InlineClosureCallPass
2002
1990
 
2003
- inline_pass = numba.core.inline_closurecall.InlineClosureCallPass(
2004
- ir, numba.core.cpu.ParallelOptions(False), swapped
1991
+ inline_pass = InlineClosureCallPass(
1992
+ ir, numba.cuda.core.options.ParallelOptions(False), swapped
2005
1993
  )
2006
1994
  inline_pass.run()
2007
1995
 
@@ -2014,8 +2002,8 @@ def get_ir_of_code(glbls, fcode):
2014
2002
  # added to create valid IR.
2015
2003
 
2016
2004
  # rebuild IR in SSA form
2017
- from numba.core.untyped_passes import ReconstructSSA
2018
- from numba.core.typed_passes import PreLowerStripPhis
2005
+ from numba.cuda.core.untyped_passes import ReconstructSSA
2006
+ from numba.cuda.core.typed_passes import PreLowerStripPhis
2019
2007
 
2020
2008
  reconstruct_ssa = ReconstructSSA()
2021
2009
  phistrip = PreLowerStripPhis()
@@ -2495,7 +2483,7 @@ def legalize_single_scope(blocks):
2495
2483
  return len({blk.scope for blk in blocks.values()}) == 1
2496
2484
 
2497
2485
 
2498
- def check_and_legalize_ir(func_ir, flags: "numba.core.compiler.Flags"):
2486
+ def check_and_legalize_ir(func_ir, flags: "numba.core.flags.Flags"):
2499
2487
  """
2500
2488
  This checks that the IR presented is legal
2501
2489
  """
@@ -0,0 +1,262 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
4
+ """
5
+ Defines CUDA Options for use in the CUDA target
6
+ """
7
+
8
+ from abc import ABCMeta, abstractmethod
9
+
10
+
11
class AbstractOptionValue(metaclass=ABCMeta):
    """Base interface that every custom option value implements.

    Concrete subclasses must provide :meth:`encode`; the default
    ``repr`` is built from the subclass name and that encoding.
    """

    @abstractmethod
    def encode(self) -> str:
        """Return a string encoding of the option's values."""

    def __repr__(self) -> str:
        # Rendered as ``ClassName(<encoded values>)``.
        return "{}({})".format(self.__class__.__name__, self.encode())
21
+
22
+
23
class FastMathOptions(AbstractOptionValue):
    """
    Options for controlling fast math optimization.

    The enabled flags are kept in ``self.flags`` as a set of flag names.

    Parameters
    ----------
    value : FastMathOptions, bool, set or dict
        * ``FastMathOptions`` -- its flags are copied.
        * ``True`` -- enables the single ``"fast"`` flag.
        * ``False`` -- no flags enabled.
        * ``set`` -- the set of flag names to enable.
        * ``dict`` -- maps flag names to a truth value; only flags whose
          value is truthy are enabled.

    Raises
    ------
    ValueError
        If a set/dict contains an unknown flag name, or if ``value`` is
        none of the accepted kinds.
    """

    def __init__(self, value):
        # https://releases.llvm.org/7.0.0/docs/LangRef.html#fast-math-flags
        valid_flags = {
            "fast",
            "nnan",
            "ninf",
            "nsz",
            "arcp",
            "contract",
            "afn",
            "reassoc",
        }

        if isinstance(value, FastMathOptions):
            self.flags = value.flags.copy()
        elif value is True:
            self.flags = {"fast"}
        elif value is False:
            self.flags = set()
        elif isinstance(value, set):
            invalid = value - valid_flags
            if invalid:
                raise ValueError("Unrecognized fastmath flags: %s" % invalid)
            # Take a copy: keeping the caller's set by reference would let
            # later mutations of that set silently change these options.
            self.flags = set(value)
        elif isinstance(value, dict):
            invalid = set(value.keys()) - valid_flags
            if invalid:
                raise ValueError("Unrecognized fastmath flags: %s" % invalid)
            self.flags = {name for name, enable in value.items() if enable}
        else:
            msg = "Expected fastmath option(s) to be either a bool, dict or set"
            raise ValueError(msg)

    def __bool__(self):
        """True when at least one flag is enabled."""
        return bool(self.flags)

    __nonzero__ = __bool__

    def encode(self) -> str:
        """Return the enabled flags formatted like a set literal.

        The flags are emitted in sorted order. Plain ``str(set)`` follows
        the set's iteration order, which for strings varies between
        interpreter runs when hash randomisation is active, so the same
        options could otherwise encode differently from run to run.
        """
        if not self.flags:
            # Same text ``str(set())`` produces.
            return "set()"
        return "{%s}" % ", ".join(repr(flag) for flag in sorted(self.flags))

    def __eq__(self, other):
        """Equal only to another instance of the same type with equal flags."""
        if type(other) is type(self):
            return self.flags == other.flags
        return NotImplemented
73
+
74
+
75
class ParallelOptions(AbstractOptionValue):
    """
    Options for controlling auto parallelization.

    Parameters
    ----------
    value : bool, dict or ParallelOptions
        * ``bool`` -- ``enabled`` and every individual feature are set to
          this value.
        * ``dict`` -- ``enabled`` is set to True; each feature takes the
          value stored under its name, defaulting to True when absent.
          The dict passed in is left unmodified.
        * ``ParallelOptions`` -- every attribute is copied from it.

    Raises
    ------
    NameError
        If a dict ``value`` contains keys that are not feature names.
    ValueError
        If ``value`` is none of the accepted kinds.
    """

    __slots__ = (
        "enabled",
        "comprehension",
        "reduction",
        "inplace_binop",
        "setitem",
        "numpy",
        "stencil",
        "fusion",
        "prange",
    )

    def __init__(self, value):
        # Every slot except the leading "enabled" is an individual feature.
        feature_names = ParallelOptions.__slots__[1:]

        if isinstance(value, bool):
            self.enabled = value
            for name in feature_names:
                setattr(self, name, value)
        elif isinstance(value, dict):
            # Work on a copy: popping from the caller's dict would empty it,
            # so building options from the same dict a second time would
            # silently fall back to the all-True defaults.
            remaining = dict(value)
            self.enabled = True
            for name in feature_names:
                setattr(self, name, remaining.pop(name, True))
            if remaining:
                msg = "Unrecognized parallel options: %s" % remaining.keys()
                raise NameError(msg)
        elif isinstance(value, ParallelOptions):
            for name in ParallelOptions.__slots__:
                setattr(self, name, getattr(value, name))
        else:
            msg = "Expect parallel option to be either a bool or a dict"
            raise ValueError(msg)

    def _get_values(self):
        """Get values as dictionary."""
        return {k: getattr(self, k) for k in self.__slots__}

    def __eq__(self, other):
        """Equal only to another instance of the same type with equal values."""
        if type(other) is type(self):
            return self._get_values() == other._get_values()
        return NotImplemented

    def encode(self) -> str:
        """Return ``name=value`` pairs for every slot, joined by ``", "``."""
        return ", ".join(f"{k}={v}" for k, v in self._get_values().items())
141
+
142
+
143
class InlineOptions(AbstractOptionValue):
    """
    Options for controlling inlining

    Parameters
    ----------
    value : str or callable
        Either the string ``"always"`` or ``"never"``, or a callable
        (described by the error message as one that returns True/False).

    Raises
    ------
    ValueError
        If ``value`` is any other string or is not callable.
    """

    def __init__(self, value):
        if isinstance(value, str):
            ok = value in ("always", "never")
        else:
            ok = callable(value)

        if not ok:
            # Wrap ``value`` in a 1-tuple: with a bare ``% value`` a tuple
            # argument would be unpacked as several format arguments and
            # raise TypeError instead of the intended ValueError.
            msg = (
                "kwarg 'inline' must be one of the strings 'always' or "
                "'never', or it can be a callable that returns True/False. "
                "Found value %s" % (value,)
            )
            raise ValueError(msg)

        self._inline = value

    @property
    def is_never_inline(self):
        """
        True if never inline
        """
        return self._inline == "never"

    @property
    def is_always_inline(self):
        """
        True if always inline
        """
        return self._inline == "always"

    @property
    def has_cost_model(self):
        """
        True if a cost model is provided
        """
        return not (self.is_always_inline or self.is_never_inline)

    @property
    def value(self):
        """
        The raw value
        """
        return self._inline

    def __eq__(self, other):
        """Equal only to another instance of the same type with equal value."""
        if type(other) is type(self):
            return self.value == other.value
        return NotImplemented

    def encode(self) -> str:
        """Return the ``repr`` of the raw inline value."""
        return repr(self._inline)
201
+
202
+
203
class TargetOptions:
    """Translate user options given to decorators into attributes set on
    a flags object.

    Subclasses declare the options they understand as class attributes
    holding ``Mapping`` instances; the attribute name is the option name.
    """

    class Mapping:
        """Pairs a flag attribute name with a converter for the option value."""

        def __init__(self, flag_name, apply=lambda x: x):
            self.flag_name = flag_name
            self.apply = apply

    def finalize(self, flags, options):
        """Hook for target specific adjustment of the flags; the base
        implementation does nothing.

        Parameters
        ----------
        flags : Flags
        options : dict
        """

    @classmethod
    def parse_as_flags(cls, flags, options):
        """Apply the target options found in ``options`` to ``flags`` and
        return ``flags``.

        Parameters
        ----------
        flags : Flags
        options : dict
        """
        instance = cls()
        instance._apply(flags, options)
        instance.finalize(flags, options)
        return flags

    def _apply(self, flags, options):
        """Set one flag per recognised option; raise KeyError for the rest."""
        owner = type(self)
        # Gather every Mapping declared on the class (inherited ones too).
        mappings = {
            name: attr
            for name, attr in ((n, getattr(owner, n)) for n in dir(owner))
            if isinstance(attr, owner.Mapping)
        }

        used = set()
        for name, mapping in mappings.items():
            if name not in options:
                continue
            converted = mapping.apply(options[name])
            setattr(flags, mapping.flag_name, converted)
            used.add(name)

        unused = set(options) - used
        if not unused:
            return
        # Options that no Mapping consumed are reported as an error.
        m = (
            f"Unrecognized options: {unused}. "
            f"Known options are {mappings.keys()}"
        )
        raise KeyError(m)