numba-cuda 0.19.0__py3-none-any.whl → 0.20.0__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of numba-cuda might be problematic. Click here for more details.

Files changed (353)
  1. _numba_cuda_redirector.pth +3 -0
  2. _numba_cuda_redirector.py +3 -0
  3. numba_cuda/VERSION +1 -1
  4. numba_cuda/__init__.py +2 -1
  5. numba_cuda/_version.py +2 -13
  6. numba_cuda/numba/cuda/__init__.py +4 -1
  7. numba_cuda/numba/cuda/_internal/cuda_bf16.py +12708 -1469
  8. numba_cuda/numba/cuda/_internal/cuda_fp16.py +2656 -8769
  9. numba_cuda/numba/cuda/api.py +9 -1
  10. numba_cuda/numba/cuda/api_util.py +3 -0
  11. numba_cuda/numba/cuda/args.py +3 -0
  12. numba_cuda/numba/cuda/bf16.py +288 -2
  13. numba_cuda/numba/cuda/cg.py +3 -0
  14. numba_cuda/numba/cuda/cgutils.py +5 -2
  15. numba_cuda/numba/cuda/cloudpickle/__init__.py +21 -0
  16. numba_cuda/numba/cuda/cloudpickle/cloudpickle.py +1598 -0
  17. numba_cuda/numba/cuda/cloudpickle/cloudpickle_fast.py +17 -0
  18. numba_cuda/numba/cuda/codegen.py +4 -1
  19. numba_cuda/numba/cuda/compiler.py +376 -30
  20. numba_cuda/numba/cuda/core/analysis.py +319 -0
  21. numba_cuda/numba/cuda/core/annotations/__init__.py +0 -0
  22. numba_cuda/numba/cuda/core/annotations/type_annotations.py +304 -0
  23. numba_cuda/numba/cuda/core/base.py +1289 -0
  24. numba_cuda/numba/cuda/core/bytecode.py +727 -0
  25. numba_cuda/numba/cuda/core/caching.py +5 -2
  26. numba_cuda/numba/cuda/core/callconv.py +3 -0
  27. numba_cuda/numba/cuda/core/codegen.py +3 -0
  28. numba_cuda/numba/cuda/core/compiler.py +9 -14
  29. numba_cuda/numba/cuda/core/compiler_machinery.py +497 -0
  30. numba_cuda/numba/cuda/core/config.py +747 -0
  31. numba_cuda/numba/cuda/core/consts.py +124 -0
  32. numba_cuda/numba/cuda/core/cpu.py +370 -0
  33. numba_cuda/numba/cuda/core/environment.py +68 -0
  34. numba_cuda/numba/cuda/core/event.py +511 -0
  35. numba_cuda/numba/cuda/core/funcdesc.py +330 -0
  36. numba_cuda/numba/cuda/core/inline_closurecall.py +1889 -0
  37. numba_cuda/numba/cuda/core/interpreter.py +52 -27
  38. numba_cuda/numba/cuda/core/ir_utils.py +17 -29
  39. numba_cuda/numba/cuda/core/options.py +262 -0
  40. numba_cuda/numba/cuda/core/postproc.py +249 -0
  41. numba_cuda/numba/cuda/core/pythonapi.py +1868 -0
  42. numba_cuda/numba/cuda/core/rewrites/__init__.py +26 -0
  43. numba_cuda/numba/cuda/core/rewrites/ir_print.py +90 -0
  44. numba_cuda/numba/cuda/core/rewrites/registry.py +104 -0
  45. numba_cuda/numba/cuda/core/rewrites/static_binop.py +40 -0
  46. numba_cuda/numba/cuda/core/rewrites/static_getitem.py +187 -0
  47. numba_cuda/numba/cuda/core/rewrites/static_raise.py +98 -0
  48. numba_cuda/numba/cuda/core/sigutils.py +3 -0
  49. numba_cuda/numba/cuda/core/ssa.py +496 -0
  50. numba_cuda/numba/cuda/core/targetconfig.py +329 -0
  51. numba_cuda/numba/cuda/core/tracing.py +231 -0
  52. numba_cuda/numba/cuda/core/transforms.py +952 -0
  53. numba_cuda/numba/cuda/core/typed_passes.py +741 -7
  54. numba_cuda/numba/cuda/core/typeinfer.py +1948 -0
  55. numba_cuda/numba/cuda/core/unsafe/__init__.py +0 -0
  56. numba_cuda/numba/cuda/core/unsafe/bytes.py +67 -0
  57. numba_cuda/numba/cuda/core/unsafe/eh.py +66 -0
  58. numba_cuda/numba/cuda/core/unsafe/refcount.py +98 -0
  59. numba_cuda/numba/cuda/core/untyped_passes.py +1983 -0
  60. numba_cuda/numba/cuda/cpython/cmathimpl.py +560 -0
  61. numba_cuda/numba/cuda/cpython/mathimpl.py +499 -0
  62. numba_cuda/numba/cuda/cpython/numbers.py +1474 -0
  63. numba_cuda/numba/cuda/cuda_paths.py +425 -246
  64. numba_cuda/numba/cuda/cudadecl.py +4 -1
  65. numba_cuda/numba/cuda/cudadrv/__init__.py +4 -1
  66. numba_cuda/numba/cuda/cudadrv/devicearray.py +5 -1
  67. numba_cuda/numba/cuda/cudadrv/devices.py +3 -0
  68. numba_cuda/numba/cuda/cudadrv/driver.py +14 -140
  69. numba_cuda/numba/cuda/cudadrv/drvapi.py +3 -0
  70. numba_cuda/numba/cuda/cudadrv/dummyarray.py +114 -24
  71. numba_cuda/numba/cuda/cudadrv/enums.py +3 -0
  72. numba_cuda/numba/cuda/cudadrv/error.py +4 -0
  73. numba_cuda/numba/cuda/cudadrv/libs.py +8 -5
  74. numba_cuda/numba/cuda/cudadrv/linkable_code.py +3 -0
  75. numba_cuda/numba/cuda/cudadrv/mappings.py +4 -1
  76. numba_cuda/numba/cuda/cudadrv/ndarray.py +3 -0
  77. numba_cuda/numba/cuda/cudadrv/nvrtc.py +22 -8
  78. numba_cuda/numba/cuda/cudadrv/nvvm.py +4 -4
  79. numba_cuda/numba/cuda/cudadrv/rtapi.py +3 -0
  80. numba_cuda/numba/cuda/cudadrv/runtime.py +4 -1
  81. numba_cuda/numba/cuda/cudaimpl.py +8 -1
  82. numba_cuda/numba/cuda/cudamath.py +3 -0
  83. numba_cuda/numba/cuda/debuginfo.py +88 -2
  84. numba_cuda/numba/cuda/decorators.py +6 -3
  85. numba_cuda/numba/cuda/descriptor.py +6 -4
  86. numba_cuda/numba/cuda/device_init.py +3 -0
  87. numba_cuda/numba/cuda/deviceufunc.py +69 -2
  88. numba_cuda/numba/cuda/dispatcher.py +21 -39
  89. numba_cuda/numba/cuda/errors.py +10 -0
  90. numba_cuda/numba/cuda/extending.py +3 -0
  91. numba_cuda/numba/cuda/flags.py +143 -1
  92. numba_cuda/numba/cuda/fp16.py +3 -2
  93. numba_cuda/numba/cuda/include/13/cuda_bf16.h +5118 -0
  94. numba_cuda/numba/cuda/include/13/cuda_bf16.hpp +3865 -0
  95. numba_cuda/numba/cuda/include/13/cuda_fp16.h +5363 -0
  96. numba_cuda/numba/cuda/include/13/cuda_fp16.hpp +3483 -0
  97. numba_cuda/numba/cuda/initialize.py +4 -0
  98. numba_cuda/numba/cuda/intrinsic_wrapper.py +3 -0
  99. numba_cuda/numba/cuda/intrinsics.py +3 -0
  100. numba_cuda/numba/cuda/itanium_mangler.py +3 -0
  101. numba_cuda/numba/cuda/kernels/__init__.py +2 -0
  102. numba_cuda/numba/cuda/kernels/reduction.py +3 -0
  103. numba_cuda/numba/cuda/kernels/transpose.py +3 -0
  104. numba_cuda/numba/cuda/libdevice.py +4 -0
  105. numba_cuda/numba/cuda/libdevicedecl.py +3 -0
  106. numba_cuda/numba/cuda/libdevicefuncs.py +3 -0
  107. numba_cuda/numba/cuda/libdeviceimpl.py +3 -0
  108. numba_cuda/numba/cuda/locks.py +3 -0
  109. numba_cuda/numba/cuda/lowering.py +59 -159
  110. numba_cuda/numba/cuda/mathimpl.py +5 -1
  111. numba_cuda/numba/cuda/memory_management/__init__.py +3 -0
  112. numba_cuda/numba/cuda/memory_management/memsys.cu +5 -0
  113. numba_cuda/numba/cuda/memory_management/memsys.cuh +5 -0
  114. numba_cuda/numba/cuda/memory_management/nrt.cu +5 -0
  115. numba_cuda/numba/cuda/memory_management/nrt.cuh +5 -0
  116. numba_cuda/numba/cuda/memory_management/nrt.py +48 -18
  117. numba_cuda/numba/cuda/misc/findlib.py +75 -0
  118. numba_cuda/numba/cuda/models.py +12 -1
  119. numba_cuda/numba/cuda/np/npdatetime_helpers.py +217 -0
  120. numba_cuda/numba/cuda/np/npyfuncs.py +1807 -0
  121. numba_cuda/numba/cuda/np/numpy_support.py +553 -0
  122. numba_cuda/numba/cuda/np/ufunc/ufuncbuilder.py +59 -0
  123. numba_cuda/numba/cuda/nvvmutils.py +4 -1
  124. numba_cuda/numba/cuda/printimpl.py +15 -1
  125. numba_cuda/numba/cuda/random.py +4 -1
  126. numba_cuda/numba/cuda/reshape_funcs.cu +5 -0
  127. numba_cuda/numba/cuda/serialize.py +4 -1
  128. numba_cuda/numba/cuda/simulator/__init__.py +4 -1
  129. numba_cuda/numba/cuda/simulator/_internal/__init__.py +3 -0
  130. numba_cuda/numba/cuda/simulator/_internal/cuda_bf16.py +2 -0
  131. numba_cuda/numba/cuda/simulator/api.py +4 -1
  132. numba_cuda/numba/cuda/simulator/bf16.py +3 -0
  133. numba_cuda/numba/cuda/simulator/compiler.py +7 -0
  134. numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +3 -0
  135. numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +4 -1
  136. numba_cuda/numba/cuda/simulator/cudadrv/devices.py +3 -0
  137. numba_cuda/numba/cuda/simulator/cudadrv/driver.py +3 -0
  138. numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +3 -0
  139. numba_cuda/numba/cuda/simulator/cudadrv/dummyarray.py +3 -0
  140. numba_cuda/numba/cuda/simulator/cudadrv/error.py +4 -0
  141. numba_cuda/numba/cuda/simulator/cudadrv/libs.py +4 -0
  142. numba_cuda/numba/cuda/simulator/cudadrv/linkable_code.py +4 -0
  143. numba_cuda/numba/cuda/simulator/cudadrv/nvrtc.py +3 -0
  144. numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +3 -0
  145. numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +3 -0
  146. numba_cuda/numba/cuda/simulator/dispatcher.py +4 -0
  147. numba_cuda/numba/cuda/simulator/kernel.py +3 -0
  148. numba_cuda/numba/cuda/simulator/kernelapi.py +4 -1
  149. numba_cuda/numba/cuda/simulator/memory_management/__init__.py +3 -0
  150. numba_cuda/numba/cuda/simulator/memory_management/nrt.py +17 -2
  151. numba_cuda/numba/cuda/simulator/reduction.py +3 -0
  152. numba_cuda/numba/cuda/simulator/vector_types.py +3 -0
  153. numba_cuda/numba/cuda/simulator_init.py +3 -0
  154. numba_cuda/numba/cuda/stubs.py +3 -0
  155. numba_cuda/numba/cuda/target.py +38 -17
  156. numba_cuda/numba/cuda/testing.py +7 -19
  157. numba_cuda/numba/cuda/tests/__init__.py +4 -1
  158. numba_cuda/numba/cuda/tests/cloudpickle_main_class.py +9 -0
  159. numba_cuda/numba/cuda/tests/complex_usecases.py +3 -0
  160. numba_cuda/numba/cuda/tests/core/serialize_usecases.py +3 -0
  161. numba_cuda/numba/cuda/tests/core/test_itanium_mangler.py +3 -0
  162. numba_cuda/numba/cuda/tests/core/test_serialize.py +7 -4
  163. numba_cuda/numba/cuda/tests/cudadrv/__init__.py +3 -0
  164. numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +3 -0
  165. numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +3 -0
  166. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +3 -0
  167. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +3 -0
  168. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +4 -1
  169. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +3 -0
  170. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_libraries.py +4 -1
  171. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +3 -0
  172. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +3 -0
  173. numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +4 -1
  174. numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +9 -3
  175. numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +4 -1
  176. numba_cuda/numba/cuda/tests/cudadrv/test_events.py +3 -0
  177. numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +3 -0
  178. numba_cuda/numba/cuda/tests/cudadrv/test_init.py +3 -0
  179. numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +3 -0
  180. numba_cuda/numba/cuda/tests/cudadrv/test_is_fp16.py +3 -0
  181. numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +21 -2
  182. numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +3 -0
  183. numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py +5 -1
  184. numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +4 -1
  185. numba_cuda/numba/cuda/tests/cudadrv/test_nvrtc.py +3 -0
  186. numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +3 -0
  187. numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +3 -0
  188. numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +3 -0
  189. numba_cuda/numba/cuda/tests/cudadrv/test_ptds.py +4 -1
  190. numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +3 -0
  191. numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +3 -0
  192. numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +3 -0
  193. numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +3 -0
  194. numba_cuda/numba/cuda/tests/cudapy/__init__.py +3 -0
  195. numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +3 -0
  196. numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +3 -0
  197. numba_cuda/numba/cuda/tests/cudapy/cg_cache_usecases.py +3 -0
  198. numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +4 -1
  199. numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +3 -0
  200. numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +3 -0
  201. numba_cuda/numba/cuda/tests/cudapy/test_array.py +5 -1
  202. numba_cuda/numba/cuda/tests/cudapy/test_array_alignment.py +3 -0
  203. numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +3 -0
  204. numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +3 -0
  205. numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +4 -1
  206. numba_cuda/numba/cuda/tests/cudapy/test_bfloat16.py +542 -2
  207. numba_cuda/numba/cuda/tests/cudapy/test_bfloat16_bindings.py +84 -1
  208. numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +3 -0
  209. numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +3 -0
  210. numba_cuda/numba/cuda/tests/cudapy/test_caching.py +4 -3
  211. numba_cuda/numba/cuda/tests/cudapy/test_casting.py +3 -0
  212. numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +3 -0
  213. numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +3 -0
  214. numba_cuda/numba/cuda/tests/cudapy/test_complex.py +4 -1
  215. numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +3 -0
  216. numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +3 -0
  217. numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +4 -1
  218. numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +5 -3
  219. numba_cuda/numba/cuda/tests/cudapy/test_copy_propagate.py +130 -0
  220. numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +3 -0
  221. numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +3 -0
  222. numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +4 -1
  223. numba_cuda/numba/cuda/tests/cudapy/test_debug.py +4 -1
  224. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +314 -3
  225. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo_types.py +4 -1
  226. numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +3 -0
  227. numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +4 -1
  228. numba_cuda/numba/cuda/tests/cudapy/test_enums.py +3 -0
  229. numba_cuda/numba/cuda/tests/cudapy/test_errors.py +4 -1
  230. numba_cuda/numba/cuda/tests/cudapy/test_exception.py +4 -1
  231. numba_cuda/numba/cuda/tests/cudapy/test_extending.py +5 -1
  232. numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +3 -0
  233. numba_cuda/numba/cuda/tests/cudapy/test_forall.py +3 -0
  234. numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +3 -0
  235. numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +3 -0
  236. numba_cuda/numba/cuda/tests/cudapy/test_globals.py +3 -0
  237. numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +3 -0
  238. numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +3 -0
  239. numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +3 -0
  240. numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +3 -0
  241. numba_cuda/numba/cuda/tests/cudapy/test_inline.py +21 -8
  242. numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +3 -0
  243. numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +3 -0
  244. numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +3 -0
  245. numba_cuda/numba/cuda/tests/cudapy/test_ir_utils.py +13 -37
  246. numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +3 -0
  247. numba_cuda/numba/cuda/tests/cudapy/test_lang.py +3 -0
  248. numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +4 -1
  249. numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +3 -0
  250. numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +3 -0
  251. numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +3 -0
  252. numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +3 -0
  253. numba_cuda/numba/cuda/tests/cudapy/test_math.py +4 -1
  254. numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +4 -1
  255. numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +3 -0
  256. numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +3 -0
  257. numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +3 -0
  258. numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +3 -0
  259. numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +3 -0
  260. numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +3 -0
  261. numba_cuda/numba/cuda/tests/cudapy/test_operator.py +4 -1
  262. numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +3 -0
  263. numba_cuda/numba/cuda/tests/cudapy/test_overload.py +3 -0
  264. numba_cuda/numba/cuda/tests/cudapy/test_powi.py +3 -0
  265. numba_cuda/numba/cuda/tests/cudapy/test_print.py +23 -0
  266. numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +3 -0
  267. numba_cuda/numba/cuda/tests/cudapy/test_random.py +3 -0
  268. numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +4 -1
  269. numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +3 -0
  270. numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +4 -1
  271. numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +3 -0
  272. numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +4 -1
  273. numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +3 -0
  274. numba_cuda/numba/cuda/tests/cudapy/test_sm.py +4 -1
  275. numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +3 -0
  276. numba_cuda/numba/cuda/tests/cudapy/test_ssa.py +453 -0
  277. numba_cuda/numba/cuda/tests/cudapy/test_stream_api.py +3 -0
  278. numba_cuda/numba/cuda/tests/cudapy/test_sync.py +4 -1
  279. numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +3 -0
  280. numba_cuda/numba/cuda/tests/cudapy/test_typeinfer.py +538 -0
  281. numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +266 -2
  282. numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +4 -1
  283. numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +4 -1
  284. numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +3 -0
  285. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +3 -0
  286. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +115 -6
  287. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +3 -0
  288. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +3 -0
  289. numba_cuda/numba/cuda/tests/cudapy/test_warning.py +4 -1
  290. numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +4 -1
  291. numba_cuda/numba/cuda/tests/cudasim/__init__.py +3 -0
  292. numba_cuda/numba/cuda/tests/cudasim/support.py +3 -0
  293. numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +3 -0
  294. numba_cuda/numba/cuda/tests/data/__init__.py +2 -0
  295. numba_cuda/numba/cuda/tests/data/cta_barrier.cu +5 -0
  296. numba_cuda/numba/cuda/tests/data/cuda_include.cu +5 -0
  297. numba_cuda/numba/cuda/tests/data/error.cu +5 -0
  298. numba_cuda/numba/cuda/tests/data/include/add.cuh +5 -0
  299. numba_cuda/numba/cuda/tests/data/jitlink.cu +5 -0
  300. numba_cuda/numba/cuda/tests/data/warn.cu +5 -0
  301. numba_cuda/numba/cuda/tests/doc_examples/__init__.py +3 -0
  302. numba_cuda/numba/cuda/tests/doc_examples/ffi/__init__.py +2 -0
  303. numba_cuda/numba/cuda/tests/doc_examples/ffi/functions.cu +5 -0
  304. numba_cuda/numba/cuda/tests/doc_examples/ffi/include/mul.cuh +5 -0
  305. numba_cuda/numba/cuda/tests/doc_examples/ffi/saxpy.cu +5 -0
  306. numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +3 -2
  307. numba_cuda/numba/cuda/tests/doc_examples/test_cpointer.py +3 -0
  308. numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +3 -0
  309. numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +6 -2
  310. numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +3 -2
  311. numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +3 -0
  312. numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +3 -0
  313. numba_cuda/numba/cuda/tests/doc_examples/test_random.py +3 -0
  314. numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +3 -0
  315. numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +3 -2
  316. numba_cuda/numba/cuda/tests/doc_examples/test_ufunc.py +3 -0
  317. numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +3 -0
  318. numba_cuda/numba/cuda/tests/enum_usecases.py +3 -0
  319. numba_cuda/numba/cuda/tests/nocuda/__init__.py +3 -0
  320. numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +3 -0
  321. numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +3 -0
  322. numba_cuda/numba/cuda/tests/nocuda/test_import.py +6 -1
  323. numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +27 -12
  324. numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +3 -0
  325. numba_cuda/numba/cuda/tests/nrt/__init__.py +3 -0
  326. numba_cuda/numba/cuda/tests/nrt/test_nrt.py +5 -1
  327. numba_cuda/numba/cuda/tests/nrt/test_nrt_refct.py +3 -0
  328. numba_cuda/numba/cuda/tests/support.py +58 -15
  329. numba_cuda/numba/cuda/tests/test_binary_generation/Makefile +3 -0
  330. numba_cuda/numba/cuda/tests/test_binary_generation/generate_raw_ltoir.py +2 -1
  331. numba_cuda/numba/cuda/tests/test_binary_generation/nrt_extern.cu +5 -0
  332. numba_cuda/numba/cuda/tests/test_binary_generation/test_device_functions.cu +5 -0
  333. numba_cuda/numba/cuda/tests/test_binary_generation/undefined_extern.cu +5 -0
  334. numba_cuda/numba/cuda/tests/test_tracing.py +200 -0
  335. numba_cuda/numba/cuda/types.py +59 -0
  336. numba_cuda/numba/cuda/typing/__init__.py +12 -1
  337. numba_cuda/numba/cuda/typing/cffi_utils.py +55 -0
  338. numba_cuda/numba/cuda/typing/context.py +751 -0
  339. numba_cuda/numba/cuda/typing/enumdecl.py +74 -0
  340. numba_cuda/numba/cuda/typing/npydecl.py +658 -0
  341. numba_cuda/numba/cuda/typing/templates.py +10 -14
  342. numba_cuda/numba/cuda/ufuncs.py +6 -3
  343. numba_cuda/numba/cuda/utils.py +9 -112
  344. numba_cuda/numba/cuda/vector_types.py +3 -0
  345. numba_cuda/numba/cuda/vectorizers.py +3 -0
  346. {numba_cuda-0.19.0.dist-info → numba_cuda-0.20.0.dist-info}/METADATA +6 -2
  347. numba_cuda-0.20.0.dist-info/RECORD +357 -0
  348. {numba_cuda-0.19.0.dist-info → numba_cuda-0.20.0.dist-info}/licenses/LICENSE +1 -0
  349. numba_cuda-0.20.0.dist-info/licenses/LICENSE.numba +24 -0
  350. numba_cuda/numba/cuda/tests/cudadrv/test_mvc.py +0 -57
  351. numba_cuda-0.19.0.dist-info/RECORD +0 -301
  352. {numba_cuda-0.19.0.dist-info → numba_cuda-0.20.0.dist-info}/WHEEL +0 -0
  353. {numba_cuda-0.19.0.dist-info → numba_cuda-0.20.0.dist-info}/top_level.txt +0 -0
@@ -1,3 +1,7 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
4
+
1
5
  def initialize_all():
2
6
  # Import models to register them with the data model manager
3
7
  import numba.cuda.models # noqa: F401
@@ -1,3 +1,6 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
1
4
  from .decorators import jit
2
5
  import numba
3
6
 
@@ -1,3 +1,6 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
1
4
  from llvmlite import ir
2
5
 
3
6
  from numba import cuda, types
@@ -1,3 +1,6 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
1
4
  """
2
5
  Itanium CXX ABI Mangler
3
6
 
@@ -0,0 +1,2 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
@@ -1,3 +1,6 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
1
4
  """
2
5
  A library written in CUDA Python for generating reduction kernels
3
6
  """
@@ -1,3 +1,6 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
1
4
  from numba import cuda
2
5
  from numba.cuda.cudadrv.driver import driver
3
6
  import math
@@ -1,3 +1,7 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
4
+
1
5
  def abs(x):
2
6
  """
3
7
  See https://docs.nvidia.com/cuda/libdevice-users-guide/__nv_abs.html
@@ -1,3 +1,6 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
1
4
  from numba.cuda import libdevice, libdevicefuncs
2
5
  from numba.cuda.typing.templates import ConcreteTemplate, Registry
3
6
 
@@ -1,3 +1,6 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
1
4
  from collections import namedtuple
2
5
  from textwrap import indent
3
6
 
@@ -1,3 +1,6 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
1
4
  from llvmlite import ir
2
5
  from numba.core import types
3
6
  from numba.cuda import cgutils
@@ -1,3 +1,6 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
1
4
  from threading import Lock
2
5
  from functools import wraps
3
6
 
@@ -1,3 +1,6 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
1
4
  from collections import namedtuple, defaultdict
2
5
  import operator
3
6
  import warnings
@@ -7,18 +10,14 @@ from llvmlite import ir as llvm_ir
7
10
 
8
11
  from numba.core import (
9
12
  typing,
10
- utils,
11
13
  types,
12
14
  ir,
13
- debuginfo,
14
- funcdesc,
15
15
  generators,
16
- config,
17
- cgutils,
18
16
  removerefctpass,
19
- targetconfig,
20
17
  )
21
- from numba.cuda.core import ir_utils
18
+ from numba.cuda import debuginfo, cgutils, utils
19
+ from numba.cuda.core import ir_utils, targetconfig, funcdesc, config
20
+
22
21
  from numba.core.errors import (
23
22
  LoweringError,
24
23
  new_error_context,
@@ -27,8 +26,8 @@ from numba.core.errors import (
27
26
  UnsupportedError,
28
27
  NumbaDebugInfoWarning,
29
28
  )
30
- from numba.core.funcdesc import default_mangler
31
- from numba.core.environment import Environment
29
+ from numba.cuda.core.funcdesc import default_mangler
30
+ from numba.cuda.core.environment import Environment
32
31
  from numba.core.analysis import compute_use_defs, must_use_alloca
33
32
  from numba.misc.firstlinefinder import get_func_body_first_lineno
34
33
  from numba import version_info
@@ -463,7 +462,7 @@ class Lower(BaseLower):
463
462
  self._blk_local_varmap = {}
464
463
 
465
464
  def pre_block(self, block):
466
- from numba.core.unsafe import eh
465
+ from numba.cuda.core.unsafe import eh
467
466
 
468
467
  super(Lower, self).pre_block(block)
469
468
  self._cur_ir_block = block
@@ -1026,9 +1025,6 @@ class Lower(BaseLower):
1026
1025
  elif isinstance(fnty, types.RecursiveCall):
1027
1026
  res = self._lower_call_RecursiveCall(fnty, expr, signature)
1028
1027
 
1029
- elif isinstance(fnty, types.FunctionType):
1030
- res = self._lower_call_FunctionType(fnty, expr, signature)
1031
-
1032
1028
  else:
1033
1029
  res = self._lower_call_normal(fnty, expr, signature)
1034
1030
 
@@ -1049,7 +1045,7 @@ class Lower(BaseLower):
1049
1045
  )
1050
1046
 
1051
1047
  def _lower_call_ObjModeDispatcher(self, fnty, expr, signature):
1052
- from numba.core.pythonapi import ObjModeUtils
1048
+ from numba.cuda.core.pythonapi import ObjModeUtils
1053
1049
 
1054
1050
  self.init_pyapi()
1055
1051
  # Acquire the GIL
@@ -1226,136 +1222,6 @@ class Lower(BaseLower):
1226
1222
  )
1227
1223
  return res
1228
1224
 
1229
- def _lower_call_FunctionType(self, fnty, expr, signature):
1230
- self.debug_print("# calling first-class function type")
1231
- sig = types.unliteral(signature)
1232
- if not fnty.check_signature(signature):
1233
- # value dependent polymorphism?
1234
- raise UnsupportedError(
1235
- f"mismatch of function types:"
1236
- f" expected {fnty} but got {types.FunctionType(sig)}"
1237
- )
1238
- argvals = self.fold_call_args(
1239
- fnty,
1240
- sig,
1241
- expr.args,
1242
- expr.vararg,
1243
- expr.kws,
1244
- )
1245
- return self.__call_first_class_function_pointer(
1246
- fnty.ftype,
1247
- expr.func.name,
1248
- sig,
1249
- argvals,
1250
- )
1251
-
1252
- def __call_first_class_function_pointer(self, ftype, fname, sig, argvals):
1253
- """
1254
- Calls a first-class function pointer.
1255
-
1256
- This function is responsible for calling a first-class function pointer,
1257
- which can either be a JIT-compiled function or a Python function. It
1258
- determines if a JIT address is available, and if so, calls the function
1259
- using the JIT address. Otherwise, it calls the function using a function
1260
- pointer obtained from the `__get_first_class_function_pointer` method.
1261
-
1262
- Args:
1263
- ftype: The type of the function.
1264
- fname: The name of the function.
1265
- sig: The signature of the function.
1266
- argvals: The argument values to pass to the function.
1267
-
1268
- Returns:
1269
- The result of calling the function.
1270
- """
1271
- context = self.context
1272
- builder = self.builder
1273
- # Determine if jit address is available
1274
- fstruct = self.loadvar(fname)
1275
- struct = cgutils.create_struct_proxy(self.typeof(fname))(
1276
- context, builder, value=fstruct
1277
- )
1278
- jit_addr = struct.jit_addr
1279
- jit_addr.name = f"jit_addr_of_{fname}"
1280
-
1281
- ctx = context
1282
- res_slot = cgutils.alloca_once(
1283
- builder, ctx.get_value_type(sig.return_type)
1284
- )
1285
-
1286
- if_jit_addr_is_null = builder.if_else(
1287
- cgutils.is_null(builder, jit_addr), likely=False
1288
- )
1289
- with if_jit_addr_is_null as (then, orelse):
1290
- with then:
1291
- func_ptr = self.__get_first_class_function_pointer(
1292
- ftype, fname, sig
1293
- )
1294
- res = builder.call(func_ptr, argvals)
1295
- builder.store(res, res_slot)
1296
-
1297
- with orelse:
1298
- llty = ctx.call_conv.get_function_type(
1299
- sig.return_type, sig.args
1300
- ).as_pointer()
1301
- func_ptr = builder.bitcast(jit_addr, llty)
1302
- # call
1303
- status, res = ctx.call_conv.call_function(
1304
- builder, func_ptr, sig.return_type, sig.args, argvals
1305
- )
1306
- with cgutils.if_unlikely(builder, status.is_error):
1307
- context.call_conv.return_status_propagate(builder, status)
1308
- builder.store(res, res_slot)
1309
- return builder.load(res_slot)
1310
-
1311
- def __get_first_class_function_pointer(self, ftype, fname, sig):
1312
- from numba.experimental.function_type import lower_get_wrapper_address
1313
-
1314
- llty = self.context.get_value_type(ftype)
1315
- fstruct = self.loadvar(fname)
1316
- addr = self.builder.extract_value(
1317
- fstruct, 0, name="addr_of_%s" % (fname)
1318
- )
1319
-
1320
- fptr = cgutils.alloca_once(
1321
- self.builder, llty, name="fptr_of_%s" % (fname)
1322
- )
1323
- with self.builder.if_else(
1324
- cgutils.is_null(self.builder, addr), likely=False
1325
- ) as (then, orelse):
1326
- with then:
1327
- self.init_pyapi()
1328
- # Acquire the GIL
1329
- gil_state = self.pyapi.gil_ensure()
1330
- pyaddr = self.builder.extract_value(
1331
- fstruct, 1, name="pyaddr_of_%s" % (fname)
1332
- )
1333
- # try to recover the function address, see
1334
- # test_zero_address BadToGood example in
1335
- # test_function_type.py
1336
- addr1 = lower_get_wrapper_address(
1337
- self.context,
1338
- self.builder,
1339
- pyaddr,
1340
- sig,
1341
- failure_mode="ignore",
1342
- )
1343
- with self.builder.if_then(
1344
- cgutils.is_null(self.builder, addr1), likely=False
1345
- ):
1346
- self.return_exception(
1347
- RuntimeError,
1348
- exc_args=(f"{ftype} function address is null",),
1349
- loc=self.loc,
1350
- )
1351
- addr2 = self.pyapi.long_as_voidptr(addr1)
1352
- self.builder.store(self.builder.bitcast(addr2, llty), fptr)
1353
- self.pyapi.decref(addr1)
1354
- self.pyapi.gil_release(gil_state)
1355
- with orelse:
1356
- self.builder.store(self.builder.bitcast(addr, llty), fptr)
1357
- return self.builder.load(fptr)
1358
-
1359
1225
  def _lower_call_normal(self, fnty, expr, signature):
1360
1226
  # Normal function resolution
1361
1227
  self.debug_print("# calling normal function: {0}".format(fnty))
@@ -1841,8 +1707,7 @@ class CUDALower(Lower):
1841
1707
  int_type = (llvm_ir.IntType,)
1842
1708
  real_type = llvm_ir.FloatType, llvm_ir.DoubleType
1843
1709
  if isinstance(lltype, int_type + real_type):
1844
- index = name.find(".")
1845
- src_name = name[:index] if index > 0 else name
1710
+ src_name = name.split(".")[0]
1846
1711
  if src_name in self.poly_var_typ_map:
1847
1712
  # Do not emit debug value on polymorphic type var
1848
1713
  return
@@ -1869,6 +1734,9 @@ class CUDALower(Lower):
1869
1734
 
1870
1735
  self.poly_var_typ_map = {}
1871
1736
  self.poly_var_loc_map = {}
1737
+ self.poly_var_set = set()
1738
+ self.poly_cleaned = False
1739
+ self.lastblk = max(self.blocks.keys())
1872
1740
 
1873
1741
  # When debug info is enabled, walk through function body and mark
1874
1742
  # variables with polymorphic types.
@@ -1880,8 +1748,7 @@ class CUDALower(Lower):
1880
1748
  if x.target.name.startswith("$"):
1881
1749
  continue
1882
1750
  ssa_name = x.target.name
1883
- index = ssa_name.find(".")
1884
- src_name = ssa_name[:index] if index > 0 else ssa_name
1751
+ src_name = ssa_name.split(".")[0]
1885
1752
  # Check all the multi-versioned targets
1886
1753
  if len(x.target.versioned_names) > 0:
1887
1754
  fetype = self.typeof(ssa_name)
@@ -1902,12 +1769,12 @@ class CUDALower(Lower):
1902
1769
  """
1903
1770
  # If the name is not handled yet and a store is needed
1904
1771
  if name not in self.varmap and self.store_var_needed(name):
1905
- index = name.find(".")
1906
- src_name = name[:index] if index > 0 else name
1772
+ src_name = name.split(".")[0]
1907
1773
  if src_name in self.poly_var_typ_map:
1908
- dtype = types.UnionType(self.poly_var_typ_map[src_name])
1909
- datamodel = self.context.data_model_manager[dtype]
1774
+ self.poly_var_set.add(name)
1910
1775
  if src_name not in self.poly_var_loc_map:
1776
+ dtype = types.UnionType(self.poly_var_typ_map[src_name])
1777
+ datamodel = self.context.data_model_manager[dtype]
1911
1778
  # UnionType has sorted set of types, max at last index
1912
1779
  maxsizetype = dtype.types[-1]
1913
1780
  # Create a single element aggregate type
@@ -1916,13 +1783,7 @@ class CUDALower(Lower):
1916
1783
  ptr = self.alloca_lltype(src_name, lltype, datamodel)
1917
1784
  # save the location of the union type for polymorphic var
1918
1785
  self.poly_var_loc_map[src_name] = ptr
1919
- # Any member of this union type shoud type cast ptr to fetype
1920
- lltype = self.context.get_value_type(fetype)
1921
- castptr = self.builder.bitcast(
1922
- self.poly_var_loc_map[src_name], llvm_ir.PointerType(lltype)
1923
- )
1924
- # Remember the pointer
1925
- self.varmap[name] = castptr
1786
+ return
1926
1787
 
1927
1788
  super()._alloca_var(name, fetype)
1928
1789
 
@@ -1936,6 +1797,45 @@ class CUDALower(Lower):
1936
1797
  or self._disable_sroa_like_opt
1937
1798
  )
1938
1799
 
1800
def delvar(self, name):
    """
    Delete the given variable.

    Names recorded in ``self.poly_var_set`` are handled here using the
    slot stored in ``self.poly_var_loc_map`` for their source name; every
    other name is forwarded to the parent implementation.
    """
    if name not in self.poly_var_set:
        super().delvar(name)
        return

    var_type = self.typeof(name)
    # The source name is the part before the first "."
    base_name = name.split(".")[0]
    union_slot = self.poly_var_loc_map[base_name]
    self.decref(var_type, self.builder.load(union_slot))

    in_last_block = self._cur_ir_block == self.blocks[self.lastblk]
    if in_last_block and not self.poly_cleaned:
        # Zero-fill the debug union for polymorphic only
        # at the last block
        for slot in self.poly_var_loc_map.values():
            zero_value = llvm_ir.Constant(slot.type.pointee, None)
            self.builder.store(zero_value, slot)
        # Remember that the zero-fill was emitted so it happens only once
        self.poly_cleaned = True
1823
+
1824
def getvar(self, name):
    """
    Get a pointer to the given variable's slot.

    For names recorded in ``self.poly_var_set`` the slot stored for the
    source name is bitcast to a pointer to this name's own value type;
    all other names are resolved by the parent implementation.
    """
    if name not in self.poly_var_set:
        return super().getvar(name)

    # The source name is the part before the first "."
    base_name = name.split(".")[0]
    value_type = self.context.get_value_type(self.typeof(name))
    union_slot = self.poly_var_loc_map[base_name]
    return self.builder.bitcast(union_slot, llvm_ir.PointerType(value_type))
1838
+
1939
1839
 
1940
1840
  def _lit_or_omitted(value):
1941
1841
  """Returns a Literal instance if the type of value is supported;
@@ -1,11 +1,15 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
1
4
  import math
2
5
  import operator
3
6
  from llvmlite import ir
4
- from numba.core import types, typing, targetconfig
7
+ from numba.core import types, typing
5
8
  from numba.cuda import cgutils
6
9
  from numba.core.imputils import Registry
7
10
  from numba.types import float32, float64, int64, uint64
8
11
  from numba.cuda import libdevice
12
+ from numba.cuda.core import targetconfig
9
13
 
10
14
  registry = Registry()
11
15
  lower = registry.lower
@@ -1 +1,4 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
1
4
  from numba.cuda.memory_management.nrt import rtsys # noqa: F401
@@ -1,3 +1,8 @@
1
+ /*
2
+ * SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3
+ * SPDX-License-Identifier: BSD-2-Clause
4
+ */
5
+
1
6
  #include "memsys.cuh"
2
7
 
3
8
  __device__ size_t memsys_size = sizeof(NRT_MemSys);
@@ -1,3 +1,8 @@
1
+ /*
2
+ * SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3
+ * SPDX-License-Identifier: BSD-2-Clause
4
+ */
5
+
1
6
  #include <cuda/atomic>
2
7
 
3
8
  // Globally needed variables
@@ -1,3 +1,8 @@
1
+ /*
2
+ * SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3
+ * SPDX-License-Identifier: BSD-2-Clause
4
+ */
5
+
1
6
  #ifndef _NRT_H
2
7
  #define _NRT_H
3
8
 
@@ -1,3 +1,8 @@
1
+ /*
2
+ * SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3
+ * SPDX-License-Identifier: BSD-2-Clause
4
+ */
5
+
1
6
  #include <cuda/atomic>
2
7
 
3
8
  typedef void (*NRT_dtor_function)(void* ptr, size_t size, void* info);
@@ -1,36 +1,30 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
1
4
  import ctypes
2
5
  import os
3
6
  from functools import wraps
4
7
  import numpy as np
5
8
 
6
- from numba import cuda, config
9
+
10
+ from numba import cuda, types
11
+ from numba.cuda import config
12
+
7
13
  from numba.core.runtime.nrt import _nrt_mstats
8
14
  from numba.cuda.cudadrv.driver import (
9
15
  _Linker,
10
16
  driver,
11
17
  launch_kernel,
12
18
  USE_NV_BINDING,
19
+ _have_nvjitlink,
13
20
  )
14
21
  from numba.cuda.cudadrv import devices
15
22
  from numba.cuda.api import get_current_device
16
- from numba.cuda.utils import _readenv, cached_file_read
23
+ from numba.cuda.utils import cached_file_read
17
24
  from numba.cuda.cudadrv.linkable_code import CUSource
25
+ from numba.cuda.typing.templates import signature
18
26
 
19
-
20
- # Check environment variable or config for NRT statistics enablement
21
- NRT_STATS = _readenv("NUMBA_CUDA_NRT_STATS", bool, False) or getattr(
22
- config, "NUMBA_CUDA_NRT_STATS", False
23
- )
24
- if not hasattr(config, "NUMBA_CUDA_NRT_STATS"):
25
- config.CUDA_NRT_STATS = NRT_STATS
26
-
27
-
28
- # Check environment variable or config for NRT enablement
29
- ENABLE_NRT = _readenv("NUMBA_CUDA_ENABLE_NRT", bool, False) or getattr(
30
- config, "NUMBA_CUDA_ENABLE_NRT", False
31
- )
32
- if not hasattr(config, "NUMBA_CUDA_ENABLE_NRT"):
33
- config.CUDA_ENABLE_NRT = ENABLE_NRT
27
+ from numba.core.extending import intrinsic, overload_classmethod
34
28
 
35
29
 
36
30
  def get_include():
@@ -38,6 +32,34 @@ def get_include():
38
32
  return os.path.dirname(os.path.abspath(__file__))
39
33
 
40
34
 
35
+ # Provide an implementation of Array._allocate() for the CUDA target (used
36
+ # internally by Numba when generating the allocation of an array)
37
+
38
+
39
@intrinsic
def intrin_alloc(typingctx, allocsize, align):
    """Intrinsic that forwards an array allocation request to the allocator.

    Typed as ``MemInfoPointer(voidptr)(allocsize, align)``; the generated
    code calls ``context.nrt.meminfo_alloc_aligned`` with both arguments.
    """

    def codegen(context, builder, signature, args):
        size_value, align_value = args
        return context.nrt.meminfo_alloc_aligned(
            builder, size_value, align_value
        )

    # The result is returned as an untyped MemInfo pointer
    untyped_meminfo = types.MemInfoPointer(types.voidptr)
    return signature(untyped_meminfo, allocsize, align), codegen
51
+
52
+
53
@overload_classmethod(types.Array, "_allocate", target="CUDA")
def _ol_array_allocate(cls, allocsize, align):
    """Implements a Numba-only CUDA-target classmethod on the array type.

    Registered as the ``_allocate`` classmethod of ``types.Array`` for the
    "CUDA" target. The returned implementation forwards ``allocsize`` and
    ``align`` to ``intrin_alloc``.
    """

    def impl(cls, allocsize, align):
        # ``cls`` is accepted to match the classmethod signature but unused
        return intrin_alloc(allocsize, align)

    return impl
61
+
62
+
41
63
  # Protect method to ensure NRT memory allocation and initialization
42
64
  def _alloc_init_guard(method):
43
65
  """
@@ -65,10 +87,18 @@ class _Runtime:
65
87
 
66
88
def __init__(self):
    """Initialize memsys module and variable

    All state is set up by ``_reset``, so a new instance starts in the
    same state that ``_reset`` produces.
    """
    self._reset()
91
+
92
+ def _reset(self):
93
+ """Reset to the uninitialized state"""
68
94
  self._memsys_module = None
69
95
  self._memsys = None
70
96
  self._initialized = False
71
97
 
98
def close(self):
    """Close and reset

    Delegates to ``_reset``; no other teardown is performed here.
    """
    self._reset()
101
+
72
102
  def _compile_memsys_module(self):
73
103
  """
74
104
  Compile memsys.cu and create a module from it in the current context
@@ -80,7 +110,7 @@ class _Runtime:
80
110
  cc = get_current_device().compute_capability
81
111
 
82
112
  # Create a new linker instance and add the cu file
83
- linker = _Linker.new(cc=cc)
113
+ linker = _Linker.new(cc=cc, lto=_have_nvjitlink())
84
114
  linker.add_cu_file(memsys_mod)
85
115
 
86
116
  # Complete the linker and create a module from it
@@ -0,0 +1,75 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
4
+ import sys
5
+ import os
6
+ import re
7
+
8
+
9
def get_lib_dirs():
    """
    Anaconda specific

    Return the list of directories under ``sys.prefix`` in which libraries
    are looked up for the running platform.
    """
    if sys.platform != "win32":
        relative_dirs = ["lib"]
    else:
        # CUDA 12 installs into "bin", whereas CUDA 13 installs into the
        # "x64" subdirectory of "bin"; both layouts are listed.
        relative_dirs = [
            os.path.join(*parts)
            for parts in (
                ("Library", "bin"),
                ("Library", "bin", "x64"),
                ("Library", "nvvm", "bin"),
                ("Library", "nvvm", "bin", "x64"),
            )
        ]
    return [os.path.join(sys.prefix, rel) for rel in relative_dirs]
27
+
28
+
29
# Filename regex templates keyed by platform name; a "-static" suffix selects
# the static-library form. "%(name)s" and "%(ver)s" are filled in by find_lib.
DLLNAMEMAP = {
    "linux": r"lib%(name)s\.so\.%(ver)s$",
    "linux2": r"lib%(name)s\.so\.%(ver)s$",
    "linux-static": r"lib%(name)s\.a$",
    "darwin": r"lib%(name)s\.%(ver)s\.dylib$",
    "win32": r"%(name)s%(ver)s\.dll$",
    "win32-static": r"%(name)s\.lib$",
    "bsd": r"lib%(name)s\.so\.%(ver)s$",
}

# Version fragment: optional leading digits followed by any number of
# "_" or "." separated digit groups.
RE_VER = r"[0-9]*([_\.][0-9]+)*"


def find_lib(libname, libdir=None, platform=None, static=False):
    """Find library files called ``libname`` for the given platform.

    ``platform`` defaults to ``sys.platform``; any platform string
    containing "bsd" is treated as "bsd". With ``static=True`` the
    "<platform>-static" template is used. ``libdir`` is passed through to
    ``find_file`` unchanged.

    Returns the list produced by ``find_file``, or an empty list when no
    filename template exists for the resolved platform.
    """
    platform = platform or sys.platform
    if "bsd" in platform:
        platform = "bsd"
    if static:
        platform = f"{platform}-static"

    template = DLLNAMEMAP.get(platform)
    if template is None:
        # Return empty list if platform name is undefined.
        # Not all platforms define their static library paths.
        return []

    # Escape the name so characters such as "+" or "." are matched
    # literally instead of being interpreted as regex syntax.
    pattern = template % {"name": re.escape(libname), "ver": RE_VER}
    return find_file(re.compile(pattern), libdir)
54
+
55
+
56
def find_file(pat, libdir=None):
    """Return paths of regular files whose names match ``pat``.

    ``pat`` is a compiled regex applied with ``match`` to each directory
    entry name. ``libdir`` may be ``None`` (search the directories from
    ``get_lib_dirs()``), a single path given as ``str`` or ``os.PathLike``,
    or an iterable of paths.

    Search-path entries that do not exist or are not directories are
    skipped. Results keep the order of the search path and, within each
    directory, the order returned by ``os.listdir``.
    """
    if libdir is None:
        libdirs = get_lib_dirs()
    elif isinstance(libdir, (str, os.PathLike)):
        # A single path, not an iterable of paths
        libdirs = [libdir]
    else:
        libdirs = list(libdir)

    files = []
    for ldir in libdirs:
        try:
            entries = os.listdir(ldir)
        except (FileNotFoundError, NotADirectoryError):
            # One unusable entry must not abort the search of the
            # remaining directories.
            continue
        for ent in entries:
            if not pat.match(ent):
                continue
            candidate = os.path.join(ldir, ent)
            # Only regular files count; matching directories are ignored
            if os.path.isfile(candidate):
                files.append(candidate)
    return files
@@ -1,11 +1,15 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
1
4
  import functools
2
5
 
3
6
  from llvmlite import ir
4
7
 
5
8
  from numba.core.datamodel.registry import DataModelManager, register
9
+ from numba.core.datamodel import PrimitiveModel
6
10
  from numba.core.extending import models
7
11
  from numba.core import types
8
- from numba.cuda.types import Dim3, GridGroup, CUDADispatcher
12
+ from numba.cuda.types import Dim3, GridGroup, CUDADispatcher, Bfloat16
9
13
 
10
14
 
11
15
  cuda_data_manager = DataModelManager()
@@ -42,3 +46,10 @@ class FloatModel(models.PrimitiveModel):
42
46
 
43
47
 
44
48
  register_model(CUDADispatcher)(models.OpaqueModel)
49
+
50
+
51
@register_model(Bfloat16)
class _model___nv_bfloat16(PrimitiveModel):
    """Data model registered for ``Bfloat16``.

    A primitive model whose backend type is a 16-bit LLVM integer.
    """

    def __init__(self, dmm, fe_type):
        # The value is carried as ir.IntType(16)
        be_type = ir.IntType(16)
        super().__init__(dmm, fe_type, be_type)