numba-cuda 0.18.1__py3-none-any.whl → 0.19.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of numba-cuda might be problematic. Click here for more details.

Files changed (301) hide show
  1. _numba_cuda_redirector.pth +3 -0
  2. _numba_cuda_redirector.py +3 -0
  3. numba_cuda/VERSION +1 -1
  4. numba_cuda/__init__.py +2 -1
  5. numba_cuda/_version.py +2 -13
  6. numba_cuda/numba/cuda/__init__.py +4 -1
  7. numba_cuda/numba/cuda/_internal/cuda_bf16.py +5 -2
  8. numba_cuda/numba/cuda/_internal/cuda_fp16.py +4 -1
  9. numba_cuda/numba/cuda/api.py +5 -7
  10. numba_cuda/numba/cuda/api_util.py +3 -0
  11. numba_cuda/numba/cuda/args.py +3 -0
  12. numba_cuda/numba/cuda/bf16.py +3 -0
  13. numba_cuda/numba/cuda/cg.py +3 -0
  14. numba_cuda/numba/cuda/cgutils.py +3 -0
  15. numba_cuda/numba/cuda/codegen.py +3 -0
  16. numba_cuda/numba/cuda/compiler.py +10 -4
  17. numba_cuda/numba/cuda/core/caching.py +3 -0
  18. numba_cuda/numba/cuda/core/callconv.py +3 -0
  19. numba_cuda/numba/cuda/core/codegen.py +3 -0
  20. numba_cuda/numba/cuda/core/compiler.py +3 -0
  21. numba_cuda/numba/cuda/core/interpreter.py +3595 -0
  22. numba_cuda/numba/cuda/core/ir_utils.py +2644 -0
  23. numba_cuda/numba/cuda/core/sigutils.py +58 -0
  24. numba_cuda/numba/cuda/core/typed_passes.py +3 -0
  25. numba_cuda/numba/cuda/cuda_paths.py +12 -17
  26. numba_cuda/numba/cuda/cudadecl.py +4 -1
  27. numba_cuda/numba/cuda/cudadrv/__init__.py +3 -0
  28. numba_cuda/numba/cuda/cudadrv/devicearray.py +3 -0
  29. numba_cuda/numba/cuda/cudadrv/devices.py +3 -0
  30. numba_cuda/numba/cuda/cudadrv/driver.py +7 -19
  31. numba_cuda/numba/cuda/cudadrv/drvapi.py +3 -0
  32. numba_cuda/numba/cuda/cudadrv/dummyarray.py +3 -0
  33. numba_cuda/numba/cuda/cudadrv/enums.py +3 -0
  34. numba_cuda/numba/cuda/cudadrv/error.py +4 -0
  35. numba_cuda/numba/cuda/cudadrv/libs.py +4 -2
  36. numba_cuda/numba/cuda/cudadrv/linkable_code.py +3 -0
  37. numba_cuda/numba/cuda/cudadrv/mappings.py +3 -0
  38. numba_cuda/numba/cuda/cudadrv/ndarray.py +3 -0
  39. numba_cuda/numba/cuda/cudadrv/nvrtc.py +47 -44
  40. numba_cuda/numba/cuda/cudadrv/nvvm.py +6 -18
  41. numba_cuda/numba/cuda/cudadrv/rtapi.py +3 -0
  42. numba_cuda/numba/cuda/cudadrv/runtime.py +15 -1
  43. numba_cuda/numba/cuda/cudaimpl.py +3 -0
  44. numba_cuda/numba/cuda/cudamath.py +4 -1
  45. numba_cuda/numba/cuda/debuginfo.py +3 -0
  46. numba_cuda/numba/cuda/decorators.py +7 -3
  47. numba_cuda/numba/cuda/descriptor.py +3 -0
  48. numba_cuda/numba/cuda/device_init.py +3 -0
  49. numba_cuda/numba/cuda/deviceufunc.py +5 -1
  50. numba_cuda/numba/cuda/dispatcher.py +6 -2
  51. numba_cuda/numba/cuda/errors.py +10 -0
  52. numba_cuda/numba/cuda/extending.py +4 -1
  53. numba_cuda/numba/cuda/flags.py +2 -0
  54. numba_cuda/numba/cuda/fp16.py +3 -0
  55. numba_cuda/numba/cuda/initialize.py +4 -0
  56. numba_cuda/numba/cuda/intrinsic_wrapper.py +3 -0
  57. numba_cuda/numba/cuda/intrinsics.py +3 -0
  58. numba_cuda/numba/cuda/itanium_mangler.py +214 -0
  59. numba_cuda/numba/cuda/kernels/__init__.py +2 -0
  60. numba_cuda/numba/cuda/kernels/reduction.py +3 -0
  61. numba_cuda/numba/cuda/kernels/transpose.py +3 -0
  62. numba_cuda/numba/cuda/libdevice.py +4 -0
  63. numba_cuda/numba/cuda/libdevicedecl.py +4 -1
  64. numba_cuda/numba/cuda/libdevicefuncs.py +4 -1
  65. numba_cuda/numba/cuda/libdeviceimpl.py +3 -0
  66. numba_cuda/numba/cuda/locks.py +3 -0
  67. numba_cuda/numba/cuda/lowering.py +53 -16
  68. numba_cuda/numba/cuda/mathimpl.py +3 -0
  69. numba_cuda/numba/cuda/memory_management/__init__.py +3 -0
  70. numba_cuda/numba/cuda/memory_management/memsys.cu +5 -0
  71. numba_cuda/numba/cuda/memory_management/memsys.cuh +5 -0
  72. numba_cuda/numba/cuda/memory_management/nrt.cu +5 -0
  73. numba_cuda/numba/cuda/memory_management/nrt.cuh +5 -0
  74. numba_cuda/numba/cuda/memory_management/nrt.py +5 -1
  75. numba_cuda/numba/cuda/models.py +3 -0
  76. numba_cuda/numba/cuda/nvvmutils.py +3 -0
  77. numba_cuda/numba/cuda/printimpl.py +3 -0
  78. numba_cuda/numba/cuda/random.py +3 -0
  79. numba_cuda/numba/cuda/reshape_funcs.cu +5 -0
  80. numba_cuda/numba/cuda/serialize.py +3 -0
  81. numba_cuda/numba/cuda/simulator/__init__.py +3 -0
  82. numba_cuda/numba/cuda/simulator/_internal/__init__.py +3 -0
  83. numba_cuda/numba/cuda/simulator/_internal/cuda_bf16.py +2 -0
  84. numba_cuda/numba/cuda/simulator/api.py +4 -1
  85. numba_cuda/numba/cuda/simulator/bf16.py +3 -0
  86. numba_cuda/numba/cuda/simulator/compiler.py +3 -0
  87. numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +3 -0
  88. numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +3 -0
  89. numba_cuda/numba/cuda/simulator/cudadrv/devices.py +3 -0
  90. numba_cuda/numba/cuda/simulator/cudadrv/driver.py +3 -7
  91. numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +3 -0
  92. numba_cuda/numba/cuda/simulator/cudadrv/dummyarray.py +3 -0
  93. numba_cuda/numba/cuda/simulator/cudadrv/error.py +4 -0
  94. numba_cuda/numba/cuda/simulator/cudadrv/libs.py +4 -0
  95. numba_cuda/numba/cuda/simulator/cudadrv/linkable_code.py +4 -0
  96. numba_cuda/numba/cuda/simulator/cudadrv/nvrtc.py +3 -0
  97. numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +3 -0
  98. numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +3 -0
  99. numba_cuda/numba/cuda/simulator/dispatcher.py +4 -0
  100. numba_cuda/numba/cuda/simulator/kernel.py +3 -0
  101. numba_cuda/numba/cuda/simulator/kernelapi.py +3 -0
  102. numba_cuda/numba/cuda/simulator/memory_management/__init__.py +3 -0
  103. numba_cuda/numba/cuda/simulator/memory_management/nrt.py +3 -0
  104. numba_cuda/numba/cuda/simulator/reduction.py +3 -0
  105. numba_cuda/numba/cuda/simulator/vector_types.py +3 -0
  106. numba_cuda/numba/cuda/simulator_init.py +3 -0
  107. numba_cuda/numba/cuda/stubs.py +3 -0
  108. numba_cuda/numba/cuda/target.py +4 -2
  109. numba_cuda/numba/cuda/testing.py +7 -6
  110. numba_cuda/numba/cuda/tests/__init__.py +3 -0
  111. numba_cuda/numba/cuda/tests/complex_usecases.py +3 -0
  112. numba_cuda/numba/cuda/tests/core/serialize_usecases.py +3 -0
  113. numba_cuda/numba/cuda/tests/core/test_itanium_mangler.py +83 -0
  114. numba_cuda/numba/cuda/tests/core/test_serialize.py +3 -0
  115. numba_cuda/numba/cuda/tests/cudadrv/__init__.py +3 -0
  116. numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +3 -0
  117. numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +3 -0
  118. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +3 -0
  119. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +3 -0
  120. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +3 -0
  121. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +3 -0
  122. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_libraries.py +3 -0
  123. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +3 -0
  124. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +4 -1
  125. numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +4 -1
  126. numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +4 -1
  127. numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +4 -1
  128. numba_cuda/numba/cuda/tests/cudadrv/test_events.py +3 -0
  129. numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +3 -0
  130. numba_cuda/numba/cuda/tests/cudadrv/test_init.py +3 -0
  131. numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +3 -0
  132. numba_cuda/numba/cuda/tests/cudadrv/test_is_fp16.py +3 -0
  133. numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +4 -1
  134. numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +4 -1
  135. numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py +3 -0
  136. numba_cuda/numba/cuda/tests/cudadrv/test_mvc.py +4 -1
  137. numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +3 -0
  138. numba_cuda/numba/cuda/tests/cudadrv/test_nvrtc.py +7 -6
  139. numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +3 -4
  140. numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +3 -0
  141. numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +3 -0
  142. numba_cuda/numba/cuda/tests/cudadrv/test_ptds.py +4 -1
  143. numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +3 -0
  144. numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +3 -0
  145. numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +3 -0
  146. numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +3 -0
  147. numba_cuda/numba/cuda/tests/cudapy/__init__.py +3 -0
  148. numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +3 -0
  149. numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +3 -0
  150. numba_cuda/numba/cuda/tests/cudapy/cg_cache_usecases.py +3 -0
  151. numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +3 -0
  152. numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +3 -0
  153. numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +3 -0
  154. numba_cuda/numba/cuda/tests/cudapy/test_array.py +3 -0
  155. numba_cuda/numba/cuda/tests/cudapy/test_array_alignment.py +3 -0
  156. numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +3 -0
  157. numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +3 -0
  158. numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +3 -0
  159. numba_cuda/numba/cuda/tests/cudapy/test_bfloat16.py +4 -3
  160. numba_cuda/numba/cuda/tests/cudapy/test_bfloat16_bindings.py +4 -3
  161. numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +3 -0
  162. numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +3 -0
  163. numba_cuda/numba/cuda/tests/cudapy/test_caching.py +149 -3
  164. numba_cuda/numba/cuda/tests/cudapy/test_casting.py +3 -0
  165. numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +4 -1
  166. numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +3 -4
  167. numba_cuda/numba/cuda/tests/cudapy/test_complex.py +3 -0
  168. numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +3 -0
  169. numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +3 -0
  170. numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +3 -0
  171. numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +3 -0
  172. numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +4 -1
  173. numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +4 -1
  174. numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +3 -0
  175. numba_cuda/numba/cuda/tests/cudapy/test_debug.py +4 -1
  176. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +23 -284
  177. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo_types.py +476 -0
  178. numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +4 -1
  179. numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +3 -0
  180. numba_cuda/numba/cuda/tests/cudapy/test_enums.py +3 -0
  181. numba_cuda/numba/cuda/tests/cudapy/test_errors.py +4 -1
  182. numba_cuda/numba/cuda/tests/cudapy/test_exception.py +3 -0
  183. numba_cuda/numba/cuda/tests/cudapy/test_extending.py +4 -6
  184. numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +3 -0
  185. numba_cuda/numba/cuda/tests/cudapy/test_forall.py +3 -0
  186. numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +3 -0
  187. numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +3 -0
  188. numba_cuda/numba/cuda/tests/cudapy/test_globals.py +3 -0
  189. numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +4 -1
  190. numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +3 -0
  191. numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +3 -0
  192. numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +3 -0
  193. numba_cuda/numba/cuda/tests/cudapy/test_inline.py +3 -0
  194. numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +3 -0
  195. numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +3 -0
  196. numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +4 -1
  197. numba_cuda/numba/cuda/tests/cudapy/test_ir_utils.py +298 -0
  198. numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +3 -0
  199. numba_cuda/numba/cuda/tests/cudapy/test_lang.py +3 -0
  200. numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +3 -0
  201. numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +3 -0
  202. numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +4 -1
  203. numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +3 -0
  204. numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +3 -0
  205. numba_cuda/numba/cuda/tests/cudapy/test_math.py +3 -0
  206. numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +3 -0
  207. numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +3 -0
  208. numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +3 -0
  209. numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +3 -0
  210. numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +3 -0
  211. numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +3 -0
  212. numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +3 -0
  213. numba_cuda/numba/cuda/tests/cudapy/test_operator.py +4 -1
  214. numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +3 -0
  215. numba_cuda/numba/cuda/tests/cudapy/test_overload.py +3 -0
  216. numba_cuda/numba/cuda/tests/cudapy/test_powi.py +3 -0
  217. numba_cuda/numba/cuda/tests/cudapy/test_print.py +3 -0
  218. numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +3 -0
  219. numba_cuda/numba/cuda/tests/cudapy/test_random.py +3 -0
  220. numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +3 -0
  221. numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +3 -0
  222. numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +3 -0
  223. numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +3 -0
  224. numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +3 -0
  225. numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +3 -0
  226. numba_cuda/numba/cuda/tests/cudapy/test_sm.py +3 -0
  227. numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +3 -0
  228. numba_cuda/numba/cuda/tests/cudapy/test_stream_api.py +3 -0
  229. numba_cuda/numba/cuda/tests/cudapy/test_sync.py +3 -0
  230. numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +3 -0
  231. numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +4 -1
  232. numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +3 -0
  233. numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +3 -0
  234. numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +3 -0
  235. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +3 -0
  236. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +3 -0
  237. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +3 -0
  238. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +3 -0
  239. numba_cuda/numba/cuda/tests/cudapy/test_warning.py +8 -1
  240. numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +3 -0
  241. numba_cuda/numba/cuda/tests/cudasim/__init__.py +3 -0
  242. numba_cuda/numba/cuda/tests/cudasim/support.py +3 -0
  243. numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +3 -0
  244. numba_cuda/numba/cuda/tests/data/__init__.py +2 -0
  245. numba_cuda/numba/cuda/tests/data/cta_barrier.cu +5 -0
  246. numba_cuda/numba/cuda/tests/data/cuda_include.cu +5 -0
  247. numba_cuda/numba/cuda/tests/data/error.cu +5 -0
  248. numba_cuda/numba/cuda/tests/data/include/add.cuh +5 -0
  249. numba_cuda/numba/cuda/tests/data/jitlink.cu +5 -0
  250. numba_cuda/numba/cuda/tests/data/warn.cu +5 -0
  251. numba_cuda/numba/cuda/tests/doc_examples/__init__.py +3 -0
  252. numba_cuda/numba/cuda/tests/doc_examples/ffi/__init__.py +2 -0
  253. numba_cuda/numba/cuda/tests/doc_examples/ffi/functions.cu +5 -0
  254. numba_cuda/numba/cuda/tests/doc_examples/ffi/include/mul.cuh +5 -0
  255. numba_cuda/numba/cuda/tests/doc_examples/ffi/saxpy.cu +5 -0
  256. numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +3 -0
  257. numba_cuda/numba/cuda/tests/doc_examples/test_cpointer.py +4 -1
  258. numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +4 -1
  259. numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +4 -1
  260. numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +4 -1
  261. numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +4 -1
  262. numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +4 -1
  263. numba_cuda/numba/cuda/tests/doc_examples/test_random.py +3 -0
  264. numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +4 -1
  265. numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +4 -1
  266. numba_cuda/numba/cuda/tests/doc_examples/test_ufunc.py +4 -1
  267. numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +4 -1
  268. numba_cuda/numba/cuda/tests/enum_usecases.py +3 -0
  269. numba_cuda/numba/cuda/tests/nocuda/__init__.py +3 -0
  270. numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +3 -0
  271. numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +3 -0
  272. numba_cuda/numba/cuda/tests/nocuda/test_import.py +4 -1
  273. numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +3 -0
  274. numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +3 -0
  275. numba_cuda/numba/cuda/tests/nrt/__init__.py +3 -0
  276. numba_cuda/numba/cuda/tests/nrt/test_nrt.py +5 -2
  277. numba_cuda/numba/cuda/tests/nrt/test_nrt_refct.py +4 -1
  278. numba_cuda/numba/cuda/tests/support.py +755 -0
  279. numba_cuda/numba/cuda/tests/test_binary_generation/Makefile +6 -3
  280. numba_cuda/numba/cuda/tests/test_binary_generation/generate_raw_ltoir.py +6 -2
  281. numba_cuda/numba/cuda/tests/test_binary_generation/nrt_extern.cu +5 -0
  282. numba_cuda/numba/cuda/tests/test_binary_generation/test_device_functions.cu +5 -0
  283. numba_cuda/numba/cuda/tests/test_binary_generation/undefined_extern.cu +5 -0
  284. numba_cuda/numba/cuda/types.py +3 -0
  285. numba_cuda/numba/cuda/typing/__init__.py +11 -0
  286. numba_cuda/numba/cuda/typing/templates.py +1448 -0
  287. numba_cuda/numba/cuda/ufuncs.py +3 -0
  288. numba_cuda/numba/cuda/utils.py +3 -0
  289. numba_cuda/numba/cuda/vector_types.py +6 -3
  290. numba_cuda/numba/cuda/vectorizers.py +3 -0
  291. {numba_cuda-0.18.1.dist-info → numba_cuda-0.19.1.dist-info}/METADATA +25 -29
  292. numba_cuda-0.19.1.dist-info/RECORD +302 -0
  293. {numba_cuda-0.18.1.dist-info → numba_cuda-0.19.1.dist-info}/licenses/LICENSE +1 -0
  294. numba_cuda-0.19.1.dist-info/licenses/LICENSE.numba +24 -0
  295. numba_cuda/numba/cuda/include/11/cuda_bf16.h +0 -3749
  296. numba_cuda/numba/cuda/include/11/cuda_bf16.hpp +0 -2683
  297. numba_cuda/numba/cuda/include/11/cuda_fp16.h +0 -3794
  298. numba_cuda/numba/cuda/include/11/cuda_fp16.hpp +0 -2614
  299. numba_cuda-0.18.1.dist-info/RECORD +0 -296
  300. {numba_cuda-0.18.1.dist-info → numba_cuda-0.19.1.dist-info}/WHEEL +0 -0
  301. {numba_cuda-0.18.1.dist-info → numba_cuda-0.19.1.dist-info}/top_level.txt +0 -0
@@ -1,3 +1,6 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
1
4
  import numbers
2
5
  from numba.core.errors import LoweringError
3
6
 
@@ -12,6 +15,13 @@ class KernelRuntimeError(RuntimeError):
12
15
  super(KernelRuntimeError, self).__init__(msg)
13
16
 
14
17
 
18
+ class UnsupportedBytecodeError(Exception):
19
+ """Unsupported bytecode is non-recoverable"""
20
+
21
+ def __init__(self, msg, loc=None):
22
+ super().__init__(f"{msg}. Raised from {loc}")
23
+
24
+
15
25
  class CudaLoweringError(LoweringError):
16
26
  pass
17
27
 
@@ -1,3 +1,6 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
1
4
  """
2
5
  Added for symmetry with the core API
3
6
  """
@@ -18,7 +21,7 @@ def make_attribute_wrapper(typeclass, struct_attr, python_attr):
18
21
  Vendored from numba.core.extending with a change to consider the CUDA data
19
22
  model manager.
20
23
  """
21
- from numba.core.typing.templates import AttributeTemplate
24
+ from numba.cuda.typing.templates import AttributeTemplate
22
25
 
23
26
  from numba.core.datamodel import default_manager
24
27
  from numba.core.datamodel.models import StructModel
@@ -1,3 +1,5 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
1
3
  from numba.core.compiler import Flags, Option
2
4
 
3
5
 
@@ -1,3 +1,6 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
1
4
  import numba.core.types as types
2
5
  from numba.cuda._internal.cuda_fp16 import (
3
6
  typing_registry,
@@ -1,3 +1,7 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
4
+
1
5
  def initialize_all():
2
6
  # Import models to register them with the data model manager
3
7
  import numba.cuda.models # noqa: F401
@@ -1,3 +1,6 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
1
4
  from .decorators import jit
2
5
  import numba
3
6
 
@@ -1,3 +1,6 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
1
4
  from llvmlite import ir
2
5
 
3
6
  from numba import cuda, types
@@ -0,0 +1,214 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
4
+ """
5
+ Itanium CXX ABI Mangler
6
+
7
+ Reference: https://itanium-cxx-abi.github.io/cxx-abi/abi.html
8
+
9
+ The basics of the mangling scheme.
10
+
11
+ We are hijacking the CXX mangling scheme for our use. We map Python modules
12
+ into CXX namespace. A `module1.submodule2.foo` is mapped to
13
+ `module1::submodule2::foo`. For parameterized numba types, we treat them as
14
+ templated types; for example, `array(int64, 1d, C)` becomes an
15
+ `array<int64, 1, C>`.
16
+
17
+ All mangled names are prefixed with "_Z". It is followed by the name of the
18
+ entity. A name contains one or more identifiers. Each identifier is encoded
19
+ as "<num of char><name>". If the name is namespaced and, therefore,
20
+ has multiple identifiers, the entire name is encoded as "N<name>E".
21
+
22
+ For functions, argument types follow. There are condensed encodings for basic
23
+ built-in types; e.g. "i" for int, "f" for float. For other types, the
24
+ previously mentioned name encoding should be used.
25
+
26
+ For templated types, the template parameters are encoded immediately after the
27
+ name. If it is namespaced, it should be within the 'N' 'E' marker. Template
28
+ parameters are encoded in "I<params>E", where each parameter is encoded using
29
+ the mentioned name encoding scheme. Template parameters can contain literal
30
+ values like the '1' in the array type shown earlier. There is a special encoding
31
+ scheme for them to avoid leading digits.
32
+ """
33
+
34
+ import re
35
+
36
+ from numba.core import types
37
+
38
+
39
+ # According to the scheme, valid characters for mangled names are [a-zA-Z0-9_].
40
+ # We borrow the '_' as the escape character to encode invalid char into
41
+ # '_xx' where 'xx' is the hex codepoint.
42
+ _re_invalid_char = re.compile(r"[^a-z0-9_]", re.I)
43
+
44
+ PREFIX = "_Z"
45
+
46
+ # Numba types to mangled type code. These correspond with the codes listed in
47
+ # https://itanium-cxx-abi.github.io/cxx-abi/abi.html#mangling-builtin
48
+ N2CODE = {
49
+ types.void: "v",
50
+ types.boolean: "b",
51
+ types.uint8: "h",
52
+ types.int8: "a",
53
+ types.uint16: "t",
54
+ types.int16: "s",
55
+ types.uint32: "j",
56
+ types.int32: "i",
57
+ types.uint64: "y",
58
+ types.int64: "x",
59
+ types.float16: "Dh",
60
+ types.float32: "f",
61
+ types.float64: "d",
62
+ }
63
+
64
+
65
+ def _escape_string(text):
66
+ """Escape the given string so that it only contains ASCII characters
67
+ of [a-zA-Z0-9_].
68
+
69
+ The dollar symbol ($) and other invalid characters are escaped into
70
+ the string sequence of "_xx" where "xx" is the hex codepoint of the char.
71
+
72
+ Multibyte characters are encoded into utf8 and converted into the above
73
+ hex format.
74
+ """
75
+
76
+ def repl(m):
77
+ return "".join(("_%02x" % ch) for ch in m.group(0).encode("utf8"))
78
+
79
+ ret = re.sub(_re_invalid_char, repl, text)
80
+ # Return str if we got a unicode (for py2)
81
+ if not isinstance(ret, str):
82
+ return ret.encode("ascii")
83
+ return ret
84
+
85
+
86
+ def _fix_lead_digit(text):
87
+ """
88
+ Fix text with leading digit
89
+ """
90
+ if text and text[0].isdigit():
91
+ return "_" + text
92
+ else:
93
+ return text
94
+
95
+
96
+ def _len_encoded(string):
97
+ """
98
+ Prefix string with its length in decimal digits.
99
+ An underscore is prepended first if the string starts with a digit.
100
+ """
101
+ string = _fix_lead_digit(string)
102
+ return "%u%s" % (len(string), string)
103
+
104
+
105
+ def mangle_abi_tag(abi_tag: str) -> str:
106
+ return "B" + _len_encoded(_escape_string(abi_tag))
107
+
108
+
109
+ def mangle_identifier(ident, template_params="", *, abi_tags=(), uid=None):
110
+ """
111
+ Mangle the identifier with optional template parameters and abi_tags.
112
+
113
+ Note:
114
+
115
+ This treats '.' as '::' in C++.
116
+ """
117
+ if uid is not None:
118
+ # Add uid to abi-tags
119
+ abi_tags = (f"v{uid}", *abi_tags)
120
+ parts = [_len_encoded(_escape_string(x)) for x in ident.split(".")]
121
+ enc_abi_tags = list(map(mangle_abi_tag, abi_tags))
122
+ extras = template_params + "".join(enc_abi_tags)
123
+ if len(parts) > 1:
124
+ return "N%s%sE" % ("".join(parts), extras)
125
+ else:
126
+ return "%s%s" % (parts[0], extras)
127
+
128
+
129
+ def mangle_type_or_value(typ):
130
+ """
131
+ Mangle type parameter and arbitrary value.
132
+ """
133
+ # Handle numba types
134
+ if isinstance(typ, types.Type):
135
+ if typ in N2CODE:
136
+ return N2CODE[typ]
137
+ else:
138
+ return mangle_templated_ident(*typ.mangling_args)
139
+ # Handle integer literal
140
+ elif isinstance(typ, int):
141
+ return "Li%dE" % typ
142
+ # Handle str as identifier
143
+ elif isinstance(typ, str):
144
+ return mangle_identifier(typ)
145
+ # Otherwise
146
+ else:
147
+ enc = _escape_string(str(typ))
148
+ return _len_encoded(enc)
149
+
150
+
151
+ # Alias
152
+ mangle_type = mangle_type_or_value
153
+ mangle_value = mangle_type_or_value
154
+
155
+
156
+ def mangle_templated_ident(identifier, parameters):
157
+ """
158
+ Mangle templated identifier.
159
+ """
160
+ template_params = (
161
+ "I%sE" % "".join(map(mangle_type_or_value, parameters))
162
+ if parameters
163
+ else ""
164
+ )
165
+ return mangle_identifier(identifier, template_params)
166
+
167
+
168
+ def mangle_args(argtys):
169
+ """
170
+ Mangle sequence of Numba type objects and arbitrary values.
171
+ """
172
+ return "".join([mangle_type_or_value(t) for t in argtys])
173
+
174
+
175
+ def mangle(ident, argtys, *, abi_tags=(), uid=None):
176
+ """
177
+ Mangle identifier with Numba type objects and abi-tags.
178
+ """
179
+ return "".join(
180
+ [
181
+ PREFIX,
182
+ mangle_identifier(ident, abi_tags=abi_tags, uid=uid),
183
+ mangle_args(argtys),
184
+ ]
185
+ )
186
+
187
+
188
+ def prepend_namespace(mangled, ns):
189
+ """
190
+ Prepend namespace to mangled name.
191
+ """
192
+ if not mangled.startswith(PREFIX):
193
+ raise ValueError("input is not a mangled name")
194
+ elif mangled.startswith(PREFIX + "N"):
195
+ # nested
196
+ remaining = mangled[3:]
197
+ ret = PREFIX + "N" + mangle_identifier(ns) + remaining
198
+ else:
199
+ # non-nested
200
+ remaining = mangled[2:]
201
+ head, tail = _split_mangled_ident(remaining)
202
+ ret = PREFIX + "N" + mangle_identifier(ns) + head + "E" + tail
203
+ return ret
204
+
205
+
206
+ def _split_mangled_ident(mangled):
207
+ """
208
+ Returns `(head, tail)` where `head` is the `<len> + <name>` encoded
209
+ identifier and `tail` is the remaining.
210
+ """
211
+ ct = int(mangled)
212
+ ctlen = len(str(ct))
213
+ at = ctlen + ct
214
+ return mangled[:at], mangled[at:]
@@ -0,0 +1,2 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
@@ -1,3 +1,6 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
1
4
  """
2
5
  A library written in CUDA Python for generating reduction kernels
3
6
  """
@@ -1,3 +1,6 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
1
4
  from numba import cuda
2
5
  from numba.cuda.cudadrv.driver import driver
3
6
  import math
@@ -1,3 +1,7 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
4
+
1
5
  def abs(x):
2
6
  """
3
7
  See https://docs.nvidia.com/cuda/libdevice-users-guide/__nv_abs.html
@@ -1,5 +1,8 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
1
4
  from numba.cuda import libdevice, libdevicefuncs
2
- from numba.core.typing.templates import ConcreteTemplate, Registry
5
+ from numba.cuda.typing.templates import ConcreteTemplate, Registry
3
6
 
4
7
  registry = Registry()
5
8
  register_global = registry.register_global
@@ -1,8 +1,11 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
1
4
  from collections import namedtuple
2
5
  from textwrap import indent
3
6
 
4
7
  from numba.types import float32, float64, int16, int32, int64, void, Tuple
5
- from numba.core.typing.templates import signature
8
+ from numba.cuda.typing.templates import signature
6
9
 
7
10
  arg = namedtuple("arg", ("name", "ty", "is_ptr"))
8
11
 
@@ -1,3 +1,6 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
1
4
  from llvmlite import ir
2
5
  from numba.core import types
3
6
  from numba.cuda import cgutils
@@ -1,3 +1,6 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
1
4
  from threading import Lock
2
5
  from functools import wraps
3
6
 
@@ -1,3 +1,6 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
1
4
  from collections import namedtuple, defaultdict
2
5
  import operator
3
6
  import warnings
@@ -14,11 +17,11 @@ from numba.core import (
14
17
  funcdesc,
15
18
  generators,
16
19
  config,
17
- ir_utils,
18
20
  cgutils,
19
21
  removerefctpass,
20
22
  targetconfig,
21
23
  )
24
+ from numba.cuda.core import ir_utils
22
25
  from numba.core.errors import (
23
26
  LoweringError,
24
27
  new_error_context,
@@ -1841,8 +1844,7 @@ class CUDALower(Lower):
1841
1844
  int_type = (llvm_ir.IntType,)
1842
1845
  real_type = llvm_ir.FloatType, llvm_ir.DoubleType
1843
1846
  if isinstance(lltype, int_type + real_type):
1844
- index = name.find(".")
1845
- src_name = name[:index] if index > 0 else name
1847
+ src_name = name.split(".")[0]
1846
1848
  if src_name in self.poly_var_typ_map:
1847
1849
  # Do not emit debug value on polymorphic type var
1848
1850
  return
@@ -1869,6 +1871,9 @@ class CUDALower(Lower):
1869
1871
 
1870
1872
  self.poly_var_typ_map = {}
1871
1873
  self.poly_var_loc_map = {}
1874
+ self.poly_var_set = set()
1875
+ self.poly_cleaned = False
1876
+ self.lastblk = max(self.blocks.keys())
1872
1877
 
1873
1878
  # When debug info is enabled, walk through function body and mark
1874
1879
  # variables with polymorphic types.
@@ -1880,8 +1885,7 @@ class CUDALower(Lower):
1880
1885
  if x.target.name.startswith("$"):
1881
1886
  continue
1882
1887
  ssa_name = x.target.name
1883
- index = ssa_name.find(".")
1884
- src_name = ssa_name[:index] if index > 0 else ssa_name
1888
+ src_name = ssa_name.split(".")[0]
1885
1889
  # Check all the multi-versioned targets
1886
1890
  if len(x.target.versioned_names) > 0:
1887
1891
  fetype = self.typeof(ssa_name)
@@ -1902,12 +1906,12 @@ class CUDALower(Lower):
1902
1906
  """
1903
1907
  # If the name is not handled yet and a store is needed
1904
1908
  if name not in self.varmap and self.store_var_needed(name):
1905
- index = name.find(".")
1906
- src_name = name[:index] if index > 0 else name
1909
+ src_name = name.split(".")[0]
1907
1910
  if src_name in self.poly_var_typ_map:
1908
- dtype = types.UnionType(self.poly_var_typ_map[src_name])
1909
- datamodel = self.context.data_model_manager[dtype]
1911
+ self.poly_var_set.add(name)
1910
1912
  if src_name not in self.poly_var_loc_map:
1913
+ dtype = types.UnionType(self.poly_var_typ_map[src_name])
1914
+ datamodel = self.context.data_model_manager[dtype]
1911
1915
  # UnionType has sorted set of types, max at last index
1912
1916
  maxsizetype = dtype.types[-1]
1913
1917
  # Create a single element aggregate type
@@ -1916,13 +1920,7 @@ class CUDALower(Lower):
1916
1920
  ptr = self.alloca_lltype(src_name, lltype, datamodel)
1917
1921
  # save the location of the union type for polymorphic var
1918
1922
  self.poly_var_loc_map[src_name] = ptr
1919
- # Any member of this union type shoud type cast ptr to fetype
1920
- lltype = self.context.get_value_type(fetype)
1921
- castptr = self.builder.bitcast(
1922
- self.poly_var_loc_map[src_name], llvm_ir.PointerType(lltype)
1923
- )
1924
- # Remember the pointer
1925
- self.varmap[name] = castptr
1923
+ return
1926
1924
 
1927
1925
  super()._alloca_var(name, fetype)
1928
1926
 
@@ -1936,6 +1934,45 @@ class CUDALower(Lower):
1936
1934
  or self._disable_sroa_like_opt
1937
1935
  )
1938
1936
 
1937
+ def delvar(self, name):
1938
+ """
1939
+ Delete the given variable.
1940
+ """
1941
+ if name in self.poly_var_set:
1942
+ fetype = self.typeof(name)
1943
+ src_name = name.split(".")[0]
1944
+ ptr = self.poly_var_loc_map[src_name]
1945
+ self.decref(fetype, self.builder.load(ptr))
1946
+ if (
1947
+ self._cur_ir_block == self.blocks[self.lastblk]
1948
+ and not self.poly_cleaned
1949
+ ):
1950
+ # Zero-fill the debug union for polymorphic only
1951
+ # at the last block
1952
+ for v in self.poly_var_loc_map.values():
1953
+ self.builder.store(
1954
+ llvm_ir.Constant(v.type.pointee, None), v
1955
+ )
1956
+ self.poly_cleaned = True
1957
+ return
1958
+
1959
+ super().delvar(name)
1960
+
1961
+ def getvar(self, name):
1962
+ """
1963
+ Get a pointer to the given variable's slot.
1964
+ """
1965
+ if name in self.poly_var_set:
1966
+ src_name = name.split(".")[0]
1967
+ fetype = self.typeof(name)
1968
+ lltype = self.context.get_value_type(fetype)
1969
+ castptr = self.builder.bitcast(
1970
+ self.poly_var_loc_map[src_name], llvm_ir.PointerType(lltype)
1971
+ )
1972
+ return castptr
1973
+ else:
1974
+ return super().getvar(name)
1975
+
1939
1976
 
1940
1977
  def _lit_or_omitted(value):
1941
1978
  """Returns a Literal instance if the type of value is supported;
@@ -1,3 +1,6 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
1
4
  import math
2
5
  import operator
3
6
  from llvmlite import ir
@@ -1 +1,4 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
1
4
  from numba.cuda.memory_management.nrt import rtsys # noqa: F401
@@ -1,3 +1,8 @@
1
+ /*
2
+ * SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3
+ * SPDX-License-Identifier: BSD-2-Clause
4
+ */
5
+
1
6
  #include "memsys.cuh"
2
7
 
3
8
  __device__ size_t memsys_size = sizeof(NRT_MemSys);
@@ -1,3 +1,8 @@
1
+ /*
2
+ * SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3
+ * SPDX-License-Identifier: BSD-2-Clause
4
+ */
5
+
1
6
  #include <cuda/atomic>
2
7
 
3
8
  // Globally needed variables
@@ -1,3 +1,8 @@
1
+ /*
2
+ * SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3
+ * SPDX-License-Identifier: BSD-2-Clause
4
+ */
5
+
1
6
  #ifndef _NRT_H
2
7
  #define _NRT_H
3
8
 
@@ -1,3 +1,8 @@
1
+ /*
2
+ * SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3
+ * SPDX-License-Identifier: BSD-2-Clause
4
+ */
5
+
1
6
  #include <cuda/atomic>
2
7
 
3
8
  typedef void (*NRT_dtor_function)(void* ptr, size_t size, void* info);
@@ -1,3 +1,6 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
1
4
  import ctypes
2
5
  import os
3
6
  from functools import wraps
@@ -10,6 +13,7 @@ from numba.cuda.cudadrv.driver import (
10
13
  driver,
11
14
  launch_kernel,
12
15
  USE_NV_BINDING,
16
+ _have_nvjitlink,
13
17
  )
14
18
  from numba.cuda.cudadrv import devices
15
19
  from numba.cuda.api import get_current_device
@@ -80,7 +84,7 @@ class _Runtime:
80
84
  cc = get_current_device().compute_capability
81
85
 
82
86
  # Create a new linker instance and add the cu file
83
- linker = _Linker.new(cc=cc)
87
+ linker = _Linker.new(cc=cc, lto=_have_nvjitlink())
84
88
  linker.add_cu_file(memsys_mod)
85
89
 
86
90
  # Complete the linker and create a module from it
@@ -1,3 +1,6 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
1
4
  import functools
2
5
 
3
6
  from llvmlite import ir
@@ -1,3 +1,6 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
1
4
  import itertools
2
5
  from llvmlite import ir
3
6
  from numba.core import targetconfig
@@ -1,3 +1,6 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
1
4
  from functools import singledispatch
2
5
  from llvmlite import ir
3
6
  from numba.core import types
@@ -1,3 +1,6 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
1
4
  import math
2
5
 
3
6
  from numba import (
@@ -1,3 +1,8 @@
1
+ /*
2
+ * SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3
+ * SPDX-License-Identifier: BSD-2-Clause
4
+ */
5
+
1
6
  /*
2
7
  * Handle reshaping of zero-sized array.
3
8
  * See numba_attempt_nocopy_reshape() below.
@@ -1,3 +1,6 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
1
4
  """
2
5
  Serialization support for compiled functions.
3
6
  """
@@ -1,3 +1,6 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
1
4
  import sys
2
5
 
3
6
  from .api import *
@@ -1 +1,4 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
1
4
  from numba.cuda.simulator._internal import cuda_bf16 # noqa: F401
@@ -0,0 +1,2 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
@@ -1,3 +1,6 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
1
4
  """
2
5
  Contains CUDA API functions
3
6
  """
@@ -17,8 +20,8 @@ from .cudadrv.linkable_code import (
17
20
  LTOIR, # noqa: F401
18
21
  ) # noqa: F401
19
22
  from .kernel import FakeCUDAKernel
20
- from numba.core.sigutils import is_signature
21
23
  from numba.core import config
24
+ from numba.cuda.core.sigutils import is_signature
22
25
  from warnings import warn
23
26
  from ..args import In, Out, InOut # noqa: F401
24
27
 
@@ -1 +1,4 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
1
4
  bfloat16 = None
@@ -1,3 +1,6 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
1
4
  """
2
5
  The compiler is not implemented in the simulator. This module provides a stub
3
6
  to allow tests to import successfully.