numba-cuda 0.22.0__cp313-cp313-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (487) hide show
  1. _numba_cuda_redirector.pth +4 -0
  2. _numba_cuda_redirector.py +89 -0
  3. numba_cuda/VERSION +1 -0
  4. numba_cuda/__init__.py +6 -0
  5. numba_cuda/_version.py +11 -0
  6. numba_cuda/numba/cuda/__init__.py +70 -0
  7. numba_cuda/numba/cuda/_internal/cuda_bf16.py +16394 -0
  8. numba_cuda/numba/cuda/_internal/cuda_fp16.py +8112 -0
  9. numba_cuda/numba/cuda/api.py +580 -0
  10. numba_cuda/numba/cuda/api_util.py +76 -0
  11. numba_cuda/numba/cuda/args.py +72 -0
  12. numba_cuda/numba/cuda/bf16.py +397 -0
  13. numba_cuda/numba/cuda/cache_hints.py +287 -0
  14. numba_cuda/numba/cuda/cext/__init__.py +2 -0
  15. numba_cuda/numba/cuda/cext/_devicearray.cpp +159 -0
  16. numba_cuda/numba/cuda/cext/_devicearray.cpython-313-aarch64-linux-gnu.so +0 -0
  17. numba_cuda/numba/cuda/cext/_devicearray.h +29 -0
  18. numba_cuda/numba/cuda/cext/_dispatcher.cpp +1098 -0
  19. numba_cuda/numba/cuda/cext/_dispatcher.cpython-313-aarch64-linux-gnu.so +0 -0
  20. numba_cuda/numba/cuda/cext/_hashtable.cpp +532 -0
  21. numba_cuda/numba/cuda/cext/_hashtable.h +135 -0
  22. numba_cuda/numba/cuda/cext/_helperlib.c +71 -0
  23. numba_cuda/numba/cuda/cext/_helperlib.cpython-313-aarch64-linux-gnu.so +0 -0
  24. numba_cuda/numba/cuda/cext/_helpermod.c +82 -0
  25. numba_cuda/numba/cuda/cext/_pymodule.h +38 -0
  26. numba_cuda/numba/cuda/cext/_typeconv.cpp +206 -0
  27. numba_cuda/numba/cuda/cext/_typeconv.cpython-313-aarch64-linux-gnu.so +0 -0
  28. numba_cuda/numba/cuda/cext/_typeof.cpp +1159 -0
  29. numba_cuda/numba/cuda/cext/_typeof.h +19 -0
  30. numba_cuda/numba/cuda/cext/capsulethunk.h +111 -0
  31. numba_cuda/numba/cuda/cext/mviewbuf.c +385 -0
  32. numba_cuda/numba/cuda/cext/mviewbuf.cpython-313-aarch64-linux-gnu.so +0 -0
  33. numba_cuda/numba/cuda/cext/typeconv.cpp +212 -0
  34. numba_cuda/numba/cuda/cext/typeconv.hpp +101 -0
  35. numba_cuda/numba/cuda/cg.py +67 -0
  36. numba_cuda/numba/cuda/cgutils.py +1294 -0
  37. numba_cuda/numba/cuda/cloudpickle/__init__.py +21 -0
  38. numba_cuda/numba/cuda/cloudpickle/cloudpickle.py +1598 -0
  39. numba_cuda/numba/cuda/cloudpickle/cloudpickle_fast.py +17 -0
  40. numba_cuda/numba/cuda/codegen.py +541 -0
  41. numba_cuda/numba/cuda/compiler.py +1396 -0
  42. numba_cuda/numba/cuda/core/analysis.py +758 -0
  43. numba_cuda/numba/cuda/core/annotations/__init__.py +0 -0
  44. numba_cuda/numba/cuda/core/annotations/pretty_annotate.py +288 -0
  45. numba_cuda/numba/cuda/core/annotations/type_annotations.py +305 -0
  46. numba_cuda/numba/cuda/core/base.py +1332 -0
  47. numba_cuda/numba/cuda/core/boxing.py +1411 -0
  48. numba_cuda/numba/cuda/core/bytecode.py +728 -0
  49. numba_cuda/numba/cuda/core/byteflow.py +2346 -0
  50. numba_cuda/numba/cuda/core/caching.py +744 -0
  51. numba_cuda/numba/cuda/core/callconv.py +392 -0
  52. numba_cuda/numba/cuda/core/codegen.py +171 -0
  53. numba_cuda/numba/cuda/core/compiler.py +199 -0
  54. numba_cuda/numba/cuda/core/compiler_lock.py +85 -0
  55. numba_cuda/numba/cuda/core/compiler_machinery.py +497 -0
  56. numba_cuda/numba/cuda/core/config.py +650 -0
  57. numba_cuda/numba/cuda/core/consts.py +124 -0
  58. numba_cuda/numba/cuda/core/controlflow.py +989 -0
  59. numba_cuda/numba/cuda/core/entrypoints.py +57 -0
  60. numba_cuda/numba/cuda/core/environment.py +66 -0
  61. numba_cuda/numba/cuda/core/errors.py +917 -0
  62. numba_cuda/numba/cuda/core/event.py +511 -0
  63. numba_cuda/numba/cuda/core/funcdesc.py +330 -0
  64. numba_cuda/numba/cuda/core/generators.py +387 -0
  65. numba_cuda/numba/cuda/core/imputils.py +509 -0
  66. numba_cuda/numba/cuda/core/inline_closurecall.py +1787 -0
  67. numba_cuda/numba/cuda/core/interpreter.py +3617 -0
  68. numba_cuda/numba/cuda/core/ir.py +1812 -0
  69. numba_cuda/numba/cuda/core/ir_utils.py +2638 -0
  70. numba_cuda/numba/cuda/core/optional.py +129 -0
  71. numba_cuda/numba/cuda/core/options.py +262 -0
  72. numba_cuda/numba/cuda/core/postproc.py +249 -0
  73. numba_cuda/numba/cuda/core/pythonapi.py +1859 -0
  74. numba_cuda/numba/cuda/core/registry.py +46 -0
  75. numba_cuda/numba/cuda/core/removerefctpass.py +123 -0
  76. numba_cuda/numba/cuda/core/rewrites/__init__.py +26 -0
  77. numba_cuda/numba/cuda/core/rewrites/ir_print.py +91 -0
  78. numba_cuda/numba/cuda/core/rewrites/registry.py +104 -0
  79. numba_cuda/numba/cuda/core/rewrites/static_binop.py +41 -0
  80. numba_cuda/numba/cuda/core/rewrites/static_getitem.py +189 -0
  81. numba_cuda/numba/cuda/core/rewrites/static_raise.py +100 -0
  82. numba_cuda/numba/cuda/core/sigutils.py +68 -0
  83. numba_cuda/numba/cuda/core/ssa.py +498 -0
  84. numba_cuda/numba/cuda/core/targetconfig.py +330 -0
  85. numba_cuda/numba/cuda/core/tracing.py +231 -0
  86. numba_cuda/numba/cuda/core/transforms.py +956 -0
  87. numba_cuda/numba/cuda/core/typed_passes.py +867 -0
  88. numba_cuda/numba/cuda/core/typeinfer.py +1950 -0
  89. numba_cuda/numba/cuda/core/unsafe/__init__.py +0 -0
  90. numba_cuda/numba/cuda/core/unsafe/bytes.py +67 -0
  91. numba_cuda/numba/cuda/core/unsafe/eh.py +67 -0
  92. numba_cuda/numba/cuda/core/unsafe/refcount.py +98 -0
  93. numba_cuda/numba/cuda/core/untyped_passes.py +1979 -0
  94. numba_cuda/numba/cuda/cpython/builtins.py +1153 -0
  95. numba_cuda/numba/cuda/cpython/charseq.py +1218 -0
  96. numba_cuda/numba/cuda/cpython/cmathimpl.py +560 -0
  97. numba_cuda/numba/cuda/cpython/enumimpl.py +103 -0
  98. numba_cuda/numba/cuda/cpython/iterators.py +167 -0
  99. numba_cuda/numba/cuda/cpython/listobj.py +1326 -0
  100. numba_cuda/numba/cuda/cpython/mathimpl.py +499 -0
  101. numba_cuda/numba/cuda/cpython/numbers.py +1475 -0
  102. numba_cuda/numba/cuda/cpython/rangeobj.py +289 -0
  103. numba_cuda/numba/cuda/cpython/slicing.py +322 -0
  104. numba_cuda/numba/cuda/cpython/tupleobj.py +456 -0
  105. numba_cuda/numba/cuda/cpython/unicode.py +2865 -0
  106. numba_cuda/numba/cuda/cpython/unicode_support.py +1597 -0
  107. numba_cuda/numba/cuda/cpython/unsafe/__init__.py +0 -0
  108. numba_cuda/numba/cuda/cpython/unsafe/numbers.py +64 -0
  109. numba_cuda/numba/cuda/cpython/unsafe/tuple.py +92 -0
  110. numba_cuda/numba/cuda/cuda_paths.py +691 -0
  111. numba_cuda/numba/cuda/cudadecl.py +543 -0
  112. numba_cuda/numba/cuda/cudadrv/__init__.py +14 -0
  113. numba_cuda/numba/cuda/cudadrv/devicearray.py +954 -0
  114. numba_cuda/numba/cuda/cudadrv/devices.py +249 -0
  115. numba_cuda/numba/cuda/cudadrv/driver.py +3238 -0
  116. numba_cuda/numba/cuda/cudadrv/drvapi.py +435 -0
  117. numba_cuda/numba/cuda/cudadrv/dummyarray.py +562 -0
  118. numba_cuda/numba/cuda/cudadrv/enums.py +613 -0
  119. numba_cuda/numba/cuda/cudadrv/error.py +48 -0
  120. numba_cuda/numba/cuda/cudadrv/libs.py +220 -0
  121. numba_cuda/numba/cuda/cudadrv/linkable_code.py +184 -0
  122. numba_cuda/numba/cuda/cudadrv/mappings.py +14 -0
  123. numba_cuda/numba/cuda/cudadrv/ndarray.py +26 -0
  124. numba_cuda/numba/cuda/cudadrv/nvrtc.py +193 -0
  125. numba_cuda/numba/cuda/cudadrv/nvvm.py +756 -0
  126. numba_cuda/numba/cuda/cudadrv/rtapi.py +13 -0
  127. numba_cuda/numba/cuda/cudadrv/runtime.py +34 -0
  128. numba_cuda/numba/cuda/cudaimpl.py +983 -0
  129. numba_cuda/numba/cuda/cudamath.py +149 -0
  130. numba_cuda/numba/cuda/datamodel/__init__.py +7 -0
  131. numba_cuda/numba/cuda/datamodel/cuda_manager.py +66 -0
  132. numba_cuda/numba/cuda/datamodel/cuda_models.py +1446 -0
  133. numba_cuda/numba/cuda/datamodel/cuda_packer.py +224 -0
  134. numba_cuda/numba/cuda/datamodel/cuda_registry.py +22 -0
  135. numba_cuda/numba/cuda/datamodel/cuda_testing.py +153 -0
  136. numba_cuda/numba/cuda/datamodel/manager.py +11 -0
  137. numba_cuda/numba/cuda/datamodel/models.py +9 -0
  138. numba_cuda/numba/cuda/datamodel/packer.py +9 -0
  139. numba_cuda/numba/cuda/datamodel/registry.py +11 -0
  140. numba_cuda/numba/cuda/datamodel/testing.py +11 -0
  141. numba_cuda/numba/cuda/debuginfo.py +997 -0
  142. numba_cuda/numba/cuda/decorators.py +294 -0
  143. numba_cuda/numba/cuda/descriptor.py +35 -0
  144. numba_cuda/numba/cuda/device_init.py +155 -0
  145. numba_cuda/numba/cuda/deviceufunc.py +1021 -0
  146. numba_cuda/numba/cuda/dispatcher.py +2463 -0
  147. numba_cuda/numba/cuda/errors.py +72 -0
  148. numba_cuda/numba/cuda/extending.py +697 -0
  149. numba_cuda/numba/cuda/flags.py +178 -0
  150. numba_cuda/numba/cuda/fp16.py +357 -0
  151. numba_cuda/numba/cuda/include/12/cuda_bf16.h +5118 -0
  152. numba_cuda/numba/cuda/include/12/cuda_bf16.hpp +3865 -0
  153. numba_cuda/numba/cuda/include/12/cuda_fp16.h +5363 -0
  154. numba_cuda/numba/cuda/include/12/cuda_fp16.hpp +3483 -0
  155. numba_cuda/numba/cuda/include/13/cuda_bf16.h +5118 -0
  156. numba_cuda/numba/cuda/include/13/cuda_bf16.hpp +3865 -0
  157. numba_cuda/numba/cuda/include/13/cuda_fp16.h +5363 -0
  158. numba_cuda/numba/cuda/include/13/cuda_fp16.hpp +3483 -0
  159. numba_cuda/numba/cuda/initialize.py +24 -0
  160. numba_cuda/numba/cuda/intrinsics.py +531 -0
  161. numba_cuda/numba/cuda/itanium_mangler.py +214 -0
  162. numba_cuda/numba/cuda/kernels/__init__.py +2 -0
  163. numba_cuda/numba/cuda/kernels/reduction.py +265 -0
  164. numba_cuda/numba/cuda/kernels/transpose.py +65 -0
  165. numba_cuda/numba/cuda/libdevice.py +3386 -0
  166. numba_cuda/numba/cuda/libdevicedecl.py +20 -0
  167. numba_cuda/numba/cuda/libdevicefuncs.py +1060 -0
  168. numba_cuda/numba/cuda/libdeviceimpl.py +88 -0
  169. numba_cuda/numba/cuda/locks.py +19 -0
  170. numba_cuda/numba/cuda/lowering.py +1980 -0
  171. numba_cuda/numba/cuda/mathimpl.py +374 -0
  172. numba_cuda/numba/cuda/memory_management/__init__.py +4 -0
  173. numba_cuda/numba/cuda/memory_management/memsys.cu +99 -0
  174. numba_cuda/numba/cuda/memory_management/memsys.cuh +22 -0
  175. numba_cuda/numba/cuda/memory_management/nrt.cu +212 -0
  176. numba_cuda/numba/cuda/memory_management/nrt.cuh +48 -0
  177. numba_cuda/numba/cuda/memory_management/nrt.py +390 -0
  178. numba_cuda/numba/cuda/memory_management/nrt_context.py +438 -0
  179. numba_cuda/numba/cuda/misc/appdirs.py +594 -0
  180. numba_cuda/numba/cuda/misc/cffiimpl.py +24 -0
  181. numba_cuda/numba/cuda/misc/coverage_support.py +43 -0
  182. numba_cuda/numba/cuda/misc/dump_style.py +41 -0
  183. numba_cuda/numba/cuda/misc/findlib.py +75 -0
  184. numba_cuda/numba/cuda/misc/firstlinefinder.py +96 -0
  185. numba_cuda/numba/cuda/misc/gdb_hook.py +240 -0
  186. numba_cuda/numba/cuda/misc/literal.py +28 -0
  187. numba_cuda/numba/cuda/misc/llvm_pass_timings.py +412 -0
  188. numba_cuda/numba/cuda/misc/special.py +94 -0
  189. numba_cuda/numba/cuda/models.py +56 -0
  190. numba_cuda/numba/cuda/np/arraymath.py +5130 -0
  191. numba_cuda/numba/cuda/np/arrayobj.py +7635 -0
  192. numba_cuda/numba/cuda/np/extensions.py +11 -0
  193. numba_cuda/numba/cuda/np/linalg.py +3087 -0
  194. numba_cuda/numba/cuda/np/math/__init__.py +0 -0
  195. numba_cuda/numba/cuda/np/math/cmathimpl.py +558 -0
  196. numba_cuda/numba/cuda/np/math/mathimpl.py +487 -0
  197. numba_cuda/numba/cuda/np/math/numbers.py +1461 -0
  198. numba_cuda/numba/cuda/np/npdatetime.py +969 -0
  199. numba_cuda/numba/cuda/np/npdatetime_helpers.py +217 -0
  200. numba_cuda/numba/cuda/np/npyfuncs.py +1808 -0
  201. numba_cuda/numba/cuda/np/npyimpl.py +1027 -0
  202. numba_cuda/numba/cuda/np/numpy_support.py +798 -0
  203. numba_cuda/numba/cuda/np/polynomial/__init__.py +4 -0
  204. numba_cuda/numba/cuda/np/polynomial/polynomial_core.py +242 -0
  205. numba_cuda/numba/cuda/np/polynomial/polynomial_functions.py +380 -0
  206. numba_cuda/numba/cuda/np/ufunc/__init__.py +4 -0
  207. numba_cuda/numba/cuda/np/ufunc/decorators.py +203 -0
  208. numba_cuda/numba/cuda/np/ufunc/sigparse.py +68 -0
  209. numba_cuda/numba/cuda/np/ufunc/ufuncbuilder.py +65 -0
  210. numba_cuda/numba/cuda/np/ufunc_db.py +1282 -0
  211. numba_cuda/numba/cuda/np/unsafe/__init__.py +0 -0
  212. numba_cuda/numba/cuda/np/unsafe/ndarray.py +84 -0
  213. numba_cuda/numba/cuda/nvvmutils.py +254 -0
  214. numba_cuda/numba/cuda/printimpl.py +126 -0
  215. numba_cuda/numba/cuda/random.py +308 -0
  216. numba_cuda/numba/cuda/reshape_funcs.cu +156 -0
  217. numba_cuda/numba/cuda/serialize.py +267 -0
  218. numba_cuda/numba/cuda/simulator/__init__.py +63 -0
  219. numba_cuda/numba/cuda/simulator/_internal/__init__.py +4 -0
  220. numba_cuda/numba/cuda/simulator/_internal/cuda_bf16.py +2 -0
  221. numba_cuda/numba/cuda/simulator/api.py +179 -0
  222. numba_cuda/numba/cuda/simulator/bf16.py +4 -0
  223. numba_cuda/numba/cuda/simulator/compiler.py +38 -0
  224. numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +11 -0
  225. numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +462 -0
  226. numba_cuda/numba/cuda/simulator/cudadrv/devices.py +122 -0
  227. numba_cuda/numba/cuda/simulator/cudadrv/driver.py +66 -0
  228. numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +7 -0
  229. numba_cuda/numba/cuda/simulator/cudadrv/dummyarray.py +7 -0
  230. numba_cuda/numba/cuda/simulator/cudadrv/error.py +10 -0
  231. numba_cuda/numba/cuda/simulator/cudadrv/libs.py +10 -0
  232. numba_cuda/numba/cuda/simulator/cudadrv/linkable_code.py +61 -0
  233. numba_cuda/numba/cuda/simulator/cudadrv/nvrtc.py +11 -0
  234. numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +32 -0
  235. numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +22 -0
  236. numba_cuda/numba/cuda/simulator/dispatcher.py +11 -0
  237. numba_cuda/numba/cuda/simulator/kernel.py +320 -0
  238. numba_cuda/numba/cuda/simulator/kernelapi.py +509 -0
  239. numba_cuda/numba/cuda/simulator/memory_management/__init__.py +4 -0
  240. numba_cuda/numba/cuda/simulator/memory_management/nrt.py +21 -0
  241. numba_cuda/numba/cuda/simulator/reduction.py +19 -0
  242. numba_cuda/numba/cuda/simulator/tests/support.py +4 -0
  243. numba_cuda/numba/cuda/simulator/vector_types.py +65 -0
  244. numba_cuda/numba/cuda/simulator_init.py +18 -0
  245. numba_cuda/numba/cuda/stubs.py +624 -0
  246. numba_cuda/numba/cuda/target.py +505 -0
  247. numba_cuda/numba/cuda/testing.py +347 -0
  248. numba_cuda/numba/cuda/tests/__init__.py +62 -0
  249. numba_cuda/numba/cuda/tests/benchmarks/__init__.py +0 -0
  250. numba_cuda/numba/cuda/tests/benchmarks/test_kernel_launch.py +119 -0
  251. numba_cuda/numba/cuda/tests/cloudpickle_main_class.py +9 -0
  252. numba_cuda/numba/cuda/tests/core/serialize_usecases.py +113 -0
  253. numba_cuda/numba/cuda/tests/core/test_itanium_mangler.py +83 -0
  254. numba_cuda/numba/cuda/tests/core/test_serialize.py +371 -0
  255. numba_cuda/numba/cuda/tests/cudadrv/__init__.py +9 -0
  256. numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +147 -0
  257. numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +161 -0
  258. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +397 -0
  259. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +24 -0
  260. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +180 -0
  261. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +313 -0
  262. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +191 -0
  263. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +621 -0
  264. numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +247 -0
  265. numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +100 -0
  266. numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +200 -0
  267. numba_cuda/numba/cuda/tests/cudadrv/test_events.py +53 -0
  268. numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +72 -0
  269. numba_cuda/numba/cuda/tests/cudadrv/test_init.py +138 -0
  270. numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +43 -0
  271. numba_cuda/numba/cuda/tests/cudadrv/test_is_fp16.py +15 -0
  272. numba_cuda/numba/cuda/tests/cudadrv/test_linkable_code.py +58 -0
  273. numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +348 -0
  274. numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +128 -0
  275. numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py +301 -0
  276. numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +174 -0
  277. numba_cuda/numba/cuda/tests/cudadrv/test_nvrtc.py +28 -0
  278. numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +185 -0
  279. numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +39 -0
  280. numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +23 -0
  281. numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +38 -0
  282. numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +48 -0
  283. numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +44 -0
  284. numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +127 -0
  285. numba_cuda/numba/cuda/tests/cudapy/__init__.py +9 -0
  286. numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +231 -0
  287. numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +50 -0
  288. numba_cuda/numba/cuda/tests/cudapy/cg_cache_usecases.py +36 -0
  289. numba_cuda/numba/cuda/tests/cudapy/complex_usecases.py +116 -0
  290. numba_cuda/numba/cuda/tests/cudapy/enum_usecases.py +59 -0
  291. numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +62 -0
  292. numba_cuda/numba/cuda/tests/cudapy/jitlink.ptx +28 -0
  293. numba_cuda/numba/cuda/tests/cudapy/overload_usecases.py +33 -0
  294. numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +104 -0
  295. numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +47 -0
  296. numba_cuda/numba/cuda/tests/cudapy/test_analysis.py +1122 -0
  297. numba_cuda/numba/cuda/tests/cudapy/test_array.py +344 -0
  298. numba_cuda/numba/cuda/tests/cudapy/test_array_alignment.py +268 -0
  299. numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +203 -0
  300. numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +63 -0
  301. numba_cuda/numba/cuda/tests/cudapy/test_array_reductions.py +360 -0
  302. numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +1815 -0
  303. numba_cuda/numba/cuda/tests/cudapy/test_bfloat16.py +599 -0
  304. numba_cuda/numba/cuda/tests/cudapy/test_bfloat16_bindings.py +377 -0
  305. numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +160 -0
  306. numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +27 -0
  307. numba_cuda/numba/cuda/tests/cudapy/test_byteflow.py +98 -0
  308. numba_cuda/numba/cuda/tests/cudapy/test_cache_hints.py +210 -0
  309. numba_cuda/numba/cuda/tests/cudapy/test_caching.py +683 -0
  310. numba_cuda/numba/cuda/tests/cudapy/test_casting.py +265 -0
  311. numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +42 -0
  312. numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +718 -0
  313. numba_cuda/numba/cuda/tests/cudapy/test_complex.py +370 -0
  314. numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +23 -0
  315. numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +142 -0
  316. numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +178 -0
  317. numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +193 -0
  318. numba_cuda/numba/cuda/tests/cudapy/test_copy_propagate.py +131 -0
  319. numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +438 -0
  320. numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +94 -0
  321. numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +101 -0
  322. numba_cuda/numba/cuda/tests/cudapy/test_debug.py +105 -0
  323. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +978 -0
  324. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo_types.py +476 -0
  325. numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +500 -0
  326. numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +820 -0
  327. numba_cuda/numba/cuda/tests/cudapy/test_enums.py +152 -0
  328. numba_cuda/numba/cuda/tests/cudapy/test_errors.py +111 -0
  329. numba_cuda/numba/cuda/tests/cudapy/test_exception.py +170 -0
  330. numba_cuda/numba/cuda/tests/cudapy/test_extending.py +1088 -0
  331. numba_cuda/numba/cuda/tests/cudapy/test_extending_types.py +71 -0
  332. numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +265 -0
  333. numba_cuda/numba/cuda/tests/cudapy/test_flow_control.py +1433 -0
  334. numba_cuda/numba/cuda/tests/cudapy/test_forall.py +57 -0
  335. numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +34 -0
  336. numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +69 -0
  337. numba_cuda/numba/cuda/tests/cudapy/test_globals.py +62 -0
  338. numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +474 -0
  339. numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +167 -0
  340. numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +92 -0
  341. numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +39 -0
  342. numba_cuda/numba/cuda/tests/cudapy/test_inline.py +170 -0
  343. numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +255 -0
  344. numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +1219 -0
  345. numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +263 -0
  346. numba_cuda/numba/cuda/tests/cudapy/test_ir.py +598 -0
  347. numba_cuda/numba/cuda/tests/cudapy/test_ir_utils.py +276 -0
  348. numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +101 -0
  349. numba_cuda/numba/cuda/tests/cudapy/test_lang.py +68 -0
  350. numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +123 -0
  351. numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +194 -0
  352. numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +220 -0
  353. numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +173 -0
  354. numba_cuda/numba/cuda/tests/cudapy/test_make_function_to_jit_function.py +364 -0
  355. numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +47 -0
  356. numba_cuda/numba/cuda/tests/cudapy/test_math.py +842 -0
  357. numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +76 -0
  358. numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +78 -0
  359. numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +25 -0
  360. numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +145 -0
  361. numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +39 -0
  362. numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +82 -0
  363. numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +53 -0
  364. numba_cuda/numba/cuda/tests/cudapy/test_operator.py +504 -0
  365. numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +93 -0
  366. numba_cuda/numba/cuda/tests/cudapy/test_overload.py +402 -0
  367. numba_cuda/numba/cuda/tests/cudapy/test_powi.py +128 -0
  368. numba_cuda/numba/cuda/tests/cudapy/test_print.py +193 -0
  369. numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +37 -0
  370. numba_cuda/numba/cuda/tests/cudapy/test_random.py +117 -0
  371. numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +614 -0
  372. numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +130 -0
  373. numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +94 -0
  374. numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +83 -0
  375. numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +86 -0
  376. numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +40 -0
  377. numba_cuda/numba/cuda/tests/cudapy/test_sm.py +457 -0
  378. numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +233 -0
  379. numba_cuda/numba/cuda/tests/cudapy/test_ssa.py +454 -0
  380. numba_cuda/numba/cuda/tests/cudapy/test_stream_api.py +56 -0
  381. numba_cuda/numba/cuda/tests/cudapy/test_sync.py +277 -0
  382. numba_cuda/numba/cuda/tests/cudapy/test_tracing.py +200 -0
  383. numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +90 -0
  384. numba_cuda/numba/cuda/tests/cudapy/test_typeconv.py +333 -0
  385. numba_cuda/numba/cuda/tests/cudapy/test_typeinfer.py +538 -0
  386. numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +585 -0
  387. numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +42 -0
  388. numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +485 -0
  389. numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +312 -0
  390. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +23 -0
  391. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +183 -0
  392. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +40 -0
  393. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +40 -0
  394. numba_cuda/numba/cuda/tests/cudapy/test_warning.py +206 -0
  395. numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +446 -0
  396. numba_cuda/numba/cuda/tests/cudasim/__init__.py +9 -0
  397. numba_cuda/numba/cuda/tests/cudasim/support.py +9 -0
  398. numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +111 -0
  399. numba_cuda/numba/cuda/tests/data/__init__.py +2 -0
  400. numba_cuda/numba/cuda/tests/data/cta_barrier.cu +28 -0
  401. numba_cuda/numba/cuda/tests/data/cuda_include.cu +10 -0
  402. numba_cuda/numba/cuda/tests/data/error.cu +12 -0
  403. numba_cuda/numba/cuda/tests/data/include/add.cuh +8 -0
  404. numba_cuda/numba/cuda/tests/data/jitlink.cu +28 -0
  405. numba_cuda/numba/cuda/tests/data/jitlink.ptx +49 -0
  406. numba_cuda/numba/cuda/tests/data/warn.cu +12 -0
  407. numba_cuda/numba/cuda/tests/doc_examples/__init__.py +9 -0
  408. numba_cuda/numba/cuda/tests/doc_examples/ffi/__init__.py +2 -0
  409. numba_cuda/numba/cuda/tests/doc_examples/ffi/functions.cu +54 -0
  410. numba_cuda/numba/cuda/tests/doc_examples/ffi/include/mul.cuh +8 -0
  411. numba_cuda/numba/cuda/tests/doc_examples/ffi/saxpy.cu +14 -0
  412. numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +86 -0
  413. numba_cuda/numba/cuda/tests/doc_examples/test_cpointer.py +68 -0
  414. numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +81 -0
  415. numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +141 -0
  416. numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +160 -0
  417. numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +180 -0
  418. numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +119 -0
  419. numba_cuda/numba/cuda/tests/doc_examples/test_random.py +66 -0
  420. numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +80 -0
  421. numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +206 -0
  422. numba_cuda/numba/cuda/tests/doc_examples/test_ufunc.py +53 -0
  423. numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +76 -0
  424. numba_cuda/numba/cuda/tests/nocuda/__init__.py +9 -0
  425. numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +452 -0
  426. numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +48 -0
  427. numba_cuda/numba/cuda/tests/nocuda/test_import.py +63 -0
  428. numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +252 -0
  429. numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +59 -0
  430. numba_cuda/numba/cuda/tests/nrt/__init__.py +9 -0
  431. numba_cuda/numba/cuda/tests/nrt/test_nrt.py +387 -0
  432. numba_cuda/numba/cuda/tests/nrt/test_nrt_refct.py +124 -0
  433. numba_cuda/numba/cuda/tests/support.py +900 -0
  434. numba_cuda/numba/cuda/typeconv/__init__.py +4 -0
  435. numba_cuda/numba/cuda/typeconv/castgraph.py +137 -0
  436. numba_cuda/numba/cuda/typeconv/rules.py +63 -0
  437. numba_cuda/numba/cuda/typeconv/typeconv.py +121 -0
  438. numba_cuda/numba/cuda/types/__init__.py +233 -0
  439. numba_cuda/numba/cuda/types/__init__.pyi +167 -0
  440. numba_cuda/numba/cuda/types/abstract.py +9 -0
  441. numba_cuda/numba/cuda/types/common.py +9 -0
  442. numba_cuda/numba/cuda/types/containers.py +9 -0
  443. numba_cuda/numba/cuda/types/cuda_abstract.py +533 -0
  444. numba_cuda/numba/cuda/types/cuda_common.py +110 -0
  445. numba_cuda/numba/cuda/types/cuda_containers.py +971 -0
  446. numba_cuda/numba/cuda/types/cuda_function_type.py +230 -0
  447. numba_cuda/numba/cuda/types/cuda_functions.py +798 -0
  448. numba_cuda/numba/cuda/types/cuda_iterators.py +120 -0
  449. numba_cuda/numba/cuda/types/cuda_misc.py +569 -0
  450. numba_cuda/numba/cuda/types/cuda_npytypes.py +690 -0
  451. numba_cuda/numba/cuda/types/cuda_scalars.py +280 -0
  452. numba_cuda/numba/cuda/types/ext_types.py +101 -0
  453. numba_cuda/numba/cuda/types/function_type.py +11 -0
  454. numba_cuda/numba/cuda/types/functions.py +9 -0
  455. numba_cuda/numba/cuda/types/iterators.py +9 -0
  456. numba_cuda/numba/cuda/types/misc.py +9 -0
  457. numba_cuda/numba/cuda/types/npytypes.py +9 -0
  458. numba_cuda/numba/cuda/types/scalars.py +9 -0
  459. numba_cuda/numba/cuda/typing/__init__.py +19 -0
  460. numba_cuda/numba/cuda/typing/arraydecl.py +939 -0
  461. numba_cuda/numba/cuda/typing/asnumbatype.py +130 -0
  462. numba_cuda/numba/cuda/typing/bufproto.py +70 -0
  463. numba_cuda/numba/cuda/typing/builtins.py +1209 -0
  464. numba_cuda/numba/cuda/typing/cffi_utils.py +219 -0
  465. numba_cuda/numba/cuda/typing/cmathdecl.py +47 -0
  466. numba_cuda/numba/cuda/typing/collections.py +138 -0
  467. numba_cuda/numba/cuda/typing/context.py +782 -0
  468. numba_cuda/numba/cuda/typing/ctypes_utils.py +125 -0
  469. numba_cuda/numba/cuda/typing/dictdecl.py +63 -0
  470. numba_cuda/numba/cuda/typing/enumdecl.py +74 -0
  471. numba_cuda/numba/cuda/typing/listdecl.py +147 -0
  472. numba_cuda/numba/cuda/typing/mathdecl.py +158 -0
  473. numba_cuda/numba/cuda/typing/npdatetime.py +322 -0
  474. numba_cuda/numba/cuda/typing/npydecl.py +749 -0
  475. numba_cuda/numba/cuda/typing/setdecl.py +115 -0
  476. numba_cuda/numba/cuda/typing/templates.py +1446 -0
  477. numba_cuda/numba/cuda/typing/typeof.py +301 -0
  478. numba_cuda/numba/cuda/ufuncs.py +746 -0
  479. numba_cuda/numba/cuda/utils.py +724 -0
  480. numba_cuda/numba/cuda/vector_types.py +214 -0
  481. numba_cuda/numba/cuda/vectorizers.py +260 -0
  482. numba_cuda-0.22.0.dist-info/METADATA +109 -0
  483. numba_cuda-0.22.0.dist-info/RECORD +487 -0
  484. numba_cuda-0.22.0.dist-info/WHEEL +6 -0
  485. numba_cuda-0.22.0.dist-info/licenses/LICENSE +26 -0
  486. numba_cuda-0.22.0.dist-info/licenses/LICENSE.numba +24 -0
  487. numba_cuda-0.22.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,129 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
4
+ import operator
5
+
6
+ from numba.cuda import types, typing
7
+ from numba.cuda import cgutils
8
+
9
+ from numba.cuda.core.imputils import Registry, impl_ret_untracked
10
+
11
+ registry = Registry("optional")
12
+ lower_builtin = registry.lower
13
+ lower_cast = registry.lower_cast
14
+ lower_getattr_generic = registry.lower_getattr_generic
15
+ lower_setattr_generic = registry.lower_setattr_generic
16
+
17
+
18
def always_return_true_impl(context, builder, sig, args):
    """
    Lowering implementation that ignores all of its arguments and yields
    the constant ``cgutils.true_bit``.
    """
    result = cgutils.true_bit
    return result
20
+
21
+
22
def always_return_false_impl(context, builder, sig, args):
    """
    Lowering implementation that ignores all of its arguments and yields
    the constant ``cgutils.false_bit``.
    """
    result = cgutils.false_bit
    return result
24
+
25
+
26
def optional_is_none(context, builder, sig, args):
    """
    Lower ``<optional> is None`` (operands in either order).

    The result is the negation of the optional's ``valid`` field.
    """
    first_ty, second_ty = sig.args
    first_val, second_val = args

    # ``None`` may be on either side; the optional is whichever operand is
    # not typed as ``types.none``.
    if first_ty == types.none:
        opt_type, opt_val = second_ty, second_val
    else:
        opt_type, opt_val = first_ty, first_val

    helper = context.make_helper(builder, opt_type, opt_val)
    is_valid = cgutils.as_bool_bit(builder, helper.valid)
    res = builder.not_(is_valid)
    return impl_ret_untracked(context, builder, sig.return_type, res)
44
+
45
+
46
+ # None is/not None
47
+ lower_builtin(operator.is_, types.none, types.none)(always_return_true_impl)
48
+
49
+ # Optional is None
50
+ lower_builtin(operator.is_, types.Optional, types.none)(optional_is_none)
51
+ lower_builtin(operator.is_, types.none, types.Optional)(optional_is_none)
52
+
53
+
54
@lower_getattr_generic(types.Optional)
def optional_getattr(context, builder, typ, value, attr):
    """
    Attribute read on an Optional: cast the value to the wrapped type and
    delegate to that type's own getattr implementation.
    """
    wrapped_type = typ.type
    unwrapped = context.cast(builder, value, typ, wrapped_type)
    getter = context.get_getattr(wrapped_type, attr)
    return getter(context, builder, wrapped_type, unwrapped, attr)
63
+
64
+
65
@lower_setattr_generic(types.Optional)
def optional_setattr(context, builder, sig, args, attr):
    """
    Attribute write on an Optional: cast the target to the wrapped type and
    delegate to the setattr implementation for that type.
    """
    opt_type, val_type = sig.args
    opt_target, new_val = args
    wrapped_type = opt_type.type
    unwrapped_target = context.cast(
        builder, opt_target, opt_type, wrapped_type
    )

    # Same signature as the incoming one, but with the wrapped type in place
    # of the Optional.
    inner_sig = typing.signature(sig.return_type, wrapped_type, val_type)
    setter = context.get_setattr(attr, inner_sig)
    return setter(builder, (unwrapped_target, new_val))
78
+
79
+
80
@lower_cast(types.Optional, types.Optional)
def optional_to_optional(context, builder, fromty, toty, val):
    """
    Cast ``T?`` to ``U?`` (``?`` denotes optional) while propagating None.

    If the source value is valid, its payload is cast from ``T`` to ``U``
    and the result is marked valid. If the source is None, the result is
    marked invalid and its payload is set to the null value. This differs
    from casting ``T?`` to ``U``, which requires the source not to be None.
    """
    src = context.make_helper(builder, fromty, value=val)
    src_is_valid = cgutils.as_bool_bit(builder, src.valid)
    # Destination starts uninitialized; each branch below fills both fields.
    dst = context.make_helper(builder, toty)

    with builder.if_else(src_is_valid) as (valid_branch, none_branch):
        with valid_branch:
            # Source holds a value: cast the payload to the target type.
            dst.valid = cgutils.true_bit
            dst.data = context.cast(
                builder, src.data, fromty.type, toty.type
            )

        with none_branch:
            # Source is None: result is None as well.
            dst.valid = cgutils.false_bit
            dst.data = cgutils.get_null_value(dst.data.type)

    return dst._getvalue()
109
+
110
+
111
@lower_cast(types.Any, types.Optional)
def any_to_optional(context, builder, fromty, toty, val):
    """
    Cast a non-optional value to an Optional.

    A source typed as ``types.none`` becomes an optional None; anything else
    is first cast to the wrapped type and then wrapped as a valid optional.
    """
    wrapped_type = toty.type
    if fromty == types.none:
        return context.make_optional_none(builder, wrapped_type)

    converted = context.cast(builder, val, fromty, wrapped_type)
    return context.make_optional_value(builder, wrapped_type, converted)
118
+
119
+
120
@lower_cast(types.Optional, types.Any)
@lower_cast(types.Optional, types.Boolean)
def optional_to_any(context, builder, fromty, toty, val):
    """
    Cast an Optional to a non-optional type.

    Emits a guarded branch that returns a ``TypeError`` user exception when
    the optional is not valid, then casts the payload to ``toty``.
    """
    helper = context.make_helper(builder, fromty, value=val)
    is_valid = cgutils.as_bool_bit(builder, helper.valid)
    is_none = builder.not_(is_valid)
    with builder.if_then(is_none, likely=False):
        message = "expected %s, got None" % (fromty.type,)
        context.call_conv.return_user_exc(builder, TypeError, (message,))

    return context.cast(builder, helper.data, fromty.type, toty)
@@ -0,0 +1,262 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
4
+ """
5
+ Defines CUDA Options for use in the CUDA target
6
+ """
7
+
8
+ from abc import ABCMeta, abstractmethod
9
+
10
+
11
class AbstractOptionValue(metaclass=ABCMeta):
    """Common abstract base for custom option value classes."""

    @abstractmethod
    def encode(self) -> str:
        """Return a string encoding of the option's value(s)."""
        ...

    def __repr__(self) -> str:
        # Rendered as ``ClassName(<encoded value>)``.
        class_name = self.__class__.__name__
        encoded = self.encode()
        return f"{class_name}({encoded})"
21
+
22
+
23
class FastMathOptions(AbstractOptionValue):
    """
    Options for controlling fast math optimization.

    The enabled flag names are stored as a set in ``self.flags``.
    """

    def __init__(self, value):
        """
        Build the flag set from ``value``.

        Accepted inputs:

        - another ``FastMathOptions``: its flags are copied;
        - ``True``: equivalent to ``{"fast"}``;
        - ``False``: no flags;
        - a ``set`` of flag names;
        - a ``dict`` mapping flag names to a truthy/falsy "enabled" value.

        Raises
        ------
        ValueError
            If an unknown flag name is given, or ``value`` is of any other
            type.
        """
        # https://releases.llvm.org/7.0.0/docs/LangRef.html#fast-math-flags
        valid_flags = {
            "fast",
            "nnan",
            "ninf",
            "nsz",
            "arcp",
            "contract",
            "afn",
            "reassoc",
        }

        if isinstance(value, FastMathOptions):
            self.flags = value.flags.copy()
        elif value is True:
            self.flags = {"fast"}
        elif value is False:
            self.flags = set()
        elif isinstance(value, set):
            invalid = value - valid_flags
            if invalid:
                raise ValueError("Unrecognized fastmath flags: %s" % invalid)
            # Store a copy, as the FastMathOptions branch does: otherwise a
            # later mutation of the caller's set would change (possibly to
            # unvalidated flags) the options held here.
            self.flags = set(value)
        elif isinstance(value, dict):
            invalid = set(value.keys()) - valid_flags
            if invalid:
                raise ValueError("Unrecognized fastmath flags: %s" % invalid)
            self.flags = {v for v, enable in value.items() if enable}
        else:
            msg = "Expected fastmath option(s) to be either a bool, dict or set"
            raise ValueError(msg)

    def __bool__(self):
        """True when at least one flag is enabled."""
        return bool(self.flags)

    __nonzero__ = __bool__

    def encode(self) -> str:
        """Return the string form of the flag set."""
        return str(self.flags)

    def __eq__(self, other):
        """Equal to another instance of exactly the same type with equal flags."""
        if type(other) is type(self):
            return self.flags == other.flags
        return NotImplemented
73
+
74
+
75
class ParallelOptions(AbstractOptionValue):
    """
    Options for controlling auto parallelization.

    Each name in ``__slots__`` is one boolean-like switch; ``enabled`` is the
    master switch and the remaining names are individual features.
    """

    __slots__ = (
        "enabled",
        "comprehension",
        "reduction",
        "inplace_binop",
        "setitem",
        "numpy",
        "stencil",
        "fusion",
        "prange",
    )

    def __init__(self, value):
        """
        Build the options from ``value``.

        Accepted inputs:

        - a ``bool``: every switch (including ``enabled``) is set to it;
        - a ``dict``: ``enabled`` is set to True and each feature takes the
          value given under its name, defaulting to True when absent;
        - another ``ParallelOptions``: all switches are copied.

        Raises
        ------
        NameError
            If a ``dict`` contains keys that are not feature names.
        ValueError
            If ``value`` is of any other type.
        """
        option_names = ParallelOptions.__slots__
        if isinstance(value, bool):
            for name in option_names:
                setattr(self, name, value)
        elif isinstance(value, dict):
            # Consume a private copy. Popping from the caller's dict would
            # empty it, so passing the same dict a second time would
            # silently yield the all-True defaults.
            remaining = dict(value)
            self.enabled = True
            for name in option_names:
                if name != "enabled":
                    setattr(self, name, remaining.pop(name, True))
            if remaining:
                msg = "Unrecognized parallel options: %s" % remaining.keys()
                raise NameError(msg)
        elif isinstance(value, ParallelOptions):
            for name in option_names:
                setattr(self, name, getattr(value, name))
        else:
            msg = "Expect parallel option to be either a bool or a dict"
            raise ValueError(msg)

    def _get_values(self):
        """Get values as dictionary."""
        return {k: getattr(self, k) for k in self.__slots__}

    def __eq__(self, other):
        """Equal to another instance of exactly the same type with equal switches."""
        if type(other) is type(self):
            return self._get_values() == other._get_values()
        return NotImplemented

    def encode(self) -> str:
        """Return the switches as a comma-separated ``name=value`` list."""
        return ", ".join(f"{k}={v}" for k, v in self._get_values().items())
141
+
142
+
143
class InlineOptions(AbstractOptionValue):
    """
    Options for controlling inlining
    """

    def __init__(self, value):
        """
        Store the inline setting.

        ``value`` must be the string ``"always"`` or ``"never"``, or a
        callable (used as a cost model).

        Raises
        ------
        ValueError
            For any other value.
        """
        if isinstance(value, str):
            ok = value in ("always", "never")
        else:
            ok = callable(value)

        if not ok:
            # Wrap ``value`` in a 1-tuple: with a bare ``% value`` a tuple
            # argument would be unpacked by the formatting operator and
            # raise TypeError (or lose its parentheses) instead of producing
            # the intended ValueError.
            msg = (
                "kwarg 'inline' must be one of the strings 'always' or "
                "'never', or it can be a callable that returns True/False. "
                "Found value %s" % (value,)
            )
            raise ValueError(msg)

        self._inline = value

    @property
    def is_never_inline(self):
        """
        True if never inline
        """
        return self._inline == "never"

    @property
    def is_always_inline(self):
        """
        True if always inline
        """
        return self._inline == "always"

    @property
    def has_cost_model(self):
        """
        True if a cost model is provided
        """
        return not (self.is_always_inline or self.is_never_inline)

    @property
    def value(self):
        """
        The raw value
        """
        return self._inline

    def __eq__(self, other):
        """Equal to another instance of exactly the same type with an equal value."""
        if type(other) is type(self):
            return self.value == other.value
        return NotImplemented

    def encode(self) -> str:
        """Return the ``repr`` of the raw value."""
        return repr(self._inline)
201
+
202
+
203
class TargetOptions:
    """Target options maps user options from decorators to the
    ``numba.cuda.core.compiler.Flags`` used by lowering and target context.

    Subclasses declare the options they understand as class attributes whose
    values are ``Mapping`` instances; the attribute name is the option name.
    """

    class Mapping:
        """Ties an option to the flag attribute it sets.

        ``flag_name`` is the attribute assigned on the flags object and
        ``apply`` converts the user-supplied option value first (identity by
        default).
        """

        def __init__(self, flag_name, apply=lambda x: x):
            self.flag_name = flag_name
            self.apply = apply

    def finalize(self, flags, options):
        """Subclasses can override this method to make target specific
        customizations of default flags.

        Parameters
        ----------
        flags : Flags
        options : dict
        """
        pass

    @classmethod
    def parse_as_flags(cls, flags, options):
        """Parse target options defined in ``options`` and set ``flags``
        accordingly.

        Parameters
        ----------
        flags : Flags
        options : dict

        Returns
        -------
        The same ``flags`` object that was passed in.

        Raises
        ------
        KeyError
            If ``options`` contains a name with no matching ``Mapping``.
        """
        opt = cls()
        opt._apply(flags, options)
        opt.finalize(flags, options)
        return flags

    def _apply(self, flags, options):
        """Set on ``flags`` every option that has a ``Mapping``, then reject
        any option name that was not recognized.
        """
        # Find all Mapping instances in the class
        cls = type(self)
        mappings = {}
        for name in dir(cls):
            candidate = getattr(cls, name)
            if isinstance(candidate, cls.Mapping):
                mappings[name] = candidate

        used = set()
        for name, mapping in mappings.items():
            if name in options:
                setattr(flags, mapping.flag_name, mapping.apply(options[name]))
                used.add(name)

        unused = set(options) - used
        if unused:
            # Unread options? Show the known names as a plain sorted list
            # rather than leaking a ``dict_keys([...])`` repr to the user.
            m = (
                f"Unrecognized options: {unused}. "
                f"Known options are {sorted(mappings)}"
            )
            raise KeyError(m)
@@ -0,0 +1,249 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+ from functools import cached_property
4
+ from numba.cuda.core import analysis, ir
5
+ from numba.cuda.core import ir_utils, transforms
6
+
7
+
8
class YieldPoint(object):
    """
    Pairs an ``ir.Yield`` instruction with the ``ir.Block`` passed alongside
    it, plus two liveness slots that start out unset.
    """

    def __init__(self, block, inst):
        assert isinstance(block, ir.Block)
        assert isinstance(inst, ir.Yield)
        self.block, self.inst = block, inst
        # Both start as None and are assigned after construction.
        self.live_vars = self.weak_live_vars = None
16
+
17
+
18
class GeneratorInfo(object):
    """
    Container for per-generator data: the yield points keyed by index and
    the ordered list of state variable names.
    """

    def __init__(self):
        # yield_points: { index: YieldPoint }
        # state_vars: ordered list of variable names
        self.yield_points, self.state_vars = {}, []

    def get_yield_points(self):
        """
        Return an iterable over the stored YieldPoint instances.
        """
        points = self.yield_points
        return points.values()
30
+
31
+
32
class VariableLifetime(object):
    """
    Lazily computed variable-lifetime information for a set of blocks.

    Each analysis result is a ``cached_property``: it is computed on first
    access and then reused.
    """

    def __init__(self, blocks):
        self._blocks = blocks

    @cached_property
    def cfg(self):
        """Result of ``analysis.compute_cfg_from_blocks`` for the blocks."""
        blocks = self._blocks
        return analysis.compute_cfg_from_blocks(blocks)

    @cached_property
    def usedefs(self):
        """Result of ``analysis.compute_use_defs`` for the blocks."""
        blocks = self._blocks
        return analysis.compute_use_defs(blocks)

    @cached_property
    def livemap(self):
        """Result of ``analysis.compute_live_map`` from the CFG and use/def maps."""
        graph = self.cfg
        blocks = self._blocks
        use_map = self.usedefs.usemap
        def_map = self.usedefs.defmap
        return analysis.compute_live_map(graph, blocks, use_map, def_map)

    @cached_property
    def deadmaps(self):
        """Result of ``analysis.compute_dead_maps`` from the CFG, live map and def map."""
        graph = self.cfg
        blocks = self._blocks
        live_map = self.livemap
        def_map = self.usedefs.defmap
        return analysis.compute_dead_maps(graph, blocks, live_map, def_map)
59
+
60
+
61
+ # other packages that define new nodes add calls for inserting dels
62
+ # format: {type:function}
63
+ ir_extension_insert_dels = {}
64
+
65
+
66
class PostProcessor(object):
    """
    A post-processor for Numba IR.

    Holds a function IR object (``func_ir``) and annotates/rewrites it in
    place; :meth:`run` is the entry point.
    """

    def __init__(self, func_ir):
        # The IR that every method below reads and mutates.
        self.func_ir = func_ir

    def run(self, emit_dels: bool = False, extend_lifetimes: bool = False):
        """
        Run the following passes over Numba IR, in this order:
        - canonicalize the CFG
        - attach a fresh (lazy) ``VariableLifetime`` to the IR
        - record the live variables at the entry of each block
        - compute generator info (if function is a generator function),
          otherwise set ``generator_info`` to None
        - emit explicit `del` instructions for variables (only when
          ``emit_dels`` is true)

        Parameters
        ----------
        emit_dels : bool
            When true, insert ``ir.Del`` nodes as the last step.
        extend_lifetimes : bool
            Forwarded to the del insertion; only has an effect together with
            ``emit_dels``.
        """
        self.func_ir.blocks = transforms.canonicalize_cfg(self.func_ir.blocks)
        vlt = VariableLifetime(self.func_ir.blocks)
        self.func_ir.variable_lifetime = vlt

        # bev is indexed by the same keys as func_ir.blocks.
        bev = analysis.compute_live_variables(
            vlt.cfg,
            self.func_ir.blocks,
            vlt.usedefs.defmap,
            vlt.deadmaps.combined,
        )
        for offset, ir_block in self.func_ir.blocks.items():
            self.func_ir.block_entry_vars[ir_block] = bev[offset]

        if self.func_ir.is_generator:
            self.func_ir.generator_info = GeneratorInfo()
            self._compute_generator_info()
        else:
            self.func_ir.generator_info = None

        # Emit del nodes, do this last as the generator info parsing generates
        # and then strips dels as part of its analysis.
        if emit_dels:
            self._insert_var_dels(extend_lifetimes=extend_lifetimes)

    def _populate_generator_info(self):
        """
        Fill `index` for the Yield instruction and create YieldPoints.

        Indices are assigned in traversal order, starting at 1. Must only be
        called while the generator info holds no yield points yet.
        """
        dct = self.func_ir.generator_info.yield_points
        assert not dct, "rerunning _populate_generator_info"
        for block in self.func_ir.blocks.values():
            for inst in block.body:
                # Only yields appearing as the value of an assignment are
                # picked up here.
                if isinstance(inst, ir.Assign):
                    yieldinst = inst.value
                    if isinstance(yieldinst, ir.Yield):
                        index = len(dct) + 1
                        yieldinst.index = index
                        yp = YieldPoint(block, yieldinst)
                        dct[yieldinst.index] = yp

    def _compute_generator_info(self):
        """
        Compute the generator's state variables as the union of live variables
        at all yield points.

        For each yield point this sets ``live_vars`` and ``weak_live_vars``;
        the sorted union of both over all yield points becomes
        ``generator_info.state_vars``. Del nodes are inserted temporarily for
        the analysis and removed again before returning.
        """
        # generate del info, it's used in analysis here, strip it out at the end
        self._insert_var_dels()
        self._populate_generator_info()
        gi = self.func_ir.generator_info
        for yp in gi.get_yield_points():
            # Start from what is live on block entry, then replay the block's
            # assignments and dels up to the yield.
            live_vars = set(self.func_ir.get_block_entry_vars(yp.block))
            weak_live_vars = set()
            # A single iterator is shared by the two loops below so that the
            # second loop resumes right after the yield assignment.
            stmts = iter(yp.block.body)
            for stmt in stmts:
                if isinstance(stmt, ir.Assign):
                    if stmt.value is yp.inst:
                        break
                    live_vars.add(stmt.target.name)
                elif isinstance(stmt, ir.Del):
                    live_vars.remove(stmt.value)
            else:
                # Loop ended without hitting the yield's assignment.
                assert 0, "couldn't find yield point"
            # Try to optimize out any live vars that are deleted immediately
            # after the yield point.
            for stmt in stmts:
                if isinstance(stmt, ir.Del):
                    name = stmt.value
                    if name in live_vars:
                        live_vars.remove(name)
                        weak_live_vars.add(name)
                else:
                    # Stop at the first statement that is not a Del.
                    break
            yp.live_vars = live_vars
            yp.weak_live_vars = weak_live_vars

        st = set()
        for yp in gi.get_yield_points():
            st |= yp.live_vars
            st |= yp.weak_live_vars
        gi.state_vars = sorted(st)
        self.remove_dels()

    def _insert_var_dels(self, extend_lifetimes=False):
        """
        Insert del statements for each variable.

        The blocks of ``func_ir`` are modified in place; nothing is returned.
        The internal and escaping dead maps of the IR's ``variable_lifetime``
        are handed to ``_patch_var_dels``, which does the rewriting.

        The algorithm avoids relying on explicit knowledge of loops and
        distinguishes between variables that are defined locally and
        variables that come from incoming blocks.
        We start with simple usage (variable reference) and definition (variable
        creation) maps on each block. Propagate the liveness info to predecessor
        blocks until it stabilizes, at which point we know which variables must
        exist before entering each block. Then, we compute the end of variable
        lives and insert del statements accordingly. Variables are deleted after
        the last use. Variables referenced by terminators (e.g. conditional
        branch and return) are deleted by the successors or the caller.
        """
        vlt = self.func_ir.variable_lifetime
        self._patch_var_dels(
            vlt.deadmaps.internal,
            vlt.deadmaps.escaping,
            extend_lifetimes=extend_lifetimes,
        )

    def _patch_var_dels(
        self, internal_dead_map, escaping_dead_map, extend_lifetimes=False
    ):
        """
        Insert delete in each block

        Parameters
        ----------
        internal_dead_map
            Indexed by block key; each entry is copied and then consumed to
            decide after which statement a variable is deleted.
        escaping_dead_map
            Indexed by block key; each entry lists names deleted at the very
            start of that block.
        extend_lifetimes : bool
            When true, the Del nodes for internal variables are gathered and
            placed together just before the block's last statement instead of
            directly after the last use.
        """
        for offset, ir_block in self.func_ir.blocks.items():
            # for each internal var, insert delete after the last use
            internal_dead_set = internal_dead_map[offset].copy()
            delete_pts = []
            # for each statement in reverse order
            # (the last statement, the terminator, is excluded and re-added
            # unchanged further down)
            for stmt in reversed(ir_block.body[:-1]):
                # internal vars that are used here
                live_set = set(v.name for v in stmt.list_vars())
                dead_set = live_set & internal_dead_set
                # Registered extension handlers get the statement and the
                # candidate names; whatever they return is treated as already
                # handled and dropped from both sets.
                for T, def_func in ir_extension_insert_dels.items():
                    if isinstance(stmt, T):
                        done_dels = def_func(stmt, dead_set)
                        dead_set -= done_dels
                        internal_dead_set -= done_dels
                # used here but not afterwards
                delete_pts.append((stmt, dead_set))
                internal_dead_set -= dead_set

            # rewrite body and insert dels
            body = []
            lastloc = ir_block.loc
            del_store = []
            # delete_pts was built back-to-front, so reverse it to restore
            # the original statement order.
            for stmt, delete_set in reversed(delete_pts):
                # If using extended lifetimes then the Dels are all put at the
                # block end just ahead of the terminator, so associate their
                # location with the terminator.
                if extend_lifetimes:
                    lastloc = ir_block.body[-1].loc
                else:
                    lastloc = stmt.loc
                # Ignore dels (assuming no user inserted deletes)
                if not isinstance(stmt, ir.Del):
                    body.append(stmt)
                # note: the reverse sort is not necessary for correctness
                # it is just to minimize changes to test for now
                for var_name in sorted(delete_set, reverse=True):
                    delnode = ir.Del(var_name, loc=lastloc)
                    if extend_lifetimes:
                        del_store.append(delnode)
                    else:
                        body.append(delnode)
            if extend_lifetimes:
                body.extend(del_store)
            body.append(ir_block.body[-1])  # terminator
            ir_block.body = body

            # vars to delete at the start
            escape_dead_set = escaping_dead_map[offset]
            for var_name in sorted(escape_dead_set):
                ir_block.prepend(ir.Del(var_name, loc=ir_block.body[0].loc))

    def remove_dels(self):
        """
        Strips the IR of Del nodes
        """
        ir_utils.remove_dels(self.func_ir.blocks)