numba-cuda 0.22.1__cp311-cp311-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (488) hide show
  1. _numba_cuda_redirector.pth +4 -0
  2. _numba_cuda_redirector.py +89 -0
  3. numba_cuda/VERSION +1 -0
  4. numba_cuda/__init__.py +6 -0
  5. numba_cuda/_version.py +11 -0
  6. numba_cuda/numba/cuda/__init__.py +70 -0
  7. numba_cuda/numba/cuda/_internal/cuda_bf16.py +16394 -0
  8. numba_cuda/numba/cuda/_internal/cuda_fp16.py +8112 -0
  9. numba_cuda/numba/cuda/api.py +580 -0
  10. numba_cuda/numba/cuda/api_util.py +76 -0
  11. numba_cuda/numba/cuda/args.py +72 -0
  12. numba_cuda/numba/cuda/bf16.py +397 -0
  13. numba_cuda/numba/cuda/cache_hints.py +287 -0
  14. numba_cuda/numba/cuda/cext/__init__.py +2 -0
  15. numba_cuda/numba/cuda/cext/_devicearray.cpp +159 -0
  16. numba_cuda/numba/cuda/cext/_devicearray.cpython-311-aarch64-linux-gnu.so +0 -0
  17. numba_cuda/numba/cuda/cext/_devicearray.h +29 -0
  18. numba_cuda/numba/cuda/cext/_dispatcher.cpp +1098 -0
  19. numba_cuda/numba/cuda/cext/_dispatcher.cpython-311-aarch64-linux-gnu.so +0 -0
  20. numba_cuda/numba/cuda/cext/_hashtable.cpp +532 -0
  21. numba_cuda/numba/cuda/cext/_hashtable.h +135 -0
  22. numba_cuda/numba/cuda/cext/_helperlib.c +71 -0
  23. numba_cuda/numba/cuda/cext/_helperlib.cpython-311-aarch64-linux-gnu.so +0 -0
  24. numba_cuda/numba/cuda/cext/_helpermod.c +82 -0
  25. numba_cuda/numba/cuda/cext/_pymodule.h +38 -0
  26. numba_cuda/numba/cuda/cext/_typeconv.cpp +206 -0
  27. numba_cuda/numba/cuda/cext/_typeconv.cpython-311-aarch64-linux-gnu.so +0 -0
  28. numba_cuda/numba/cuda/cext/_typeof.cpp +1159 -0
  29. numba_cuda/numba/cuda/cext/_typeof.h +19 -0
  30. numba_cuda/numba/cuda/cext/capsulethunk.h +111 -0
  31. numba_cuda/numba/cuda/cext/mviewbuf.c +385 -0
  32. numba_cuda/numba/cuda/cext/mviewbuf.cpython-311-aarch64-linux-gnu.so +0 -0
  33. numba_cuda/numba/cuda/cext/typeconv.cpp +212 -0
  34. numba_cuda/numba/cuda/cext/typeconv.hpp +101 -0
  35. numba_cuda/numba/cuda/cg.py +67 -0
  36. numba_cuda/numba/cuda/cgutils.py +1294 -0
  37. numba_cuda/numba/cuda/cloudpickle/__init__.py +21 -0
  38. numba_cuda/numba/cuda/cloudpickle/cloudpickle.py +1598 -0
  39. numba_cuda/numba/cuda/cloudpickle/cloudpickle_fast.py +17 -0
  40. numba_cuda/numba/cuda/codegen.py +541 -0
  41. numba_cuda/numba/cuda/compiler.py +1396 -0
  42. numba_cuda/numba/cuda/core/analysis.py +758 -0
  43. numba_cuda/numba/cuda/core/annotations/__init__.py +0 -0
  44. numba_cuda/numba/cuda/core/annotations/pretty_annotate.py +288 -0
  45. numba_cuda/numba/cuda/core/annotations/type_annotations.py +305 -0
  46. numba_cuda/numba/cuda/core/base.py +1332 -0
  47. numba_cuda/numba/cuda/core/boxing.py +1411 -0
  48. numba_cuda/numba/cuda/core/bytecode.py +728 -0
  49. numba_cuda/numba/cuda/core/byteflow.py +2346 -0
  50. numba_cuda/numba/cuda/core/caching.py +744 -0
  51. numba_cuda/numba/cuda/core/callconv.py +392 -0
  52. numba_cuda/numba/cuda/core/codegen.py +171 -0
  53. numba_cuda/numba/cuda/core/compiler.py +199 -0
  54. numba_cuda/numba/cuda/core/compiler_lock.py +85 -0
  55. numba_cuda/numba/cuda/core/compiler_machinery.py +497 -0
  56. numba_cuda/numba/cuda/core/config.py +650 -0
  57. numba_cuda/numba/cuda/core/consts.py +124 -0
  58. numba_cuda/numba/cuda/core/controlflow.py +989 -0
  59. numba_cuda/numba/cuda/core/cuda_errors.py +917 -0
  60. numba_cuda/numba/cuda/core/entrypoints.py +57 -0
  61. numba_cuda/numba/cuda/core/environment.py +66 -0
  62. numba_cuda/numba/cuda/core/errors.py +9 -0
  63. numba_cuda/numba/cuda/core/event.py +511 -0
  64. numba_cuda/numba/cuda/core/funcdesc.py +330 -0
  65. numba_cuda/numba/cuda/core/generators.py +387 -0
  66. numba_cuda/numba/cuda/core/imputils.py +509 -0
  67. numba_cuda/numba/cuda/core/inline_closurecall.py +1787 -0
  68. numba_cuda/numba/cuda/core/interpreter.py +3617 -0
  69. numba_cuda/numba/cuda/core/ir.py +1812 -0
  70. numba_cuda/numba/cuda/core/ir_utils.py +2638 -0
  71. numba_cuda/numba/cuda/core/optional.py +129 -0
  72. numba_cuda/numba/cuda/core/options.py +262 -0
  73. numba_cuda/numba/cuda/core/postproc.py +249 -0
  74. numba_cuda/numba/cuda/core/pythonapi.py +1859 -0
  75. numba_cuda/numba/cuda/core/registry.py +46 -0
  76. numba_cuda/numba/cuda/core/removerefctpass.py +123 -0
  77. numba_cuda/numba/cuda/core/rewrites/__init__.py +26 -0
  78. numba_cuda/numba/cuda/core/rewrites/ir_print.py +91 -0
  79. numba_cuda/numba/cuda/core/rewrites/registry.py +104 -0
  80. numba_cuda/numba/cuda/core/rewrites/static_binop.py +41 -0
  81. numba_cuda/numba/cuda/core/rewrites/static_getitem.py +189 -0
  82. numba_cuda/numba/cuda/core/rewrites/static_raise.py +100 -0
  83. numba_cuda/numba/cuda/core/sigutils.py +68 -0
  84. numba_cuda/numba/cuda/core/ssa.py +498 -0
  85. numba_cuda/numba/cuda/core/targetconfig.py +330 -0
  86. numba_cuda/numba/cuda/core/tracing.py +231 -0
  87. numba_cuda/numba/cuda/core/transforms.py +956 -0
  88. numba_cuda/numba/cuda/core/typed_passes.py +867 -0
  89. numba_cuda/numba/cuda/core/typeinfer.py +1950 -0
  90. numba_cuda/numba/cuda/core/unsafe/__init__.py +0 -0
  91. numba_cuda/numba/cuda/core/unsafe/bytes.py +67 -0
  92. numba_cuda/numba/cuda/core/unsafe/eh.py +67 -0
  93. numba_cuda/numba/cuda/core/unsafe/refcount.py +98 -0
  94. numba_cuda/numba/cuda/core/untyped_passes.py +1979 -0
  95. numba_cuda/numba/cuda/cpython/builtins.py +1153 -0
  96. numba_cuda/numba/cuda/cpython/charseq.py +1218 -0
  97. numba_cuda/numba/cuda/cpython/cmathimpl.py +560 -0
  98. numba_cuda/numba/cuda/cpython/enumimpl.py +103 -0
  99. numba_cuda/numba/cuda/cpython/iterators.py +167 -0
  100. numba_cuda/numba/cuda/cpython/listobj.py +1326 -0
  101. numba_cuda/numba/cuda/cpython/mathimpl.py +499 -0
  102. numba_cuda/numba/cuda/cpython/numbers.py +1475 -0
  103. numba_cuda/numba/cuda/cpython/rangeobj.py +289 -0
  104. numba_cuda/numba/cuda/cpython/slicing.py +322 -0
  105. numba_cuda/numba/cuda/cpython/tupleobj.py +456 -0
  106. numba_cuda/numba/cuda/cpython/unicode.py +2865 -0
  107. numba_cuda/numba/cuda/cpython/unicode_support.py +1597 -0
  108. numba_cuda/numba/cuda/cpython/unsafe/__init__.py +0 -0
  109. numba_cuda/numba/cuda/cpython/unsafe/numbers.py +64 -0
  110. numba_cuda/numba/cuda/cpython/unsafe/tuple.py +92 -0
  111. numba_cuda/numba/cuda/cuda_paths.py +691 -0
  112. numba_cuda/numba/cuda/cudadecl.py +543 -0
  113. numba_cuda/numba/cuda/cudadrv/__init__.py +14 -0
  114. numba_cuda/numba/cuda/cudadrv/devicearray.py +954 -0
  115. numba_cuda/numba/cuda/cudadrv/devices.py +249 -0
  116. numba_cuda/numba/cuda/cudadrv/driver.py +3238 -0
  117. numba_cuda/numba/cuda/cudadrv/drvapi.py +435 -0
  118. numba_cuda/numba/cuda/cudadrv/dummyarray.py +562 -0
  119. numba_cuda/numba/cuda/cudadrv/enums.py +613 -0
  120. numba_cuda/numba/cuda/cudadrv/error.py +48 -0
  121. numba_cuda/numba/cuda/cudadrv/libs.py +220 -0
  122. numba_cuda/numba/cuda/cudadrv/linkable_code.py +184 -0
  123. numba_cuda/numba/cuda/cudadrv/mappings.py +14 -0
  124. numba_cuda/numba/cuda/cudadrv/ndarray.py +26 -0
  125. numba_cuda/numba/cuda/cudadrv/nvrtc.py +193 -0
  126. numba_cuda/numba/cuda/cudadrv/nvvm.py +756 -0
  127. numba_cuda/numba/cuda/cudadrv/rtapi.py +13 -0
  128. numba_cuda/numba/cuda/cudadrv/runtime.py +34 -0
  129. numba_cuda/numba/cuda/cudaimpl.py +983 -0
  130. numba_cuda/numba/cuda/cudamath.py +149 -0
  131. numba_cuda/numba/cuda/datamodel/__init__.py +7 -0
  132. numba_cuda/numba/cuda/datamodel/cuda_manager.py +66 -0
  133. numba_cuda/numba/cuda/datamodel/cuda_models.py +1446 -0
  134. numba_cuda/numba/cuda/datamodel/cuda_packer.py +224 -0
  135. numba_cuda/numba/cuda/datamodel/cuda_registry.py +22 -0
  136. numba_cuda/numba/cuda/datamodel/cuda_testing.py +153 -0
  137. numba_cuda/numba/cuda/datamodel/manager.py +11 -0
  138. numba_cuda/numba/cuda/datamodel/models.py +9 -0
  139. numba_cuda/numba/cuda/datamodel/packer.py +9 -0
  140. numba_cuda/numba/cuda/datamodel/registry.py +11 -0
  141. numba_cuda/numba/cuda/datamodel/testing.py +11 -0
  142. numba_cuda/numba/cuda/debuginfo.py +997 -0
  143. numba_cuda/numba/cuda/decorators.py +294 -0
  144. numba_cuda/numba/cuda/descriptor.py +35 -0
  145. numba_cuda/numba/cuda/device_init.py +155 -0
  146. numba_cuda/numba/cuda/deviceufunc.py +1021 -0
  147. numba_cuda/numba/cuda/dispatcher.py +2463 -0
  148. numba_cuda/numba/cuda/errors.py +72 -0
  149. numba_cuda/numba/cuda/extending.py +697 -0
  150. numba_cuda/numba/cuda/flags.py +178 -0
  151. numba_cuda/numba/cuda/fp16.py +357 -0
  152. numba_cuda/numba/cuda/include/12/cuda_bf16.h +5118 -0
  153. numba_cuda/numba/cuda/include/12/cuda_bf16.hpp +3865 -0
  154. numba_cuda/numba/cuda/include/12/cuda_fp16.h +5363 -0
  155. numba_cuda/numba/cuda/include/12/cuda_fp16.hpp +3483 -0
  156. numba_cuda/numba/cuda/include/13/cuda_bf16.h +5118 -0
  157. numba_cuda/numba/cuda/include/13/cuda_bf16.hpp +3865 -0
  158. numba_cuda/numba/cuda/include/13/cuda_fp16.h +5363 -0
  159. numba_cuda/numba/cuda/include/13/cuda_fp16.hpp +3483 -0
  160. numba_cuda/numba/cuda/initialize.py +24 -0
  161. numba_cuda/numba/cuda/intrinsics.py +531 -0
  162. numba_cuda/numba/cuda/itanium_mangler.py +214 -0
  163. numba_cuda/numba/cuda/kernels/__init__.py +2 -0
  164. numba_cuda/numba/cuda/kernels/reduction.py +265 -0
  165. numba_cuda/numba/cuda/kernels/transpose.py +65 -0
  166. numba_cuda/numba/cuda/libdevice.py +3386 -0
  167. numba_cuda/numba/cuda/libdevicedecl.py +20 -0
  168. numba_cuda/numba/cuda/libdevicefuncs.py +1060 -0
  169. numba_cuda/numba/cuda/libdeviceimpl.py +88 -0
  170. numba_cuda/numba/cuda/locks.py +19 -0
  171. numba_cuda/numba/cuda/lowering.py +1980 -0
  172. numba_cuda/numba/cuda/mathimpl.py +374 -0
  173. numba_cuda/numba/cuda/memory_management/__init__.py +4 -0
  174. numba_cuda/numba/cuda/memory_management/memsys.cu +99 -0
  175. numba_cuda/numba/cuda/memory_management/memsys.cuh +22 -0
  176. numba_cuda/numba/cuda/memory_management/nrt.cu +212 -0
  177. numba_cuda/numba/cuda/memory_management/nrt.cuh +48 -0
  178. numba_cuda/numba/cuda/memory_management/nrt.py +390 -0
  179. numba_cuda/numba/cuda/memory_management/nrt_context.py +438 -0
  180. numba_cuda/numba/cuda/misc/appdirs.py +594 -0
  181. numba_cuda/numba/cuda/misc/cffiimpl.py +24 -0
  182. numba_cuda/numba/cuda/misc/coverage_support.py +43 -0
  183. numba_cuda/numba/cuda/misc/dump_style.py +41 -0
  184. numba_cuda/numba/cuda/misc/findlib.py +75 -0
  185. numba_cuda/numba/cuda/misc/firstlinefinder.py +96 -0
  186. numba_cuda/numba/cuda/misc/gdb_hook.py +240 -0
  187. numba_cuda/numba/cuda/misc/literal.py +28 -0
  188. numba_cuda/numba/cuda/misc/llvm_pass_timings.py +412 -0
  189. numba_cuda/numba/cuda/misc/special.py +94 -0
  190. numba_cuda/numba/cuda/models.py +56 -0
  191. numba_cuda/numba/cuda/np/arraymath.py +5130 -0
  192. numba_cuda/numba/cuda/np/arrayobj.py +7635 -0
  193. numba_cuda/numba/cuda/np/extensions.py +11 -0
  194. numba_cuda/numba/cuda/np/linalg.py +3087 -0
  195. numba_cuda/numba/cuda/np/math/__init__.py +0 -0
  196. numba_cuda/numba/cuda/np/math/cmathimpl.py +558 -0
  197. numba_cuda/numba/cuda/np/math/mathimpl.py +487 -0
  198. numba_cuda/numba/cuda/np/math/numbers.py +1461 -0
  199. numba_cuda/numba/cuda/np/npdatetime.py +969 -0
  200. numba_cuda/numba/cuda/np/npdatetime_helpers.py +217 -0
  201. numba_cuda/numba/cuda/np/npyfuncs.py +1808 -0
  202. numba_cuda/numba/cuda/np/npyimpl.py +1027 -0
  203. numba_cuda/numba/cuda/np/numpy_support.py +798 -0
  204. numba_cuda/numba/cuda/np/polynomial/__init__.py +4 -0
  205. numba_cuda/numba/cuda/np/polynomial/polynomial_core.py +242 -0
  206. numba_cuda/numba/cuda/np/polynomial/polynomial_functions.py +380 -0
  207. numba_cuda/numba/cuda/np/ufunc/__init__.py +4 -0
  208. numba_cuda/numba/cuda/np/ufunc/decorators.py +203 -0
  209. numba_cuda/numba/cuda/np/ufunc/sigparse.py +68 -0
  210. numba_cuda/numba/cuda/np/ufunc/ufuncbuilder.py +65 -0
  211. numba_cuda/numba/cuda/np/ufunc_db.py +1282 -0
  212. numba_cuda/numba/cuda/np/unsafe/__init__.py +0 -0
  213. numba_cuda/numba/cuda/np/unsafe/ndarray.py +84 -0
  214. numba_cuda/numba/cuda/nvvmutils.py +254 -0
  215. numba_cuda/numba/cuda/printimpl.py +126 -0
  216. numba_cuda/numba/cuda/random.py +308 -0
  217. numba_cuda/numba/cuda/reshape_funcs.cu +156 -0
  218. numba_cuda/numba/cuda/serialize.py +267 -0
  219. numba_cuda/numba/cuda/simulator/__init__.py +63 -0
  220. numba_cuda/numba/cuda/simulator/_internal/__init__.py +4 -0
  221. numba_cuda/numba/cuda/simulator/_internal/cuda_bf16.py +2 -0
  222. numba_cuda/numba/cuda/simulator/api.py +179 -0
  223. numba_cuda/numba/cuda/simulator/bf16.py +4 -0
  224. numba_cuda/numba/cuda/simulator/compiler.py +38 -0
  225. numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +11 -0
  226. numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +462 -0
  227. numba_cuda/numba/cuda/simulator/cudadrv/devices.py +122 -0
  228. numba_cuda/numba/cuda/simulator/cudadrv/driver.py +66 -0
  229. numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +7 -0
  230. numba_cuda/numba/cuda/simulator/cudadrv/dummyarray.py +7 -0
  231. numba_cuda/numba/cuda/simulator/cudadrv/error.py +10 -0
  232. numba_cuda/numba/cuda/simulator/cudadrv/libs.py +10 -0
  233. numba_cuda/numba/cuda/simulator/cudadrv/linkable_code.py +61 -0
  234. numba_cuda/numba/cuda/simulator/cudadrv/nvrtc.py +11 -0
  235. numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +32 -0
  236. numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +22 -0
  237. numba_cuda/numba/cuda/simulator/dispatcher.py +11 -0
  238. numba_cuda/numba/cuda/simulator/kernel.py +320 -0
  239. numba_cuda/numba/cuda/simulator/kernelapi.py +509 -0
  240. numba_cuda/numba/cuda/simulator/memory_management/__init__.py +4 -0
  241. numba_cuda/numba/cuda/simulator/memory_management/nrt.py +21 -0
  242. numba_cuda/numba/cuda/simulator/reduction.py +19 -0
  243. numba_cuda/numba/cuda/simulator/tests/support.py +4 -0
  244. numba_cuda/numba/cuda/simulator/vector_types.py +65 -0
  245. numba_cuda/numba/cuda/simulator_init.py +18 -0
  246. numba_cuda/numba/cuda/stubs.py +624 -0
  247. numba_cuda/numba/cuda/target.py +505 -0
  248. numba_cuda/numba/cuda/testing.py +347 -0
  249. numba_cuda/numba/cuda/tests/__init__.py +62 -0
  250. numba_cuda/numba/cuda/tests/benchmarks/__init__.py +0 -0
  251. numba_cuda/numba/cuda/tests/benchmarks/test_kernel_launch.py +119 -0
  252. numba_cuda/numba/cuda/tests/cloudpickle_main_class.py +9 -0
  253. numba_cuda/numba/cuda/tests/core/serialize_usecases.py +113 -0
  254. numba_cuda/numba/cuda/tests/core/test_itanium_mangler.py +83 -0
  255. numba_cuda/numba/cuda/tests/core/test_serialize.py +371 -0
  256. numba_cuda/numba/cuda/tests/cudadrv/__init__.py +9 -0
  257. numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +147 -0
  258. numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +161 -0
  259. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +397 -0
  260. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +24 -0
  261. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +180 -0
  262. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +360 -0
  263. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +191 -0
  264. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +621 -0
  265. numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +247 -0
  266. numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +100 -0
  267. numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +200 -0
  268. numba_cuda/numba/cuda/tests/cudadrv/test_events.py +53 -0
  269. numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +72 -0
  270. numba_cuda/numba/cuda/tests/cudadrv/test_init.py +138 -0
  271. numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +43 -0
  272. numba_cuda/numba/cuda/tests/cudadrv/test_is_fp16.py +15 -0
  273. numba_cuda/numba/cuda/tests/cudadrv/test_linkable_code.py +58 -0
  274. numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +348 -0
  275. numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +128 -0
  276. numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py +301 -0
  277. numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +174 -0
  278. numba_cuda/numba/cuda/tests/cudadrv/test_nvrtc.py +28 -0
  279. numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +185 -0
  280. numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +39 -0
  281. numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +23 -0
  282. numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +38 -0
  283. numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +48 -0
  284. numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +44 -0
  285. numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +127 -0
  286. numba_cuda/numba/cuda/tests/cudapy/__init__.py +9 -0
  287. numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +231 -0
  288. numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +50 -0
  289. numba_cuda/numba/cuda/tests/cudapy/cg_cache_usecases.py +36 -0
  290. numba_cuda/numba/cuda/tests/cudapy/complex_usecases.py +116 -0
  291. numba_cuda/numba/cuda/tests/cudapy/enum_usecases.py +59 -0
  292. numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +62 -0
  293. numba_cuda/numba/cuda/tests/cudapy/jitlink.ptx +28 -0
  294. numba_cuda/numba/cuda/tests/cudapy/overload_usecases.py +33 -0
  295. numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +104 -0
  296. numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +47 -0
  297. numba_cuda/numba/cuda/tests/cudapy/test_analysis.py +1122 -0
  298. numba_cuda/numba/cuda/tests/cudapy/test_array.py +344 -0
  299. numba_cuda/numba/cuda/tests/cudapy/test_array_alignment.py +268 -0
  300. numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +203 -0
  301. numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +63 -0
  302. numba_cuda/numba/cuda/tests/cudapy/test_array_reductions.py +360 -0
  303. numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +1815 -0
  304. numba_cuda/numba/cuda/tests/cudapy/test_bfloat16.py +599 -0
  305. numba_cuda/numba/cuda/tests/cudapy/test_bfloat16_bindings.py +377 -0
  306. numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +160 -0
  307. numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +27 -0
  308. numba_cuda/numba/cuda/tests/cudapy/test_byteflow.py +98 -0
  309. numba_cuda/numba/cuda/tests/cudapy/test_cache_hints.py +210 -0
  310. numba_cuda/numba/cuda/tests/cudapy/test_caching.py +683 -0
  311. numba_cuda/numba/cuda/tests/cudapy/test_casting.py +265 -0
  312. numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +42 -0
  313. numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +718 -0
  314. numba_cuda/numba/cuda/tests/cudapy/test_complex.py +370 -0
  315. numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +23 -0
  316. numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +142 -0
  317. numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +178 -0
  318. numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +193 -0
  319. numba_cuda/numba/cuda/tests/cudapy/test_copy_propagate.py +131 -0
  320. numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +438 -0
  321. numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +94 -0
  322. numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +101 -0
  323. numba_cuda/numba/cuda/tests/cudapy/test_debug.py +105 -0
  324. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +978 -0
  325. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo_types.py +476 -0
  326. numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +500 -0
  327. numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +820 -0
  328. numba_cuda/numba/cuda/tests/cudapy/test_enums.py +152 -0
  329. numba_cuda/numba/cuda/tests/cudapy/test_errors.py +111 -0
  330. numba_cuda/numba/cuda/tests/cudapy/test_exception.py +170 -0
  331. numba_cuda/numba/cuda/tests/cudapy/test_extending.py +1088 -0
  332. numba_cuda/numba/cuda/tests/cudapy/test_extending_types.py +71 -0
  333. numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +265 -0
  334. numba_cuda/numba/cuda/tests/cudapy/test_flow_control.py +1433 -0
  335. numba_cuda/numba/cuda/tests/cudapy/test_forall.py +57 -0
  336. numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +34 -0
  337. numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +69 -0
  338. numba_cuda/numba/cuda/tests/cudapy/test_globals.py +62 -0
  339. numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +474 -0
  340. numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +167 -0
  341. numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +92 -0
  342. numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +39 -0
  343. numba_cuda/numba/cuda/tests/cudapy/test_inline.py +170 -0
  344. numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +255 -0
  345. numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +1219 -0
  346. numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +263 -0
  347. numba_cuda/numba/cuda/tests/cudapy/test_ir.py +598 -0
  348. numba_cuda/numba/cuda/tests/cudapy/test_ir_utils.py +276 -0
  349. numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +101 -0
  350. numba_cuda/numba/cuda/tests/cudapy/test_lang.py +68 -0
  351. numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +123 -0
  352. numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +194 -0
  353. numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +220 -0
  354. numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +173 -0
  355. numba_cuda/numba/cuda/tests/cudapy/test_make_function_to_jit_function.py +364 -0
  356. numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +47 -0
  357. numba_cuda/numba/cuda/tests/cudapy/test_math.py +842 -0
  358. numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +76 -0
  359. numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +78 -0
  360. numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +25 -0
  361. numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +145 -0
  362. numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +39 -0
  363. numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +82 -0
  364. numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +53 -0
  365. numba_cuda/numba/cuda/tests/cudapy/test_operator.py +504 -0
  366. numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +93 -0
  367. numba_cuda/numba/cuda/tests/cudapy/test_overload.py +402 -0
  368. numba_cuda/numba/cuda/tests/cudapy/test_powi.py +128 -0
  369. numba_cuda/numba/cuda/tests/cudapy/test_print.py +193 -0
  370. numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +37 -0
  371. numba_cuda/numba/cuda/tests/cudapy/test_random.py +117 -0
  372. numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +614 -0
  373. numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +130 -0
  374. numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +94 -0
  375. numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +83 -0
  376. numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +86 -0
  377. numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +40 -0
  378. numba_cuda/numba/cuda/tests/cudapy/test_sm.py +457 -0
  379. numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +233 -0
  380. numba_cuda/numba/cuda/tests/cudapy/test_ssa.py +454 -0
  381. numba_cuda/numba/cuda/tests/cudapy/test_stream_api.py +56 -0
  382. numba_cuda/numba/cuda/tests/cudapy/test_sync.py +277 -0
  383. numba_cuda/numba/cuda/tests/cudapy/test_tracing.py +200 -0
  384. numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +90 -0
  385. numba_cuda/numba/cuda/tests/cudapy/test_typeconv.py +333 -0
  386. numba_cuda/numba/cuda/tests/cudapy/test_typeinfer.py +538 -0
  387. numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +585 -0
  388. numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +42 -0
  389. numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +485 -0
  390. numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +312 -0
  391. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +23 -0
  392. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +183 -0
  393. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +40 -0
  394. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +40 -0
  395. numba_cuda/numba/cuda/tests/cudapy/test_warning.py +206 -0
  396. numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +446 -0
  397. numba_cuda/numba/cuda/tests/cudasim/__init__.py +9 -0
  398. numba_cuda/numba/cuda/tests/cudasim/support.py +9 -0
  399. numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +111 -0
  400. numba_cuda/numba/cuda/tests/data/__init__.py +2 -0
  401. numba_cuda/numba/cuda/tests/data/cta_barrier.cu +28 -0
  402. numba_cuda/numba/cuda/tests/data/cuda_include.cu +10 -0
  403. numba_cuda/numba/cuda/tests/data/error.cu +12 -0
  404. numba_cuda/numba/cuda/tests/data/include/add.cuh +8 -0
  405. numba_cuda/numba/cuda/tests/data/jitlink.cu +28 -0
  406. numba_cuda/numba/cuda/tests/data/jitlink.ptx +49 -0
  407. numba_cuda/numba/cuda/tests/data/warn.cu +12 -0
  408. numba_cuda/numba/cuda/tests/doc_examples/__init__.py +9 -0
  409. numba_cuda/numba/cuda/tests/doc_examples/ffi/__init__.py +2 -0
  410. numba_cuda/numba/cuda/tests/doc_examples/ffi/functions.cu +54 -0
  411. numba_cuda/numba/cuda/tests/doc_examples/ffi/include/mul.cuh +8 -0
  412. numba_cuda/numba/cuda/tests/doc_examples/ffi/saxpy.cu +14 -0
  413. numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +86 -0
  414. numba_cuda/numba/cuda/tests/doc_examples/test_cpointer.py +68 -0
  415. numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +81 -0
  416. numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +141 -0
  417. numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +160 -0
  418. numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +180 -0
  419. numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +119 -0
  420. numba_cuda/numba/cuda/tests/doc_examples/test_random.py +66 -0
  421. numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +80 -0
  422. numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +206 -0
  423. numba_cuda/numba/cuda/tests/doc_examples/test_ufunc.py +53 -0
  424. numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +76 -0
  425. numba_cuda/numba/cuda/tests/nocuda/__init__.py +9 -0
  426. numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +452 -0
  427. numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +48 -0
  428. numba_cuda/numba/cuda/tests/nocuda/test_import.py +63 -0
  429. numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +252 -0
  430. numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +59 -0
  431. numba_cuda/numba/cuda/tests/nrt/__init__.py +9 -0
  432. numba_cuda/numba/cuda/tests/nrt/test_nrt.py +387 -0
  433. numba_cuda/numba/cuda/tests/nrt/test_nrt_refct.py +124 -0
  434. numba_cuda/numba/cuda/tests/support.py +900 -0
  435. numba_cuda/numba/cuda/typeconv/__init__.py +4 -0
  436. numba_cuda/numba/cuda/typeconv/castgraph.py +137 -0
  437. numba_cuda/numba/cuda/typeconv/rules.py +63 -0
  438. numba_cuda/numba/cuda/typeconv/typeconv.py +121 -0
  439. numba_cuda/numba/cuda/types/__init__.py +233 -0
  440. numba_cuda/numba/cuda/types/__init__.pyi +167 -0
  441. numba_cuda/numba/cuda/types/abstract.py +9 -0
  442. numba_cuda/numba/cuda/types/common.py +9 -0
  443. numba_cuda/numba/cuda/types/containers.py +9 -0
  444. numba_cuda/numba/cuda/types/cuda_abstract.py +533 -0
  445. numba_cuda/numba/cuda/types/cuda_common.py +110 -0
  446. numba_cuda/numba/cuda/types/cuda_containers.py +971 -0
  447. numba_cuda/numba/cuda/types/cuda_function_type.py +230 -0
  448. numba_cuda/numba/cuda/types/cuda_functions.py +798 -0
  449. numba_cuda/numba/cuda/types/cuda_iterators.py +120 -0
  450. numba_cuda/numba/cuda/types/cuda_misc.py +569 -0
  451. numba_cuda/numba/cuda/types/cuda_npytypes.py +690 -0
  452. numba_cuda/numba/cuda/types/cuda_scalars.py +280 -0
  453. numba_cuda/numba/cuda/types/ext_types.py +101 -0
  454. numba_cuda/numba/cuda/types/function_type.py +11 -0
  455. numba_cuda/numba/cuda/types/functions.py +9 -0
  456. numba_cuda/numba/cuda/types/iterators.py +9 -0
  457. numba_cuda/numba/cuda/types/misc.py +9 -0
  458. numba_cuda/numba/cuda/types/npytypes.py +9 -0
  459. numba_cuda/numba/cuda/types/scalars.py +9 -0
  460. numba_cuda/numba/cuda/typing/__init__.py +19 -0
  461. numba_cuda/numba/cuda/typing/arraydecl.py +939 -0
  462. numba_cuda/numba/cuda/typing/asnumbatype.py +130 -0
  463. numba_cuda/numba/cuda/typing/bufproto.py +70 -0
  464. numba_cuda/numba/cuda/typing/builtins.py +1209 -0
  465. numba_cuda/numba/cuda/typing/cffi_utils.py +219 -0
  466. numba_cuda/numba/cuda/typing/cmathdecl.py +47 -0
  467. numba_cuda/numba/cuda/typing/collections.py +138 -0
  468. numba_cuda/numba/cuda/typing/context.py +782 -0
  469. numba_cuda/numba/cuda/typing/ctypes_utils.py +125 -0
  470. numba_cuda/numba/cuda/typing/dictdecl.py +63 -0
  471. numba_cuda/numba/cuda/typing/enumdecl.py +74 -0
  472. numba_cuda/numba/cuda/typing/listdecl.py +147 -0
  473. numba_cuda/numba/cuda/typing/mathdecl.py +158 -0
  474. numba_cuda/numba/cuda/typing/npdatetime.py +322 -0
  475. numba_cuda/numba/cuda/typing/npydecl.py +749 -0
  476. numba_cuda/numba/cuda/typing/setdecl.py +115 -0
  477. numba_cuda/numba/cuda/typing/templates.py +1446 -0
  478. numba_cuda/numba/cuda/typing/typeof.py +301 -0
  479. numba_cuda/numba/cuda/ufuncs.py +746 -0
  480. numba_cuda/numba/cuda/utils.py +724 -0
  481. numba_cuda/numba/cuda/vector_types.py +214 -0
  482. numba_cuda/numba/cuda/vectorizers.py +260 -0
  483. numba_cuda-0.22.1.dist-info/METADATA +109 -0
  484. numba_cuda-0.22.1.dist-info/RECORD +488 -0
  485. numba_cuda-0.22.1.dist-info/WHEEL +6 -0
  486. numba_cuda-0.22.1.dist-info/licenses/LICENSE +26 -0
  487. numba_cuda-0.22.1.dist-info/licenses/LICENSE.numba +24 -0
  488. numba_cuda-0.22.1.dist-info/top_level.txt +1 -0
@@ -0,0 +1,1098 @@
1
+ // SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ // SPDX-License-Identifier: BSD-2-Clause
3
+
4
+ #include "_pymodule.h"
5
+
6
+ #include <cstring>
7
+ #include <ctime>
8
+ #include <cassert>
9
+ #include <vector>
10
+
11
+ #include "_typeof.h"
12
+ #include "frameobject.h"
13
+ #include "traceback.h"
14
+ #include "typeconv.hpp"
15
+ #include "_devicearray.h"
16
+
17
+ /*
18
+ * Notes on the C_TRACE macro:
19
+ *
20
+ * The original C_TRACE macro (from ceval.c) would call
21
+ * PyTrace_C_CALL et al., for which the frame argument wouldn't
22
+ * be usable. Since we explicitly synthesize a frame using the
23
+ * original Python code object, we call PyTrace_CALL instead so
24
+ * the profiler can report the correct source location.
25
+ *
26
+ * Likewise, while ceval.c would call PyTrace_C_EXCEPTION in case
27
+ * of error, the profiler would simply expect a RETURN in case of
28
+ * a Python function, so we generate that here (making sure the
29
+ * exception state is preserved correctly).
30
+ *
31
+ */
32
+
33
+ #if (PY_MAJOR_VERSION >= 3) && ((PY_MINOR_VERSION == 12) || (PY_MINOR_VERSION == 13))
34
+
35
+ #ifndef Py_BUILD_CORE
36
+ #define Py_BUILD_CORE 1
37
+ #endif
38
+ #include "internal/pycore_frame.h"
39
+ // This is a fix suggested in the comments in https://github.com/python/cpython/issues/108216
40
+ // specifically https://github.com/python/cpython/issues/108216#issuecomment-1696565797
41
+ #ifdef HAVE_STD_ATOMIC
42
+ # undef HAVE_STD_ATOMIC
43
+ #endif
44
+ #undef _PyGC_FINALIZED
45
+
46
+ /* dynamic_annotations.h is needed for building Python with --with-valgrind
47
+ * support. The following include is to workaround issues described in
48
+ * https://github.com/numba/numba/pull/10073
49
+ */
50
+ #include "dynamic_annotations.h"
51
+ #if (PY_MINOR_VERSION == 12)
52
+ #include "internal/pycore_atomic.h"
53
+ #endif
54
+ #include "internal/pycore_interp.h"
55
+ #include "internal/pycore_pyerrors.h"
56
+ #include "internal/pycore_instruments.h"
57
+ #include "internal/pycore_call.h"
58
+ #include "cpython/code.h"
59
+
60
+ #elif (PY_MAJOR_VERSION >= 3) && (PY_MINOR_VERSION == 11)
61
+ #ifndef Py_BUILD_CORE
62
+ #define Py_BUILD_CORE 1
63
+ #endif
64
+ #include "internal/pycore_frame.h"
65
+ #include "internal/pycore_pyerrors.h"
66
+
67
+ /*
68
+ * Code originally from:
69
+ * https://github.com/python/cpython/blob/deaf509e8fc6e0363bd6f26d52ad42f976ec42f2/Python/ceval.c#L6804
70
+ */
71
+ static int
72
+ call_trace(Py_tracefunc func, PyObject *obj,
73
+ PyThreadState *tstate, PyFrameObject *frame,
74
+ int what, PyObject *arg)
75
+ {
76
+ int result;
77
+ if (tstate->tracing) {
78
+ return 0;
79
+ }
80
+ if (frame == NULL) {
81
+ return -1;
82
+ }
83
+ int old_what = tstate->tracing_what;
84
+ tstate->tracing_what = what;
85
+ PyThreadState_EnterTracing(tstate);
86
+ result = func(obj, frame, what, NULL);
87
+ PyThreadState_LeaveTracing(tstate);
88
+ tstate->tracing_what = old_what;
89
+ return result;
90
+ }
91
+
92
+ /*
93
+ * Code originally from:
94
+ * https://github.com/python/cpython/blob/d5650a1738fe34f6e1db4af5f4c4edb7cae90a36/Python/ceval.c#L4220-L4240
95
+ */
96
+ static int
97
+ call_trace_protected(Py_tracefunc func, PyObject *obj,
98
+ PyThreadState *tstate, PyFrameObject *frame,
99
+ int what, PyObject *arg)
100
+ {
101
+ PyObject *type, *value, *traceback;
102
+ int err;
103
+ _PyErr_Fetch(tstate, &type, &value, &traceback);
104
+ err = call_trace(func, obj, tstate, frame, what, arg);
105
+ if (err == 0)
106
+ {
107
+ _PyErr_Restore(tstate, type, value, traceback);
108
+ return 0;
109
+ }
110
+ else {
111
+ Py_XDECREF(type);
112
+ Py_XDECREF(value);
113
+ Py_XDECREF(traceback);
114
+ return -1;
115
+ }
116
+ }
117
+
118
+ /*
119
+ * Code originally from:
120
+ * https://github.com/python/cpython/blob/deaf509e8fc6e0363bd6f26d52ad42f976ec42f2/Python/ceval.c#L7245
121
+ * NOTE: The state test https://github.com/python/cpython/blob/d5650a1738fe34f6e1db4af5f4c4edb7cae90a36/Python/ceval.c#L4521
122
+ * has been removed, it's dealt with in call_cfunc.
123
+ */
124
/* C_TRACE(x, call, frame): evaluate `call` into `x`, bracketed by
 * PyTrace_CALL / PyTrace_RETURN profile events reported against `frame`.
 * Expects `tstate` and `cfunc` to be in scope at the expansion site.
 * `x` ends up NULL if the CALL hook fails, if `call` fails, or if the
 * RETURN hook fails (in which case the result of `call` is released). */
#define C_TRACE(x, call, frame) \
    if (call_trace(tstate->c_profilefunc, tstate->c_profileobj, \
                   tstate, frame, PyTrace_CALL, cfunc) != 0) { \
        x = NULL; \
    } \
    else { \
        x = call; \
        if (tstate->c_profilefunc != NULL) { \
            if (x != NULL) { \
                if (call_trace(tstate->c_profilefunc, \
                               tstate->c_profileobj, \
                               tstate, frame, \
                               PyTrace_RETURN, cfunc) != 0) { \
                    Py_DECREF(x); \
                    x = NULL; \
                } \
            } \
            else { \
                call_trace_protected(tstate->c_profilefunc, \
                                     tstate->c_profileobj, \
                                     tstate, frame, \
                                     PyTrace_RETURN, cfunc); \
                /* XXX should pass (type, value, tb) */ \
            } \
        } \
    }
150
+
151
+ #elif (PY_MAJOR_VERSION >= 3) && (PY_MINOR_VERSION == 10 || PY_MINOR_VERSION == 11)
152
+
153
+ /*
154
+ * Code originally from:
155
+ * https://github.com/python/cpython/blob/c5bfb88eb6f82111bb1603ae9d78d0476b552d66/Python/ceval.c#L36-L40
156
+ */
157
+ typedef struct {
158
+ PyCodeObject *code; // The code object for the bounds. May be NULL.
159
+ PyCodeAddressRange bounds; // Only valid if code != NULL.
160
+ CFrame cframe;
161
+ } PyTraceInfo;
162
+
163
+
164
+ /*
165
+ * Code originally from:
166
+ * https://github.com/python/cpython/blob/c5bfb88eb6f82111bb1603ae9d78d0476b552d66/Objects/codeobject.c#L1257-L1266
167
+ * NOTE: The function is renamed.
168
+ */
169
+ static void
170
+ _nb_PyLineTable_InitAddressRange(const char *linetable, Py_ssize_t length, int firstlineno, PyCodeAddressRange *range)
171
+ {
172
+ range->opaque.lo_next = linetable;
173
+ range->opaque.limit = range->opaque.lo_next + length;
174
+ range->ar_start = -1;
175
+ range->ar_end = 0;
176
+ range->opaque.computed_line = firstlineno;
177
+ range->ar_line = -1;
178
+ }
179
+
180
+ /*
181
+ * Code originally from:
182
+ * https://github.com/python/cpython/blob/c5bfb88eb6f82111bb1603ae9d78d0476b552d66/Objects/codeobject.c#L1269-L1275
183
+ * NOTE: The function is renamed.
184
+ */
185
+ static int
186
+ _nb_PyCode_InitAddressRange(PyCodeObject* co, PyCodeAddressRange *bounds)
187
+ {
188
+ const char *linetable = PyBytes_AS_STRING(co->co_linetable);
189
+ Py_ssize_t length = PyBytes_GET_SIZE(co->co_linetable);
190
+ _nb_PyLineTable_InitAddressRange(linetable, length, co->co_firstlineno, bounds);
191
+ return bounds->ar_line;
192
+ }
193
+
194
+ /*
195
+ * Code originally from:
196
+ * https://github.com/python/cpython/blob/c5bfb88eb6f82111bb1603ae9d78d0476b552d66/Python/ceval.c#L5468-L5475
197
+ * NOTE: The call to _PyCode_InitAddressRange is renamed.
198
+ */
199
+ static void
200
+ initialize_trace_info(PyTraceInfo *trace_info, PyFrameObject *frame)
201
+ {
202
+ if (trace_info->code != frame->f_code) {
203
+ trace_info->code = frame->f_code;
204
+ _nb_PyCode_InitAddressRange(frame->f_code, &trace_info->bounds);
205
+ }
206
+ }
207
+
208
+ /*
209
+ * Code originally from:
210
+ * https://github.com/python/cpython/blob/c5bfb88eb6f82111bb1603ae9d78d0476b552d66/Python/ceval.c#L5477-L5501
211
+ */
212
+ static int
213
+ call_trace(Py_tracefunc func, PyObject *obj,
214
+ PyThreadState *tstate, PyFrameObject *frame,
215
+ PyTraceInfo *trace_info,
216
+ int what, PyObject *arg)
217
+ {
218
+ int result;
219
+ if (tstate->tracing)
220
+ return 0;
221
+ tstate->tracing++;
222
+ tstate->cframe->use_tracing = 0;
223
+ if (frame->f_lasti < 0) {
224
+ frame->f_lineno = frame->f_code->co_firstlineno;
225
+ }
226
+ else {
227
+ initialize_trace_info(trace_info, frame);
228
+ frame->f_lineno = _PyCode_CheckLineNumber(frame->f_lasti*sizeof(_Py_CODEUNIT), &trace_info->bounds);
229
+ }
230
+ result = func(obj, frame, what, arg);
231
+ frame->f_lineno = 0;
232
+ tstate->cframe->use_tracing = ((tstate->c_tracefunc != NULL)
233
+ || (tstate->c_profilefunc != NULL));
234
+ tstate->tracing--;
235
+ return result;
236
+ }
237
+
238
+ /*
239
+ * Code originally from:
240
+ * https://github.com/python/cpython/blob/c5bfb88eb6f82111bb1603ae9d78d0476b552d66/Python/ceval.c#L5445-L5466
241
+ */
242
+ static int
243
+ call_trace_protected(Py_tracefunc func, PyObject *obj,
244
+ PyThreadState *tstate, PyFrameObject *frame,
245
+ PyTraceInfo *trace_info,
246
+ int what, PyObject *arg)
247
+ {
248
+ PyObject *type, *value, *traceback;
249
+ int err;
250
+ PyErr_Fetch(&type, &value, &traceback);
251
+ err = call_trace(func, obj, tstate, frame, trace_info, what, arg);
252
+ if (err == 0)
253
+ {
254
+ PyErr_Restore(type, value, traceback);
255
+ return 0;
256
+ }
257
+ else
258
+ {
259
+ Py_XDECREF(type);
260
+ Py_XDECREF(value);
261
+ Py_XDECREF(traceback);
262
+ return -1;
263
+ }
264
+ }
265
+
266
+ /*
267
+ * Code originally from:
268
+ * https://github.com/python/cpython/blob/c5bfb88eb6f82111bb1603ae9d78d0476b552d66/Python/ceval.c#L5810-L5839
269
+ * NOTE: The state test https://github.com/python/cpython/blob/c5bfb88eb6f82111bb1603ae9d78d0476b552d66/Python/ceval.c#L5811
270
+ * has been removed, it's dealt with in call_cfunc.
271
+ */
272
/* C_TRACE(x, call): evaluate `call` into `x`, bracketed by PyTrace_CALL /
 * PyTrace_RETURN profile events reported against tstate->frame.
 * Expects `tstate`, `trace_info` and `cfunc` to be in scope at the
 * expansion site. `x` ends up NULL if the CALL hook fails, if `call`
 * fails, or if the RETURN hook fails (in which case the result of `call`
 * is released). */
#define C_TRACE(x, call) \
    if (call_trace(tstate->c_profilefunc, tstate->c_profileobj, \
                   tstate, tstate->frame, &trace_info, \
                   PyTrace_CALL, cfunc) != 0) { \
        x = NULL; \
    } \
    else { \
        x = call; \
        if (tstate->c_profilefunc != NULL) { \
            if (x != NULL) { \
                if (call_trace(tstate->c_profilefunc, \
                               tstate->c_profileobj, \
                               tstate, tstate->frame, \
                               &trace_info, \
                               PyTrace_RETURN, cfunc) != 0) { \
                    Py_DECREF(x); \
                    x = NULL; \
                } \
            } \
            else { \
                call_trace_protected(tstate->c_profilefunc, \
                                     tstate->c_profileobj, \
                                     tstate, tstate->frame, \
                                     &trace_info, \
                                     PyTrace_RETURN, cfunc); \
                /* XXX should pass (type, value, tb) */ \
            } \
        } \
    }
305
+
306
+ #else // Python <3.10
307
+
308
+ /*
309
+ * Code originally from:
310
+ * https://github.com/python/cpython/blob/d5650a1738fe34f6e1db4af5f4c4edb7cae90a36/Python/ceval.c#L4242-L4257
311
+ */
312
+ static int
313
+ call_trace(Py_tracefunc func, PyObject *obj,
314
+ PyThreadState *tstate, PyFrameObject *frame,
315
+ int what, PyObject *arg)
316
+ {
317
+ int result;
318
+ if (tstate->tracing)
319
+ return 0;
320
+ tstate->tracing++;
321
+ tstate->use_tracing = 0;
322
+ result = func(obj, frame, what, arg);
323
+ tstate->use_tracing = ((tstate->c_tracefunc != NULL)
324
+ || (tstate->c_profilefunc != NULL));
325
+ tstate->tracing--;
326
+ return result;
327
+ }
328
+
329
+ /*
330
+ * Code originally from:
331
+ * https://github.com/python/cpython/blob/d5650a1738fe34f6e1db4af5f4c4edb7cae90a36/Python/ceval.c#L4220-L4240
332
+ */
333
+ static int
334
+ call_trace_protected(Py_tracefunc func, PyObject *obj,
335
+ PyThreadState *tstate, PyFrameObject *frame,
336
+ int what, PyObject *arg)
337
+ {
338
+ PyObject *type, *value, *traceback;
339
+ int err;
340
+ PyErr_Fetch(&type, &value, &traceback);
341
+ err = call_trace(func, obj, tstate, frame, what, arg);
342
+ if (err == 0)
343
+ {
344
+ PyErr_Restore(type, value, traceback);
345
+ return 0;
346
+ }
347
+ else
348
+ {
349
+ Py_XDECREF(type);
350
+ Py_XDECREF(value);
351
+ Py_XDECREF(traceback);
352
+ return -1;
353
+ }
354
+ }
355
+
356
+ /*
357
+ * Code originally from:
358
+ * https://github.com/python/cpython/blob/d5650a1738fe34f6e1db4af5f4c4edb7cae90a36/Python/ceval.c#L4520-L4549
359
+ * NOTE: The state test https://github.com/python/cpython/blob/d5650a1738fe34f6e1db4af5f4c4edb7cae90a36/Python/ceval.c#L4521
360
+ * has been removed, it's dealt with in call_cfunc.
361
+ */
362
/* C_TRACE(x, call): evaluate `call` into `x`, bracketed by PyTrace_CALL /
 * PyTrace_RETURN profile events reported against tstate->frame.
 * Expects `tstate` and `cfunc` to be in scope at the expansion site.
 * `x` ends up NULL if the CALL hook fails, if `call` fails, or if the
 * RETURN hook fails (in which case the result of `call` is released). */
#define C_TRACE(x, call) \
    if (call_trace(tstate->c_profilefunc, tstate->c_profileobj, \
                   tstate, tstate->frame, PyTrace_CALL, cfunc) != 0) { \
        x = NULL; \
    } \
    else { \
        x = call; \
        if (tstate->c_profilefunc != NULL) { \
            if (x != NULL) { \
                if (call_trace(tstate->c_profilefunc, \
                               tstate->c_profileobj, \
                               tstate, tstate->frame, \
                               PyTrace_RETURN, cfunc) != 0) { \
                    Py_DECREF(x); \
                    x = NULL; \
                } \
            } \
            else { \
                call_trace_protected(tstate->c_profilefunc, \
                                     tstate->c_profileobj, \
                                     tstate, tstate->frame, \
                                     PyTrace_RETURN, cfunc); \
                /* XXX should pass (type, value, tb) */ \
            } \
        } \
    }
392
+
393
+
394
+ #endif
395
+
396
+ typedef std::vector<Type> TypeTable;
397
+ typedef std::vector<PyObject*> Functions;
398
+
399
+ /* The Dispatcher class is the base class of all dispatchers in the CUDA target.
400
+ Its main responsibilities are:
401
+
402
+ - Resolving the best overload to call for a given set of arguments, and
403
+ - Calling the resolved overload.
404
+
405
+ This logic is implemented within this class for efficiency (lookup of the
406
+ appropriate overload needs to be fast) and ease of implementation (calling
407
+ directly into a compiled function using a function pointer is easier within
408
+ the C++ code where the overload has been resolved). */
409
+ class Dispatcher {
410
+ public:
411
+ PyObject_HEAD
412
+ /* Whether compilation of new overloads is permitted */
413
+ char can_compile;
414
+ /* Whether fallback to object mode is permitted */
415
+ char can_fallback;
416
+ /* Whether types must match exactly when resolving overloads.
417
+ If not, conversions (e.g. float32 -> float64) are permitted when
418
+ searching for a match. */
419
+ char exact_match_required;
420
+ /* Borrowed reference */
421
+ PyObject *fallbackdef;
422
+ /* Whether to fold named arguments and default values
423
+ (false for lifted loops) */
424
+ int fold_args;
425
+ /* Whether the last positional argument is a stararg */
426
+ int has_stararg;
427
+ /* Tuple of argument names */
428
+ PyObject *argnames;
429
+ /* Tuple of default values */
430
+ PyObject *defargs;
431
+ /* Number of arguments to function */
432
+ int argct;
433
+ /* Used for selecting overloaded function implementations */
434
+ TypeManager *tm;
435
+ /* An array of overloads */
436
+ Functions functions;
437
+ /* A flattened array of argument types to all overloads
438
+ * (invariant: sizeof(overloads) == argct * sizeof(functions)) */
439
+ TypeTable overloads;
440
+
441
+ /* Add a new overload. Parameters:
442
+
443
+ - args: An array of Type objects, one for each parameter
444
+ - callable: The callable implementing this overload. */
445
+ void addDefinition(Type args[], PyObject *callable) {
446
+ overloads.reserve(argct + overloads.size());
447
+ for (int i=0; i<argct; ++i) {
448
+ overloads.push_back(args[i]);
449
+ }
450
+ functions.push_back(callable);
451
+ }
452
+
453
+ /* Given a list of types, find the overloads that have a matching signature.
454
+ Returns the best match, as well as the number of matches found.
455
+
456
+ Parameters:
457
+
458
+ - sig: an array of Type objects, one for each parameter.
459
+ - matches: the number of matches found (mutated by this function).
460
+ - allow_unsafe: whether to match overloads that would require an unsafe
461
+ cast.
462
+ - exact_match_required: Whether all arguments types must match the
463
+ overload's types exactly. When false,
464
+ overloads that would require a type conversion
465
+ can also be matched. */
466
+ PyObject* resolve(Type sig[], int &matches, bool allow_unsafe,
467
+ bool exact_match_required) const {
468
+ const int ovct = functions.size();
469
+ int selected;
470
+ matches = 0;
471
+ if (0 == ovct) {
472
+ // No overloads registered
473
+ return NULL;
474
+ }
475
+ if (argct == 0) {
476
+ // Nullary function: trivial match on first overload
477
+ matches = 1;
478
+ selected = 0;
479
+ }
480
+ else {
481
+ matches = tm->selectOverload(sig, &overloads[0], selected, argct,
482
+ ovct, allow_unsafe,
483
+ exact_match_required);
484
+ }
485
+ if (matches == 1) {
486
+ return functions[selected];
487
+ }
488
+ return NULL;
489
+ }
490
+
491
+ /* Remove all overloads */
492
+ void clear() {
493
+ functions.clear();
494
+ overloads.clear();
495
+ }
496
+
497
+ };
498
+
499
+
500
+ static int
501
+ Dispatcher_traverse(Dispatcher *self, visitproc visit, void *arg)
502
+ {
503
+ Py_VISIT(self->defargs);
504
+ return 0;
505
+ }
506
+
507
+ static void
508
+ Dispatcher_dealloc(Dispatcher *self)
509
+ {
510
+ Py_XDECREF(self->argnames);
511
+ Py_XDECREF(self->defargs);
512
+ self->clear();
513
+ Py_TYPE(self)->tp_free((PyObject*)self);
514
+ }
515
+
516
+
517
+ static int
518
+ Dispatcher_init(Dispatcher *self, PyObject *args, PyObject *kwds)
519
+ {
520
+ PyObject *tmaddrobj;
521
+ void *tmaddr;
522
+ int argct;
523
+ int can_fallback;
524
+ int has_stararg = 0;
525
+ int exact_match_required = 0;
526
+
527
+ if (!PyArg_ParseTuple(args, "OiiO!O!i|ii", &tmaddrobj, &argct,
528
+ &self->fold_args,
529
+ &PyTuple_Type, &self->argnames,
530
+ &PyTuple_Type, &self->defargs,
531
+ &can_fallback,
532
+ &has_stararg,
533
+ &exact_match_required
534
+ )) {
535
+ return -1;
536
+ }
537
+ Py_INCREF(self->argnames);
538
+ Py_INCREF(self->defargs);
539
+ tmaddr = PyLong_AsVoidPtr(tmaddrobj);
540
+ self->tm = static_cast<TypeManager*>(tmaddr);
541
+ self->argct = argct;
542
+ self->can_compile = 1;
543
+ self->can_fallback = can_fallback;
544
+ self->fallbackdef = NULL;
545
+ self->has_stararg = has_stararg;
546
+ self->exact_match_required = exact_match_required;
547
+ return 0;
548
+ }
549
+
550
+ static PyObject *
551
+ Dispatcher_clear(Dispatcher *self, PyObject *args)
552
+ {
553
+ self->clear();
554
+ Py_RETURN_NONE;
555
+ }
556
+
557
+ static
558
+ PyObject*
559
+ Dispatcher_Insert(Dispatcher *self, PyObject *args, PyObject *kwds)
560
+ {
561
+ /* The cuda kwarg is a temporary addition until CUDA overloads are compiled
562
+ * functions. Once they are compiled functions, kwargs can be removed from
563
+ * this function. */
564
+ static char *keywords[] = {
565
+ (char*)"sig",
566
+ (char*)"func",
567
+ (char*)"objectmode",
568
+ (char*)"cuda",
569
+ NULL
570
+ };
571
+
572
+ PyObject *sigtup, *cfunc;
573
+ int i, sigsz;
574
+ int *sig;
575
+ int objectmode = 0;
576
+ int cuda = 0;
577
+
578
+ if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO|ip", keywords, &sigtup,
579
+ &cfunc, &objectmode, &cuda)) {
580
+ return NULL;
581
+ }
582
+
583
+ if (!cuda && !PyObject_TypeCheck(cfunc, &PyCFunction_Type) ) {
584
+ PyErr_SetString(PyExc_TypeError, "must be builtin_function_or_method");
585
+ return NULL;
586
+ }
587
+
588
+ sigsz = PySequence_Fast_GET_SIZE(sigtup);
589
+ sig = new int[sigsz];
590
+
591
+ for (i = 0; i < sigsz; ++i) {
592
+ sig[i] = PyLong_AsLong(PySequence_Fast_GET_ITEM(sigtup, i));
593
+ }
594
+
595
+ /* The reference to cfunc is borrowed; this only works because the
596
+ derived Python class also stores an (owned) reference to cfunc. */
597
+ self->addDefinition(sig, cfunc);
598
+
599
+ /* Add pure python fallback */
600
+ if (!self->fallbackdef && objectmode){
601
+ self->fallbackdef = cfunc;
602
+ }
603
+
604
+ delete[] sig;
605
+
606
+ Py_RETURN_NONE;
607
+ }
608
+
609
+ static
610
+ void explain_issue(PyObject *dispatcher, PyObject *args, PyObject *kws,
611
+ const char *method_name, const char *default_msg)
612
+ {
613
+ PyObject *callback, *result;
614
+ callback = PyObject_GetAttrString(dispatcher, method_name);
615
+ if (!callback) {
616
+ PyErr_SetString(PyExc_TypeError, default_msg);
617
+ return;
618
+ }
619
+ result = PyObject_Call(callback, args, kws);
620
+ Py_DECREF(callback);
621
+ if (result != NULL) {
622
+ PyErr_Format(PyExc_RuntimeError, "%s must raise an exception",
623
+ method_name);
624
+ Py_DECREF(result);
625
+ }
626
+ }
627
+
628
+ static
629
+ void explain_ambiguous(PyObject *dispatcher, PyObject *args, PyObject *kws)
630
+ {
631
+ explain_issue(dispatcher, args, kws, "_explain_ambiguous",
632
+ "Ambiguous overloading");
633
+ }
634
+
635
+ static
636
+ void explain_matching_error(PyObject *dispatcher, PyObject *args, PyObject *kws)
637
+ {
638
+ explain_issue(dispatcher, args, kws, "_explain_matching_error",
639
+ "No matching definition");
640
+ }
641
+
642
+ static
643
+ int search_new_conversions(PyObject *dispatcher, PyObject *args, PyObject *kws)
644
+ {
645
+ PyObject *callback, *result;
646
+ int res;
647
+
648
+ callback = PyObject_GetAttrString(dispatcher,
649
+ "_search_new_conversions");
650
+ if (!callback) {
651
+ return -1;
652
+ }
653
+ result = PyObject_Call(callback, args, kws);
654
+ Py_DECREF(callback);
655
+ if (result == NULL) {
656
+ return -1;
657
+ }
658
+ if (!PyBool_Check(result)) {
659
+ Py_DECREF(result);
660
+ PyErr_SetString(PyExc_TypeError,
661
+ "_search_new_conversions() should return a boolean");
662
+ return -1;
663
+ }
664
+ res = (result == Py_True) ? 1 : 0;
665
+ Py_DECREF(result);
666
+ return res;
667
+ }
668
+
669
+
670
+ /* A custom, fast, inlinable version of PyCFunction_Call() */
671
+ static PyObject *
672
+ call_cfunc(Dispatcher *self, PyObject *cfunc, PyObject *args, PyObject *kws, PyObject *locals)
673
+ {
674
+ PyCFunctionWithKeywords fn = NULL;
675
+ PyObject * pyresult = NULL;
676
+
677
+ assert(PyCFunction_Check(cfunc));
678
+ assert(PyCFunction_GET_FLAGS(cfunc) == (METH_VARARGS | METH_KEYWORDS));
679
+ fn = (PyCFunctionWithKeywords) PyCFunction_GET_FUNCTION(cfunc);
680
+
681
+ // make call
682
+ pyresult = fn(PyCFunction_GET_SELF(cfunc), args, kws);
683
+
684
+ return pyresult;
685
+ }
686
+
687
+
688
+
689
+
690
+ /* A copy of compile_and_invoke, that only compiles. This is needed for CUDA
691
+ * kernels, because its overloads are Python instances of the _Kernel class,
692
+ * rather than compiled functions. Once CUDA overloads are compiled functions,
693
+ * cuda_compile_only can be removed. */
694
+ static
695
+ PyObject*
696
+ cuda_compile_only(Dispatcher *self, PyObject *args, PyObject *kws, PyObject *locals)
697
+ {
698
+ /* Compile a new one */
699
+ PyObject *cfa, *cfunc;
700
+ cfa = PyObject_GetAttrString((PyObject*)self, "_compile_for_args");
701
+ if (cfa == NULL)
702
+ return NULL;
703
+
704
+ cfunc = PyObject_Call(cfa, args, kws);
705
+ Py_DECREF(cfa);
706
+
707
+ return cfunc;
708
+ }
709
+
710
+ static int
711
+ find_named_args(Dispatcher *self, PyObject **pargs, PyObject **pkws)
712
+ {
713
+ PyObject *oldargs = *pargs, *newargs;
714
+ PyObject *kws = *pkws;
715
+ Py_ssize_t pos_args = PyTuple_GET_SIZE(oldargs);
716
+ Py_ssize_t named_args, total_args, i;
717
+ Py_ssize_t func_args = PyTuple_GET_SIZE(self->argnames);
718
+ Py_ssize_t defaults = PyTuple_GET_SIZE(self->defargs);
719
+ /* Last parameter with a default value */
720
+ Py_ssize_t last_def = (self->has_stararg)
721
+ ? func_args - 2
722
+ : func_args - 1;
723
+ /* First parameter with a default value */
724
+ Py_ssize_t first_def = last_def - defaults + 1;
725
+ /* Minimum number of required arguments */
726
+ Py_ssize_t minargs = first_def;
727
+
728
+ if (kws != NULL)
729
+ named_args = PyDict_Size(kws);
730
+ else
731
+ named_args = 0;
732
+ total_args = pos_args + named_args;
733
+ if (!self->has_stararg && total_args > func_args) {
734
+ PyErr_Format(PyExc_TypeError,
735
+ "too many arguments: expected %d, got %d",
736
+ (int) func_args, (int) total_args);
737
+ return -1;
738
+ }
739
+ else if (total_args < minargs) {
740
+ if (minargs == func_args)
741
+ PyErr_Format(PyExc_TypeError,
742
+ "not enough arguments: expected %d, got %d",
743
+ (int) minargs, (int) total_args);
744
+ else
745
+ PyErr_Format(PyExc_TypeError,
746
+ "not enough arguments: expected at least %d, got %d",
747
+ (int) minargs, (int) total_args);
748
+ return -1;
749
+ }
750
+ newargs = PyTuple_New(func_args);
751
+ if (!newargs)
752
+ return -1;
753
+ /* First pack the stararg */
754
+ if (self->has_stararg) {
755
+ Py_ssize_t stararg_size = Py_MAX(0, pos_args - func_args + 1);
756
+ PyObject *stararg = PyTuple_New(stararg_size);
757
+ if (!stararg) {
758
+ Py_DECREF(newargs);
759
+ return -1;
760
+ }
761
+ for (i = 0; i < stararg_size; i++) {
762
+ PyObject *value = PyTuple_GET_ITEM(oldargs, func_args - 1 + i);
763
+ Py_INCREF(value);
764
+ PyTuple_SET_ITEM(stararg, i, value);
765
+ }
766
+ /* Put it in last position */
767
+ PyTuple_SET_ITEM(newargs, func_args - 1, stararg);
768
+
769
+ }
770
+ for (i = 0; i < pos_args; i++) {
771
+ PyObject *value = PyTuple_GET_ITEM(oldargs, i);
772
+ if (self->has_stararg && i >= func_args - 1) {
773
+ /* Skip stararg */
774
+ break;
775
+ }
776
+ Py_INCREF(value);
777
+ PyTuple_SET_ITEM(newargs, i, value);
778
+ }
779
+
780
+ /* Iterate over missing positional arguments, try to find them in
781
+ named arguments or default values. */
782
+ for (i = pos_args; i < func_args; i++) {
783
+ PyObject *name = PyTuple_GET_ITEM(self->argnames, i);
784
+ if (self->has_stararg && i >= func_args - 1) {
785
+ /* Skip stararg */
786
+ break;
787
+ }
788
+ if (kws != NULL) {
789
+ /* Named argument? */
790
+ PyObject *value = PyDict_GetItem(kws, name);
791
+ if (value != NULL) {
792
+ Py_INCREF(value);
793
+ PyTuple_SET_ITEM(newargs, i, value);
794
+ named_args--;
795
+ continue;
796
+ }
797
+ }
798
+ if (i >= first_def && i <= last_def) {
799
+ /* Argument has a default value? */
800
+ PyObject *value = PyTuple_GET_ITEM(self->defargs, i - first_def);
801
+ Py_INCREF(value);
802
+ PyTuple_SET_ITEM(newargs, i, value);
803
+ continue;
804
+ }
805
+ else if (i < func_args - 1 || !self->has_stararg) {
806
+ PyErr_Format(PyExc_TypeError,
807
+ "missing argument '%s'",
808
+ PyString_AsString(name));
809
+ Py_DECREF(newargs);
810
+ return -1;
811
+ }
812
+ }
813
+ if (named_args) {
814
+ PyErr_Format(PyExc_TypeError,
815
+ "some keyword arguments unexpected");
816
+ Py_DECREF(newargs);
817
+ return -1;
818
+ }
819
+ *pargs = newargs;
820
+ *pkws = NULL;
821
+ return 0;
822
+ }
823
+
824
+
825
+
826
+ /* Based on Dispatcher_call above, with the following differences:
827
+ 1. It does not invoke the definition of the function.
828
+ 2. It returns the definition, instead of a value returned by the function.
829
+
830
+ This is because CUDA functions are, at present, _Kernel objects rather than
831
+ compiled functions. */
832
+ static PyObject*
833
+ Dispatcher_cuda_call(Dispatcher *self, PyObject *args, PyObject *kws)
834
+ {
835
+ PyObject *tmptype, *retval = NULL;
836
+ int *tys = NULL;
837
+ int argct;
838
+ int i;
839
+ int prealloc[24];
840
+ int matches;
841
+ PyObject *cfunc;
842
+ PyThreadState *ts = PyThreadState_Get();
843
+ PyObject *locals = NULL;
844
+
845
+ /* If compilation is enabled, ensure that an exact match is found and if
846
+ * not compile one */
847
+ int exact_match_required = self->can_compile ? 1 : self->exact_match_required;
848
+
849
+ #if (PY_MAJOR_VERSION >= 3) && (PY_MINOR_VERSION >= 10)
850
+ if (ts->tracing && ts->c_profilefunc) {
851
+ #else
852
+ if (ts->use_tracing && ts->c_profilefunc) {
853
+ #endif
854
+ locals = PyEval_GetLocals();
855
+ if (locals == NULL) {
856
+ goto CLEANUP;
857
+ }
858
+ }
859
+ if (self->fold_args) {
860
+ if (find_named_args(self, &args, &kws))
861
+ return NULL;
862
+ }
863
+ else
864
+ Py_INCREF(args);
865
+ /* Now we own a reference to args */
866
+
867
+ argct = PySequence_Fast_GET_SIZE(args);
868
+
869
+ if (argct < (Py_ssize_t) (sizeof(prealloc) / sizeof(int)))
870
+ tys = prealloc;
871
+ else
872
+ tys = new int[argct];
873
+
874
+ for (i = 0; i < argct; ++i) {
875
+ tmptype = PySequence_Fast_GET_ITEM(args, i);
876
+ tys[i] = typeof_typecode((PyObject *) self, tmptype);
877
+ if (tys[i] == -1) {
878
+ if (self->can_fallback){
879
+ /* We will clear the exception if fallback is allowed. */
880
+ PyErr_Clear();
881
+ } else {
882
+ goto CLEANUP;
883
+ }
884
+ }
885
+ }
886
+
887
+ /* We only allow unsafe conversions if compilation of new specializations
888
+ has been disabled. */
889
+ cfunc = self->resolve(tys, matches, !self->can_compile,
890
+ exact_match_required);
891
+
892
+ if (matches == 0 && !self->can_compile) {
893
+ /*
894
+ * If we can't compile a new specialization, look for
895
+ * matching signatures for which conversions haven't been
896
+ * registered on the C++ TypeManager.
897
+ */
898
+ int res = search_new_conversions((PyObject *) self, args, kws);
899
+ if (res < 0) {
900
+ retval = NULL;
901
+ goto CLEANUP;
902
+ }
903
+ if (res > 0) {
904
+ /* Retry with the newly registered conversions */
905
+ cfunc = self->resolve(tys, matches, !self->can_compile,
906
+ exact_match_required);
907
+ }
908
+ }
909
+
910
+ if (matches == 1) {
911
+ /* Definition is found */
912
+ retval = cfunc;
913
+ Py_INCREF(retval);
914
+ } else if (matches == 0) {
915
+ /* No matching definition */
916
+ if (self->can_compile) {
917
+ retval = cuda_compile_only(self, args, kws, locals);
918
+ } else if (self->fallbackdef) {
919
+ /* Have object fallback */
920
+ retval = call_cfunc(self, self->fallbackdef, args, kws, locals);
921
+ } else {
922
+ /* Raise TypeError */
923
+ explain_matching_error((PyObject *) self, args, kws);
924
+ retval = NULL;
925
+ }
926
+ } else if (self->can_compile) {
927
+ /* Ambiguous, but are allowed to compile */
928
+ retval = cuda_compile_only(self, args, kws, locals);
929
+ } else {
930
+ /* Ambiguous */
931
+ explain_ambiguous((PyObject *) self, args, kws);
932
+ retval = NULL;
933
+ }
934
+
935
+ CLEANUP:
936
+ if (tys != prealloc)
937
+ delete[] tys;
938
+ Py_DECREF(args);
939
+
940
+ return retval;
941
+ }
942
+
943
+ static int
944
+ import_devicearray(void)
945
+ {
946
+ PyObject *devicearray = PyImport_ImportModule(NUMBA_DEVICEARRAY_IMPORT_NAME);
947
+ if (devicearray == NULL) {
948
+ return -1;
949
+ }
950
+
951
+ PyObject *d = PyModule_GetDict(devicearray);
952
+ if (d == NULL) {
953
+ Py_DECREF(devicearray);
954
+ return -1;
955
+ }
956
+
957
+ PyObject *key = PyUnicode_FromString("_DEVICEARRAY_API");
958
+ PyObject *c_api = PyDict_GetItemWithError(d, key);
959
+ int retcode = 0;
960
+ if (PyCapsule_IsValid(c_api, NUMBA_DEVICEARRAY_IMPORT_NAME "._DEVICEARRAY_API")) {
961
+ DeviceArray_API = (void**)PyCapsule_GetPointer(c_api, NUMBA_DEVICEARRAY_IMPORT_NAME "._DEVICEARRAY_API");
962
+ if (DeviceArray_API == NULL) {
963
+ retcode = -1;
964
+ }
965
+ } else {
966
+ retcode = -1;
967
+ }
968
+
969
+ Py_DECREF(key);
970
+ Py_DECREF(devicearray);
971
+ return retcode;
972
+ }
973
+
974
+ static PyMethodDef Dispatcher_methods[] = {
975
+ { "_clear", (PyCFunction)Dispatcher_clear, METH_NOARGS, NULL },
976
+ { "_insert", (PyCFunction)Dispatcher_Insert, METH_VARARGS | METH_KEYWORDS,
977
+ "insert new definition"},
978
+ { "_cuda_call", (PyCFunction)Dispatcher_cuda_call,
979
+ METH_VARARGS | METH_KEYWORDS, "CUDA call resolution" },
980
+ { NULL },
981
+ };
982
+
983
+ static PyMemberDef Dispatcher_members[] = {
984
+ {(char*)"_can_compile", T_BOOL, offsetof(Dispatcher, can_compile), 0, NULL },
985
+ {NULL} /* Sentinel */
986
+ };
987
+
988
+
989
+ static PyTypeObject DispatcherType = {
990
+ PyVarObject_HEAD_INIT(NULL, 0)
991
+ "_dispatcher.Dispatcher", /* tp_name */
992
+ sizeof(Dispatcher), /* tp_basicsize */
993
+ 0, /* tp_itemsize */
994
+ (destructor)Dispatcher_dealloc, /* tp_dealloc */
995
+ 0, /* tp_vectorcall_offset */
996
+ 0, /* tp_getattr */
997
+ 0, /* tp_setattr */
998
+ 0, /* tp_as_async */
999
+ 0, /* tp_repr */
1000
+ 0, /* tp_as_number */
1001
+ 0, /* tp_as_sequence */
1002
+ 0, /* tp_as_mapping */
1003
+ 0, /* tp_hash */
1004
+ 0, /* tp_call*/
1005
+ 0, /* tp_str*/
1006
+ 0, /* tp_getattro*/
1007
+ 0, /* tp_setattro*/
1008
+ 0, /* tp_as_buffer*/
1009
+ Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC, /* tp_flags*/
1010
+ "Dispatcher object", /* tp_doc */
1011
+ (traverseproc) Dispatcher_traverse, /* tp_traverse */
1012
+ 0, /* tp_clear */
1013
+ 0, /* tp_richcompare */
1014
+ 0, /* tp_weaklistoffset */
1015
+ 0, /* tp_iter */
1016
+ 0, /* tp_iternext */
1017
+ Dispatcher_methods, /* tp_methods */
1018
+ Dispatcher_members, /* tp_members */
1019
+ 0, /* tp_getset */
1020
+ 0, /* tp_base */
1021
+ 0, /* tp_dict */
1022
+ 0, /* tp_descr_get */
1023
+ 0, /* tp_descr_set */
1024
+ 0, /* tp_dictoffset */
1025
+ (initproc)Dispatcher_init, /* tp_init */
1026
+ 0, /* tp_alloc */
1027
+ 0, /* tp_new */
1028
+ 0, /* tp_free */
1029
+ 0, /* tp_is_gc */
1030
+ 0, /* tp_bases */
1031
+ 0, /* tp_mro */
1032
+ 0, /* tp_cache */
1033
+ 0, /* tp_subclasses */
1034
+ 0, /* tp_weaklist */
1035
+ 0, /* tp_del */
1036
+ 0, /* tp_version_tag */
1037
+ 0, /* tp_finalize */
1038
+ 0, /* tp_vectorcall */
1039
+ #if (PY_MAJOR_VERSION == 3) && (PY_MINOR_VERSION == 12)
1040
+ /* This was introduced first in 3.12
1041
+ * https://github.com/python/cpython/issues/91051
1042
+ */
1043
+ 0, /* tp_watched */
1044
+ #endif
1045
+
1046
+ /* WARNING: Do not remove this, only modify it! It is a version guard to
1047
+ * act as a reminder to update this struct on Python version update! */
1048
+ #if (PY_MAJOR_VERSION == 3)
1049
+ #if ! (NB_SUPPORTED_PYTHON_MINOR)
1050
+ #error "Python minor version is not supported."
1051
+ #endif
1052
+ #else
1053
+ #error "Python major version is not supported."
1054
+ #endif
1055
+ /* END WARNING*/
1056
+ };
1057
+
1058
+
1059
+
1060
+
1061
+ static PyObject *compute_fingerprint(PyObject *self, PyObject *args)
1062
+ {
1063
+ PyObject *val;
1064
+ if (!PyArg_ParseTuple(args, "O:compute_fingerprint", &val))
1065
+ return NULL;
1066
+ return typeof_compute_fingerprint(val);
1067
+ }
1068
+
1069
+ static PyMethodDef ext_methods[] = {
1070
+ #define declmethod(func) { #func , ( PyCFunction )func , METH_VARARGS , NULL }
1071
+ declmethod(typeof_init),
1072
+ declmethod(compute_fingerprint),
1073
+ { NULL },
1074
+ #undef declmethod
1075
+ };
1076
+
1077
+
1078
+ MOD_INIT(_dispatcher) {
1079
+ if (import_devicearray() < 0) {
1080
+ PyErr_Print();
1081
+ PyErr_SetString(PyExc_ImportError, NUMBA_DEVICEARRAY_IMPORT_NAME " failed to import");
1082
+ return MOD_ERROR_VAL;
1083
+ }
1084
+
1085
+ PyObject *m;
1086
+ MOD_DEF(m, "_dispatcher", "No docs", ext_methods)
1087
+ if (m == NULL)
1088
+ return MOD_ERROR_VAL;
1089
+
1090
+ DispatcherType.tp_new = PyType_GenericNew;
1091
+ if (PyType_Ready(&DispatcherType) < 0) {
1092
+ return MOD_ERROR_VAL;
1093
+ }
1094
+ Py_INCREF(&DispatcherType);
1095
+ PyModule_AddObject(m, "Dispatcher", (PyObject*)(&DispatcherType));
1096
+
1097
+ return MOD_SUCCESS_VAL(m);
1098
+ }