numba-cuda 0.21.1__cp313-cp313-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (488) hide show
  1. _numba_cuda_redirector.pth +4 -0
  2. _numba_cuda_redirector.py +89 -0
  3. numba_cuda/VERSION +1 -0
  4. numba_cuda/__init__.py +6 -0
  5. numba_cuda/_version.py +11 -0
  6. numba_cuda/numba/cuda/__init__.py +70 -0
  7. numba_cuda/numba/cuda/_internal/cuda_bf16.py +16394 -0
  8. numba_cuda/numba/cuda/_internal/cuda_fp16.py +8112 -0
  9. numba_cuda/numba/cuda/api.py +577 -0
  10. numba_cuda/numba/cuda/api_util.py +76 -0
  11. numba_cuda/numba/cuda/args.py +72 -0
  12. numba_cuda/numba/cuda/bf16.py +397 -0
  13. numba_cuda/numba/cuda/cache_hints.py +287 -0
  14. numba_cuda/numba/cuda/cext/__init__.py +2 -0
  15. numba_cuda/numba/cuda/cext/_devicearray.cp313-win_amd64.pyd +0 -0
  16. numba_cuda/numba/cuda/cext/_devicearray.cpp +159 -0
  17. numba_cuda/numba/cuda/cext/_devicearray.h +29 -0
  18. numba_cuda/numba/cuda/cext/_dispatcher.cp313-win_amd64.pyd +0 -0
  19. numba_cuda/numba/cuda/cext/_dispatcher.cpp +1098 -0
  20. numba_cuda/numba/cuda/cext/_hashtable.cpp +532 -0
  21. numba_cuda/numba/cuda/cext/_hashtable.h +135 -0
  22. numba_cuda/numba/cuda/cext/_helperlib.c +71 -0
  23. numba_cuda/numba/cuda/cext/_helperlib.cp313-win_amd64.pyd +0 -0
  24. numba_cuda/numba/cuda/cext/_helpermod.c +82 -0
  25. numba_cuda/numba/cuda/cext/_pymodule.h +38 -0
  26. numba_cuda/numba/cuda/cext/_typeconv.cp313-win_amd64.pyd +0 -0
  27. numba_cuda/numba/cuda/cext/_typeconv.cpp +206 -0
  28. numba_cuda/numba/cuda/cext/_typeof.cpp +1159 -0
  29. numba_cuda/numba/cuda/cext/_typeof.h +19 -0
  30. numba_cuda/numba/cuda/cext/capsulethunk.h +111 -0
  31. numba_cuda/numba/cuda/cext/mviewbuf.c +385 -0
  32. numba_cuda/numba/cuda/cext/mviewbuf.cp313-win_amd64.pyd +0 -0
  33. numba_cuda/numba/cuda/cext/typeconv.cpp +212 -0
  34. numba_cuda/numba/cuda/cext/typeconv.hpp +101 -0
  35. numba_cuda/numba/cuda/cg.py +67 -0
  36. numba_cuda/numba/cuda/cgutils.py +1294 -0
  37. numba_cuda/numba/cuda/cloudpickle/__init__.py +21 -0
  38. numba_cuda/numba/cuda/cloudpickle/cloudpickle.py +1598 -0
  39. numba_cuda/numba/cuda/cloudpickle/cloudpickle_fast.py +17 -0
  40. numba_cuda/numba/cuda/codegen.py +541 -0
  41. numba_cuda/numba/cuda/compiler.py +1396 -0
  42. numba_cuda/numba/cuda/core/analysis.py +758 -0
  43. numba_cuda/numba/cuda/core/annotations/__init__.py +0 -0
  44. numba_cuda/numba/cuda/core/annotations/pretty_annotate.py +288 -0
  45. numba_cuda/numba/cuda/core/annotations/type_annotations.py +305 -0
  46. numba_cuda/numba/cuda/core/base.py +1332 -0
  47. numba_cuda/numba/cuda/core/boxing.py +1411 -0
  48. numba_cuda/numba/cuda/core/bytecode.py +728 -0
  49. numba_cuda/numba/cuda/core/byteflow.py +2346 -0
  50. numba_cuda/numba/cuda/core/caching.py +744 -0
  51. numba_cuda/numba/cuda/core/callconv.py +392 -0
  52. numba_cuda/numba/cuda/core/codegen.py +171 -0
  53. numba_cuda/numba/cuda/core/compiler.py +199 -0
  54. numba_cuda/numba/cuda/core/compiler_lock.py +85 -0
  55. numba_cuda/numba/cuda/core/compiler_machinery.py +497 -0
  56. numba_cuda/numba/cuda/core/config.py +650 -0
  57. numba_cuda/numba/cuda/core/consts.py +124 -0
  58. numba_cuda/numba/cuda/core/controlflow.py +989 -0
  59. numba_cuda/numba/cuda/core/entrypoints.py +57 -0
  60. numba_cuda/numba/cuda/core/environment.py +66 -0
  61. numba_cuda/numba/cuda/core/errors.py +917 -0
  62. numba_cuda/numba/cuda/core/event.py +511 -0
  63. numba_cuda/numba/cuda/core/funcdesc.py +330 -0
  64. numba_cuda/numba/cuda/core/generators.py +387 -0
  65. numba_cuda/numba/cuda/core/imputils.py +509 -0
  66. numba_cuda/numba/cuda/core/inline_closurecall.py +1787 -0
  67. numba_cuda/numba/cuda/core/interpreter.py +3617 -0
  68. numba_cuda/numba/cuda/core/ir.py +1812 -0
  69. numba_cuda/numba/cuda/core/ir_utils.py +2638 -0
  70. numba_cuda/numba/cuda/core/optional.py +129 -0
  71. numba_cuda/numba/cuda/core/options.py +262 -0
  72. numba_cuda/numba/cuda/core/postproc.py +249 -0
  73. numba_cuda/numba/cuda/core/pythonapi.py +1859 -0
  74. numba_cuda/numba/cuda/core/registry.py +46 -0
  75. numba_cuda/numba/cuda/core/removerefctpass.py +123 -0
  76. numba_cuda/numba/cuda/core/rewrites/__init__.py +26 -0
  77. numba_cuda/numba/cuda/core/rewrites/ir_print.py +91 -0
  78. numba_cuda/numba/cuda/core/rewrites/registry.py +104 -0
  79. numba_cuda/numba/cuda/core/rewrites/static_binop.py +41 -0
  80. numba_cuda/numba/cuda/core/rewrites/static_getitem.py +189 -0
  81. numba_cuda/numba/cuda/core/rewrites/static_raise.py +100 -0
  82. numba_cuda/numba/cuda/core/sigutils.py +68 -0
  83. numba_cuda/numba/cuda/core/ssa.py +498 -0
  84. numba_cuda/numba/cuda/core/targetconfig.py +330 -0
  85. numba_cuda/numba/cuda/core/tracing.py +231 -0
  86. numba_cuda/numba/cuda/core/transforms.py +956 -0
  87. numba_cuda/numba/cuda/core/typed_passes.py +867 -0
  88. numba_cuda/numba/cuda/core/typeinfer.py +1950 -0
  89. numba_cuda/numba/cuda/core/unsafe/__init__.py +0 -0
  90. numba_cuda/numba/cuda/core/unsafe/bytes.py +67 -0
  91. numba_cuda/numba/cuda/core/unsafe/eh.py +67 -0
  92. numba_cuda/numba/cuda/core/unsafe/refcount.py +98 -0
  93. numba_cuda/numba/cuda/core/untyped_passes.py +1979 -0
  94. numba_cuda/numba/cuda/cpython/builtins.py +1153 -0
  95. numba_cuda/numba/cuda/cpython/charseq.py +1218 -0
  96. numba_cuda/numba/cuda/cpython/cmathimpl.py +560 -0
  97. numba_cuda/numba/cuda/cpython/enumimpl.py +103 -0
  98. numba_cuda/numba/cuda/cpython/iterators.py +167 -0
  99. numba_cuda/numba/cuda/cpython/listobj.py +1326 -0
  100. numba_cuda/numba/cuda/cpython/mathimpl.py +499 -0
  101. numba_cuda/numba/cuda/cpython/numbers.py +1475 -0
  102. numba_cuda/numba/cuda/cpython/rangeobj.py +289 -0
  103. numba_cuda/numba/cuda/cpython/slicing.py +322 -0
  104. numba_cuda/numba/cuda/cpython/tupleobj.py +456 -0
  105. numba_cuda/numba/cuda/cpython/unicode.py +2865 -0
  106. numba_cuda/numba/cuda/cpython/unicode_support.py +1597 -0
  107. numba_cuda/numba/cuda/cpython/unsafe/__init__.py +0 -0
  108. numba_cuda/numba/cuda/cpython/unsafe/numbers.py +64 -0
  109. numba_cuda/numba/cuda/cpython/unsafe/tuple.py +92 -0
  110. numba_cuda/numba/cuda/cuda_paths.py +691 -0
  111. numba_cuda/numba/cuda/cudadecl.py +556 -0
  112. numba_cuda/numba/cuda/cudadrv/__init__.py +14 -0
  113. numba_cuda/numba/cuda/cudadrv/devicearray.py +951 -0
  114. numba_cuda/numba/cuda/cudadrv/devices.py +249 -0
  115. numba_cuda/numba/cuda/cudadrv/driver.py +3222 -0
  116. numba_cuda/numba/cuda/cudadrv/drvapi.py +435 -0
  117. numba_cuda/numba/cuda/cudadrv/dummyarray.py +558 -0
  118. numba_cuda/numba/cuda/cudadrv/enums.py +613 -0
  119. numba_cuda/numba/cuda/cudadrv/error.py +48 -0
  120. numba_cuda/numba/cuda/cudadrv/libs.py +220 -0
  121. numba_cuda/numba/cuda/cudadrv/linkable_code.py +184 -0
  122. numba_cuda/numba/cuda/cudadrv/mappings.py +14 -0
  123. numba_cuda/numba/cuda/cudadrv/ndarray.py +26 -0
  124. numba_cuda/numba/cuda/cudadrv/nvrtc.py +193 -0
  125. numba_cuda/numba/cuda/cudadrv/nvvm.py +756 -0
  126. numba_cuda/numba/cuda/cudadrv/rtapi.py +13 -0
  127. numba_cuda/numba/cuda/cudadrv/runtime.py +34 -0
  128. numba_cuda/numba/cuda/cudaimpl.py +995 -0
  129. numba_cuda/numba/cuda/cudamath.py +149 -0
  130. numba_cuda/numba/cuda/datamodel/__init__.py +7 -0
  131. numba_cuda/numba/cuda/datamodel/cuda_manager.py +66 -0
  132. numba_cuda/numba/cuda/datamodel/cuda_models.py +1446 -0
  133. numba_cuda/numba/cuda/datamodel/cuda_packer.py +224 -0
  134. numba_cuda/numba/cuda/datamodel/cuda_registry.py +22 -0
  135. numba_cuda/numba/cuda/datamodel/cuda_testing.py +153 -0
  136. numba_cuda/numba/cuda/datamodel/manager.py +11 -0
  137. numba_cuda/numba/cuda/datamodel/models.py +9 -0
  138. numba_cuda/numba/cuda/datamodel/packer.py +9 -0
  139. numba_cuda/numba/cuda/datamodel/registry.py +11 -0
  140. numba_cuda/numba/cuda/datamodel/testing.py +11 -0
  141. numba_cuda/numba/cuda/debuginfo.py +903 -0
  142. numba_cuda/numba/cuda/decorators.py +294 -0
  143. numba_cuda/numba/cuda/descriptor.py +35 -0
  144. numba_cuda/numba/cuda/device_init.py +158 -0
  145. numba_cuda/numba/cuda/deviceufunc.py +1021 -0
  146. numba_cuda/numba/cuda/dispatcher.py +2463 -0
  147. numba_cuda/numba/cuda/errors.py +72 -0
  148. numba_cuda/numba/cuda/extending.py +697 -0
  149. numba_cuda/numba/cuda/flags.py +178 -0
  150. numba_cuda/numba/cuda/fp16.py +357 -0
  151. numba_cuda/numba/cuda/include/12/cuda_bf16.h +5118 -0
  152. numba_cuda/numba/cuda/include/12/cuda_bf16.hpp +3865 -0
  153. numba_cuda/numba/cuda/include/12/cuda_fp16.h +5363 -0
  154. numba_cuda/numba/cuda/include/12/cuda_fp16.hpp +3483 -0
  155. numba_cuda/numba/cuda/include/13/cuda_bf16.h +5118 -0
  156. numba_cuda/numba/cuda/include/13/cuda_bf16.hpp +3865 -0
  157. numba_cuda/numba/cuda/include/13/cuda_fp16.h +5363 -0
  158. numba_cuda/numba/cuda/include/13/cuda_fp16.hpp +3483 -0
  159. numba_cuda/numba/cuda/initialize.py +24 -0
  160. numba_cuda/numba/cuda/intrinsic_wrapper.py +41 -0
  161. numba_cuda/numba/cuda/intrinsics.py +382 -0
  162. numba_cuda/numba/cuda/itanium_mangler.py +214 -0
  163. numba_cuda/numba/cuda/kernels/__init__.py +2 -0
  164. numba_cuda/numba/cuda/kernels/reduction.py +265 -0
  165. numba_cuda/numba/cuda/kernels/transpose.py +65 -0
  166. numba_cuda/numba/cuda/libdevice.py +3386 -0
  167. numba_cuda/numba/cuda/libdevicedecl.py +20 -0
  168. numba_cuda/numba/cuda/libdevicefuncs.py +1060 -0
  169. numba_cuda/numba/cuda/libdeviceimpl.py +88 -0
  170. numba_cuda/numba/cuda/locks.py +19 -0
  171. numba_cuda/numba/cuda/lowering.py +1951 -0
  172. numba_cuda/numba/cuda/mathimpl.py +374 -0
  173. numba_cuda/numba/cuda/memory_management/__init__.py +4 -0
  174. numba_cuda/numba/cuda/memory_management/memsys.cu +99 -0
  175. numba_cuda/numba/cuda/memory_management/memsys.cuh +22 -0
  176. numba_cuda/numba/cuda/memory_management/nrt.cu +212 -0
  177. numba_cuda/numba/cuda/memory_management/nrt.cuh +48 -0
  178. numba_cuda/numba/cuda/memory_management/nrt.py +390 -0
  179. numba_cuda/numba/cuda/memory_management/nrt_context.py +438 -0
  180. numba_cuda/numba/cuda/misc/appdirs.py +594 -0
  181. numba_cuda/numba/cuda/misc/cffiimpl.py +24 -0
  182. numba_cuda/numba/cuda/misc/coverage_support.py +43 -0
  183. numba_cuda/numba/cuda/misc/dump_style.py +41 -0
  184. numba_cuda/numba/cuda/misc/findlib.py +75 -0
  185. numba_cuda/numba/cuda/misc/firstlinefinder.py +96 -0
  186. numba_cuda/numba/cuda/misc/gdb_hook.py +240 -0
  187. numba_cuda/numba/cuda/misc/literal.py +28 -0
  188. numba_cuda/numba/cuda/misc/llvm_pass_timings.py +412 -0
  189. numba_cuda/numba/cuda/misc/special.py +94 -0
  190. numba_cuda/numba/cuda/models.py +56 -0
  191. numba_cuda/numba/cuda/np/arraymath.py +5130 -0
  192. numba_cuda/numba/cuda/np/arrayobj.py +7635 -0
  193. numba_cuda/numba/cuda/np/extensions.py +11 -0
  194. numba_cuda/numba/cuda/np/linalg.py +3087 -0
  195. numba_cuda/numba/cuda/np/math/__init__.py +0 -0
  196. numba_cuda/numba/cuda/np/math/cmathimpl.py +558 -0
  197. numba_cuda/numba/cuda/np/math/mathimpl.py +487 -0
  198. numba_cuda/numba/cuda/np/math/numbers.py +1461 -0
  199. numba_cuda/numba/cuda/np/npdatetime.py +969 -0
  200. numba_cuda/numba/cuda/np/npdatetime_helpers.py +217 -0
  201. numba_cuda/numba/cuda/np/npyfuncs.py +1808 -0
  202. numba_cuda/numba/cuda/np/npyimpl.py +1027 -0
  203. numba_cuda/numba/cuda/np/numpy_support.py +798 -0
  204. numba_cuda/numba/cuda/np/polynomial/__init__.py +4 -0
  205. numba_cuda/numba/cuda/np/polynomial/polynomial_core.py +242 -0
  206. numba_cuda/numba/cuda/np/polynomial/polynomial_functions.py +380 -0
  207. numba_cuda/numba/cuda/np/ufunc/__init__.py +4 -0
  208. numba_cuda/numba/cuda/np/ufunc/decorators.py +203 -0
  209. numba_cuda/numba/cuda/np/ufunc/sigparse.py +68 -0
  210. numba_cuda/numba/cuda/np/ufunc/ufuncbuilder.py +65 -0
  211. numba_cuda/numba/cuda/np/ufunc_db.py +1282 -0
  212. numba_cuda/numba/cuda/np/unsafe/__init__.py +0 -0
  213. numba_cuda/numba/cuda/np/unsafe/ndarray.py +84 -0
  214. numba_cuda/numba/cuda/nvvmutils.py +254 -0
  215. numba_cuda/numba/cuda/printimpl.py +126 -0
  216. numba_cuda/numba/cuda/random.py +308 -0
  217. numba_cuda/numba/cuda/reshape_funcs.cu +156 -0
  218. numba_cuda/numba/cuda/serialize.py +267 -0
  219. numba_cuda/numba/cuda/simulator/__init__.py +63 -0
  220. numba_cuda/numba/cuda/simulator/_internal/__init__.py +4 -0
  221. numba_cuda/numba/cuda/simulator/_internal/cuda_bf16.py +2 -0
  222. numba_cuda/numba/cuda/simulator/api.py +179 -0
  223. numba_cuda/numba/cuda/simulator/bf16.py +4 -0
  224. numba_cuda/numba/cuda/simulator/compiler.py +38 -0
  225. numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +11 -0
  226. numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +462 -0
  227. numba_cuda/numba/cuda/simulator/cudadrv/devices.py +122 -0
  228. numba_cuda/numba/cuda/simulator/cudadrv/driver.py +66 -0
  229. numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +7 -0
  230. numba_cuda/numba/cuda/simulator/cudadrv/dummyarray.py +7 -0
  231. numba_cuda/numba/cuda/simulator/cudadrv/error.py +10 -0
  232. numba_cuda/numba/cuda/simulator/cudadrv/libs.py +10 -0
  233. numba_cuda/numba/cuda/simulator/cudadrv/linkable_code.py +61 -0
  234. numba_cuda/numba/cuda/simulator/cudadrv/nvrtc.py +11 -0
  235. numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +32 -0
  236. numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +22 -0
  237. numba_cuda/numba/cuda/simulator/dispatcher.py +11 -0
  238. numba_cuda/numba/cuda/simulator/kernel.py +320 -0
  239. numba_cuda/numba/cuda/simulator/kernelapi.py +509 -0
  240. numba_cuda/numba/cuda/simulator/memory_management/__init__.py +4 -0
  241. numba_cuda/numba/cuda/simulator/memory_management/nrt.py +21 -0
  242. numba_cuda/numba/cuda/simulator/reduction.py +19 -0
  243. numba_cuda/numba/cuda/simulator/tests/support.py +4 -0
  244. numba_cuda/numba/cuda/simulator/vector_types.py +65 -0
  245. numba_cuda/numba/cuda/simulator_init.py +18 -0
  246. numba_cuda/numba/cuda/stubs.py +635 -0
  247. numba_cuda/numba/cuda/target.py +505 -0
  248. numba_cuda/numba/cuda/testing.py +347 -0
  249. numba_cuda/numba/cuda/tests/__init__.py +62 -0
  250. numba_cuda/numba/cuda/tests/benchmarks/__init__.py +0 -0
  251. numba_cuda/numba/cuda/tests/benchmarks/test_kernel_launch.py +119 -0
  252. numba_cuda/numba/cuda/tests/cloudpickle_main_class.py +9 -0
  253. numba_cuda/numba/cuda/tests/core/serialize_usecases.py +113 -0
  254. numba_cuda/numba/cuda/tests/core/test_itanium_mangler.py +83 -0
  255. numba_cuda/numba/cuda/tests/core/test_serialize.py +371 -0
  256. numba_cuda/numba/cuda/tests/cudadrv/__init__.py +9 -0
  257. numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +147 -0
  258. numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +161 -0
  259. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +397 -0
  260. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +24 -0
  261. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +180 -0
  262. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +313 -0
  263. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +187 -0
  264. numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +621 -0
  265. numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +247 -0
  266. numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +100 -0
  267. numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +198 -0
  268. numba_cuda/numba/cuda/tests/cudadrv/test_events.py +53 -0
  269. numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +72 -0
  270. numba_cuda/numba/cuda/tests/cudadrv/test_init.py +138 -0
  271. numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +43 -0
  272. numba_cuda/numba/cuda/tests/cudadrv/test_is_fp16.py +15 -0
  273. numba_cuda/numba/cuda/tests/cudadrv/test_linkable_code.py +58 -0
  274. numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +348 -0
  275. numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +128 -0
  276. numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py +301 -0
  277. numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +174 -0
  278. numba_cuda/numba/cuda/tests/cudadrv/test_nvrtc.py +28 -0
  279. numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +185 -0
  280. numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +39 -0
  281. numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +23 -0
  282. numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +38 -0
  283. numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +48 -0
  284. numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +44 -0
  285. numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +127 -0
  286. numba_cuda/numba/cuda/tests/cudapy/__init__.py +9 -0
  287. numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +231 -0
  288. numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +50 -0
  289. numba_cuda/numba/cuda/tests/cudapy/cg_cache_usecases.py +36 -0
  290. numba_cuda/numba/cuda/tests/cudapy/complex_usecases.py +116 -0
  291. numba_cuda/numba/cuda/tests/cudapy/enum_usecases.py +59 -0
  292. numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +62 -0
  293. numba_cuda/numba/cuda/tests/cudapy/jitlink.ptx +28 -0
  294. numba_cuda/numba/cuda/tests/cudapy/overload_usecases.py +33 -0
  295. numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +104 -0
  296. numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +47 -0
  297. numba_cuda/numba/cuda/tests/cudapy/test_analysis.py +1122 -0
  298. numba_cuda/numba/cuda/tests/cudapy/test_array.py +344 -0
  299. numba_cuda/numba/cuda/tests/cudapy/test_array_alignment.py +268 -0
  300. numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +203 -0
  301. numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +63 -0
  302. numba_cuda/numba/cuda/tests/cudapy/test_array_reductions.py +360 -0
  303. numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +1815 -0
  304. numba_cuda/numba/cuda/tests/cudapy/test_bfloat16.py +599 -0
  305. numba_cuda/numba/cuda/tests/cudapy/test_bfloat16_bindings.py +377 -0
  306. numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +160 -0
  307. numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +27 -0
  308. numba_cuda/numba/cuda/tests/cudapy/test_byteflow.py +98 -0
  309. numba_cuda/numba/cuda/tests/cudapy/test_cache_hints.py +210 -0
  310. numba_cuda/numba/cuda/tests/cudapy/test_caching.py +683 -0
  311. numba_cuda/numba/cuda/tests/cudapy/test_casting.py +265 -0
  312. numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +42 -0
  313. numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +718 -0
  314. numba_cuda/numba/cuda/tests/cudapy/test_complex.py +370 -0
  315. numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +23 -0
  316. numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +142 -0
  317. numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +178 -0
  318. numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +193 -0
  319. numba_cuda/numba/cuda/tests/cudapy/test_copy_propagate.py +131 -0
  320. numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +438 -0
  321. numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +94 -0
  322. numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +101 -0
  323. numba_cuda/numba/cuda/tests/cudapy/test_debug.py +105 -0
  324. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +889 -0
  325. numba_cuda/numba/cuda/tests/cudapy/test_debuginfo_types.py +476 -0
  326. numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +500 -0
  327. numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +820 -0
  328. numba_cuda/numba/cuda/tests/cudapy/test_enums.py +152 -0
  329. numba_cuda/numba/cuda/tests/cudapy/test_errors.py +111 -0
  330. numba_cuda/numba/cuda/tests/cudapy/test_exception.py +170 -0
  331. numba_cuda/numba/cuda/tests/cudapy/test_extending.py +1088 -0
  332. numba_cuda/numba/cuda/tests/cudapy/test_extending_types.py +71 -0
  333. numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +265 -0
  334. numba_cuda/numba/cuda/tests/cudapy/test_flow_control.py +1433 -0
  335. numba_cuda/numba/cuda/tests/cudapy/test_forall.py +57 -0
  336. numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +34 -0
  337. numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +69 -0
  338. numba_cuda/numba/cuda/tests/cudapy/test_globals.py +62 -0
  339. numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +474 -0
  340. numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +167 -0
  341. numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +92 -0
  342. numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +39 -0
  343. numba_cuda/numba/cuda/tests/cudapy/test_inline.py +170 -0
  344. numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +255 -0
  345. numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +1219 -0
  346. numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +263 -0
  347. numba_cuda/numba/cuda/tests/cudapy/test_ir.py +598 -0
  348. numba_cuda/numba/cuda/tests/cudapy/test_ir_utils.py +276 -0
  349. numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +101 -0
  350. numba_cuda/numba/cuda/tests/cudapy/test_lang.py +68 -0
  351. numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +123 -0
  352. numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +194 -0
  353. numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +220 -0
  354. numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +173 -0
  355. numba_cuda/numba/cuda/tests/cudapy/test_make_function_to_jit_function.py +364 -0
  356. numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +47 -0
  357. numba_cuda/numba/cuda/tests/cudapy/test_math.py +842 -0
  358. numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +76 -0
  359. numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +78 -0
  360. numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +25 -0
  361. numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +145 -0
  362. numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +39 -0
  363. numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +82 -0
  364. numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +53 -0
  365. numba_cuda/numba/cuda/tests/cudapy/test_operator.py +504 -0
  366. numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +93 -0
  367. numba_cuda/numba/cuda/tests/cudapy/test_overload.py +402 -0
  368. numba_cuda/numba/cuda/tests/cudapy/test_powi.py +128 -0
  369. numba_cuda/numba/cuda/tests/cudapy/test_print.py +193 -0
  370. numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +37 -0
  371. numba_cuda/numba/cuda/tests/cudapy/test_random.py +117 -0
  372. numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +614 -0
  373. numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +130 -0
  374. numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +94 -0
  375. numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +83 -0
  376. numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +86 -0
  377. numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +40 -0
  378. numba_cuda/numba/cuda/tests/cudapy/test_sm.py +457 -0
  379. numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +233 -0
  380. numba_cuda/numba/cuda/tests/cudapy/test_ssa.py +454 -0
  381. numba_cuda/numba/cuda/tests/cudapy/test_stream_api.py +56 -0
  382. numba_cuda/numba/cuda/tests/cudapy/test_sync.py +277 -0
  383. numba_cuda/numba/cuda/tests/cudapy/test_tracing.py +200 -0
  384. numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +90 -0
  385. numba_cuda/numba/cuda/tests/cudapy/test_typeconv.py +333 -0
  386. numba_cuda/numba/cuda/tests/cudapy/test_typeinfer.py +538 -0
  387. numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +585 -0
  388. numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +42 -0
  389. numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +485 -0
  390. numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +312 -0
  391. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +23 -0
  392. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +183 -0
  393. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +40 -0
  394. numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +40 -0
  395. numba_cuda/numba/cuda/tests/cudapy/test_warning.py +206 -0
  396. numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +331 -0
  397. numba_cuda/numba/cuda/tests/cudasim/__init__.py +9 -0
  398. numba_cuda/numba/cuda/tests/cudasim/support.py +9 -0
  399. numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +111 -0
  400. numba_cuda/numba/cuda/tests/data/__init__.py +2 -0
  401. numba_cuda/numba/cuda/tests/data/cta_barrier.cu +28 -0
  402. numba_cuda/numba/cuda/tests/data/cuda_include.cu +10 -0
  403. numba_cuda/numba/cuda/tests/data/error.cu +12 -0
  404. numba_cuda/numba/cuda/tests/data/include/add.cuh +8 -0
  405. numba_cuda/numba/cuda/tests/data/jitlink.cu +28 -0
  406. numba_cuda/numba/cuda/tests/data/jitlink.ptx +49 -0
  407. numba_cuda/numba/cuda/tests/data/warn.cu +12 -0
  408. numba_cuda/numba/cuda/tests/doc_examples/__init__.py +9 -0
  409. numba_cuda/numba/cuda/tests/doc_examples/ffi/__init__.py +2 -0
  410. numba_cuda/numba/cuda/tests/doc_examples/ffi/functions.cu +54 -0
  411. numba_cuda/numba/cuda/tests/doc_examples/ffi/include/mul.cuh +8 -0
  412. numba_cuda/numba/cuda/tests/doc_examples/ffi/saxpy.cu +14 -0
  413. numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +86 -0
  414. numba_cuda/numba/cuda/tests/doc_examples/test_cpointer.py +68 -0
  415. numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +81 -0
  416. numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +141 -0
  417. numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +160 -0
  418. numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +180 -0
  419. numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +119 -0
  420. numba_cuda/numba/cuda/tests/doc_examples/test_random.py +66 -0
  421. numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +80 -0
  422. numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +206 -0
  423. numba_cuda/numba/cuda/tests/doc_examples/test_ufunc.py +53 -0
  424. numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +76 -0
  425. numba_cuda/numba/cuda/tests/nocuda/__init__.py +9 -0
  426. numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +391 -0
  427. numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +48 -0
  428. numba_cuda/numba/cuda/tests/nocuda/test_import.py +63 -0
  429. numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +252 -0
  430. numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +59 -0
  431. numba_cuda/numba/cuda/tests/nrt/__init__.py +9 -0
  432. numba_cuda/numba/cuda/tests/nrt/test_nrt.py +387 -0
  433. numba_cuda/numba/cuda/tests/nrt/test_nrt_refct.py +124 -0
  434. numba_cuda/numba/cuda/tests/support.py +900 -0
  435. numba_cuda/numba/cuda/typeconv/__init__.py +4 -0
  436. numba_cuda/numba/cuda/typeconv/castgraph.py +137 -0
  437. numba_cuda/numba/cuda/typeconv/rules.py +63 -0
  438. numba_cuda/numba/cuda/typeconv/typeconv.py +121 -0
  439. numba_cuda/numba/cuda/types/__init__.py +233 -0
  440. numba_cuda/numba/cuda/types/__init__.pyi +167 -0
  441. numba_cuda/numba/cuda/types/abstract.py +9 -0
  442. numba_cuda/numba/cuda/types/common.py +9 -0
  443. numba_cuda/numba/cuda/types/containers.py +9 -0
  444. numba_cuda/numba/cuda/types/cuda_abstract.py +533 -0
  445. numba_cuda/numba/cuda/types/cuda_common.py +110 -0
  446. numba_cuda/numba/cuda/types/cuda_containers.py +971 -0
  447. numba_cuda/numba/cuda/types/cuda_function_type.py +230 -0
  448. numba_cuda/numba/cuda/types/cuda_functions.py +798 -0
  449. numba_cuda/numba/cuda/types/cuda_iterators.py +120 -0
  450. numba_cuda/numba/cuda/types/cuda_misc.py +569 -0
  451. numba_cuda/numba/cuda/types/cuda_npytypes.py +690 -0
  452. numba_cuda/numba/cuda/types/cuda_scalars.py +280 -0
  453. numba_cuda/numba/cuda/types/ext_types.py +101 -0
  454. numba_cuda/numba/cuda/types/function_type.py +11 -0
  455. numba_cuda/numba/cuda/types/functions.py +9 -0
  456. numba_cuda/numba/cuda/types/iterators.py +9 -0
  457. numba_cuda/numba/cuda/types/misc.py +9 -0
  458. numba_cuda/numba/cuda/types/npytypes.py +9 -0
  459. numba_cuda/numba/cuda/types/scalars.py +9 -0
  460. numba_cuda/numba/cuda/typing/__init__.py +19 -0
  461. numba_cuda/numba/cuda/typing/arraydecl.py +939 -0
  462. numba_cuda/numba/cuda/typing/asnumbatype.py +130 -0
  463. numba_cuda/numba/cuda/typing/bufproto.py +70 -0
  464. numba_cuda/numba/cuda/typing/builtins.py +1209 -0
  465. numba_cuda/numba/cuda/typing/cffi_utils.py +219 -0
  466. numba_cuda/numba/cuda/typing/cmathdecl.py +47 -0
  467. numba_cuda/numba/cuda/typing/collections.py +138 -0
  468. numba_cuda/numba/cuda/typing/context.py +782 -0
  469. numba_cuda/numba/cuda/typing/ctypes_utils.py +125 -0
  470. numba_cuda/numba/cuda/typing/dictdecl.py +63 -0
  471. numba_cuda/numba/cuda/typing/enumdecl.py +74 -0
  472. numba_cuda/numba/cuda/typing/listdecl.py +147 -0
  473. numba_cuda/numba/cuda/typing/mathdecl.py +158 -0
  474. numba_cuda/numba/cuda/typing/npdatetime.py +322 -0
  475. numba_cuda/numba/cuda/typing/npydecl.py +749 -0
  476. numba_cuda/numba/cuda/typing/setdecl.py +115 -0
  477. numba_cuda/numba/cuda/typing/templates.py +1446 -0
  478. numba_cuda/numba/cuda/typing/typeof.py +301 -0
  479. numba_cuda/numba/cuda/ufuncs.py +746 -0
  480. numba_cuda/numba/cuda/utils.py +724 -0
  481. numba_cuda/numba/cuda/vector_types.py +214 -0
  482. numba_cuda/numba/cuda/vectorizers.py +260 -0
  483. numba_cuda-0.21.1.dist-info/METADATA +109 -0
  484. numba_cuda-0.21.1.dist-info/RECORD +488 -0
  485. numba_cuda-0.21.1.dist-info/WHEEL +5 -0
  486. numba_cuda-0.21.1.dist-info/licenses/LICENSE +26 -0
  487. numba_cuda-0.21.1.dist-info/licenses/LICENSE.numba +24 -0
  488. numba_cuda-0.21.1.dist-info/top_level.txt +1 -0
@@ -0,0 +1,1027 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: BSD-2-Clause
3
+
4
+ """
5
+ Implementation of functions in the Numpy package.
6
+ """
7
+
8
+ import itertools
9
+ from collections import namedtuple
10
+
11
+ import llvmlite.ir as ir
12
+
13
+ import numpy as np
14
+ import operator
15
+
16
+ from numba.cuda.np import arrayobj
17
+ from numba.cuda.np import ufunc_db
18
+ from numba.cuda.np.ufunc.sigparse import parse_signature
19
+ from numba.cuda.core.imputils import (
20
+ Registry,
21
+ impl_ret_new_ref,
22
+ force_error_model,
23
+ impl_ret_borrowed,
24
+ )
25
+ from numba.cuda import typing, types
26
+ from numba.cuda import cgutils
27
+ from numba.cuda.np.numpy_support import (
28
+ ufunc_find_matching_loop,
29
+ select_array_wrapper,
30
+ from_dtype,
31
+ _ufunc_loop_sig,
32
+ )
33
+ from numba.cuda.np.arrayobj import _getitem_array_generic
34
+ from numba.cuda.typing import npydecl
35
+ from numba.cuda.extending import overload, intrinsic
36
+
37
+ from numba.cuda.core import errors
38
+
39
+ registry = Registry("npyimpl")
40
+
41
+
42
+ ########################################################################
43
+
44
+ # In the way we generate code, ufuncs work with scalar as well as
45
+ # with array arguments. The following helper classes help dealing
46
+ # with scalar and array arguments in a regular way.
47
+ #
48
+ # In short, the classes provide a uniform interface. The interface
49
+ # handles the indexing of as many dimensions as the array may have.
50
+ # For scalars, all indexing is ignored and when the value is read,
51
+ # the scalar is returned. For arrays code for actual indexing is
52
+ # generated and reading performs the appropriate indirection.
53
+
54
+
55
+ class _ScalarIndexingHelper(object):
56
+ def update_indices(self, loop_indices, name):
57
+ pass
58
+
59
+ def as_values(self):
60
+ pass
61
+
62
+
63
+ class _ScalarHelper(object):
64
+ """Helper class to handle scalar arguments (and result).
65
+ Note that store_data is only used when generating code for
66
+ a scalar ufunc and to write the output value.
67
+
68
+ For loading, the value is directly used without having any
69
+ kind of indexing nor memory backing it up. This is the use
70
+ for input arguments.
71
+
72
+ For storing, a variable is created in the stack where the
73
+ value will be written.
74
+
75
+ Note that it is not supported (as it is unneeded for our
76
+ current use-cases) reading back a stored value. This class
77
+ will always "load" the original value it got at its creation.
78
+ """
79
+
80
+ def __init__(self, ctxt, bld, val, ty):
81
+ self.context = ctxt
82
+ self.builder = bld
83
+ self.val = val
84
+ self.base_type = ty
85
+ intpty = ctxt.get_value_type(types.intp)
86
+ self.shape = [ir.Constant(intpty, 1)]
87
+
88
+ lty = ctxt.get_data_type(ty) if ty != types.boolean else ir.IntType(1)
89
+ self._ptr = cgutils.alloca_once(bld, lty)
90
+
91
+ def create_iter_indices(self):
92
+ return _ScalarIndexingHelper()
93
+
94
+ def load_data(self, indices):
95
+ return self.val
96
+
97
+ def store_data(self, indices, val):
98
+ self.builder.store(val, self._ptr)
99
+
100
+ @property
101
+ def return_val(self):
102
+ return self.builder.load(self._ptr)
103
+
104
+
105
+ class _ArrayIndexingHelper(
106
+ namedtuple("_ArrayIndexingHelper", ("array", "indices"))
107
+ ):
108
+ def update_indices(self, loop_indices, name):
109
+ bld = self.array.builder
110
+ intpty = self.array.context.get_value_type(types.intp)
111
+ ONE = ir.Constant(ir.IntType(intpty.width), 1)
112
+
113
+ # we are only interested in as many inner dimensions as dimensions
114
+ # the indexed array has (the outer dimensions are broadcast, so
115
+ # ignoring the outer indices produces the desired result.
116
+ indices = loop_indices[len(loop_indices) - len(self.indices) :]
117
+ for src, dst, dim in zip(indices, self.indices, self.array.shape):
118
+ cond = bld.icmp_unsigned(">", dim, ONE)
119
+ with bld.if_then(cond):
120
+ bld.store(src, dst)
121
+
122
+ def as_values(self):
123
+ """
124
+ The indexing helper is built using alloca for each value, so it
125
+ actually contains pointers to the actual indices to load. Note
126
+ that update_indices assumes the same. This method returns the
127
+ indices as values
128
+ """
129
+ bld = self.array.builder
130
+ return [bld.load(index) for index in self.indices]
131
+
132
+
133
+ class _ArrayHelper(
134
+ namedtuple(
135
+ "_ArrayHelper",
136
+ (
137
+ "context",
138
+ "builder",
139
+ "shape",
140
+ "strides",
141
+ "data",
142
+ "layout",
143
+ "base_type",
144
+ "ndim",
145
+ "return_val",
146
+ ),
147
+ )
148
+ ):
149
+ """Helper class to handle array arguments/result.
150
+ It provides methods to generate code loading/storing specific
151
+ items as well as support code for handling indices.
152
+ """
153
+
154
+ def create_iter_indices(self):
155
+ intpty = self.context.get_value_type(types.intp)
156
+ ZERO = ir.Constant(ir.IntType(intpty.width), 0)
157
+
158
+ indices = []
159
+ for i in range(self.ndim):
160
+ x = cgutils.alloca_once(self.builder, ir.IntType(intpty.width))
161
+ self.builder.store(ZERO, x)
162
+ indices.append(x)
163
+ return _ArrayIndexingHelper(self, indices)
164
+
165
+ def _load_effective_address(self, indices):
166
+ return cgutils.get_item_pointer2(
167
+ self.context,
168
+ self.builder,
169
+ data=self.data,
170
+ shape=self.shape,
171
+ strides=self.strides,
172
+ layout=self.layout,
173
+ inds=indices,
174
+ )
175
+
176
+ def load_data(self, indices):
177
+ model = self.context.data_model_manager[self.base_type]
178
+ ptr = self._load_effective_address(indices)
179
+ return model.load_from_data_pointer(self.builder, ptr)
180
+
181
+ def store_data(self, indices, value):
182
+ ctx = self.context
183
+ bld = self.builder
184
+ store_value = ctx.get_value_as_data(bld, self.base_type, value)
185
+ assert ctx.get_data_type(self.base_type) == store_value.type
186
+ bld.store(store_value, self._load_effective_address(indices))
187
+
188
+
189
+ class _ArrayGUHelper(
190
+ namedtuple(
191
+ "_ArrayHelper",
192
+ (
193
+ "context",
194
+ "builder",
195
+ "shape",
196
+ "strides",
197
+ "data",
198
+ "layout",
199
+ "base_type",
200
+ "ndim",
201
+ "inner_arr_ty",
202
+ "is_input_arg",
203
+ ),
204
+ )
205
+ ):
206
+ """Helper class to handle array arguments/result.
207
+ It provides methods to generate code loading/storing specific
208
+ items as well as support code for handling indices.
209
+
210
+ Contrary to _ArrayHelper, this class can create a view to a subarray
211
+ """
212
+
213
+ def create_iter_indices(self):
214
+ intpty = self.context.get_value_type(types.intp)
215
+ ZERO = ir.Constant(ir.IntType(intpty.width), 0)
216
+
217
+ indices = []
218
+ for i in range(self.ndim - self.inner_arr_ty.ndim):
219
+ x = cgutils.alloca_once(self.builder, ir.IntType(intpty.width))
220
+ self.builder.store(ZERO, x)
221
+ indices.append(x)
222
+ return _ArrayIndexingHelper(self, indices)
223
+
224
+ def _load_effective_address(self, indices):
225
+ context = self.context
226
+ builder = self.builder
227
+ arr_ty = types.Array(self.base_type, self.ndim, self.layout)
228
+ arr = context.make_array(arr_ty)(context, builder, self.data)
229
+
230
+ return cgutils.get_item_pointer2(
231
+ context,
232
+ builder,
233
+ data=arr.data,
234
+ shape=self.shape,
235
+ strides=self.strides,
236
+ layout=self.layout,
237
+ inds=indices,
238
+ )
239
+
240
+ def load_data(self, indices):
241
+ context, builder = self.context, self.builder
242
+
243
+ if self.inner_arr_ty.ndim == 0 and self.is_input_arg:
244
+ # scalar case for input arguments
245
+ model = context.data_model_manager[self.base_type]
246
+ ptr = self._load_effective_address(indices)
247
+ return model.load_from_data_pointer(builder, ptr)
248
+ elif self.inner_arr_ty.ndim == 0 and not self.is_input_arg:
249
+ # Output arrays are handled as 1d with shape=(1,) when its
250
+ # signature represents a scalar. For instance: "(n),(m) -> ()"
251
+ intpty = context.get_value_type(types.intp)
252
+ one = intpty(1)
253
+
254
+ fromty = types.Array(self.base_type, self.ndim, self.layout)
255
+ toty = types.Array(self.base_type, 1, self.layout)
256
+ itemsize = intpty(arrayobj.get_itemsize(context, fromty))
257
+
258
+ # create a view from the original ndarray to a 1d array
259
+ arr_from = self.context.make_array(fromty)(
260
+ context, builder, self.data
261
+ )
262
+ arr_to = self.context.make_array(toty)(context, builder)
263
+ arrayobj.populate_array(
264
+ arr_to,
265
+ data=self._load_effective_address(indices),
266
+ shape=cgutils.pack_array(builder, [one]),
267
+ strides=cgutils.pack_array(builder, [itemsize]),
268
+ itemsize=arr_from.itemsize,
269
+ meminfo=arr_from.meminfo,
270
+ parent=arr_from.parent,
271
+ )
272
+ return arr_to._getvalue()
273
+ else:
274
+ # generic case
275
+ # getitem n-dim array -> m-dim array, where N > M
276
+ index_types = (types.int64,) * (self.ndim - self.inner_arr_ty.ndim)
277
+ arrty = types.Array(self.base_type, self.ndim, self.layout)
278
+ arr = self.context.make_array(arrty)(context, builder, self.data)
279
+ res = _getitem_array_generic(
280
+ context,
281
+ builder,
282
+ self.inner_arr_ty,
283
+ arrty,
284
+ arr,
285
+ index_types,
286
+ indices,
287
+ )
288
+ return impl_ret_borrowed(context, builder, self.inner_arr_ty, res)
289
+
290
+ def guard_shape(self, loopshape):
291
+ inner_ndim = self.inner_arr_ty.ndim
292
+
293
+ def raise_impl(loop_shape, array_shape):
294
+ # This would in fact be a test for broadcasting.
295
+ # Broadcast would fail if, ignoring the core dimensions, the
296
+ # remaining ones are different than indices given by loop shape.
297
+
298
+ remaining = len(array_shape) - inner_ndim
299
+ _raise = remaining > len(loop_shape)
300
+ if not _raise:
301
+ for i in range(remaining):
302
+ _raise |= array_shape[i] != loop_shape[i]
303
+ if _raise:
304
+ # Ideally we should call `np.broadcast_shapes` with loop and
305
+ # array shapes. But since broadcasting is not supported here,
306
+ # we just raise an error
307
+ # TODO: check why raising a dynamic exception here fails
308
+ raise ValueError("Loop and array shapes are incompatible")
309
+
310
+ context, builder = self.context, self.builder
311
+ sig = types.none(
312
+ types.UniTuple(types.intp, len(loopshape)),
313
+ types.UniTuple(types.intp, len(self.shape)),
314
+ )
315
+ tup = (
316
+ context.make_tuple(builder, sig.args[0], loopshape),
317
+ context.make_tuple(builder, sig.args[1], self.shape),
318
+ )
319
+ context.compile_internal(builder, raise_impl, sig, tup)
320
+
321
+ def guard_match_core_dims(self, other: "_ArrayGUHelper", ndims: int):
322
+ # arguments with the same signature should match their core dimensions
323
+ #
324
+ # @guvectorize('(n,m), (n,m) -> (n)')
325
+ # def foo(x, y, res):
326
+ # ...
327
+ #
328
+ # x and y should have the same core (2D) dimensions
329
+ def raise_impl(self_shape, other_shape):
330
+ same = True
331
+ a, b = len(self_shape) - ndims, len(other_shape) - ndims
332
+ for i in range(ndims):
333
+ same &= self_shape[a + i] == other_shape[b + i]
334
+ if not same:
335
+ # NumPy raises the following:
336
+ # ValueError: gufunc: Input operand 1 has a mismatch in its
337
+ # core dimension 0, with gufunc signature (n),(n) -> ()
338
+ # (size 3 is different from 2)
339
+ # But since we cannot raise a dynamic exception here, we just
340
+ # (try) something meaninful
341
+ msg = (
342
+ "Operand has a mismatch in one of its core dimensions. "
343
+ "Please, check if all arguments to a @guvectorize "
344
+ "function have the same core dimensions."
345
+ )
346
+ raise ValueError(msg)
347
+
348
+ context, builder = self.context, self.builder
349
+ sig = types.none(
350
+ types.UniTuple(types.intp, len(self.shape)),
351
+ types.UniTuple(types.intp, len(other.shape)),
352
+ )
353
+ tup = (
354
+ context.make_tuple(builder, sig.args[0], self.shape),
355
+ context.make_tuple(builder, sig.args[1], other.shape),
356
+ )
357
+ context.compile_internal(builder, raise_impl, sig, tup)
358
+
359
+
360
def _prepare_argument(ctxt, bld, inp, tyinp, where="input operand"):
    """Wrap an argument in the helper matching its type.

    Array-compatible types get an ``_ArrayHelper``; numbers, booleans and
    NumPy datetime-like scalars get a ``_ScalarHelper``. Both helpers
    share one interface, so callers can treat scalars and arrays alike.

    An ``Optional`` type is first cast to its underlying type. Any other
    type raises ``NotImplementedError``; *where* names the operand in the
    error message.
    """
    # Un-Optional Optionals before dispatching on the concrete type.
    if isinstance(tyinp, types.Optional):
        optional_ty = tyinp
        tyinp = optional_ty.type
        inp = ctxt.cast(bld, inp, optional_ty, tyinp)

    if isinstance(tyinp, types.ArrayCompatible):
        array_struct = ctxt.make_array(tyinp)(ctxt, bld, inp)
        dims = cgutils.unpack_tuple(bld, array_struct.shape, tyinp.ndim)
        steps = cgutils.unpack_tuple(bld, array_struct.strides, tyinp.ndim)
        return _ArrayHelper(
            context=ctxt,
            builder=bld,
            shape=dims,
            strides=steps,
            data=array_struct.data,
            layout=tyinp.layout,
            base_type=tyinp.dtype,
            ndim=tyinp.ndim,
            return_val=inp,
        )

    if types.unliteral(tyinp) in types.number_domain | {
        types.boolean
    } or isinstance(tyinp, types.scalars._NPDatetimeBase):
        return _ScalarHelper(ctxt, bld, inp, tyinp)

    raise NotImplementedError(
        "unsupported type for {0}: {1}".format(where, str(tyinp))
    )
396
+
397
+
398
# Signature under which `_broadcast_onto` is compiled by `_build_array`:
# (src_ndim: intp, src_shape: intp*, dest_ndim: intp, dest_shape: intp*)
# -> intp
_broadcast_onto_sig = types.intp(
    types.intp,
    types.CPointer(types.intp),
    types.intp,
    types.CPointer(types.intp),
)
404
+
405
+
406
+ def _broadcast_onto(src_ndim, src_shape, dest_ndim, dest_shape):
407
+ """Low-level utility function used in calculating a shape for
408
+ an implicit output array. This function assumes that the
409
+ destination shape is an LLVM pointer to a C-style array that was
410
+ already initialized to a size of one along all axes.
411
+
412
+ Returns an integer value:
413
+ >= 1 : Succeeded. Return value should equal the number of dimensions in
414
+ the destination shape.
415
+ 0 : Failed to broadcast because source shape is larger than the
416
+ destination shape (this case should be weeded out at type
417
+ checking).
418
+ < 0 : Failed to broadcast onto destination axis, at axis number ==
419
+ -(return_value + 1).
420
+ """
421
+ if src_ndim > dest_ndim:
422
+ # This check should have been done during type checking, but
423
+ # let's be defensive anyway...
424
+ return 0
425
+ else:
426
+ src_index = 0
427
+ dest_index = dest_ndim - src_ndim
428
+ while src_index < src_ndim:
429
+ src_dim_size = src_shape[src_index]
430
+ dest_dim_size = dest_shape[dest_index]
431
+ # Check to see if we've already mutated the destination
432
+ # shape along this axis.
433
+ if dest_dim_size != 1:
434
+ # If we have mutated the destination shape already,
435
+ # then the source axis size must either be one,
436
+ # or the destination axis size.
437
+ if src_dim_size != dest_dim_size and src_dim_size != 1:
438
+ return -(dest_index + 1)
439
+ elif src_dim_size != 1:
440
+ # If the destination size is still its initial
441
+ dest_shape[dest_index] = src_dim_size
442
+ src_index += 1
443
+ dest_index += 1
444
+ return dest_index
445
+
446
+
447
def _build_array(context, builder, array_ty, input_types, inputs):
    """Utility function to handle allocation of an implicit output array
    given the target context, builder, output array type, and a list of
    _ArrayHelper instances.

    The output shape is obtained by broadcasting the shape of every input
    that has an ``ndim`` attribute (inputs without one are skipped as
    scalars). The generated code returns a ValueError through the calling
    convention when an input cannot be broadcast.

    Returns an ``_ArrayHelper`` over the newly allocated array whose
    ``return_val`` is the (possibly ``__array_wrap__``-ed) output value.
    """
    # First, strip optional types, ufunc loops are typed on concrete types
    input_types = [
        x.type if isinstance(x, types.Optional) else x for x in input_types
    ]

    intp_ty = context.get_value_type(types.intp)

    def make_intp_const(val):
        return context.get_constant(types.intp, val)

    ZERO = make_intp_const(0)  # noqa: F841
    ONE = make_intp_const(1)

    # Scratch buffers with one intp entry per output dimension: the shape
    # of the input currently being examined, and the accumulated output
    # shape.
    src_shape = cgutils.alloca_once(
        builder, intp_ty, array_ty.ndim, "src_shape"
    )
    dest_ndim = make_intp_const(array_ty.ndim)
    dest_shape = cgutils.alloca_once(
        builder, intp_ty, array_ty.ndim, "dest_shape"
    )
    dest_shape_addrs = tuple(
        cgutils.gep_inbounds(builder, dest_shape, index)
        for index in range(array_ty.ndim)
    )

    # Initialize the destination shape with all ones.
    for dest_shape_addr in dest_shape_addrs:
        builder.store(ONE, dest_shape_addr)

    # For each argument, try to broadcast onto the destination shape,
    # mutating along any axis where the argument shape is not one and
    # the destination shape is one.
    for arg_number, arg in enumerate(inputs):
        if not hasattr(arg, "ndim"):  # Skip scalar arguments
            continue
        arg_ndim = make_intp_const(arg.ndim)
        # Copy this argument's shape into the scratch buffer.
        for index in range(arg.ndim):
            builder.store(
                arg.shape[index],
                cgutils.gep_inbounds(builder, src_shape, index),
            )
        arg_result = context.compile_internal(
            builder,
            _broadcast_onto,
            _broadcast_onto_sig,
            [arg_ndim, src_shape, dest_ndim, dest_shape],
        )
        # A result below one is treated as a broadcasting failure.
        with cgutils.if_unlikely(
            builder, builder.icmp_signed("<", arg_result, ONE)
        ):
            msg = "unable to broadcast argument %d to output array" % (
                arg_number,
            )

            # Append the source location to the message when one is
            # recorded.
            loc = errors.loc_info.get("loc", None)
            if loc is not None:
                msg += '\nFile "%s", line %d, ' % (loc.filename, loc.line)

            context.call_conv.return_user_exc(builder, ValueError, (msg,))

    real_array_ty = array_ty.as_array

    # Allocate the output array with the broadcast shape.
    dest_shape_tup = tuple(
        builder.load(dest_shape_addr) for dest_shape_addr in dest_shape_addrs
    )
    array_val = arrayobj._empty_nd_impl(
        context, builder, real_array_ty, dest_shape_tup
    )

    # Get the best argument to call __array_wrap__ on
    array_wrapper_index = select_array_wrapper(input_types)
    array_wrapper_ty = input_types[array_wrapper_index]
    try:
        # __array_wrap__(source wrapped array, out array) -> out wrapped array
        array_wrap = context.get_function(
            "__array_wrap__", array_ty(array_wrapper_ty, real_array_ty)
        )
    except NotImplementedError:
        # If it's the same priority as a regular array, assume we
        # should use the allocated array unchanged.
        if array_wrapper_ty.array_priority != types.Array.array_priority:
            raise
        out_val = array_val._getvalue()
    else:
        wrap_args = (
            inputs[array_wrapper_index].return_val,
            array_val._getvalue(),
        )
        out_val = array_wrap(builder, wrap_args)

    ndim = array_ty.ndim
    shape = cgutils.unpack_tuple(builder, array_val.shape, ndim)
    strides = cgutils.unpack_tuple(builder, array_val.strides, ndim)
    return _ArrayHelper(
        context,
        builder,
        shape,
        strides,
        array_val.data,
        array_ty.layout,
        array_ty.dtype,
        ndim,
        out_val,
    )
556
+
557
+
558
+ # ufuncs either return a single result when nout == 1, else a tuple of results
559
+
560
+
561
+ def _unpack_output_types(ufunc, sig):
562
+ if ufunc.nout == 1:
563
+ return [sig.return_type]
564
+ else:
565
+ return list(sig.return_type)
566
+
567
+
568
+ def _unpack_output_values(ufunc, builder, values):
569
+ if ufunc.nout == 1:
570
+ return [values]
571
+ else:
572
+ return cgutils.unpack_tuple(builder, values)
573
+
574
+
575
+ def _pack_output_values(ufunc, context, builder, typ, values):
576
+ if ufunc.nout == 1:
577
+ return values[0]
578
+ else:
579
+ return context.make_tuple(builder, typ, values)
580
+
581
+
582
def numpy_ufunc_kernel(context, builder, sig, args, ufunc, kernel_class):
    # This is the code generator that builds all the looping needed
    # to execute a numpy functions over several dimensions (including
    # scalar cases).
    #
    # context - the code generation context
    # builder - the code emitter
    # sig - signature of the ufunc
    # args - the args to the ufunc
    # ufunc - the ufunc itself
    # kernel_class - a code generating subclass of _Kernel that provides
    #                the `generate` method called for each element
    #
    # Returns the output value (a tuple of them when ufunc.nout != 1),
    # passed through impl_ret_new_ref.

    arguments = [
        _prepare_argument(context, builder, arg, tyarg)
        for arg, tyarg in zip(args, sig.args)
    ]

    if len(arguments) < ufunc.nin:
        raise RuntimeError(
            "Not enough inputs to {}, expected {} got {}".format(
                ufunc.__name__, ufunc.nin, len(arguments)
            )
        )

    # Complete the argument list with any output that was not passed
    # explicitly.
    for out_i, ret_ty in enumerate(_unpack_output_types(ufunc, sig)):
        if ufunc.nin + out_i >= len(arguments):
            # this out argument is not provided
            if isinstance(ret_ty, types.ArrayCompatible):
                output = _build_array(
                    context, builder, ret_ty, sig.args, arguments
                )
            else:
                # Scalar output: the helper is built around a placeholder
                # constant of the return type.
                output = _prepare_argument(
                    context,
                    builder,
                    ir.Constant(context.get_value_type(ret_ty), None),
                    ret_ty,
                )
            arguments.append(output)
        elif context.enable_nrt:
            # Incref the output
            context.nrt.incref(builder, ret_ty, args[ufunc.nin + out_i])

    inputs = arguments[: ufunc.nin]
    outputs = arguments[ufunc.nin :]
    assert len(outputs) == ufunc.nout

    # The kernel is typed on the element types of the operands.
    outer_sig = _ufunc_loop_sig(
        [a.base_type for a in outputs], [a.base_type for a in inputs]
    )
    kernel = kernel_class(context, builder, outer_sig)
    intpty = context.get_value_type(types.intp)

    indices = [inp.create_iter_indices() for inp in inputs]

    # assume outputs are all the same size, which numpy requires

    loopshape = outputs[0].shape

    # count the number of C and F layout arrays, respectively
    input_layouts = [
        inp.layout for inp in inputs if isinstance(inp, _ArrayHelper)
    ]
    num_c_layout = len([x for x in input_layouts if x == "C"])
    num_f_layout = len([x for x in input_layouts if x == "F"])

    # Only choose F iteration order if more arrays are in F layout.
    # Default to C order otherwise.
    # This is a best effort for performance. NumPy has more fancy logic that
    # uses array iterators in non-trivial cases.
    if num_f_layout > num_c_layout:
        order = "F"
    else:
        order = "C"

    with cgutils.loop_nest(
        builder, loopshape, intp=intpty, order=order
    ) as loop_indices:
        # Load one element from each input at the current loop position.
        vals_in = []
        for i, (index, arg) in enumerate(zip(indices, inputs)):
            index.update_indices(loop_indices, i)
            vals_in.append(arg.load_data(index.as_values()))

        vals_out = _unpack_output_values(
            ufunc, builder, kernel.generate(*vals_in)
        )
        # Outputs are addressed directly with the loop indices.
        for val_out, output in zip(vals_out, outputs):
            output.store_data(loop_indices, val_out)

    out = _pack_output_values(
        ufunc,
        context,
        builder,
        sig.return_type,
        [o.return_val for o in outputs],
    )
    return impl_ret_new_ref(context, builder, sig.return_type, out)
679
+
680
+
681
def numpy_gufunc_kernel(context, builder, sig, args, ufunc, kernel_class):
    """Generate the loop nest that applies a gufunc kernel.

    context - the code generation context
    builder - the code emitter
    sig - signature of the call
    args - the args of the call (inputs followed by outputs)
    ufunc - the gufunc; its ``nin``, ``nout`` and
            ``gufunc_builder.signature`` are used
    kernel_class - kernel class; its ``dufunc.expected_ndims()`` gives the
                   core dimensionality of every argument, and its
                   ``generate`` method is called once per loop iteration

    Nothing is returned: results are written by the kernel through the
    output arguments.
    """
    arguments = []
    expected_ndims = kernel_class.dufunc.expected_ndims()
    # Concatenate the input and output core ndims so they line up with
    # the arguments.
    expected_ndims = expected_ndims[0] + expected_ndims[1]
    is_input = [True] * ufunc.nin + [False] * ufunc.nout
    for arg, ty, exp_ndim, is_inp in zip(
        args, sig.args, expected_ndims, is_input
    ):  # noqa: E501
        if isinstance(ty, types.ArrayCompatible):
            # Create an array helper that iteration returns a subarray
            # with ndim specified by "exp_ndim"
            arr = context.make_array(ty)(context, builder, arg)
            shape = cgutils.unpack_tuple(builder, arr.shape, ty.ndim)
            strides = cgutils.unpack_tuple(builder, arr.strides, ty.ndim)
            inner_arr_ty = ty.copy(ndim=exp_ndim)
            ndim = ty.ndim
            layout = ty.layout
            base_type = ty.dtype
            array_helper = _ArrayGUHelper(
                context,
                builder,
                shape,
                strides,
                arg,
                layout,
                base_type,
                ndim,
                inner_arr_ty,
                is_inp,
            )
            arguments.append(array_helper)
        else:
            scalar_helper = _ScalarHelper(context, builder, arg, ty)
            arguments.append(scalar_helper)
    kernel = kernel_class(context, builder, sig)

    layouts = [
        arg.layout for arg in arguments if isinstance(arg, _ArrayGUHelper)
    ]
    num_c_layout = len([x for x in layouts if x == "C"])
    num_f_layout = len([x for x in layouts if x == "F"])

    # Only choose F iteration order if more arrays are in F layout.
    # Default to C order otherwise.
    # This is a best effort for performance. NumPy has more fancy logic that
    # uses array iterators in non-trivial cases.
    if num_f_layout > num_c_layout:
        order = "F"
    else:
        order = "C"

    outputs = arguments[ufunc.nin :]
    intpty = context.get_value_type(types.intp)
    indices = [inp.create_iter_indices() for inp in arguments]
    # The loop runs over the non-core (leading) dimensions of the first
    # output.
    loopshape_ndim = outputs[0].ndim - outputs[0].inner_arr_ty.ndim
    loopshape = outputs[0].shape[:loopshape_ndim]

    _sig = parse_signature(ufunc.gufunc_builder.signature)
    for (idx_a, sig_a), (idx_b, sig_b) in itertools.combinations(
        zip(range(len(arguments)), _sig[0] + _sig[1]), r=2
    ):
        # For each pair of arguments, both inputs and outputs, must match their
        # inner dimensions if their signatures are the same.
        arg_a, arg_b = arguments[idx_a], arguments[idx_b]
        if sig_a == sig_b and all(
            isinstance(x, _ArrayGUHelper) for x in (arg_a, arg_b)
        ):
            arg_a.guard_match_core_dims(arg_b, len(sig_a))

    # Every array input must agree with the loop shape.
    for arg in arguments[: ufunc.nin]:
        if isinstance(arg, _ArrayGUHelper):
            arg.guard_shape(loopshape)

    with cgutils.loop_nest(
        builder, loopshape, intp=intpty, order=order
    ) as loop_indices:
        vals_in = []
        for i, (index, arg) in enumerate(zip(indices, arguments)):
            index.update_indices(loop_indices, i)
            vals_in.append(arg.load_data(index.as_values()))

        # Inputs and outputs alike are handed to the kernel.
        kernel.generate(*vals_in)
764
+
765
+
766
+ # Kernels are the code to be executed inside the multidimensional loop.
767
+ class _Kernel(object):
768
+ def __init__(self, context, builder, outer_sig):
769
+ self.context = context
770
+ self.builder = builder
771
+ self.outer_sig = outer_sig
772
+
773
+ def cast(self, val, fromty, toty):
774
+ """Numpy uses cast semantics that are different from standard Python
775
+ (for example, it does allow casting from complex to float).
776
+
777
+ This method acts as a patch to context.cast so that it allows
778
+ complex to real/int casts.
779
+
780
+ """
781
+ if isinstance(fromty, types.Complex) and not isinstance(
782
+ toty, types.Complex
783
+ ):
784
+ # attempt conversion of the real part to the specified type.
785
+ # note that NumPy issues a warning in this kind of conversions
786
+ newty = fromty.underlying_float
787
+ attr = self.context.get_getattr(fromty, "real")
788
+ val = attr(self.context, self.builder, fromty, val, "real")
789
+ fromty = newty
790
+ # let the regular cast do the rest...
791
+
792
+ return self.context.cast(self.builder, val, fromty, toty)
793
+
794
+ def generate(self, *args):
795
+ isig = self.inner_sig
796
+ osig = self.outer_sig
797
+ cast_args = [
798
+ self.cast(val, inty, outty)
799
+ for val, inty, outty in zip(args, osig.args, isig.args)
800
+ ]
801
+ if self.cres.objectmode:
802
+ func_type = self.context.call_conv.get_function_type(
803
+ types.pyobject, [types.pyobject] * len(isig.args)
804
+ )
805
+ else:
806
+ func_type = self.context.call_conv.get_function_type(
807
+ isig.return_type, isig.args
808
+ )
809
+ module = self.builder.block.function.module
810
+ entry_point = cgutils.get_or_insert_function(
811
+ module, func_type, self.cres.fndesc.llvm_func_name
812
+ )
813
+ entry_point.attributes.add("alwaysinline")
814
+
815
+ _, res = self.context.call_conv.call_function(
816
+ self.builder, entry_point, isig.return_type, isig.args, cast_args
817
+ )
818
+ return self.cast(res, isig.return_type, osig.return_type)
819
+
820
+
821
def _ufunc_db_function(ufunc):
    """Build a kernel class that lowers *ufunc* through the ufunc database.

    The ufunc loop type information selects the code generation function
    from the table returned by ``context.get_ufunc_info(ufunc)``. That
    table maps a loop identifier to a function with the signature
    (context, builder, signature, args).

    The loop type information has the form 'AB->C'. The letters left of
    '->' are the input types (as NumPy letter types) and those on the
    right are the output types. There must be 'ufunc.nin' letters on the
    left and 'ufunc.nout' letters on the right. For example, a binary
    float loop resulting in a float has the signature 'ff->f'.

    A given ufunc implements many loops, listed in its 'types' attribute.
    The NumPy machinery selects the first loop that fits the calling
    signature (what we call the outer_sig); 'ufunc_find_matching_loop'
    mimics that logic.
    """

    class _KernelImpl(_Kernel):
        def __init__(self, context, builder, outer_sig):
            super().__init__(context, builder, outer_sig)
            operand_types = outer_sig.args + tuple(
                _unpack_output_types(ufunc, outer_sig)
            )
            loop = ufunc_find_matching_loop(ufunc, operand_types)
            ufunc_info = context.get_ufunc_info(ufunc)
            self.fn = ufunc_info.get(loop.ufunc_sig)
            self.inner_sig = _ufunc_loop_sig(loop.outputs, loop.inputs)

            if self.fn is None:
                msg = "Don't know how to lower ufunc '{0}' for loop '{1}'"
                raise NotImplementedError(msg.format(ufunc.__name__, loop))

        def generate(self, *args):
            inner, outer = self.inner_sig, self.outer_sig

            converted = []
            for val, outer_ty, inner_ty in zip(args, outer.args, inner.args):
                converted.append(self.cast(val, outer_ty, inner_ty))

            # The loop implementation runs under the "numpy" error model.
            with force_error_model(self.context, "numpy"):
                raw = self.fn(self.context, self.builder, inner, converted)
            data_models = self.context.data_model_manager
            result = data_models[inner.return_type].from_return(
                self.builder, raw
            )
            return self.cast(result, inner.return_type, outer.return_type)

    return _KernelImpl
872
+
873
+
874
+ ################################################################################
875
+ # Helper functions that register the ufuncs
876
+
877
+
878
def register_ufunc_kernel(ufunc, kernel, lower):
    """Register lowerings of *ufunc* that use *kernel*, and return *kernel*.

    One lowering is registered through *lower* for each possible number
    of explicit output arrays, from none up to ``ufunc.nout``.
    """

    def do_ufunc(context, builder, sig, args):
        return numpy_ufunc_kernel(context, builder, sig, args, ufunc, kernel)

    input_formals = (types.Any,) * ufunc.nin

    # Add a lowering for each out argument that is missing.
    for explicit_outs in range(ufunc.nout + 1):
        formals = input_formals + (types.Array,) * explicit_outs
        lower(ufunc, *formals)(do_ufunc)

    return kernel
891
+
892
+
893
def register_unary_operator_kernel(
    operator, ufunc, kernel, lower, inplace=False
):
    """Register *operator* applied to an array as a lowering of *ufunc*
    using *kernel*. *inplace* must be false."""
    assert not inplace  # are there any inplace unary operators?

    def lower_unary_operator(context, builder, sig, args):
        return numpy_ufunc_kernel(context, builder, sig, args, ufunc, kernel)

    register = lower(operator, types.Array)
    register(lower_unary_operator)
903
+
904
+
905
def register_binary_operator_kernel(op, ufunc, kernel, lower, inplace=False):
    """Register binary operator *op* as a lowering of *ufunc* using
    *kernel*.

    Lowerings are registered for (array, array), (any, array) and
    (array, any). With *inplace* set, the first operand is also passed
    as the explicit output.
    """

    def lower_binary_operator(context, builder, sig, args):
        return numpy_ufunc_kernel(context, builder, sig, args, ufunc, kernel)

    def lower_inplace_operator(context, builder, sig, args):
        # The visible signature is (A, B) -> A
        # The implementation's signature (with explicit output)
        # is (A, B, A) -> A
        full_args = tuple(args) + (args[0],)
        full_sig = typing.signature(
            sig.return_type, *sig.args + (sig.args[0],)
        )
        return numpy_ufunc_kernel(
            context, builder, full_sig, full_args, ufunc, kernel
        )

    if inplace:
        implementation = lower_inplace_operator
    else:
        implementation = lower_binary_operator

    array_kind, any_kind = types.Array, types.Any
    for formals in (
        (array_kind, array_kind),
        (any_kind, array_kind),
        (array_kind, any_kind),
    ):
        lower(op, *formals)(implementation)
925
+
926
+
927
+ ################################################################################
928
+ # Use the contents of ufunc_db to initialize the supported ufuncs
929
+
930
+
931
@registry.lower(operator.pos, types.Array)
def array_positive_impl(context, builder, sig, args):
    """Lower the unary ``+array`` expression.

    The operation is driven through ``numpy_ufunc_kernel`` with
    ``np.positive`` and a kernel that returns its operand unchanged. It is
    defined in this module because the other array-operator lowerings are
    registered here too.
    """

    class _UnaryPositiveKernel(_Kernel):
        def generate(self, *args):
            # Exactly one operand is expected; it is passed through as-is.
            (operand,) = args
            return operand

    return numpy_ufunc_kernel(
        context,
        builder,
        sig,
        args,
        np.positive,
        _UnaryPositiveKernel,
    )
946
+
947
+
948
def register_ufuncs(ufuncs, lower):
    """Register lowerings for *ufuncs* and for the operators built on them.

    Each ufunc is first registered with ``register_ufunc_kernel`` using the
    kernel produced by ``_ufunc_db_function``. The operator-to-ufunc-name
    maps of the ``npydecl`` unary, binary and in-place array operator rules
    are then walked, and each operator is registered against the kernel of
    its ufunc.

    Parameters
    ----------
    ufuncs
        Iterable of ufunc objects. Every ufunc named in the ``npydecl``
        operator maps must be among them, otherwise the kernel lookup
        raises ``KeyError``.
    lower
        Registration callable forwarded to the ``register_*_kernel``
        helpers.

    Raises
    ------
    RuntimeError
        If an operator maps to a ufunc whose ``nin`` is neither 1 nor 2.
    """
    kernels = {
        ufunc: register_ufunc_kernel(ufunc, _ufunc_db_function(ufunc), lower)
        for ufunc in ufuncs
    }

    def register_operators(op_map, inplace):
        # Register every operator of one operator map, choosing the helper
        # by the arity of the backing ufunc.
        for op, ufunc_name in op_map.items():
            ufunc = getattr(np, ufunc_name)
            kernel = kernels[ufunc]
            arity = ufunc.nin
            if arity == 1:
                registrar = register_unary_operator_kernel
            elif arity == 2:
                registrar = register_binary_operator_kernel
            else:
                raise RuntimeError(
                    "There shouldn't be any non-unary or binary operators"
                )
            registrar(op, ufunc, kernel, lower, inplace=inplace)

    out_of_place_maps = (
        npydecl.NumpyRulesUnaryArrayOperator._op_map,
        npydecl.NumpyRulesArrayOperator._op_map,
    )
    for op_map in out_of_place_maps:
        register_operators(op_map, inplace=False)

    register_operators(
        npydecl.NumpyRulesInplaceArrayOperator._op_map, inplace=True
    )


# Populate the lowering registry with every ufunc listed by ufunc_db.
register_ufuncs(ufunc_db.get_ufuncs(), registry.lower)
989
+
990
+
991
@intrinsic
def _make_dtype_object(typingctx, desc):
    """Return the dtype object described by *desc*.

    *desc* is either a literal type, whose ``literal_value`` is handed to
    ``np.dtype``, or a ``NumberClass``, whose ``dtype`` is converted to a
    string first. For any other *desc* the function returns ``None`` and
    so provides no signature.
    """
    if isinstance(desc, types.Literal):
        dtype_spec = desc.literal_value
    elif isinstance(desc, types.functions.NumberClass):
        dtype_spec = str(desc.dtype)
    else:
        return None

    # Go from the description to a NumPy dtype, then to the Numba type that
    # the resulting DType wraps.
    element_type = from_dtype(np.dtype(dtype_spec))
    sig = types.DType(element_type)(desc)

    def codegen(context, builder, signature, args):
        # dtype objects carry no runtime data: they exist only at the type
        # level, so a dummy LLVM value stands in for them.
        return context.get_dummy_value()

    return sig, codegen
1015
+
1016
+
1017
@overload(np.dtype)
def numpy_dtype(desc):
    """Supply the implementation used to lower calls to ``numpy.dtype``.

    Literal and ``NumberClass`` descriptors are handled by delegating to
    ``_make_dtype_object``; any other descriptor raises
    ``errors.NumbaTypeError``.
    """
    accepted_kinds = (types.Literal, types.functions.NumberClass)
    if not isinstance(desc, accepted_kinds):
        raise errors.NumbaTypeError("unknown dtype descriptor: {}".format(desc))

    def imp(desc):
        return _make_dtype_object(desc)

    return imp