PyPI - numba-cuda - Versions diffs - 0.0.1__py3-none-any.whl → 0.0.13__py3-none-any.whl - Mend

numba-cuda 0.0.1 (py3-none-any.whl) → 0.0.13 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (233)

_numba_cuda_redirector.pth +1 -0
_numba_cuda_redirector.py +74 -0
numba_cuda/VERSION +1 -0
numba_cuda/__init__.py +5 -0
numba_cuda/_version.py +19 -0
numba_cuda/numba/cuda/__init__.py +22 -0
numba_cuda/numba/cuda/api.py +526 -0
numba_cuda/numba/cuda/api_util.py +30 -0
numba_cuda/numba/cuda/args.py +77 -0
numba_cuda/numba/cuda/cg.py +62 -0
numba_cuda/numba/cuda/codegen.py +378 -0
numba_cuda/numba/cuda/compiler.py +422 -0
numba_cuda/numba/cuda/cpp_function_wrappers.cu +47 -0
numba_cuda/numba/cuda/cuda_fp16.h +3631 -0
numba_cuda/numba/cuda/cuda_fp16.hpp +2465 -0
numba_cuda/numba/cuda/cuda_paths.py +258 -0
numba_cuda/numba/cuda/cudadecl.py +806 -0
numba_cuda/numba/cuda/cudadrv/__init__.py +9 -0
numba_cuda/numba/cuda/cudadrv/devicearray.py +904 -0
numba_cuda/numba/cuda/cudadrv/devices.py +248 -0
numba_cuda/numba/cuda/cudadrv/driver.py +3201 -0
numba_cuda/numba/cuda/cudadrv/drvapi.py +398 -0
numba_cuda/numba/cuda/cudadrv/dummyarray.py +452 -0
numba_cuda/numba/cuda/cudadrv/enums.py +607 -0
numba_cuda/numba/cuda/cudadrv/error.py +36 -0
numba_cuda/numba/cuda/cudadrv/libs.py +176 -0
numba_cuda/numba/cuda/cudadrv/ndarray.py +20 -0
numba_cuda/numba/cuda/cudadrv/nvrtc.py +260 -0
numba_cuda/numba/cuda/cudadrv/nvvm.py +707 -0
numba_cuda/numba/cuda/cudadrv/rtapi.py +10 -0
numba_cuda/numba/cuda/cudadrv/runtime.py +142 -0
numba_cuda/numba/cuda/cudaimpl.py +1055 -0
numba_cuda/numba/cuda/cudamath.py +140 -0
numba_cuda/numba/cuda/decorators.py +189 -0
numba_cuda/numba/cuda/descriptor.py +33 -0
numba_cuda/numba/cuda/device_init.py +89 -0
numba_cuda/numba/cuda/deviceufunc.py +908 -0
numba_cuda/numba/cuda/dispatcher.py +1057 -0
numba_cuda/numba/cuda/errors.py +59 -0
numba_cuda/numba/cuda/extending.py +7 -0
numba_cuda/numba/cuda/initialize.py +13 -0
numba_cuda/numba/cuda/intrinsic_wrapper.py +77 -0
numba_cuda/numba/cuda/intrinsics.py +198 -0
numba_cuda/numba/cuda/kernels/__init__.py +0 -0
numba_cuda/numba/cuda/kernels/reduction.py +262 -0
numba_cuda/numba/cuda/kernels/transpose.py +65 -0
numba_cuda/numba/cuda/libdevice.py +3382 -0
numba_cuda/numba/cuda/libdevicedecl.py +17 -0
numba_cuda/numba/cuda/libdevicefuncs.py +1057 -0
numba_cuda/numba/cuda/libdeviceimpl.py +83 -0
numba_cuda/numba/cuda/mathimpl.py +448 -0
numba_cuda/numba/cuda/models.py +48 -0
numba_cuda/numba/cuda/nvvmutils.py +235 -0
numba_cuda/numba/cuda/printimpl.py +86 -0
numba_cuda/numba/cuda/random.py +292 -0
numba_cuda/numba/cuda/simulator/__init__.py +38 -0
numba_cuda/numba/cuda/simulator/api.py +110 -0
numba_cuda/numba/cuda/simulator/compiler.py +9 -0
numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +2 -0
numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +432 -0
numba_cuda/numba/cuda/simulator/cudadrv/devices.py +117 -0
numba_cuda/numba/cuda/simulator/cudadrv/driver.py +62 -0
numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +4 -0
numba_cuda/numba/cuda/simulator/cudadrv/dummyarray.py +4 -0
numba_cuda/numba/cuda/simulator/cudadrv/error.py +6 -0
numba_cuda/numba/cuda/simulator/cudadrv/libs.py +2 -0
numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +29 -0
numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +19 -0
numba_cuda/numba/cuda/simulator/kernel.py +308 -0
numba_cuda/numba/cuda/simulator/kernelapi.py +495 -0
numba_cuda/numba/cuda/simulator/reduction.py +15 -0
numba_cuda/numba/cuda/simulator/vector_types.py +58 -0
numba_cuda/numba/cuda/simulator_init.py +17 -0
numba_cuda/numba/cuda/stubs.py +902 -0
numba_cuda/numba/cuda/target.py +440 -0
numba_cuda/numba/cuda/testing.py +202 -0
numba_cuda/numba/cuda/tests/__init__.py +58 -0
numba_cuda/numba/cuda/tests/cudadrv/__init__.py +8 -0
numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +145 -0
numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +145 -0
numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +375 -0
numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +21 -0
numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +179 -0
numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +235 -0
numba_cuda/numba/cuda/tests/cudadrv/test_cuda_libraries.py +22 -0
numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +193 -0
numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +547 -0
numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +249 -0
numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +81 -0
numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +192 -0
numba_cuda/numba/cuda/tests/cudadrv/test_events.py +38 -0
numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +65 -0
numba_cuda/numba/cuda/tests/cudadrv/test_init.py +139 -0
numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +37 -0
numba_cuda/numba/cuda/tests/cudadrv/test_is_fp16.py +12 -0
numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +317 -0
numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +127 -0
numba_cuda/numba/cuda/tests/cudadrv/test_mvc.py +54 -0
numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +199 -0
numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +37 -0
numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +20 -0
numba_cuda/numba/cuda/tests/cudadrv/test_ptds.py +149 -0
numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +36 -0
numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +85 -0
numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +41 -0
numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +122 -0
numba_cuda/numba/cuda/tests/cudapy/__init__.py +8 -0
numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +234 -0
numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +41 -0
numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +58 -0
numba_cuda/numba/cuda/tests/cudapy/jitlink.ptx +30 -0
numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +100 -0
numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +42 -0
numba_cuda/numba/cuda/tests/cudapy/test_array.py +260 -0
numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +201 -0
numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +35 -0
numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +1620 -0
numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +120 -0
numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +24 -0
numba_cuda/numba/cuda/tests/cudapy/test_caching.py +545 -0
numba_cuda/numba/cuda/tests/cudapy/test_casting.py +257 -0
numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +33 -0
numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +276 -0
numba_cuda/numba/cuda/tests/cudapy/test_complex.py +296 -0
numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +20 -0
numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +129 -0
numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +176 -0
numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +147 -0
numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +435 -0
numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +90 -0
numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +94 -0
numba_cuda/numba/cuda/tests/cudapy/test_debug.py +101 -0
numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +221 -0
numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +222 -0
numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +700 -0
numba_cuda/numba/cuda/tests/cudapy/test_enums.py +121 -0
numba_cuda/numba/cuda/tests/cudapy/test_errors.py +79 -0
numba_cuda/numba/cuda/tests/cudapy/test_exception.py +174 -0
numba_cuda/numba/cuda/tests/cudapy/test_extending.py +155 -0
numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +244 -0
numba_cuda/numba/cuda/tests/cudapy/test_forall.py +52 -0
numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +29 -0
numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +66 -0
numba_cuda/numba/cuda/tests/cudapy/test_globals.py +60 -0
numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +456 -0
numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +159 -0
numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +95 -0
numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +37 -0
numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +165 -0
numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +1106 -0
numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +318 -0
numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +99 -0
numba_cuda/numba/cuda/tests/cudapy/test_lang.py +64 -0
numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +119 -0
numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +187 -0
numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +199 -0
numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +164 -0
numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +37 -0
numba_cuda/numba/cuda/tests/cudapy/test_math.py +786 -0
numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +74 -0
numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +113 -0
numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +22 -0
numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +140 -0
numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +46 -0
numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +101 -0
numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +49 -0
numba_cuda/numba/cuda/tests/cudapy/test_operator.py +401 -0
numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +86 -0
numba_cuda/numba/cuda/tests/cudapy/test_overload.py +335 -0
numba_cuda/numba/cuda/tests/cudapy/test_powi.py +124 -0
numba_cuda/numba/cuda/tests/cudapy/test_print.py +128 -0
numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +33 -0
numba_cuda/numba/cuda/tests/cudapy/test_random.py +104 -0
numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +610 -0
numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +125 -0
numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +76 -0
numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +83 -0
numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +85 -0
numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +37 -0
numba_cuda/numba/cuda/tests/cudapy/test_sm.py +444 -0
numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +205 -0
numba_cuda/numba/cuda/tests/cudapy/test_sync.py +271 -0
numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +80 -0
numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +277 -0
numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +47 -0
numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +307 -0
numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +283 -0
numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +20 -0
numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +69 -0
numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +36 -0
numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +37 -0
numba_cuda/numba/cuda/tests/cudapy/test_warning.py +139 -0
numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +276 -0
numba_cuda/numba/cuda/tests/cudasim/__init__.py +6 -0
numba_cuda/numba/cuda/tests/cudasim/support.py +6 -0
numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +102 -0
numba_cuda/numba/cuda/tests/data/__init__.py +0 -0
numba_cuda/numba/cuda/tests/data/cuda_include.cu +5 -0
numba_cuda/numba/cuda/tests/data/error.cu +7 -0
numba_cuda/numba/cuda/tests/data/jitlink.cu +23 -0
numba_cuda/numba/cuda/tests/data/jitlink.ptx +51 -0
numba_cuda/numba/cuda/tests/data/warn.cu +7 -0
numba_cuda/numba/cuda/tests/doc_examples/__init__.py +6 -0
numba_cuda/numba/cuda/tests/doc_examples/ffi/__init__.py +0 -0
numba_cuda/numba/cuda/tests/doc_examples/ffi/functions.cu +49 -0
numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +77 -0
numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +76 -0
numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +82 -0
numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +155 -0
numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +173 -0
numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +109 -0
numba_cuda/numba/cuda/tests/doc_examples/test_random.py +59 -0
numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +76 -0
numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +130 -0
numba_cuda/numba/cuda/tests/doc_examples/test_ufunc.py +50 -0
numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +73 -0
numba_cuda/numba/cuda/tests/nocuda/__init__.py +8 -0
numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +359 -0
numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +36 -0
numba_cuda/numba/cuda/tests/nocuda/test_import.py +49 -0
numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +238 -0
numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +54 -0
numba_cuda/numba/cuda/types.py +37 -0
numba_cuda/numba/cuda/ufuncs.py +662 -0
numba_cuda/numba/cuda/vector_types.py +209 -0
numba_cuda/numba/cuda/vectorizers.py +252 -0
numba_cuda-0.0.13.dist-info/LICENSE +25 -0
numba_cuda-0.0.13.dist-info/METADATA +69 -0
numba_cuda-0.0.13.dist-info/RECORD +231 -0
{numba_cuda-0.0.1.dist-info → numba_cuda-0.0.13.dist-info}/WHEEL +1 -1
numba_cuda-0.0.1.dist-info/METADATA +0 -10
numba_cuda-0.0.1.dist-info/RECORD +0 -5
{numba_cuda-0.0.1.dist-info → numba_cuda-0.0.13.dist-info}/top_level.txt +0 -0

numba_cuda/numba/cuda/libdevicefuncs.py ADDED Viewed

@@ -0,0 +1,1057 @@
+from collections import namedtuple
+from textwrap import indent
+from numba.types import float32, float64, int16, int32, int64, void, Tuple
+from numba.core.typing.templates import signature
+arg = namedtuple("arg", ("name", "ty", "is_ptr"))  # one function parameter: its name, its Numba type, and whether it is passed as a pointer
+functions = {
+    "__nv_abs": (int32, [arg(name="x", ty=int32, is_ptr=False)]),
+    "__nv_acos": (float64, [arg(name="x", ty=float64, is_ptr=False)]),
+    "__nv_acosf": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
+    "__nv_acosh": (float64, [arg(name="x", ty=float64, is_ptr=False)]),
+    "__nv_acoshf": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
+    "__nv_asin": (float64, [arg(name="x", ty=float64, is_ptr=False)]),
+    "__nv_asinf": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
+    "__nv_asinh": (float64, [arg(name="x", ty=float64, is_ptr=False)]),
+    "__nv_asinhf": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
+    "__nv_atan": (float64, [arg(name="x", ty=float64, is_ptr=False)]),
+    "__nv_atan2": (
+        float64,
+        [
+            arg(name="x", ty=float64, is_ptr=False),
+            arg(name="y", ty=float64, is_ptr=False),
+        ],
+    ),
+    "__nv_atan2f": (
+        float32,
+        [
+            arg(name="x", ty=float32, is_ptr=False),
+            arg(name="y", ty=float32, is_ptr=False),
+        ],
+    ),
+    "__nv_atanf": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
+    "__nv_atanh": (float64, [arg(name="x", ty=float64, is_ptr=False)]),
+    "__nv_atanhf": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
+    "__nv_brev": (int32, [arg(name="x", ty=int32, is_ptr=False)]),
+    "__nv_brevll": (int64, [arg(name="x", ty=int64, is_ptr=False)]),
+    "__nv_byte_perm": (
+        int32,
+        [
+            arg(name="x", ty=int32, is_ptr=False),
+            arg(name="y", ty=int32, is_ptr=False),
+            arg(name="z", ty=int32, is_ptr=False),
+        ],
+    ),
+    "__nv_cbrt": (float64, [arg(name="x", ty=float64, is_ptr=False)]),
+    "__nv_cbrtf": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
+    "__nv_ceil": (float64, [arg(name="x", ty=float64, is_ptr=False)]),
+    "__nv_ceilf": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
+    "__nv_clz": (int32, [arg(name="x", ty=int32, is_ptr=False)]),
+    "__nv_clzll": (int32, [arg(name="x", ty=int64, is_ptr=False)]),
+    "__nv_copysign": (
+        float64,
+        [
+            arg(name="x", ty=float64, is_ptr=False),
+            arg(name="y", ty=float64, is_ptr=False),
+        ],
+    ),
+    "__nv_copysignf": (
+        float32,
+        [
+            arg(name="x", ty=float32, is_ptr=False),
+            arg(name="y", ty=float32, is_ptr=False),
+        ],
+    ),
+    "__nv_cos": (float64, [arg(name="x", ty=float64, is_ptr=False)]),
+    "__nv_cosf": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
+    "__nv_cosh": (float64, [arg(name="x", ty=float64, is_ptr=False)]),
+    "__nv_coshf": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
+    "__nv_cospi": (float64, [arg(name="x", ty=float64, is_ptr=False)]),
+    "__nv_cospif": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
+    "__nv_dadd_rd": (
+        float64,
+        [
+            arg(name="x", ty=float64, is_ptr=False),
+            arg(name="y", ty=float64, is_ptr=False),
+        ],
+    ),
+    "__nv_dadd_rn": (
+        float64,
+        [
+            arg(name="x", ty=float64, is_ptr=False),
+            arg(name="y", ty=float64, is_ptr=False),
+        ],
+    ),
+    "__nv_dadd_ru": (
+        float64,
+        [
+            arg(name="x", ty=float64, is_ptr=False),
+            arg(name="y", ty=float64, is_ptr=False),
+        ],
+    ),
+    "__nv_dadd_rz": (
+        float64,
+        [
+            arg(name="x", ty=float64, is_ptr=False),
+            arg(name="y", ty=float64, is_ptr=False),
+        ],
+    ),
+    "__nv_ddiv_rd": (
+        float64,
+        [
+            arg(name="x", ty=float64, is_ptr=False),
+            arg(name="y", ty=float64, is_ptr=False),
+        ],
+    ),
+    "__nv_ddiv_rn": (
+        float64,
+        [
+            arg(name="x", ty=float64, is_ptr=False),
+            arg(name="y", ty=float64, is_ptr=False),
+        ],
+    ),
+    "__nv_ddiv_ru": (
+        float64,
+        [
+            arg(name="x", ty=float64, is_ptr=False),
+            arg(name="y", ty=float64, is_ptr=False),
+        ],
+    ),
+    "__nv_ddiv_rz": (
+        float64,
+        [
+            arg(name="x", ty=float64, is_ptr=False),
+            arg(name="y", ty=float64, is_ptr=False),
+        ],
+    ),
+    "__nv_dmul_rd": (
+        float64,
+        [
+            arg(name="x", ty=float64, is_ptr=False),
+            arg(name="y", ty=float64, is_ptr=False),
+        ],
+    ),
+    "__nv_dmul_rn": (
+        float64,
+        [
+            arg(name="x", ty=float64, is_ptr=False),
+            arg(name="y", ty=float64, is_ptr=False),
+        ],
+    ),
+    "__nv_dmul_ru": (
+        float64,
+        [
+            arg(name="x", ty=float64, is_ptr=False),
+            arg(name="y", ty=float64, is_ptr=False),
+        ],
+    ),
+    "__nv_dmul_rz": (
+        float64,
+        [
+            arg(name="x", ty=float64, is_ptr=False),
+            arg(name="y", ty=float64, is_ptr=False),
+        ],
+    ),
+    "__nv_double2float_rd": (
+        float32,
+        [arg(name="d", ty=float64, is_ptr=False)],
+    ),
+    "__nv_double2float_rn": (
+        float32,
+        [arg(name="d", ty=float64, is_ptr=False)],
+    ),
+    "__nv_double2float_ru": (
+        float32,
+        [arg(name="d", ty=float64, is_ptr=False)],
+    ),
+    "__nv_double2float_rz": (
+        float32,
+        [arg(name="d", ty=float64, is_ptr=False)],
+    ),
+    "__nv_double2hiint": (int32, [arg(name="d", ty=float64, is_ptr=False)]),
+    "__nv_double2int_rd": (int32, [arg(name="d", ty=float64, is_ptr=False)]),
+    "__nv_double2int_rn": (int32, [arg(name="d", ty=float64, is_ptr=False)]),
+    "__nv_double2int_ru": (int32, [arg(name="d", ty=float64, is_ptr=False)]),
+    "__nv_double2int_rz": (int32, [arg(name="d", ty=float64, is_ptr=False)]),
+    "__nv_double2ll_rd": (int64, [arg(name="f", ty=float64, is_ptr=False)]),
+    "__nv_double2ll_rn": (int64, [arg(name="f", ty=float64, is_ptr=False)]),
+    "__nv_double2ll_ru": (int64, [arg(name="f", ty=float64, is_ptr=False)]),
+    "__nv_double2ll_rz": (int64, [arg(name="f", ty=float64, is_ptr=False)]),
+    "__nv_double2loint": (int32, [arg(name="d", ty=float64, is_ptr=False)]),
+    "__nv_double2uint_rd": (int32, [arg(name="d", ty=float64, is_ptr=False)]),
+    "__nv_double2uint_rn": (int32, [arg(name="d", ty=float64, is_ptr=False)]),
+    "__nv_double2uint_ru": (int32, [arg(name="d", ty=float64, is_ptr=False)]),
+    "__nv_double2uint_rz": (int32, [arg(name="d", ty=float64, is_ptr=False)]),
+    "__nv_double2ull_rd": (int64, [arg(name="f", ty=float64, is_ptr=False)]),
+    "__nv_double2ull_rn": (int64, [arg(name="f", ty=float64, is_ptr=False)]),
+    "__nv_double2ull_ru": (int64, [arg(name="f", ty=float64, is_ptr=False)]),
+    "__nv_double2ull_rz": (int64, [arg(name="f", ty=float64, is_ptr=False)]),
+    "__nv_double_as_longlong": (
+        int64,
+        [arg(name="x", ty=float64, is_ptr=False)],
+    ),
+    "__nv_drcp_rd": (float64, [arg(name="x", ty=float64, is_ptr=False)]),
+    "__nv_drcp_rn": (float64, [arg(name="x", ty=float64, is_ptr=False)]),
+    "__nv_drcp_ru": (float64, [arg(name="x", ty=float64, is_ptr=False)]),
+    "__nv_drcp_rz": (float64, [arg(name="x", ty=float64, is_ptr=False)]),
+    "__nv_dsqrt_rd": (float64, [arg(name="x", ty=float64, is_ptr=False)]),
+    "__nv_dsqrt_rn": (float64, [arg(name="x", ty=float64, is_ptr=False)]),
+    "__nv_dsqrt_ru": (float64, [arg(name="x", ty=float64, is_ptr=False)]),
+    "__nv_dsqrt_rz": (float64, [arg(name="x", ty=float64, is_ptr=False)]),
+    "__nv_erf": (float64, [arg(name="x", ty=float64, is_ptr=False)]),
+    "__nv_erfc": (float64, [arg(name="x", ty=float64, is_ptr=False)]),
+    "__nv_erfcf": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
+    "__nv_erfcinv": (float64, [arg(name="x", ty=float64, is_ptr=False)]),
+    "__nv_erfcinvf": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
+    "__nv_erfcx": (float64, [arg(name="x", ty=float64, is_ptr=False)]),
+    "__nv_erfcxf": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
+    "__nv_erff": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
+    "__nv_erfinv": (float64, [arg(name="x", ty=float64, is_ptr=False)]),
+    "__nv_erfinvf": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
+    "__nv_exp": (float64, [arg(name="x", ty=float64, is_ptr=False)]),
+    "__nv_exp10": (float64, [arg(name="x", ty=float64, is_ptr=False)]),
+    "__nv_exp10f": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
+    "__nv_exp2": (float64, [arg(name="x", ty=float64, is_ptr=False)]),
+    "__nv_exp2f": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
+    "__nv_expf": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
+    "__nv_expm1": (float64, [arg(name="x", ty=float64, is_ptr=False)]),
+    "__nv_expm1f": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
+    "__nv_fabs": (float64, [arg(name="f", ty=float64, is_ptr=False)]),
+    "__nv_fabsf": (float32, [arg(name="f", ty=float32, is_ptr=False)]),
+    "__nv_fadd_rd": (
+        float32,
+        [
+            arg(name="x", ty=float32, is_ptr=False),
+            arg(name="y", ty=float32, is_ptr=False),
+        ],
+    ),
+    "__nv_fadd_rn": (
+        float32,
+        [
+            arg(name="x", ty=float32, is_ptr=False),
+            arg(name="y", ty=float32, is_ptr=False),
+        ],
+    ),
+    "__nv_fadd_ru": (
+        float32,
+        [
+            arg(name="x", ty=float32, is_ptr=False),
+            arg(name="y", ty=float32, is_ptr=False),
+        ],
+    ),
+    "__nv_fadd_rz": (
+        float32,
+        [
+            arg(name="x", ty=float32, is_ptr=False),
+            arg(name="y", ty=float32, is_ptr=False),
+        ],
+    ),
+    "__nv_fast_cosf": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
+    "__nv_fast_exp10f": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
+    "__nv_fast_expf": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
+    "__nv_fast_fdividef": (
+        float32,
+        [
+            arg(name="x", ty=float32, is_ptr=False),
+            arg(name="y", ty=float32, is_ptr=False),
+        ],
+    ),
+    "__nv_fast_log10f": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
+    "__nv_fast_log2f": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
+    "__nv_fast_logf": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
+    "__nv_fast_powf": (
+        float32,
+        [
+            arg(name="x", ty=float32, is_ptr=False),
+            arg(name="y", ty=float32, is_ptr=False),
+        ],
+    ),
+    "__nv_fast_sincosf": (
+        void,
+        [
+            arg(name="x", ty=float32, is_ptr=False),
+            arg(name="sptr", ty=float32, is_ptr=True),
+            arg(name="cptr", ty=float32, is_ptr=True),
+        ],
+    ),
+    "__nv_fast_sinf": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
+    "__nv_fast_tanf": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
+    "__nv_fdim": (
+        float64,
+        [
+            arg(name="x", ty=float64, is_ptr=False),
+            arg(name="y", ty=float64, is_ptr=False),
+        ],
+    ),
+    "__nv_fdimf": (
+        float32,
+        [
+            arg(name="x", ty=float32, is_ptr=False),
+            arg(name="y", ty=float32, is_ptr=False),
+        ],
+    ),
+    "__nv_fdiv_rd": (
+        float32,
+        [
+            arg(name="x", ty=float32, is_ptr=False),
+            arg(name="y", ty=float32, is_ptr=False),
+        ],
+    ),
+    "__nv_fdiv_rn": (
+        float32,
+        [
+            arg(name="x", ty=float32, is_ptr=False),
+            arg(name="y", ty=float32, is_ptr=False),
+        ],
+    ),
+    "__nv_fdiv_ru": (
+        float32,
+        [
+            arg(name="x", ty=float32, is_ptr=False),
+            arg(name="y", ty=float32, is_ptr=False),
+        ],
+    ),
+    "__nv_fdiv_rz": (
+        float32,
+        [
+            arg(name="x", ty=float32, is_ptr=False),
+            arg(name="y", ty=float32, is_ptr=False),
+        ],
+    ),
+    "__nv_ffs": (int32, [arg(name="x", ty=int32, is_ptr=False)]),
+    "__nv_ffsll": (int32, [arg(name="x", ty=int64, is_ptr=False)]),
+    "__nv_finitef": (int32, [arg(name="x", ty=float32, is_ptr=False)]),
+    "__nv_float2half_rn": (int16, [arg(name="f", ty=float32, is_ptr=False)]),
+    "__nv_float2int_rd": (int32, [arg(name="in", ty=float32, is_ptr=False)]),
+    "__nv_float2int_rn": (int32, [arg(name="in", ty=float32, is_ptr=False)]),
+    "__nv_float2int_ru": (int32, [arg(name="in", ty=float32, is_ptr=False)]),
+    "__nv_float2int_rz": (int32, [arg(name="in", ty=float32, is_ptr=False)]),
+    "__nv_float2ll_rd": (int64, [arg(name="f", ty=float32, is_ptr=False)]),
+    "__nv_float2ll_rn": (int64, [arg(name="f", ty=float32, is_ptr=False)]),
+    "__nv_float2ll_ru": (int64, [arg(name="f", ty=float32, is_ptr=False)]),
+    "__nv_float2ll_rz": (int64, [arg(name="f", ty=float32, is_ptr=False)]),
+    "__nv_float2uint_rd": (int32, [arg(name="in", ty=float32, is_ptr=False)]),
+    "__nv_float2uint_rn": (int32, [arg(name="in", ty=float32, is_ptr=False)]),
+    "__nv_float2uint_ru": (int32, [arg(name="in", ty=float32, is_ptr=False)]),
+    "__nv_float2uint_rz": (int32, [arg(name="in", ty=float32, is_ptr=False)]),
+    "__nv_float2ull_rd": (int64, [arg(name="f", ty=float32, is_ptr=False)]),
+    "__nv_float2ull_rn": (int64, [arg(name="f", ty=float32, is_ptr=False)]),
+    "__nv_float2ull_ru": (int64, [arg(name="f", ty=float32, is_ptr=False)]),
+    "__nv_float2ull_rz": (int64, [arg(name="f", ty=float32, is_ptr=False)]),
+    "__nv_float_as_int": (int32, [arg(name="x", ty=float32, is_ptr=False)]),
+    "__nv_floor": (float64, [arg(name="f", ty=float64, is_ptr=False)]),
+    "__nv_floorf": (float32, [arg(name="f", ty=float32, is_ptr=False)]),
+    "__nv_fma": (
+        float64,
+        [
+            arg(name="x", ty=float64, is_ptr=False),
+            arg(name="y", ty=float64, is_ptr=False),
+            arg(name="z", ty=float64, is_ptr=False),
+        ],
+    ),
+    "__nv_fma_rd": (
+        float64,
+        [
+            arg(name="x", ty=float64, is_ptr=False),
+            arg(name="y", ty=float64, is_ptr=False),
+            arg(name="z", ty=float64, is_ptr=False),
+        ],
+    ),
+    "__nv_fma_rn": (
+        float64,
+        [
+            arg(name="x", ty=float64, is_ptr=False),
+            arg(name="y", ty=float64, is_ptr=False),
+            arg(name="z", ty=float64, is_ptr=False),
+        ],
+    ),
+    "__nv_fma_ru": (
+        float64,
+        [
+            arg(name="x", ty=float64, is_ptr=False),
+            arg(name="y", ty=float64, is_ptr=False),
+            arg(name="z", ty=float64, is_ptr=False),
+        ],
+    ),
+    "__nv_fma_rz": (
+        float64,
+        [
+            arg(name="x", ty=float64, is_ptr=False),
+            arg(name="y", ty=float64, is_ptr=False),
+            arg(name="z", ty=float64, is_ptr=False),
+        ],
+    ),
+    "__nv_fmaf": (
+        float32,
+        [
+            arg(name="x", ty=float32, is_ptr=False),
+            arg(name="y", ty=float32, is_ptr=False),
+            arg(name="z", ty=float32, is_ptr=False),
+        ],
+    ),
+    "__nv_fmaf_rd": (
+        float32,
+        [
+            arg(name="x", ty=float32, is_ptr=False),
+            arg(name="y", ty=float32, is_ptr=False),
+            arg(name="z", ty=float32, is_ptr=False),
+        ],
+    ),
+    "__nv_fmaf_rn": (
+        float32,
+        [
+            arg(name="x", ty=float32, is_ptr=False),
+            arg(name="y", ty=float32, is_ptr=False),
+            arg(name="z", ty=float32, is_ptr=False),
+        ],
+    ),
+    "__nv_fmaf_ru": (
+        float32,
+        [
+            arg(name="x", ty=float32, is_ptr=False),
+            arg(name="y", ty=float32, is_ptr=False),
+            arg(name="z", ty=float32, is_ptr=False),
+        ],
+    ),
+    "__nv_fmaf_rz": (
+        float32,
+        [
+            arg(name="x", ty=float32, is_ptr=False),
+            arg(name="y", ty=float32, is_ptr=False),
+            arg(name="z", ty=float32, is_ptr=False),
+        ],
+    ),
+    "__nv_fmax": (
+        float64,
+        [
+            arg(name="x", ty=float64, is_ptr=False),
+            arg(name="y", ty=float64, is_ptr=False),
+        ],
+    ),
+    "__nv_fmaxf": (
+        float32,
+        [
+            arg(name="x", ty=float32, is_ptr=False),
+            arg(name="y", ty=float32, is_ptr=False),
+        ],
+    ),
+    "__nv_fmin": (
+        float64,
+        [
+            arg(name="x", ty=float64, is_ptr=False),
+            arg(name="y", ty=float64, is_ptr=False),
+        ],
+    ),
+    "__nv_fminf": (
+        float32,
+        [
+            arg(name="x", ty=float32, is_ptr=False),
+            arg(name="y", ty=float32, is_ptr=False),
+        ],
+    ),
+    "__nv_fmod": (
+        float64,
+        [
+            arg(name="x", ty=float64, is_ptr=False),
+            arg(name="y", ty=float64, is_ptr=False),
+        ],
+    ),
+    "__nv_fmodf": (
+        float32,
+        [
+            arg(name="x", ty=float32, is_ptr=False),
+            arg(name="y", ty=float32, is_ptr=False),
+        ],
+    ),
+    "__nv_fmul_rd": (
+        float32,
+        [
+            arg(name="x", ty=float32, is_ptr=False),
+            arg(name="y", ty=float32, is_ptr=False),
+        ],
+    ),
+    "__nv_fmul_rn": (
+        float32,
+        [
+            arg(name="x", ty=float32, is_ptr=False),
+            arg(name="y", ty=float32, is_ptr=False),
+        ],
+    ),
+    "__nv_fmul_ru": (
+        float32,
+        [
+            arg(name="x", ty=float32, is_ptr=False),
+            arg(name="y", ty=float32, is_ptr=False),
+        ],
+    ),
+    "__nv_fmul_rz": (
+        float32,
+        [
+            arg(name="x", ty=float32, is_ptr=False),
+            arg(name="y", ty=float32, is_ptr=False),
+        ],
+    ),
+    "__nv_frcp_rd": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
+    "__nv_frcp_rn": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
+    "__nv_frcp_ru": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
+    "__nv_frcp_rz": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
+    "__nv_frexp": (
+        float64,
+        [
+            arg(name="x", ty=float64, is_ptr=False),
+            arg(name="b", ty=int32, is_ptr=True),
+        ],
+    ),
+    "__nv_frexpf": (
+        float32,
+        [
+            arg(name="x", ty=float32, is_ptr=False),
+            arg(name="b", ty=int32, is_ptr=True),
+        ],
+    ),
+    "__nv_frsqrt_rn": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
+    "__nv_fsqrt_rd": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
+    "__nv_fsqrt_rn": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
+    "__nv_fsqrt_ru": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
+    "__nv_fsqrt_rz": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
+    "__nv_fsub_rd": (
+        float32,
+        [
+            arg(name="x", ty=float32, is_ptr=False),
+            arg(name="y", ty=float32, is_ptr=False),
+        ],
+    ),
+    "__nv_fsub_rn": (
+        float32,
+        [
+            arg(name="x", ty=float32, is_ptr=False),
+            arg(name="y", ty=float32, is_ptr=False),
+        ],
+    ),
+    "__nv_fsub_ru": (
+        float32,
+        [
+            arg(name="x", ty=float32, is_ptr=False),
+            arg(name="y", ty=float32, is_ptr=False),
+        ],
+    ),
+    "__nv_fsub_rz": (
+        float32,
+        [
+            arg(name="x", ty=float32, is_ptr=False),
+            arg(name="y", ty=float32, is_ptr=False),
+        ],
+    ),
+    "__nv_hadd": (
+        int32,
+        [
+            arg(name="x", ty=int32, is_ptr=False),
+            arg(name="y", ty=int32, is_ptr=False),
+        ],
+    ),
+    "__nv_half2float": (float32, [arg(name="h", ty=int16, is_ptr=False)]),
+    "__nv_hiloint2double": (
+        float64,
+        [
+            arg(name="x", ty=int32, is_ptr=False),
+            arg(name="y", ty=int32, is_ptr=False),
+        ],
+    ),
+    "__nv_hypot": (
+        float64,
+        [
+            arg(name="x", ty=float64, is_ptr=False),
+            arg(name="y", ty=float64, is_ptr=False),
+        ],
+    ),
+    "__nv_hypotf": (
+        float32,
+        [
+            arg(name="x", ty=float32, is_ptr=False),
+            arg(name="y", ty=float32, is_ptr=False),
+        ],
+    ),
+    "__nv_ilogb": (int32, [arg(name="x", ty=float64, is_ptr=False)]),
+    "__nv_ilogbf": (int32, [arg(name="x", ty=float32, is_ptr=False)]),
+    "__nv_int2double_rn": (float64, [arg(name="i", ty=int32, is_ptr=False)]),
+    "__nv_int2float_rd": (float32, [arg(name="in", ty=int32, is_ptr=False)]),
+    "__nv_int2float_rn": (float32, [arg(name="in", ty=int32, is_ptr=False)]),
+    "__nv_int2float_ru": (float32, [arg(name="in", ty=int32, is_ptr=False)]),
+    "__nv_int2float_rz": (float32, [arg(name="in", ty=int32, is_ptr=False)]),
+    "__nv_int_as_float": (float32, [arg(name="x", ty=int32, is_ptr=False)]),
+    "__nv_isfinited": (int32, [arg(name="x", ty=float64, is_ptr=False)]),
+    "__nv_isinfd": (int32, [arg(name="x", ty=float64, is_ptr=False)]),
+    "__nv_isinff": (int32, [arg(name="x", ty=float32, is_ptr=False)]),
+    "__nv_isnand": (int32, [arg(name="x", ty=float64, is_ptr=False)]),
+    "__nv_isnanf": (int32, [arg(name="x", ty=float32, is_ptr=False)]),
+    "__nv_j0": (float64, [arg(name="x", ty=float64, is_ptr=False)]),
+    "__nv_j0f": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
+    "__nv_j1": (float64, [arg(name="x", ty=float64, is_ptr=False)]),
+    "__nv_j1f": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
+    "__nv_jn": (
+        float64,
+        [
+            arg(name="n", ty=int32, is_ptr=False),
+            arg(name="x", ty=float64, is_ptr=False),
+        ],
+    ),
+    "__nv_jnf": (
+        float32,
+        [
+            arg(name="n", ty=int32, is_ptr=False),
+            arg(name="x", ty=float32, is_ptr=False),
+        ],
+    ),
+    "__nv_ldexp": (
+        float64,
+        [
+            arg(name="x", ty=float64, is_ptr=False),
+            arg(name="y", ty=int32, is_ptr=False),
+        ],
+    ),
+    "__nv_ldexpf": (
+        float32,
+        [
+            arg(name="x", ty=float32, is_ptr=False),
+            arg(name="y", ty=int32, is_ptr=False),
+        ],
+    ),
+    "__nv_lgamma": (float64, [arg(name="x", ty=float64, is_ptr=False)]),
+    "__nv_lgammaf": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
+    "__nv_ll2double_rd": (float64, [arg(name="l", ty=int64, is_ptr=False)]),
+    "__nv_ll2double_rn": (float64, [arg(name="l", ty=int64, is_ptr=False)]),
+    "__nv_ll2double_ru": (float64, [arg(name="l", ty=int64, is_ptr=False)]),
+    "__nv_ll2double_rz": (float64, [arg(name="l", ty=int64, is_ptr=False)]),
+    "__nv_ll2float_rd": (float32, [arg(name="l", ty=int64, is_ptr=False)]),
+    "__nv_ll2float_rn": (float32, [arg(name="l", ty=int64, is_ptr=False)]),
+    "__nv_ll2float_ru": (float32, [arg(name="l", ty=int64, is_ptr=False)]),
+    "__nv_ll2float_rz": (float32, [arg(name="l", ty=int64, is_ptr=False)]),
+    "__nv_llabs": (int64, [arg(name="x", ty=int64, is_ptr=False)]),
+    "__nv_llmax": (
+        int64,
+        [
+            arg(name="x", ty=int64, is_ptr=False),
+            arg(name="y", ty=int64, is_ptr=False),
+        ],
+    ),
+    "__nv_llmin": (
+        int64,
+        [
+            arg(name="x", ty=int64, is_ptr=False),
+            arg(name="y", ty=int64, is_ptr=False),
+        ],
+    ),
+    "__nv_llrint": (int64, [arg(name="x", ty=float64, is_ptr=False)]),
+    "__nv_llrintf": (int64, [arg(name="x", ty=float32, is_ptr=False)]),
+    "__nv_llround": (int64, [arg(name="x", ty=float64, is_ptr=False)]),
+    "__nv_llroundf": (int64, [arg(name="x", ty=float32, is_ptr=False)]),
+    "__nv_log": (float64, [arg(name="x", ty=float64, is_ptr=False)]),
+    "__nv_log10": (float64, [arg(name="x", ty=float64, is_ptr=False)]),
+    "__nv_log10f": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
+    "__nv_log1p": (float64, [arg(name="x", ty=float64, is_ptr=False)]),
+    "__nv_log1pf": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
+    "__nv_log2": (float64, [arg(name="x", ty=float64, is_ptr=False)]),
+    "__nv_log2f": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
+    "__nv_logb": (float64, [arg(name="x", ty=float64, is_ptr=False)]),
+    "__nv_logbf": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
+    "__nv_logf": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
+    "__nv_longlong_as_double": (
+        float64,
+        [arg(name="x", ty=int64, is_ptr=False)],
+    ),
+    "__nv_max": (
+        int32,
+        [
+            arg(name="x", ty=int32, is_ptr=False),
+            arg(name="y", ty=int32, is_ptr=False),
+        ],
+    ),
+    "__nv_min": (
+        int32,
+        [
+            arg(name="x", ty=int32, is_ptr=False),
+            arg(name="y", ty=int32, is_ptr=False),
+        ],
+    ),
+    "__nv_modf": (
+        float64,
+        [
+            arg(name="x", ty=float64, is_ptr=False),
+            arg(name="b", ty=float64, is_ptr=True),
+        ],
+    ),
+    "__nv_modff": (
+        float32,
+        [
+            arg(name="x", ty=float32, is_ptr=False),
+            arg(name="b", ty=float32, is_ptr=True),
+        ],
+    ),
+    "__nv_mul24": (
+        int32,
+        [
+            arg(name="x", ty=int32, is_ptr=False),
+            arg(name="y", ty=int32, is_ptr=False),
+        ],
+    ),
+    "__nv_mul64hi": (
+        int64,
+        [
+            arg(name="x", ty=int64, is_ptr=False),
+            arg(name="y", ty=int64, is_ptr=False),
+        ],
+    ),
+    "__nv_mulhi": (
+        int32,
+        [
+            arg(name="x", ty=int32, is_ptr=False),
+            arg(name="y", ty=int32, is_ptr=False),
+        ],
+    ),
+    # __nv_nan and __nv_nanf are excluded - they return a representation of a
+    # quiet NaN, but the argument they take seems to be undocumented, and
+    # follows a strange form - it is not an output like every other pointer
+    # argument. If a NaN is required, one can be obtained in CUDA Python by
+    # other means, e.g. `math.nan`. They are left in this list for completeness
+    # / reference.
+    # "__nv_nan": (float64, [arg(name="tagp", ty=int8, is_ptr=True)]),
+    # "__nv_nanf": (float32, [arg(name="tagp", ty=int8, is_ptr=True)]),
+    "__nv_nearbyint": (float64, [arg(name="x", ty=float64, is_ptr=False)]),
+    "__nv_nearbyintf": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
+    "__nv_nextafter": (
+        float64,
+        [
+            arg(name="x", ty=float64, is_ptr=False),
+            arg(name="y", ty=float64, is_ptr=False),
+        ],
+    ),
+    "__nv_nextafterf": (
+        float32,
+        [
+            arg(name="x", ty=float32, is_ptr=False),
+            arg(name="y", ty=float32, is_ptr=False),
+        ],
+    ),
+    "__nv_normcdf": (float64, [arg(name="x", ty=float64, is_ptr=False)]),
+    "__nv_normcdff": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
+    "__nv_normcdfinv": (float64, [arg(name="x", ty=float64, is_ptr=False)]),
+    "__nv_normcdfinvf": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
+    "__nv_popc": (int32, [arg(name="x", ty=int32, is_ptr=False)]),
+    "__nv_popcll": (int32, [arg(name="x", ty=int64, is_ptr=False)]),
+    "__nv_pow": (
+        float64,
+        [
+            arg(name="x", ty=float64, is_ptr=False),
+            arg(name="y", ty=float64, is_ptr=False),
+        ],
+    ),
+    "__nv_powf": (
+        float32,
+        [
+            arg(name="x", ty=float32, is_ptr=False),
+            arg(name="y", ty=float32, is_ptr=False),
+        ],
+    ),
+    "__nv_powi": (
+        float64,
+        [
+            arg(name="x", ty=float64, is_ptr=False),
+            arg(name="y", ty=int32, is_ptr=False),
+        ],
+    ),
+    "__nv_powif": (
+        float32,
+        [
+            arg(name="x", ty=float32, is_ptr=False),
+            arg(name="y", ty=int32, is_ptr=False),
+        ],
+    ),
+    "__nv_rcbrt": (float64, [arg(name="x", ty=float64, is_ptr=False)]),
+    "__nv_rcbrtf": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
+    "__nv_remainder": (
+        float64,
+        [
+            arg(name="x", ty=float64, is_ptr=False),
+            arg(name="y", ty=float64, is_ptr=False),
+        ],
+    ),
+    "__nv_remainderf": (
+        float32,
+        [
+            arg(name="x", ty=float32, is_ptr=False),
+            arg(name="y", ty=float32, is_ptr=False),
+        ],
+    ),
+    "__nv_remquo": (
+        float64,
+        [
+            arg(name="x", ty=float64, is_ptr=False),
+            arg(name="y", ty=float64, is_ptr=False),
+            arg(name="c", ty=int32, is_ptr=True),
+        ],
+    ),
+    "__nv_remquof": (
+        float32,
+        [
+            arg(name="x", ty=float32, is_ptr=False),
+            arg(name="y", ty=float32, is_ptr=False),
+            arg(name="quo", ty=int32, is_ptr=True),
+        ],
+    ),
+    "__nv_rhadd": (
+        int32,
+        [
+            arg(name="x", ty=int32, is_ptr=False),
+            arg(name="y", ty=int32, is_ptr=False),
+        ],
+    ),
+    "__nv_rint": (float64, [arg(name="x", ty=float64, is_ptr=False)]),
+    "__nv_rintf": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
+    "__nv_round": (float64, [arg(name="x", ty=float64, is_ptr=False)]),
+    "__nv_roundf": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
+    "__nv_rsqrt": (float64, [arg(name="x", ty=float64, is_ptr=False)]),
+    "__nv_rsqrtf": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
+    "__nv_sad": (
+        int32,
+        [
+            arg(name="x", ty=int32, is_ptr=False),
+            arg(name="y", ty=int32, is_ptr=False),
+            arg(name="z", ty=int32, is_ptr=False),
+        ],
+    ),
+    "__nv_saturatef": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
+    "__nv_scalbn": (
+        float64,
+        [
+            arg(name="x", ty=float64, is_ptr=False),
+            arg(name="y", ty=int32, is_ptr=False),
+        ],
+    ),
+    "__nv_scalbnf": (
+        float32,
+        [
+            arg(name="x", ty=float32, is_ptr=False),
+            arg(name="y", ty=int32, is_ptr=False),
+        ],
+    ),
+    "__nv_signbitd": (int32, [arg(name="x", ty=float64, is_ptr=False)]),
+    "__nv_signbitf": (int32, [arg(name="x", ty=float32, is_ptr=False)]),
+    "__nv_sin": (float64, [arg(name="x", ty=float64, is_ptr=False)]),
+    "__nv_sincos": (
+        void,
+        [
+            arg(name="x", ty=float64, is_ptr=False),
+            arg(name="sptr", ty=float64, is_ptr=True),
+            arg(name="cptr", ty=float64, is_ptr=True),
+        ],
+    ),
+    "__nv_sincosf": (
+        void,
+        [
+            arg(name="x", ty=float32, is_ptr=False),
+            arg(name="sptr", ty=float32, is_ptr=True),
+            arg(name="cptr", ty=float32, is_ptr=True),
+        ],
+    ),
+    "__nv_sincospi": (
+        void,
+        [
+            arg(name="x", ty=float64, is_ptr=False),
+            arg(name="sptr", ty=float64, is_ptr=True),
+            arg(name="cptr", ty=float64, is_ptr=True),
+        ],
+    ),
+    "__nv_sincospif": (
+        void,
+        [
+            arg(name="x", ty=float32, is_ptr=False),
+            arg(name="sptr", ty=float32, is_ptr=True),
+            arg(name="cptr", ty=float32, is_ptr=True),
+        ],
+    ),
+    "__nv_sinf": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
+    "__nv_sinh": (float64, [arg(name="x", ty=float64, is_ptr=False)]),
+    "__nv_sinhf": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
+    "__nv_sinpi": (float64, [arg(name="x", ty=float64, is_ptr=False)]),
+    "__nv_sinpif": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
+    "__nv_sqrt": (float64, [arg(name="x", ty=float64, is_ptr=False)]),
+    "__nv_sqrtf": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
+    "__nv_tan": (float64, [arg(name="x", ty=float64, is_ptr=False)]),
+    "__nv_tanf": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
+    "__nv_tanh": (float64, [arg(name="x", ty=float64, is_ptr=False)]),
+    "__nv_tanhf": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
+    "__nv_tgamma": (float64, [arg(name="x", ty=float64, is_ptr=False)]),
+    "__nv_tgammaf": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
+    "__nv_trunc": (float64, [arg(name="x", ty=float64, is_ptr=False)]),
+    "__nv_truncf": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
+    "__nv_uhadd": (
+        int32,
+        [
+            arg(name="x", ty=int32, is_ptr=False),
+            arg(name="y", ty=int32, is_ptr=False),
+        ],
+    ),
+    "__nv_uint2double_rn": (float64, [arg(name="i", ty=int32, is_ptr=False)]),
+    "__nv_uint2float_rd": (float32, [arg(name="in", ty=int32, is_ptr=False)]),
+    "__nv_uint2float_rn": (float32, [arg(name="in", ty=int32, is_ptr=False)]),
+    "__nv_uint2float_ru": (float32, [arg(name="in", ty=int32, is_ptr=False)]),
+    "__nv_uint2float_rz": (float32, [arg(name="in", ty=int32, is_ptr=False)]),
+    "__nv_ull2double_rd": (float64, [arg(name="l", ty=int64, is_ptr=False)]),
+    "__nv_ull2double_rn": (float64, [arg(name="l", ty=int64, is_ptr=False)]),
+    "__nv_ull2double_ru": (float64, [arg(name="l", ty=int64, is_ptr=False)]),
+    "__nv_ull2double_rz": (float64, [arg(name="l", ty=int64, is_ptr=False)]),
+    "__nv_ull2float_rd": (float32, [arg(name="l", ty=int64, is_ptr=False)]),
+    "__nv_ull2float_rn": (float32, [arg(name="l", ty=int64, is_ptr=False)]),
+    "__nv_ull2float_ru": (float32, [arg(name="l", ty=int64, is_ptr=False)]),
+    "__nv_ull2float_rz": (float32, [arg(name="l", ty=int64, is_ptr=False)]),
+    "__nv_ullmax": (
+        int64,
+        [
+            arg(name="x", ty=int64, is_ptr=False),
+            arg(name="y", ty=int64, is_ptr=False),
+        ],
+    ),
+    "__nv_ullmin": (
+        int64,
+        [
+            arg(name="x", ty=int64, is_ptr=False),
+            arg(name="y", ty=int64, is_ptr=False),
+        ],
+    ),
+    "__nv_umax": (
+        int32,
+        [
+            arg(name="x", ty=int32, is_ptr=False),
+            arg(name="y", ty=int32, is_ptr=False),
+        ],
+    ),
+    "__nv_umin": (
+        int32,
+        [
+            arg(name="x", ty=int32, is_ptr=False),
+            arg(name="y", ty=int32, is_ptr=False),
+        ],
+    ),
+    "__nv_umul24": (
+        int32,
+        [
+            arg(name="x", ty=int32, is_ptr=False),
+            arg(name="y", ty=int32, is_ptr=False),
+        ],
+    ),
+    "__nv_umul64hi": (
+        int64,
+        [
+            arg(name="x", ty=int64, is_ptr=False),
+            arg(name="y", ty=int64, is_ptr=False),
+        ],
+    ),
+    "__nv_umulhi": (
+        int32,
+        [
+            arg(name="x", ty=int32, is_ptr=False),
+            arg(name="y", ty=int32, is_ptr=False),
+        ],
+    ),
+    "__nv_urhadd": (
+        int32,
+        [
+            arg(name="x", ty=int32, is_ptr=False),
+            arg(name="y", ty=int32, is_ptr=False),
+        ],
+    ),
+    "__nv_usad": (
+        int32,
+        [
+            arg(name="x", ty=int32, is_ptr=False),
+            arg(name="y", ty=int32, is_ptr=False),
+            arg(name="z", ty=int32, is_ptr=False),
+        ],
+    ),
+    "__nv_y0": (float64, [arg(name="x", ty=float64, is_ptr=False)]),
+    "__nv_y0f": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
+    "__nv_y1": (float64, [arg(name="x", ty=float64, is_ptr=False)]),
+    "__nv_y1f": (float32, [arg(name="x", ty=float32, is_ptr=False)]),
+    "__nv_yn": (
+        float64,
+        [
+            arg(name="n", ty=int32, is_ptr=False),
+            arg(name="x", ty=float64, is_ptr=False),
+        ],
+    ),
+    "__nv_ynf": (
+        float32,
+        [
+            arg(name="n", ty=int32, is_ptr=False),
+            arg(name="x", ty=float32, is_ptr=False),
+        ],
+    ),
+}
+def create_signature(retty, args):
+    """
+    Given the return type and arguments for a libdevice function, return the
+    signature of the stub function used to call it from CUDA Python.
+    """
+    # Any pointer arguments should be part of the return type.
+    return_types = [arg.ty for arg in args if arg.is_ptr]
+    # If the return type is void, there is no point adding it to the list of
+    # return types.
+    if retty != void:
+        return_types.insert(0, retty)
+    if len(return_types) > 1:
+        retty = Tuple(return_types)
+    else:
+        retty = return_types[0]
+    argtypes = [arg.ty for arg in args if not arg.is_ptr]
+    return signature(retty, *argtypes)
+# The following code generates the stubs for libdevice functions.
+#
+# Stubs can be regenerated (e.g. if the functions dict above is modified) with:
+#
+# python -c "from numba.cuda.libdevicefuncs import generate_stubs; \
+#            generate_stubs()" > numba/cuda/libdevice.py
+# Docstring text for one generated stub. generate_stubs() fills in {func}
+# with the libdevice function name, {param_types} with the joined
+# param_template entries, and {retty} with the stub's return type.
+docstring_template = """
+See https://docs.nvidia.com/cuda/libdevice-users-guide/{func}.html
+{param_types}
+:rtype: {retty}
+"""
+# Docstring entry for a single argument; formatted with the argument
+# description bound to `a`, reading its `name` and `ty` attributes. The
+# backslash after the opening quotes suppresses the leading newline.
+param_template = """\
+:param {a.name}: Argument.
+:type {a.name}: {a.ty}"""
+def generate_stubs():
+    for name, (retty, args) in functions.items():
+        # Some libdevice functions have arguments called `in`, which causes a
+        # syntax error in Python, so we rename these to `x`.
+        def argname(arg):
+            if arg.name == "in":
+                return "x"
+            else:
+                return arg.name
+        argnames = [argname(a) for a in args if not a.is_ptr]
+        argstr = ", ".join(argnames)
+        signature = create_signature(retty, args)
+        param_types = "\n".join(
+            [param_template.format(a=a) for a in args if not a.is_ptr]
+        )
+        docstring = docstring_template.format(
+            param_types=param_types, retty=signature.return_type, func=name
+        )
+        docstring = indent(docstring, "    ")
+        print(f'def {name[5:]}({argstr}):\n    """{docstring}"""\n\n')

numba-cuda 0.0.1__py3-none-any.whl → 0.0.13__py3-none-any.whl

numba-cuda 0.0.1py3-none-any.whl → 0.0.13py3-none-any.whl