numba-cuda 0.0.0__py3-none-any.whl → 0.0.12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- _numba_cuda_redirector.pth +1 -0
- _numba_cuda_redirector.py +74 -0
- numba_cuda/VERSION +1 -0
- numba_cuda/__init__.py +5 -0
- numba_cuda/_version.py +19 -0
- numba_cuda/numba/cuda/__init__.py +22 -0
- numba_cuda/numba/cuda/api.py +526 -0
- numba_cuda/numba/cuda/api_util.py +30 -0
- numba_cuda/numba/cuda/args.py +77 -0
- numba_cuda/numba/cuda/cg.py +62 -0
- numba_cuda/numba/cuda/codegen.py +378 -0
- numba_cuda/numba/cuda/compiler.py +422 -0
- numba_cuda/numba/cuda/cpp_function_wrappers.cu +47 -0
- numba_cuda/numba/cuda/cuda_fp16.h +3631 -0
- numba_cuda/numba/cuda/cuda_fp16.hpp +2465 -0
- numba_cuda/numba/cuda/cuda_paths.py +258 -0
- numba_cuda/numba/cuda/cudadecl.py +806 -0
- numba_cuda/numba/cuda/cudadrv/__init__.py +9 -0
- numba_cuda/numba/cuda/cudadrv/devicearray.py +904 -0
- numba_cuda/numba/cuda/cudadrv/devices.py +248 -0
- numba_cuda/numba/cuda/cudadrv/driver.py +3201 -0
- numba_cuda/numba/cuda/cudadrv/drvapi.py +398 -0
- numba_cuda/numba/cuda/cudadrv/dummyarray.py +452 -0
- numba_cuda/numba/cuda/cudadrv/enums.py +607 -0
- numba_cuda/numba/cuda/cudadrv/error.py +36 -0
- numba_cuda/numba/cuda/cudadrv/libs.py +176 -0
- numba_cuda/numba/cuda/cudadrv/ndarray.py +20 -0
- numba_cuda/numba/cuda/cudadrv/nvrtc.py +260 -0
- numba_cuda/numba/cuda/cudadrv/nvvm.py +707 -0
- numba_cuda/numba/cuda/cudadrv/rtapi.py +10 -0
- numba_cuda/numba/cuda/cudadrv/runtime.py +142 -0
- numba_cuda/numba/cuda/cudaimpl.py +1055 -0
- numba_cuda/numba/cuda/cudamath.py +140 -0
- numba_cuda/numba/cuda/decorators.py +189 -0
- numba_cuda/numba/cuda/descriptor.py +33 -0
- numba_cuda/numba/cuda/device_init.py +89 -0
- numba_cuda/numba/cuda/deviceufunc.py +908 -0
- numba_cuda/numba/cuda/dispatcher.py +1057 -0
- numba_cuda/numba/cuda/errors.py +59 -0
- numba_cuda/numba/cuda/extending.py +7 -0
- numba_cuda/numba/cuda/initialize.py +13 -0
- numba_cuda/numba/cuda/intrinsic_wrapper.py +77 -0
- numba_cuda/numba/cuda/intrinsics.py +198 -0
- numba_cuda/numba/cuda/kernels/__init__.py +0 -0
- numba_cuda/numba/cuda/kernels/reduction.py +262 -0
- numba_cuda/numba/cuda/kernels/transpose.py +65 -0
- numba_cuda/numba/cuda/libdevice.py +3382 -0
- numba_cuda/numba/cuda/libdevicedecl.py +17 -0
- numba_cuda/numba/cuda/libdevicefuncs.py +1057 -0
- numba_cuda/numba/cuda/libdeviceimpl.py +83 -0
- numba_cuda/numba/cuda/mathimpl.py +448 -0
- numba_cuda/numba/cuda/models.py +48 -0
- numba_cuda/numba/cuda/nvvmutils.py +235 -0
- numba_cuda/numba/cuda/printimpl.py +86 -0
- numba_cuda/numba/cuda/random.py +292 -0
- numba_cuda/numba/cuda/simulator/__init__.py +38 -0
- numba_cuda/numba/cuda/simulator/api.py +110 -0
- numba_cuda/numba/cuda/simulator/compiler.py +9 -0
- numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +2 -0
- numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +432 -0
- numba_cuda/numba/cuda/simulator/cudadrv/devices.py +117 -0
- numba_cuda/numba/cuda/simulator/cudadrv/driver.py +62 -0
- numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +4 -0
- numba_cuda/numba/cuda/simulator/cudadrv/dummyarray.py +4 -0
- numba_cuda/numba/cuda/simulator/cudadrv/error.py +6 -0
- numba_cuda/numba/cuda/simulator/cudadrv/libs.py +2 -0
- numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +29 -0
- numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +19 -0
- numba_cuda/numba/cuda/simulator/kernel.py +308 -0
- numba_cuda/numba/cuda/simulator/kernelapi.py +495 -0
- numba_cuda/numba/cuda/simulator/reduction.py +15 -0
- numba_cuda/numba/cuda/simulator/vector_types.py +58 -0
- numba_cuda/numba/cuda/simulator_init.py +17 -0
- numba_cuda/numba/cuda/stubs.py +902 -0
- numba_cuda/numba/cuda/target.py +440 -0
- numba_cuda/numba/cuda/testing.py +202 -0
- numba_cuda/numba/cuda/tests/__init__.py +58 -0
- numba_cuda/numba/cuda/tests/cudadrv/__init__.py +8 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +145 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +145 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +375 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +21 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +179 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +235 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_libraries.py +22 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +193 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +547 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +249 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +81 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +192 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_events.py +38 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +65 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_init.py +139 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +37 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_is_fp16.py +12 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +317 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +127 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_mvc.py +54 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +199 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +37 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +20 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_ptds.py +149 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +36 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +85 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +41 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +122 -0
- numba_cuda/numba/cuda/tests/cudapy/__init__.py +8 -0
- numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +234 -0
- numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +41 -0
- numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +58 -0
- numba_cuda/numba/cuda/tests/cudapy/jitlink.ptx +30 -0
- numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +100 -0
- numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +42 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array.py +260 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +201 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +35 -0
- numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +1620 -0
- numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +120 -0
- numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +24 -0
- numba_cuda/numba/cuda/tests/cudapy/test_caching.py +545 -0
- numba_cuda/numba/cuda/tests/cudapy/test_casting.py +257 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +33 -0
- numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +276 -0
- numba_cuda/numba/cuda/tests/cudapy/test_complex.py +296 -0
- numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +20 -0
- numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +129 -0
- numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +176 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +147 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +435 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +90 -0
- numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +94 -0
- numba_cuda/numba/cuda/tests/cudapy/test_debug.py +101 -0
- numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +221 -0
- numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +222 -0
- numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +700 -0
- numba_cuda/numba/cuda/tests/cudapy/test_enums.py +121 -0
- numba_cuda/numba/cuda/tests/cudapy/test_errors.py +79 -0
- numba_cuda/numba/cuda/tests/cudapy/test_exception.py +174 -0
- numba_cuda/numba/cuda/tests/cudapy/test_extending.py +155 -0
- numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +244 -0
- numba_cuda/numba/cuda/tests/cudapy/test_forall.py +52 -0
- numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +29 -0
- numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +66 -0
- numba_cuda/numba/cuda/tests/cudapy/test_globals.py +60 -0
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +456 -0
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +159 -0
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +95 -0
- numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +37 -0
- numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +165 -0
- numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +1106 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +318 -0
- numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +99 -0
- numba_cuda/numba/cuda/tests/cudapy/test_lang.py +64 -0
- numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +119 -0
- numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +187 -0
- numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +199 -0
- numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +164 -0
- numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +37 -0
- numba_cuda/numba/cuda/tests/cudapy/test_math.py +786 -0
- numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +74 -0
- numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +113 -0
- numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +22 -0
- numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +140 -0
- numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +46 -0
- numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +101 -0
- numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +49 -0
- numba_cuda/numba/cuda/tests/cudapy/test_operator.py +401 -0
- numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +86 -0
- numba_cuda/numba/cuda/tests/cudapy/test_overload.py +335 -0
- numba_cuda/numba/cuda/tests/cudapy/test_powi.py +124 -0
- numba_cuda/numba/cuda/tests/cudapy/test_print.py +128 -0
- numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +33 -0
- numba_cuda/numba/cuda/tests/cudapy/test_random.py +104 -0
- numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +610 -0
- numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +125 -0
- numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +76 -0
- numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +83 -0
- numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +85 -0
- numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +37 -0
- numba_cuda/numba/cuda/tests/cudapy/test_sm.py +444 -0
- numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +205 -0
- numba_cuda/numba/cuda/tests/cudapy/test_sync.py +271 -0
- numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +80 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +277 -0
- numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +47 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +307 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +283 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +20 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +69 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +36 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +37 -0
- numba_cuda/numba/cuda/tests/cudapy/test_warning.py +139 -0
- numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +276 -0
- numba_cuda/numba/cuda/tests/cudasim/__init__.py +6 -0
- numba_cuda/numba/cuda/tests/cudasim/support.py +6 -0
- numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +102 -0
- numba_cuda/numba/cuda/tests/data/__init__.py +0 -0
- numba_cuda/numba/cuda/tests/data/cuda_include.cu +5 -0
- numba_cuda/numba/cuda/tests/data/error.cu +7 -0
- numba_cuda/numba/cuda/tests/data/jitlink.cu +23 -0
- numba_cuda/numba/cuda/tests/data/jitlink.ptx +51 -0
- numba_cuda/numba/cuda/tests/data/warn.cu +7 -0
- numba_cuda/numba/cuda/tests/doc_examples/__init__.py +6 -0
- numba_cuda/numba/cuda/tests/doc_examples/ffi/__init__.py +0 -0
- numba_cuda/numba/cuda/tests/doc_examples/ffi/functions.cu +49 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +77 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +76 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +82 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +155 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +173 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +109 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_random.py +59 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +76 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +130 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_ufunc.py +50 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +73 -0
- numba_cuda/numba/cuda/tests/nocuda/__init__.py +8 -0
- numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +359 -0
- numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +36 -0
- numba_cuda/numba/cuda/tests/nocuda/test_import.py +49 -0
- numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +238 -0
- numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +54 -0
- numba_cuda/numba/cuda/types.py +37 -0
- numba_cuda/numba/cuda/ufuncs.py +662 -0
- numba_cuda/numba/cuda/vector_types.py +209 -0
- numba_cuda/numba/cuda/vectorizers.py +252 -0
- numba_cuda-0.0.12.dist-info/LICENSE +25 -0
- numba_cuda-0.0.12.dist-info/METADATA +68 -0
- numba_cuda-0.0.12.dist-info/RECORD +231 -0
- {numba_cuda-0.0.0.dist-info → numba_cuda-0.0.12.dist-info}/WHEEL +1 -1
- numba_cuda-0.0.0.dist-info/METADATA +0 -6
- numba_cuda-0.0.0.dist-info/RECORD +0 -5
- {numba_cuda-0.0.0.dist-info → numba_cuda-0.0.12.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,140 @@
|
|
1
|
+
import math
|
2
|
+
from numba.core import types
|
3
|
+
from numba.core.typing.templates import ConcreteTemplate, signature, Registry
|
4
|
+
|
5
|
+
|
6
|
+
# Registry instance that the typing templates declared in this file are
# registered against.
registry = Registry()
|
7
|
+
# Shorthand used as a class decorator below: each application registers the
# decorated template for one function of the ``math`` module.
infer_global = registry.register_global
|
8
|
+
|
9
|
+
|
10
|
+
@infer_global(math.acos)
@infer_global(math.acosh)
@infer_global(math.asin)
@infer_global(math.asinh)
@infer_global(math.atan)
@infer_global(math.atanh)
@infer_global(math.cosh)
@infer_global(math.degrees)
@infer_global(math.erf)
@infer_global(math.erfc)
@infer_global(math.expm1)
@infer_global(math.gamma)
@infer_global(math.lgamma)
@infer_global(math.log1p)
@infer_global(math.radians)
@infer_global(math.sinh)
@infer_global(math.tanh)
@infer_global(math.tan)
class Math_unary(ConcreteTemplate):
    """Typing template for the one-argument ``math`` functions listed above.

    ``int64`` and ``uint64`` arguments are typed as returning ``float64``;
    ``float32`` and ``float64`` arguments return their own type.
    """

    # (return type, argument type) pairs, in resolution order.
    cases = [
        signature(result, operand)
        for result, operand in (
            (types.float64, types.int64),
            (types.float64, types.uint64),
            (types.float32, types.float32),
            (types.float64, types.float64),
        )
    ]
|
35
|
+
|
36
|
+
|
37
|
+
@infer_global(math.sin)
@infer_global(math.cos)
@infer_global(math.ceil)
@infer_global(math.floor)
@infer_global(math.sqrt)
@infer_global(math.log)
@infer_global(math.log2)
@infer_global(math.log10)
@infer_global(math.exp)
@infer_global(math.fabs)
@infer_global(math.trunc)
class Math_unary_with_fp16(ConcreteTemplate):
    """Typing template for one-argument ``math`` functions that also take fp16.

    Integer arguments (``int64``/``uint64``) are typed as returning
    ``float64``; ``float32``, ``float64`` and ``float16`` arguments return
    their own type.
    """

    # (return type, argument type) pairs; the float16 case comes last.
    cases = [
        signature(result, operand)
        for result, operand in (
            (types.float64, types.int64),
            (types.float64, types.uint64),
            (types.float32, types.float32),
            (types.float64, types.float64),
            (types.float16, types.float16),
        )
    ]
|
56
|
+
|
57
|
+
|
58
|
+
@infer_global(math.atan2)
class Math_atan2(ConcreteTemplate):
    """Typing template for ``math.atan2``.

    Both arguments share one type. Integer pairs are typed as returning
    ``float64``; float pairs return their own type.
    """

    key = math.atan2
    # (return type, type of each of the two arguments) pairs.
    cases = [
        signature(result, operand, operand)
        for result, operand in (
            (types.float64, types.int64),
            (types.float64, types.uint64),
            (types.float32, types.float32),
            (types.float64, types.float64),
        )
    ]
|
67
|
+
|
68
|
+
|
69
|
+
@infer_global(math.hypot)
class Math_hypot(ConcreteTemplate):
    """Typing template for ``math.hypot``.

    Both arguments share one type. Integer pairs are typed as returning
    ``float64``; float pairs return their own type.
    """

    key = math.hypot
    # (return type, type of each of the two arguments) pairs.
    cases = [
        signature(result, operand, operand)
        for result, operand in (
            (types.float64, types.int64),
            (types.float64, types.uint64),
            (types.float32, types.float32),
            (types.float64, types.float64),
        )
    ]
|
78
|
+
|
79
|
+
|
80
|
+
@infer_global(math.copysign)
@infer_global(math.fmod)
class Math_binary(ConcreteTemplate):
    """Typing template for ``math.copysign`` and ``math.fmod``.

    Only homogeneous ``float32`` or ``float64`` argument pairs are declared;
    the result has the same type as the arguments.
    """

    cases = [
        signature(fp, fp, fp) for fp in (types.float32, types.float64)
    ]
|
87
|
+
|
88
|
+
|
89
|
+
@infer_global(math.remainder)
class Math_remainder(ConcreteTemplate):
    """Typing template for ``math.remainder``.

    Only homogeneous ``float32`` or ``float64`` argument pairs are declared;
    the result has the same type as the arguments.
    """

    cases = [
        signature(fp, fp, fp) for fp in (types.float32, types.float64)
    ]
|
95
|
+
|
96
|
+
|
97
|
+
@infer_global(math.pow)
class Math_pow(ConcreteTemplate):
    """Typing template for ``math.pow``.

    The result always has the type of the base. The exponent is either the
    same float type as the base or ``int32``.
    """

    # (base type, exponent type) pairs: float exponents first, then int32.
    cases = [
        signature(base, base, exponent)
        for base, exponent in (
            (types.float32, types.float32),
            (types.float64, types.float64),
            (types.float32, types.int32),
            (types.float64, types.int32),
        )
    ]
|
105
|
+
|
106
|
+
|
107
|
+
@infer_global(math.frexp)
class Math_frexp(ConcreteTemplate):
    """Typing template for ``math.frexp``.

    A ``float32`` or ``float64`` argument yields a 2-tuple whose first item
    has the argument's type and whose second item is ``int32``.
    """

    cases = [
        signature(types.Tuple([fp, types.int32]), fp)
        for fp in (types.float32, types.float64)
    ]
|
113
|
+
|
114
|
+
|
115
|
+
@infer_global(math.ldexp)
class Math_ldexp(ConcreteTemplate):
    """Typing template for ``math.ldexp``.

    Takes a ``float32`` or ``float64`` first argument and an ``int32`` second
    argument; the result has the type of the first argument.
    """

    cases = [
        signature(fp, fp, types.int32)
        for fp in (types.float32, types.float64)
    ]
|
121
|
+
|
122
|
+
|
123
|
+
@infer_global(math.isinf)
@infer_global(math.isnan)
@infer_global(math.isfinite)
class Math_isnan(ConcreteTemplate):
    """Typing template for ``math.isinf``, ``math.isnan``, ``math.isfinite``.

    Each predicate takes one ``int64``, ``uint64``, ``float32`` or ``float64``
    argument and is typed as returning ``boolean``.
    """

    cases = [
        signature(types.boolean, operand)
        for operand in (
            types.int64,
            types.uint64,
            types.float32,
            types.float64,
        )
    ]
|
133
|
+
|
134
|
+
|
135
|
+
@infer_global(math.modf)
class Math_modf(ConcreteTemplate):
    """Typing template for ``math.modf``.

    A ``float64`` or ``float32`` argument yields a homogeneous 2-tuple of the
    argument's type.
    """

    # The float64 case is declared before the float32 case.
    cases = [
        signature(types.UniTuple(fp, 2), fp)
        for fp in (types.float64, types.float32)
    ]
|
@@ -0,0 +1,189 @@
|
|
1
|
+
from warnings import warn
|
2
|
+
from numba.core import types, config, sigutils
|
3
|
+
from numba.core.errors import DeprecationError, NumbaInvalidConfigWarning
|
4
|
+
from numba.cuda.compiler import declare_device_function
|
5
|
+
from numba.cuda.dispatcher import CUDADispatcher
|
6
|
+
from numba.cuda.simulator.kernel import FakeCUDAKernel
|
7
|
+
|
8
|
+
|
9
|
+
_msg_deprecated_signature_arg = ("Deprecated keyword argument `{0}`. "
|
10
|
+
"Signatures should be passed as the first "
|
11
|
+
"positional argument.")
|
12
|
+
|
13
|
+
|
14
|
+
def jit(func_or_sig=None, device=False, inline=False, link=[], debug=None,
        opt=True, lineinfo=False, cache=False, **kws):
    """
    JIT compile a Python function for CUDA GPUs.

    :param func_or_sig: The function to compile, a single signature, or a
       list of signatures. Given a function, a :class:`Dispatcher
       <numba.cuda.dispatcher.CUDADispatcher>` is returned. Given a signature
       or a list of signatures, a decorator is returned instead; applying it
       to a function compiles that function for each signature and returns
       the :class:`Dispatcher <numba.cuda.dispatcher.CUDADispatcher>`. When
       omitted, a decorator that compiles lazily is returned. See
       :ref:`jit-decorator` for details about passing signatures.

       .. note:: A kernel cannot have any return value.
    :param device: Whether this is a device function rather than a kernel.
    :type device: bool
    :param link: Files containing PTX or CUDA C/C++ source to link with the
       function.
    :type link: list
    :param debug: If True, check for exceptions thrown while the kernel
       runs. This costs performance and is meant for debugging only; ``opt``
       should be False when it is set. When left as ``None`` the value of
       ``config.CUDA_DEBUGINFO_DEFAULT`` is used (settable through the
       ``NUMBA_CUDA_DEBUGINFO=1`` environment variable).
    :param fastmath: When True, enables fastmath optimizations as outlined in
       the :ref:`CUDA Fast Math documentation <cuda-fast-math>`.
    :param max_registers: Ask for the kernel to use at most this many
       registers per thread. The limit may be exceeded if the ABI needs more
       registers than requested. Useful for increasing occupancy.
    :param opt: Whether LLVM IR is compiled to PTX with optimization enabled:
       ``True`` passes ``-opt=3`` to NVVM, ``False`` passes ``-opt=0``.
       Defaults to ``True``.
    :type opt: bool
    :param lineinfo: If True, generate a mapping between source lines and
       assembly so that NVIDIA profiling tools can show the source code and
       correlate it with program counter sampling.
    :type lineinfo: bool
    :param cache: If True, enables the file-based cache for this function.
    :type cache: bool
    """
    # ---- argument validation ------------------------------------------
    if link and config.ENABLE_CUDASIM:
        raise NotImplementedError('Cannot link PTX in the simulator')

    if kws.get('boundscheck'):
        raise NotImplementedError("bounds checking is not supported for CUDA")

    # Deprecated keywords are rejected outright, checked in this order.
    for deprecated_kw in ('argtypes', 'restype', 'bind'):
        if kws.get(deprecated_kw) is not None:
            raise DeprecationError(
                _msg_deprecated_signature_arg.format(deprecated_kw))

    if debug is None:
        debug = config.CUDA_DEBUGINFO_DEFAULT
    fastmath = kws.get('fastmath', False)
    extensions = kws.get('extensions', [])

    if debug and opt:
        warn(NumbaInvalidConfigWarning(
            "debug=True with opt=True (the default) "
            "is not supported by CUDA. This may result in a crash"
            " - set debug=False or opt=False."))

    if debug and lineinfo:
        warn(NumbaInvalidConfigWarning(
            "debug and lineinfo are mutually exclusive. Use debug to get "
            "full debug info (this disables some optimizations), or "
            "lineinfo for line info only with code generation unaffected."))

    # NOTE(review): ``link`` is a named parameter of this function, so it is
    # never collected into ``kws`` and this condition cannot be true as
    # written. Left unchanged to preserve behaviour; confirm the intent.
    if device and kws.get('link'):
        raise ValueError("link keyword invalid for device function")

    # ---- classify the first positional argument -----------------------
    if sigutils.is_signature(func_or_sig):
        signatures, specialized = [func_or_sig], True
    elif isinstance(func_or_sig, list):
        signatures, specialized = func_or_sig, False
    else:
        signatures = None

    def make_dispatcher(py_func, options):
        # Build the dispatcher and switch on caching when it was requested.
        dispatcher = CUDADispatcher(py_func, targetoptions=options)
        if cache:
            dispatcher.enable_caching()
        return dispatcher

    # ---- explicit signature(s): return an eagerly compiling decorator --
    if signatures is not None:
        if config.ENABLE_CUDASIM:
            def jitwrapper(func):
                return FakeCUDAKernel(func, device=device, fastmath=fastmath)
            return jitwrapper

        def _jit(func):
            dispatcher = make_dispatcher(func, dict(
                kws, debug=debug, lineinfo=lineinfo, link=link, opt=opt,
                fastmath=fastmath, device=device, extensions=extensions))

            for one_sig in signatures:
                argtypes, restype = sigutils.normalize_signature(one_sig)

                if device:
                    from numba.core import typeinfer
                    with typeinfer.register_dispatcher(dispatcher):
                        dispatcher.compile_device(argtypes, restype)
                    continue

                # Kernels may only declare a void (or no) return type.
                if restype and restype != types.void:
                    raise TypeError("CUDA kernel must have void return type.")
                dispatcher.compile(argtypes)

            dispatcher._specialized = specialized
            dispatcher.disable_compile()

            return dispatcher

        return _jit

    # ---- no argument: return a lazily compiling decorator --------------
    if func_or_sig is None:
        if config.ENABLE_CUDASIM:
            def autojitwrapper(func):
                return FakeCUDAKernel(func, device=device,
                                      fastmath=fastmath)
        else:
            def autojitwrapper(func):
                # ``inline`` is not forwarded here, matching the options
                # dictionaries built in this function, which omit it too.
                return jit(func, device=device, debug=debug, opt=opt,
                           lineinfo=lineinfo, link=link, cache=cache, **kws)

        return autojitwrapper

    # ---- func_or_sig is a function -------------------------------------
    if config.ENABLE_CUDASIM:
        return FakeCUDAKernel(func_or_sig, device=device,
                              fastmath=fastmath)

    return make_dispatcher(func_or_sig, dict(
        kws, debug=debug, lineinfo=lineinfo, opt=opt, link=link,
        fastmath=fastmath, device=device, extensions=extensions))
|
173
|
+
|
174
|
+
|
175
|
+
def declare_device(name, sig):
    """
    Declare a foreign function by name and signature.

    :param name: The name of the foreign function.
    :type name: str
    :param sig: The Numba signature of the function. It must include a
       return type.
    :return: The descriptor produced by ``declare_device_function``, which
       can be used to call the function from a Python kernel.
    :raises TypeError: If the normalized signature has no return type.
    """
    argtypes, restype = sigutils.normalize_signature(sig)

    if restype is not None:
        return declare_device_function(name, restype, argtypes)

    raise TypeError('Return type must be provided for device declarations')
|
@@ -0,0 +1,33 @@
|
|
1
|
+
from numba.core.descriptors import TargetDescriptor
|
2
|
+
from numba.core.options import TargetOptions
|
3
|
+
from .target import CUDATargetContext, CUDATypingContext
|
4
|
+
|
5
|
+
|
6
|
+
class CUDATargetOptions(TargetOptions):
    """Target options for CUDA; adds nothing to ``TargetOptions``."""
|
8
|
+
|
9
|
+
|
10
|
+
class CUDATarget(TargetDescriptor):
    """Target descriptor for CUDA with lazily created contexts.

    The typing and target contexts are built on first access rather than in
    ``__init__``.
    """

    def __init__(self, name):
        self.options = CUDATargetOptions
        # The typing and target contexts are initialized only when needed -
        # this prevents an attempt to load CUDA libraries at import time on
        # systems that might not have them present.
        self._typingctx = None
        self._targetctx = None
        super().__init__(name)

    @property
    def typing_context(self):
        """The ``CUDATypingContext``, created on first access."""
        if self._typingctx is None:
            self._typingctx = CUDATypingContext()
        return self._typingctx

    @property
    def target_context(self):
        """The ``CUDATargetContext``, created on first access."""
        if self._targetctx is None:
            # Go through the ``typing_context`` property, not the raw
            # ``_typingctx`` attribute: the attribute is still None if the
            # typing context has not been requested yet, and the target
            # context would then be built around None.
            self._targetctx = CUDATargetContext(self.typing_context)
        return self._targetctx


# Module-level descriptor instance for the 'cuda' target.
cuda_target = CUDATarget('cuda')
|
@@ -0,0 +1,89 @@
|
|
1
|
+
# Re export
|
2
|
+
import sys
|
3
|
+
from numba.cuda import cg
|
4
|
+
from .stubs import (threadIdx, blockIdx, blockDim, gridDim, laneid, warpsize,
|
5
|
+
syncwarp, shared, local, const, atomic,
|
6
|
+
shfl_sync_intrinsic, vote_sync_intrinsic, match_any_sync,
|
7
|
+
match_all_sync, threadfence_block, threadfence_system,
|
8
|
+
threadfence, selp, popc, brev, clz, ffs, fma, cbrt,
|
9
|
+
activemask, lanemask_lt, nanosleep, fp16,
|
10
|
+
_vector_type_stubs)
|
11
|
+
from .intrinsics import (grid, gridsize, syncthreads, syncthreads_and,
|
12
|
+
syncthreads_count, syncthreads_or)
|
13
|
+
from .cudadrv.error import CudaSupportError
|
14
|
+
from numba.cuda.cudadrv.driver import (BaseCUDAMemoryManager,
|
15
|
+
HostOnlyCUDAMemoryManager,
|
16
|
+
GetIpcHandleMixin, MemoryPointer,
|
17
|
+
MappedMemory, PinnedMemory, MemoryInfo,
|
18
|
+
IpcHandle, set_memory_manager)
|
19
|
+
from numba.cuda.cudadrv.runtime import runtime
|
20
|
+
from .cudadrv import nvvm
|
21
|
+
from numba.cuda import initialize
|
22
|
+
from .errors import KernelRuntimeError
|
23
|
+
|
24
|
+
from .decorators import jit, declare_device
|
25
|
+
from .api import *
|
26
|
+
from .api import _auto_device
|
27
|
+
from .args import In, Out, InOut
|
28
|
+
|
29
|
+
from .intrinsic_wrapper import (all_sync, any_sync, eq_sync, ballot_sync,
|
30
|
+
shfl_sync, shfl_up_sync, shfl_down_sync,
|
31
|
+
shfl_xor_sync)
|
32
|
+
|
33
|
+
from .kernels import reduction
|
34
|
+
|
35
|
+
# Both ``reduce`` and ``Reduce`` are bound to the same ``reduction.Reduce``.
reduce = Reduce = reduction.Reduce
|
36
|
+
|
37
|
+
# Expose vector type constructors and aliases as module level attributes.
# Each stub is set on this module under its own ``__name__`` and again under
# every name in its ``aliases``.
|
38
|
+
for vector_type_stub in _vector_type_stubs:
|
39
|
+
setattr(sys.modules[__name__], vector_type_stub.__name__, vector_type_stub)
|
40
|
+
for alias in vector_type_stub.aliases:
|
41
|
+
setattr(sys.modules[__name__], alias, vector_type_stub)
|
42
|
+
# Remove the loop variable and the stub collection from the module namespace.
# NOTE(review): ``alias`` is not deleted here, so it stays bound at module
# level whenever a stub has aliases; this line also requires the loop to
# have run at least once. Confirm both are intended.
del vector_type_stub, _vector_type_stubs
|
43
|
+
|
44
|
+
|
45
|
+
def is_available():
    """Report whether a CUDA GPU can be used.

    This will initialize the driver if it hasn't been initialized.

    The result is falsy when the driver reports itself unavailable or when
    reading its availability raises ``CudaSupportError``; otherwise it is
    whatever ``nvvm.is_available()`` returns.
    """
    # `driver.is_available` initializes the driver itself, and that
    # initialization may raise. `cuda.is_available` is often used as the
    # guard deciding whether a CUDA test runs, so an exception here would
    # break test discovery/orchestration; treat it as "not available".
    try:
        driver_ok = driver.driver.is_available
    except CudaSupportError:
        return False

    return driver_ok and nvvm.is_available()
|
62
|
+
|
63
|
+
|
64
|
+
def is_supported_version():
    """Return True if the CUDA Runtime is a supported version.

    An unsupported version (for instance one newer than those known to
    Numba) may still work. This check only tells the caller whether the
    current Numba version is tested and known to work with the current
    runtime version, leaving the reaction to the caller:

    - continue silently,
    - emit a warning,
    - raise an error or otherwise prevent the use of CUDA.
    """
    supported = runtime.is_supported_version()
    return supported
|
79
|
+
|
80
|
+
|
81
|
+
def cuda_error():
    """Return the CUDA driver's initialization error, if any.

    The value is None when the driver initialized without error, otherwise a
    string describing the error.
    """
    cuda_driver = driver.driver
    return cuda_driver.initialization_error
|
87
|
+
|
88
|
+
|
89
|
+
# Module-level call: runs once, when this module is first imported.
initialize.initialize_all()
|