numba-cuda 0.8.0__py3-none-any.whl → 0.9.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- _numba_cuda_redirector.py +17 -13
- numba_cuda/VERSION +1 -1
- numba_cuda/_version.py +4 -1
- numba_cuda/numba/cuda/__init__.py +6 -2
- numba_cuda/numba/cuda/api.py +129 -86
- numba_cuda/numba/cuda/api_util.py +3 -3
- numba_cuda/numba/cuda/args.py +12 -16
- numba_cuda/numba/cuda/cg.py +6 -6
- numba_cuda/numba/cuda/codegen.py +74 -43
- numba_cuda/numba/cuda/compiler.py +232 -113
- numba_cuda/numba/cuda/cpp_function_wrappers.cu +1 -2
- numba_cuda/numba/cuda/cuda_fp16.h +661 -661
- numba_cuda/numba/cuda/cuda_fp16.hpp +3 -3
- numba_cuda/numba/cuda/cuda_paths.py +291 -99
- numba_cuda/numba/cuda/cudadecl.py +125 -69
- numba_cuda/numba/cuda/cudadrv/__init__.py +3 -1
- numba_cuda/numba/cuda/cudadrv/devicearray.py +185 -135
- numba_cuda/numba/cuda/cudadrv/devices.py +16 -11
- numba_cuda/numba/cuda/cudadrv/driver.py +463 -297
- numba_cuda/numba/cuda/cudadrv/drvapi.py +241 -207
- numba_cuda/numba/cuda/cudadrv/dummyarray.py +66 -54
- numba_cuda/numba/cuda/cudadrv/enums.py +1 -1
- numba_cuda/numba/cuda/cudadrv/error.py +6 -2
- numba_cuda/numba/cuda/cudadrv/libs.py +67 -63
- numba_cuda/numba/cuda/cudadrv/linkable_code.py +16 -1
- numba_cuda/numba/cuda/cudadrv/mappings.py +16 -14
- numba_cuda/numba/cuda/cudadrv/nvrtc.py +138 -29
- numba_cuda/numba/cuda/cudadrv/nvvm.py +296 -161
- numba_cuda/numba/cuda/cudadrv/rtapi.py +1 -1
- numba_cuda/numba/cuda/cudadrv/runtime.py +20 -8
- numba_cuda/numba/cuda/cudaimpl.py +317 -233
- numba_cuda/numba/cuda/cudamath.py +1 -1
- numba_cuda/numba/cuda/debuginfo.py +8 -6
- numba_cuda/numba/cuda/decorators.py +75 -45
- numba_cuda/numba/cuda/descriptor.py +1 -1
- numba_cuda/numba/cuda/device_init.py +69 -18
- numba_cuda/numba/cuda/deviceufunc.py +143 -98
- numba_cuda/numba/cuda/dispatcher.py +300 -213
- numba_cuda/numba/cuda/errors.py +13 -10
- numba_cuda/numba/cuda/extending.py +1 -1
- numba_cuda/numba/cuda/initialize.py +5 -3
- numba_cuda/numba/cuda/intrinsic_wrapper.py +3 -3
- numba_cuda/numba/cuda/intrinsics.py +31 -27
- numba_cuda/numba/cuda/kernels/reduction.py +13 -13
- numba_cuda/numba/cuda/kernels/transpose.py +3 -6
- numba_cuda/numba/cuda/libdevice.py +317 -317
- numba_cuda/numba/cuda/libdeviceimpl.py +3 -2
- numba_cuda/numba/cuda/locks.py +16 -0
- numba_cuda/numba/cuda/mathimpl.py +62 -57
- numba_cuda/numba/cuda/models.py +1 -5
- numba_cuda/numba/cuda/nvvmutils.py +103 -88
- numba_cuda/numba/cuda/printimpl.py +9 -5
- numba_cuda/numba/cuda/random.py +46 -36
- numba_cuda/numba/cuda/reshape_funcs.cu +1 -1
- numba_cuda/numba/cuda/runtime/__init__.py +1 -1
- numba_cuda/numba/cuda/runtime/memsys.cu +1 -1
- numba_cuda/numba/cuda/runtime/memsys.cuh +1 -1
- numba_cuda/numba/cuda/runtime/nrt.cu +3 -3
- numba_cuda/numba/cuda/runtime/nrt.py +48 -43
- numba_cuda/numba/cuda/simulator/__init__.py +22 -12
- numba_cuda/numba/cuda/simulator/api.py +38 -22
- numba_cuda/numba/cuda/simulator/compiler.py +2 -2
- numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +8 -2
- numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +63 -55
- numba_cuda/numba/cuda/simulator/cudadrv/devices.py +13 -11
- numba_cuda/numba/cuda/simulator/cudadrv/driver.py +5 -5
- numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +2 -2
- numba_cuda/numba/cuda/simulator/cudadrv/libs.py +1 -1
- numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +3 -3
- numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +3 -3
- numba_cuda/numba/cuda/simulator/kernel.py +43 -34
- numba_cuda/numba/cuda/simulator/kernelapi.py +31 -26
- numba_cuda/numba/cuda/simulator/reduction.py +1 -0
- numba_cuda/numba/cuda/simulator/vector_types.py +13 -9
- numba_cuda/numba/cuda/simulator_init.py +2 -4
- numba_cuda/numba/cuda/stubs.py +139 -102
- numba_cuda/numba/cuda/target.py +64 -47
- numba_cuda/numba/cuda/testing.py +24 -19
- numba_cuda/numba/cuda/tests/__init__.py +14 -12
- numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +16 -17
- numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +7 -7
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +73 -54
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +1 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +48 -50
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +47 -29
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_libraries.py +3 -3
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +19 -19
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +108 -103
- numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +20 -11
- numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +20 -17
- numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +8 -6
- numba_cuda/numba/cuda/tests/cudadrv/test_events.py +1 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +8 -7
- numba_cuda/numba/cuda/tests/cudadrv/test_init.py +13 -13
- numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +12 -9
- numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +36 -31
- numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +8 -7
- numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py +294 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_mvc.py +10 -7
- numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +24 -15
- numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +43 -41
- numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +4 -5
- numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +2 -2
- numba_cuda/numba/cuda/tests/cudadrv/test_ptds.py +28 -17
- numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +1 -2
- numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +22 -14
- numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +1 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +4 -3
- numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +10 -4
- numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +1 -0
- numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +7 -6
- numba_cuda/numba/cuda/tests/cudapy/jitlink.ptx +0 -2
- numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +1 -0
- numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +6 -5
- numba_cuda/numba/cuda/tests/cudapy/test_array.py +52 -42
- numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +5 -6
- numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +501 -304
- numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +57 -21
- numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +3 -3
- numba_cuda/numba/cuda/tests/cudapy/test_caching.py +50 -37
- numba_cuda/numba/cuda/tests/cudapy/test_casting.py +29 -24
- numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +11 -6
- numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +84 -50
- numba_cuda/numba/cuda/tests/cudapy/test_complex.py +144 -73
- numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +2 -2
- numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +37 -27
- numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +43 -45
- numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +21 -14
- numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +60 -55
- numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +3 -2
- numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +26 -22
- numba_cuda/numba/cuda/tests/cudapy/test_debug.py +29 -27
- numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +31 -28
- numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +52 -45
- numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +55 -43
- numba_cuda/numba/cuda/tests/cudapy/test_enums.py +6 -7
- numba_cuda/numba/cuda/tests/cudapy/test_errors.py +30 -15
- numba_cuda/numba/cuda/tests/cudapy/test_exception.py +11 -12
- numba_cuda/numba/cuda/tests/cudapy/test_extending.py +19 -12
- numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +77 -66
- numba_cuda/numba/cuda/tests/cudapy/test_forall.py +5 -3
- numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +5 -3
- numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_globals.py +3 -5
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +144 -126
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +23 -18
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +16 -22
- numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +1 -3
- numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +29 -20
- numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +147 -99
- numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +50 -36
- numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +1 -2
- numba_cuda/numba/cuda/tests/cudapy/test_lang.py +4 -4
- numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +6 -6
- numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +24 -20
- numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +36 -31
- numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +13 -13
- numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +13 -6
- numba_cuda/numba/cuda/tests/cudapy/test_math.py +83 -66
- numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +1 -3
- numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +19 -58
- numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +4 -4
- numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +9 -7
- numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +9 -8
- numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +12 -10
- numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_operator.py +180 -96
- numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +5 -5
- numba_cuda/numba/cuda/tests/cudapy/test_overload.py +37 -18
- numba_cuda/numba/cuda/tests/cudapy/test_powi.py +7 -7
- numba_cuda/numba/cuda/tests/cudapy/test_print.py +9 -7
- numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_random.py +15 -10
- numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +88 -87
- numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +12 -10
- numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +26 -11
- numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +7 -10
- numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +4 -6
- numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_sm.py +10 -9
- numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +62 -43
- numba_cuda/numba/cuda/tests/cudapy/test_stream_api.py +7 -3
- numba_cuda/numba/cuda/tests/cudapy/test_sync.py +7 -5
- numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +18 -11
- numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +111 -88
- numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +2 -3
- numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +305 -130
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +33 -36
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +5 -5
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +16 -12
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +7 -7
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +6 -7
- numba_cuda/numba/cuda/tests/cudapy/test_warning.py +31 -29
- numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +31 -25
- numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +19 -13
- numba_cuda/numba/cuda/tests/data/jitlink.cu +1 -1
- numba_cuda/numba/cuda/tests/data/jitlink.ptx +0 -2
- numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +15 -8
- numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +4 -7
- numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +14 -9
- numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +22 -18
- numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +7 -4
- numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +2 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_random.py +8 -4
- numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +2 -1
- numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +94 -19
- numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +2 -2
- numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +91 -62
- numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +14 -5
- numba_cuda/numba/cuda/tests/nocuda/test_import.py +25 -25
- numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +40 -40
- numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +12 -10
- numba_cuda/numba/cuda/tests/nrt/test_nrt.py +16 -20
- numba_cuda/numba/cuda/tests/nrt/test_nrt_refct.py +12 -10
- numba_cuda/numba/cuda/tests/test_binary_generation/generate_raw_ltoir.py +2 -2
- numba_cuda/numba/cuda/types.py +5 -2
- numba_cuda/numba/cuda/ufuncs.py +382 -362
- numba_cuda/numba/cuda/utils.py +2 -2
- numba_cuda/numba/cuda/vector_types.py +2 -2
- numba_cuda/numba/cuda/vectorizers.py +37 -32
- {numba_cuda-0.8.0.dist-info → numba_cuda-0.9.0.dist-info}/METADATA +1 -1
- numba_cuda-0.9.0.dist-info/RECORD +253 -0
- {numba_cuda-0.8.0.dist-info → numba_cuda-0.9.0.dist-info}/WHEEL +1 -1
- numba_cuda-0.8.0.dist-info/RECORD +0 -251
- {numba_cuda-0.8.0.dist-info → numba_cuda-0.9.0.dist-info}/licenses/LICENSE +0 -0
- {numba_cuda-0.8.0.dist-info → numba_cuda-0.9.0.dist-info}/top_level.txt +0 -0
@@ -136,5 +136,5 @@ class Math_isnan(ConcreteTemplate):
|
|
136
136
|
class Math_modf(ConcreteTemplate):
|
137
137
|
cases = [
|
138
138
|
signature(types.UniTuple(types.float64, 2), types.float64),
|
139
|
-
signature(types.UniTuple(types.float32, 2), types.float32)
|
139
|
+
signature(types.UniTuple(types.float32, 2), types.float32),
|
140
140
|
]
|
@@ -7,7 +7,6 @@ _BYTE_SIZE = 8
|
|
7
7
|
|
8
8
|
|
9
9
|
class CUDADIBuilder(DIBuilder):
|
10
|
-
|
11
10
|
def _var_type(self, lltype, size, datamodel=None):
|
12
11
|
is_bool = False
|
13
12
|
is_grid_group = False
|
@@ -34,11 +33,14 @@ class CUDADIBuilder(DIBuilder):
|
|
34
33
|
elif is_grid_group:
|
35
34
|
ditok = "DW_ATE_unsigned"
|
36
35
|
|
37
|
-
return m.add_debug_info(
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
36
|
+
return m.add_debug_info(
|
37
|
+
"DIBasicType",
|
38
|
+
{
|
39
|
+
"name": name,
|
40
|
+
"size": bitsize,
|
41
|
+
"encoding": ir.DIToken(ditok),
|
42
|
+
},
|
43
|
+
)
|
42
44
|
|
43
45
|
# For other cases, use upstream Numba implementation
|
44
46
|
return super()._var_type(lltype, size, datamodel=datamodel)
|
@@ -6,13 +6,24 @@ from numba.cuda.dispatcher import CUDADispatcher
|
|
6
6
|
from numba.cuda.simulator.kernel import FakeCUDAKernel
|
7
7
|
|
8
8
|
|
9
|
-
_msg_deprecated_signature_arg = (
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
9
|
+
_msg_deprecated_signature_arg = (
|
10
|
+
"Deprecated keyword argument `{0}`. "
|
11
|
+
"Signatures should be passed as the first "
|
12
|
+
"positional argument."
|
13
|
+
)
|
14
|
+
|
15
|
+
|
16
|
+
def jit(
|
17
|
+
func_or_sig=None,
|
18
|
+
device=False,
|
19
|
+
inline=False,
|
20
|
+
link=[],
|
21
|
+
debug=None,
|
22
|
+
opt=None,
|
23
|
+
lineinfo=False,
|
24
|
+
cache=False,
|
25
|
+
**kws,
|
26
|
+
):
|
16
27
|
"""
|
17
28
|
JIT compile a Python function for CUDA GPUs.
|
18
29
|
|
@@ -55,39 +66,43 @@ def jit(func_or_sig=None, device=False, inline=False, link=[], debug=None,
|
|
55
66
|
"""
|
56
67
|
|
57
68
|
if link and config.ENABLE_CUDASIM:
|
58
|
-
raise NotImplementedError(
|
69
|
+
raise NotImplementedError("Cannot link PTX in the simulator")
|
59
70
|
|
60
|
-
if kws.get(
|
71
|
+
if kws.get("boundscheck"):
|
61
72
|
raise NotImplementedError("bounds checking is not supported for CUDA")
|
62
73
|
|
63
|
-
if kws.get(
|
64
|
-
msg = _msg_deprecated_signature_arg.format(
|
74
|
+
if kws.get("argtypes") is not None:
|
75
|
+
msg = _msg_deprecated_signature_arg.format("argtypes")
|
65
76
|
raise DeprecationError(msg)
|
66
|
-
if kws.get(
|
67
|
-
msg = _msg_deprecated_signature_arg.format(
|
77
|
+
if kws.get("restype") is not None:
|
78
|
+
msg = _msg_deprecated_signature_arg.format("restype")
|
68
79
|
raise DeprecationError(msg)
|
69
|
-
if kws.get(
|
70
|
-
msg = _msg_deprecated_signature_arg.format(
|
80
|
+
if kws.get("bind") is not None:
|
81
|
+
msg = _msg_deprecated_signature_arg.format("bind")
|
71
82
|
raise DeprecationError(msg)
|
72
83
|
|
73
84
|
debug = config.CUDA_DEBUGINFO_DEFAULT if debug is None else debug
|
74
85
|
opt = (config.OPT != 0) if opt is None else opt
|
75
|
-
fastmath = kws.get(
|
76
|
-
extensions = kws.get(
|
86
|
+
fastmath = kws.get("fastmath", False)
|
87
|
+
extensions = kws.get("extensions", [])
|
77
88
|
|
78
89
|
if debug and opt:
|
79
|
-
msg = (
|
80
|
-
|
81
|
-
|
90
|
+
msg = (
|
91
|
+
"debug=True with opt=True "
|
92
|
+
"is not supported by CUDA. This may result in a crash"
|
93
|
+
" - set debug=False or opt=False."
|
94
|
+
)
|
82
95
|
warn(NumbaInvalidConfigWarning(msg))
|
83
96
|
|
84
97
|
if debug and lineinfo:
|
85
|
-
msg = (
|
86
|
-
|
87
|
-
|
98
|
+
msg = (
|
99
|
+
"debug and lineinfo are mutually exclusive. Use debug to get "
|
100
|
+
"full debug info (this disables some optimizations), or "
|
101
|
+
"lineinfo for line info only with code generation unaffected."
|
102
|
+
)
|
88
103
|
warn(NumbaInvalidConfigWarning(msg))
|
89
104
|
|
90
|
-
if device and kws.get(
|
105
|
+
if device and kws.get("link"):
|
91
106
|
raise ValueError("link keyword invalid for device function")
|
92
107
|
|
93
108
|
if sigutils.is_signature(func_or_sig):
|
@@ -101,19 +116,21 @@ def jit(func_or_sig=None, device=False, inline=False, link=[], debug=None,
|
|
101
116
|
|
102
117
|
if signatures is not None:
|
103
118
|
if config.ENABLE_CUDASIM:
|
119
|
+
|
104
120
|
def jitwrapper(func):
|
105
121
|
return FakeCUDAKernel(func, device=device, fastmath=fastmath)
|
122
|
+
|
106
123
|
return jitwrapper
|
107
124
|
|
108
125
|
def _jit(func):
|
109
126
|
targetoptions = kws.copy()
|
110
|
-
targetoptions[
|
111
|
-
targetoptions[
|
112
|
-
targetoptions[
|
113
|
-
targetoptions[
|
114
|
-
targetoptions[
|
115
|
-
targetoptions[
|
116
|
-
targetoptions[
|
127
|
+
targetoptions["debug"] = debug
|
128
|
+
targetoptions["lineinfo"] = lineinfo
|
129
|
+
targetoptions["link"] = link
|
130
|
+
targetoptions["opt"] = opt
|
131
|
+
targetoptions["fastmath"] = fastmath
|
132
|
+
targetoptions["device"] = device
|
133
|
+
targetoptions["extensions"] = extensions
|
117
134
|
|
118
135
|
disp = CUDADispatcher(func, targetoptions=targetoptions)
|
119
136
|
|
@@ -128,6 +145,7 @@ def jit(func_or_sig=None, device=False, inline=False, link=[], debug=None,
|
|
128
145
|
|
129
146
|
if device:
|
130
147
|
from numba.core import typeinfer
|
148
|
+
|
131
149
|
with typeinfer.register_dispatcher(disp):
|
132
150
|
disp.compile_device(argtypes, restype)
|
133
151
|
else:
|
@@ -142,29 +160,41 @@ def jit(func_or_sig=None, device=False, inline=False, link=[], debug=None,
|
|
142
160
|
else:
|
143
161
|
if func_or_sig is None:
|
144
162
|
if config.ENABLE_CUDASIM:
|
163
|
+
|
145
164
|
def autojitwrapper(func):
|
146
|
-
return FakeCUDAKernel(
|
147
|
-
|
165
|
+
return FakeCUDAKernel(
|
166
|
+
func, device=device, fastmath=fastmath
|
167
|
+
)
|
148
168
|
else:
|
169
|
+
|
149
170
|
def autojitwrapper(func):
|
150
|
-
return jit(
|
151
|
-
|
171
|
+
return jit(
|
172
|
+
func,
|
173
|
+
device=device,
|
174
|
+
debug=debug,
|
175
|
+
opt=opt,
|
176
|
+
lineinfo=lineinfo,
|
177
|
+
link=link,
|
178
|
+
cache=cache,
|
179
|
+
**kws,
|
180
|
+
)
|
152
181
|
|
153
182
|
return autojitwrapper
|
154
183
|
# func_or_sig is a function
|
155
184
|
else:
|
156
185
|
if config.ENABLE_CUDASIM:
|
157
|
-
return FakeCUDAKernel(
|
158
|
-
|
186
|
+
return FakeCUDAKernel(
|
187
|
+
func_or_sig, device=device, fastmath=fastmath
|
188
|
+
)
|
159
189
|
else:
|
160
190
|
targetoptions = kws.copy()
|
161
|
-
targetoptions[
|
162
|
-
targetoptions[
|
163
|
-
targetoptions[
|
164
|
-
targetoptions[
|
165
|
-
targetoptions[
|
166
|
-
targetoptions[
|
167
|
-
targetoptions[
|
191
|
+
targetoptions["debug"] = debug
|
192
|
+
targetoptions["lineinfo"] = lineinfo
|
193
|
+
targetoptions["opt"] = opt
|
194
|
+
targetoptions["link"] = link
|
195
|
+
targetoptions["fastmath"] = fastmath
|
196
|
+
targetoptions["device"] = device
|
197
|
+
targetoptions["extensions"] = extensions
|
168
198
|
disp = CUDADispatcher(func_or_sig, targetoptions=targetoptions)
|
169
199
|
|
170
200
|
if cache:
|
@@ -191,7 +221,7 @@ def declare_device(name, sig, link=None):
|
|
191
221
|
|
192
222
|
argtypes, restype = sigutils.normalize_signature(sig)
|
193
223
|
if restype is None:
|
194
|
-
msg =
|
224
|
+
msg = "Return type must be provided for device declarations"
|
195
225
|
raise TypeError(msg)
|
196
226
|
|
197
227
|
return declare_device_function(name, restype, argtypes, link)
|
@@ -1,21 +1,58 @@
|
|
1
1
|
# Re export
|
2
2
|
import sys
|
3
3
|
from numba.cuda import cg
|
4
|
-
from .stubs import (
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
4
|
+
from .stubs import (
|
5
|
+
threadIdx,
|
6
|
+
blockIdx,
|
7
|
+
blockDim,
|
8
|
+
gridDim,
|
9
|
+
laneid,
|
10
|
+
warpsize,
|
11
|
+
syncwarp,
|
12
|
+
shared,
|
13
|
+
local,
|
14
|
+
const,
|
15
|
+
atomic,
|
16
|
+
shfl_sync_intrinsic,
|
17
|
+
vote_sync_intrinsic,
|
18
|
+
match_any_sync,
|
19
|
+
match_all_sync,
|
20
|
+
threadfence_block,
|
21
|
+
threadfence_system,
|
22
|
+
threadfence,
|
23
|
+
selp,
|
24
|
+
popc,
|
25
|
+
brev,
|
26
|
+
clz,
|
27
|
+
ffs,
|
28
|
+
fma,
|
29
|
+
cbrt,
|
30
|
+
activemask,
|
31
|
+
lanemask_lt,
|
32
|
+
nanosleep,
|
33
|
+
fp16,
|
34
|
+
_vector_type_stubs,
|
35
|
+
)
|
36
|
+
from .intrinsics import (
|
37
|
+
grid,
|
38
|
+
gridsize,
|
39
|
+
syncthreads,
|
40
|
+
syncthreads_and,
|
41
|
+
syncthreads_count,
|
42
|
+
syncthreads_or,
|
43
|
+
)
|
13
44
|
from .cudadrv.error import CudaSupportError
|
14
|
-
from numba.cuda.cudadrv.driver import (
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
45
|
+
from numba.cuda.cudadrv.driver import (
|
46
|
+
BaseCUDAMemoryManager,
|
47
|
+
HostOnlyCUDAMemoryManager,
|
48
|
+
GetIpcHandleMixin,
|
49
|
+
MemoryPointer,
|
50
|
+
MappedMemory,
|
51
|
+
PinnedMemory,
|
52
|
+
MemoryInfo,
|
53
|
+
IpcHandle,
|
54
|
+
set_memory_manager,
|
55
|
+
)
|
19
56
|
from numba.cuda.cudadrv.runtime import runtime
|
20
57
|
from .cudadrv import nvvm
|
21
58
|
from numba.cuda import initialize
|
@@ -26,13 +63,27 @@ from .api import *
|
|
26
63
|
from .api import _auto_device
|
27
64
|
from .args import In, Out, InOut
|
28
65
|
|
29
|
-
from .intrinsic_wrapper import (
|
30
|
-
|
31
|
-
|
66
|
+
from .intrinsic_wrapper import (
|
67
|
+
all_sync,
|
68
|
+
any_sync,
|
69
|
+
eq_sync,
|
70
|
+
ballot_sync,
|
71
|
+
shfl_sync,
|
72
|
+
shfl_up_sync,
|
73
|
+
shfl_down_sync,
|
74
|
+
shfl_xor_sync,
|
75
|
+
)
|
32
76
|
|
33
77
|
from .kernels import reduction
|
34
78
|
from numba.cuda.cudadrv.linkable_code import (
|
35
|
-
Archive,
|
79
|
+
Archive,
|
80
|
+
CUSource,
|
81
|
+
Cubin,
|
82
|
+
Fatbin,
|
83
|
+
LinkableCode,
|
84
|
+
LTOIR,
|
85
|
+
Object,
|
86
|
+
PTXSource,
|
36
87
|
)
|
37
88
|
|
38
89
|
reduce = Reduce = reduction.Reduce
|