numba-cuda 0.8.1__py3-none-any.whl → 0.10.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- _numba_cuda_redirector.py +17 -13
- numba_cuda/VERSION +1 -1
- numba_cuda/_version.py +4 -1
- numba_cuda/numba/cuda/__init__.py +6 -2
- numba_cuda/numba/cuda/api.py +129 -86
- numba_cuda/numba/cuda/api_util.py +3 -3
- numba_cuda/numba/cuda/args.py +12 -16
- numba_cuda/numba/cuda/cg.py +6 -6
- numba_cuda/numba/cuda/codegen.py +74 -43
- numba_cuda/numba/cuda/compiler.py +246 -114
- numba_cuda/numba/cuda/cpp_function_wrappers.cu +1 -2
- numba_cuda/numba/cuda/cuda_bf16.py +5155 -0
- numba_cuda/numba/cuda/cuda_paths.py +293 -99
- numba_cuda/numba/cuda/cudadecl.py +93 -79
- numba_cuda/numba/cuda/cudadrv/__init__.py +3 -1
- numba_cuda/numba/cuda/cudadrv/devicearray.py +185 -135
- numba_cuda/numba/cuda/cudadrv/devices.py +16 -11
- numba_cuda/numba/cuda/cudadrv/driver.py +460 -297
- numba_cuda/numba/cuda/cudadrv/drvapi.py +241 -207
- numba_cuda/numba/cuda/cudadrv/dummyarray.py +66 -54
- numba_cuda/numba/cuda/cudadrv/enums.py +1 -1
- numba_cuda/numba/cuda/cudadrv/error.py +6 -2
- numba_cuda/numba/cuda/cudadrv/libs.py +67 -63
- numba_cuda/numba/cuda/cudadrv/linkable_code.py +27 -3
- numba_cuda/numba/cuda/cudadrv/mappings.py +16 -14
- numba_cuda/numba/cuda/cudadrv/nvrtc.py +146 -30
- numba_cuda/numba/cuda/cudadrv/nvvm.py +296 -161
- numba_cuda/numba/cuda/cudadrv/rtapi.py +1 -1
- numba_cuda/numba/cuda/cudadrv/runtime.py +20 -8
- numba_cuda/numba/cuda/cudaimpl.py +296 -275
- numba_cuda/numba/cuda/cudamath.py +1 -1
- numba_cuda/numba/cuda/debuginfo.py +99 -7
- numba_cuda/numba/cuda/decorators.py +87 -45
- numba_cuda/numba/cuda/descriptor.py +1 -1
- numba_cuda/numba/cuda/device_init.py +68 -18
- numba_cuda/numba/cuda/deviceufunc.py +143 -98
- numba_cuda/numba/cuda/dispatcher.py +300 -213
- numba_cuda/numba/cuda/errors.py +13 -10
- numba_cuda/numba/cuda/extending.py +55 -1
- numba_cuda/numba/cuda/include/11/cuda_bf16.h +3749 -0
- numba_cuda/numba/cuda/include/11/cuda_bf16.hpp +2683 -0
- numba_cuda/numba/cuda/{cuda_fp16.h → include/11/cuda_fp16.h} +1090 -927
- numba_cuda/numba/cuda/{cuda_fp16.hpp → include/11/cuda_fp16.hpp} +468 -319
- numba_cuda/numba/cuda/include/12/cuda_bf16.h +5118 -0
- numba_cuda/numba/cuda/include/12/cuda_bf16.hpp +3865 -0
- numba_cuda/numba/cuda/include/12/cuda_fp16.h +5363 -0
- numba_cuda/numba/cuda/include/12/cuda_fp16.hpp +3483 -0
- numba_cuda/numba/cuda/initialize.py +5 -3
- numba_cuda/numba/cuda/intrinsic_wrapper.py +0 -39
- numba_cuda/numba/cuda/intrinsics.py +203 -28
- numba_cuda/numba/cuda/kernels/reduction.py +13 -13
- numba_cuda/numba/cuda/kernels/transpose.py +3 -6
- numba_cuda/numba/cuda/libdevice.py +317 -317
- numba_cuda/numba/cuda/libdeviceimpl.py +3 -2
- numba_cuda/numba/cuda/locks.py +16 -0
- numba_cuda/numba/cuda/lowering.py +43 -0
- numba_cuda/numba/cuda/mathimpl.py +62 -57
- numba_cuda/numba/cuda/models.py +1 -5
- numba_cuda/numba/cuda/nvvmutils.py +103 -88
- numba_cuda/numba/cuda/printimpl.py +9 -5
- numba_cuda/numba/cuda/random.py +46 -36
- numba_cuda/numba/cuda/reshape_funcs.cu +1 -1
- numba_cuda/numba/cuda/runtime/__init__.py +1 -1
- numba_cuda/numba/cuda/runtime/memsys.cu +1 -1
- numba_cuda/numba/cuda/runtime/memsys.cuh +1 -1
- numba_cuda/numba/cuda/runtime/nrt.cu +3 -3
- numba_cuda/numba/cuda/runtime/nrt.py +48 -43
- numba_cuda/numba/cuda/simulator/__init__.py +22 -12
- numba_cuda/numba/cuda/simulator/api.py +38 -22
- numba_cuda/numba/cuda/simulator/compiler.py +2 -2
- numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +8 -2
- numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +63 -55
- numba_cuda/numba/cuda/simulator/cudadrv/devices.py +13 -11
- numba_cuda/numba/cuda/simulator/cudadrv/driver.py +5 -5
- numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +2 -2
- numba_cuda/numba/cuda/simulator/cudadrv/libs.py +1 -1
- numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +3 -3
- numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +3 -3
- numba_cuda/numba/cuda/simulator/kernel.py +43 -34
- numba_cuda/numba/cuda/simulator/kernelapi.py +31 -26
- numba_cuda/numba/cuda/simulator/reduction.py +1 -0
- numba_cuda/numba/cuda/simulator/vector_types.py +13 -9
- numba_cuda/numba/cuda/simulator_init.py +2 -4
- numba_cuda/numba/cuda/stubs.py +134 -108
- numba_cuda/numba/cuda/target.py +92 -47
- numba_cuda/numba/cuda/testing.py +24 -19
- numba_cuda/numba/cuda/tests/__init__.py +14 -12
- numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +16 -17
- numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +7 -7
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +73 -54
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +1 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +48 -50
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +47 -29
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_libraries.py +3 -3
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +19 -19
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +108 -103
- numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +20 -11
- numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +20 -17
- numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +8 -6
- numba_cuda/numba/cuda/tests/cudadrv/test_events.py +1 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +8 -7
- numba_cuda/numba/cuda/tests/cudadrv/test_init.py +13 -13
- numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +12 -9
- numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +36 -31
- numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +8 -7
- numba_cuda/numba/cuda/tests/cudadrv/test_module_callbacks.py +294 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_mvc.py +10 -7
- numba_cuda/numba/cuda/tests/cudadrv/test_nvjitlink.py +24 -15
- numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +43 -41
- numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +4 -5
- numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +2 -2
- numba_cuda/numba/cuda/tests/cudadrv/test_ptds.py +28 -17
- numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +1 -2
- numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +22 -14
- numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +1 -1
- numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +4 -3
- numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +10 -4
- numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +1 -0
- numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +10 -7
- numba_cuda/numba/cuda/tests/cudapy/jitlink.ptx +0 -2
- numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +1 -0
- numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +6 -5
- numba_cuda/numba/cuda/tests/cudapy/test_array.py +52 -42
- numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +5 -6
- numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +501 -304
- numba_cuda/numba/cuda/tests/cudapy/test_bfloat16_bindings.py +257 -0
- numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +59 -23
- numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +3 -3
- numba_cuda/numba/cuda/tests/cudapy/test_caching.py +50 -37
- numba_cuda/numba/cuda/tests/cudapy/test_casting.py +29 -24
- numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +11 -6
- numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +84 -50
- numba_cuda/numba/cuda/tests/cudapy/test_complex.py +144 -73
- numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +2 -2
- numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +37 -27
- numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +43 -45
- numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +21 -14
- numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +60 -55
- numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +3 -2
- numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +26 -22
- numba_cuda/numba/cuda/tests/cudapy/test_debug.py +29 -27
- numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +77 -28
- numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +52 -45
- numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +55 -43
- numba_cuda/numba/cuda/tests/cudapy/test_enums.py +24 -7
- numba_cuda/numba/cuda/tests/cudapy/test_errors.py +30 -15
- numba_cuda/numba/cuda/tests/cudapy/test_exception.py +11 -12
- numba_cuda/numba/cuda/tests/cudapy/test_extending.py +21 -12
- numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +77 -66
- numba_cuda/numba/cuda/tests/cudapy/test_forall.py +5 -3
- numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +5 -3
- numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_globals.py +3 -5
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +144 -126
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +23 -18
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +16 -22
- numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +1 -3
- numba_cuda/numba/cuda/tests/cudapy/test_inline.py +59 -0
- numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +29 -20
- numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +147 -99
- numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +50 -36
- numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +1 -2
- numba_cuda/numba/cuda/tests/cudapy/test_lang.py +4 -4
- numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +7 -7
- numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +24 -20
- numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +36 -31
- numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +13 -13
- numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +13 -6
- numba_cuda/numba/cuda/tests/cudapy/test_math.py +83 -66
- numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +1 -3
- numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +19 -58
- numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +4 -4
- numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +9 -7
- numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +9 -8
- numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +12 -10
- numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_operator.py +180 -96
- numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +5 -5
- numba_cuda/numba/cuda/tests/cudapy/test_overload.py +37 -18
- numba_cuda/numba/cuda/tests/cudapy/test_powi.py +7 -7
- numba_cuda/numba/cuda/tests/cudapy/test_print.py +9 -7
- numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_random.py +15 -10
- numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +88 -87
- numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +12 -10
- numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +26 -11
- numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +7 -10
- numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +4 -6
- numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +1 -1
- numba_cuda/numba/cuda/tests/cudapy/test_sm.py +10 -9
- numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +62 -43
- numba_cuda/numba/cuda/tests/cudapy/test_stream_api.py +7 -3
- numba_cuda/numba/cuda/tests/cudapy/test_sync.py +7 -5
- numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +18 -11
- numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +111 -88
- numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +2 -3
- numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +305 -130
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +33 -36
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +5 -5
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +16 -12
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +7 -7
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +6 -7
- numba_cuda/numba/cuda/tests/cudapy/test_warning.py +31 -29
- numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +81 -30
- numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +19 -13
- numba_cuda/numba/cuda/tests/data/jitlink.cu +1 -1
- numba_cuda/numba/cuda/tests/data/jitlink.ptx +0 -2
- numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +15 -8
- numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +4 -7
- numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +14 -9
- numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +22 -18
- numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +7 -4
- numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +2 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_random.py +8 -4
- numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +2 -1
- numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +94 -19
- numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +2 -2
- numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +91 -62
- numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +14 -5
- numba_cuda/numba/cuda/tests/nocuda/test_import.py +25 -25
- numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +40 -40
- numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +12 -10
- numba_cuda/numba/cuda/tests/nrt/test_nrt.py +16 -20
- numba_cuda/numba/cuda/tests/nrt/test_nrt_refct.py +12 -10
- numba_cuda/numba/cuda/tests/test_binary_generation/generate_raw_ltoir.py +2 -2
- numba_cuda/numba/cuda/types.py +5 -2
- numba_cuda/numba/cuda/ufuncs.py +382 -362
- numba_cuda/numba/cuda/utils.py +2 -2
- numba_cuda/numba/cuda/vector_types.py +5 -3
- numba_cuda/numba/cuda/vectorizers.py +38 -33
- {numba_cuda-0.8.1.dist-info → numba_cuda-0.10.0.dist-info}/METADATA +1 -1
- numba_cuda-0.10.0.dist-info/RECORD +263 -0
- {numba_cuda-0.8.1.dist-info → numba_cuda-0.10.0.dist-info}/WHEEL +1 -1
- numba_cuda-0.8.1.dist-info/RECORD +0 -251
- {numba_cuda-0.8.1.dist-info → numba_cuda-0.10.0.dist-info}/licenses/LICENSE +0 -0
- {numba_cuda-0.8.1.dist-info → numba_cuda-0.10.0.dist-info}/top_level.txt +0 -0
@@ -136,5 +136,5 @@ class Math_isnan(ConcreteTemplate):
|
|
136
136
|
class Math_modf(ConcreteTemplate):
|
137
137
|
cases = [
|
138
138
|
signature(types.UniTuple(types.float64, 2), types.float64),
|
139
|
-
signature(types.UniTuple(types.float32, 2), types.float32)
|
139
|
+
signature(types.UniTuple(types.float32, 2), types.float32),
|
140
140
|
]
|
@@ -1,5 +1,5 @@
|
|
1
1
|
from llvmlite import ir
|
2
|
-
from numba.core import types
|
2
|
+
from numba.core import types, cgutils
|
3
3
|
from numba.core.debuginfo import DIBuilder
|
4
4
|
from numba.cuda.types import GridGroup
|
5
5
|
|
@@ -7,9 +7,14 @@ _BYTE_SIZE = 8
|
|
7
7
|
|
8
8
|
|
9
9
|
class CUDADIBuilder(DIBuilder):
|
10
|
+
def __init__(self, module, filepath, cgctx, directives_only):
|
11
|
+
super().__init__(module, filepath, cgctx, directives_only)
|
12
|
+
# Cache for local variable metadata type and line deduplication
|
13
|
+
self._vartypelinemap = {}
|
10
14
|
|
11
15
|
def _var_type(self, lltype, size, datamodel=None):
|
12
16
|
is_bool = False
|
17
|
+
is_int_literal = False
|
13
18
|
is_grid_group = False
|
14
19
|
|
15
20
|
if isinstance(lltype, ir.IntType):
|
@@ -21,24 +26,111 @@ class CUDADIBuilder(DIBuilder):
|
|
21
26
|
name = str(datamodel.fe_type)
|
22
27
|
if isinstance(datamodel.fe_type, types.Boolean):
|
23
28
|
is_bool = True
|
29
|
+
if isinstance(datamodel.fe_type, types.BooleanLiteral):
|
30
|
+
name = "bool"
|
31
|
+
elif isinstance(datamodel.fe_type, types.Integer):
|
32
|
+
if isinstance(datamodel.fe_type, types.IntegerLiteral):
|
33
|
+
name = f"int{_BYTE_SIZE * size}"
|
34
|
+
is_int_literal = True
|
24
35
|
elif isinstance(datamodel.fe_type, GridGroup):
|
25
36
|
is_grid_group = True
|
26
37
|
|
27
|
-
if is_bool or is_grid_group:
|
38
|
+
if is_bool or is_int_literal or is_grid_group:
|
28
39
|
m = self.module
|
29
40
|
bitsize = _BYTE_SIZE * size
|
30
41
|
# Boolean type workaround until upstream Numba is fixed
|
31
42
|
if is_bool:
|
32
43
|
ditok = "DW_ATE_boolean"
|
44
|
+
elif is_int_literal:
|
45
|
+
ditok = "DW_ATE_signed"
|
33
46
|
# GridGroup type should use numba.cuda implementation
|
34
47
|
elif is_grid_group:
|
35
48
|
ditok = "DW_ATE_unsigned"
|
36
49
|
|
37
|
-
return m.add_debug_info(
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
50
|
+
return m.add_debug_info(
|
51
|
+
"DIBasicType",
|
52
|
+
{
|
53
|
+
"name": name,
|
54
|
+
"size": bitsize,
|
55
|
+
"encoding": ir.DIToken(ditok),
|
56
|
+
},
|
57
|
+
)
|
42
58
|
|
43
59
|
# For other cases, use upstream Numba implementation
|
44
60
|
return super()._var_type(lltype, size, datamodel=datamodel)
|
61
|
+
|
62
|
+
def mark_variable(
|
63
|
+
self,
|
64
|
+
builder,
|
65
|
+
allocavalue,
|
66
|
+
name,
|
67
|
+
lltype,
|
68
|
+
size,
|
69
|
+
line,
|
70
|
+
datamodel=None,
|
71
|
+
argidx=None,
|
72
|
+
):
|
73
|
+
if name.startswith("$") or "." in name:
|
74
|
+
# Do not emit llvm.dbg.declare on user variable alias
|
75
|
+
return
|
76
|
+
else:
|
77
|
+
int_type = (ir.IntType,)
|
78
|
+
real_type = ir.FloatType, ir.DoubleType
|
79
|
+
if isinstance(lltype, int_type + real_type):
|
80
|
+
# Start with scalar variable, switching llvm.dbg.declare
|
81
|
+
# to llvm.dbg.value
|
82
|
+
return
|
83
|
+
else:
|
84
|
+
return super().mark_variable(
|
85
|
+
builder,
|
86
|
+
allocavalue,
|
87
|
+
name,
|
88
|
+
lltype,
|
89
|
+
size,
|
90
|
+
line,
|
91
|
+
datamodel,
|
92
|
+
argidx,
|
93
|
+
)
|
94
|
+
|
95
|
+
def update_variable(
|
96
|
+
self,
|
97
|
+
builder,
|
98
|
+
value,
|
99
|
+
name,
|
100
|
+
lltype,
|
101
|
+
size,
|
102
|
+
line,
|
103
|
+
datamodel=None,
|
104
|
+
argidx=None,
|
105
|
+
):
|
106
|
+
m = self.module
|
107
|
+
fnty = ir.FunctionType(ir.VoidType(), [ir.MetaDataType()] * 3)
|
108
|
+
decl = cgutils.get_or_insert_function(m, fnty, "llvm.dbg.value")
|
109
|
+
|
110
|
+
mdtype = self._var_type(lltype, size, datamodel)
|
111
|
+
index = name.find(".")
|
112
|
+
if index >= 0:
|
113
|
+
name = name[:index]
|
114
|
+
# Merge DILocalVariable nodes with same name and type but different
|
115
|
+
# lines. Use the cached [(name, type) -> line] info to deduplicate
|
116
|
+
# metadata. Use the lltype as part of key.
|
117
|
+
key = (name, lltype)
|
118
|
+
if key in self._vartypelinemap:
|
119
|
+
line = self._vartypelinemap[key]
|
120
|
+
else:
|
121
|
+
self._vartypelinemap[key] = line
|
122
|
+
arg_index = 0 if argidx is None else argidx
|
123
|
+
mdlocalvar = m.add_debug_info(
|
124
|
+
"DILocalVariable",
|
125
|
+
{
|
126
|
+
"name": name,
|
127
|
+
"arg": arg_index,
|
128
|
+
"scope": self.subprograms[-1],
|
129
|
+
"file": self.difile,
|
130
|
+
"line": line,
|
131
|
+
"type": mdtype,
|
132
|
+
},
|
133
|
+
)
|
134
|
+
mdexpr = m.add_debug_info("DIExpression", {})
|
135
|
+
|
136
|
+
return builder.call(decl, [value, mdlocalvar, mdexpr])
|
@@ -6,13 +6,24 @@ from numba.cuda.dispatcher import CUDADispatcher
|
|
6
6
|
from numba.cuda.simulator.kernel import FakeCUDAKernel
|
7
7
|
|
8
8
|
|
9
|
-
_msg_deprecated_signature_arg = (
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
9
|
+
_msg_deprecated_signature_arg = (
|
10
|
+
"Deprecated keyword argument `{0}`. "
|
11
|
+
"Signatures should be passed as the first "
|
12
|
+
"positional argument."
|
13
|
+
)
|
14
|
+
|
15
|
+
|
16
|
+
def jit(
|
17
|
+
func_or_sig=None,
|
18
|
+
device=False,
|
19
|
+
inline="never",
|
20
|
+
link=[],
|
21
|
+
debug=None,
|
22
|
+
opt=None,
|
23
|
+
lineinfo=False,
|
24
|
+
cache=False,
|
25
|
+
**kws,
|
26
|
+
):
|
16
27
|
"""
|
17
28
|
JIT compile a Python function for CUDA GPUs.
|
18
29
|
|
@@ -55,39 +66,52 @@ def jit(func_or_sig=None, device=False, inline=False, link=[], debug=None,
|
|
55
66
|
"""
|
56
67
|
|
57
68
|
if link and config.ENABLE_CUDASIM:
|
58
|
-
raise NotImplementedError(
|
69
|
+
raise NotImplementedError("Cannot link PTX in the simulator")
|
59
70
|
|
60
|
-
if kws.get(
|
71
|
+
if kws.get("boundscheck"):
|
61
72
|
raise NotImplementedError("bounds checking is not supported for CUDA")
|
62
73
|
|
63
|
-
if kws.get(
|
64
|
-
msg = _msg_deprecated_signature_arg.format(
|
74
|
+
if kws.get("argtypes") is not None:
|
75
|
+
msg = _msg_deprecated_signature_arg.format("argtypes")
|
65
76
|
raise DeprecationError(msg)
|
66
|
-
if kws.get(
|
67
|
-
msg = _msg_deprecated_signature_arg.format(
|
77
|
+
if kws.get("restype") is not None:
|
78
|
+
msg = _msg_deprecated_signature_arg.format("restype")
|
68
79
|
raise DeprecationError(msg)
|
69
|
-
if kws.get(
|
70
|
-
msg = _msg_deprecated_signature_arg.format(
|
80
|
+
if kws.get("bind") is not None:
|
81
|
+
msg = _msg_deprecated_signature_arg.format("bind")
|
71
82
|
raise DeprecationError(msg)
|
72
83
|
|
84
|
+
if isinstance(inline, bool):
|
85
|
+
DeprecationWarning(
|
86
|
+
"Passing bool to inline argument is deprecated, please refer to "
|
87
|
+
"Numba's documentation on inlining: "
|
88
|
+
"https://numba.readthedocs.io/en/stable/developer/inlining.html"
|
89
|
+
)
|
90
|
+
|
91
|
+
inline = "always" if inline else "never"
|
92
|
+
|
73
93
|
debug = config.CUDA_DEBUGINFO_DEFAULT if debug is None else debug
|
74
94
|
opt = (config.OPT != 0) if opt is None else opt
|
75
|
-
fastmath = kws.get(
|
76
|
-
extensions = kws.get(
|
95
|
+
fastmath = kws.get("fastmath", False)
|
96
|
+
extensions = kws.get("extensions", [])
|
77
97
|
|
78
98
|
if debug and opt:
|
79
|
-
msg = (
|
80
|
-
|
81
|
-
|
99
|
+
msg = (
|
100
|
+
"debug=True with opt=True "
|
101
|
+
"is not supported by CUDA. This may result in a crash"
|
102
|
+
" - set debug=False or opt=False."
|
103
|
+
)
|
82
104
|
warn(NumbaInvalidConfigWarning(msg))
|
83
105
|
|
84
106
|
if debug and lineinfo:
|
85
|
-
msg = (
|
86
|
-
|
87
|
-
|
107
|
+
msg = (
|
108
|
+
"debug and lineinfo are mutually exclusive. Use debug to get "
|
109
|
+
"full debug info (this disables some optimizations), or "
|
110
|
+
"lineinfo for line info only with code generation unaffected."
|
111
|
+
)
|
88
112
|
warn(NumbaInvalidConfigWarning(msg))
|
89
113
|
|
90
|
-
if device and kws.get(
|
114
|
+
if device and kws.get("link"):
|
91
115
|
raise ValueError("link keyword invalid for device function")
|
92
116
|
|
93
117
|
if sigutils.is_signature(func_or_sig):
|
@@ -101,19 +125,22 @@ def jit(func_or_sig=None, device=False, inline=False, link=[], debug=None,
|
|
101
125
|
|
102
126
|
if signatures is not None:
|
103
127
|
if config.ENABLE_CUDASIM:
|
128
|
+
|
104
129
|
def jitwrapper(func):
|
105
130
|
return FakeCUDAKernel(func, device=device, fastmath=fastmath)
|
131
|
+
|
106
132
|
return jitwrapper
|
107
133
|
|
108
134
|
def _jit(func):
|
109
135
|
targetoptions = kws.copy()
|
110
|
-
targetoptions[
|
111
|
-
targetoptions[
|
112
|
-
targetoptions[
|
113
|
-
targetoptions[
|
114
|
-
targetoptions[
|
115
|
-
targetoptions[
|
116
|
-
targetoptions[
|
136
|
+
targetoptions["debug"] = debug
|
137
|
+
targetoptions["lineinfo"] = lineinfo
|
138
|
+
targetoptions["link"] = link
|
139
|
+
targetoptions["opt"] = opt
|
140
|
+
targetoptions["fastmath"] = fastmath
|
141
|
+
targetoptions["device"] = device
|
142
|
+
targetoptions["inline"] = inline
|
143
|
+
targetoptions["extensions"] = extensions
|
117
144
|
|
118
145
|
disp = CUDADispatcher(func, targetoptions=targetoptions)
|
119
146
|
|
@@ -128,6 +155,7 @@ def jit(func_or_sig=None, device=False, inline=False, link=[], debug=None,
|
|
128
155
|
|
129
156
|
if device:
|
130
157
|
from numba.core import typeinfer
|
158
|
+
|
131
159
|
with typeinfer.register_dispatcher(disp):
|
132
160
|
disp.compile_device(argtypes, restype)
|
133
161
|
else:
|
@@ -142,29 +170,43 @@ def jit(func_or_sig=None, device=False, inline=False, link=[], debug=None,
|
|
142
170
|
else:
|
143
171
|
if func_or_sig is None:
|
144
172
|
if config.ENABLE_CUDASIM:
|
173
|
+
|
145
174
|
def autojitwrapper(func):
|
146
|
-
return FakeCUDAKernel(
|
147
|
-
|
175
|
+
return FakeCUDAKernel(
|
176
|
+
func, device=device, fastmath=fastmath
|
177
|
+
)
|
148
178
|
else:
|
179
|
+
|
149
180
|
def autojitwrapper(func):
|
150
|
-
return jit(
|
151
|
-
|
181
|
+
return jit(
|
182
|
+
func,
|
183
|
+
device=device,
|
184
|
+
inline=inline,
|
185
|
+
debug=debug,
|
186
|
+
opt=opt,
|
187
|
+
lineinfo=lineinfo,
|
188
|
+
link=link,
|
189
|
+
cache=cache,
|
190
|
+
**kws,
|
191
|
+
)
|
152
192
|
|
153
193
|
return autojitwrapper
|
154
194
|
# func_or_sig is a function
|
155
195
|
else:
|
156
196
|
if config.ENABLE_CUDASIM:
|
157
|
-
return FakeCUDAKernel(
|
158
|
-
|
197
|
+
return FakeCUDAKernel(
|
198
|
+
func_or_sig, device=device, fastmath=fastmath
|
199
|
+
)
|
159
200
|
else:
|
160
201
|
targetoptions = kws.copy()
|
161
|
-
targetoptions[
|
162
|
-
targetoptions[
|
163
|
-
targetoptions[
|
164
|
-
targetoptions[
|
165
|
-
targetoptions[
|
166
|
-
targetoptions[
|
167
|
-
targetoptions[
|
202
|
+
targetoptions["debug"] = debug
|
203
|
+
targetoptions["lineinfo"] = lineinfo
|
204
|
+
targetoptions["opt"] = opt
|
205
|
+
targetoptions["link"] = link
|
206
|
+
targetoptions["fastmath"] = fastmath
|
207
|
+
targetoptions["device"] = device
|
208
|
+
targetoptions["inline"] = inline
|
209
|
+
targetoptions["extensions"] = extensions
|
168
210
|
disp = CUDADispatcher(func_or_sig, targetoptions=targetoptions)
|
169
211
|
|
170
212
|
if cache:
|
@@ -191,7 +233,7 @@ def declare_device(name, sig, link=None):
|
|
191
233
|
|
192
234
|
argtypes, restype = sigutils.normalize_signature(sig)
|
193
235
|
if restype is None:
|
194
|
-
msg =
|
236
|
+
msg = "Return type must be provided for device declarations"
|
195
237
|
raise TypeError(msg)
|
196
238
|
|
197
239
|
return declare_device_function(name, restype, argtypes, link)
|
@@ -1,21 +1,61 @@
|
|
1
1
|
# Re-export
|
2
2
|
import sys
|
3
3
|
from numba.cuda import cg
|
4
|
-
from .stubs import (
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
4
|
+
from .stubs import (
|
5
|
+
threadIdx,
|
6
|
+
blockIdx,
|
7
|
+
blockDim,
|
8
|
+
gridDim,
|
9
|
+
laneid,
|
10
|
+
warpsize,
|
11
|
+
syncwarp,
|
12
|
+
shared,
|
13
|
+
local,
|
14
|
+
const,
|
15
|
+
atomic,
|
16
|
+
vote_sync_intrinsic,
|
17
|
+
match_any_sync,
|
18
|
+
match_all_sync,
|
19
|
+
threadfence_block,
|
20
|
+
threadfence_system,
|
21
|
+
threadfence,
|
22
|
+
selp,
|
23
|
+
popc,
|
24
|
+
brev,
|
25
|
+
clz,
|
26
|
+
ffs,
|
27
|
+
fma,
|
28
|
+
cbrt,
|
29
|
+
activemask,
|
30
|
+
lanemask_lt,
|
31
|
+
nanosleep,
|
32
|
+
fp16,
|
33
|
+
_vector_type_stubs,
|
34
|
+
)
|
35
|
+
from .intrinsics import (
|
36
|
+
grid,
|
37
|
+
gridsize,
|
38
|
+
syncthreads,
|
39
|
+
syncthreads_and,
|
40
|
+
syncthreads_count,
|
41
|
+
syncthreads_or,
|
42
|
+
shfl_sync,
|
43
|
+
shfl_up_sync,
|
44
|
+
shfl_down_sync,
|
45
|
+
shfl_xor_sync,
|
46
|
+
)
|
13
47
|
from .cudadrv.error import CudaSupportError
|
14
|
-
from numba.cuda.cudadrv.driver import (
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
48
|
+
from numba.cuda.cudadrv.driver import (
|
49
|
+
BaseCUDAMemoryManager,
|
50
|
+
HostOnlyCUDAMemoryManager,
|
51
|
+
GetIpcHandleMixin,
|
52
|
+
MemoryPointer,
|
53
|
+
MappedMemory,
|
54
|
+
PinnedMemory,
|
55
|
+
MemoryInfo,
|
56
|
+
IpcHandle,
|
57
|
+
set_memory_manager,
|
58
|
+
)
|
19
59
|
from numba.cuda.cudadrv.runtime import runtime
|
20
60
|
from .cudadrv import nvvm
|
21
61
|
from numba.cuda import initialize
|
@@ -26,13 +66,23 @@ from .api import *
|
|
26
66
|
from .api import _auto_device
|
27
67
|
from .args import In, Out, InOut
|
28
68
|
|
29
|
-
from .intrinsic_wrapper import (
|
30
|
-
|
31
|
-
|
69
|
+
from .intrinsic_wrapper import (
|
70
|
+
all_sync,
|
71
|
+
any_sync,
|
72
|
+
eq_sync,
|
73
|
+
ballot_sync,
|
74
|
+
)
|
32
75
|
|
33
76
|
from .kernels import reduction
|
34
77
|
from numba.cuda.cudadrv.linkable_code import (
|
35
|
-
Archive,
|
78
|
+
Archive,
|
79
|
+
CUSource,
|
80
|
+
Cubin,
|
81
|
+
Fatbin,
|
82
|
+
LinkableCode,
|
83
|
+
LTOIR,
|
84
|
+
Object,
|
85
|
+
PTXSource,
|
36
86
|
)
|
37
87
|
|
38
88
|
reduce = Reduce = reduction.Reduce
|