numba-cuda 0.0.1__py3-none-any.whl → 0.0.12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- _numba_cuda_redirector.pth +1 -0
- _numba_cuda_redirector.py +74 -0
- numba_cuda/VERSION +1 -0
- numba_cuda/__init__.py +5 -0
- numba_cuda/_version.py +19 -0
- numba_cuda/numba/cuda/__init__.py +22 -0
- numba_cuda/numba/cuda/api.py +526 -0
- numba_cuda/numba/cuda/api_util.py +30 -0
- numba_cuda/numba/cuda/args.py +77 -0
- numba_cuda/numba/cuda/cg.py +62 -0
- numba_cuda/numba/cuda/codegen.py +378 -0
- numba_cuda/numba/cuda/compiler.py +422 -0
- numba_cuda/numba/cuda/cpp_function_wrappers.cu +47 -0
- numba_cuda/numba/cuda/cuda_fp16.h +3631 -0
- numba_cuda/numba/cuda/cuda_fp16.hpp +2465 -0
- numba_cuda/numba/cuda/cuda_paths.py +258 -0
- numba_cuda/numba/cuda/cudadecl.py +806 -0
- numba_cuda/numba/cuda/cudadrv/__init__.py +9 -0
- numba_cuda/numba/cuda/cudadrv/devicearray.py +904 -0
- numba_cuda/numba/cuda/cudadrv/devices.py +248 -0
- numba_cuda/numba/cuda/cudadrv/driver.py +3201 -0
- numba_cuda/numba/cuda/cudadrv/drvapi.py +398 -0
- numba_cuda/numba/cuda/cudadrv/dummyarray.py +452 -0
- numba_cuda/numba/cuda/cudadrv/enums.py +607 -0
- numba_cuda/numba/cuda/cudadrv/error.py +36 -0
- numba_cuda/numba/cuda/cudadrv/libs.py +176 -0
- numba_cuda/numba/cuda/cudadrv/ndarray.py +20 -0
- numba_cuda/numba/cuda/cudadrv/nvrtc.py +260 -0
- numba_cuda/numba/cuda/cudadrv/nvvm.py +707 -0
- numba_cuda/numba/cuda/cudadrv/rtapi.py +10 -0
- numba_cuda/numba/cuda/cudadrv/runtime.py +142 -0
- numba_cuda/numba/cuda/cudaimpl.py +1055 -0
- numba_cuda/numba/cuda/cudamath.py +140 -0
- numba_cuda/numba/cuda/decorators.py +189 -0
- numba_cuda/numba/cuda/descriptor.py +33 -0
- numba_cuda/numba/cuda/device_init.py +89 -0
- numba_cuda/numba/cuda/deviceufunc.py +908 -0
- numba_cuda/numba/cuda/dispatcher.py +1057 -0
- numba_cuda/numba/cuda/errors.py +59 -0
- numba_cuda/numba/cuda/extending.py +7 -0
- numba_cuda/numba/cuda/initialize.py +13 -0
- numba_cuda/numba/cuda/intrinsic_wrapper.py +77 -0
- numba_cuda/numba/cuda/intrinsics.py +198 -0
- numba_cuda/numba/cuda/kernels/__init__.py +0 -0
- numba_cuda/numba/cuda/kernels/reduction.py +262 -0
- numba_cuda/numba/cuda/kernels/transpose.py +65 -0
- numba_cuda/numba/cuda/libdevice.py +3382 -0
- numba_cuda/numba/cuda/libdevicedecl.py +17 -0
- numba_cuda/numba/cuda/libdevicefuncs.py +1057 -0
- numba_cuda/numba/cuda/libdeviceimpl.py +83 -0
- numba_cuda/numba/cuda/mathimpl.py +448 -0
- numba_cuda/numba/cuda/models.py +48 -0
- numba_cuda/numba/cuda/nvvmutils.py +235 -0
- numba_cuda/numba/cuda/printimpl.py +86 -0
- numba_cuda/numba/cuda/random.py +292 -0
- numba_cuda/numba/cuda/simulator/__init__.py +38 -0
- numba_cuda/numba/cuda/simulator/api.py +110 -0
- numba_cuda/numba/cuda/simulator/compiler.py +9 -0
- numba_cuda/numba/cuda/simulator/cudadrv/__init__.py +2 -0
- numba_cuda/numba/cuda/simulator/cudadrv/devicearray.py +432 -0
- numba_cuda/numba/cuda/simulator/cudadrv/devices.py +117 -0
- numba_cuda/numba/cuda/simulator/cudadrv/driver.py +62 -0
- numba_cuda/numba/cuda/simulator/cudadrv/drvapi.py +4 -0
- numba_cuda/numba/cuda/simulator/cudadrv/dummyarray.py +4 -0
- numba_cuda/numba/cuda/simulator/cudadrv/error.py +6 -0
- numba_cuda/numba/cuda/simulator/cudadrv/libs.py +2 -0
- numba_cuda/numba/cuda/simulator/cudadrv/nvvm.py +29 -0
- numba_cuda/numba/cuda/simulator/cudadrv/runtime.py +19 -0
- numba_cuda/numba/cuda/simulator/kernel.py +308 -0
- numba_cuda/numba/cuda/simulator/kernelapi.py +495 -0
- numba_cuda/numba/cuda/simulator/reduction.py +15 -0
- numba_cuda/numba/cuda/simulator/vector_types.py +58 -0
- numba_cuda/numba/cuda/simulator_init.py +17 -0
- numba_cuda/numba/cuda/stubs.py +902 -0
- numba_cuda/numba/cuda/target.py +440 -0
- numba_cuda/numba/cuda/testing.py +202 -0
- numba_cuda/numba/cuda/tests/__init__.py +58 -0
- numba_cuda/numba/cuda/tests/cudadrv/__init__.py +8 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py +145 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_context_stack.py +145 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_array_slicing.py +375 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_auto_context.py +21 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_devicerecord.py +179 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py +235 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_libraries.py +22 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_memory.py +193 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_cuda_ndarray.py +547 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_deallocations.py +249 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_detect.py +81 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_emm_plugins.py +192 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_events.py +38 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_host_alloc.py +65 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_init.py +139 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_inline_ptx.py +37 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_is_fp16.py +12 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_linker.py +317 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_managed_alloc.py +127 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_mvc.py +54 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_nvvm_driver.py +199 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_pinned.py +37 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_profiler.py +20 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_ptds.py +149 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_reset_device.py +36 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_runtime.py +85 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_select_device.py +41 -0
- numba_cuda/numba/cuda/tests/cudadrv/test_streams.py +122 -0
- numba_cuda/numba/cuda/tests/cudapy/__init__.py +8 -0
- numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py +234 -0
- numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py +41 -0
- numba_cuda/numba/cuda/tests/cudapy/extensions_usecases.py +58 -0
- numba_cuda/numba/cuda/tests/cudapy/jitlink.ptx +30 -0
- numba_cuda/numba/cuda/tests/cudapy/recursion_usecases.py +100 -0
- numba_cuda/numba/cuda/tests/cudapy/test_alignment.py +42 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array.py +260 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array_args.py +201 -0
- numba_cuda/numba/cuda/tests/cudapy/test_array_methods.py +35 -0
- numba_cuda/numba/cuda/tests/cudapy/test_atomics.py +1620 -0
- numba_cuda/numba/cuda/tests/cudapy/test_blackscholes.py +120 -0
- numba_cuda/numba/cuda/tests/cudapy/test_boolean.py +24 -0
- numba_cuda/numba/cuda/tests/cudapy/test_caching.py +545 -0
- numba_cuda/numba/cuda/tests/cudapy/test_casting.py +257 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cffi.py +33 -0
- numba_cuda/numba/cuda/tests/cudapy/test_compiler.py +276 -0
- numba_cuda/numba/cuda/tests/cudapy/test_complex.py +296 -0
- numba_cuda/numba/cuda/tests/cudapy/test_complex_kernel.py +20 -0
- numba_cuda/numba/cuda/tests/cudapy/test_const_string.py +129 -0
- numba_cuda/numba/cuda/tests/cudapy/test_constmem.py +176 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cooperative_groups.py +147 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cuda_array_interface.py +435 -0
- numba_cuda/numba/cuda/tests/cudapy/test_cuda_jit_no_types.py +90 -0
- numba_cuda/numba/cuda/tests/cudapy/test_datetime.py +94 -0
- numba_cuda/numba/cuda/tests/cudapy/test_debug.py +101 -0
- numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py +221 -0
- numba_cuda/numba/cuda/tests/cudapy/test_device_func.py +222 -0
- numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py +700 -0
- numba_cuda/numba/cuda/tests/cudapy/test_enums.py +121 -0
- numba_cuda/numba/cuda/tests/cudapy/test_errors.py +79 -0
- numba_cuda/numba/cuda/tests/cudapy/test_exception.py +174 -0
- numba_cuda/numba/cuda/tests/cudapy/test_extending.py +155 -0
- numba_cuda/numba/cuda/tests/cudapy/test_fastmath.py +244 -0
- numba_cuda/numba/cuda/tests/cudapy/test_forall.py +52 -0
- numba_cuda/numba/cuda/tests/cudapy/test_freevar.py +29 -0
- numba_cuda/numba/cuda/tests/cudapy/test_frexp_ldexp.py +66 -0
- numba_cuda/numba/cuda/tests/cudapy/test_globals.py +60 -0
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc.py +456 -0
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py +159 -0
- numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scheduling.py +95 -0
- numba_cuda/numba/cuda/tests/cudapy/test_idiv.py +37 -0
- numba_cuda/numba/cuda/tests/cudapy/test_inspect.py +165 -0
- numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py +1106 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ipc.py +318 -0
- numba_cuda/numba/cuda/tests/cudapy/test_iterators.py +99 -0
- numba_cuda/numba/cuda/tests/cudapy/test_lang.py +64 -0
- numba_cuda/numba/cuda/tests/cudapy/test_laplace.py +119 -0
- numba_cuda/numba/cuda/tests/cudapy/test_libdevice.py +187 -0
- numba_cuda/numba/cuda/tests/cudapy/test_lineinfo.py +199 -0
- numba_cuda/numba/cuda/tests/cudapy/test_localmem.py +164 -0
- numba_cuda/numba/cuda/tests/cudapy/test_mandel.py +37 -0
- numba_cuda/numba/cuda/tests/cudapy/test_math.py +786 -0
- numba_cuda/numba/cuda/tests/cudapy/test_matmul.py +74 -0
- numba_cuda/numba/cuda/tests/cudapy/test_minmax.py +113 -0
- numba_cuda/numba/cuda/tests/cudapy/test_montecarlo.py +22 -0
- numba_cuda/numba/cuda/tests/cudapy/test_multigpu.py +140 -0
- numba_cuda/numba/cuda/tests/cudapy/test_multiprocessing.py +46 -0
- numba_cuda/numba/cuda/tests/cudapy/test_multithreads.py +101 -0
- numba_cuda/numba/cuda/tests/cudapy/test_nondet.py +49 -0
- numba_cuda/numba/cuda/tests/cudapy/test_operator.py +401 -0
- numba_cuda/numba/cuda/tests/cudapy/test_optimization.py +86 -0
- numba_cuda/numba/cuda/tests/cudapy/test_overload.py +335 -0
- numba_cuda/numba/cuda/tests/cudapy/test_powi.py +124 -0
- numba_cuda/numba/cuda/tests/cudapy/test_print.py +128 -0
- numba_cuda/numba/cuda/tests/cudapy/test_py2_div_issue.py +33 -0
- numba_cuda/numba/cuda/tests/cudapy/test_random.py +104 -0
- numba_cuda/numba/cuda/tests/cudapy/test_record_dtype.py +610 -0
- numba_cuda/numba/cuda/tests/cudapy/test_recursion.py +125 -0
- numba_cuda/numba/cuda/tests/cudapy/test_reduction.py +76 -0
- numba_cuda/numba/cuda/tests/cudapy/test_retrieve_autoconverted_arrays.py +83 -0
- numba_cuda/numba/cuda/tests/cudapy/test_serialize.py +85 -0
- numba_cuda/numba/cuda/tests/cudapy/test_slicing.py +37 -0
- numba_cuda/numba/cuda/tests/cudapy/test_sm.py +444 -0
- numba_cuda/numba/cuda/tests/cudapy/test_sm_creation.py +205 -0
- numba_cuda/numba/cuda/tests/cudapy/test_sync.py +271 -0
- numba_cuda/numba/cuda/tests/cudapy/test_transpose.py +80 -0
- numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py +277 -0
- numba_cuda/numba/cuda/tests/cudapy/test_userexc.py +47 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vector_type.py +307 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize.py +283 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_complex.py +20 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_decor.py +69 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_device.py +36 -0
- numba_cuda/numba/cuda/tests/cudapy/test_vectorize_scalar_arg.py +37 -0
- numba_cuda/numba/cuda/tests/cudapy/test_warning.py +139 -0
- numba_cuda/numba/cuda/tests/cudapy/test_warp_ops.py +276 -0
- numba_cuda/numba/cuda/tests/cudasim/__init__.py +6 -0
- numba_cuda/numba/cuda/tests/cudasim/support.py +6 -0
- numba_cuda/numba/cuda/tests/cudasim/test_cudasim_issues.py +102 -0
- numba_cuda/numba/cuda/tests/data/__init__.py +0 -0
- numba_cuda/numba/cuda/tests/data/cuda_include.cu +5 -0
- numba_cuda/numba/cuda/tests/data/error.cu +7 -0
- numba_cuda/numba/cuda/tests/data/jitlink.cu +23 -0
- numba_cuda/numba/cuda/tests/data/jitlink.ptx +51 -0
- numba_cuda/numba/cuda/tests/data/warn.cu +7 -0
- numba_cuda/numba/cuda/tests/doc_examples/__init__.py +6 -0
- numba_cuda/numba/cuda/tests/doc_examples/ffi/__init__.py +0 -0
- numba_cuda/numba/cuda/tests/doc_examples/ffi/functions.cu +49 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_cg.py +77 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_cpu_gpu_compat.py +76 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_ffi.py +82 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_laplace.py +155 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_matmul.py +173 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_montecarlo.py +109 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_random.py +59 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_reduction.py +76 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_sessionize.py +130 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_ufunc.py +50 -0
- numba_cuda/numba/cuda/tests/doc_examples/test_vecadd.py +73 -0
- numba_cuda/numba/cuda/tests/nocuda/__init__.py +8 -0
- numba_cuda/numba/cuda/tests/nocuda/test_dummyarray.py +359 -0
- numba_cuda/numba/cuda/tests/nocuda/test_function_resolution.py +36 -0
- numba_cuda/numba/cuda/tests/nocuda/test_import.py +49 -0
- numba_cuda/numba/cuda/tests/nocuda/test_library_lookup.py +238 -0
- numba_cuda/numba/cuda/tests/nocuda/test_nvvm.py +54 -0
- numba_cuda/numba/cuda/types.py +37 -0
- numba_cuda/numba/cuda/ufuncs.py +662 -0
- numba_cuda/numba/cuda/vector_types.py +209 -0
- numba_cuda/numba/cuda/vectorizers.py +252 -0
- numba_cuda-0.0.12.dist-info/LICENSE +25 -0
- numba_cuda-0.0.12.dist-info/METADATA +68 -0
- numba_cuda-0.0.12.dist-info/RECORD +231 -0
- {numba_cuda-0.0.1.dist-info → numba_cuda-0.0.12.dist-info}/WHEEL +1 -1
- numba_cuda-0.0.1.dist-info/METADATA +0 -10
- numba_cuda-0.0.1.dist-info/RECORD +0 -5
- {numba_cuda-0.0.1.dist-info → numba_cuda-0.0.12.dist-info}/top_level.txt +0 -0
numba_cuda/numba/cuda/cudadrv/enums.py
@@ -0,0 +1,607 @@
+"""
+Enum values for CUDA driver. Information about the values
+can be found on the official NVIDIA documentation website.
+ref: https://docs.nvidia.com/cuda/cuda-driver-api/group__CUDA__TYPES.html
+anchor: #group__CUDA__TYPES
+"""
+
+
+# Error codes
+
+CUDA_SUCCESS = 0
+CUDA_ERROR_INVALID_VALUE = 1
+CUDA_ERROR_OUT_OF_MEMORY = 2
+CUDA_ERROR_NOT_INITIALIZED = 3
+CUDA_ERROR_DEINITIALIZED = 4
+CUDA_ERROR_PROFILER_DISABLED = 5
+CUDA_ERROR_PROFILER_NOT_INITIALIZED = 6
+CUDA_ERROR_PROFILER_ALREADY_STARTED = 7
+CUDA_ERROR_PROFILER_ALREADY_STOPPED = 8
+CUDA_ERROR_STUB_LIBRARY = 34
+CUDA_ERROR_DEVICE_UNAVAILABLE = 46
+CUDA_ERROR_NO_DEVICE = 100
+CUDA_ERROR_INVALID_DEVICE = 101
+CUDA_ERROR_DEVICE_NOT_LICENSED = 102
+CUDA_ERROR_INVALID_IMAGE = 200
+CUDA_ERROR_INVALID_CONTEXT = 201
+CUDA_ERROR_CONTEXT_ALREADY_CURRENT = 202
+CUDA_ERROR_MAP_FAILED = 205
+CUDA_ERROR_UNMAP_FAILED = 206
+CUDA_ERROR_ARRAY_IS_MAPPED = 207
+CUDA_ERROR_ALREADY_MAPPED = 208
+CUDA_ERROR_NO_BINARY_FOR_GPU = 209
+CUDA_ERROR_ALREADY_ACQUIRED = 210
+CUDA_ERROR_NOT_MAPPED = 211
+CUDA_ERROR_NOT_MAPPED_AS_ARRAY = 212
+CUDA_ERROR_NOT_MAPPED_AS_POINTER = 213
+CUDA_ERROR_ECC_UNCORRECTABLE = 214
+CUDA_ERROR_UNSUPPORTED_LIMIT = 215
+CUDA_ERROR_CONTEXT_ALREADY_IN_USE = 216
+CUDA_ERROR_PEER_ACCESS_UNSUPPORTED = 217
+CUDA_ERROR_INVALID_PTX = 218
+CUDA_ERROR_INVALID_GRAPHICS_CONTEXT = 219
+CUDA_ERROR_NVLINK_UNCORRECTABLE = 220
+CUDA_ERROR_JIT_COMPILER_NOT_FOUND = 221
+CUDA_ERROR_UNSUPPORTED_PTX_VERSION = 222
+CUDA_ERROR_JIT_COMPILATION_DISABLED = 223
+CUDA_ERROR_UNSUPPORTED_EXEC_AFFINITY = 224
+CUDA_ERROR_UNSUPPORTED_DEVSIDE_SYNC = 225
+CUDA_ERROR_INVALID_SOURCE = 300
+CUDA_ERROR_FILE_NOT_FOUND = 301
+CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND = 302
+CUDA_ERROR_SHARED_OBJECT_INIT_FAILED = 303
+CUDA_ERROR_OPERATING_SYSTEM = 304
+CUDA_ERROR_INVALID_HANDLE = 400
+CUDA_ERROR_ILLEGAL_STATE = 401
+CUDA_ERROR_NOT_FOUND = 500
+CUDA_ERROR_NOT_READY = 600
+CUDA_ERROR_LAUNCH_FAILED = 700
+CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES = 701
+CUDA_ERROR_LAUNCH_TIMEOUT = 702
+CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING = 703
+CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED = 704
+CUDA_ERROR_PEER_ACCESS_NOT_ENABLED = 705
+CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE = 708
+CUDA_ERROR_CONTEXT_IS_DESTROYED = 709
+CUDA_ERROR_ASSERT = 710
+CUDA_ERROR_TOO_MANY_PEERS = 711
+CUDA_ERROR_HOST_MEMORY_ALREADY_REGISTERED = 712
+CUDA_ERROR_HOST_MEMORY_NOT_REGISTERED = 713
+CUDA_ERROR_HARDWARE_STACK_ERROR = 714
+CUDA_ERROR_ILLEGAL_INSTRUCTION = 715
+CUDA_ERROR_MISALIGNED_ADDRESS = 716
+CUDA_ERROR_INVALID_ADDRESS_SPACE = 717
+CUDA_ERROR_INVALID_PC = 718
+CUDA_ERROR_LAUNCH_FAILED = 719
+CUDA_ERROR_COOPERATIVE_LAUNCH_TOO_LARGE = 720
+CUDA_ERROR_NOT_PERMITTED = 800
+CUDA_ERROR_NOT_SUPPORTED = 801
+CUDA_ERROR_SYSTEM_NOT_READY = 802
+CUDA_ERROR_SYSTEM_DRIVER_MISMATCH = 803
+CUDA_ERROR_COMPAT_NOT_SUPPORTED_ON_DEVICE = 804
+CUDA_ERROR_MPS_CONNECTION_FAILED = 805
+CUDA_ERROR_MPS_RPC_FAILURE = 806
+CUDA_ERROR_MPS_SERVER_NOT_READY = 807
+CUDA_ERROR_MPS_MAX_CLIENTS_REACHED = 808
+CUDA_ERROR_MPS_MAX_CONNECTIONS_REACHED = 809
+CUDA_ERROR_MPS_CLIENT_TERMINATED = 810
+CUDA_ERROR_CDP_NOT_SUPPORTED = 811
+CUDA_ERROR_CDP_VERSION_MISMATCH = 812
+CUDA_ERROR_STREAM_CAPTURE_UNSUPPORTED = 900
+CUDA_ERROR_STREAM_CAPTURE_INVALIDATED = 901
+CUDA_ERROR_STREAM_CAPTURE_MERGE = 902
+CUDA_ERROR_STREAM_CAPTURE_UNMATCHED = 903
+CUDA_ERROR_STREAM_CAPTURE_UNJOINED = 904
+CUDA_ERROR_STREAM_CAPTURE_ISOLATION = 905
+CUDA_ERROR_STREAM_CAPTURE_IMPLICIT = 906
+CUDA_ERROR_CAPTURED_EVENT = 907
+CUDA_ERROR_STREAM_CAPTURE_WRONG_THREAD = 908
+CUDA_ERROR_TIMEOUT = 909
+CUDA_ERROR_GRAPH_EXEC_UPDATE_FAILURE = 910
+CUDA_ERROR_EXTERNAL_DEVICE = 911
+CUDA_ERROR_INVALID_CLUSTER_SIZE = 912
+CUDA_ERROR_UNKNOWN = 999
+
+
+# Function cache configurations
+
+# no preference for shared memory or L1 (default)
+CU_FUNC_CACHE_PREFER_NONE = 0x00
+# prefer larger shared memory and smaller L1 cache
+CU_FUNC_CACHE_PREFER_SHARED = 0x01
+# prefer larger L1 cache and smaller shared memory
+CU_FUNC_CACHE_PREFER_L1 = 0x02
+# prefer equal sized L1 cache and shared memory
+CU_FUNC_CACHE_PREFER_EQUAL = 0x03
+
+
+# Context creation flags
+
+# Automatic scheduling
+CU_CTX_SCHED_AUTO = 0x00
+# Set spin as default scheduling
+CU_CTX_SCHED_SPIN = 0x01
+# Set yield as default scheduling
+CU_CTX_SCHED_YIELD = 0x02
+# Set blocking synchronization as default scheduling
+CU_CTX_SCHED_BLOCKING_SYNC = 0x04
+
+CU_CTX_SCHED_MASK = 0x07
+# Support mapped pinned allocations
+# This flag was deprecated as of CUDA 11.0 and it no longer has effect.
+# All contexts as of CUDA 3.2 behave as though the flag is enabled.
+CU_CTX_MAP_HOST = 0x08
+# Keep local memory allocation after launch
+CU_CTX_LMEM_RESIZE_TO_MAX = 0x10
+# Trigger coredumps from exceptions in this context
+CU_CTX_COREDUMP_ENABLE = 0x20
+# Enable user pipe to trigger coredumps in this context
+CU_CTX_USER_COREDUMP_ENABLE = 0x40
+# Force synchronous blocking on cudaMemcpy/cudaMemset
+CU_CTX_SYNC_MEMOPS = 0x80
+
+CU_CTX_FLAGS_MASK = 0xff
+
+
+# DEFINES
+
+# If set, host memory is portable between CUDA contexts.
+# Flag for cuMemHostAlloc()
+CU_MEMHOSTALLOC_PORTABLE = 0x01
+
+# If set, host memory is mapped into CUDA address space and
+# cuMemHostGetDevicePointer() may be called on the host pointer.
+# Flag for cuMemHostAlloc()
+CU_MEMHOSTALLOC_DEVICEMAP = 0x02
+
+# If set, host memory is allocated as write-combined - fast to write,
+# faster to DMA, slow to read except via SSE4 streaming load instruction
+# (MOVNTDQA).
+# Flag for cuMemHostAlloc()
+CU_MEMHOSTALLOC_WRITECOMBINED = 0x04
+
+
+# If set, host memory is portable between CUDA contexts.
+# Flag for cuMemHostRegister()
+CU_MEMHOSTREGISTER_PORTABLE = 0x01
+
+# If set, host memory is mapped into CUDA address space and
+# cuMemHostGetDevicePointer() may be called on the host pointer.
+# Flag for cuMemHostRegister()
+CU_MEMHOSTREGISTER_DEVICEMAP = 0x02
+
+# If set, the passed memory pointer is treated as pointing to some
+# memory-mapped I/O space, e.g. belonging to a third-party PCIe device.
+# On Windows the flag is a no-op. On Linux that memory is marked
+# as non cache-coherent for the GPU and is expected
+# to be physically contiguous. It may return CUDA_ERROR_NOT_PERMITTED
+# if run as an unprivileged user, CUDA_ERROR_NOT_SUPPORTED on older
+# Linux kernel versions. On all other platforms, it is not supported
+# and CUDA_ERROR_NOT_SUPPORTED is returned.
+# Flag for cuMemHostRegister()
+CU_MEMHOSTREGISTER_IOMEMORY = 0x04
+
+# If set, the passed memory pointer is treated as pointing to memory
+# that is considered read-only by the device. On platforms without
+# CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS_USES_HOST_PAGE_TABLES,
+# this flag is required in order to register memory mapped
+# to the CPU as read-only. Support for the use of this flag can be
+# queried from the device attribute
+# CU_DEVICE_ATTRIBUTE_READ_ONLY_HOST_REGISTER_SUPPORTED.
+# Using this flag with a current context associated with a device
+# that does not have this attribute set will cause cuMemHostRegister
+# to error with CUDA_ERROR_NOT_SUPPORTED.
+CU_MEMHOSTREGISTER_READ_ONLY = 0x08
+
+
+# CUDA Mem Attach Flags
+
+# If set, managed memory is accessible from all streams on all devices.
+CU_MEM_ATTACH_GLOBAL = 0x01
+
+# If set on a platform where the device attribute
+# cudaDevAttrConcurrentManagedAccess is zero, then managed memory is
+# only accessible on the host (unless explicitly attached to a stream
+# with cudaStreamAttachMemAsync, in which case it can be used in kernels
+# launched on that stream).
+CU_MEM_ATTACH_HOST = 0x02
+
+# If set on a platform where the device attribute
+# cudaDevAttrConcurrentManagedAccess is zero, then managed memory accesses
+# on the associated device must only be from a single stream.
+CU_MEM_ATTACH_SINGLE = 0x04
+
+
+# Event creation flags
+
+# Default event flag
+CU_EVENT_DEFAULT = 0x0
+# Event uses blocking synchronization
+CU_EVENT_BLOCKING_SYNC = 0x1
+# Event will not record timing data
+CU_EVENT_DISABLE_TIMING = 0x2
+# Event is suitable for interprocess use. CU_EVENT_DISABLE_TIMING must be set
+CU_EVENT_INTERPROCESS = 0x4
+
+
+# Pointer information
+
+# The CUcontext on which a pointer was allocated or registered
+CU_POINTER_ATTRIBUTE_CONTEXT = 1
+# The CUmemorytype describing the physical location of a pointer
+CU_POINTER_ATTRIBUTE_MEMORY_TYPE = 2
+# The address at which a pointer's memory may be accessed on the device
+CU_POINTER_ATTRIBUTE_DEVICE_POINTER = 3
+# The address at which a pointer's memory may be accessed on the host
+CU_POINTER_ATTRIBUTE_HOST_POINTER = 4
+# A pair of tokens for use with the nv-p2p.h Linux kernel interface
+CU_POINTER_ATTRIBUTE_P2P_TOKENS = 5
+# Synchronize every synchronous memory operation initiated on this region
+CU_POINTER_ATTRIBUTE_SYNC_MEMOPS = 6
+# A process-wide unique ID for an allocated memory region
+CU_POINTER_ATTRIBUTE_BUFFER_ID = 7
+# Indicates if the pointer points to managed memory
+CU_POINTER_ATTRIBUTE_IS_MANAGED = 8
+# A device ordinal of a device on which a pointer was allocated or registered
+CU_POINTER_ATTRIBUTE_DEVICE_ORDINAL = 9
+# 1 if this pointer maps to an allocation
+# that is suitable for cudaIpcGetMemHandle, 0 otherwise
+CU_POINTER_ATTRIBUTE_IS_LEGACY_CUDA_IPC_CAPABLE = 10
+# Starting address for this requested pointer
+CU_POINTER_ATTRIBUTE_RANGE_START_ADDR = 11
+# Size of the address range for this requested pointer
+CU_POINTER_ATTRIBUTE_RANGE_SIZE = 12
+# 1 if this pointer is in a valid address range
+# that is mapped to a backing allocation, 0 otherwise
+CU_POINTER_ATTRIBUTE_MAPPED = 13
+# Bitmask of allowed CUmemAllocationHandleType for this allocation
+CU_POINTER_ATTRIBUTE_ALLOWED_HANDLE_TYPES = 14
+# 1 if the memory this pointer is referencing
+# can be used with the GPUDirect RDMA API
+CU_POINTER_ATTRIBUTE_IS_GPU_DIRECT_RDMA_CAPABLE = 15
+# Returns the access flags the device associated
+# with the current context has on the corresponding
+# memory referenced by the pointer given
+CU_POINTER_ATTRIBUTE_ACCESS_FLAGS = 16
+# Returns the mempool handle for the allocation
+# if it was allocated from a mempool. Otherwise returns NULL
+CU_POINTER_ATTRIBUTE_MEMPOOL_HANDLE = 17
+# Size of the actual underlying mapping that the pointer belongs to
+CU_POINTER_ATTRIBUTE_MAPPING_SIZE = 18
+# The start address of the mapping that the pointer belongs to
+CU_POINTER_ATTRIBUTE_MAPPING_BASE_ADDR = 19
+# A process-wide unique id corresponding to the
+# physical allocation the pointer belongs to
+CU_POINTER_ATTRIBUTE_MEMORY_BLOCK_ID = 20
+
+
+# Memory types
+
+# Host memory
+CU_MEMORYTYPE_HOST = 0x01
+# Device memory
+CU_MEMORYTYPE_DEVICE = 0x02
+# Array memory
+CU_MEMORYTYPE_ARRAY = 0x03
+# Unified device or host memory
+CU_MEMORYTYPE_UNIFIED = 0x04
+
+
+# Device code formats
+
+# Compiled device-class-specific device code
+# Applicable options: none
+CU_JIT_INPUT_CUBIN = 0
+
+# PTX source code
+# Applicable options: PTX compiler options
+CU_JIT_INPUT_PTX = 1
+
+# Bundle of multiple cubins and/or PTX of some device code
+# Applicable options: PTX compiler options, ::CU_JIT_FALLBACK_STRATEGY
+CU_JIT_INPUT_FATBINARY = 2
+
+# Host object with embedded device code
+# Applicable options: PTX compiler options, ::CU_JIT_FALLBACK_STRATEGY
+CU_JIT_INPUT_OBJECT = 3
+
+# Archive of host objects with embedded device code
+# Applicable options: PTX compiler options, ::CU_JIT_FALLBACK_STRATEGY
+CU_JIT_INPUT_LIBRARY = 4
+
+CU_JIT_NUM_INPUT_TYPES = 6
+
+
+# Online compiler and linker options
+
+# Max number of registers that a thread may use.
+# Option type: unsigned int
+# Applies to: compiler only
+CU_JIT_MAX_REGISTERS = 0
+
+# IN: Specifies minimum number of threads per block to target compilation
+# for
+# OUT: Returns the number of threads the compiler actually targeted.
+# This restricts the resource utilization fo the compiler (e.g. max
+# registers) such that a block with the given number of threads should be
+# able to launch based on register limitations. Note, this option does not
+# currently take into account any other resource limitations, such as
+# shared memory utilization.
+# Cannot be combined with ::CU_JIT_TARGET.
+# Option type: unsigned int
+# Applies to: compiler only
+CU_JIT_THREADS_PER_BLOCK = 1
+
+# Overwrites the option value with the total wall clock time, in
+# milliseconds, spent in the compiler and linker
+# Option type: float
+# Applies to: compiler and linker
+CU_JIT_WALL_TIME = 2
+
+# Pointer to a buffer in which to print any log messages
+# that are informational in nature (the buffer size is specified via
+# option ::CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES)
+# Option type: char *
+# Applies to: compiler and linker
+CU_JIT_INFO_LOG_BUFFER = 3
+
+# IN: Log buffer size in bytes. Log messages will be capped at this size
+# (including null terminator)
+# OUT: Amount of log buffer filled with messages
+# Option type: unsigned int
+# Applies to: compiler and linker
+CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES = 4
+
+# Pointer to a buffer in which to print any log messages that
+# reflect errors (the buffer size is specified via option
+# ::CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES)
+# Option type: char *
+# Applies to: compiler and linker
+CU_JIT_ERROR_LOG_BUFFER = 5
+
+# IN: Log buffer size in bytes. Log messages will be capped at this size
+# (including null terminator)
+# OUT: Amount of log buffer filled with messages
+# Option type: unsigned int
+# Applies to: compiler and linker
+CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES = 6
+
+# Level of optimizations to apply to generated code (0 - 4), with 4
+# being the default and highest level of optimizations.
+# Option type: unsigned int
+# Applies to: compiler only
+CU_JIT_OPTIMIZATION_LEVEL = 7
+
+# No option value required. Determines the target based on the current
+# attached context (default)
+# Option type: No option value needed
+# Applies to: compiler and linker
+CU_JIT_TARGET_FROM_CUCONTEXT = 8
+
+# Target is chosen based on supplied ::CUjit_target. Cannot be
+# combined with ::CU_JIT_THREADS_PER_BLOCK.
+# Option type: unsigned int for enumerated type ::CUjit_target
+# Applies to: compiler and linker
+CU_JIT_TARGET = 9
+
+# Specifies choice of fallback strategy if matching cubin is not found.
+# Choice is based on supplied ::CUjit_fallback.
+# Option type: unsigned int for enumerated type ::CUjit_fallback
+# Applies to: compiler only
+CU_JIT_FALLBACK_STRATEGY = 10
+
+# Specifies whether to create debug information in output (-g)
+# (0: false, default)
+# Option type: int
+# Applies to: compiler and linker
+CU_JIT_GENERATE_DEBUG_INFO = 11
+
+# Generate verbose log messages (0: false, default)
+# Option type: int
+# Applies to: compiler and linker
+CU_JIT_LOG_VERBOSE = 12
+
+# Generate line number information (-lineinfo) (0: false, default)
+# Option type: int
+# Applies to: compiler only
+CU_JIT_GENERATE_LINE_INFO = 13
+
+# Specifies whether to enable caching explicitly (-dlcm)
+# Choice is based on supplied ::CUjit_cacheMode_enum.
+# Option type: unsigned int for enumerated type ::CUjit_cacheMode_enum
+# Applies to: compiler only
+CU_JIT_CACHE_MODE = 14
+
+
+# CUfunction_attribute
+
+# The maximum number of threads per block, beyond which a launch of the
+# function would fail. This number depends on both the function and the
+# device on which the function is currently loaded.
+CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK = 0
+
+# The size in bytes of statically-allocated shared memory required by
+# this function. This does not include dynamically-allocated shared
+# memory requested by the user at runtime.
+CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES = 1
+
+# The size in bytes of user-allocated constant memory required by this
+# function.
+CU_FUNC_ATTRIBUTE_CONST_SIZE_BYTES = 2
+
+# The size in bytes of local memory used by each thread of this function.
+CU_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES = 3
+
+# The number of registers used by each thread of this function.
+CU_FUNC_ATTRIBUTE_NUM_REGS = 4
+
+# The PTX virtual architecture version for which the function was
+# compiled. This value is the major PTX version * 10 + the minor PTX
+# version, so a PTX version 1.3 function would return the value 13.
+# Note that this may return the undefined value of 0 for cubins
+# compiled prior to CUDA 3.0.
+CU_FUNC_ATTRIBUTE_PTX_VERSION = 5
+
+# The binary architecture version for which the function was compiled.
+# This value is the major binary version * 10 + the minor binary version,
+# so a binary version 1.3 function would return the value 13. Note that
+# this will return a value of 10 for legacy cubins that do not have a
+# properly-encoded binary architecture version.
+CU_FUNC_ATTRIBUTE_BINARY_VERSION = 6
+
+# The attribute to indicate whether the function has been compiled
+# with user specified option "-Xptxas --dlcm=ca" set
+CU_FUNC_ATTRIBUTE_CACHE_MODE_CA = 7
+
+# The maximum size in bytes of dynamically-allocated shared memory
+# that can be used by this function. If the user-specified
+# dynamic shared memory size is larger than this value,
+# the launch will fail. See cuFuncSetAttribute, cuKernelSetAttribute
+CU_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES = 8
+
+# On devices where the L1 cache and shared memory use the same
+# hardware resources, this sets the shared memory carveout preference,
+# in percent of the total shared memory. Refer to
+# CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_MULTIPROCESSOR.
+# This is only a hint, and the driver can choose a different ratio
+# if required to execute the function.
+# See cuFuncSetAttribute, cuKernelSetAttribute
+CU_FUNC_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT = 9
+
+# If this attribute is set, the kernel must launch with a valid cluster
+# size specified. See cuFuncSetAttribute, cuKernelSetAttribute
+CU_FUNC_ATTRIBUTE_CLUSTER_SIZE_MUST_BE_SET = 10
+
+# The required cluster width in blocks. The values must either all be 0
+# or all be positive. The validity of the cluster dimensions
+# is otherwise checked at launch time. If the value is set during
+# compile time, it cannot be set at runtime.
+# Setting it at runtime will return CUDA_ERROR_NOT_PERMITTED.
+# See cuFuncSetAttribute, cuKernelSetAttribute
+CU_FUNC_ATTRIBUTE_REQUIRED_CLUSTER_WIDTH = 11
+
+# The required cluster height in blocks. The values must either all be 0
+# or all be positive. The validity of the cluster dimensions
+# is otherwise checked at launch time.If the value is set during
+# compile time, it cannot be set at runtime.
+# Setting it at runtime should return CUDA_ERROR_NOT_PERMITTED.
+# See cuFuncSetAttribute, cuKernelSetAttribute
+CU_FUNC_ATTRIBUTE_REQUIRED_CLUSTER_HEIGHT = 12
+
+# The required cluster depth in blocks. The values must either all be 0
+# or all be positive. The validity of the cluster dimensions
+# is otherwise checked at launch time.If the value is set during
+# compile time, it cannot be set at runtime.
+# Setting it at runtime should return CUDA_ERROR_NOT_PERMITTED.
+# See cuFuncSetAttribute, cuKernelSetAttribute
+CU_FUNC_ATTRIBUTE_REQUIRED_CLUSTER_DEPTH = 13
+
+# Whether the function can be launched with non-portable cluster size.
+# 1 is allowed, 0 is disallowed. A non-portable cluster size may only
+# function on the specific SKUs the program is tested on.
+# The launch might fail if the program is run on a different hardware platform.
+# For more details refer to link :
+# https://docs.nvidia.com/cuda/cuda-driver-api/group__CUDA__TYPES.html#group__CUDA__TYPES
+CU_FUNC_ATTRIBUTE_NON_PORTABLE_CLUSTER_SIZE_ALLOWED = 14
+
+# The block scheduling policy of a function.
+# The value type is CUclusterSchedulingPolicy / cudaClusterSchedulingPolicy.
+# See cuFuncSetAttribute, cuKernelSetAttribute
+CU_FUNC_ATTRIBUTE_CLUSTER_SCHEDULING_POLICY_PREFERENCE = 15
+
+
+# Device attributes
+
+CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK = 1
+CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X = 2
+CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Y = 3
+CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Z = 4
+CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_X = 5
+CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Y = 6
+CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Z = 7
+CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK = 8
+CU_DEVICE_ATTRIBUTE_TOTAL_CONSTANT_MEMORY = 9
+CU_DEVICE_ATTRIBUTE_WARP_SIZE = 10
+CU_DEVICE_ATTRIBUTE_MAX_PITCH = 11
+CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK = 12
+CU_DEVICE_ATTRIBUTE_CLOCK_RATE = 13
+CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT = 14
+CU_DEVICE_ATTRIBUTE_GPU_OVERLAP = 15
+CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT = 16
+CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT = 17
+CU_DEVICE_ATTRIBUTE_INTEGRATED = 18
+CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY = 19
+CU_DEVICE_ATTRIBUTE_COMPUTE_MODE = 20
+CU_DEVICE_ATTRIBUTE_MAX_TEXTURE_1D_WIDTH = 21
+CU_DEVICE_ATTRIBUTE_MAX_TEXTURE_2D_WIDTH = 22
+CU_DEVICE_ATTRIBUTE_MAX_TEXTURE_2D_HEIGHT = 23
+CU_DEVICE_ATTRIBUTE_MAX_TEXTURE_3D_WIDTH = 24
+CU_DEVICE_ATTRIBUTE_MAX_TEXTURE_3D_HEIGHT = 25
+CU_DEVICE_ATTRIBUTE_MAX_TEXTURE_3D_DEPTH = 26
+CU_DEVICE_ATTRIBUTE_MAX_TEXTURE_2D_LAYERED_WIDTH = 27
+CU_DEVICE_ATTRIBUTE_MAX_TEXTURE_2D_LAYERED_HEIGHT = 28
+CU_DEVICE_ATTRIBUTE_MAX_TEXTURE_2D_LAYERED_LAYERS = 29
+CU_DEVICE_ATTRIBUTE_SURFACE_ALIGNMENT = 30
+CU_DEVICE_ATTRIBUTE_CONCURRENT_KERNELS = 31
+CU_DEVICE_ATTRIBUTE_ECC_ENABLED = 32
+CU_DEVICE_ATTRIBUTE_PCI_BUS_ID = 33
+CU_DEVICE_ATTRIBUTE_PCI_DEVICE_ID = 34
+CU_DEVICE_ATTRIBUTE_TCC_DRIVER = 35
+CU_DEVICE_ATTRIBUTE_MEMORY_CLOCK_RATE = 36
+CU_DEVICE_ATTRIBUTE_GLOBAL_MEMORY_BUS_WIDTH = 37
+CU_DEVICE_ATTRIBUTE_L2_CACHE_SIZE = 38
+CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_MULTI_PROCESSOR = 39
+CU_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT = 40
+CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING = 41
+CU_DEVICE_ATTRIBUTE_MAX_TEXTURE_1D_LAYERED_WIDTH = 42
+CU_DEVICE_ATTRIBUTE_MAX_TEXTURE_1D_LAYERED_LAYERS = 43
+CU_DEVICE_ATTRIBUTE_MAX_TEXTURE_2D_GATHER_WIDTH = 45
+CU_DEVICE_ATTRIBUTE_MAX_TEXTURE_2D_GATHER_HEIGHT = 46
+CU_DEVICE_ATTRIBUTE_MAX_TEXTURE_3D_WIDTH_ALT = 47
+CU_DEVICE_ATTRIBUTE_MAX_TEXTURE_3D_HEIGHT_ALT = 48
+CU_DEVICE_ATTRIBUTE_MAX_TEXTURE_3D_DEPTH_ALT = 49
+CU_DEVICE_ATTRIBUTE_PCI_DOMAIN_ID = 50
+CU_DEVICE_ATTRIBUTE_TEXTURE_PITCH_ALIGNMENT = 51
+CU_DEVICE_ATTRIBUTE_MAX_TEXTURE_CUBEMAP_WIDTH = 52
+CU_DEVICE_ATTRIBUTE_MAX_TEXTURE_CUBEMAP_LAYERED_WIDTH = 53
+CU_DEVICE_ATTRIBUTE_MAX_TEXTURE_CUBEMAP_LAYERED_LAYERS = 54
+CU_DEVICE_ATTRIBUTE_MAX_SURFACE_1D_WIDTH = 55
+CU_DEVICE_ATTRIBUTE_MAX_SURFACE_2D_WIDTH = 56
+CU_DEVICE_ATTRIBUTE_MAX_SURFACE_2D_HEIGHT = 57
+CU_DEVICE_ATTRIBUTE_MAX_SURFACE_3D_WIDTH = 58
+CU_DEVICE_ATTRIBUTE_MAX_SURFACE_3D_HEIGHT = 59
+CU_DEVICE_ATTRIBUTE_MAX_SURFACE_3D_DEPTH = 60
+CU_DEVICE_ATTRIBUTE_MAX_SURFACE_1D_LAYERED_WIDTH = 61
+CU_DEVICE_ATTRIBUTE_MAX_SURFACE_1D_LAYERED_LAYERS = 62
+CU_DEVICE_ATTRIBUTE_MAX_SURFACE_2D_LAYERED_WIDTH = 63
+CU_DEVICE_ATTRIBUTE_MAX_SURFACE_2D_LAYERED_HEIGHT = 64
+CU_DEVICE_ATTRIBUTE_MAX_SURFACE_2D_LAYERED_LAYERS = 65
+CU_DEVICE_ATTRIBUTE_MAX_SURFACE_CUBEMAP_WIDTH = 66
+CU_DEVICE_ATTRIBUTE_MAX_SURFACE_CUBEMAP_LAYERED_WIDTH = 67
+CU_DEVICE_ATTRIBUTE_MAX_SURFACE_CUBEMAP_LAYERED_LAYERS = 68
+CU_DEVICE_ATTRIBUTE_MAX_TEXTURE_1D_LINEAR_WIDTH = 69
+CU_DEVICE_ATTRIBUTE_MAX_TEXTURE_2D_LINEAR_WIDTH = 70
+CU_DEVICE_ATTRIBUTE_MAX_TEXTURE_2D_LINEAR_HEIGHT = 71
+CU_DEVICE_ATTRIBUTE_MAX_TEXTURE_2D_LINEAR_PITCH = 72
+CU_DEVICE_ATTRIBUTE_MAX_TEXTURE_2D_MIPMAPPED_WIDTH = 73
+CU_DEVICE_ATTRIBUTE_MAX_MAX_TEXTURE_2D_MIPMAPPED_HEIGHT = 74
+CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR = 75
+CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR = 76
+CU_DEVICE_ATTRIBUTE_MAX_TEXTURE_1D_MIPMAPPED_WIDTH = 77
+CU_DEVICE_ATTRIBUTE_STREAM_PRIORITIES_SUPPORTED = 78
+CU_DEVICE_ATTRIBUTE_GLOBAL_L1_CACHE_SUPPORTED = 79
+CU_DEVICE_ATTRIBUTE_LOCAL_L1_CACHE_SUPPORTED = 80
+CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_MULTIPROCESSOR = 81
+CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_MULTIPROCESSOR = 82
+CU_DEVICE_ATTRIBUTE_MANAGED_MEMORY = 83
+CU_DEVICE_ATTRIBUTE_IS_MULTI_GPU_BOARD = 84
+CU_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD_GROUP_ID = 85
+CU_DEVICE_ATTRIBUTE_HOST_NATIVE_ATOMIC_SUPPORTED = 86
+CU_DEVICE_ATTRIBUTE_SINGLE_TO_DOUBLE_PRECISION_PERF_RATIO = 87
+CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS = 88
+CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS = 89
+CU_DEVICE_ATTRIBUTE_COMPUTE_PREEMPTION_SUPPORTED = 90
+CU_DEVICE_ATTRIBUTE_CAN_USE_HOST_POINTER_FOR_REGISTERED_MEM = 91
+CU_DEVICE_ATTRIBUTE_COOPERATIVE_LAUNCH = 95
+CU_DEVICE_ATTRIBUTE_COOPERATIVE_MULTI_DEVICE_LAUNCH = 96
+CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK_OPTIN = 97
numba_cuda/numba/cuda/cudadrv/error.py
@@ -0,0 +1,36 @@
+class CudaDriverError(Exception):
+    pass
+
+
+class CudaRuntimeError(Exception):
+    pass
+
+
+class CudaSupportError(ImportError):
+    pass
+
+
+class NvvmError(Exception):
+    def __str__(self):
+        return '\n'.join(map(str, self.args))
+
+
+class NvvmSupportError(ImportError):
+    pass
+
+
+class NvvmWarning(Warning):
+    pass
+
+
+class NvrtcError(Exception):
+    def __str__(self):
+        return '\n'.join(map(str, self.args))
+
+
+class NvrtcCompilationError(NvrtcError):
+    pass
+
+
+class NvrtcSupportError(ImportError):
+    pass