nvidia-cudnn-cu13 9.12.0.46__py3-none-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nvidia/cudnn/bin/cudnn64_9.dll +0 -0
- nvidia/cudnn/bin/cudnn_adv64_9.dll +0 -0
- nvidia/cudnn/bin/cudnn_cnn64_9.dll +0 -0
- nvidia/cudnn/bin/cudnn_engines_precompiled64_9.dll +0 -0
- nvidia/cudnn/bin/cudnn_engines_runtime_compiled64_9.dll +0 -0
- nvidia/cudnn/bin/cudnn_graph64_9.dll +0 -0
- nvidia/cudnn/bin/cudnn_heuristic64_9.dll +0 -0
- nvidia/cudnn/bin/cudnn_ops64_9.dll +0 -0
- nvidia/cudnn/include/cudnn.h +68 -0
- nvidia/cudnn/include/cudnn_adv.h +669 -0
- nvidia/cudnn/include/cudnn_backend.h +60 -0
- nvidia/cudnn/include/cudnn_cnn.h +693 -0
- nvidia/cudnn/include/cudnn_graph.h +996 -0
- nvidia/cudnn/include/cudnn_ops.h +1316 -0
- nvidia/cudnn/include/cudnn_version.h +70 -0
- nvidia_cudnn_cu13-9.12.0.46.dist-info/METADATA +47 -0
- nvidia_cudnn_cu13-9.12.0.46.dist-info/RECORD +20 -0
- nvidia_cudnn_cu13-9.12.0.46.dist-info/WHEEL +5 -0
- nvidia_cudnn_cu13-9.12.0.46.dist-info/licenses/License.txt +154 -0
- nvidia_cudnn_cu13-9.12.0.46.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,996 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Copyright 2014-2023 NVIDIA Corporation. All rights reserved.
|
|
3
|
+
*
|
|
4
|
+
* NOTICE TO LICENSEE:
|
|
5
|
+
*
|
|
6
|
+
* This source code and/or documentation ("Licensed Deliverables") are
|
|
7
|
+
* subject to NVIDIA intellectual property rights under U.S. and
|
|
8
|
+
* international Copyright laws.
|
|
9
|
+
*
|
|
10
|
+
* These Licensed Deliverables contained herein is PROPRIETARY and
|
|
11
|
+
* CONFIDENTIAL to NVIDIA and is being provided under the terms and
|
|
12
|
+
* conditions of a form of NVIDIA software license agreement by and
|
|
13
|
+
* between NVIDIA and Licensee ("License Agreement") or electronically
|
|
14
|
+
* accepted by Licensee. Notwithstanding any terms or conditions to
|
|
15
|
+
* the contrary in the License Agreement, reproduction or disclosure
|
|
16
|
+
* of the Licensed Deliverables to any third party without the express
|
|
17
|
+
* written consent of NVIDIA is prohibited.
|
|
18
|
+
*
|
|
19
|
+
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
|
|
20
|
+
* LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
|
|
21
|
+
* SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
|
|
22
|
+
* PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
|
|
23
|
+
* NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
|
|
24
|
+
* DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
|
|
25
|
+
* NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
|
|
26
|
+
* NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
|
|
27
|
+
* LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
|
|
28
|
+
* SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
|
|
29
|
+
* DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
|
|
30
|
+
* WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
|
|
31
|
+
* ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
|
|
32
|
+
* OF THESE LICENSED DELIVERABLES.
|
|
33
|
+
*
|
|
34
|
+
* U.S. Government End Users. These Licensed Deliverables are a
|
|
35
|
+
* "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
|
|
36
|
+
* 1995), consisting of "commercial computer software" and "commercial
|
|
37
|
+
* computer software documentation" as such terms are used in 48
|
|
38
|
+
* C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
|
|
39
|
+
* only as a commercial end item. Consistent with 48 C.F.R.12.212 and
|
|
40
|
+
* 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
|
|
41
|
+
* U.S. Government End Users acquire the Licensed Deliverables with
|
|
42
|
+
* only those rights set forth herein.
|
|
43
|
+
*
|
|
44
|
+
* Any use of the Licensed Deliverables in individual and commercial
|
|
45
|
+
* software must include, in the user documentation and internal
|
|
46
|
+
* comments to the code, the above Disclaimer and U.S. Government End
|
|
47
|
+
* Users Notice.
|
|
48
|
+
*/
|
|
49
|
+
|
|
50
|
+
/*
|
|
51
|
+
 * cudnn_graph : cuDNN's basic definitions and operations.
|
|
52
|
+
*/
|
|
53
|
+
|
|
54
|
+
#if !defined(CUDNN_GRAPH_H_)
|
|
55
|
+
#define CUDNN_GRAPH_H_
|
|
56
|
+
|
|
57
|
+
#include <cuda_runtime_api.h>
|
|
58
|
+
#include <library_types.h>
|
|
59
|
+
|
|
60
|
+
#include <stdint.h>
|
|
61
|
+
|
|
62
|
+
#include "cudnn_version.h"
|
|
63
|
+
|
|
64
|
+
/*
 * These version numbers are autogenerated, do not edit manually.
 *
 * The three values below are compared at preprocessing time against
 * CUDNN_MAJOR / CUDNN_MINOR / CUDNN_PATCHLEVEL (expected to come from
 * "cudnn_version.h"); any difference stops the build with #error.
 */
#define CUDNN_GRAPH_MAJOR 9
#define CUDNN_GRAPH_MINOR 12
#define CUDNN_GRAPH_PATCH 0

#if (CUDNN_GRAPH_MAJOR != CUDNN_MAJOR) || (CUDNN_GRAPH_MINOR != CUDNN_MINOR) || (CUDNN_GRAPH_PATCH != CUDNN_PATCHLEVEL)
#error Version mismatch in cuDNN GRAPH!!!
#endif
|
|
72
|
+
|
|
73
|
+
/*
 * Calling-convention decoration applied to every exported cuDNN function.
 * A definition supplied before this point is left untouched; otherwise it is
 * __stdcall when _WIN32 is defined and empty everywhere else.
 */
#ifndef CUDNNWINAPI
#ifdef _WIN32
#define CUDNNWINAPI __stdcall
#else
#define CUDNNWINAPI
#endif
#endif
|
|
80
|
+
|
|
81
|
+
/*
 * Warnings for deprecated API-s are enabled using the CUDNN_WARN_DEPRECATED macro.
 *
 * CUDNN_DEPRECATED      - decorates deprecated functions and typedefs.
 * CUDNN_DEPRECATED_ENUM - decorates deprecated enumerators.
 *
 * When CUDNN_WARN_DEPRECATED is not defined, or no branch below recognizes the
 * compiler, both macros expand to nothing and the decorated declarations
 * compile without warnings.
 */
#if defined(CUDNN_WARN_DEPRECATED) && (defined(__GNUC__) || defined(__clang__))
/* GCC, Intel C/C++, Cray C/C++, CLANG, IBM XL C/C++ little endian */
#define CUDNN_DEPRECATED __attribute__((deprecated))
#define CUDNN_DEPRECATED_ENUM __attribute__((deprecated))
#elif defined(CUDNN_WARN_DEPRECATED) && defined(_MSC_VER)
/* Microsoft Visual C++ */
#define CUDNN_DEPRECATED __declspec(deprecated)
#define CUDNN_DEPRECATED_ENUM __declspec(deprecated)
#elif defined(CUDNN_WARN_DEPRECATED) && (__cplusplus >= 201402L)
/* C++14 compilers */
#define CUDNN_DEPRECATED [[deprecated]]
#define CUDNN_DEPRECATED_ENUM [[deprecated]]
#else
/* No support for the deprecated attribute */
#define CUDNN_DEPRECATED
#define CUDNN_DEPRECATED_ENUM
#endif
|
|
99
|
+
|
|
100
|
+
#if defined(__cplusplus)
|
|
101
|
+
extern "C" {
|
|
102
|
+
#endif
|
|
103
|
+
|
|
104
|
+
/*
 * Opaque library context. struct cudnnContext is only forward-declared here,
 * so client code can hold and pass a cudnnHandle_t but cannot look inside it.
 */
struct cudnnContext;
typedef struct cudnnContext *cudnnHandle_t;
|
|
106
|
+
|
|
107
|
+
size_t CUDNNWINAPI
|
|
108
|
+
cudnnGetVersion(void);
|
|
109
|
+
|
|
110
|
+
size_t CUDNNWINAPI
|
|
111
|
+
cudnnGetMaxDeviceVersion(void);
|
|
112
|
+
|
|
113
|
+
/* Returns CUDA Runtime version statically linked against cudnn */
|
|
114
|
+
size_t CUDNNWINAPI
|
|
115
|
+
cudnnGetCudartVersion(void);
|
|
116
|
+
|
|
117
|
+
/*
|
|
118
|
+
* CUDNN return codes
|
|
119
|
+
*/
|
|
120
|
+
typedef enum {
|
|
121
|
+
CUDNN_STATUS_SUCCESS = 0,
|
|
122
|
+
|
|
123
|
+
/* Uncategorized errors */
|
|
124
|
+
CUDNN_STATUS_NOT_INITIALIZED = 1001,
|
|
125
|
+
CUDNN_STATUS_SUBLIBRARY_VERSION_MISMATCH = 1002,
|
|
126
|
+
CUDNN_STATUS_SERIALIZATION_VERSION_MISMATCH = 1003,
|
|
127
|
+
CUDNN_STATUS_DEPRECATED = 1004,
|
|
128
|
+
CUDNN_STATUS_LICENSE_ERROR = 1005,
|
|
129
|
+
CUDNN_STATUS_RUNTIME_IN_PROGRESS = 1006,
|
|
130
|
+
CUDNN_STATUS_RUNTIME_FP_OVERFLOW = 1007,
|
|
131
|
+
CUDNN_STATUS_SUBLIBRARY_LOADING_FAILED = 1008,
|
|
132
|
+
|
|
133
|
+
CUDNN_STATUS_BAD_PARAM = 2000,
|
|
134
|
+
CUDNN_STATUS_BAD_PARAM_NULL_POINTER = 2002,
|
|
135
|
+
CUDNN_STATUS_BAD_PARAM_MISALIGNED_POINTER = 2003,
|
|
136
|
+
CUDNN_STATUS_BAD_PARAM_NOT_FINALIZED = 2004,
|
|
137
|
+
CUDNN_STATUS_BAD_PARAM_OUT_OF_BOUND = 2005,
|
|
138
|
+
CUDNN_STATUS_BAD_PARAM_SIZE_INSUFFICIENT = 2006,
|
|
139
|
+
CUDNN_STATUS_BAD_PARAM_STREAM_MISMATCH = 2007,
|
|
140
|
+
CUDNN_STATUS_BAD_PARAM_SHAPE_MISMATCH = 2008,
|
|
141
|
+
CUDNN_STATUS_BAD_PARAM_DUPLICATED_ENTRIES = 2009,
|
|
142
|
+
CUDNN_STATUS_BAD_PARAM_ATTRIBUTE_TYPE = 2010,
|
|
143
|
+
CUDNN_STATUS_BAD_PARAM_CUDA_GRAPH_MISMATCH = 2011,
|
|
144
|
+
CUDNN_STATUS_BAD_PARAM_DESCRIPTOR_TYPE = 2012,
|
|
145
|
+
|
|
146
|
+
CUDNN_STATUS_NOT_SUPPORTED = 3000,
|
|
147
|
+
CUDNN_STATUS_NOT_SUPPORTED_GRAPH_PATTERN = 3001,
|
|
148
|
+
CUDNN_STATUS_NOT_SUPPORTED_SHAPE = 3002,
|
|
149
|
+
CUDNN_STATUS_NOT_SUPPORTED_DATA_TYPE = 3003,
|
|
150
|
+
CUDNN_STATUS_NOT_SUPPORTED_LAYOUT = 3004,
|
|
151
|
+
CUDNN_STATUS_NOT_SUPPORTED_INCOMPATIBLE_CUDA_DRIVER = 3005,
|
|
152
|
+
CUDNN_STATUS_NOT_SUPPORTED_INCOMPATIBLE_CUDART = 3006,
|
|
153
|
+
CUDNN_STATUS_NOT_SUPPORTED_ARCH_MISMATCH = 3007,
|
|
154
|
+
CUDNN_STATUS_NOT_SUPPORTED_RUNTIME_PREREQUISITE_MISSING = 3008,
|
|
155
|
+
CUDNN_STATUS_NOT_SUPPORTED_SUBLIBRARY_UNAVAILABLE = 3009,
|
|
156
|
+
CUDNN_STATUS_NOT_SUPPORTED_SHARED_MEMORY_INSUFFICIENT = 3010,
|
|
157
|
+
CUDNN_STATUS_NOT_SUPPORTED_PADDING = 3011,
|
|
158
|
+
CUDNN_STATUS_NOT_SUPPORTED_BAD_LAUNCH_PARAM = 3012,
|
|
159
|
+
CUDNN_STATUS_NOT_SUPPORTED_CUDA_GRAPH_NATIVE_API = 3013,
|
|
160
|
+
|
|
161
|
+
CUDNN_STATUS_INTERNAL_ERROR = 4000,
|
|
162
|
+
CUDNN_STATUS_INTERNAL_ERROR_COMPILATION_FAILED = 4001,
|
|
163
|
+
CUDNN_STATUS_INTERNAL_ERROR_UNEXPECTED_VALUE = 4002,
|
|
164
|
+
CUDNN_STATUS_INTERNAL_ERROR_HOST_ALLOCATION_FAILED = 4003,
|
|
165
|
+
CUDNN_STATUS_INTERNAL_ERROR_DEVICE_ALLOCATION_FAILED = 4004,
|
|
166
|
+
CUDNN_STATUS_INTERNAL_ERROR_BAD_LAUNCH_PARAM = 4005,
|
|
167
|
+
CUDNN_STATUS_INTERNAL_ERROR_TEXTURE_CREATION_FAILED = 4006,
|
|
168
|
+
|
|
169
|
+
CUDNN_STATUS_EXECUTION_FAILED = 5000,
|
|
170
|
+
CUDNN_STATUS_EXECUTION_FAILED_CUDA_DRIVER = 5001,
|
|
171
|
+
CUDNN_STATUS_EXECUTION_FAILED_CUBLAS = 5002,
|
|
172
|
+
CUDNN_STATUS_EXECUTION_FAILED_CUDART = 5003,
|
|
173
|
+
CUDNN_STATUS_EXECUTION_FAILED_CURAND = 5004,
|
|
174
|
+
|
|
175
|
+
CUDNN_STATUS_ALLOC_FAILED CUDNN_DEPRECATED_ENUM = CUDNN_STATUS_INTERNAL_ERROR_HOST_ALLOCATION_FAILED,
|
|
176
|
+
CUDNN_STATUS_INVALID_VALUE CUDNN_DEPRECATED_ENUM = 2001 /* please transition to CUDNN_STATUS_BAD_PARAM instead */,
|
|
177
|
+
CUDNN_STATUS_ARCH_MISMATCH CUDNN_DEPRECATED_ENUM = CUDNN_STATUS_NOT_SUPPORTED_ARCH_MISMATCH,
|
|
178
|
+
CUDNN_STATUS_MAPPING_ERROR CUDNN_DEPRECATED_ENUM = CUDNN_STATUS_INTERNAL_ERROR_TEXTURE_CREATION_FAILED,
|
|
179
|
+
CUDNN_STATUS_RUNTIME_PREREQUISITE_MISSING CUDNN_DEPRECATED_ENUM =
|
|
180
|
+
CUDNN_STATUS_NOT_SUPPORTED_RUNTIME_PREREQUISITE_MISSING,
|
|
181
|
+
CUDNN_STATUS_VERSION_MISMATCH CUDNN_DEPRECATED_ENUM = CUDNN_STATUS_SUBLIBRARY_VERSION_MISMATCH,
|
|
182
|
+
} cudnnStatus_t;
|
|
183
|
+
|
|
184
|
+
/*
 * Helpers for composing and splitting status codes.
 *
 * CUDNN_STATUS_FULL_ERROR_CODE(category, specific_err)
 *     adds the two parts and casts the sum to cudnnStatus_t.
 * CUDNN_STATUS_CATEGORY(full_error_code)
 *     integer-divides by 1000 and multiplies back, e.g. 3007 -> 3000.
 * CUDNN_STATUS_SPECIFIC_ERROR(full_error_code)
 *     remainder modulo 1000, e.g. 3007 -> 7.
 */
#define CUDNN_STATUS_FULL_ERROR_CODE(category, specific_err) ((cudnnStatus_t)(0 + (category) + (specific_err)))
#define CUDNN_STATUS_CATEGORY(full_error_code) ((full_error_code) / 1000 * 1000)
#define CUDNN_STATUS_SPECIFIC_ERROR(full_error_code) ((full_error_code) % 1000)
|
|
187
|
+
|
|
188
|
+
/* human-readable error messages */
|
|
189
|
+
const char *CUDNNWINAPI
|
|
190
|
+
cudnnGetErrorString(cudnnStatus_t status);
|
|
191
|
+
|
|
192
|
+
void CUDNNWINAPI
|
|
193
|
+
cudnnGetLastErrorString(char *message, size_t max_size);
|
|
194
|
+
|
|
195
|
+
/* Forward definition in this version only */
|
|
196
|
+
typedef struct cudnnRuntimeTag_t cudnnRuntimeTag_t CUDNN_DEPRECATED;
|
|
197
|
+
|
|
198
|
+
/*
 * Mode selector accepted by cudnnQueryRuntimeError().
 * NOTE(review): the exact behaviour of each mode is not described in this
 * header - confirm against the cuDNN API reference.
 */
typedef enum {
    CUDNN_ERRQUERY_RAWCODE     = 0,
    CUDNN_ERRQUERY_NONBLOCKING = 1,
    CUDNN_ERRQUERY_BLOCKING    = 2,
} cudnnErrQueryMode_t;
|
|
203
|
+
|
|
204
|
+
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
|
205
|
+
cudnnQueryRuntimeError(cudnnHandle_t handle, cudnnStatus_t *rstatus, cudnnErrQueryMode_t mode, cudnnRuntimeTag_t *tag);
|
|
206
|
+
|
|
207
|
+
cudnnStatus_t CUDNNWINAPI
|
|
208
|
+
cudnnGetProperty(libraryPropertyType type, int *value);
|
|
209
|
+
|
|
210
|
+
cudnnStatus_t CUDNNWINAPI
|
|
211
|
+
cudnnCreate(cudnnHandle_t *handle);
|
|
212
|
+
cudnnStatus_t CUDNNWINAPI
|
|
213
|
+
cudnnDestroy(cudnnHandle_t handle);
|
|
214
|
+
cudnnStatus_t CUDNNWINAPI
|
|
215
|
+
cudnnSetStream(cudnnHandle_t handle, cudaStream_t streamId);
|
|
216
|
+
cudnnStatus_t CUDNNWINAPI
|
|
217
|
+
cudnnGetStream(cudnnHandle_t handle, cudaStream_t *streamId);
|
|
218
|
+
/*
|
|
219
|
+
* CUDNN data type
|
|
220
|
+
*/
|
|
221
|
+
typedef enum {
|
|
222
|
+
CUDNN_DATA_FLOAT = 0,
|
|
223
|
+
CUDNN_DATA_DOUBLE = 1,
|
|
224
|
+
CUDNN_DATA_HALF = 2,
|
|
225
|
+
CUDNN_DATA_INT8 = 3,
|
|
226
|
+
CUDNN_DATA_INT32 = 4,
|
|
227
|
+
CUDNN_DATA_INT8x4 CUDNN_DEPRECATED_ENUM = 5,
|
|
228
|
+
CUDNN_DATA_UINT8 = 6,
|
|
229
|
+
CUDNN_DATA_UINT8x4 CUDNN_DEPRECATED_ENUM = 7,
|
|
230
|
+
CUDNN_DATA_INT8x32 CUDNN_DEPRECATED_ENUM = 8,
|
|
231
|
+
CUDNN_DATA_BFLOAT16 = 9,
|
|
232
|
+
CUDNN_DATA_INT64 = 10,
|
|
233
|
+
CUDNN_DATA_BOOLEAN = 11,
|
|
234
|
+
CUDNN_DATA_FP8_E4M3 = 12,
|
|
235
|
+
CUDNN_DATA_FP8_E5M2 = 13,
|
|
236
|
+
CUDNN_DATA_FAST_FLOAT_FOR_FP8 = 14,
|
|
237
|
+
CUDNN_DATA_FP8_E8M0 = 15,
|
|
238
|
+
CUDNN_DATA_FP4_E2M1 = 16,
|
|
239
|
+
CUDNN_DATA_INT4 = 17,
|
|
240
|
+
CUDNN_DATA_UINT4 = 18,
|
|
241
|
+
CUDNN_DATA_UINT32 = 19,
|
|
242
|
+
} cudnnDataType_t;
|
|
243
|
+
|
|
244
|
+
/*
 * CUDNN math type
 *
 * Four explicitly numbered modes (0-3).
 * NOTE(review): what each mode permits is not described in this header -
 * confirm against the cuDNN API reference.
 */
typedef enum {
    CUDNN_DEFAULT_MATH                    = 0,
    CUDNN_TENSOR_OP_MATH                  = 1,
    CUDNN_TENSOR_OP_MATH_ALLOW_CONVERSION = 2,
    CUDNN_FMA_MATH                        = 3,
} cudnnMathType_t;
|
|
253
|
+
|
|
254
|
+
/*
|
|
255
|
+
* CUDNN propagate Nan
|
|
256
|
+
*/
|
|
257
|
+
typedef enum {
|
|
258
|
+
CUDNN_NOT_PROPAGATE_NAN CUDNN_DEPRECATED_ENUM = 0,
|
|
259
|
+
CUDNN_PROPAGATE_NAN CUDNN_DEPRECATED_ENUM = 1,
|
|
260
|
+
} cudnnNanPropagation_t;
|
|
261
|
+
|
|
262
|
+
/*
 * Behavior for OOB samples. OOB samples are samples where L+R > T is
 * encountered during the gradient calculation.
 *
 * - CUDNN_CTC_SKIP_OOB_GRADIENTS: the CTC loss function does not write to the
 *   gradient buffer for that sample. Instead, the current values, even if not
 *   finite, are retained.
 * - CUDNN_CTC_ZERO_OOB_GRADIENTS: the gradient for that sample is set to zero.
 *   This guarantees a finite gradient.
 */
typedef enum {
    CUDNN_CTC_ZERO_OOB_GRADIENTS = 0,
    CUDNN_CTC_SKIP_OOB_GRADIENTS = 1,
} cudnnCTCGradMode_t;
|
|
272
|
+
|
|
273
|
+
/* Tensor memory layouts; each enumerator's comment describes its layout. */
typedef enum {
    CUDNN_TENSOR_NCHW        = 0, /* row major (wStride = 1, hStride = w) */
    CUDNN_TENSOR_NHWC        = 1, /* feature maps interleaved ( cStride = 1 ) */
    CUDNN_TENSOR_NCHW_VECT_C = 2, /* each image point is vector of element of C, vector length in data type */
} cudnnTensorFormat_t;
|
|
278
|
+
|
|
279
|
+
/*
 * CUDNN ReduceTensor op type
 *
 * Nine explicitly numbered reduction operators (0-8).
 */
typedef enum {
    CUDNN_REDUCE_TENSOR_ADD          = 0,
    CUDNN_REDUCE_TENSOR_MUL          = 1,
    CUDNN_REDUCE_TENSOR_MIN          = 2,
    CUDNN_REDUCE_TENSOR_MAX          = 3,
    CUDNN_REDUCE_TENSOR_AMAX         = 4,
    CUDNN_REDUCE_TENSOR_AVG          = 5,
    CUDNN_REDUCE_TENSOR_NORM1        = 6,
    CUDNN_REDUCE_TENSOR_NORM2        = 7,
    CUDNN_REDUCE_TENSOR_MUL_NO_ZEROS = 8,
} cudnnReduceTensorOp_t;
|
|
293
|
+
|
|
294
|
+
/*
|
|
295
|
+
* activation mode
|
|
296
|
+
*/
|
|
297
|
+
typedef enum {
|
|
298
|
+
CUDNN_ACTIVATION_SIGMOID = 0,
|
|
299
|
+
CUDNN_ACTIVATION_RELU = 1,
|
|
300
|
+
CUDNN_ACTIVATION_TANH = 2,
|
|
301
|
+
CUDNN_ACTIVATION_CLIPPED_RELU = 3,
|
|
302
|
+
CUDNN_ACTIVATION_ELU = 4,
|
|
303
|
+
CUDNN_ACTIVATION_IDENTITY = 5,
|
|
304
|
+
CUDNN_ACTIVATION_SWISH = 6
|
|
305
|
+
} cudnnActivationMode_t CUDNN_DEPRECATED;
|
|
306
|
+
|
|
307
|
+
/* Severity levels passed to the logging callback (see cudnnCallback_t). */
typedef enum {
    CUDNN_SEV_FATAL   = 0,
    CUDNN_SEV_ERROR   = 1,
    CUDNN_SEV_WARNING = 2,
    CUDNN_SEV_INFO    = 3,
} cudnnSeverity_t;

/*
 * Message masks to be used with cudnnSetCallback().
 * Each mask is a single bit at the position of the matching severity value
 * (ERROR -> 0x2, WARNING -> 0x4, INFO -> 0x8). No mask is defined for
 * CUDNN_SEV_FATAL.
 */
#define CUDNN_SEV_ERROR_EN (1U << CUDNN_SEV_ERROR)
#define CUDNN_SEV_WARNING_EN (1U << CUDNN_SEV_WARNING)
#define CUDNN_SEV_INFO_EN (1U << CUDNN_SEV_INFO)
|
|
318
|
+
|
|
319
|
+
/* struct containing useful informaiton for each API call */
|
|
320
|
+
typedef struct cudnnDebugStruct {
|
|
321
|
+
unsigned cudnn_version;
|
|
322
|
+
cudnnStatus_t cudnnStatus;
|
|
323
|
+
unsigned time_sec; /* epoch time in seconds */
|
|
324
|
+
unsigned time_usec; /* microseconds part of epoch time */
|
|
325
|
+
unsigned time_delta; /* time since start in seconds */
|
|
326
|
+
cudnnHandle_t handle; /* cudnn handle */
|
|
327
|
+
cudaStream_t stream; /* cuda stream ID */
|
|
328
|
+
unsigned long long pid; /* process ID */
|
|
329
|
+
unsigned long long tid; /* thread ID */
|
|
330
|
+
int cudaDeviceId; /* CUDA device ID */
|
|
331
|
+
int reserved[15]; /* reserved for future use */
|
|
332
|
+
} cudnnDebug_t;
|
|
333
|
+
|
|
334
|
+
typedef void (*cudnnCallback_t)(cudnnSeverity_t sev, void *udata, const cudnnDebug_t *dbg, const char *msg);
|
|
335
|
+
|
|
336
|
+
cudnnStatus_t CUDNNWINAPI
|
|
337
|
+
cudnnSetCallback(unsigned mask, void *udata, cudnnCallback_t fptr);
|
|
338
|
+
|
|
339
|
+
cudnnStatus_t CUDNNWINAPI
|
|
340
|
+
cudnnGetCallback(unsigned *mask, void **udata, cudnnCallback_t *fptr);
|
|
341
|
+
|
|
342
|
+
/*
|
|
343
|
+
* \brief Cross-library version checker.
|
|
344
|
+
* This function is implemented differently in each sub-library. Each sublib
|
|
345
|
+
* checks whether its own version matches that of its dependencies.
|
|
346
|
+
* \returns CUDNN_STATUS_SUCCESS if the version check passes,
|
|
347
|
+
* CUDNN_STATUS_SUBLIBRARY_VERSION_MISMATCH if the versions are inconsistent.
|
|
348
|
+
*/
|
|
349
|
+
cudnnStatus_t CUDNNWINAPI
|
|
350
|
+
cudnnGraphVersionCheck(void);
|
|
351
|
+
|
|
352
|
+
/* Maximum supported number of tensor dimensions */
#define CUDNN_DIM_MAX 8
|
|
354
|
+
|
|
355
|
+
/*
 * convolution mode
 */
typedef enum {
    CUDNN_CONVOLUTION       = 0,
    CUDNN_CROSS_CORRELATION = 1
} cudnnConvolutionMode_t;
|
|
359
|
+
|
|
360
|
+
/*
|
|
361
|
+
* CUDNN Reorder
|
|
362
|
+
*/
|
|
363
|
+
typedef enum {
|
|
364
|
+
CUDNN_DEFAULT_REORDER = 0,
|
|
365
|
+
CUDNN_NO_REORDER = 1,
|
|
366
|
+
} cudnnReorderType_t CUDNN_DEPRECATED;
|
|
367
|
+
|
|
368
|
+
/* Untyped (void *) descriptor handle; the pointee type is not exposed by this header. */
typedef void *cudnnBackendDescriptor_t;
|
|
369
|
+
|
|
370
|
+
/* A fraction stored as two signed 64-bit integers. */
typedef struct cudnnFractionStruct {
    int64_t numerator;
    int64_t denominator;
} cudnnFraction_t;
|
|
374
|
+
|
|
375
|
+
/*
 * Pointwise operation selector.
 *
 * Every value is explicit and the declaration order does NOT follow numeric
 * order (for example CUDNN_POINTWISE_SQRT = 4 is listed among the 10-23
 * entries). The numbers are part of the public interface: never reorder-and-
 * renumber. Ranges in use: 0-23, 100-107 (*_FWD), 200-207 (*_BWD),
 * 300-305 (CMP_*), 400-402 (LOGICAL_*), 501 and 601.
 */
typedef enum {
    CUDNN_POINTWISE_ADD        = 0,
    CUDNN_POINTWISE_ADD_SQUARE = 5,
    CUDNN_POINTWISE_DIV        = 6,
    CUDNN_POINTWISE_MAX        = 3,
    CUDNN_POINTWISE_MIN        = 2,
    CUDNN_POINTWISE_MOD        = 7,
    CUDNN_POINTWISE_MUL        = 1,
    CUDNN_POINTWISE_POW        = 8,
    CUDNN_POINTWISE_SUB        = 9,

    CUDNN_POINTWISE_ABS        = 10,
    CUDNN_POINTWISE_CEIL       = 11,
    CUDNN_POINTWISE_COS        = 12,
    CUDNN_POINTWISE_EXP        = 13,
    CUDNN_POINTWISE_FLOOR      = 14,
    CUDNN_POINTWISE_LOG        = 15,
    CUDNN_POINTWISE_NEG        = 16,
    CUDNN_POINTWISE_RSQRT      = 17,
    CUDNN_POINTWISE_SIN        = 18,
    CUDNN_POINTWISE_SQRT       = 4,
    CUDNN_POINTWISE_TAN        = 19,
    CUDNN_POINTWISE_ERF        = 20,
    CUDNN_POINTWISE_IDENTITY   = 21,
    CUDNN_POINTWISE_RECIPROCAL = 22,
    CUDNN_POINTWISE_ATAN2      = 23,

    /* *_FWD modes */
    CUDNN_POINTWISE_RELU_FWD             = 100,
    CUDNN_POINTWISE_TANH_FWD             = 101,
    CUDNN_POINTWISE_SIGMOID_FWD          = 102,
    CUDNN_POINTWISE_ELU_FWD              = 103,
    CUDNN_POINTWISE_GELU_FWD             = 104,
    CUDNN_POINTWISE_SOFTPLUS_FWD         = 105,
    CUDNN_POINTWISE_SWISH_FWD            = 106,
    CUDNN_POINTWISE_GELU_APPROX_TANH_FWD = 107,

    /* *_BWD modes; each is its *_FWD counterpart's value plus 100 */
    CUDNN_POINTWISE_RELU_BWD             = 200,
    CUDNN_POINTWISE_TANH_BWD             = 201,
    CUDNN_POINTWISE_SIGMOID_BWD          = 202,
    CUDNN_POINTWISE_ELU_BWD              = 203,
    CUDNN_POINTWISE_GELU_BWD             = 204,
    CUDNN_POINTWISE_SOFTPLUS_BWD         = 205,
    CUDNN_POINTWISE_SWISH_BWD            = 206,
    CUDNN_POINTWISE_GELU_APPROX_TANH_BWD = 207,

    /* comparisons */
    CUDNN_POINTWISE_CMP_EQ  = 300,
    CUDNN_POINTWISE_CMP_NEQ = 301,
    CUDNN_POINTWISE_CMP_GT  = 302,
    CUDNN_POINTWISE_CMP_GE  = 303,
    CUDNN_POINTWISE_CMP_LT  = 304,
    CUDNN_POINTWISE_CMP_LE  = 305,

    /* logical operators */
    CUDNN_POINTWISE_LOGICAL_AND = 400,
    CUDNN_POINTWISE_LOGICAL_OR  = 401,
    CUDNN_POINTWISE_LOGICAL_NOT = 402,

    CUDNN_POINTWISE_GEN_INDEX = 501,

    CUDNN_POINTWISE_BINARY_SELECT = 601,
} cudnnPointwiseMode_t;
|
|
435
|
+
|
|
436
|
+
/*
 * Resampling mode selector.
 *
 * CUDNN_RESAMPLE_AVGPOOL and CUDNN_RESAMPLE_AVGPOOL_INCLUDE_PADDING share the
 * value 2, i.e. they are two names for the same mode. Numeric order differs
 * from declaration order (MAXPOOL = 3 is declared after EXCLUDE_PADDING = 4).
 */
typedef enum {
    CUDNN_RESAMPLE_NEAREST                 = 0,
    CUDNN_RESAMPLE_BILINEAR                = 1,
    CUDNN_RESAMPLE_AVGPOOL                 = 2,
    CUDNN_RESAMPLE_AVGPOOL_INCLUDE_PADDING = 2,
    CUDNN_RESAMPLE_AVGPOOL_EXCLUDE_PADDING = 4,
    CUDNN_RESAMPLE_MAXPOOL                 = 3,
} cudnnResampleMode_t;
|
|
444
|
+
|
|
445
|
+
/*
 * Signal operation mode: set or wait.
 * NOTE(review): the object being signalled is not described in this part of
 * the header - confirm against the cuDNN API reference.
 */
typedef enum {
    CUDNN_SIGNAL_SET  = 0,
    CUDNN_SIGNAL_WAIT = 1,
} cudnnSignalMode_t;
|
|
449
|
+
|
|
450
|
+
/* Statistics-generation mode; a single mode is defined. */
typedef enum {
    CUDNN_GENSTATS_SUM_SQSUM = 0,
} cudnnGenStatsMode_t;
|
|
453
|
+
|
|
454
|
+
/* Selects between the training and inference variants of BN-finalize statistics. */
typedef enum {
    CUDNN_BN_FINALIZE_STATISTICS_TRAINING  = 0,
    CUDNN_BN_FINALIZE_STATISTICS_INFERENCE = 1,
} cudnnBnFinalizeStatsMode_t;
|
|
458
|
+
|
|
459
|
+
/* Random-number distribution selector. */
typedef enum {
    CUDNN_RNG_DISTRIBUTION_BERNOULLI = 0,
    CUDNN_RNG_DISTRIBUTION_UNIFORM   = 1,
    CUDNN_RNG_DISTRIBUTION_NORMAL    = 2,
} cudnnRngDistribution_t;
|
|
464
|
+
|
|
465
|
+
typedef enum {
|
|
466
|
+
CUDNN_ATTR_POINTWISE_MODE = 0,
|
|
467
|
+
CUDNN_ATTR_POINTWISE_MATH_PREC = 1,
|
|
468
|
+
CUDNN_ATTR_POINTWISE_NAN_PROPAGATION CUDNN_DEPRECATED_ENUM = 2,
|
|
469
|
+
CUDNN_ATTR_POINTWISE_RELU_LOWER_CLIP = 3,
|
|
470
|
+
CUDNN_ATTR_POINTWISE_RELU_UPPER_CLIP = 4,
|
|
471
|
+
CUDNN_ATTR_POINTWISE_RELU_LOWER_CLIP_SLOPE = 5,
|
|
472
|
+
CUDNN_ATTR_POINTWISE_ELU_ALPHA = 6,
|
|
473
|
+
CUDNN_ATTR_POINTWISE_SOFTPLUS_BETA = 7,
|
|
474
|
+
CUDNN_ATTR_POINTWISE_SWISH_BETA = 8,
|
|
475
|
+
CUDNN_ATTR_POINTWISE_AXIS = 9,
|
|
476
|
+
|
|
477
|
+
CUDNN_ATTR_CONVOLUTION_COMP_TYPE = 100,
|
|
478
|
+
CUDNN_ATTR_CONVOLUTION_CONV_MODE = 101,
|
|
479
|
+
CUDNN_ATTR_CONVOLUTION_DILATIONS = 102,
|
|
480
|
+
CUDNN_ATTR_CONVOLUTION_FILTER_STRIDES = 103,
|
|
481
|
+
CUDNN_ATTR_CONVOLUTION_POST_PADDINGS = 104,
|
|
482
|
+
CUDNN_ATTR_CONVOLUTION_PRE_PADDINGS = 105,
|
|
483
|
+
CUDNN_ATTR_CONVOLUTION_SPATIAL_DIMS = 106,
|
|
484
|
+
|
|
485
|
+
CUDNN_ATTR_ENGINEHEUR_MODE = 200,
|
|
486
|
+
CUDNN_ATTR_ENGINEHEUR_OPERATION_GRAPH = 201,
|
|
487
|
+
CUDNN_ATTR_ENGINEHEUR_RESULTS = 202,
|
|
488
|
+
CUDNN_ATTR_ENGINEHEUR_SM_COUNT_TARGET = 203,
|
|
489
|
+
CUDNN_ATTR_ENGINEHEUR_DEVICEPROP = 204,
|
|
490
|
+
|
|
491
|
+
CUDNN_ATTR_ENGINECFG_ENGINE = 300,
|
|
492
|
+
CUDNN_ATTR_ENGINECFG_INTERMEDIATE_INFO = 301,
|
|
493
|
+
CUDNN_ATTR_ENGINECFG_KNOB_CHOICES = 302,
|
|
494
|
+
CUDNN_ATTR_ENGINECFG_WORKSPACE_SIZE = 303,
|
|
495
|
+
CUDNN_ATTR_ENGINECFG_SHARED_MEMORY_USED = 304,
|
|
496
|
+
|
|
497
|
+
CUDNN_ATTR_EXECUTION_PLAN_HANDLE CUDNN_DEPRECATED_ENUM = 400,
|
|
498
|
+
CUDNN_ATTR_EXECUTION_PLAN_ENGINE_CONFIG = 401,
|
|
499
|
+
CUDNN_ATTR_EXECUTION_PLAN_WORKSPACE_SIZE = 402,
|
|
500
|
+
CUDNN_ATTR_EXECUTION_PLAN_COMPUTED_INTERMEDIATE_UIDS = 403,
|
|
501
|
+
CUDNN_ATTR_EXECUTION_PLAN_RUN_ONLY_INTERMEDIATE_UIDS = 404,
|
|
502
|
+
CUDNN_ATTR_EXECUTION_PLAN_JSON_REPRESENTATION = 405,
|
|
503
|
+
CUDNN_ATTR_EXECUTION_PLAN_KERNEL_CACHE = 406,
|
|
504
|
+
CUDNN_ATTR_EXECUTION_PLAN_DEVICEPROP = 407,
|
|
505
|
+
|
|
506
|
+
CUDNN_ATTR_INTERMEDIATE_INFO_UNIQUE_ID = 500,
|
|
507
|
+
CUDNN_ATTR_INTERMEDIATE_INFO_SIZE = 501,
|
|
508
|
+
CUDNN_ATTR_INTERMEDIATE_INFO_DEPENDENT_DATA_UIDS = 502,
|
|
509
|
+
CUDNN_ATTR_INTERMEDIATE_INFO_DEPENDENT_ATTRIBUTES = 503,
|
|
510
|
+
|
|
511
|
+
CUDNN_ATTR_KNOB_CHOICE_KNOB_TYPE = 600,
|
|
512
|
+
CUDNN_ATTR_KNOB_CHOICE_KNOB_VALUE = 601,
|
|
513
|
+
|
|
514
|
+
CUDNN_ATTR_OPERATION_CONVOLUTION_FORWARD_ALPHA = 700,
|
|
515
|
+
CUDNN_ATTR_OPERATION_CONVOLUTION_FORWARD_BETA = 701,
|
|
516
|
+
CUDNN_ATTR_OPERATION_CONVOLUTION_FORWARD_CONV_DESC = 702,
|
|
517
|
+
CUDNN_ATTR_OPERATION_CONVOLUTION_FORWARD_W = 703,
|
|
518
|
+
CUDNN_ATTR_OPERATION_CONVOLUTION_FORWARD_X = 704,
|
|
519
|
+
CUDNN_ATTR_OPERATION_CONVOLUTION_FORWARD_Y = 705,
|
|
520
|
+
CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_DATA_ALPHA = 706,
|
|
521
|
+
CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_DATA_BETA = 707,
|
|
522
|
+
CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_DATA_CONV_DESC = 708,
|
|
523
|
+
CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_DATA_W = 709,
|
|
524
|
+
CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_DATA_DX = 710,
|
|
525
|
+
CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_DATA_DY = 711,
|
|
526
|
+
CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_FILTER_ALPHA = 712,
|
|
527
|
+
CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_FILTER_BETA = 713,
|
|
528
|
+
CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_FILTER_CONV_DESC = 714,
|
|
529
|
+
CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_FILTER_DW = 715,
|
|
530
|
+
CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_FILTER_X = 716,
|
|
531
|
+
CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_FILTER_DY = 717,
|
|
532
|
+
|
|
533
|
+
CUDNN_ATTR_OPERATION_POINTWISE_PW_DESCRIPTOR = 750,
|
|
534
|
+
CUDNN_ATTR_OPERATION_POINTWISE_XDESC = 751,
|
|
535
|
+
CUDNN_ATTR_OPERATION_POINTWISE_BDESC = 752,
|
|
536
|
+
CUDNN_ATTR_OPERATION_POINTWISE_YDESC = 753,
|
|
537
|
+
CUDNN_ATTR_OPERATION_POINTWISE_ALPHA1 = 754,
|
|
538
|
+
CUDNN_ATTR_OPERATION_POINTWISE_ALPHA2 = 755,
|
|
539
|
+
CUDNN_ATTR_OPERATION_POINTWISE_DXDESC = 756,
|
|
540
|
+
CUDNN_ATTR_OPERATION_POINTWISE_DYDESC = 757,
|
|
541
|
+
CUDNN_ATTR_OPERATION_POINTWISE_TDESC = 758,
|
|
542
|
+
|
|
543
|
+
CUDNN_ATTR_OPERATION_GENSTATS_MODE = 770,
|
|
544
|
+
CUDNN_ATTR_OPERATION_GENSTATS_MATH_PREC = 771,
|
|
545
|
+
CUDNN_ATTR_OPERATION_GENSTATS_XDESC = 772,
|
|
546
|
+
CUDNN_ATTR_OPERATION_GENSTATS_SUMDESC = 773,
|
|
547
|
+
CUDNN_ATTR_OPERATION_GENSTATS_SQSUMDESC = 774,
|
|
548
|
+
|
|
549
|
+
CUDNN_ATTR_OPERATION_BN_FINALIZE_STATS_MODE = 780,
|
|
550
|
+
CUDNN_ATTR_OPERATION_BN_FINALIZE_MATH_PREC = 781,
|
|
551
|
+
CUDNN_ATTR_OPERATION_BN_FINALIZE_Y_SUM_DESC = 782,
|
|
552
|
+
CUDNN_ATTR_OPERATION_BN_FINALIZE_Y_SQ_SUM_DESC = 783,
|
|
553
|
+
CUDNN_ATTR_OPERATION_BN_FINALIZE_SCALE_DESC = 784,
|
|
554
|
+
CUDNN_ATTR_OPERATION_BN_FINALIZE_BIAS_DESC = 785,
|
|
555
|
+
CUDNN_ATTR_OPERATION_BN_FINALIZE_PREV_RUNNING_MEAN_DESC = 786,
|
|
556
|
+
CUDNN_ATTR_OPERATION_BN_FINALIZE_PREV_RUNNING_VAR_DESC = 787,
|
|
557
|
+
CUDNN_ATTR_OPERATION_BN_FINALIZE_UPDATED_RUNNING_MEAN_DESC = 788,
|
|
558
|
+
CUDNN_ATTR_OPERATION_BN_FINALIZE_UPDATED_RUNNING_VAR_DESC = 789,
|
|
559
|
+
CUDNN_ATTR_OPERATION_BN_FINALIZE_SAVED_MEAN_DESC = 790,
|
|
560
|
+
CUDNN_ATTR_OPERATION_BN_FINALIZE_SAVED_INV_STD_DESC = 791,
|
|
561
|
+
CUDNN_ATTR_OPERATION_BN_FINALIZE_EQ_SCALE_DESC = 792,
|
|
562
|
+
CUDNN_ATTR_OPERATION_BN_FINALIZE_EQ_BIAS_DESC = 793,
|
|
563
|
+
CUDNN_ATTR_OPERATION_BN_FINALIZE_ACCUM_COUNT_DESC = 794,
|
|
564
|
+
CUDNN_ATTR_OPERATION_BN_FINALIZE_EPSILON_DESC = 795,
|
|
565
|
+
CUDNN_ATTR_OPERATION_BN_FINALIZE_EXP_AVERATE_FACTOR_DESC = 796,
|
|
566
|
+
|
|
567
|
+
CUDNN_ATTR_OPERATIONGRAPH_HANDLE CUDNN_DEPRECATED_ENUM = 800,
|
|
568
|
+
CUDNN_ATTR_OPERATIONGRAPH_OPS = 801,
|
|
569
|
+
CUDNN_ATTR_OPERATIONGRAPH_ENGINE_GLOBAL_COUNT = 802,
|
|
570
|
+
CUDNN_ATTR_OPERATIONGRAPH_IS_DYNAMIC_SHAPE_ENABLED = 803,
|
|
571
|
+
CUDNN_ATTR_OPERATIONGRAPH_IS_SAME_TOPOLOGY = 804,
|
|
572
|
+
|
|
573
|
+
CUDNN_ATTR_TENSOR_BYTE_ALIGNMENT = 900,
|
|
574
|
+
CUDNN_ATTR_TENSOR_DATA_TYPE = 901,
|
|
575
|
+
CUDNN_ATTR_TENSOR_DIMENSIONS = 902,
|
|
576
|
+
CUDNN_ATTR_TENSOR_STRIDES = 903,
|
|
577
|
+
CUDNN_ATTR_TENSOR_VECTOR_COUNT = 904,
|
|
578
|
+
CUDNN_ATTR_TENSOR_VECTORIZED_DIMENSION = 905,
|
|
579
|
+
CUDNN_ATTR_TENSOR_UNIQUE_ID = 906,
|
|
580
|
+
CUDNN_ATTR_TENSOR_IS_VIRTUAL = 907,
|
|
581
|
+
CUDNN_ATTR_TENSOR_IS_BY_VALUE = 908,
|
|
582
|
+
CUDNN_ATTR_TENSOR_REORDERING_MODE = 909,
|
|
583
|
+
CUDNN_ATTR_TENSOR_RAGGED_OFFSET_DESC = 913,
|
|
584
|
+
|
|
585
|
+
CUDNN_ATTR_VARIANT_PACK_UNIQUE_IDS = 1000,
|
|
586
|
+
CUDNN_ATTR_VARIANT_PACK_DATA_POINTERS = 1001,
|
|
587
|
+
CUDNN_ATTR_VARIANT_PACK_INTERMEDIATES = 1002,
|
|
588
|
+
CUDNN_ATTR_VARIANT_PACK_WORKSPACE = 1003,
|
|
589
|
+
|
|
590
|
+
CUDNN_ATTR_LAYOUT_INFO_TENSOR_UID = 1100,
|
|
591
|
+
CUDNN_ATTR_LAYOUT_INFO_TYPES = 1101,
|
|
592
|
+
|
|
593
|
+
CUDNN_ATTR_KNOB_INFO_TYPE = 1200,
|
|
594
|
+
CUDNN_ATTR_KNOB_INFO_MAXIMUM_VALUE = 1201,
|
|
595
|
+
CUDNN_ATTR_KNOB_INFO_MINIMUM_VALUE = 1202,
|
|
596
|
+
CUDNN_ATTR_KNOB_INFO_STRIDE = 1203,
|
|
597
|
+
|
|
598
|
+
CUDNN_ATTR_ENGINE_OPERATION_GRAPH = 1300,
|
|
599
|
+
CUDNN_ATTR_ENGINE_GLOBAL_INDEX = 1301,
|
|
600
|
+
CUDNN_ATTR_ENGINE_KNOB_INFO = 1302,
|
|
601
|
+
CUDNN_ATTR_ENGINE_NUMERICAL_NOTE = 1303,
|
|
602
|
+
CUDNN_ATTR_ENGINE_LAYOUT_INFO = 1304,
|
|
603
|
+
CUDNN_ATTR_ENGINE_BEHAVIOR_NOTE = 1305,
|
|
604
|
+
CUDNN_ATTR_ENGINE_SM_COUNT_TARGET = 1306,
|
|
605
|
+
CUDNN_ATTR_ENGINE_DEVICEPROP = 1307,
|
|
606
|
+
|
|
607
|
+
CUDNN_ATTR_MATMUL_COMP_TYPE = 1500,
|
|
608
|
+
CUDNN_ATTR_MATMUL_PADDING_VALUE = 1503,
|
|
609
|
+
|
|
610
|
+
CUDNN_ATTR_OPERATION_MATMUL_ADESC = 1520,
|
|
611
|
+
CUDNN_ATTR_OPERATION_MATMUL_BDESC = 1521,
|
|
612
|
+
CUDNN_ATTR_OPERATION_MATMUL_CDESC = 1522,
|
|
613
|
+
CUDNN_ATTR_OPERATION_MATMUL_DESC = 1523,
|
|
614
|
+
CUDNN_ATTR_OPERATION_MATMUL_IRREGULARLY_STRIDED_BATCH_COUNT CUDNN_DEPRECATED_ENUM = 1524,
|
|
615
|
+
CUDNN_ATTR_OPERATION_MATMUL_GEMM_M_OVERRIDE_DESC = 1525,
|
|
616
|
+
CUDNN_ATTR_OPERATION_MATMUL_GEMM_N_OVERRIDE_DESC = 1526,
|
|
617
|
+
CUDNN_ATTR_OPERATION_MATMUL_GEMM_K_OVERRIDE_DESC = 1527,
|
|
618
|
+
|
|
619
|
+
CUDNN_ATTR_REDUCTION_OPERATOR = 1600,
|
|
620
|
+
CUDNN_ATTR_REDUCTION_COMP_TYPE = 1601,
|
|
621
|
+
CUDNN_ATTR_REDUCTION_IS_DETERMINISTIC = 1602,
|
|
622
|
+
|
|
623
|
+
CUDNN_ATTR_OPERATION_REDUCTION_XDESC = 1610,
|
|
624
|
+
CUDNN_ATTR_OPERATION_REDUCTION_YDESC = 1611,
|
|
625
|
+
CUDNN_ATTR_OPERATION_REDUCTION_DESC = 1612,
|
|
626
|
+
|
|
627
|
+
CUDNN_ATTR_OPERATION_BN_BWD_WEIGHTS_MATH_PREC = 1620,
|
|
628
|
+
CUDNN_ATTR_OPERATION_BN_BWD_WEIGHTS_MEAN_DESC = 1621,
|
|
629
|
+
CUDNN_ATTR_OPERATION_BN_BWD_WEIGHTS_INVSTD_DESC = 1622,
|
|
630
|
+
CUDNN_ATTR_OPERATION_BN_BWD_WEIGHTS_BN_SCALE_DESC = 1623,
|
|
631
|
+
CUDNN_ATTR_OPERATION_BN_BWD_WEIGHTS_X_DESC = 1624,
|
|
632
|
+
CUDNN_ATTR_OPERATION_BN_BWD_WEIGHTS_DY_DESC = 1625,
|
|
633
|
+
CUDNN_ATTR_OPERATION_BN_BWD_WEIGHTS_DBN_SCALE_DESC = 1626,
|
|
634
|
+
CUDNN_ATTR_OPERATION_BN_BWD_WEIGHTS_DBN_BIAS_DESC = 1627,
|
|
635
|
+
CUDNN_ATTR_OPERATION_BN_BWD_WEIGHTS_EQ_DY_SCALE_DESC = 1628,
|
|
636
|
+
CUDNN_ATTR_OPERATION_BN_BWD_WEIGHTS_EQ_X_SCALE_DESC = 1629,
|
|
637
|
+
CUDNN_ATTR_OPERATION_BN_BWD_WEIGHTS_EQ_BIAS = 1630,
|
|
638
|
+
|
|
639
|
+
CUDNN_ATTR_RESAMPLE_MODE = 1700,
|
|
640
|
+
CUDNN_ATTR_RESAMPLE_COMP_TYPE = 1701,
|
|
641
|
+
CUDNN_ATTR_RESAMPLE_SPATIAL_DIMS = 1702,
|
|
642
|
+
CUDNN_ATTR_RESAMPLE_POST_PADDINGS = 1703,
|
|
643
|
+
CUDNN_ATTR_RESAMPLE_PRE_PADDINGS = 1704,
|
|
644
|
+
CUDNN_ATTR_RESAMPLE_STRIDES = 1705,
|
|
645
|
+
CUDNN_ATTR_RESAMPLE_WINDOW_DIMS = 1706,
|
|
646
|
+
CUDNN_ATTR_RESAMPLE_NAN_PROPAGATION = 1707,
|
|
647
|
+
CUDNN_ATTR_RESAMPLE_PADDING_MODE = 1708,
|
|
648
|
+
|
|
649
|
+
CUDNN_ATTR_OPERATION_RESAMPLE_FWD_XDESC = 1710,
|
|
650
|
+
CUDNN_ATTR_OPERATION_RESAMPLE_FWD_YDESC = 1711,
|
|
651
|
+
CUDNN_ATTR_OPERATION_RESAMPLE_FWD_IDXDESC = 1712,
|
|
652
|
+
CUDNN_ATTR_OPERATION_RESAMPLE_FWD_ALPHA CUDNN_DEPRECATED_ENUM = 1713,
|
|
653
|
+
CUDNN_ATTR_OPERATION_RESAMPLE_FWD_BETA CUDNN_DEPRECATED_ENUM = 1714,
|
|
654
|
+
CUDNN_ATTR_OPERATION_RESAMPLE_FWD_DESC = 1716,
|
|
655
|
+
|
|
656
|
+
CUDNN_ATTR_OPERATION_RESAMPLE_BWD_DXDESC = 1720,
|
|
657
|
+
CUDNN_ATTR_OPERATION_RESAMPLE_BWD_DYDESC = 1721,
|
|
658
|
+
CUDNN_ATTR_OPERATION_RESAMPLE_BWD_IDXDESC = 1722,
|
|
659
|
+
CUDNN_ATTR_OPERATION_RESAMPLE_BWD_ALPHA CUDNN_DEPRECATED_ENUM = 1723,
|
|
660
|
+
CUDNN_ATTR_OPERATION_RESAMPLE_BWD_BETA CUDNN_DEPRECATED_ENUM = 1724,
|
|
661
|
+
CUDNN_ATTR_OPERATION_RESAMPLE_BWD_DESC = 1725,
|
|
662
|
+
CUDNN_ATTR_OPERATION_RESAMPLE_BWD_XDESC = 1726,
|
|
663
|
+
CUDNN_ATTR_OPERATION_RESAMPLE_BWD_YDESC = 1727,
|
|
664
|
+
|
|
665
|
+
CUDNN_ATTR_OPERATION_CONCAT_AXIS = 1800,
|
|
666
|
+
CUDNN_ATTR_OPERATION_CONCAT_INPUT_DESCS = 1801,
|
|
667
|
+
CUDNN_ATTR_OPERATION_CONCAT_INPLACE_INDEX = 1802,
|
|
668
|
+
CUDNN_ATTR_OPERATION_CONCAT_OUTPUT_DESC = 1803,
|
|
669
|
+
|
|
670
|
+
CUDNN_ATTR_OPERATION_SIGNAL_MODE = 1900,
|
|
671
|
+
CUDNN_ATTR_OPERATION_SIGNAL_FLAGDESC = 1901,
|
|
672
|
+
CUDNN_ATTR_OPERATION_SIGNAL_VALUE = 1902,
|
|
673
|
+
CUDNN_ATTR_OPERATION_SIGNAL_XDESC = 1903,
|
|
674
|
+
CUDNN_ATTR_OPERATION_SIGNAL_YDESC = 1904,
|
|
675
|
+
|
|
676
|
+
CUDNN_ATTR_OPERATION_PAGED_CACHE_LOAD_CONTAINER_DESC = 1950,
|
|
677
|
+
CUDNN_ATTR_OPERATION_PAGED_CACHE_LOAD_YDESC = 1951,
|
|
678
|
+
CUDNN_ATTR_OPERATION_PAGED_CACHE_LOAD_SEQUENCE_DESC = 1952,
|
|
679
|
+
CUDNN_ATTR_OPERATION_PAGED_CACHE_LOAD_PAGE_TABLE_DESC = 1953,
|
|
680
|
+
|
|
681
|
+
CUDNN_ATTR_OPERATION_NORM_FWD_MODE = 2000,
|
|
682
|
+
CUDNN_ATTR_OPERATION_NORM_FWD_PHASE = 2001,
|
|
683
|
+
CUDNN_ATTR_OPERATION_NORM_FWD_XDESC = 2002,
|
|
684
|
+
CUDNN_ATTR_OPERATION_NORM_FWD_MEAN_DESC = 2003,
|
|
685
|
+
CUDNN_ATTR_OPERATION_NORM_FWD_INV_VARIANCE_DESC = 2004,
|
|
686
|
+
CUDNN_ATTR_OPERATION_NORM_FWD_SCALE_DESC = 2005,
|
|
687
|
+
CUDNN_ATTR_OPERATION_NORM_FWD_BIAS_DESC = 2006,
|
|
688
|
+
CUDNN_ATTR_OPERATION_NORM_FWD_EPSILON_DESC = 2007,
|
|
689
|
+
CUDNN_ATTR_OPERATION_NORM_FWD_EXP_AVG_FACTOR_DESC = 2008,
|
|
690
|
+
CUDNN_ATTR_OPERATION_NORM_FWD_INPUT_RUNNING_MEAN_DESC = 2009,
|
|
691
|
+
CUDNN_ATTR_OPERATION_NORM_FWD_INPUT_RUNNING_VAR_DESC = 2010,
|
|
692
|
+
CUDNN_ATTR_OPERATION_NORM_FWD_OUTPUT_RUNNING_MEAN_DESC = 2011,
|
|
693
|
+
CUDNN_ATTR_OPERATION_NORM_FWD_OUTPUT_RUNNING_VAR_DESC = 2012,
|
|
694
|
+
CUDNN_ATTR_OPERATION_NORM_FWD_YDESC = 2013,
|
|
695
|
+
CUDNN_ATTR_OPERATION_NORM_FWD_PEER_STAT_DESCS = 2014,
|
|
696
|
+
|
|
697
|
+
CUDNN_ATTR_OPERATION_NORM_BWD_MODE = 2100,
|
|
698
|
+
CUDNN_ATTR_OPERATION_NORM_BWD_XDESC = 2101,
|
|
699
|
+
CUDNN_ATTR_OPERATION_NORM_BWD_MEAN_DESC = 2102,
|
|
700
|
+
CUDNN_ATTR_OPERATION_NORM_BWD_INV_VARIANCE_DESC = 2103,
|
|
701
|
+
CUDNN_ATTR_OPERATION_NORM_BWD_DYDESC = 2104,
|
|
702
|
+
CUDNN_ATTR_OPERATION_NORM_BWD_SCALE_DESC = 2105,
|
|
703
|
+
CUDNN_ATTR_OPERATION_NORM_BWD_EPSILON_DESC = 2106,
|
|
704
|
+
CUDNN_ATTR_OPERATION_NORM_BWD_DSCALE_DESC = 2107,
|
|
705
|
+
CUDNN_ATTR_OPERATION_NORM_BWD_DBIAS_DESC = 2108,
|
|
706
|
+
CUDNN_ATTR_OPERATION_NORM_BWD_DXDESC = 2109,
|
|
707
|
+
CUDNN_ATTR_OPERATION_NORM_BWD_PEER_STAT_DESCS = 2110,
|
|
708
|
+
|
|
709
|
+
CUDNN_ATTR_OPERATION_RESHAPE_XDESC = 2200,
|
|
710
|
+
CUDNN_ATTR_OPERATION_RESHAPE_YDESC = 2201,
|
|
711
|
+
|
|
712
|
+
CUDNN_ATTR_OPERATION_EXPAND_BAND_MATRIX_XDESC = 2250,
|
|
713
|
+
CUDNN_ATTR_OPERATION_EXPAND_BAND_MATRIX_YDESC = 2251,
|
|
714
|
+
CUDNN_ATTR_OPERATION_EXPAND_BAND_MATRIX_LOWER_BANDWIDTH = 2252,
|
|
715
|
+
CUDNN_ATTR_OPERATION_EXPAND_BAND_MATRIX_UPPER_BANDWIDTH = 2253,
|
|
716
|
+
CUDNN_ATTR_OPERATION_EXPAND_BAND_MATRIX_AXIS = 2254,
|
|
717
|
+
CUDNN_ATTR_OPERATION_EXPAND_BAND_MATRIX_PAD_VALUE = 2255,
|
|
718
|
+
CUDNN_ATTR_OPERATION_EXPAND_BAND_MATRIX_KV_TOKEN_OFFSET_DESC = 2256,
|
|
719
|
+
|
|
720
|
+
CUDNN_ATTR_OPERATION_CONTRACT_BAND_MATRIX_XDESC = 2270,
|
|
721
|
+
CUDNN_ATTR_OPERATION_CONTRACT_BAND_MATRIX_YDESC = 2271,
|
|
722
|
+
CUDNN_ATTR_OPERATION_CONTRACT_BAND_MATRIX_LOWER_BANDWIDTH = 2272,
|
|
723
|
+
CUDNN_ATTR_OPERATION_CONTRACT_BAND_MATRIX_UPPER_BANDWIDTH = 2273,
|
|
724
|
+
CUDNN_ATTR_OPERATION_CONTRACT_BAND_MATRIX_AXIS = 2274,
|
|
725
|
+
CUDNN_ATTR_OPERATION_CONTRACT_BAND_MATRIX_PAD_VALUE = 2275,
|
|
726
|
+
CUDNN_ATTR_OPERATION_CONTRACT_BAND_MAX_TOKEN_VALUE = 2276,
|
|
727
|
+
|
|
728
|
+
CUDNN_ATTR_RNG_DISTRIBUTION = 2300,
|
|
729
|
+
CUDNN_ATTR_RNG_NORMAL_DIST_MEAN = 2301,
|
|
730
|
+
CUDNN_ATTR_RNG_NORMAL_DIST_STANDARD_DEVIATION = 2302,
|
|
731
|
+
CUDNN_ATTR_RNG_UNIFORM_DIST_MAXIMUM = 2303,
|
|
732
|
+
CUDNN_ATTR_RNG_UNIFORM_DIST_MINIMUM = 2304,
|
|
733
|
+
CUDNN_ATTR_RNG_BERNOULLI_DIST_PROBABILITY = 2305,
|
|
734
|
+
|
|
735
|
+
CUDNN_ATTR_OPERATION_RNG_YDESC = 2310,
|
|
736
|
+
CUDNN_ATTR_OPERATION_RNG_SEED = 2311,
|
|
737
|
+
CUDNN_ATTR_OPERATION_RNG_DESC = 2312,
|
|
738
|
+
CUDNN_ATTR_OPERATION_RNG_OFFSET_DESC = 2313,
|
|
739
|
+
|
|
740
|
+
CUDNN_ATTR_KERNEL_CACHE_OPERATION_GRAPH = 2400,
|
|
741
|
+
CUDNN_ATTR_KERNEL_CACHE_IS_ENGINECFG_KERNEL_CACHED = 2401,
|
|
742
|
+
CUDNN_ATTR_KERNEL_CACHE_JSON_REPRESENTATION = 2402,
|
|
743
|
+
|
|
744
|
+
CUDNN_ATTR_OPERATION_BLOCK_SCALE_QUANTIZE_XDESC = 2500,
|
|
745
|
+
CUDNN_ATTR_OPERATION_BLOCK_SCALE_QUANTIZE_YDESC = 2501,
|
|
746
|
+
CUDNN_ATTR_OPERATION_BLOCK_SCALE_QUANTIZE_SCALE_DESC = 2502,
|
|
747
|
+
CUDNN_ATTR_OPERATION_BLOCK_SCALE_QUANTIZE_MATH_PREC = 2503,
|
|
748
|
+
CUDNN_ATTR_OPERATION_BLOCK_SCALE_QUANTIZE_BLOCK_SIZE = 2504,
|
|
749
|
+
|
|
750
|
+
CUDNN_ATTR_OPERATION_BLOCK_SCALE_DEQUANTIZE_XDESC = 2600,
|
|
751
|
+
CUDNN_ATTR_OPERATION_BLOCK_SCALE_DEQUANTIZE_SCALE_DESC = 2601,
|
|
752
|
+
CUDNN_ATTR_OPERATION_BLOCK_SCALE_DEQUANTIZE_YDESC = 2602,
|
|
753
|
+
CUDNN_ATTR_OPERATION_BLOCK_SCALE_DEQUANTIZE_MATH_PREC = 2603,
|
|
754
|
+
CUDNN_ATTR_OPERATION_BLOCK_SCALE_DEQUANTIZE_BLOCK_SIZE = 2604,
|
|
755
|
+
|
|
756
|
+
CUDNN_ATTR_DEVICEPROP_DEVICE_ID = 2700,
|
|
757
|
+
CUDNN_ATTR_DEVICEPROP_HANDLE = 2701,
|
|
758
|
+
CUDNN_ATTR_DEVICEPROP_JSON_REPRESENTATION = 2702,
|
|
759
|
+
} cudnnBackendAttributeName_t;
|
|
760
|
+
|
|
761
|
+
/*
 * cudnnBackendAttributeType_t: type tags for backend attribute values.
 *
 * This is the type of the `attributeType` parameter of
 * cudnnBackendSetAttribute() and cudnnBackendGetAttribute(), declared later
 * in this header. Every enumerator carries an explicit value; the values run
 * contiguously from 0 to 29.
 *
 * NOTE(review): the tag presumably tells the library how to interpret the
 * untyped `arrayOfElements` buffer passed alongside it -- confirm against the
 * cuDNN backend API documentation.
 */
typedef enum {
|
|
762
|
+
CUDNN_TYPE_HANDLE = 0,
|
|
763
|
+
CUDNN_TYPE_DATA_TYPE = 1,
|
|
764
|
+
CUDNN_TYPE_BOOLEAN = 2,
|
|
765
|
+
CUDNN_TYPE_INT64 = 3,
|
|
766
|
+
CUDNN_TYPE_FLOAT = 4,
|
|
767
|
+
CUDNN_TYPE_DOUBLE = 5,
|
|
768
|
+
CUDNN_TYPE_VOID_PTR = 6,
|
|
769
|
+
CUDNN_TYPE_CONVOLUTION_MODE = 7,
|
|
770
|
+
CUDNN_TYPE_HEUR_MODE = 8,
|
|
771
|
+
CUDNN_TYPE_KNOB_TYPE = 9,
|
|
772
|
+
/* The "PROPOGATION" spelling is part of the public identifier; do not correct it.
 * The entry is tagged with CUDNN_DEPRECATED_ENUM (macro defined outside this section). */
CUDNN_TYPE_NAN_PROPOGATION CUDNN_DEPRECATED_ENUM = 10,
|
|
773
|
+
CUDNN_TYPE_NUMERICAL_NOTE = 11,
|
|
774
|
+
CUDNN_TYPE_LAYOUT_TYPE = 12,
|
|
775
|
+
CUDNN_TYPE_ATTRIB_NAME = 13,
|
|
776
|
+
CUDNN_TYPE_POINTWISE_MODE = 14,
|
|
777
|
+
CUDNN_TYPE_BACKEND_DESCRIPTOR = 15,
|
|
778
|
+
CUDNN_TYPE_GENSTATS_MODE = 16,
|
|
779
|
+
CUDNN_TYPE_BN_FINALIZE_STATS_MODE = 17,
|
|
780
|
+
CUDNN_TYPE_REDUCTION_OPERATOR_TYPE = 18,
|
|
781
|
+
CUDNN_TYPE_BEHAVIOR_NOTE = 19,
|
|
782
|
+
CUDNN_TYPE_TENSOR_REORDERING_MODE = 20,
|
|
783
|
+
CUDNN_TYPE_RESAMPLE_MODE = 21,
|
|
784
|
+
CUDNN_TYPE_PADDING_MODE = 22,
|
|
785
|
+
CUDNN_TYPE_INT32 = 23,
|
|
786
|
+
CUDNN_TYPE_CHAR = 24,
|
|
787
|
+
CUDNN_TYPE_SIGNAL_MODE = 25,
|
|
788
|
+
CUDNN_TYPE_FRACTION = 26,
|
|
789
|
+
CUDNN_TYPE_NORM_MODE = 27,
|
|
790
|
+
CUDNN_TYPE_NORM_FWD_PHASE = 28,
|
|
791
|
+
CUDNN_TYPE_RNG_DISTRIBUTION = 29,
|
|
792
|
+
} cudnnBackendAttributeType_t;
|
|
793
|
+
|
|
794
|
+
/*
 * cudnnBackendDescriptorType_t: kinds of backend descriptor.
 *
 * This is the type of the `descriptorType` parameter of
 * cudnnBackendCreateDescriptor(), declared later in this header. Every
 * enumerator carries an explicit value; the values run contiguously from
 * 0 to 40.
 *
 * The numeric order does not group related descriptors together (for example
 * the two *_BAND_MATRIX operation descriptors are 39 and 40, after
 * DEVICEPROP = 38).
 * NOTE(review): this looks like append-only numbering to keep existing values
 * stable -- confirm before inserting or renumbering entries.
 */
typedef enum {
|
|
795
|
+
CUDNN_BACKEND_POINTWISE_DESCRIPTOR = 0,
|
|
796
|
+
CUDNN_BACKEND_CONVOLUTION_DESCRIPTOR = 1,
|
|
797
|
+
CUDNN_BACKEND_ENGINE_DESCRIPTOR = 2,
|
|
798
|
+
CUDNN_BACKEND_ENGINECFG_DESCRIPTOR = 3,
|
|
799
|
+
CUDNN_BACKEND_ENGINEHEUR_DESCRIPTOR = 4,
|
|
800
|
+
CUDNN_BACKEND_EXECUTION_PLAN_DESCRIPTOR = 5,
|
|
801
|
+
CUDNN_BACKEND_INTERMEDIATE_INFO_DESCRIPTOR = 6,
|
|
802
|
+
CUDNN_BACKEND_KNOB_CHOICE_DESCRIPTOR = 7,
|
|
803
|
+
CUDNN_BACKEND_KNOB_INFO_DESCRIPTOR = 8,
|
|
804
|
+
CUDNN_BACKEND_LAYOUT_INFO_DESCRIPTOR = 9,
|
|
805
|
+
CUDNN_BACKEND_OPERATION_CONVOLUTION_FORWARD_DESCRIPTOR = 10,
|
|
806
|
+
CUDNN_BACKEND_OPERATION_CONVOLUTION_BACKWARD_FILTER_DESCRIPTOR = 11,
|
|
807
|
+
CUDNN_BACKEND_OPERATION_CONVOLUTION_BACKWARD_DATA_DESCRIPTOR = 12,
|
|
808
|
+
CUDNN_BACKEND_OPERATION_POINTWISE_DESCRIPTOR = 13,
|
|
809
|
+
CUDNN_BACKEND_OPERATION_GEN_STATS_DESCRIPTOR = 14,
|
|
810
|
+
CUDNN_BACKEND_OPERATIONGRAPH_DESCRIPTOR = 15,
|
|
811
|
+
CUDNN_BACKEND_VARIANT_PACK_DESCRIPTOR = 16,
|
|
812
|
+
CUDNN_BACKEND_TENSOR_DESCRIPTOR = 17,
|
|
813
|
+
CUDNN_BACKEND_MATMUL_DESCRIPTOR = 18,
|
|
814
|
+
CUDNN_BACKEND_OPERATION_MATMUL_DESCRIPTOR = 19,
|
|
815
|
+
CUDNN_BACKEND_OPERATION_BN_FINALIZE_STATISTICS_DESCRIPTOR = 20,
|
|
816
|
+
CUDNN_BACKEND_REDUCTION_DESCRIPTOR = 21,
|
|
817
|
+
CUDNN_BACKEND_OPERATION_REDUCTION_DESCRIPTOR = 22,
|
|
818
|
+
CUDNN_BACKEND_OPERATION_BN_BWD_WEIGHTS_DESCRIPTOR = 23,
|
|
819
|
+
CUDNN_BACKEND_RESAMPLE_DESCRIPTOR = 24,
|
|
820
|
+
CUDNN_BACKEND_OPERATION_RESAMPLE_FWD_DESCRIPTOR = 25,
|
|
821
|
+
CUDNN_BACKEND_OPERATION_RESAMPLE_BWD_DESCRIPTOR = 26,
|
|
822
|
+
CUDNN_BACKEND_OPERATION_CONCAT_DESCRIPTOR = 27,
|
|
823
|
+
CUDNN_BACKEND_OPERATION_SIGNAL_DESCRIPTOR = 28,
|
|
824
|
+
CUDNN_BACKEND_OPERATION_NORM_FORWARD_DESCRIPTOR = 29,
|
|
825
|
+
CUDNN_BACKEND_OPERATION_NORM_BACKWARD_DESCRIPTOR = 30,
|
|
826
|
+
CUDNN_BACKEND_OPERATION_RESHAPE_DESCRIPTOR = 31,
|
|
827
|
+
CUDNN_BACKEND_RNG_DESCRIPTOR = 32,
|
|
828
|
+
CUDNN_BACKEND_OPERATION_RNG_DESCRIPTOR = 33,
|
|
829
|
+
CUDNN_BACKEND_KERNEL_CACHE_DESCRIPTOR = 34,
|
|
830
|
+
CUDNN_BACKEND_OPERATION_PAGED_CACHE_LOAD_DESCRIPTOR = 35,
|
|
831
|
+
CUDNN_BACKEND_OPERATION_BLOCK_SCALE_QUANTIZE_DESCRIPTOR = 36,
|
|
832
|
+
CUDNN_BACKEND_OPERATION_BLOCK_SCALE_DEQUANTIZE_DESCRIPTOR = 37,
|
|
833
|
+
CUDNN_BACKEND_DEVICEPROP_DESCRIPTOR = 38,
|
|
834
|
+
CUDNN_BACKEND_OPERATION_EXPAND_BAND_MATRIX_DESCRIPTOR = 39,
|
|
835
|
+
CUDNN_BACKEND_OPERATION_CONTRACT_BAND_MATRIX_DESCRIPTOR = 40,
|
|
836
|
+
} cudnnBackendDescriptorType_t;
|
|
837
|
+
|
|
838
|
+
/*
 * cudnnBackendNumericalNote_t: numerical-note identifiers.
 *
 * Notes are explicitly numbered 0..9, followed by a TYPE_COUNT entry.
 * NOTE(review): what each note implies about an engine's numerics is not
 * visible in this header -- see the cuDNN backend API documentation.
 */
typedef enum {
|
|
839
|
+
CUDNN_NUMERICAL_NOTE_TENSOR_CORE = 0,
|
|
840
|
+
CUDNN_NUMERICAL_NOTE_DOWN_CONVERT_INPUTS = 1,
|
|
841
|
+
CUDNN_NUMERICAL_NOTE_REDUCED_PRECISION_REDUCTION = 2,
|
|
842
|
+
CUDNN_NUMERICAL_NOTE_FFT = 3,
|
|
843
|
+
CUDNN_NUMERICAL_NOTE_NONDETERMINISTIC = 4,
|
|
844
|
+
CUDNN_NUMERICAL_NOTE_WINOGRAD = 5,
|
|
845
|
+
CUDNN_NUMERICAL_NOTE_WINOGRAD_TILE_4x4 = 6,
|
|
846
|
+
CUDNN_NUMERICAL_NOTE_WINOGRAD_TILE_6x6 = 7,
|
|
847
|
+
CUDNN_NUMERICAL_NOTE_WINOGRAD_TILE_13x13 = 8,
|
|
848
|
+
CUDNN_NUMERICAL_NOTE_STRICT_NAN_PROP = 9,
|
|
849
|
+
/* Equals the number of notes listed above (10); presumably a count sentinel
 * that must stay last and be bumped when a note is added -- confirm. */
CUDNN_NUMERICAL_NOTE_TYPE_COUNT = 10,
|
|
850
|
+
} cudnnBackendNumericalNote_t;
|
|
851
|
+
|
|
852
|
+
/*
 * cudnnBackendBehaviorNote_t: behavior-note identifiers.
 *
 * Notes are explicitly numbered 0..3, followed by a TYPE_COUNT entry.
 * NOTE(review): the meaning of each note is not visible in this header --
 * see the cuDNN backend API documentation.
 */
typedef enum {
|
|
853
|
+
CUDNN_BEHAVIOR_NOTE_RUNTIME_COMPILATION = 0,
|
|
854
|
+
CUDNN_BEHAVIOR_NOTE_REQUIRES_FILTER_INT8x32_REORDER = 1,
|
|
855
|
+
CUDNN_BEHAVIOR_NOTE_REQUIRES_BIAS_INT8x32_REORDER = 2,
|
|
856
|
+
CUDNN_BEHAVIOR_NOTE_SUPPORTS_CUDA_GRAPH_NATIVE_API = 3,
|
|
857
|
+
/* Equals the number of notes listed above (4); presumably a count sentinel
 * that must stay last -- confirm. */
CUDNN_BEHAVIOR_NOTE_TYPE_COUNT = 4,
|
|
858
|
+
} cudnnBackendBehaviorNote_t;
|
|
859
|
+
|
|
860
|
+
/*
 * cudnnBackendKnobType_t: knob identifiers.
 *
 * Knobs are explicitly numbered 0..42, followed by CUDNN_KNOB_TYPE_COUNTS.
 * Many entries are tagged with CUDNN_DEPRECATED_ENUM (macro defined outside
 * this section); the tagged entries keep their numeric values, so the
 * numbering has no gaps.
 * NOTE(review): the effect of each knob is not visible in this header -- see
 * the cuDNN backend API documentation.
 */
typedef enum {
|
|
861
|
+
CUDNN_KNOB_TYPE_SPLIT_K CUDNN_DEPRECATED_ENUM = 0,
|
|
862
|
+
CUDNN_KNOB_TYPE_SWIZZLE = 1,
|
|
863
|
+
CUDNN_KNOB_TYPE_TILE_SIZE = 2,
|
|
864
|
+
CUDNN_KNOB_TYPE_USE_TEX CUDNN_DEPRECATED_ENUM = 3,
|
|
865
|
+
CUDNN_KNOB_TYPE_EDGE = 4,
|
|
866
|
+
CUDNN_KNOB_TYPE_KBLOCK CUDNN_DEPRECATED_ENUM = 5,
|
|
867
|
+
CUDNN_KNOB_TYPE_LDGA CUDNN_DEPRECATED_ENUM = 6,
|
|
868
|
+
CUDNN_KNOB_TYPE_LDGB CUDNN_DEPRECATED_ENUM = 7,
|
|
869
|
+
CUDNN_KNOB_TYPE_CHUNK_K CUDNN_DEPRECATED_ENUM = 8,
|
|
870
|
+
CUDNN_KNOB_TYPE_SPLIT_H CUDNN_DEPRECATED_ENUM = 9,
|
|
871
|
+
CUDNN_KNOB_TYPE_WINO_TILE CUDNN_DEPRECATED_ENUM = 10,
|
|
872
|
+
CUDNN_KNOB_TYPE_MULTIPLY = 11,
|
|
873
|
+
CUDNN_KNOB_TYPE_SPLIT_K_BUF = 12,
|
|
874
|
+
CUDNN_KNOB_TYPE_TILEK = 13,
|
|
875
|
+
CUDNN_KNOB_TYPE_STAGES = 14,
|
|
876
|
+
CUDNN_KNOB_TYPE_REDUCTION_MODE = 15,
|
|
877
|
+
CUDNN_KNOB_TYPE_CTA_SPLIT_K_MODE CUDNN_DEPRECATED_ENUM = 16,
|
|
878
|
+
CUDNN_KNOB_TYPE_SPLIT_K_SLC = 17,
|
|
879
|
+
CUDNN_KNOB_TYPE_IDX_MODE = 18,
|
|
880
|
+
CUDNN_KNOB_TYPE_SLICED CUDNN_DEPRECATED_ENUM = 19,
|
|
881
|
+
CUDNN_KNOB_TYPE_SPLIT_RS CUDNN_DEPRECATED_ENUM = 20,
|
|
882
|
+
CUDNN_KNOB_TYPE_SINGLEBUFFER CUDNN_DEPRECATED_ENUM = 21,
|
|
883
|
+
CUDNN_KNOB_TYPE_LDGC CUDNN_DEPRECATED_ENUM = 22,
|
|
884
|
+
CUDNN_KNOB_TYPE_SPECFILT = 23,
|
|
885
|
+
CUDNN_KNOB_TYPE_KERNEL_CFG = 24,
|
|
886
|
+
CUDNN_KNOB_TYPE_WORKSPACE = 25,
|
|
887
|
+
CUDNN_KNOB_TYPE_TILE_CGA CUDNN_DEPRECATED_ENUM = 26,
|
|
888
|
+
CUDNN_KNOB_TYPE_TILE_CGA_M = 27,
|
|
889
|
+
CUDNN_KNOB_TYPE_TILE_CGA_N = 28,
|
|
890
|
+
CUDNN_KNOB_TYPE_BLOCK_SIZE = 29,
|
|
891
|
+
CUDNN_KNOB_TYPE_OCCUPANCY = 30,
|
|
892
|
+
CUDNN_KNOB_TYPE_ARRAY_SIZE_PER_THREAD = 31,
|
|
893
|
+
CUDNN_KNOB_TYPE_NUM_C_PER_BLOCK CUDNN_DEPRECATED_ENUM = 32,
|
|
894
|
+
CUDNN_KNOB_TYPE_SPLIT_COLS = 33,
|
|
895
|
+
CUDNN_KNOB_TYPE_TILE_ROWS = 34,
|
|
896
|
+
CUDNN_KNOB_TYPE_TILE_COLS = 35,
|
|
897
|
+
CUDNN_KNOB_TYPE_LOAD_SIZE = 36,
|
|
898
|
+
CUDNN_KNOB_TYPE_CTA_COUNT = 37,
|
|
899
|
+
CUDNN_KNOB_TYPE_STREAM_K = 38,
|
|
900
|
+
CUDNN_KNOB_TYPE_SPLIT_P_SLC = 39,
|
|
901
|
+
CUDNN_KNOB_TYPE_TILE_M = 40,
|
|
902
|
+
CUDNN_KNOB_TYPE_TILE_N = 41,
|
|
903
|
+
CUDNN_KNOB_TYPE_WARP_SPEC_CFG = 42,
|
|
904
|
+
/* Equals the number of knob entries listed above (43); presumably a count
 * sentinel that must stay last -- confirm. */
CUDNN_KNOB_TYPE_COUNTS = 43,
|
|
905
|
+
} cudnnBackendKnobType_t;
|
|
906
|
+
|
|
907
|
+
/*
 * cudnnBackendLayoutType_t: preferred-layout identifiers.
 *
 * Four PREFERRED_* layouts are explicitly numbered 0..3, followed by a COUNT
 * entry.
 * NOTE(review): the exact memory layout behind each name is not visible in
 * this header -- see the cuDNN backend API documentation.
 */
typedef enum {
|
|
908
|
+
CUDNN_LAYOUT_TYPE_PREFERRED_NCHW = 0,
|
|
909
|
+
CUDNN_LAYOUT_TYPE_PREFERRED_NHWC = 1,
|
|
910
|
+
CUDNN_LAYOUT_TYPE_PREFERRED_PAD4CK = 2,
|
|
911
|
+
CUDNN_LAYOUT_TYPE_PREFERRED_PAD8CK = 3,
|
|
912
|
+
/* Equals the number of layouts listed above (4); presumably a count sentinel -- confirm. */
CUDNN_LAYOUT_TYPE_COUNT = 4,
|
|
913
|
+
} cudnnBackendLayoutType_t;
|
|
914
|
+
|
|
915
|
+
/*
 * cudnnBackendHeurMode_t: heuristic-mode identifiers.
 *
 * Four modes are explicitly numbered 0..3, followed by a MODES_COUNT entry.
 * The numeric order is not alphabetical: MODE_B is 1 and MODE_A is 3, and
 * MODE_INSTANT (0) is a distinct value from MODE_A.
 * NOTE(review): how the modes differ is not visible in this header -- see
 * the cuDNN backend API documentation.
 */
typedef enum {
|
|
916
|
+
CUDNN_HEUR_MODE_INSTANT = 0,
|
|
917
|
+
CUDNN_HEUR_MODE_B = 1,
|
|
918
|
+
CUDNN_HEUR_MODE_FALLBACK = 2,
|
|
919
|
+
CUDNN_HEUR_MODE_A = 3,
|
|
920
|
+
/* Equals the number of modes listed above (4); presumably a count sentinel -- confirm. */
CUDNN_HEUR_MODES_COUNT = 4,
|
|
921
|
+
} cudnnBackendHeurMode_t;
|
|
922
|
+
|
|
923
|
+
/*
 * cudnnBackendTensorReordering_t: tensor-reordering identifiers.
 *
 * Values are explicitly numbered 0..3 with NONE = 0. Unlike several sibling
 * enums in this header, there is no trailing COUNT entry.
 * NOTE(review): the data arrangement behind each reordering name is not
 * visible in this header -- see the cuDNN backend API documentation.
 */
typedef enum {
|
|
924
|
+
CUDNN_TENSOR_REORDERING_NONE = 0,
|
|
925
|
+
CUDNN_TENSOR_REORDERING_INT8x32 = 1,
|
|
926
|
+
CUDNN_TENSOR_REORDERING_F16x16 = 2,
|
|
927
|
+
CUDNN_TENSOR_REORDERING_F8_128x4 = 3,
|
|
928
|
+
} cudnnBackendTensorReordering_t;
|
|
929
|
+
|
|
930
|
+
/*
 * cudnnPaddingMode_t: padding-mode identifiers, explicitly numbered 0..2.
 *
 * NOTE(review): presumably the value type behind
 * CUDNN_ATTR_RESAMPLE_PADDING_MODE / CUDNN_TYPE_PADDING_MODE declared earlier
 * in this header -- confirm against the cuDNN backend API documentation.
 */
typedef enum {
|
|
931
|
+
CUDNN_ZERO_PAD = 0,
|
|
932
|
+
CUDNN_NEG_INF_PAD = 1,
|
|
933
|
+
CUDNN_EDGE_VAL_PAD = 2,
|
|
934
|
+
} cudnnPaddingMode_t;
|
|
935
|
+
|
|
936
|
+
/*
 * cudnnBackendNormMode_t: normalization-mode identifiers, explicitly
 * numbered 0..5.
 *
 * NOTE(review): presumably the value type behind
 * CUDNN_ATTR_OPERATION_NORM_FWD_MODE / CUDNN_ATTR_OPERATION_NORM_BWD_MODE and
 * CUDNN_TYPE_NORM_MODE declared earlier in this header -- confirm.
 */
typedef enum {
|
|
937
|
+
CUDNN_LAYER_NORM = 0,
|
|
938
|
+
CUDNN_INSTANCE_NORM = 1,
|
|
939
|
+
CUDNN_BATCH_NORM = 2,
|
|
940
|
+
CUDNN_GROUP_NORM = 3,
|
|
941
|
+
CUDNN_RMS_NORM = 4,
|
|
942
|
+
CUDNN_ADA_LAYER_NORM = 5,
|
|
943
|
+
} cudnnBackendNormMode_t;
|
|
944
|
+
|
|
945
|
+
/*
 * cudnnBackendNormFwdPhase_t: forward-normalization phase, with two values:
 * INFERENCE = 0 and TRAINING = 1.
 *
 * NOTE(review): presumably the value type behind
 * CUDNN_ATTR_OPERATION_NORM_FWD_PHASE / CUDNN_TYPE_NORM_FWD_PHASE declared
 * earlier in this header -- confirm.
 */
typedef enum {
|
|
946
|
+
CUDNN_NORM_FWD_INFERENCE = 0,
|
|
947
|
+
CUDNN_NORM_FWD_TRAINING = 1,
|
|
948
|
+
} cudnnBackendNormFwdPhase_t;
|
|
949
|
+
|
|
950
|
+
/*
 * cudnnBackendCreateDescriptor: declaration only; the definition is not in
 * this header.
 *
 * descriptorType: which kind of backend descriptor is requested
 *                 (a cudnnBackendDescriptorType_t value).
 * descriptor:     pointer to a cudnnBackendDescriptor_t.
 *                 NOTE(review): presumably an out-parameter that receives the
 *                 new descriptor -- confirm against the cuDNN documentation.
 *
 * Returns a cudnnStatus_t.
 */
cudnnStatus_t CUDNNWINAPI
|
|
951
|
+
cudnnBackendCreateDescriptor(cudnnBackendDescriptorType_t descriptorType, cudnnBackendDescriptor_t *descriptor);
|
|
952
|
+
|
|
953
|
+
/*
 * cudnnBackendDestroyDescriptor: declaration only; the definition is not in
 * this header.
 *
 * descriptor: the backend descriptor to destroy, passed by value.
 *             NOTE(review): presumably one obtained from
 *             cudnnBackendCreateDescriptor() -- confirm.
 *
 * Returns a cudnnStatus_t.
 */
cudnnStatus_t CUDNNWINAPI
|
|
954
|
+
cudnnBackendDestroyDescriptor(cudnnBackendDescriptor_t descriptor);
|
|
955
|
+
|
|
956
|
+
/*
 * cudnnBackendInitialize: declaration only. It is the only backend entry
 * point in this group marked with CUDNN_DEPRECATED (macro defined outside
 * this section).
 *
 * descriptor: the backend descriptor to operate on.
 *
 * Returns a cudnnStatus_t.
 * NOTE(review): the replacement call sequence for new code is not visible in
 * this header -- see the cuDNN documentation.
 */
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
|
|
957
|
+
cudnnBackendInitialize(cudnnBackendDescriptor_t descriptor);
|
|
958
|
+
|
|
959
|
+
/*
 * cudnnBackendFinalize: declaration only; the definition is not in this
 * header.
 *
 * descriptor: the backend descriptor to finalize.
 *             NOTE(review): presumably called after the descriptor's
 *             attributes have been set via cudnnBackendSetAttribute() --
 *             confirm the required ordering in the cuDNN documentation.
 *
 * Returns a cudnnStatus_t.
 */
cudnnStatus_t CUDNNWINAPI
|
|
960
|
+
cudnnBackendFinalize(cudnnBackendDescriptor_t descriptor);
|
|
961
|
+
|
|
962
|
+
/*
 * cudnnBackendSetAttribute: declaration only; the definition is not in this
 * header.
 *
 * descriptor:      the backend descriptor whose attribute is being set.
 * attributeName:   which attribute (a cudnnBackendAttributeName_t value).
 * attributeType:   type tag for the supplied elements
 *                  (a cudnnBackendAttributeType_t value).
 * elementCount:    64-bit signed count.
 *                  NOTE(review): presumably the number of elements in
 *                  arrayOfElements, not a byte size -- confirm.
 * arrayOfElements: read-only (const void *) untyped input buffer.
 *
 * Returns a cudnnStatus_t.
 */
cudnnStatus_t CUDNNWINAPI
|
|
963
|
+
cudnnBackendSetAttribute(cudnnBackendDescriptor_t descriptor,
|
|
964
|
+
cudnnBackendAttributeName_t attributeName,
|
|
965
|
+
cudnnBackendAttributeType_t attributeType,
|
|
966
|
+
int64_t elementCount,
|
|
967
|
+
const void *arrayOfElements);
|
|
968
|
+
|
|
969
|
+
/*
 * cudnnBackendGetAttribute: declaration only; the definition is not in this
 * header. Its parameters mirror cudnnBackendSetAttribute(), with an extra
 * count pointer and a writable buffer.
 *
 * descriptor:            the backend descriptor to query (declared
 *                        `cudnnBackendDescriptor_t const`).
 * attributeName:         which attribute (a cudnnBackendAttributeName_t value).
 * attributeType:         type tag for the requested elements
 *                        (a cudnnBackendAttributeType_t value).
 * requestedElementCount: 64-bit signed count.
 *                        NOTE(review): presumably the capacity of
 *                        arrayOfElements in elements -- confirm.
 * elementCount:          pointer to a 64-bit signed count.
 *                        NOTE(review): presumably receives the number of
 *                        elements available or written -- confirm, including
 *                        whether NULL is accepted.
 * arrayOfElements:       writable (void *) untyped output buffer.
 *
 * Returns a cudnnStatus_t.
 */
cudnnStatus_t CUDNNWINAPI
|
|
970
|
+
cudnnBackendGetAttribute(cudnnBackendDescriptor_t const descriptor,
|
|
971
|
+
cudnnBackendAttributeName_t attributeName,
|
|
972
|
+
cudnnBackendAttributeType_t attributeType,
|
|
973
|
+
int64_t requestedElementCount,
|
|
974
|
+
int64_t *elementCount,
|
|
975
|
+
void *arrayOfElements);
|
|
976
|
+
|
|
977
|
+
/*
 * cudnnBackendExecute: declaration only; the definition is not in this
 * header.
 *
 * handle:        a cudnnHandle_t.
 * executionPlan: backend descriptor for the plan to run.
 * variantPack:   backend descriptor for the variant pack to run it with.
 *
 * Both descriptor parameters share the generic cudnnBackendDescriptor_t type,
 * so the compiler cannot catch swapped arguments.
 * NOTE(review): presumably they must be EXECUTION_PLAN and VARIANT_PACK
 * descriptors respectively -- confirm, along with stream/synchronization
 * semantics, in the cuDNN documentation.
 *
 * Returns a cudnnStatus_t.
 */
cudnnStatus_t CUDNNWINAPI
|
|
978
|
+
cudnnBackendExecute(cudnnHandle_t handle, cudnnBackendDescriptor_t executionPlan, cudnnBackendDescriptor_t variantPack);
|
|
979
|
+
|
|
980
|
+
/*
 * cudnnBackendPopulateCudaGraph: declaration only; the definition is not in
 * this header. Its parameter list is identical to
 * cudnnBackendUpdateCudaGraph().
 *
 * handle:        a cudnnHandle_t.
 * executionPlan: backend descriptor for the plan.
 * variantPack:   backend descriptor for the variant pack.
 * graph:         a cudaGraph_t, passed by value.
 *                NOTE(review): presumably a caller-created graph that this
 *                call fills in -- confirm the required initial state (e.g.
 *                empty) in the cuDNN documentation.
 *
 * Returns a cudnnStatus_t.
 */
cudnnStatus_t CUDNNWINAPI
|
|
981
|
+
cudnnBackendPopulateCudaGraph(cudnnHandle_t handle,
|
|
982
|
+
cudnnBackendDescriptor_t executionPlan,
|
|
983
|
+
cudnnBackendDescriptor_t variantPack,
|
|
984
|
+
cudaGraph_t graph);
|
|
985
|
+
|
|
986
|
+
/*
 * cudnnBackendUpdateCudaGraph: declaration only; the definition is not in
 * this header. Its parameter list is identical to
 * cudnnBackendPopulateCudaGraph().
 *
 * handle:        a cudnnHandle_t.
 * executionPlan: backend descriptor for the plan.
 * variantPack:   backend descriptor for the variant pack.
 * graph:         a cudaGraph_t, passed by value.
 *                NOTE(review): presumably a graph previously filled by
 *                cudnnBackendPopulateCudaGraph() for the same plan -- confirm
 *                in the cuDNN documentation.
 *
 * Returns a cudnnStatus_t.
 */
cudnnStatus_t CUDNNWINAPI
|
|
987
|
+
cudnnBackendUpdateCudaGraph(cudnnHandle_t handle,
|
|
988
|
+
cudnnBackendDescriptor_t executionPlan,
|
|
989
|
+
cudnnBackendDescriptor_t variantPack,
|
|
990
|
+
cudaGraph_t graph);
|
|
991
|
+
|
|
992
|
+
#if defined(__cplusplus)
|
|
993
|
+
}
|
|
994
|
+
#endif
|
|
995
|
+
|
|
996
|
+
#endif /* CUDNN_GRAPH_H_ */
|