nvidia-cudnn-cu13 9.12.0.46__py3-none-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,996 @@
1
+ /*
2
+ * Copyright 2014-2023 NVIDIA Corporation. All rights reserved.
3
+ *
4
+ * NOTICE TO LICENSEE:
5
+ *
6
+ * This source code and/or documentation ("Licensed Deliverables") are
7
+ * subject to NVIDIA intellectual property rights under U.S. and
8
+ * international Copyright laws.
9
+ *
10
+ * These Licensed Deliverables contained herein is PROPRIETARY and
11
+ * CONFIDENTIAL to NVIDIA and is being provided under the terms and
12
+ * conditions of a form of NVIDIA software license agreement by and
13
+ * between NVIDIA and Licensee ("License Agreement") or electronically
14
+ * accepted by Licensee. Notwithstanding any terms or conditions to
15
+ * the contrary in the License Agreement, reproduction or disclosure
16
+ * of the Licensed Deliverables to any third party without the express
17
+ * written consent of NVIDIA is prohibited.
18
+ *
19
+ * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
20
+ * LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
21
+ * SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
22
+ * PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
23
+ * NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
24
+ * DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
25
+ * NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
26
+ * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
27
+ * LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
28
+ * SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
29
+ * DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
30
+ * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
31
+ * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
32
+ * OF THESE LICENSED DELIVERABLES.
33
+ *
34
+ * U.S. Government End Users. These Licensed Deliverables are a
35
+ * "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
36
+ * 1995), consisting of "commercial computer software" and "commercial
37
+ * computer software documentation" as such terms are used in 48
38
+ * C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
39
+ * only as a commercial end item. Consistent with 48 C.F.R.12.212 and
40
+ * 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
41
+ * U.S. Government End Users acquire the Licensed Deliverables with
42
+ * only those rights set forth herein.
43
+ *
44
+ * Any use of the Licensed Deliverables in individual and commercial
45
+ * software must include, in the user documentation and internal
46
+ * comments to the code, the above Disclaimer and U.S. Government End
47
+ * Users Notice.
48
+ */
49
+
50
+ /*
51
+ * cudnn_graph : cuDNN's basic definitions and operations.
52
+ */
53
+
54
+ #if !defined(CUDNN_GRAPH_H_)
55
+ #define CUDNN_GRAPH_H_
56
+
57
+ #include <cuda_runtime_api.h>
58
+ #include <library_types.h>
59
+
60
+ #include <stdint.h>
61
+
62
+ #include "cudnn_version.h"
63
+
64
+ /* These version numbers are autogenerated, do not edit manually. The #if below stops compilation with #error when any of them differs from CUDNN_MAJOR / CUDNN_MINOR / CUDNN_PATCHLEVEL (presumably supplied by cudnn_version.h -- confirm). */
65
+ #define CUDNN_GRAPH_MAJOR 9
66
+ #define CUDNN_GRAPH_MINOR 12
67
+ #define CUDNN_GRAPH_PATCH 0
68
+
69
+ #if (CUDNN_GRAPH_MAJOR != CUDNN_MAJOR) || (CUDNN_GRAPH_MINOR != CUDNN_MINOR) || (CUDNN_GRAPH_PATCH != CUDNN_PATCHLEVEL)
70
+ #error Version mismatch in cuDNN GRAPH!!!
71
+ #endif
72
+
73
+ #ifndef CUDNNWINAPI /* calling-convention macro placed on every function prototype below; left untouched if the includer already defined it */
74
+ #ifdef _WIN32
75
+ #define CUDNNWINAPI __stdcall
76
+ #else /* not _WIN32: the macro expands to nothing */
77
+ #define CUDNNWINAPI
78
+ #endif /* _WIN32 */
79
+ #endif /* CUDNNWINAPI */
80
+
81
+ /* Warnings for deprecated APIs are enabled by defining the CUDNN_WARN_DEPRECATED macro; without it both CUDNN_DEPRECATED and CUDNN_DEPRECATED_ENUM expand to nothing (final #else branch) */
82
+ #if defined(CUDNN_WARN_DEPRECATED) && (defined(__GNUC__) || defined(__clang__))
83
+ /* GCC, Intel C/C++, Cray C/C++, CLANG, IBM XL C/C++ little endian */
84
+ #define CUDNN_DEPRECATED __attribute__((deprecated))
85
+ #define CUDNN_DEPRECATED_ENUM __attribute__((deprecated))
86
+ #elif defined(CUDNN_WARN_DEPRECATED) && defined(_MSC_VER)
87
+ /* Microsoft Visual C++ */
88
+ #define CUDNN_DEPRECATED __declspec(deprecated)
89
+ #define CUDNN_DEPRECATED_ENUM __declspec(deprecated)
90
+ #elif defined(CUDNN_WARN_DEPRECATED) && (__cplusplus >= 201402L)
91
+ /* C++14 compilers */
92
+ #define CUDNN_DEPRECATED [[deprecated]]
93
+ #define CUDNN_DEPRECATED_ENUM [[deprecated]]
94
+ #else
95
+ /* CUDNN_WARN_DEPRECATED not defined, or no support for the deprecated attribute */
96
+ #define CUDNN_DEPRECATED
97
+ #define CUDNN_DEPRECATED_ENUM
98
+ #endif
99
+
100
+ #if defined(__cplusplus)
101
+ extern "C" {
102
+ #endif
103
+
104
+ struct cudnnContext; /* forward declaration only; no definition is visible in this part of the header */
105
+ typedef struct cudnnContext *cudnnHandle_t; /* pointer to the incomplete struct; this is the handle type taken by the functions below */
106
+
107
+ size_t CUDNNWINAPI
108
+ cudnnGetVersion(void); /* NOTE(review): presumably the library version packed into one integer -- confirm the encoding in the cuDNN API reference */
109
+
110
+ size_t CUDNNWINAPI
111
+ cudnnGetMaxDeviceVersion(void); /* NOTE(review): meaning of the returned value is not derivable from this header -- see the cuDNN API reference */
112
+
113
+ /* Returns CUDA Runtime version statically linked against cudnn */
114
+ size_t CUDNNWINAPI
115
+ cudnnGetCudartVersion(void);
116
+
117
+ /*
118
+ * CUDNN return codes. Non-zero codes are grouped in blocks of 1000 (1xxx uncategorized, 2xxx BAD_PARAM, 3xxx NOT_SUPPORTED, 4xxx INTERNAL_ERROR, 5xxx EXECUTION_FAILED), which is what CUDNN_STATUS_CATEGORY() / CUDNN_STATUS_SPECIFIC_ERROR() rely on. The entries tagged CUDNN_DEPRECATED_ENUM at the end are aliases of codes above, except CUDNN_STATUS_INVALID_VALUE which keeps its own value 2001.
119
+ */
120
+ typedef enum {
121
+ CUDNN_STATUS_SUCCESS = 0,
122
+
123
+ /* Uncategorized errors */
124
+ CUDNN_STATUS_NOT_INITIALIZED = 1001,
125
+ CUDNN_STATUS_SUBLIBRARY_VERSION_MISMATCH = 1002,
126
+ CUDNN_STATUS_SERIALIZATION_VERSION_MISMATCH = 1003,
127
+ CUDNN_STATUS_DEPRECATED = 1004,
128
+ CUDNN_STATUS_LICENSE_ERROR = 1005,
129
+ CUDNN_STATUS_RUNTIME_IN_PROGRESS = 1006,
130
+ CUDNN_STATUS_RUNTIME_FP_OVERFLOW = 1007,
131
+ CUDNN_STATUS_SUBLIBRARY_LOADING_FAILED = 1008,
132
+
133
+ CUDNN_STATUS_BAD_PARAM = 2000, /* category base; 2001 is only used by the deprecated CUDNN_STATUS_INVALID_VALUE below */
134
+ CUDNN_STATUS_BAD_PARAM_NULL_POINTER = 2002,
135
+ CUDNN_STATUS_BAD_PARAM_MISALIGNED_POINTER = 2003,
136
+ CUDNN_STATUS_BAD_PARAM_NOT_FINALIZED = 2004,
137
+ CUDNN_STATUS_BAD_PARAM_OUT_OF_BOUND = 2005,
138
+ CUDNN_STATUS_BAD_PARAM_SIZE_INSUFFICIENT = 2006,
139
+ CUDNN_STATUS_BAD_PARAM_STREAM_MISMATCH = 2007,
140
+ CUDNN_STATUS_BAD_PARAM_SHAPE_MISMATCH = 2008,
141
+ CUDNN_STATUS_BAD_PARAM_DUPLICATED_ENTRIES = 2009,
142
+ CUDNN_STATUS_BAD_PARAM_ATTRIBUTE_TYPE = 2010,
143
+ CUDNN_STATUS_BAD_PARAM_CUDA_GRAPH_MISMATCH = 2011,
144
+ CUDNN_STATUS_BAD_PARAM_DESCRIPTOR_TYPE = 2012,
145
+
146
+ CUDNN_STATUS_NOT_SUPPORTED = 3000, /* category base */
147
+ CUDNN_STATUS_NOT_SUPPORTED_GRAPH_PATTERN = 3001,
148
+ CUDNN_STATUS_NOT_SUPPORTED_SHAPE = 3002,
149
+ CUDNN_STATUS_NOT_SUPPORTED_DATA_TYPE = 3003,
150
+ CUDNN_STATUS_NOT_SUPPORTED_LAYOUT = 3004,
151
+ CUDNN_STATUS_NOT_SUPPORTED_INCOMPATIBLE_CUDA_DRIVER = 3005,
152
+ CUDNN_STATUS_NOT_SUPPORTED_INCOMPATIBLE_CUDART = 3006,
153
+ CUDNN_STATUS_NOT_SUPPORTED_ARCH_MISMATCH = 3007,
154
+ CUDNN_STATUS_NOT_SUPPORTED_RUNTIME_PREREQUISITE_MISSING = 3008,
155
+ CUDNN_STATUS_NOT_SUPPORTED_SUBLIBRARY_UNAVAILABLE = 3009,
156
+ CUDNN_STATUS_NOT_SUPPORTED_SHARED_MEMORY_INSUFFICIENT = 3010,
157
+ CUDNN_STATUS_NOT_SUPPORTED_PADDING = 3011,
158
+ CUDNN_STATUS_NOT_SUPPORTED_BAD_LAUNCH_PARAM = 3012,
159
+ CUDNN_STATUS_NOT_SUPPORTED_CUDA_GRAPH_NATIVE_API = 3013,
160
+
161
+ CUDNN_STATUS_INTERNAL_ERROR = 4000, /* category base */
162
+ CUDNN_STATUS_INTERNAL_ERROR_COMPILATION_FAILED = 4001,
163
+ CUDNN_STATUS_INTERNAL_ERROR_UNEXPECTED_VALUE = 4002,
164
+ CUDNN_STATUS_INTERNAL_ERROR_HOST_ALLOCATION_FAILED = 4003,
165
+ CUDNN_STATUS_INTERNAL_ERROR_DEVICE_ALLOCATION_FAILED = 4004,
166
+ CUDNN_STATUS_INTERNAL_ERROR_BAD_LAUNCH_PARAM = 4005,
167
+ CUDNN_STATUS_INTERNAL_ERROR_TEXTURE_CREATION_FAILED = 4006,
168
+
169
+ CUDNN_STATUS_EXECUTION_FAILED = 5000, /* category base */
170
+ CUDNN_STATUS_EXECUTION_FAILED_CUDA_DRIVER = 5001,
171
+ CUDNN_STATUS_EXECUTION_FAILED_CUBLAS = 5002,
172
+ CUDNN_STATUS_EXECUTION_FAILED_CUDART = 5003,
173
+ CUDNN_STATUS_EXECUTION_FAILED_CURAND = 5004,
174
+
175
+ CUDNN_STATUS_ALLOC_FAILED CUDNN_DEPRECATED_ENUM = CUDNN_STATUS_INTERNAL_ERROR_HOST_ALLOCATION_FAILED,
176
+ CUDNN_STATUS_INVALID_VALUE CUDNN_DEPRECATED_ENUM = 2001 /* please transition to CUDNN_STATUS_BAD_PARAM instead */,
177
+ CUDNN_STATUS_ARCH_MISMATCH CUDNN_DEPRECATED_ENUM = CUDNN_STATUS_NOT_SUPPORTED_ARCH_MISMATCH,
178
+ CUDNN_STATUS_MAPPING_ERROR CUDNN_DEPRECATED_ENUM = CUDNN_STATUS_INTERNAL_ERROR_TEXTURE_CREATION_FAILED,
179
+ CUDNN_STATUS_RUNTIME_PREREQUISITE_MISSING CUDNN_DEPRECATED_ENUM =
180
+ CUDNN_STATUS_NOT_SUPPORTED_RUNTIME_PREREQUISITE_MISSING,
181
+ CUDNN_STATUS_VERSION_MISMATCH CUDNN_DEPRECATED_ENUM = CUDNN_STATUS_SUBLIBRARY_VERSION_MISMATCH,
182
+ } cudnnStatus_t;
183
+
184
+ #define CUDNN_STATUS_FULL_ERROR_CODE(category, specific_err) ((cudnnStatus_t)(0 + (category) + (specific_err))) /* sum cast to cudnnStatus_t, e.g. (2000, 3) -> 2003 */
185
+ #define CUDNN_STATUS_CATEGORY(full_error_code) ((full_error_code) / 1000 * 1000) /* integer division drops the last three digits, e.g. 2003 -> 2000 */
186
+ #define CUDNN_STATUS_SPECIFIC_ERROR(full_error_code) ((full_error_code) % 1000) /* last three digits, e.g. 2003 -> 3 */
187
+
188
+ /* human-readable error messages */
189
+ const char *CUDNNWINAPI
190
+ cudnnGetErrorString(cudnnStatus_t status); /* takes a status code by value and returns a const char pointer */
191
+
192
+ void CUDNNWINAPI
193
+ cudnnGetLastErrorString(char *message, size_t max_size); /* NOTE(review): presumably fills the caller's buffer `message` with at most `max_size` bytes -- confirm truncation/termination rules in the cuDNN API reference */
194
+
195
+ /* Forward declaration only in this version: struct cudnnRuntimeTag_t is an incomplete type here, and the typedef itself is tagged CUDNN_DEPRECATED */
196
+ typedef struct cudnnRuntimeTag_t cudnnRuntimeTag_t CUDNN_DEPRECATED;
197
+
198
+ typedef enum { /* type of the `mode` argument of cudnnQueryRuntimeError() declared below */
199
+ CUDNN_ERRQUERY_RAWCODE = 0,
200
+ CUDNN_ERRQUERY_NONBLOCKING = 1,
201
+ CUDNN_ERRQUERY_BLOCKING = 2,
202
+ } cudnnErrQueryMode_t;
203
+
204
+ CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
205
+ cudnnQueryRuntimeError(cudnnHandle_t handle, cudnnStatus_t *rstatus, cudnnErrQueryMode_t mode, cudnnRuntimeTag_t *tag); /* tagged CUDNN_DEPRECATED; NOTE(review): `rstatus` looks like an output parameter and `tag` points to the incomplete cudnnRuntimeTag_t -- confirm usage in the cuDNN API reference */
206
+
207
+ cudnnStatus_t CUDNNWINAPI
208
+ cudnnGetProperty(libraryPropertyType type, int *value); /* libraryPropertyType is not declared in this header (presumably from <library_types.h> -- confirm); NOTE(review): `value` looks like an output parameter */
209
+
210
+ cudnnStatus_t CUDNNWINAPI
211
+ cudnnCreate(cudnnHandle_t *handle); /* takes a pointer to a handle, unlike the three functions below which take the handle by value */
212
+ cudnnStatus_t CUDNNWINAPI
213
+ cudnnDestroy(cudnnHandle_t handle);
214
+ cudnnStatus_t CUDNNWINAPI
215
+ cudnnSetStream(cudnnHandle_t handle, cudaStream_t streamId); /* stream passed by value */
216
+ cudnnStatus_t CUDNNWINAPI
217
+ cudnnGetStream(cudnnHandle_t handle, cudaStream_t *streamId); /* stream returned through a pointer */
218
+ /*
219
+ * CUDNN data type. Values run 0..19 without gaps; 5 (INT8x4), 7 (UINT8x4) and 8 (INT8x32) are tagged CUDNN_DEPRECATED_ENUM.
220
+ */
221
+ typedef enum {
222
+ CUDNN_DATA_FLOAT = 0,
223
+ CUDNN_DATA_DOUBLE = 1,
224
+ CUDNN_DATA_HALF = 2,
225
+ CUDNN_DATA_INT8 = 3,
226
+ CUDNN_DATA_INT32 = 4,
227
+ CUDNN_DATA_INT8x4 CUDNN_DEPRECATED_ENUM = 5,
228
+ CUDNN_DATA_UINT8 = 6,
229
+ CUDNN_DATA_UINT8x4 CUDNN_DEPRECATED_ENUM = 7,
230
+ CUDNN_DATA_INT8x32 CUDNN_DEPRECATED_ENUM = 8,
231
+ CUDNN_DATA_BFLOAT16 = 9,
232
+ CUDNN_DATA_INT64 = 10,
233
+ CUDNN_DATA_BOOLEAN = 11,
234
+ CUDNN_DATA_FP8_E4M3 = 12,
235
+ CUDNN_DATA_FP8_E5M2 = 13,
236
+ CUDNN_DATA_FAST_FLOAT_FOR_FP8 = 14,
237
+ CUDNN_DATA_FP8_E8M0 = 15,
238
+ CUDNN_DATA_FP4_E2M1 = 16,
239
+ CUDNN_DATA_INT4 = 17,
240
+ CUDNN_DATA_UINT4 = 18,
241
+ CUDNN_DATA_UINT32 = 19,
242
+ } cudnnDataType_t;
243
+
244
+ /*
245
+ * CUDNN math type. Four modes numbered 0..3; no declaration in this part of the header takes this type (NOTE(review): its consumers are presumably declared in other cuDNN headers -- confirm).
246
+ */
247
+ typedef enum {
248
+ CUDNN_DEFAULT_MATH = 0,
249
+ CUDNN_TENSOR_OP_MATH = 1,
250
+ CUDNN_TENSOR_OP_MATH_ALLOW_CONVERSION = 2,
251
+ CUDNN_FMA_MATH = 3,
252
+ } cudnnMathType_t;
253
+
254
+ /*
255
+ * CUDNN NaN propagation. Both enumerators are tagged CUDNN_DEPRECATED_ENUM.
256
+ */
257
+ typedef enum {
258
+ CUDNN_NOT_PROPAGATE_NAN CUDNN_DEPRECATED_ENUM = 0,
259
+ CUDNN_PROPAGATE_NAN CUDNN_DEPRECATED_ENUM = 1,
260
+ } cudnnNanPropagation_t;
261
+
262
+ /*
263
+ * Behavior for OOB samples. OOB samples are samples where L+R > T is encountered during the gradient calculation. If
264
+ * gradMode is set to CUDNN_CTC_SKIP_OOB_GRADIENTS, then the CTC loss function does not write to the gradient buffer for
265
+ * that sample. Instead, the current values, even if not finite, are retained. If gradMode is set to
266
+ * CUDNN_CTC_ZERO_OOB_GRADIENTS, then the gradient for that sample is set to zero. This guarantees a finite gradient.
267
+ */
268
+ typedef enum {
269
+ CUDNN_CTC_ZERO_OOB_GRADIENTS = 0, /* gradient of an OOB sample is set to zero */
270
+ CUDNN_CTC_SKIP_OOB_GRADIENTS = 1, /* gradient buffer of an OOB sample is left as it was */
271
+ } cudnnCTCGradMode_t;
272
+
273
+ typedef enum { /* tensor layout selector; the per-value comments describe which stride is 1 */
274
+ CUDNN_TENSOR_NCHW = 0, /* row major (wStride = 1, hStride = w) */
275
+ CUDNN_TENSOR_NHWC = 1, /* feature maps interleaved (cStride = 1) */
276
+ CUDNN_TENSOR_NCHW_VECT_C = 2, /* each image point is a vector of elements of C, vector length in data type */
277
+ } cudnnTensorFormat_t;
278
+
279
+ /*
280
+ * CUDNN ReduceTensor op type. Nine operators numbered 0..8. NOTE(review): presumably the value type behind CUDNN_ATTR_REDUCTION_OPERATOR / CUDNN_TYPE_REDUCTION_OPERATOR_TYPE declared later in this header -- confirm.
281
+ */
282
+ typedef enum {
283
+ CUDNN_REDUCE_TENSOR_ADD = 0,
284
+ CUDNN_REDUCE_TENSOR_MUL = 1,
285
+ CUDNN_REDUCE_TENSOR_MIN = 2,
286
+ CUDNN_REDUCE_TENSOR_MAX = 3,
287
+ CUDNN_REDUCE_TENSOR_AMAX = 4,
288
+ CUDNN_REDUCE_TENSOR_AVG = 5,
289
+ CUDNN_REDUCE_TENSOR_NORM1 = 6,
290
+ CUDNN_REDUCE_TENSOR_NORM2 = 7,
291
+ CUDNN_REDUCE_TENSOR_MUL_NO_ZEROS = 8,
292
+ } cudnnReduceTensorOp_t;
293
+
294
+ /*
295
+ * activation mode. The typedef as a whole is tagged CUDNN_DEPRECATED (see the closing line); the individual enumerators carry no deprecation tag.
296
+ */
297
+ typedef enum {
298
+ CUDNN_ACTIVATION_SIGMOID = 0,
299
+ CUDNN_ACTIVATION_RELU = 1,
300
+ CUDNN_ACTIVATION_TANH = 2,
301
+ CUDNN_ACTIVATION_CLIPPED_RELU = 3,
302
+ CUDNN_ACTIVATION_ELU = 4,
303
+ CUDNN_ACTIVATION_IDENTITY = 5,
304
+ CUDNN_ACTIVATION_SWISH = 6
305
+ } cudnnActivationMode_t CUDNN_DEPRECATED;
306
+
307
+ typedef enum { /* message severity passed to cudnnCallback_t; the numeric value is also the bit position used by the CUDNN_SEV_*_EN masks */
308
+ CUDNN_SEV_FATAL = 0, /* no CUDNN_SEV_FATAL_EN mask is defined in this part of the header */
309
+ CUDNN_SEV_ERROR = 1,
310
+ CUDNN_SEV_WARNING = 2,
311
+ CUDNN_SEV_INFO = 3,
312
+ } cudnnSeverity_t;
313
+
314
+ /* Message masks to be used with cudnnSetCallback(); each is 1U shifted left by the matching cudnnSeverity_t value */
315
+ #define CUDNN_SEV_ERROR_EN (1U << CUDNN_SEV_ERROR) /* 1U << 1 == 0x2 */
316
+ #define CUDNN_SEV_WARNING_EN (1U << CUDNN_SEV_WARNING) /* 1U << 2 == 0x4 */
317
+ #define CUDNN_SEV_INFO_EN (1U << CUDNN_SEV_INFO) /* 1U << 3 == 0x8 */
318
+
319
+ /* struct containing useful information for each API call; handed to cudnnCallback_t callbacks as a const pointer */
320
+ typedef struct cudnnDebugStruct {
321
+ unsigned cudnn_version;
322
+ cudnnStatus_t cudnnStatus;
323
+ unsigned time_sec; /* epoch time in seconds */
324
+ unsigned time_usec; /* microseconds part of epoch time */
325
+ unsigned time_delta; /* time since start in seconds */
326
+ cudnnHandle_t handle; /* cudnn handle */
327
+ cudaStream_t stream; /* cuda stream ID */
328
+ unsigned long long pid; /* process ID */
329
+ unsigned long long tid; /* thread ID */
330
+ int cudaDeviceId; /* CUDA device ID */
331
+ int reserved[15]; /* reserved for future use */
332
+ } cudnnDebug_t;
333
+
334
+ typedef void (*cudnnCallback_t)(cudnnSeverity_t sev, void *udata, const cudnnDebug_t *dbg, const char *msg); /* callback type accepted by cudnnSetCallback(); note it is declared without CUDNNWINAPI. NOTE(review): `udata` is presumably the pointer given to cudnnSetCallback() -- confirm */
335
+
336
+ cudnnStatus_t CUDNNWINAPI
337
+ cudnnSetCallback(unsigned mask, void *udata, cudnnCallback_t fptr); /* `mask` is built from the CUDNN_SEV_*_EN macros (per the comment on those macros) */
338
+
339
+ cudnnStatus_t CUDNNWINAPI
340
+ cudnnGetCallback(unsigned *mask, void **udata, cudnnCallback_t *fptr); /* same three items as cudnnSetCallback(), each through a pointer */
341
+
342
+ /*
343
+ * \brief Cross-library version checker.
344
+ * This function is implemented differently in each sub-library. Each sublib
345
+ * checks whether its own version matches that of its dependencies.
346
+ * \returns CUDNN_STATUS_SUCCESS if the version check passes,
347
+ * CUDNN_STATUS_SUBLIBRARY_VERSION_MISMATCH if the versions are inconsistent.
348
+ */
349
+ cudnnStatus_t CUDNNWINAPI
350
+ cudnnGraphVersionCheck(void); /* the graph sub-library's instance of the checker described above; takes no arguments */
351
+
352
+ /* Maximum supported number of tensor dimensions (compile-time constant) */
353
+ #define CUDNN_DIM_MAX 8
354
+
355
+ /*
356
+ * convolution mode. NOTE(review): presumably the value type behind CUDNN_ATTR_CONVOLUTION_CONV_MODE / CUDNN_TYPE_CONVOLUTION_MODE declared later in this header -- confirm.
357
+ */
358
+ typedef enum { CUDNN_CONVOLUTION = 0, CUDNN_CROSS_CORRELATION = 1 } cudnnConvolutionMode_t;
359
+
360
+ /*
361
+ * CUDNN Reorder. The typedef as a whole is tagged CUDNN_DEPRECATED (see the closing line).
362
+ */
363
+ typedef enum {
364
+ CUDNN_DEFAULT_REORDER = 0,
365
+ CUDNN_NO_REORDER = 1,
366
+ } cudnnReorderType_t CUDNN_DEPRECATED;
367
+
368
+ typedef void *cudnnBackendDescriptor_t; /* untyped (void *) descriptor handle; this header exposes no structure behind it */
369
+
370
+ typedef struct cudnnFractionStruct { /* pair of signed 64-bit integers; NOTE(review): presumably the payload for CUDNN_TYPE_FRACTION declared later in this header -- confirm */
371
+ int64_t numerator;
372
+ int64_t denominator;
373
+ } cudnnFraction_t;
374
+
375
+ typedef enum { /* pointwise operation selector. The first two groups are listed by name, so their values are not monotonic (e.g. ADD_SQUARE = 5 precedes MAX = 3, SQRT = 4 sits among 17..19). 100s are *_FWD, 200s are *_BWD with each BWD value equal to the matching FWD value + 100, 300s comparisons, 400s logical ops. */
376
+ CUDNN_POINTWISE_ADD = 0,
377
+ CUDNN_POINTWISE_ADD_SQUARE = 5,
378
+ CUDNN_POINTWISE_DIV = 6,
379
+ CUDNN_POINTWISE_MAX = 3,
380
+ CUDNN_POINTWISE_MIN = 2,
381
+ CUDNN_POINTWISE_MOD = 7,
382
+ CUDNN_POINTWISE_MUL = 1,
383
+ CUDNN_POINTWISE_POW = 8,
384
+ CUDNN_POINTWISE_SUB = 9,
385
+
386
+ CUDNN_POINTWISE_ABS = 10,
387
+ CUDNN_POINTWISE_CEIL = 11,
388
+ CUDNN_POINTWISE_COS = 12,
389
+ CUDNN_POINTWISE_EXP = 13,
390
+ CUDNN_POINTWISE_FLOOR = 14,
391
+ CUDNN_POINTWISE_LOG = 15,
392
+ CUDNN_POINTWISE_NEG = 16,
393
+ CUDNN_POINTWISE_RSQRT = 17,
394
+ CUDNN_POINTWISE_SIN = 18,
395
+ CUDNN_POINTWISE_SQRT = 4, /* out of sequence: value 4, not in the 10..23 run */
396
+ CUDNN_POINTWISE_TAN = 19,
397
+ CUDNN_POINTWISE_ERF = 20,
398
+ CUDNN_POINTWISE_IDENTITY = 21,
399
+ CUDNN_POINTWISE_RECIPROCAL = 22,
400
+ CUDNN_POINTWISE_ATAN2 = 23,
401
+
402
+ CUDNN_POINTWISE_RELU_FWD = 100,
403
+ CUDNN_POINTWISE_TANH_FWD = 101,
404
+ CUDNN_POINTWISE_SIGMOID_FWD = 102,
405
+ CUDNN_POINTWISE_ELU_FWD = 103,
406
+ CUDNN_POINTWISE_GELU_FWD = 104,
407
+ CUDNN_POINTWISE_SOFTPLUS_FWD = 105,
408
+ CUDNN_POINTWISE_SWISH_FWD = 106,
409
+ CUDNN_POINTWISE_GELU_APPROX_TANH_FWD = 107,
410
+
411
+ CUDNN_POINTWISE_RELU_BWD = 200,
412
+ CUDNN_POINTWISE_TANH_BWD = 201,
413
+ CUDNN_POINTWISE_SIGMOID_BWD = 202,
414
+ CUDNN_POINTWISE_ELU_BWD = 203,
415
+ CUDNN_POINTWISE_GELU_BWD = 204,
416
+ CUDNN_POINTWISE_SOFTPLUS_BWD = 205,
417
+ CUDNN_POINTWISE_SWISH_BWD = 206,
418
+ CUDNN_POINTWISE_GELU_APPROX_TANH_BWD = 207,
419
+
420
+ CUDNN_POINTWISE_CMP_EQ = 300,
421
+ CUDNN_POINTWISE_CMP_NEQ = 301,
422
+ CUDNN_POINTWISE_CMP_GT = 302,
423
+ CUDNN_POINTWISE_CMP_GE = 303,
424
+ CUDNN_POINTWISE_CMP_LT = 304,
425
+ CUDNN_POINTWISE_CMP_LE = 305,
426
+
427
+ CUDNN_POINTWISE_LOGICAL_AND = 400,
428
+ CUDNN_POINTWISE_LOGICAL_OR = 401,
429
+ CUDNN_POINTWISE_LOGICAL_NOT = 402,
430
+
431
+ CUDNN_POINTWISE_GEN_INDEX = 501, /* group starts at 501, not 500 */
432
+
433
+ CUDNN_POINTWISE_BINARY_SELECT = 601, /* group starts at 601, not 600 */
434
+ } cudnnPointwiseMode_t;
435
+
436
+ typedef enum { /* resampling mode; values are not listed in numeric order and one value is shared by two names */
437
+ CUDNN_RESAMPLE_NEAREST = 0,
438
+ CUDNN_RESAMPLE_BILINEAR = 1,
439
+ CUDNN_RESAMPLE_AVGPOOL = 2, /* same value as CUDNN_RESAMPLE_AVGPOOL_INCLUDE_PADDING */
440
+ CUDNN_RESAMPLE_AVGPOOL_INCLUDE_PADDING = 2,
441
+ CUDNN_RESAMPLE_AVGPOOL_EXCLUDE_PADDING = 4,
442
+ CUDNN_RESAMPLE_MAXPOOL = 3,
443
+ } cudnnResampleMode_t;
444
+
445
+ typedef enum { /* NOTE(review): presumably the value type behind CUDNN_ATTR_OPERATION_SIGNAL_MODE / CUDNN_TYPE_SIGNAL_MODE declared later in this header -- confirm */
446
+ CUDNN_SIGNAL_SET = 0,
447
+ CUDNN_SIGNAL_WAIT = 1,
448
+ } cudnnSignalMode_t;
449
+
450
+ typedef enum { /* single-valued enum; NOTE(review): presumably the value type behind CUDNN_ATTR_OPERATION_GENSTATS_MODE / CUDNN_TYPE_GENSTATS_MODE -- confirm */
451
+ CUDNN_GENSTATS_SUM_SQSUM = 0,
452
+ } cudnnGenStatsMode_t;
453
+
454
+ typedef enum { /* NOTE(review): presumably the value type behind CUDNN_ATTR_OPERATION_BN_FINALIZE_STATS_MODE / CUDNN_TYPE_BN_FINALIZE_STATS_MODE -- confirm */
455
+ CUDNN_BN_FINALIZE_STATISTICS_TRAINING = 0,
456
+ CUDNN_BN_FINALIZE_STATISTICS_INFERENCE = 1,
457
+ } cudnnBnFinalizeStatsMode_t;
458
+
459
+ typedef enum { /* NOTE(review): presumably the value type behind CUDNN_ATTR_RNG_DISTRIBUTION / CUDNN_TYPE_RNG_DISTRIBUTION; each distribution has matching CUDNN_ATTR_RNG_*_DIST_* parameter attributes later in this header -- confirm */
460
+ CUDNN_RNG_DISTRIBUTION_BERNOULLI = 0,
461
+ CUDNN_RNG_DISTRIBUTION_UNIFORM = 1,
462
+ CUDNN_RNG_DISTRIBUTION_NORMAL = 2,
463
+ } cudnnRngDistribution_t;
464
+
465
+ typedef enum { /* attribute identifiers. Each blank-line-separated group shares a name prefix and a numeric range (0s POINTWISE, 100s CONVOLUTION, 200s ENGINEHEUR, ... 2700s DEVICEPROP). Values are explicit and not contiguous: some numbers inside a group are unused (e.g. 910-912, 1501-1502, 1715) and no 1400 group exists. */
466
+ CUDNN_ATTR_POINTWISE_MODE = 0,
467
+ CUDNN_ATTR_POINTWISE_MATH_PREC = 1,
468
+ CUDNN_ATTR_POINTWISE_NAN_PROPAGATION CUDNN_DEPRECATED_ENUM = 2,
469
+ CUDNN_ATTR_POINTWISE_RELU_LOWER_CLIP = 3,
470
+ CUDNN_ATTR_POINTWISE_RELU_UPPER_CLIP = 4,
471
+ CUDNN_ATTR_POINTWISE_RELU_LOWER_CLIP_SLOPE = 5,
472
+ CUDNN_ATTR_POINTWISE_ELU_ALPHA = 6,
473
+ CUDNN_ATTR_POINTWISE_SOFTPLUS_BETA = 7,
474
+ CUDNN_ATTR_POINTWISE_SWISH_BETA = 8,
475
+ CUDNN_ATTR_POINTWISE_AXIS = 9,
476
+
477
+ CUDNN_ATTR_CONVOLUTION_COMP_TYPE = 100,
478
+ CUDNN_ATTR_CONVOLUTION_CONV_MODE = 101,
479
+ CUDNN_ATTR_CONVOLUTION_DILATIONS = 102,
480
+ CUDNN_ATTR_CONVOLUTION_FILTER_STRIDES = 103,
481
+ CUDNN_ATTR_CONVOLUTION_POST_PADDINGS = 104,
482
+ CUDNN_ATTR_CONVOLUTION_PRE_PADDINGS = 105,
483
+ CUDNN_ATTR_CONVOLUTION_SPATIAL_DIMS = 106,
484
+
485
+ CUDNN_ATTR_ENGINEHEUR_MODE = 200,
486
+ CUDNN_ATTR_ENGINEHEUR_OPERATION_GRAPH = 201,
487
+ CUDNN_ATTR_ENGINEHEUR_RESULTS = 202,
488
+ CUDNN_ATTR_ENGINEHEUR_SM_COUNT_TARGET = 203,
489
+ CUDNN_ATTR_ENGINEHEUR_DEVICEPROP = 204,
490
+
491
+ CUDNN_ATTR_ENGINECFG_ENGINE = 300,
492
+ CUDNN_ATTR_ENGINECFG_INTERMEDIATE_INFO = 301,
493
+ CUDNN_ATTR_ENGINECFG_KNOB_CHOICES = 302,
494
+ CUDNN_ATTR_ENGINECFG_WORKSPACE_SIZE = 303,
495
+ CUDNN_ATTR_ENGINECFG_SHARED_MEMORY_USED = 304,
496
+
497
+ CUDNN_ATTR_EXECUTION_PLAN_HANDLE CUDNN_DEPRECATED_ENUM = 400,
498
+ CUDNN_ATTR_EXECUTION_PLAN_ENGINE_CONFIG = 401,
499
+ CUDNN_ATTR_EXECUTION_PLAN_WORKSPACE_SIZE = 402,
500
+ CUDNN_ATTR_EXECUTION_PLAN_COMPUTED_INTERMEDIATE_UIDS = 403,
501
+ CUDNN_ATTR_EXECUTION_PLAN_RUN_ONLY_INTERMEDIATE_UIDS = 404,
502
+ CUDNN_ATTR_EXECUTION_PLAN_JSON_REPRESENTATION = 405,
503
+ CUDNN_ATTR_EXECUTION_PLAN_KERNEL_CACHE = 406,
504
+ CUDNN_ATTR_EXECUTION_PLAN_DEVICEPROP = 407,
505
+
506
+ CUDNN_ATTR_INTERMEDIATE_INFO_UNIQUE_ID = 500,
507
+ CUDNN_ATTR_INTERMEDIATE_INFO_SIZE = 501,
508
+ CUDNN_ATTR_INTERMEDIATE_INFO_DEPENDENT_DATA_UIDS = 502,
509
+ CUDNN_ATTR_INTERMEDIATE_INFO_DEPENDENT_ATTRIBUTES = 503,
510
+
511
+ CUDNN_ATTR_KNOB_CHOICE_KNOB_TYPE = 600,
512
+ CUDNN_ATTR_KNOB_CHOICE_KNOB_VALUE = 601,
513
+
514
+ CUDNN_ATTR_OPERATION_CONVOLUTION_FORWARD_ALPHA = 700,
515
+ CUDNN_ATTR_OPERATION_CONVOLUTION_FORWARD_BETA = 701,
516
+ CUDNN_ATTR_OPERATION_CONVOLUTION_FORWARD_CONV_DESC = 702,
517
+ CUDNN_ATTR_OPERATION_CONVOLUTION_FORWARD_W = 703,
518
+ CUDNN_ATTR_OPERATION_CONVOLUTION_FORWARD_X = 704,
519
+ CUDNN_ATTR_OPERATION_CONVOLUTION_FORWARD_Y = 705,
520
+ CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_DATA_ALPHA = 706,
521
+ CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_DATA_BETA = 707,
522
+ CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_DATA_CONV_DESC = 708,
523
+ CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_DATA_W = 709,
524
+ CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_DATA_DX = 710,
525
+ CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_DATA_DY = 711,
526
+ CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_FILTER_ALPHA = 712,
527
+ CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_FILTER_BETA = 713,
528
+ CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_FILTER_CONV_DESC = 714,
529
+ CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_FILTER_DW = 715,
530
+ CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_FILTER_X = 716,
531
+ CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_FILTER_DY = 717,
532
+
533
+ CUDNN_ATTR_OPERATION_POINTWISE_PW_DESCRIPTOR = 750,
534
+ CUDNN_ATTR_OPERATION_POINTWISE_XDESC = 751,
535
+ CUDNN_ATTR_OPERATION_POINTWISE_BDESC = 752,
536
+ CUDNN_ATTR_OPERATION_POINTWISE_YDESC = 753,
537
+ CUDNN_ATTR_OPERATION_POINTWISE_ALPHA1 = 754,
538
+ CUDNN_ATTR_OPERATION_POINTWISE_ALPHA2 = 755,
539
+ CUDNN_ATTR_OPERATION_POINTWISE_DXDESC = 756,
540
+ CUDNN_ATTR_OPERATION_POINTWISE_DYDESC = 757,
541
+ CUDNN_ATTR_OPERATION_POINTWISE_TDESC = 758,
542
+
543
+ CUDNN_ATTR_OPERATION_GENSTATS_MODE = 770,
544
+ CUDNN_ATTR_OPERATION_GENSTATS_MATH_PREC = 771,
545
+ CUDNN_ATTR_OPERATION_GENSTATS_XDESC = 772,
546
+ CUDNN_ATTR_OPERATION_GENSTATS_SUMDESC = 773,
547
+ CUDNN_ATTR_OPERATION_GENSTATS_SQSUMDESC = 774,
548
+
549
+ CUDNN_ATTR_OPERATION_BN_FINALIZE_STATS_MODE = 780,
550
+ CUDNN_ATTR_OPERATION_BN_FINALIZE_MATH_PREC = 781,
551
+ CUDNN_ATTR_OPERATION_BN_FINALIZE_Y_SUM_DESC = 782,
552
+ CUDNN_ATTR_OPERATION_BN_FINALIZE_Y_SQ_SUM_DESC = 783,
553
+ CUDNN_ATTR_OPERATION_BN_FINALIZE_SCALE_DESC = 784,
554
+ CUDNN_ATTR_OPERATION_BN_FINALIZE_BIAS_DESC = 785,
555
+ CUDNN_ATTR_OPERATION_BN_FINALIZE_PREV_RUNNING_MEAN_DESC = 786,
556
+ CUDNN_ATTR_OPERATION_BN_FINALIZE_PREV_RUNNING_VAR_DESC = 787,
557
+ CUDNN_ATTR_OPERATION_BN_FINALIZE_UPDATED_RUNNING_MEAN_DESC = 788,
558
+ CUDNN_ATTR_OPERATION_BN_FINALIZE_UPDATED_RUNNING_VAR_DESC = 789,
559
+ CUDNN_ATTR_OPERATION_BN_FINALIZE_SAVED_MEAN_DESC = 790,
560
+ CUDNN_ATTR_OPERATION_BN_FINALIZE_SAVED_INV_STD_DESC = 791,
561
+ CUDNN_ATTR_OPERATION_BN_FINALIZE_EQ_SCALE_DESC = 792,
562
+ CUDNN_ATTR_OPERATION_BN_FINALIZE_EQ_BIAS_DESC = 793,
563
+ CUDNN_ATTR_OPERATION_BN_FINALIZE_ACCUM_COUNT_DESC = 794,
564
+ CUDNN_ATTR_OPERATION_BN_FINALIZE_EPSILON_DESC = 795,
565
+ CUDNN_ATTR_OPERATION_BN_FINALIZE_EXP_AVERATE_FACTOR_DESC = 796, /* sic: "AVERATE" is the spelling of this public identifier; do not rename */
566
+
567
+ CUDNN_ATTR_OPERATIONGRAPH_HANDLE CUDNN_DEPRECATED_ENUM = 800,
568
+ CUDNN_ATTR_OPERATIONGRAPH_OPS = 801,
569
+ CUDNN_ATTR_OPERATIONGRAPH_ENGINE_GLOBAL_COUNT = 802,
570
+ CUDNN_ATTR_OPERATIONGRAPH_IS_DYNAMIC_SHAPE_ENABLED = 803,
571
+ CUDNN_ATTR_OPERATIONGRAPH_IS_SAME_TOPOLOGY = 804,
572
+
573
+ CUDNN_ATTR_TENSOR_BYTE_ALIGNMENT = 900,
574
+ CUDNN_ATTR_TENSOR_DATA_TYPE = 901,
575
+ CUDNN_ATTR_TENSOR_DIMENSIONS = 902,
576
+ CUDNN_ATTR_TENSOR_STRIDES = 903,
577
+ CUDNN_ATTR_TENSOR_VECTOR_COUNT = 904,
578
+ CUDNN_ATTR_TENSOR_VECTORIZED_DIMENSION = 905,
579
+ CUDNN_ATTR_TENSOR_UNIQUE_ID = 906,
580
+ CUDNN_ATTR_TENSOR_IS_VIRTUAL = 907,
581
+ CUDNN_ATTR_TENSOR_IS_BY_VALUE = 908,
582
+ CUDNN_ATTR_TENSOR_REORDERING_MODE = 909,
583
+ CUDNN_ATTR_TENSOR_RAGGED_OFFSET_DESC = 913, /* 910-912 are not defined here */
584
+
585
+ CUDNN_ATTR_VARIANT_PACK_UNIQUE_IDS = 1000,
586
+ CUDNN_ATTR_VARIANT_PACK_DATA_POINTERS = 1001,
587
+ CUDNN_ATTR_VARIANT_PACK_INTERMEDIATES = 1002,
588
+ CUDNN_ATTR_VARIANT_PACK_WORKSPACE = 1003,
589
+
590
+ CUDNN_ATTR_LAYOUT_INFO_TENSOR_UID = 1100,
591
+ CUDNN_ATTR_LAYOUT_INFO_TYPES = 1101,
592
+
593
+ CUDNN_ATTR_KNOB_INFO_TYPE = 1200,
594
+ CUDNN_ATTR_KNOB_INFO_MAXIMUM_VALUE = 1201,
595
+ CUDNN_ATTR_KNOB_INFO_MINIMUM_VALUE = 1202,
596
+ CUDNN_ATTR_KNOB_INFO_STRIDE = 1203,
597
+
598
+ CUDNN_ATTR_ENGINE_OPERATION_GRAPH = 1300,
599
+ CUDNN_ATTR_ENGINE_GLOBAL_INDEX = 1301,
600
+ CUDNN_ATTR_ENGINE_KNOB_INFO = 1302,
601
+ CUDNN_ATTR_ENGINE_NUMERICAL_NOTE = 1303,
602
+ CUDNN_ATTR_ENGINE_LAYOUT_INFO = 1304,
603
+ CUDNN_ATTR_ENGINE_BEHAVIOR_NOTE = 1305,
604
+ CUDNN_ATTR_ENGINE_SM_COUNT_TARGET = 1306,
605
+ CUDNN_ATTR_ENGINE_DEVICEPROP = 1307,
606
+
607
+ CUDNN_ATTR_MATMUL_COMP_TYPE = 1500,
608
+ CUDNN_ATTR_MATMUL_PADDING_VALUE = 1503, /* 1501-1502 are not defined here */
609
+
610
+ CUDNN_ATTR_OPERATION_MATMUL_ADESC = 1520,
611
+ CUDNN_ATTR_OPERATION_MATMUL_BDESC = 1521,
612
+ CUDNN_ATTR_OPERATION_MATMUL_CDESC = 1522,
613
+ CUDNN_ATTR_OPERATION_MATMUL_DESC = 1523,
614
+ CUDNN_ATTR_OPERATION_MATMUL_IRREGULARLY_STRIDED_BATCH_COUNT CUDNN_DEPRECATED_ENUM = 1524,
615
+ CUDNN_ATTR_OPERATION_MATMUL_GEMM_M_OVERRIDE_DESC = 1525,
616
+ CUDNN_ATTR_OPERATION_MATMUL_GEMM_N_OVERRIDE_DESC = 1526,
617
+ CUDNN_ATTR_OPERATION_MATMUL_GEMM_K_OVERRIDE_DESC = 1527,
618
+
619
+ CUDNN_ATTR_REDUCTION_OPERATOR = 1600,
620
+ CUDNN_ATTR_REDUCTION_COMP_TYPE = 1601,
621
+ CUDNN_ATTR_REDUCTION_IS_DETERMINISTIC = 1602,
622
+
623
+ CUDNN_ATTR_OPERATION_REDUCTION_XDESC = 1610,
624
+ CUDNN_ATTR_OPERATION_REDUCTION_YDESC = 1611,
625
+ CUDNN_ATTR_OPERATION_REDUCTION_DESC = 1612,
626
+
627
+ CUDNN_ATTR_OPERATION_BN_BWD_WEIGHTS_MATH_PREC = 1620,
628
+ CUDNN_ATTR_OPERATION_BN_BWD_WEIGHTS_MEAN_DESC = 1621,
629
+ CUDNN_ATTR_OPERATION_BN_BWD_WEIGHTS_INVSTD_DESC = 1622,
630
+ CUDNN_ATTR_OPERATION_BN_BWD_WEIGHTS_BN_SCALE_DESC = 1623,
631
+ CUDNN_ATTR_OPERATION_BN_BWD_WEIGHTS_X_DESC = 1624,
632
+ CUDNN_ATTR_OPERATION_BN_BWD_WEIGHTS_DY_DESC = 1625,
633
+ CUDNN_ATTR_OPERATION_BN_BWD_WEIGHTS_DBN_SCALE_DESC = 1626,
634
+ CUDNN_ATTR_OPERATION_BN_BWD_WEIGHTS_DBN_BIAS_DESC = 1627,
635
+ CUDNN_ATTR_OPERATION_BN_BWD_WEIGHTS_EQ_DY_SCALE_DESC = 1628,
636
+ CUDNN_ATTR_OPERATION_BN_BWD_WEIGHTS_EQ_X_SCALE_DESC = 1629,
637
+ CUDNN_ATTR_OPERATION_BN_BWD_WEIGHTS_EQ_BIAS = 1630,
638
+
639
+ CUDNN_ATTR_RESAMPLE_MODE = 1700,
640
+ CUDNN_ATTR_RESAMPLE_COMP_TYPE = 1701,
641
+ CUDNN_ATTR_RESAMPLE_SPATIAL_DIMS = 1702,
642
+ CUDNN_ATTR_RESAMPLE_POST_PADDINGS = 1703,
643
+ CUDNN_ATTR_RESAMPLE_PRE_PADDINGS = 1704,
644
+ CUDNN_ATTR_RESAMPLE_STRIDES = 1705,
645
+ CUDNN_ATTR_RESAMPLE_WINDOW_DIMS = 1706,
646
+ CUDNN_ATTR_RESAMPLE_NAN_PROPAGATION = 1707,
647
+ CUDNN_ATTR_RESAMPLE_PADDING_MODE = 1708,
648
+
649
+ CUDNN_ATTR_OPERATION_RESAMPLE_FWD_XDESC = 1710,
650
+ CUDNN_ATTR_OPERATION_RESAMPLE_FWD_YDESC = 1711,
651
+ CUDNN_ATTR_OPERATION_RESAMPLE_FWD_IDXDESC = 1712,
652
+ CUDNN_ATTR_OPERATION_RESAMPLE_FWD_ALPHA CUDNN_DEPRECATED_ENUM = 1713,
653
+ CUDNN_ATTR_OPERATION_RESAMPLE_FWD_BETA CUDNN_DEPRECATED_ENUM = 1714,
654
+ CUDNN_ATTR_OPERATION_RESAMPLE_FWD_DESC = 1716, /* 1715 is not defined here */
655
+
656
+ CUDNN_ATTR_OPERATION_RESAMPLE_BWD_DXDESC = 1720,
657
+ CUDNN_ATTR_OPERATION_RESAMPLE_BWD_DYDESC = 1721,
658
+ CUDNN_ATTR_OPERATION_RESAMPLE_BWD_IDXDESC = 1722,
659
+ CUDNN_ATTR_OPERATION_RESAMPLE_BWD_ALPHA CUDNN_DEPRECATED_ENUM = 1723,
660
+ CUDNN_ATTR_OPERATION_RESAMPLE_BWD_BETA CUDNN_DEPRECATED_ENUM = 1724,
661
+ CUDNN_ATTR_OPERATION_RESAMPLE_BWD_DESC = 1725,
662
+ CUDNN_ATTR_OPERATION_RESAMPLE_BWD_XDESC = 1726,
663
+ CUDNN_ATTR_OPERATION_RESAMPLE_BWD_YDESC = 1727,
664
+
665
+ CUDNN_ATTR_OPERATION_CONCAT_AXIS = 1800,
666
+ CUDNN_ATTR_OPERATION_CONCAT_INPUT_DESCS = 1801,
667
+ CUDNN_ATTR_OPERATION_CONCAT_INPLACE_INDEX = 1802,
668
+ CUDNN_ATTR_OPERATION_CONCAT_OUTPUT_DESC = 1803,
669
+
670
+ CUDNN_ATTR_OPERATION_SIGNAL_MODE = 1900,
671
+ CUDNN_ATTR_OPERATION_SIGNAL_FLAGDESC = 1901,
672
+ CUDNN_ATTR_OPERATION_SIGNAL_VALUE = 1902,
673
+ CUDNN_ATTR_OPERATION_SIGNAL_XDESC = 1903,
674
+ CUDNN_ATTR_OPERATION_SIGNAL_YDESC = 1904,
675
+
676
+ CUDNN_ATTR_OPERATION_PAGED_CACHE_LOAD_CONTAINER_DESC = 1950,
677
+ CUDNN_ATTR_OPERATION_PAGED_CACHE_LOAD_YDESC = 1951,
678
+ CUDNN_ATTR_OPERATION_PAGED_CACHE_LOAD_SEQUENCE_DESC = 1952,
679
+ CUDNN_ATTR_OPERATION_PAGED_CACHE_LOAD_PAGE_TABLE_DESC = 1953,
680
+
681
+ CUDNN_ATTR_OPERATION_NORM_FWD_MODE = 2000,
682
+ CUDNN_ATTR_OPERATION_NORM_FWD_PHASE = 2001,
683
+ CUDNN_ATTR_OPERATION_NORM_FWD_XDESC = 2002,
684
+ CUDNN_ATTR_OPERATION_NORM_FWD_MEAN_DESC = 2003,
685
+ CUDNN_ATTR_OPERATION_NORM_FWD_INV_VARIANCE_DESC = 2004,
686
+ CUDNN_ATTR_OPERATION_NORM_FWD_SCALE_DESC = 2005,
687
+ CUDNN_ATTR_OPERATION_NORM_FWD_BIAS_DESC = 2006,
688
+ CUDNN_ATTR_OPERATION_NORM_FWD_EPSILON_DESC = 2007,
689
+ CUDNN_ATTR_OPERATION_NORM_FWD_EXP_AVG_FACTOR_DESC = 2008,
690
+ CUDNN_ATTR_OPERATION_NORM_FWD_INPUT_RUNNING_MEAN_DESC = 2009,
691
+ CUDNN_ATTR_OPERATION_NORM_FWD_INPUT_RUNNING_VAR_DESC = 2010,
692
+ CUDNN_ATTR_OPERATION_NORM_FWD_OUTPUT_RUNNING_MEAN_DESC = 2011,
693
+ CUDNN_ATTR_OPERATION_NORM_FWD_OUTPUT_RUNNING_VAR_DESC = 2012,
694
+ CUDNN_ATTR_OPERATION_NORM_FWD_YDESC = 2013,
695
+ CUDNN_ATTR_OPERATION_NORM_FWD_PEER_STAT_DESCS = 2014,
696
+
697
+ CUDNN_ATTR_OPERATION_NORM_BWD_MODE = 2100,
698
+ CUDNN_ATTR_OPERATION_NORM_BWD_XDESC = 2101,
699
+ CUDNN_ATTR_OPERATION_NORM_BWD_MEAN_DESC = 2102,
700
+ CUDNN_ATTR_OPERATION_NORM_BWD_INV_VARIANCE_DESC = 2103,
701
+ CUDNN_ATTR_OPERATION_NORM_BWD_DYDESC = 2104,
702
+ CUDNN_ATTR_OPERATION_NORM_BWD_SCALE_DESC = 2105,
703
+ CUDNN_ATTR_OPERATION_NORM_BWD_EPSILON_DESC = 2106,
704
+ CUDNN_ATTR_OPERATION_NORM_BWD_DSCALE_DESC = 2107,
705
+ CUDNN_ATTR_OPERATION_NORM_BWD_DBIAS_DESC = 2108,
706
+ CUDNN_ATTR_OPERATION_NORM_BWD_DXDESC = 2109,
707
+ CUDNN_ATTR_OPERATION_NORM_BWD_PEER_STAT_DESCS = 2110,
708
+
709
+ CUDNN_ATTR_OPERATION_RESHAPE_XDESC = 2200,
710
+ CUDNN_ATTR_OPERATION_RESHAPE_YDESC = 2201,
711
+
712
+ CUDNN_ATTR_OPERATION_EXPAND_BAND_MATRIX_XDESC = 2250,
713
+ CUDNN_ATTR_OPERATION_EXPAND_BAND_MATRIX_YDESC = 2251,
714
+ CUDNN_ATTR_OPERATION_EXPAND_BAND_MATRIX_LOWER_BANDWIDTH = 2252,
715
+ CUDNN_ATTR_OPERATION_EXPAND_BAND_MATRIX_UPPER_BANDWIDTH = 2253,
716
+ CUDNN_ATTR_OPERATION_EXPAND_BAND_MATRIX_AXIS = 2254,
717
+ CUDNN_ATTR_OPERATION_EXPAND_BAND_MATRIX_PAD_VALUE = 2255,
718
+ CUDNN_ATTR_OPERATION_EXPAND_BAND_MATRIX_KV_TOKEN_OFFSET_DESC = 2256,
719
+
720
+ CUDNN_ATTR_OPERATION_CONTRACT_BAND_MATRIX_XDESC = 2270,
721
+ CUDNN_ATTR_OPERATION_CONTRACT_BAND_MATRIX_YDESC = 2271,
722
+ CUDNN_ATTR_OPERATION_CONTRACT_BAND_MATRIX_LOWER_BANDWIDTH = 2272,
723
+ CUDNN_ATTR_OPERATION_CONTRACT_BAND_MATRIX_UPPER_BANDWIDTH = 2273,
724
+ CUDNN_ATTR_OPERATION_CONTRACT_BAND_MATRIX_AXIS = 2274,
725
+ CUDNN_ATTR_OPERATION_CONTRACT_BAND_MATRIX_PAD_VALUE = 2275,
726
+ CUDNN_ATTR_OPERATION_CONTRACT_BAND_MAX_TOKEN_VALUE = 2276, /* name has no "_MATRIX" segment, unlike the other entries of this group */
727
+
728
+ CUDNN_ATTR_RNG_DISTRIBUTION = 2300,
729
+ CUDNN_ATTR_RNG_NORMAL_DIST_MEAN = 2301,
730
+ CUDNN_ATTR_RNG_NORMAL_DIST_STANDARD_DEVIATION = 2302,
731
+ CUDNN_ATTR_RNG_UNIFORM_DIST_MAXIMUM = 2303,
732
+ CUDNN_ATTR_RNG_UNIFORM_DIST_MINIMUM = 2304,
733
+ CUDNN_ATTR_RNG_BERNOULLI_DIST_PROBABILITY = 2305,
734
+
735
+ CUDNN_ATTR_OPERATION_RNG_YDESC = 2310,
736
+ CUDNN_ATTR_OPERATION_RNG_SEED = 2311,
737
+ CUDNN_ATTR_OPERATION_RNG_DESC = 2312,
738
+ CUDNN_ATTR_OPERATION_RNG_OFFSET_DESC = 2313,
739
+
740
+ CUDNN_ATTR_KERNEL_CACHE_OPERATION_GRAPH = 2400,
741
+ CUDNN_ATTR_KERNEL_CACHE_IS_ENGINECFG_KERNEL_CACHED = 2401,
742
+ CUDNN_ATTR_KERNEL_CACHE_JSON_REPRESENTATION = 2402,
743
+
744
+ CUDNN_ATTR_OPERATION_BLOCK_SCALE_QUANTIZE_XDESC = 2500,
745
+ CUDNN_ATTR_OPERATION_BLOCK_SCALE_QUANTIZE_YDESC = 2501,
746
+ CUDNN_ATTR_OPERATION_BLOCK_SCALE_QUANTIZE_SCALE_DESC = 2502,
747
+ CUDNN_ATTR_OPERATION_BLOCK_SCALE_QUANTIZE_MATH_PREC = 2503,
748
+ CUDNN_ATTR_OPERATION_BLOCK_SCALE_QUANTIZE_BLOCK_SIZE = 2504,
749
+
750
+ CUDNN_ATTR_OPERATION_BLOCK_SCALE_DEQUANTIZE_XDESC = 2600,
751
+ CUDNN_ATTR_OPERATION_BLOCK_SCALE_DEQUANTIZE_SCALE_DESC = 2601,
752
+ CUDNN_ATTR_OPERATION_BLOCK_SCALE_DEQUANTIZE_YDESC = 2602,
753
+ CUDNN_ATTR_OPERATION_BLOCK_SCALE_DEQUANTIZE_MATH_PREC = 2603,
754
+ CUDNN_ATTR_OPERATION_BLOCK_SCALE_DEQUANTIZE_BLOCK_SIZE = 2604,
755
+
756
+ CUDNN_ATTR_DEVICEPROP_DEVICE_ID = 2700,
757
+ CUDNN_ATTR_DEVICEPROP_HANDLE = 2701,
758
+ CUDNN_ATTR_DEVICEPROP_JSON_REPRESENTATION = 2702,
759
+ } cudnnBackendAttributeName_t;
760
+
761
+ typedef enum { /* Tags for the element type of an attribute value; this is the type of the `attributeType` parameter of cudnnBackendSetAttribute and cudnnBackendGetAttribute. Values are contiguous 0..29; no count sentinel is declared. */
762
+ CUDNN_TYPE_HANDLE = 0,
763
+ CUDNN_TYPE_DATA_TYPE = 1,
764
+ CUDNN_TYPE_BOOLEAN = 2,
765
+ CUDNN_TYPE_INT64 = 3,
766
+ CUDNN_TYPE_FLOAT = 4,
767
+ CUDNN_TYPE_DOUBLE = 5,
768
+ CUDNN_TYPE_VOID_PTR = 6,
769
+ CUDNN_TYPE_CONVOLUTION_MODE = 7,
770
+ CUDNN_TYPE_HEUR_MODE = 8, /* NOTE(review): presumably tags cudnnBackendHeurMode_t values - confirm. */
771
+ CUDNN_TYPE_KNOB_TYPE = 9, /* NOTE(review): presumably tags cudnnBackendKnobType_t values - confirm. */
772
+ CUDNN_TYPE_NAN_PROPOGATION CUDNN_DEPRECATED_ENUM = 10, /* Marked deprecated via CUDNN_DEPRECATED_ENUM. "PROPOGATION" is the declared spelling of this public identifier; do not silently correct it. */
773
+ CUDNN_TYPE_NUMERICAL_NOTE = 11,
774
+ CUDNN_TYPE_LAYOUT_TYPE = 12,
775
+ CUDNN_TYPE_ATTRIB_NAME = 13,
776
+ CUDNN_TYPE_POINTWISE_MODE = 14,
777
+ CUDNN_TYPE_BACKEND_DESCRIPTOR = 15,
778
+ CUDNN_TYPE_GENSTATS_MODE = 16,
779
+ CUDNN_TYPE_BN_FINALIZE_STATS_MODE = 17,
780
+ CUDNN_TYPE_REDUCTION_OPERATOR_TYPE = 18,
781
+ CUDNN_TYPE_BEHAVIOR_NOTE = 19,
782
+ CUDNN_TYPE_TENSOR_REORDERING_MODE = 20,
783
+ CUDNN_TYPE_RESAMPLE_MODE = 21,
784
+ CUDNN_TYPE_PADDING_MODE = 22,
785
+ CUDNN_TYPE_INT32 = 23,
786
+ CUDNN_TYPE_CHAR = 24,
787
+ CUDNN_TYPE_SIGNAL_MODE = 25,
788
+ CUDNN_TYPE_FRACTION = 26,
789
+ CUDNN_TYPE_NORM_MODE = 27,
790
+ CUDNN_TYPE_NORM_FWD_PHASE = 28,
791
+ CUDNN_TYPE_RNG_DISTRIBUTION = 29,
792
+ } cudnnBackendAttributeType_t;
793
+
794
+ typedef enum { /* Kinds of backend descriptor; this is the type of the `descriptorType` parameter of cudnnBackendCreateDescriptor. Values are contiguous 0..40 in declaration order (not grouped by feature); no count sentinel is declared. */
795
+ CUDNN_BACKEND_POINTWISE_DESCRIPTOR = 0,
796
+ CUDNN_BACKEND_CONVOLUTION_DESCRIPTOR = 1,
797
+ CUDNN_BACKEND_ENGINE_DESCRIPTOR = 2,
798
+ CUDNN_BACKEND_ENGINECFG_DESCRIPTOR = 3,
799
+ CUDNN_BACKEND_ENGINEHEUR_DESCRIPTOR = 4,
800
+ CUDNN_BACKEND_EXECUTION_PLAN_DESCRIPTOR = 5,
801
+ CUDNN_BACKEND_INTERMEDIATE_INFO_DESCRIPTOR = 6,
802
+ CUDNN_BACKEND_KNOB_CHOICE_DESCRIPTOR = 7,
803
+ CUDNN_BACKEND_KNOB_INFO_DESCRIPTOR = 8,
804
+ CUDNN_BACKEND_LAYOUT_INFO_DESCRIPTOR = 9,
805
+ CUDNN_BACKEND_OPERATION_CONVOLUTION_FORWARD_DESCRIPTOR = 10,
806
+ CUDNN_BACKEND_OPERATION_CONVOLUTION_BACKWARD_FILTER_DESCRIPTOR = 11,
807
+ CUDNN_BACKEND_OPERATION_CONVOLUTION_BACKWARD_DATA_DESCRIPTOR = 12,
808
+ CUDNN_BACKEND_OPERATION_POINTWISE_DESCRIPTOR = 13,
809
+ CUDNN_BACKEND_OPERATION_GEN_STATS_DESCRIPTOR = 14,
810
+ CUDNN_BACKEND_OPERATIONGRAPH_DESCRIPTOR = 15,
811
+ CUDNN_BACKEND_VARIANT_PACK_DESCRIPTOR = 16,
812
+ CUDNN_BACKEND_TENSOR_DESCRIPTOR = 17,
813
+ CUDNN_BACKEND_MATMUL_DESCRIPTOR = 18,
814
+ CUDNN_BACKEND_OPERATION_MATMUL_DESCRIPTOR = 19,
815
+ CUDNN_BACKEND_OPERATION_BN_FINALIZE_STATISTICS_DESCRIPTOR = 20,
816
+ CUDNN_BACKEND_REDUCTION_DESCRIPTOR = 21,
817
+ CUDNN_BACKEND_OPERATION_REDUCTION_DESCRIPTOR = 22,
818
+ CUDNN_BACKEND_OPERATION_BN_BWD_WEIGHTS_DESCRIPTOR = 23,
819
+ CUDNN_BACKEND_RESAMPLE_DESCRIPTOR = 24,
820
+ CUDNN_BACKEND_OPERATION_RESAMPLE_FWD_DESCRIPTOR = 25,
821
+ CUDNN_BACKEND_OPERATION_RESAMPLE_BWD_DESCRIPTOR = 26,
822
+ CUDNN_BACKEND_OPERATION_CONCAT_DESCRIPTOR = 27,
823
+ CUDNN_BACKEND_OPERATION_SIGNAL_DESCRIPTOR = 28,
824
+ CUDNN_BACKEND_OPERATION_NORM_FORWARD_DESCRIPTOR = 29,
825
+ CUDNN_BACKEND_OPERATION_NORM_BACKWARD_DESCRIPTOR = 30,
826
+ CUDNN_BACKEND_OPERATION_RESHAPE_DESCRIPTOR = 31,
827
+ CUDNN_BACKEND_RNG_DESCRIPTOR = 32,
828
+ CUDNN_BACKEND_OPERATION_RNG_DESCRIPTOR = 33,
829
+ CUDNN_BACKEND_KERNEL_CACHE_DESCRIPTOR = 34,
830
+ CUDNN_BACKEND_OPERATION_PAGED_CACHE_LOAD_DESCRIPTOR = 35,
831
+ CUDNN_BACKEND_OPERATION_BLOCK_SCALE_QUANTIZE_DESCRIPTOR = 36,
832
+ CUDNN_BACKEND_OPERATION_BLOCK_SCALE_DEQUANTIZE_DESCRIPTOR = 37,
833
+ CUDNN_BACKEND_DEVICEPROP_DESCRIPTOR = 38,
834
+ CUDNN_BACKEND_OPERATION_EXPAND_BAND_MATRIX_DESCRIPTOR = 39, /* Attribute names for this operation use the CUDNN_ATTR_OPERATION_EXPAND_BAND_MATRIX_ prefix. */
835
+ CUDNN_BACKEND_OPERATION_CONTRACT_BAND_MATRIX_DESCRIPTOR = 40, /* Attribute names use the CUDNN_ATTR_OPERATION_CONTRACT_BAND_MATRIX_ prefix, except CUDNN_ATTR_OPERATION_CONTRACT_BAND_MAX_TOKEN_VALUE, which omits MATRIX. */
836
+ } cudnnBackendDescriptorType_t;
837
+
838
+ typedef enum { /* Numerical-note identifiers, values 0..9 plus a trailing count sentinel. NOTE(review): presumably the value type tagged by CUDNN_TYPE_NUMERICAL_NOTE - confirm against the cuDNN backend documentation. */
839
+ CUDNN_NUMERICAL_NOTE_TENSOR_CORE = 0,
840
+ CUDNN_NUMERICAL_NOTE_DOWN_CONVERT_INPUTS = 1,
841
+ CUDNN_NUMERICAL_NOTE_REDUCED_PRECISION_REDUCTION = 2,
842
+ CUDNN_NUMERICAL_NOTE_FFT = 3,
843
+ CUDNN_NUMERICAL_NOTE_NONDETERMINISTIC = 4,
844
+ CUDNN_NUMERICAL_NOTE_WINOGRAD = 5,
845
+ CUDNN_NUMERICAL_NOTE_WINOGRAD_TILE_4x4 = 6,
846
+ CUDNN_NUMERICAL_NOTE_WINOGRAD_TILE_6x6 = 7,
847
+ CUDNN_NUMERICAL_NOTE_WINOGRAD_TILE_13x13 = 8,
848
+ CUDNN_NUMERICAL_NOTE_STRICT_NAN_PROP = 9,
849
+ CUDNN_NUMERICAL_NOTE_TYPE_COUNT = 10, /* Count sentinel, not a note: equals the number of notes declared above (0..9). Keep it last and in sync when adding entries. */
850
+ } cudnnBackendNumericalNote_t;
851
+
852
+ typedef enum { /* Behavior-note identifiers, values 0..3 plus a trailing count sentinel. NOTE(review): presumably the value type tagged by CUDNN_TYPE_BEHAVIOR_NOTE - confirm against the cuDNN backend documentation. */
853
+ CUDNN_BEHAVIOR_NOTE_RUNTIME_COMPILATION = 0,
854
+ CUDNN_BEHAVIOR_NOTE_REQUIRES_FILTER_INT8x32_REORDER = 1,
855
+ CUDNN_BEHAVIOR_NOTE_REQUIRES_BIAS_INT8x32_REORDER = 2,
856
+ CUDNN_BEHAVIOR_NOTE_SUPPORTS_CUDA_GRAPH_NATIVE_API = 3, /* NOTE(review): presumably related to cudnnBackendPopulateCudaGraph / cudnnBackendUpdateCudaGraph declared in this header - confirm. */
857
+ CUDNN_BEHAVIOR_NOTE_TYPE_COUNT = 4, /* Count sentinel, not a note: equals the number of notes declared above (0..3). */
858
+ } cudnnBackendBehaviorNote_t;
859
+
860
+ typedef enum { /* Knob-type identifiers, values 0..42 plus a trailing count sentinel. Entries carrying CUDNN_DEPRECATED_ENUM are deprecated but keep their numeric slots, so the numbering stays contiguous. NOTE(review): presumably the value type tagged by CUDNN_TYPE_KNOB_TYPE - confirm. */
861
+ CUDNN_KNOB_TYPE_SPLIT_K CUDNN_DEPRECATED_ENUM = 0,
862
+ CUDNN_KNOB_TYPE_SWIZZLE = 1,
863
+ CUDNN_KNOB_TYPE_TILE_SIZE = 2,
864
+ CUDNN_KNOB_TYPE_USE_TEX CUDNN_DEPRECATED_ENUM = 3,
865
+ CUDNN_KNOB_TYPE_EDGE = 4,
866
+ CUDNN_KNOB_TYPE_KBLOCK CUDNN_DEPRECATED_ENUM = 5,
867
+ CUDNN_KNOB_TYPE_LDGA CUDNN_DEPRECATED_ENUM = 6,
868
+ CUDNN_KNOB_TYPE_LDGB CUDNN_DEPRECATED_ENUM = 7,
869
+ CUDNN_KNOB_TYPE_CHUNK_K CUDNN_DEPRECATED_ENUM = 8,
870
+ CUDNN_KNOB_TYPE_SPLIT_H CUDNN_DEPRECATED_ENUM = 9,
871
+ CUDNN_KNOB_TYPE_WINO_TILE CUDNN_DEPRECATED_ENUM = 10,
872
+ CUDNN_KNOB_TYPE_MULTIPLY = 11,
873
+ CUDNN_KNOB_TYPE_SPLIT_K_BUF = 12,
874
+ CUDNN_KNOB_TYPE_TILEK = 13,
875
+ CUDNN_KNOB_TYPE_STAGES = 14,
876
+ CUDNN_KNOB_TYPE_REDUCTION_MODE = 15,
877
+ CUDNN_KNOB_TYPE_CTA_SPLIT_K_MODE CUDNN_DEPRECATED_ENUM = 16,
878
+ CUDNN_KNOB_TYPE_SPLIT_K_SLC = 17,
879
+ CUDNN_KNOB_TYPE_IDX_MODE = 18,
880
+ CUDNN_KNOB_TYPE_SLICED CUDNN_DEPRECATED_ENUM = 19,
881
+ CUDNN_KNOB_TYPE_SPLIT_RS CUDNN_DEPRECATED_ENUM = 20,
882
+ CUDNN_KNOB_TYPE_SINGLEBUFFER CUDNN_DEPRECATED_ENUM = 21,
883
+ CUDNN_KNOB_TYPE_LDGC CUDNN_DEPRECATED_ENUM = 22,
884
+ CUDNN_KNOB_TYPE_SPECFILT = 23,
885
+ CUDNN_KNOB_TYPE_KERNEL_CFG = 24,
886
+ CUDNN_KNOB_TYPE_WORKSPACE = 25,
887
+ CUDNN_KNOB_TYPE_TILE_CGA CUDNN_DEPRECATED_ENUM = 26, /* Deprecated; the non-deprecated TILE_CGA_M / TILE_CGA_N entries follow directly below. */
888
+ CUDNN_KNOB_TYPE_TILE_CGA_M = 27,
889
+ CUDNN_KNOB_TYPE_TILE_CGA_N = 28,
890
+ CUDNN_KNOB_TYPE_BLOCK_SIZE = 29,
891
+ CUDNN_KNOB_TYPE_OCCUPANCY = 30,
892
+ CUDNN_KNOB_TYPE_ARRAY_SIZE_PER_THREAD = 31,
893
+ CUDNN_KNOB_TYPE_NUM_C_PER_BLOCK CUDNN_DEPRECATED_ENUM = 32,
894
+ CUDNN_KNOB_TYPE_SPLIT_COLS = 33,
895
+ CUDNN_KNOB_TYPE_TILE_ROWS = 34,
896
+ CUDNN_KNOB_TYPE_TILE_COLS = 35,
897
+ CUDNN_KNOB_TYPE_LOAD_SIZE = 36,
898
+ CUDNN_KNOB_TYPE_CTA_COUNT = 37,
899
+ CUDNN_KNOB_TYPE_STREAM_K = 38,
900
+ CUDNN_KNOB_TYPE_SPLIT_P_SLC = 39,
901
+ CUDNN_KNOB_TYPE_TILE_M = 40,
902
+ CUDNN_KNOB_TYPE_TILE_N = 41,
903
+ CUDNN_KNOB_TYPE_WARP_SPEC_CFG = 42,
904
+ CUDNN_KNOB_TYPE_COUNTS = 43, /* Count sentinel, not a knob: equals the number of entries above (0..42, deprecated ones included). Note the plural COUNTS, unlike the _TYPE_COUNT suffix used by the numerical-note, behavior-note and layout enums. */
905
+ } cudnnBackendKnobType_t;
906
+
907
+ typedef enum { /* Preferred-layout identifiers, values 0..3 plus a trailing count sentinel. NOTE(review): presumably the value type tagged by CUDNN_TYPE_LAYOUT_TYPE - confirm. */
908
+ CUDNN_LAYOUT_TYPE_PREFERRED_NCHW = 0,
909
+ CUDNN_LAYOUT_TYPE_PREFERRED_NHWC = 1,
910
+ CUDNN_LAYOUT_TYPE_PREFERRED_PAD4CK = 2,
911
+ CUDNN_LAYOUT_TYPE_PREFERRED_PAD8CK = 3,
912
+ CUDNN_LAYOUT_TYPE_COUNT = 4, /* Count sentinel, not a layout: equals the number of layouts declared above (0..3). */
913
+ } cudnnBackendLayoutType_t;
914
+
915
+ typedef enum { /* Heuristic-mode identifiers, values 0..3 plus a trailing count sentinel. Numeric order is not alphabetical: MODE_A is 3, after INSTANT, B and FALLBACK. NOTE(review): presumably the value type tagged by CUDNN_TYPE_HEUR_MODE - confirm. */
916
+ CUDNN_HEUR_MODE_INSTANT = 0,
917
+ CUDNN_HEUR_MODE_B = 1,
918
+ CUDNN_HEUR_MODE_FALLBACK = 2,
919
+ CUDNN_HEUR_MODE_A = 3,
920
+ CUDNN_HEUR_MODES_COUNT = 4, /* Count sentinel, not a mode: equals the number of modes declared above (0..3). Spelled MODES_COUNT, unlike the MODE_ prefix of the entries. */
921
+ } cudnnBackendHeurMode_t;
922
+
923
+ typedef enum { /* Tensor-reordering identifiers, values 0..3; no count sentinel is declared. NOTE(review): presumably the value type tagged by CUDNN_TYPE_TENSOR_REORDERING_MODE - confirm. */
924
+ CUDNN_TENSOR_REORDERING_NONE = 0,
925
+ CUDNN_TENSOR_REORDERING_INT8x32 = 1,
926
+ CUDNN_TENSOR_REORDERING_F16x16 = 2,
927
+ CUDNN_TENSOR_REORDERING_F8_128x4 = 3,
928
+ } cudnnBackendTensorReordering_t;
929
+
930
+ typedef enum { /* Padding-mode identifiers, values 0..2; no count sentinel is declared. Unlike its neighbours, this type name has no "Backend" infix. NOTE(review): presumably the value type tagged by CUDNN_TYPE_PADDING_MODE - confirm. */
931
+ CUDNN_ZERO_PAD = 0,
932
+ CUDNN_NEG_INF_PAD = 1,
933
+ CUDNN_EDGE_VAL_PAD = 2,
934
+ } cudnnPaddingMode_t;
935
+
936
+ typedef enum { /* Normalization-mode identifiers, values 0..5; no count sentinel is declared. NOTE(review): presumably the value type tagged by CUDNN_TYPE_NORM_MODE - confirm. */
937
+ CUDNN_LAYER_NORM = 0,
938
+ CUDNN_INSTANCE_NORM = 1,
939
+ CUDNN_BATCH_NORM = 2,
940
+ CUDNN_GROUP_NORM = 3,
941
+ CUDNN_RMS_NORM = 4,
942
+ CUDNN_ADA_LAYER_NORM = 5,
943
+ } cudnnBackendNormMode_t;
944
+
945
+ typedef enum { /* Forward-normalization phase identifiers: inference (0) or training (1). NOTE(review): presumably the value type tagged by CUDNN_TYPE_NORM_FWD_PHASE - confirm. */
946
+ CUDNN_NORM_FWD_INFERENCE = 0,
947
+ CUDNN_NORM_FWD_TRAINING = 1,
948
+ } cudnnBackendNormFwdPhase_t;
949
+
950
+ cudnnStatus_t CUDNNWINAPI /* Prototype only. Takes the kind of descriptor (a cudnnBackendDescriptorType_t value) and a pointer to a cudnnBackendDescriptor_t; returns a cudnnStatus_t. */
951
+ cudnnBackendCreateDescriptor(cudnnBackendDescriptorType_t descriptorType, cudnnBackendDescriptor_t *descriptor); /* NOTE(review): per the name, `descriptor` is presumably an out-parameter receiving the new descriptor, to be released with cudnnBackendDestroyDescriptor - confirm. */
952
+
953
+ cudnnStatus_t CUDNNWINAPI /* Prototype only. Takes a cudnnBackendDescriptor_t by value and returns a cudnnStatus_t. */
954
+ cudnnBackendDestroyDescriptor(cudnnBackendDescriptor_t descriptor); /* NOTE(review): per the name, presumably releases a descriptor obtained from cudnnBackendCreateDescriptor - confirm. */
955
+
956
+ CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI /* Prototype only. Marked CUDNN_DEPRECATED: new code should avoid this entry point. */
957
+ cudnnBackendInitialize(cudnnBackendDescriptor_t descriptor); /* Takes a cudnnBackendDescriptor_t by value and returns a cudnnStatus_t. */
958
+
959
+ cudnnStatus_t CUDNNWINAPI /* Prototype only. Takes a cudnnBackendDescriptor_t by value and returns a cudnnStatus_t. */
960
+ cudnnBackendFinalize(cudnnBackendDescriptor_t descriptor); /* NOTE(review): per the name, presumably seals a descriptor after its attributes have been set - confirm against the cuDNN backend documentation. */
961
+
962
+ cudnnStatus_t CUDNNWINAPI /* Prototype only. Sets one attribute on a backend descriptor (per the name); returns a cudnnStatus_t. */
963
+ cudnnBackendSetAttribute(cudnnBackendDescriptor_t descriptor, /* Descriptor the attribute is applied to. */
964
+ cudnnBackendAttributeName_t attributeName, /* Which attribute: one of the CUDNN_ATTR_ enumerators. */
965
+ cudnnBackendAttributeType_t attributeType, /* Element-type tag: one of the CUDNN_TYPE_ enumerators. */
966
+ int64_t elementCount, /* NOTE(review): presumably the number of elements in arrayOfElements - confirm. */
967
+ const void *arrayOfElements); /* Untyped, read-only (const) input buffer; its element type is conveyed separately through attributeType. */
968
+
969
+ cudnnStatus_t CUDNNWINAPI /* Prototype only. Reads one attribute from a backend descriptor (per the name); returns a cudnnStatus_t. */
970
+ cudnnBackendGetAttribute(cudnnBackendDescriptor_t const descriptor, /* Descriptor to query; the const applies to the by-value handle parameter itself. */
971
+ cudnnBackendAttributeName_t attributeName, /* Which attribute: one of the CUDNN_ATTR_ enumerators. */
972
+ cudnnBackendAttributeType_t attributeType, /* Element-type tag: one of the CUDNN_TYPE_ enumerators. */
973
+ int64_t requestedElementCount, /* NOTE(review): presumably the capacity of arrayOfElements, in elements - confirm. */
974
+ int64_t *elementCount, /* Pointer to an int64_t. NOTE(review): presumably receives the attribute's element count - confirm. */
975
+ void *arrayOfElements); /* Untyped, writable buffer (non-const, unlike the SetAttribute counterpart); its element type is conveyed through attributeType. */
976
+
977
+ cudnnStatus_t CUDNNWINAPI /* Prototype only. Takes a cudnnHandle_t plus two backend descriptors named executionPlan and variantPack; returns a cudnnStatus_t. */
978
+ cudnnBackendExecute(cudnnHandle_t handle, cudnnBackendDescriptor_t executionPlan, cudnnBackendDescriptor_t variantPack); /* NOTE(review): the two descriptors presumably must be of kinds CUDNN_BACKEND_EXECUTION_PLAN_DESCRIPTOR and CUDNN_BACKEND_VARIANT_PACK_DESCRIPTOR - confirm. */
979
+
980
+ cudnnStatus_t CUDNNWINAPI /* Prototype only. Same parameter list as cudnnBackendUpdateCudaGraph; returns a cudnnStatus_t. */
981
+ cudnnBackendPopulateCudaGraph(cudnnHandle_t handle, /* cuDNN library handle. */
982
+ cudnnBackendDescriptor_t executionPlan, /* NOTE(review): presumably a CUDNN_BACKEND_EXECUTION_PLAN_DESCRIPTOR - confirm. */
983
+ cudnnBackendDescriptor_t variantPack, /* NOTE(review): presumably a CUDNN_BACKEND_VARIANT_PACK_DESCRIPTOR - confirm. */
984
+ cudaGraph_t graph); /* CUDA graph handle. NOTE(review): per the name, presumably filled with the plan's work; required initial state of the graph is not visible here - confirm. */
985
+
986
+ cudnnStatus_t CUDNNWINAPI /* Prototype only. Same parameter list as cudnnBackendPopulateCudaGraph; returns a cudnnStatus_t. */
987
+ cudnnBackendUpdateCudaGraph(cudnnHandle_t handle, /* cuDNN library handle. */
988
+ cudnnBackendDescriptor_t executionPlan, /* NOTE(review): presumably a CUDNN_BACKEND_EXECUTION_PLAN_DESCRIPTOR - confirm. */
989
+ cudnnBackendDescriptor_t variantPack, /* NOTE(review): presumably a CUDNN_BACKEND_VARIANT_PACK_DESCRIPTOR - confirm. */
990
+ cudaGraph_t graph); /* CUDA graph handle. NOTE(review): per the name, presumably a graph previously populated by cudnnBackendPopulateCudaGraph, updated in place - confirm. */
991
+
992
+ #if defined(__cplusplus)
993
+ }
994
+ #endif
995
+
996
+ #endif /* CUDNN_GRAPH_H_ */