nvidia-cudnn-cu12 8.9.7.29__py3-none-win_amd64.whl → 9.1.0.70__py3-none-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56)
  1. nvidia/cudnn/bin/cudnn64_9.dll +0 -0
  2. nvidia/cudnn/bin/{cudnn_adv_infer64_8.dll → cudnn_adv64_9.dll} +0 -0
  3. nvidia/cudnn/bin/cudnn_cnn64_9.dll +0 -0
  4. nvidia/cudnn/bin/{cudnn_cnn_infer64_8.dll → cudnn_engines_precompiled64_9.dll} +0 -0
  5. nvidia/cudnn/bin/cudnn_engines_runtime_compiled64_9.dll +0 -0
  6. nvidia/cudnn/bin/cudnn_graph64_9.dll +0 -0
  7. nvidia/cudnn/bin/{cudnn_ops_infer64_8.dll → cudnn_heuristic64_9.dll} +0 -0
  8. nvidia/cudnn/bin/{cudnn_adv_train64_8.dll → cudnn_ops64_9.dll} +0 -0
  9. nvidia/cudnn/include/cudnn.h +8 -18
  10. nvidia/cudnn/include/{cudnn_adv_infer.h → cudnn_adv.h} +265 -252
  11. nvidia/cudnn/include/cudnn_backend.h +3 -558
  12. nvidia/cudnn/include/{cudnn_cnn_infer.h → cudnn_cnn.h} +187 -65
  13. nvidia/cudnn/include/cudnn_graph.h +909 -0
  14. nvidia/cudnn/include/{cudnn_ops_infer.h → cudnn_ops.h} +469 -336
  15. nvidia/cudnn/include/cudnn_version.h +4 -43
  16. nvidia/cudnn/lib/x64/cudnn.lib +0 -0
  17. nvidia/cudnn/lib/x64/cudnn64_9.lib +0 -0
  18. nvidia/cudnn/lib/x64/cudnn_adv.lib +0 -0
  19. nvidia/cudnn/lib/x64/cudnn_adv64_9.lib +0 -0
  20. nvidia/cudnn/lib/x64/cudnn_cnn.lib +0 -0
  21. nvidia/cudnn/lib/x64/cudnn_cnn64_9.lib +0 -0
  22. nvidia/cudnn/lib/x64/cudnn_engines_precompiled.lib +0 -0
  23. nvidia/cudnn/lib/x64/cudnn_engines_precompiled64_9.lib +0 -0
  24. nvidia/cudnn/lib/x64/cudnn_engines_runtime_compiled.lib +0 -0
  25. nvidia/cudnn/lib/x64/cudnn_engines_runtime_compiled64_9.lib +0 -0
  26. nvidia/cudnn/lib/x64/cudnn_graph.lib +0 -0
  27. nvidia/cudnn/lib/x64/cudnn_graph64_9.lib +0 -0
  28. nvidia/cudnn/lib/x64/cudnn_heuristic.lib +0 -0
  29. nvidia/cudnn/lib/x64/cudnn_heuristic64_9.lib +0 -0
  30. nvidia/cudnn/lib/x64/cudnn_ops.lib +0 -0
  31. nvidia/cudnn/lib/x64/cudnn_ops64_9.lib +0 -0
  32. {nvidia_cudnn_cu12-8.9.7.29.dist-info → nvidia_cudnn_cu12-9.1.0.70.dist-info}/METADATA +1 -1
  33. nvidia_cudnn_cu12-9.1.0.70.dist-info/RECORD +41 -0
  34. nvidia/cudnn/bin/cudnn64_8.dll +0 -0
  35. nvidia/cudnn/bin/cudnn_cnn_train64_8.dll +0 -0
  36. nvidia/cudnn/bin/cudnn_ops_train64_8.dll +0 -0
  37. nvidia/cudnn/include/cudnn_adv_train.h +0 -540
  38. nvidia/cudnn/include/cudnn_cnn_train.h +0 -219
  39. nvidia/cudnn/include/cudnn_ops_train.h +0 -501
  40. nvidia/cudnn/lib/x64/cudnn64_8.lib +0 -0
  41. nvidia/cudnn/lib/x64/cudnn_adv_infer.lib +0 -0
  42. nvidia/cudnn/lib/x64/cudnn_adv_infer64_8.lib +0 -0
  43. nvidia/cudnn/lib/x64/cudnn_adv_train.lib +0 -0
  44. nvidia/cudnn/lib/x64/cudnn_adv_train64_8.lib +0 -0
  45. nvidia/cudnn/lib/x64/cudnn_cnn_infer.lib +0 -0
  46. nvidia/cudnn/lib/x64/cudnn_cnn_infer64_8.lib +0 -0
  47. nvidia/cudnn/lib/x64/cudnn_cnn_train.lib +0 -0
  48. nvidia/cudnn/lib/x64/cudnn_cnn_train64_8.lib +0 -0
  49. nvidia/cudnn/lib/x64/cudnn_ops_infer.lib +0 -0
  50. nvidia/cudnn/lib/x64/cudnn_ops_infer64_8.lib +0 -0
  51. nvidia/cudnn/lib/x64/cudnn_ops_train.lib +0 -0
  52. nvidia/cudnn/lib/x64/cudnn_ops_train64_8.lib +0 -0
  53. nvidia_cudnn_cu12-8.9.7.29.dist-info/RECORD +0 -40
  54. {nvidia_cudnn_cu12-8.9.7.29.dist-info → nvidia_cudnn_cu12-9.1.0.70.dist-info}/License.txt +0 -0
  55. {nvidia_cudnn_cu12-8.9.7.29.dist-info → nvidia_cudnn_cu12-9.1.0.70.dist-info}/WHEEL +0 -0
  56. {nvidia_cudnn_cu12-8.9.7.29.dist-info → nvidia_cudnn_cu12-9.1.0.70.dist-info}/top_level.txt +0 -0
@@ -48,172 +48,41 @@
48
48
  */
49
49
 
50
50
  /*
51
- * cudnn_ops_infer : cuDNN's basic definitions and inference operations.
51
+ * cudnn_ops : cuDNN's basic definitions and basic operations.
52
52
  */
53
53
 
54
- #if !defined(CUDNN_OPS_INFER_H_)
55
- #define CUDNN_OPS_INFER_H_
54
+ #if !defined(CUDNN_OPS_H_)
55
+ #define CUDNN_OPS_H_
56
56
 
57
- #include <cuda_runtime.h>
58
57
  #include <stdint.h>
59
58
 
60
59
  #include "cudnn_version.h"
60
+ #include "cudnn_graph.h"
61
61
 
62
62
  /* These version numbers are autogenerated, do not edit manually. */
63
- #define CUDNN_OPS_INFER_MAJOR 8
64
- #define CUDNN_OPS_INFER_MINOR 9
65
- #define CUDNN_OPS_INFER_PATCH 7
63
+ #define CUDNN_OPS_MAJOR 9
64
+ #define CUDNN_OPS_MINOR 1
65
+ #define CUDNN_OPS_PATCH 0
66
66
 
67
- #if (CUDNN_OPS_INFER_MAJOR != CUDNN_MAJOR) || (CUDNN_OPS_INFER_MINOR != CUDNN_MINOR) || \
68
- (CUDNN_OPS_INFER_PATCH != CUDNN_PATCHLEVEL)
67
+ #if (CUDNN_OPS_MAJOR != CUDNN_MAJOR) || (CUDNN_OPS_MINOR != CUDNN_MINOR) || (CUDNN_OPS_PATCH != CUDNN_PATCHLEVEL)
69
68
  #error Version mismatch in cuDNN OPS INFER!!!
70
69
  #endif
71
70
 
72
- #ifndef CUDNNWINAPI
73
- #ifdef _WIN32
74
- #define CUDNNWINAPI __stdcall
75
- #else
76
- #define CUDNNWINAPI
77
- #endif
78
- #endif
79
-
80
- /* Warnings for deprecated API-s are enabled using the CUDNN_WARN_DEPRECATED macro */
81
- #if defined(CUDNN_WARN_DEPRECATED) && (defined(__GNUC__) || defined(__clang__))
82
- /* GCC, Intel C/C++, Cray C/C++, CLANG, IBM XL C/C++ little endian */
83
- #define CUDNN_DEPRECATED __attribute__((deprecated))
84
- #elif defined(CUDNN_WARN_DEPRECATED) && defined(_MSC_VER)
85
- /* Microsoft Visual C++ */
86
- #define CUDNN_DEPRECATED __declspec(deprecated)
87
- #elif defined(CUDNN_WARN_DEPRECATED) && (__cplusplus >= 201402L)
88
- /* C++14 compilers */
89
- #define CUDNN_DEPRECATED [[deprecated]]
90
- #else
91
- /* No support for the deprecated attribute */
92
- #define CUDNN_DEPRECATED
93
- #endif
94
-
95
71
  #if defined(__cplusplus)
96
72
  extern "C" {
97
73
  #endif
98
74
 
99
- struct cudnnContext;
100
- typedef struct cudnnContext *cudnnHandle_t;
101
-
102
- size_t CUDNNWINAPI
103
- cudnnGetVersion(void);
104
-
105
- size_t CUDNNWINAPI
106
- cudnnGetMaxDeviceVersion(void);
107
-
108
- /* Returns CUDA Runtime version statically linked against cudnn */
109
- size_t CUDNNWINAPI
110
- cudnnGetCudartVersion(void);
111
-
112
- /*
113
- * CUDNN return codes
114
- */
115
- typedef enum {
116
- CUDNN_STATUS_SUCCESS = 0,
117
- CUDNN_STATUS_NOT_INITIALIZED = 1,
118
- CUDNN_STATUS_ALLOC_FAILED = 2,
119
- CUDNN_STATUS_BAD_PARAM = 3,
120
- CUDNN_STATUS_INTERNAL_ERROR = 4,
121
- CUDNN_STATUS_INVALID_VALUE = 5,
122
- CUDNN_STATUS_ARCH_MISMATCH = 6,
123
- CUDNN_STATUS_MAPPING_ERROR = 7,
124
- CUDNN_STATUS_EXECUTION_FAILED = 8,
125
- CUDNN_STATUS_NOT_SUPPORTED = 9,
126
- CUDNN_STATUS_LICENSE_ERROR = 10,
127
- CUDNN_STATUS_RUNTIME_PREREQUISITE_MISSING = 11,
128
- CUDNN_STATUS_RUNTIME_IN_PROGRESS = 12,
129
- CUDNN_STATUS_RUNTIME_FP_OVERFLOW = 13,
130
- CUDNN_STATUS_VERSION_MISMATCH = 14,
131
- } cudnnStatus_t;
132
-
133
- /* human-readable error messages */
134
- const char *CUDNNWINAPI
135
- cudnnGetErrorString(cudnnStatus_t status);
136
-
137
- /* Forward definition in this version only */
138
- typedef struct cudnnRuntimeTag_t cudnnRuntimeTag_t;
139
-
140
- typedef enum {
141
- CUDNN_ERRQUERY_RAWCODE = 0,
142
- CUDNN_ERRQUERY_NONBLOCKING = 1,
143
- CUDNN_ERRQUERY_BLOCKING = 2,
144
- } cudnnErrQueryMode_t;
145
-
146
- cudnnStatus_t CUDNNWINAPI
147
- cudnnQueryRuntimeError(cudnnHandle_t handle, cudnnStatus_t *rstatus, cudnnErrQueryMode_t mode, cudnnRuntimeTag_t *tag);
148
-
149
- #ifndef __LIBRARY_TYPES_H__
150
-
151
- typedef enum libraryPropertyType_t { MAJOR_VERSION, MINOR_VERSION, PATCH_LEVEL } libraryPropertyType;
152
-
153
- #endif
154
-
155
- cudnnStatus_t CUDNNWINAPI
156
- cudnnGetProperty(libraryPropertyType type, int *value);
157
-
158
- cudnnStatus_t CUDNNWINAPI
159
- cudnnCreate(cudnnHandle_t *handle);
160
- cudnnStatus_t CUDNNWINAPI
161
- cudnnDestroy(cudnnHandle_t handle);
162
- cudnnStatus_t CUDNNWINAPI
163
- cudnnSetStream(cudnnHandle_t handle, cudaStream_t streamId);
164
- cudnnStatus_t CUDNNWINAPI
165
- cudnnGetStream(cudnnHandle_t handle, cudaStream_t *streamId);
166
-
167
75
  /* Data structures to represent Image/Filter and the Neural Network Layer */
168
76
  typedef struct cudnnTensorStruct *cudnnTensorDescriptor_t;
169
- typedef struct cudnnPoolingStruct *cudnnPoolingDescriptor_t;
170
- typedef struct cudnnFilterStruct *cudnnFilterDescriptor_t;
77
+ typedef struct cudnnPoolingStruct *cudnnPoolingDescriptor_t CUDNN_DEPRECATED;
78
+ typedef struct cudnnFilterStruct *cudnnFilterDescriptor_t CUDNN_DEPRECATED;
171
79
  typedef struct cudnnLRNStruct *cudnnLRNDescriptor_t;
172
- typedef struct cudnnActivationStruct *cudnnActivationDescriptor_t;
80
+ typedef struct cudnnActivationStruct *cudnnActivationDescriptor_t CUDNN_DEPRECATED;
173
81
  typedef struct cudnnSpatialTransformerStruct *cudnnSpatialTransformerDescriptor_t;
174
- typedef struct cudnnOpTensorStruct *cudnnOpTensorDescriptor_t;
175
- typedef struct cudnnReduceTensorStruct *cudnnReduceTensorDescriptor_t;
82
+ typedef struct cudnnOpTensorStruct *cudnnOpTensorDescriptor_t CUDNN_DEPRECATED;
83
+ typedef struct cudnnReduceTensorStruct *cudnnReduceTensorDescriptor_t CUDNN_DEPRECATED;
176
84
  typedef struct cudnnCTCLossStruct *cudnnCTCLossDescriptor_t;
177
- typedef struct cudnnTensorTransformStruct *cudnnTensorTransformDescriptor_t;
178
- /*
179
- * CUDNN data type
180
- */
181
- typedef enum {
182
- CUDNN_DATA_FLOAT = 0,
183
- CUDNN_DATA_DOUBLE = 1,
184
- CUDNN_DATA_HALF = 2,
185
- CUDNN_DATA_INT8 = 3,
186
- CUDNN_DATA_INT32 = 4,
187
- CUDNN_DATA_INT8x4 = 5,
188
- CUDNN_DATA_UINT8 = 6,
189
- CUDNN_DATA_UINT8x4 = 7,
190
- CUDNN_DATA_INT8x32 = 8,
191
- CUDNN_DATA_BFLOAT16 = 9,
192
- CUDNN_DATA_INT64 = 10,
193
- CUDNN_DATA_BOOLEAN = 11,
194
- CUDNN_DATA_FP8_E4M3 = 12,
195
- CUDNN_DATA_FP8_E5M2 = 13,
196
- CUDNN_DATA_FAST_FLOAT_FOR_FP8 = 14,
197
- } cudnnDataType_t;
198
-
199
- /*
200
- * CUDNN math type
201
- */
202
- typedef enum {
203
- CUDNN_DEFAULT_MATH = 0,
204
- CUDNN_TENSOR_OP_MATH = 1,
205
- CUDNN_TENSOR_OP_MATH_ALLOW_CONVERSION = 2,
206
- CUDNN_FMA_MATH = 3,
207
- } cudnnMathType_t;
208
-
209
- /*
210
- * CUDNN propagate Nan
211
- */
212
- typedef enum {
213
- CUDNN_NOT_PROPAGATE_NAN = 0,
214
- CUDNN_PROPAGATE_NAN = 1,
215
- } cudnnNanPropagation_t;
216
-
85
+ typedef struct cudnnTensorTransformStruct *cudnnTensorTransformDescriptor_t CUDNN_DEPRECATED;
217
86
  /*
218
87
  * CUDNN Determinism
219
88
  */
@@ -222,19 +91,10 @@ typedef enum {
222
91
  CUDNN_DETERMINISTIC = 1,
223
92
  } cudnnDeterminism_t;
224
93
 
225
- /* Maximum supported number of tensor dimensions */
226
- #define CUDNN_DIM_MAX 8
227
-
228
94
  /* Create an instance of a generic Tensor descriptor */
229
95
  cudnnStatus_t CUDNNWINAPI
230
96
  cudnnCreateTensorDescriptor(cudnnTensorDescriptor_t *tensorDesc);
231
97
 
232
- typedef enum {
233
- CUDNN_TENSOR_NCHW = 0, /* row major (wStride = 1, hStride = w) */
234
- CUDNN_TENSOR_NHWC = 1, /* feature maps interleaved ( cStride = 1 )*/
235
- CUDNN_TENSOR_NCHW_VECT_C = 2, /* each image point is vector of element of C, vector length in data type */
236
- } cudnnTensorFormat_t;
237
-
238
98
  cudnnStatus_t CUDNNWINAPI
239
99
  cudnnSetTensor4dDescriptor(cudnnTensorDescriptor_t tensorDesc,
240
100
  cudnnTensorFormat_t format,
@@ -327,18 +187,18 @@ typedef enum {
327
187
  } cudnnFoldingDirection_t;
328
188
 
329
189
  /** Create a destination descriptor for cudnnTransformTensor */
330
- cudnnStatus_t CUDNNWINAPI
190
+ CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
331
191
  cudnnInitTransformDest(const cudnnTensorTransformDescriptor_t transformDesc,
332
192
  const cudnnTensorDescriptor_t srcDesc,
333
193
  cudnnTensorDescriptor_t destDesc,
334
194
  size_t *destSizeInBytes);
335
195
 
336
196
  /** Create an empty tensor transform descriptor */
337
- cudnnStatus_t CUDNNWINAPI
197
+ CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
338
198
  cudnnCreateTensorTransformDescriptor(cudnnTensorTransformDescriptor_t *transformDesc);
339
199
 
340
200
  /** Initialize a previously created tensor transform descriptor. */
341
- cudnnStatus_t CUDNNWINAPI
201
+ CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
342
202
  cudnnSetTensorTransformDescriptor(cudnnTensorTransformDescriptor_t transformDesc,
343
203
  const uint32_t nbDims,
344
204
  const cudnnTensorFormat_t destFormat,
@@ -351,7 +211,7 @@ cudnnSetTensorTransformDescriptor(cudnnTensorTransformDescriptor_t transformDesc
351
211
  * Retrieves the values stored in a previously initialized tensor transform
352
212
  * descriptor.
353
213
  */
354
- cudnnStatus_t CUDNNWINAPI
214
+ CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
355
215
  cudnnGetTensorTransformDescriptor(cudnnTensorTransformDescriptor_t transformDesc,
356
216
  uint32_t nbDimsRequested,
357
217
  cudnnTensorFormat_t *destFormat,
@@ -363,11 +223,11 @@ cudnnGetTensorTransformDescriptor(cudnnTensorTransformDescriptor_t transformDesc
363
223
  /**
364
224
  * Destroys a previously created tensor transform descriptor.
365
225
  */
366
- cudnnStatus_t CUDNNWINAPI
226
+ CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
367
227
  cudnnDestroyTensorTransformDescriptor(cudnnTensorTransformDescriptor_t transformDesc);
368
228
 
369
229
  /* Tensor layout conversion helper (y = alpha * x + beta * y) */
370
- cudnnStatus_t CUDNNWINAPI
230
+ CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
371
231
  cudnnTransformTensor(cudnnHandle_t handle,
372
232
  const void *alpha,
373
233
  const cudnnTensorDescriptor_t xDesc,
@@ -376,7 +236,7 @@ cudnnTransformTensor(cudnnHandle_t handle,
376
236
  const cudnnTensorDescriptor_t yDesc,
377
237
  void *y);
378
238
 
379
- cudnnStatus_t CUDNNWINAPI
239
+ CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
380
240
  cudnnTransformTensorEx(cudnnHandle_t handle,
381
241
  const cudnnTensorTransformDescriptor_t transDesc,
382
242
  const void *alpha,
@@ -387,7 +247,7 @@ cudnnTransformTensorEx(cudnnHandle_t handle,
387
247
  void *destData);
388
248
 
389
249
  /* Tensor Bias addition : C = alpha * A + beta * C */
390
- cudnnStatus_t CUDNNWINAPI
250
+ CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
391
251
  cudnnAddTensor(cudnnHandle_t handle,
392
252
  const void *alpha,
393
253
  const cudnnTensorDescriptor_t aDesc,
@@ -408,27 +268,27 @@ typedef enum {
408
268
  CUDNN_OP_TENSOR_NOT = 5,
409
269
  } cudnnOpTensorOp_t;
410
270
 
411
- cudnnStatus_t CUDNNWINAPI
271
+ CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
412
272
  cudnnCreateOpTensorDescriptor(cudnnOpTensorDescriptor_t *opTensorDesc);
413
273
 
414
- cudnnStatus_t CUDNNWINAPI
274
+ CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
415
275
  cudnnSetOpTensorDescriptor(cudnnOpTensorDescriptor_t opTensorDesc,
416
276
  cudnnOpTensorOp_t opTensorOp,
417
277
  cudnnDataType_t opTensorCompType,
418
278
  cudnnNanPropagation_t opTensorNanOpt);
419
279
 
420
- cudnnStatus_t CUDNNWINAPI
280
+ CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
421
281
  cudnnGetOpTensorDescriptor(const cudnnOpTensorDescriptor_t opTensorDesc,
422
282
  cudnnOpTensorOp_t *opTensorOp,
423
283
  cudnnDataType_t *opTensorCompType,
424
284
  cudnnNanPropagation_t *opTensorNanOpt);
425
285
 
426
- cudnnStatus_t CUDNNWINAPI
286
+ CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
427
287
  cudnnDestroyOpTensorDescriptor(cudnnOpTensorDescriptor_t opTensorDesc);
428
288
 
429
289
  /* Tensor operation : C = op( alpha1 * A, alpha2 * B ) + beta * C */
430
290
  /* B tensor is ignored for CUDNN_OP_TENSOR_SQRT, CUDNN_OP_TENSOR_NOT. */
431
- cudnnStatus_t CUDNNWINAPI
291
+ CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
432
292
  cudnnOpTensor(cudnnHandle_t handle,
433
293
  const cudnnOpTensorDescriptor_t opTensorDesc,
434
294
  const void *alpha1,
@@ -441,28 +301,13 @@ cudnnOpTensor(cudnnHandle_t handle,
441
301
  const cudnnTensorDescriptor_t cDesc,
442
302
  void *C);
443
303
 
444
- /*
445
- * CUDNN ReduceTensor op type
446
- */
447
- typedef enum {
448
- CUDNN_REDUCE_TENSOR_ADD = 0,
449
- CUDNN_REDUCE_TENSOR_MUL = 1,
450
- CUDNN_REDUCE_TENSOR_MIN = 2,
451
- CUDNN_REDUCE_TENSOR_MAX = 3,
452
- CUDNN_REDUCE_TENSOR_AMAX = 4,
453
- CUDNN_REDUCE_TENSOR_AVG = 5,
454
- CUDNN_REDUCE_TENSOR_NORM1 = 6,
455
- CUDNN_REDUCE_TENSOR_NORM2 = 7,
456
- CUDNN_REDUCE_TENSOR_MUL_NO_ZEROS = 8,
457
- } cudnnReduceTensorOp_t;
458
-
459
304
  /*
460
305
  * CUDNN ReduceTensor indices type
461
306
  */
462
307
  typedef enum {
463
308
  CUDNN_REDUCE_TENSOR_NO_INDICES = 0,
464
309
  CUDNN_REDUCE_TENSOR_FLATTENED_INDICES = 1,
465
- } cudnnReduceTensorIndices_t;
310
+ } cudnnReduceTensorIndices_t CUDNN_DEPRECATED;
466
311
 
467
312
  /*
468
313
  * CUDNN tensor indices type size (all unsigned)
@@ -473,12 +318,12 @@ typedef enum {
473
318
  CUDNN_64BIT_INDICES = 1,
474
319
  CUDNN_16BIT_INDICES = 2,
475
320
  CUDNN_8BIT_INDICES = 3,
476
- } cudnnIndicesType_t;
321
+ } cudnnIndicesType_t CUDNN_DEPRECATED;
477
322
 
478
- cudnnStatus_t CUDNNWINAPI
323
+ CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
479
324
  cudnnCreateReduceTensorDescriptor(cudnnReduceTensorDescriptor_t *reduceTensorDesc);
480
325
 
481
- cudnnStatus_t CUDNNWINAPI
326
+ CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
482
327
  cudnnSetReduceTensorDescriptor(cudnnReduceTensorDescriptor_t reduceTensorDesc,
483
328
  cudnnReduceTensorOp_t reduceTensorOp,
484
329
  cudnnDataType_t reduceTensorCompType,
@@ -486,7 +331,7 @@ cudnnSetReduceTensorDescriptor(cudnnReduceTensorDescriptor_t reduceTensorDesc,
486
331
  cudnnReduceTensorIndices_t reduceTensorIndices,
487
332
  cudnnIndicesType_t reduceTensorIndicesType);
488
333
 
489
- cudnnStatus_t CUDNNWINAPI
334
+ CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
490
335
  cudnnGetReduceTensorDescriptor(const cudnnReduceTensorDescriptor_t reduceTensorDesc,
491
336
  cudnnReduceTensorOp_t *reduceTensorOp,
492
337
  cudnnDataType_t *reduceTensorCompType,
@@ -494,12 +339,12 @@ cudnnGetReduceTensorDescriptor(const cudnnReduceTensorDescriptor_t reduceTensorD
494
339
  cudnnReduceTensorIndices_t *reduceTensorIndices,
495
340
  cudnnIndicesType_t *reduceTensorIndicesType);
496
341
 
497
- cudnnStatus_t CUDNNWINAPI
342
+ CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
498
343
  cudnnDestroyReduceTensorDescriptor(cudnnReduceTensorDescriptor_t reduceTensorDesc);
499
344
 
500
345
  /* Helper function to return the minimum size of the index space to be passed to the reduction given the input and
501
346
  * output tensors */
502
- cudnnStatus_t CUDNNWINAPI
347
+ CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
503
348
  cudnnGetReductionIndicesSize(cudnnHandle_t handle,
504
349
  const cudnnReduceTensorDescriptor_t reduceTensorDesc,
505
350
  const cudnnTensorDescriptor_t aDesc,
@@ -508,7 +353,7 @@ cudnnGetReductionIndicesSize(cudnnHandle_t handle,
508
353
 
509
354
  /* Helper function to return the minimum size of the workspace to be passed to the reduction given the input and output
510
355
  * tensors */
511
- cudnnStatus_t CUDNNWINAPI
356
+ CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
512
357
  cudnnGetReductionWorkspaceSize(cudnnHandle_t handle,
513
358
  const cudnnReduceTensorDescriptor_t reduceTensorDesc,
514
359
  const cudnnTensorDescriptor_t aDesc,
@@ -518,7 +363,7 @@ cudnnGetReductionWorkspaceSize(cudnnHandle_t handle,
518
363
  /* Tensor operation : C = reduce op( alpha * A ) + beta * C */
519
364
  /* The NaN propagation enum applies to only the min and max reduce ops; the other reduce ops propagate NaN as usual. */
520
365
  /* The indices space is ignored for reduce ops other than min or max. */
521
- cudnnStatus_t CUDNNWINAPI
366
+ CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
522
367
  cudnnReduceTensor(cudnnHandle_t handle,
523
368
  const cudnnReduceTensorDescriptor_t reduceTensorDesc,
524
369
  void *indices,
@@ -537,14 +382,14 @@ cudnnStatus_t CUDNNWINAPI
537
382
  cudnnSetTensor(cudnnHandle_t handle, const cudnnTensorDescriptor_t yDesc, void *y, const void *valuePtr);
538
383
 
539
384
  /* Scale all values of a tensor by a given factor : y[i] = alpha * y[i] */
540
- cudnnStatus_t CUDNNWINAPI
385
+ CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
541
386
  cudnnScaleTensor(cudnnHandle_t handle, const cudnnTensorDescriptor_t yDesc, void *y, const void *alpha);
542
387
 
543
388
  /* Create an instance of FilterStruct */
544
- cudnnStatus_t CUDNNWINAPI
389
+ CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
545
390
  cudnnCreateFilterDescriptor(cudnnFilterDescriptor_t *filterDesc);
546
391
 
547
- cudnnStatus_t CUDNNWINAPI
392
+ CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
548
393
  cudnnSetFilter4dDescriptor(cudnnFilterDescriptor_t filterDesc,
549
394
  cudnnDataType_t dataType, /* image data type */
550
395
  cudnnTensorFormat_t format,
@@ -553,7 +398,7 @@ cudnnSetFilter4dDescriptor(cudnnFilterDescriptor_t filterDesc,
553
398
  int h, /* height of each input filter */
554
399
  int w); /* width of each input filter */
555
400
 
556
- cudnnStatus_t CUDNNWINAPI
401
+ CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
557
402
  cudnnGetFilter4dDescriptor(const cudnnFilterDescriptor_t filterDesc,
558
403
  cudnnDataType_t *dataType, /* image data type */
559
404
  cudnnTensorFormat_t *format,
@@ -562,24 +407,24 @@ cudnnGetFilter4dDescriptor(const cudnnFilterDescriptor_t filterDesc,
562
407
  int *h, /* height of each input filter */
563
408
  int *w); /* width of each input filter */
564
409
 
565
- cudnnStatus_t CUDNNWINAPI
410
+ CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
566
411
  cudnnSetFilterNdDescriptor(cudnnFilterDescriptor_t filterDesc,
567
412
  cudnnDataType_t dataType, /* image data type */
568
413
  cudnnTensorFormat_t format,
569
414
  int nbDims,
570
415
  const int filterDimA[]);
571
416
 
572
- cudnnStatus_t CUDNNWINAPI
417
+ CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
573
418
  cudnnGetFilterNdDescriptor(const cudnnFilterDescriptor_t filterDesc,
574
419
  int nbDimsRequested,
575
420
  cudnnDataType_t *dataType, /* image data type */
576
421
  cudnnTensorFormat_t *format,
577
422
  int *nbDims,
578
423
  int filterDimA[]);
579
- cudnnStatus_t CUDNNWINAPI
424
+ CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
580
425
  cudnnGetFilterSizeInBytes(const cudnnFilterDescriptor_t filterDesc, size_t *size);
581
426
 
582
- cudnnStatus_t CUDNNWINAPI
427
+ CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
583
428
  cudnnTransformFilter(cudnnHandle_t handle,
584
429
  const cudnnTensorTransformDescriptor_t transDesc,
585
430
  const void *alpha,
@@ -589,7 +434,7 @@ cudnnTransformFilter(cudnnHandle_t handle,
589
434
  const cudnnFilterDescriptor_t destDesc,
590
435
  void *destData);
591
436
 
592
- cudnnStatus_t CUDNNWINAPI
437
+ CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
593
438
  cudnnDestroyFilterDescriptor(cudnnFilterDescriptor_t filterDesc);
594
439
 
595
440
  /*
@@ -628,13 +473,13 @@ typedef enum {
628
473
  CUDNN_POOLING_AVERAGE_COUNT_INCLUDE_PADDING = 1, /* count for average includes padded values */
629
474
  CUDNN_POOLING_AVERAGE_COUNT_EXCLUDE_PADDING = 2, /* count for average does not include padded values */
630
475
  CUDNN_POOLING_MAX_DETERMINISTIC = 3
631
- } cudnnPoolingMode_t;
476
+ } cudnnPoolingMode_t CUDNN_DEPRECATED;
632
477
 
633
478
  /* Create an instance of pooling descriptor */
634
- cudnnStatus_t CUDNNWINAPI
479
+ CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
635
480
  cudnnCreatePoolingDescriptor(cudnnPoolingDescriptor_t *poolingDesc);
636
481
 
637
- cudnnStatus_t CUDNNWINAPI
482
+ CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
638
483
  cudnnSetPooling2dDescriptor(cudnnPoolingDescriptor_t poolingDesc,
639
484
  cudnnPoolingMode_t mode,
640
485
  cudnnNanPropagation_t maxpoolingNanOpt,
@@ -645,7 +490,7 @@ cudnnSetPooling2dDescriptor(cudnnPoolingDescriptor_t poolingDesc,
645
490
  int verticalStride,
646
491
  int horizontalStride);
647
492
 
648
- cudnnStatus_t CUDNNWINAPI
493
+ CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
649
494
  cudnnGetPooling2dDescriptor(const cudnnPoolingDescriptor_t poolingDesc,
650
495
  cudnnPoolingMode_t *mode,
651
496
  cudnnNanPropagation_t *maxpoolingNanOpt,
@@ -656,7 +501,7 @@ cudnnGetPooling2dDescriptor(const cudnnPoolingDescriptor_t poolingDesc,
656
501
  int *verticalStride,
657
502
  int *horizontalStride);
658
503
 
659
- cudnnStatus_t CUDNNWINAPI
504
+ CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
660
505
  cudnnSetPoolingNdDescriptor(cudnnPoolingDescriptor_t poolingDesc,
661
506
  const cudnnPoolingMode_t mode,
662
507
  const cudnnNanPropagation_t maxpoolingNanOpt,
@@ -665,7 +510,7 @@ cudnnSetPoolingNdDescriptor(cudnnPoolingDescriptor_t poolingDesc,
665
510
  const int paddingA[],
666
511
  const int strideA[]);
667
512
 
668
- cudnnStatus_t CUDNNWINAPI
513
+ CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
669
514
  cudnnGetPoolingNdDescriptor(const cudnnPoolingDescriptor_t poolingDesc,
670
515
  int nbDimsRequested,
671
516
  cudnnPoolingMode_t *mode,
@@ -675,13 +520,13 @@ cudnnGetPoolingNdDescriptor(const cudnnPoolingDescriptor_t poolingDesc,
675
520
  int paddingA[],
676
521
  int strideA[]);
677
522
 
678
- cudnnStatus_t CUDNNWINAPI
523
+ CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
679
524
  cudnnGetPoolingNdForwardOutputDim(const cudnnPoolingDescriptor_t poolingDesc,
680
525
  const cudnnTensorDescriptor_t inputTensorDesc,
681
526
  int nbDims,
682
527
  int outputTensorDimA[]);
683
528
 
684
- cudnnStatus_t CUDNNWINAPI
529
+ CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
685
530
  cudnnGetPooling2dForwardOutputDim(const cudnnPoolingDescriptor_t poolingDesc,
686
531
  const cudnnTensorDescriptor_t inputTensorDesc,
687
532
  int *n,
@@ -690,13 +535,13 @@ cudnnGetPooling2dForwardOutputDim(const cudnnPoolingDescriptor_t poolingDesc,
690
535
  int *w);
691
536
 
692
537
  /* Destroy an instance of pooling descriptor */
693
- cudnnStatus_t CUDNNWINAPI
538
+ CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
694
539
  cudnnDestroyPoolingDescriptor(cudnnPoolingDescriptor_t poolingDesc);
695
540
 
696
541
  /* Pooling functions: All of the form "output = alpha * Op(inputs) + beta * output" */
697
542
 
698
543
  /* Function to perform forward pooling */
699
- cudnnStatus_t CUDNNWINAPI
544
+ CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
700
545
  cudnnPoolingForward(cudnnHandle_t handle,
701
546
  const cudnnPoolingDescriptor_t poolingDesc,
702
547
  const void *alpha,
@@ -706,46 +551,33 @@ cudnnPoolingForward(cudnnHandle_t handle,
706
551
  const cudnnTensorDescriptor_t yDesc,
707
552
  void *y);
708
553
 
709
- /*
710
- * activation mode
711
- */
712
- typedef enum {
713
- CUDNN_ACTIVATION_SIGMOID = 0,
714
- CUDNN_ACTIVATION_RELU = 1,
715
- CUDNN_ACTIVATION_TANH = 2,
716
- CUDNN_ACTIVATION_CLIPPED_RELU = 3,
717
- CUDNN_ACTIVATION_ELU = 4,
718
- CUDNN_ACTIVATION_IDENTITY = 5,
719
- CUDNN_ACTIVATION_SWISH = 6
720
- } cudnnActivationMode_t;
721
-
722
554
  /* Activation functions: All of the form "output = alpha * Op(inputs) + beta * output" */
723
- cudnnStatus_t CUDNNWINAPI
555
+ CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
724
556
  cudnnCreateActivationDescriptor(cudnnActivationDescriptor_t *activationDesc);
725
557
 
726
- cudnnStatus_t CUDNNWINAPI
558
+ CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
727
559
  cudnnSetActivationDescriptor(cudnnActivationDescriptor_t activationDesc,
728
560
  cudnnActivationMode_t mode,
729
561
  cudnnNanPropagation_t reluNanOpt,
730
562
  double coef); /* ceiling for clipped RELU, alpha for ELU */
731
563
 
732
- cudnnStatus_t CUDNNWINAPI
564
+ CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
733
565
  cudnnGetActivationDescriptor(const cudnnActivationDescriptor_t activationDesc,
734
566
  cudnnActivationMode_t *mode,
735
567
  cudnnNanPropagation_t *reluNanOpt,
736
568
  double *coef); /* ceiling for clipped RELU, alpha for ELU */
737
569
 
738
- cudnnStatus_t CUDNNWINAPI
570
+ CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
739
571
  cudnnSetActivationDescriptorSwishBeta(cudnnActivationDescriptor_t activationDesc, double swish_beta);
740
572
 
741
- cudnnStatus_t CUDNNWINAPI
573
+ CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
742
574
  cudnnGetActivationDescriptorSwishBeta(cudnnActivationDescriptor_t activationDesc, double *swish_beta);
743
575
 
744
- cudnnStatus_t CUDNNWINAPI
576
+ CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
745
577
  cudnnDestroyActivationDescriptor(cudnnActivationDescriptor_t activationDesc);
746
578
 
747
579
  /* Function to perform forward activation */
748
- cudnnStatus_t CUDNNWINAPI
580
+ CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
749
581
  cudnnActivationForward(cudnnHandle_t handle,
750
582
  cudnnActivationDescriptor_t activationDesc,
751
583
  const void *alpha,
@@ -835,7 +667,7 @@ typedef enum {
835
667
  * May be faster than CUDNN_BATCHNORM_SPATIAL but imposes some limits on the range of values
836
668
  */
837
669
  CUDNN_BATCHNORM_SPATIAL_PERSISTENT = 2,
838
- } cudnnBatchNormMode_t;
670
+ } cudnnBatchNormMode_t CUDNN_DEPRECATED;
839
671
 
840
672
  #define CUDNN_BN_MIN_EPSILON 0.0 /* Minimum epsilon allowed to be used in the Batch Normalization formula */
841
673
 
@@ -844,7 +676,7 @@ typedef enum {
844
676
  * scale, invVariance, bnBias, bnScale tensors. Use this tensor desc for
845
677
  * bnScaleBiasMeanVarDesc and bnScaleBiasDiffDesc in Batch Normalization forward and backward functions.
846
678
  */
847
- cudnnStatus_t CUDNNWINAPI
679
+ CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
848
680
  cudnnDeriveBNTensorDescriptor(cudnnTensorDescriptor_t derivedBnDesc,
849
681
  const cudnnTensorDescriptor_t xDesc,
850
682
  cudnnBatchNormMode_t mode);
@@ -853,7 +685,7 @@ typedef enum {
853
685
  CUDNN_BATCHNORM_OPS_BN = 0, /* do batch normalization only */
854
686
  CUDNN_BATCHNORM_OPS_BN_ACTIVATION = 1, /* do batchNorm, then activation */
855
687
  CUDNN_BATCHNORM_OPS_BN_ADD_ACTIVATION = 2, /* do batchNorm, then elemWiseAdd, then activation */
856
- } cudnnBatchNormOps_t;
688
+ } cudnnBatchNormOps_t CUDNN_DEPRECATED;
857
689
 
858
690
  /*
859
691
  * Performs Batch Normalization during Inference:
@@ -862,7 +694,7 @@ typedef enum {
862
694
  * according to spatial or per-activation mode. Refer to cudnnBatchNormalizationForwardTraining
863
695
  * above for notes on function arguments.
864
696
  */
865
- cudnnStatus_t CUDNNWINAPI
697
+ CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
866
698
  cudnnBatchNormalizationForwardInference(cudnnHandle_t handle,
867
699
  cudnnBatchNormMode_t mode,
868
700
  const void *alpha, /* alpha[0] = result blend factor */
@@ -884,16 +716,16 @@ typedef enum {
884
716
 
885
717
  /* bnScale, bnBias tensor dims are 1xCx1x1 (one value per C-dim normalized over Nx1xHxW subtensors) */
886
718
  CUDNN_NORM_PER_CHANNEL = 1,
887
- } cudnnNormMode_t;
719
+ } cudnnNormMode_t CUDNN_DEPRECATED;
888
720
 
889
- typedef enum { CUDNN_NORM_ALGO_STANDARD = 0, CUDNN_NORM_ALGO_PERSIST = 1 } cudnnNormAlgo_t;
721
+ typedef enum { CUDNN_NORM_ALGO_STANDARD = 0, CUDNN_NORM_ALGO_PERSIST = 1 } cudnnNormAlgo_t CUDNN_DEPRECATED;
890
722
 
891
723
  /*
892
724
  * Derives a tensor descriptor from layer data descriptor for Normalization
893
725
  * scale, invVariance, bnBias, bnScale tensors. Use this tensor desc for
894
726
  * normScaleBiasMeanVarDesc and normScaleBiasDiffDesc in Normalization forward and backward functions.
895
727
  */
896
- cudnnStatus_t CUDNNWINAPI
728
+ CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
897
729
  cudnnDeriveNormTensorDescriptor(cudnnTensorDescriptor_t derivedNormScaleBiasDesc,
898
730
  cudnnTensorDescriptor_t derivedNormMeanVarDesc,
899
731
  const cudnnTensorDescriptor_t xDesc,
@@ -904,7 +736,7 @@ typedef enum {
904
736
  CUDNN_NORM_OPS_NORM = 0, /* do normalization only */
905
737
  CUDNN_NORM_OPS_NORM_ACTIVATION = 1, /* do Norm, then activation */
906
738
  CUDNN_NORM_OPS_NORM_ADD_ACTIVATION = 2, /* do Norm, then elemWiseAdd, then activation */
907
- } cudnnNormOps_t;
739
+ } cudnnNormOps_t CUDNN_DEPRECATED;
908
740
 
909
741
  /*
910
742
  * Performs Normalization during Inference:
@@ -913,7 +745,7 @@ typedef enum {
913
745
  * according to per-channel or per-activation mode. Refer to cudnnNormalizationForwardTraining
914
746
  * above for notes on function arguments.
915
747
  */
916
- cudnnStatus_t CUDNNWINAPI
748
+ CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
917
749
  cudnnNormalizationForwardInference(cudnnHandle_t handle,
918
750
  cudnnNormMode_t mode,
919
751
  cudnnNormOps_t normOps,
@@ -1021,11 +853,6 @@ cudnnDropoutForward(cudnnHandle_t handle,
1021
853
  void *reserveSpace,
1022
854
  size_t reserveSpaceSizeInBytes);
1023
855
 
1024
- /* TODO: remove */
1025
-
1026
- typedef struct cudnnAlgorithmStruct *cudnnAlgorithmDescriptor_t;
1027
- typedef struct cudnnAlgorithmPerformanceStruct *cudnnAlgorithmPerformance_t;
1028
-
1029
856
  /* TODO: move these enums out to the appropriate submodule */
1030
857
  typedef enum {
1031
858
  CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM = 0,
@@ -1060,124 +887,430 @@ typedef enum {
1060
887
  CUDNN_CONVOLUTION_BWD_DATA_ALGO_COUNT = 6
1061
888
  } cudnnConvolutionBwdDataAlgo_t;
1062
889
 
1063
- typedef enum {
1064
- CUDNN_RNN_ALGO_STANDARD = 0,
1065
- CUDNN_RNN_ALGO_PERSIST_STATIC = 1,
1066
- CUDNN_RNN_ALGO_PERSIST_DYNAMIC = 2,
1067
- CUDNN_RNN_ALGO_PERSIST_STATIC_SMALL_H = 3,
1068
- CUDNN_RNN_ALGO_COUNT = 4,
1069
- } cudnnRNNAlgo_t;
1070
-
1071
890
  typedef enum { CUDNN_CTC_LOSS_ALGO_DETERMINISTIC = 0, CUDNN_CTC_LOSS_ALGO_NON_DETERMINISTIC = 1 } cudnnCTCLossAlgo_t;
1072
891
 
1073
- /* TODO: remove */
1074
- typedef struct cudnnAlgorithmUnionStruct {
1075
- union Algorithm {
1076
- cudnnConvolutionFwdAlgo_t convFwdAlgo;
1077
- cudnnConvolutionBwdFilterAlgo_t convBwdFilterAlgo;
1078
- cudnnConvolutionBwdDataAlgo_t convBwdDataAlgo;
1079
- cudnnRNNAlgo_t RNNAlgo;
1080
- cudnnCTCLossAlgo_t CTCLossAlgo;
1081
- } algo;
1082
- } cudnnAlgorithm_t;
892
+ /*
893
+ * \brief Cross-library version checker.
894
+ * This function is implemented differently in each sub-library. Each sublib
895
+ * checks whether its own version matches that of its dependencies.
896
+ * \returns CUDNN_STATUS_SUCCESS if the version check passes,
897
+ * CUDNN_STATUS_SUBLIBRARY_VERSION_MISMATCH if the versions are inconsistent.
898
+ */
899
+ cudnnStatus_t CUDNNWINAPI
900
+ cudnnOpsVersionCheck(void);
1083
901
 
1084
- CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
1085
- cudnnCreateAlgorithmDescriptor(cudnnAlgorithmDescriptor_t *algoDesc);
902
+ /* Function to perform backward softmax */
903
+ cudnnStatus_t CUDNNWINAPI
904
+ cudnnSoftmaxBackward(cudnnHandle_t handle,
905
+ cudnnSoftmaxAlgorithm_t algo,
906
+ cudnnSoftmaxMode_t mode,
907
+ const void *alpha,
908
+ const cudnnTensorDescriptor_t yDesc,
909
+ const void *y,
910
+ const cudnnTensorDescriptor_t dyDesc,
911
+ const void *dy,
912
+ const void *beta,
913
+ const cudnnTensorDescriptor_t dxDesc,
914
+ void *dx);
1086
915
 
916
+ /* Function to perform backward pooling */
1087
917
  CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
1088
- cudnnSetAlgorithmDescriptor(cudnnAlgorithmDescriptor_t algoDesc, cudnnAlgorithm_t algorithm);
918
+ cudnnPoolingBackward(cudnnHandle_t handle,
919
+ const cudnnPoolingDescriptor_t poolingDesc,
920
+ const void *alpha,
921
+ const cudnnTensorDescriptor_t yDesc,
922
+ const void *y,
923
+ const cudnnTensorDescriptor_t dyDesc,
924
+ const void *dy,
925
+ const cudnnTensorDescriptor_t xDesc,
926
+ const void *x,
927
+ const void *beta,
928
+ const cudnnTensorDescriptor_t dxDesc,
929
+ void *dx);
1089
930
 
931
+ /* Function to perform backward activation */
1090
932
  CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
1091
- cudnnGetAlgorithmDescriptor(const cudnnAlgorithmDescriptor_t algoDesc, cudnnAlgorithm_t *algorithm);
933
+ cudnnActivationBackward(cudnnHandle_t handle,
934
+ cudnnActivationDescriptor_t activationDesc,
935
+ const void *alpha,
936
+ const cudnnTensorDescriptor_t yDesc,
937
+ const void *y,
938
+ const cudnnTensorDescriptor_t dyDesc,
939
+ const void *dy,
940
+ const cudnnTensorDescriptor_t xDesc,
941
+ const void *x,
942
+ const void *beta,
943
+ const cudnnTensorDescriptor_t dxDesc,
944
+ void *dx);
945
+
946
+ /* LRN cross-channel backward computation. Double parameters cast to tensor data type */
947
+ cudnnStatus_t CUDNNWINAPI
948
+ cudnnLRNCrossChannelBackward(cudnnHandle_t handle,
949
+ cudnnLRNDescriptor_t normDesc,
950
+ cudnnLRNMode_t lrnMode,
951
+ const void *alpha,
952
+ const cudnnTensorDescriptor_t yDesc,
953
+ const void *y,
954
+ const cudnnTensorDescriptor_t dyDesc,
955
+ const void *dy,
956
+ const cudnnTensorDescriptor_t xDesc,
957
+ const void *x,
958
+ const void *beta,
959
+ const cudnnTensorDescriptor_t dxDesc,
960
+ void *dx);
961
+
962
+ cudnnStatus_t CUDNNWINAPI
963
+ cudnnDivisiveNormalizationBackward(cudnnHandle_t handle,
964
+ cudnnLRNDescriptor_t normDesc,
965
+ cudnnDivNormMode_t mode,
966
+ const void *alpha,
967
+ const cudnnTensorDescriptor_t xDesc, /* same desc for x, means, dy, temp, temp2 */
968
+ const void *x,
969
+ const void *means, /* if NULL, means are assumed to be zero */
970
+ const void *dy,
971
+ void *temp,
972
+ void *temp2,
973
+ const void *beta,
974
+ const cudnnTensorDescriptor_t dXdMeansDesc, /* same desc for dx, dMeans */
975
+ void *dx, /* output x differential */
976
+ void *dMeans); /* output means differential, can be NULL */
1092
977
 
1093
978
  CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
1094
- cudnnCopyAlgorithmDescriptor(const cudnnAlgorithmDescriptor_t src, cudnnAlgorithmDescriptor_t dest);
979
+ cudnnGetBatchNormalizationForwardTrainingExWorkspaceSize(cudnnHandle_t handle,
980
+ cudnnBatchNormMode_t mode,
981
+ cudnnBatchNormOps_t bnOps,
982
+ const cudnnTensorDescriptor_t xDesc,
983
+ const cudnnTensorDescriptor_t zDesc,
984
+ const cudnnTensorDescriptor_t yDesc,
985
+ const cudnnTensorDescriptor_t bnScaleBiasMeanVarDesc,
986
+ const cudnnActivationDescriptor_t activationDesc,
987
+ size_t *sizeInBytes);
1095
988
 
1096
989
  CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
1097
- cudnnDestroyAlgorithmDescriptor(cudnnAlgorithmDescriptor_t algoDesc);
990
+ cudnnGetBatchNormalizationBackwardExWorkspaceSize(cudnnHandle_t handle,
991
+ cudnnBatchNormMode_t mode,
992
+ cudnnBatchNormOps_t bnOps,
993
+ const cudnnTensorDescriptor_t xDesc,
994
+ const cudnnTensorDescriptor_t yDesc,
995
+ const cudnnTensorDescriptor_t dyDesc,
996
+ const cudnnTensorDescriptor_t dzDesc,
997
+ const cudnnTensorDescriptor_t dxDesc,
998
+ const cudnnTensorDescriptor_t dBnScaleBiasDesc,
999
+ const cudnnActivationDescriptor_t activationDesc,
1000
+ size_t *sizeInBytes);
1098
1001
 
1099
1002
  CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
1100
- cudnnCreateAlgorithmPerformance(cudnnAlgorithmPerformance_t *algoPerf, int numberToCreate);
1101
-
1003
+ cudnnGetBatchNormalizationTrainingExReserveSpaceSize(cudnnHandle_t handle,
1004
+ cudnnBatchNormMode_t mode,
1005
+ cudnnBatchNormOps_t bnOps,
1006
+ const cudnnActivationDescriptor_t activationDesc,
1007
+ const cudnnTensorDescriptor_t xDesc,
1008
+ size_t *sizeInBytes);
1009
+
1010
+ /* Computes y = BN(x). Also accumulates moving averages of mean and variance */
1102
1011
  CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
1103
- cudnnSetAlgorithmPerformance(cudnnAlgorithmPerformance_t algoPerf,
1104
- cudnnAlgorithmDescriptor_t algoDesc,
1105
- cudnnStatus_t status,
1106
- float time,
1107
- size_t memory);
1108
-
1012
+ cudnnBatchNormalizationForwardTraining(
1013
+ cudnnHandle_t handle,
1014
+ cudnnBatchNormMode_t mode,
1015
+
1016
+ const void *alpha, /* alpha[0] = result blend factor */
1017
+ const void *beta, /* beta[0] = dest layer blend factor */
1018
+
1019
+ const cudnnTensorDescriptor_t xDesc,
1020
+ const void *x, /* NxCxHxW */
1021
+ const cudnnTensorDescriptor_t yDesc,
1022
+ void *y, /* NxCxHxW */
1023
+
1024
+ /* Shared desc for the next 6 tensors in the argument list.
1025
+ Data type to be set as follows:
1026
+ type = (typeOf(x) == double) ? double : float
1027
+ Dimensions for this descriptor depend on normalization mode
1028
+ - Spatial Normalization : tensors are expected to have dims 1xCx1x1
1029
+ (normalization is performed across NxHxW)
1030
+ - Per-Activation Normalization : tensors are expected to have dims of 1xCxHxW
1031
+ (normalization is performed across N) */
1032
+ const cudnnTensorDescriptor_t bnScaleBiasMeanVarDesc,
1033
+
1034
+ /* 'Gamma' and 'Beta' respectively in Ioffe and Szegedy's paper's notation */
1035
+ const void *bnScale,
1036
+ const void *bnBias,
1037
+
1038
+ /* MUST use factor=1 in the very first call of a complete training cycle.
1039
+ Use a factor=1/(1+n) at the (n+1)-th call to the function to get
1040
+ Cumulative Moving Average (CMA) behavior
1041
+ CMA[n] = (x[1]+...+x[n])/n
1042
+ Since CMA[n+1] = (n*CMA[n]+x[n+1])/(n+1) =
1043
+ ((n+1)*CMA[n]-CMA[n])/(n+1) + x[n+1]/(n+1) =
1044
+ CMA[n]*(1-1/(n+1)) + x[n+1]*1/(n+1) */
1045
+ double exponentialAverageFactor,
1046
+
1047
+ /* Used in Training phase only.
1048
+ runningMean = newMean*factor + runningMean*(1-factor) */
1049
+ void *resultRunningMean,
1050
+ /* Output in training mode, input in inference. Is the moving average
1051
+ of variance[x] (factor is applied in the same way as for runningMean) */
1052
+ void *resultRunningVariance,
1053
+
1054
+ /* Has to be >= CUDNN_BN_MIN_EPSILON. Should be the same in forward and backward functions. */
1055
+ double epsilon,
1056
+
1057
+ /* Optionally save intermediate results from the forward pass here
1058
+ - can be reused to speed up backward pass. NULL if unused */
1059
+ void *resultSaveMean,
1060
+ void *resultSaveInvVariance);
1061
+
1062
+ /* Computes y = relu(BN(x) + z). Also accumulates moving averages of mean and variance */
1109
1063
  CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
1110
- cudnnGetAlgorithmPerformance(const cudnnAlgorithmPerformance_t algoPerf,
1111
- cudnnAlgorithmDescriptor_t *algoDesc,
1112
- cudnnStatus_t *status,
1113
- float *time,
1114
- size_t *memory);
1115
-
1064
+ cudnnBatchNormalizationForwardTrainingEx(
1065
+ cudnnHandle_t handle,
1066
+ cudnnBatchNormMode_t mode,
1067
+ cudnnBatchNormOps_t bnOps,
1068
+
1069
+ const void *alpha, /* alpha[0] = result blend factor */
1070
+ const void *beta, /* beta[0] = dest layer blend factor */
1071
+
1072
+ const cudnnTensorDescriptor_t xDesc,
1073
+ const void *xData,
1074
+ const cudnnTensorDescriptor_t zDesc,
1075
+ const void *zData,
1076
+ const cudnnTensorDescriptor_t yDesc,
1077
+ void *yData,
1078
+
1079
+ const cudnnTensorDescriptor_t bnScaleBiasMeanVarDesc,
1080
+ const void *bnScale,
1081
+ const void *bnBias,
1082
+
1083
+ double exponentialAverageFactor,
1084
+ void *resultRunningMean,
1085
+ void *resultRunningVariance,
1086
+
1087
+ /* Has to be >= CUDNN_BN_MIN_EPSILON. Should be the same in forward and backward functions. */
1088
+ double epsilon,
1089
+
1090
+ /* Optionally save intermediate results from the forward pass here
1091
+ - can be reused to speed up backward pass. NULL if unused */
1092
+ void *resultSaveMean,
1093
+ void *resultSaveInvVariance,
1094
+
1095
+ cudnnActivationDescriptor_t activationDesc,
1096
+ void *workspace,
1097
+ size_t workSpaceSizeInBytes,
1098
+ void *reserveSpace,
1099
+ size_t reserveSpaceSizeInBytes);
1100
+
1101
+ /* Performs backward pass of Batch Normalization layer. Returns x gradient,
1102
+ * bnScale gradient and bnBias gradient */
1116
1103
  CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
1117
- cudnnDestroyAlgorithmPerformance(cudnnAlgorithmPerformance_t *algoPerf, int numberToDestroy);
1104
+ cudnnBatchNormalizationBackward(cudnnHandle_t handle,
1105
+ cudnnBatchNormMode_t mode,
1106
+ const void *alphaDataDiff,
1107
+ const void *betaDataDiff,
1108
+ const void *alphaParamDiff,
1109
+ const void *betaParamDiff,
1110
+ const cudnnTensorDescriptor_t xDesc, /* same desc for x, dx, dy */
1111
+ const void *x,
1112
+ const cudnnTensorDescriptor_t dyDesc,
1113
+ const void *dy,
1114
+ const cudnnTensorDescriptor_t dxDesc,
1115
+ void *dx,
1116
+ /* Shared tensor desc for the 4 tensors below */
1117
+ const cudnnTensorDescriptor_t dBnScaleBiasDesc,
1118
+ const void *bnScale, /* bnBias doesn't affect backpropagation */
1119
+ /* scale and bias diff are not backpropagated below this layer */
1120
+ void *dBnScaleResult,
1121
+ void *dBnBiasResult,
1122
+ /* Same epsilon as forward pass */
1123
+ double epsilon,
1124
+
1125
+ /* Optionally cached intermediate results from
1126
+ forward pass */
1127
+ const void *savedMean,
1128
+ const void *savedInvVariance);
1118
1129
 
1119
1130
  CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
1120
- cudnnGetAlgorithmSpaceSize(cudnnHandle_t handle, cudnnAlgorithmDescriptor_t algoDesc, size_t *algoSpaceSizeInBytes);
1131
+ cudnnBatchNormalizationBackwardEx(cudnnHandle_t handle,
1132
+ cudnnBatchNormMode_t mode,
1133
+ cudnnBatchNormOps_t bnOps,
1134
+
1135
+ const void *alphaDataDiff,
1136
+ const void *betaDataDiff,
1137
+ const void *alphaParamDiff,
1138
+ const void *betaParamDiff,
1139
+ const cudnnTensorDescriptor_t xDesc,
1140
+ const void *xData,
1141
+ const cudnnTensorDescriptor_t yDesc,
1142
+ const void *yData,
1143
+ const cudnnTensorDescriptor_t dyDesc,
1144
+ const void *dyData,
1145
+ const cudnnTensorDescriptor_t dzDesc,
1146
+ void *dzData,
1147
+ const cudnnTensorDescriptor_t dxDesc,
1148
+ void *dxData,
1149
+
1150
+ /* Shared tensor desc for the 4 tensors below */
1151
+ const cudnnTensorDescriptor_t dBnScaleBiasDesc,
1152
+ const void *bnScaleData,
1153
+ const void *bnBiasData, /* needed if there is activation */
1154
+ void *dBnScaleData,
1155
+ void *dBnBiasData,
1156
+ double epsilon, /* Same epsilon as forward pass */
1157
+
1158
+ /* Optionally cached intermediate results from
1159
+ forward pass */
1160
+ const void *savedMean,
1161
+ const void *savedInvVariance,
1162
+ cudnnActivationDescriptor_t activationDesc,
1163
+ void *workSpace,
1164
+ size_t workSpaceSizeInBytes,
1165
+ void *reserveSpace,
1166
+ size_t reserveSpaceSizeInBytes);
1121
1167
 
1122
1168
  CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
1123
- cudnnSaveAlgorithm(cudnnHandle_t handle,
1124
- cudnnAlgorithmDescriptor_t algoDesc,
1125
- void *algoSpace,
1126
- size_t algoSpaceSizeInBytes);
1169
+ cudnnGetNormalizationForwardTrainingWorkspaceSize(cudnnHandle_t handle,
1170
+ cudnnNormMode_t mode,
1171
+ cudnnNormOps_t normOps,
1172
+ cudnnNormAlgo_t algo,
1173
+ const cudnnTensorDescriptor_t xDesc,
1174
+ const cudnnTensorDescriptor_t zDesc,
1175
+ const cudnnTensorDescriptor_t yDesc,
1176
+ const cudnnTensorDescriptor_t normScaleBiasDesc,
1177
+ const cudnnActivationDescriptor_t activationDesc,
1178
+ const cudnnTensorDescriptor_t normMeanVarDesc,
1179
+ size_t *sizeInBytes,
1180
+ int groupCnt); /* Placeholder for future work; should be set to 1 for now */
1127
1181
 
1128
1182
  CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
1129
- cudnnRestoreAlgorithm(cudnnHandle_t handle,
1130
- void *algoSpace,
1131
- size_t algoSpaceSizeInBytes,
1132
- cudnnAlgorithmDescriptor_t algoDesc);
1133
-
1134
- typedef enum {
1135
- CUDNN_SEV_FATAL = 0,
1136
- CUDNN_SEV_ERROR = 1,
1137
- CUDNN_SEV_WARNING = 2,
1138
- CUDNN_SEV_INFO = 3,
1139
- } cudnnSeverity_t;
1140
-
1141
- /* Message masks to be used with cudnnSetCallback() */
1142
- #define CUDNN_SEV_ERROR_EN (1U << CUDNN_SEV_ERROR)
1143
- #define CUDNN_SEV_WARNING_EN (1U << CUDNN_SEV_WARNING)
1144
- #define CUDNN_SEV_INFO_EN (1U << CUDNN_SEV_INFO)
1145
-
1146
- /* struct containing useful informaiton for each API call */
1147
- typedef struct cudnnDebugStruct {
1148
- unsigned cudnn_version;
1149
- cudnnStatus_t cudnnStatus;
1150
- unsigned time_sec; /* epoch time in seconds */
1151
- unsigned time_usec; /* microseconds part of epoch time */
1152
- unsigned time_delta; /* time since start in seconds */
1153
- cudnnHandle_t handle; /* cudnn handle */
1154
- cudaStream_t stream; /* cuda stream ID */
1155
- unsigned long long pid; /* process ID */
1156
- unsigned long long tid; /* thread ID */
1157
- int cudaDeviceId; /* CUDA device ID */
1158
- int reserved[15]; /* reserved for future use */
1159
- } cudnnDebug_t;
1160
-
1161
- typedef void (*cudnnCallback_t)(cudnnSeverity_t sev, void *udata, const cudnnDebug_t *dbg, const char *msg);
1183
+ cudnnGetNormalizationBackwardWorkspaceSize(cudnnHandle_t handle,
1184
+ cudnnNormMode_t mode,
1185
+ cudnnNormOps_t normOps,
1186
+ cudnnNormAlgo_t algo,
1187
+ const cudnnTensorDescriptor_t xDesc,
1188
+ const cudnnTensorDescriptor_t yDesc,
1189
+ const cudnnTensorDescriptor_t dyDesc,
1190
+ const cudnnTensorDescriptor_t dzDesc,
1191
+ const cudnnTensorDescriptor_t dxDesc,
1192
+ const cudnnTensorDescriptor_t dNormScaleBiasDesc,
1193
+ const cudnnActivationDescriptor_t activationDesc,
1194
+ const cudnnTensorDescriptor_t normMeanVarDesc,
1195
+ size_t *sizeInBytes,
1196
+ int groupCnt); /* Placeholder for future work; should be set to 1 for now */
1162
1197
 
1163
- cudnnStatus_t CUDNNWINAPI
1164
- cudnnSetCallback(unsigned mask, void *udata, cudnnCallback_t fptr);
1165
-
1166
- cudnnStatus_t CUDNNWINAPI
1167
- cudnnGetCallback(unsigned *mask, void **udata, cudnnCallback_t *fptr);
1198
+ CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
1199
+ cudnnGetNormalizationTrainingReserveSpaceSize(cudnnHandle_t handle,
1200
+ cudnnNormMode_t mode,
1201
+ cudnnNormOps_t normOps,
1202
+ cudnnNormAlgo_t algo,
1203
+ const cudnnActivationDescriptor_t activationDesc,
1204
+ const cudnnTensorDescriptor_t xDesc,
1205
+ size_t *sizeInBytes,
1206
+ int groupCnt); /* Placeholder for future work; should be set to 1 for now */
1207
+
1208
+ /* Computes y = relu(Norm(x) + z). Also accumulates moving averages of mean and variance */
1209
+ CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
1210
+ cudnnNormalizationForwardTraining(cudnnHandle_t handle,
1211
+ cudnnNormMode_t mode,
1212
+ cudnnNormOps_t normOps,
1213
+ cudnnNormAlgo_t algo,
1214
+ const void *alpha, /* alpha[0] = result blend factor */
1215
+ const void *beta, /* beta[0] = dest layer blend factor */
1216
+ const cudnnTensorDescriptor_t xDesc,
1217
+ const void *xData,
1218
+ const cudnnTensorDescriptor_t normScaleBiasDesc,
1219
+ const void *normScale,
1220
+ const void *normBias,
1221
+ double exponentialAverageFactor,
1222
+ const cudnnTensorDescriptor_t normMeanVarDesc,
1223
+ void *resultRunningMean,
1224
+ void *resultRunningVariance,
1225
+ /* Has to be >= 0. Should be the same in forward and backward functions. */
1226
+ double epsilon,
1227
+ /* Optionally save intermediate results from the forward pass here
1228
+ - can be reused to speed up backward pass. NULL if unused */
1229
+ void *resultSaveMean,
1230
+ void *resultSaveInvVariance,
1231
+ cudnnActivationDescriptor_t activationDesc,
1232
+ const cudnnTensorDescriptor_t zDesc,
1233
+ const void *zData,
1234
+ const cudnnTensorDescriptor_t yDesc,
1235
+ void *yData,
1236
+ void *workspace,
1237
+ size_t workSpaceSizeInBytes,
1238
+ void *reserveSpace,
1239
+ size_t reserveSpaceSizeInBytes,
1240
+ int groupCnt); /* Placeholder for future work; should be set to 1 for now */
1168
1241
 
1169
- /*
1170
- * \brief Cross-library version checker.
1171
- * This function is implemented differently in each sub-library. Each sublib
1172
- * checks whether its own version matches that of its dependencies.
1173
- * \returns CUDNN_STATUS_SUCCESS if the version check passes,
1174
- * CUDNN_STATUS_VERSION_MISMATCH if the versions are inconsistent.
1175
- */
1176
- cudnnStatus_t CUDNNWINAPI
1177
- cudnnOpsInferVersionCheck(void);
1242
+ CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
1243
+ cudnnNormalizationBackward(cudnnHandle_t handle,
1244
+ cudnnNormMode_t mode,
1245
+ cudnnNormOps_t normOps,
1246
+ cudnnNormAlgo_t algo,
1247
+ const void *alphaDataDiff,
1248
+ const void *betaDataDiff,
1249
+ const void *alphaParamDiff,
1250
+ const void *betaParamDiff,
1251
+ const cudnnTensorDescriptor_t xDesc,
1252
+ const void *xData,
1253
+ const cudnnTensorDescriptor_t yDesc,
1254
+ const void *yData,
1255
+ const cudnnTensorDescriptor_t dyDesc,
1256
+ const void *dyData,
1257
+ const cudnnTensorDescriptor_t dzDesc,
1258
+ void *dzData,
1259
+ const cudnnTensorDescriptor_t dxDesc,
1260
+ void *dxData,
1261
+ /* Shared tensor desc for the 4 tensors below */
1262
+ const cudnnTensorDescriptor_t dNormScaleBiasDesc,
1263
+ const void *normScaleData,
1264
+ const void *normBiasData, /* needed if there is activation */
1265
+ void *dNormScaleData,
1266
+ void *dNormBiasData,
1267
+ double epsilon, /* Same epsilon as forward pass */
1268
+ const cudnnTensorDescriptor_t normMeanVarDesc,
1269
+ /* Optionally cached intermediate results from
1270
+ forward pass */
1271
+ const void *savedMean,
1272
+ const void *savedInvVariance,
1273
+ cudnnActivationDescriptor_t activationDesc,
1274
+ void *workSpace,
1275
+ size_t workSpaceSizeInBytes,
1276
+ void *reserveSpace,
1277
+ size_t reserveSpaceSizeInBytes,
1278
+ int groupCnt); /* Placeholder for future work; should be set to 1 for now */
1279
+
1280
+ cudnnStatus_t CUDNNWINAPI
1281
+ cudnnSpatialTfGridGeneratorBackward(cudnnHandle_t handle,
1282
+ const cudnnSpatialTransformerDescriptor_t stDesc,
1283
+ const void *dgrid,
1284
+ void *dtheta);
1285
+
1286
+ cudnnStatus_t CUDNNWINAPI
1287
+ cudnnSpatialTfSamplerBackward(cudnnHandle_t handle,
1288
+ cudnnSpatialTransformerDescriptor_t stDesc,
1289
+ const void *alpha,
1290
+ const cudnnTensorDescriptor_t xDesc,
1291
+ const void *x,
1292
+ const void *beta,
1293
+ const cudnnTensorDescriptor_t dxDesc,
1294
+ void *dx,
1295
+ const void *alphaDgrid,
1296
+ const cudnnTensorDescriptor_t dyDesc,
1297
+ const void *dy,
1298
+ const void *grid,
1299
+ const void *betaDgrid,
1300
+ void *dgrid);
1301
+
1302
+ cudnnStatus_t CUDNNWINAPI
1303
+ cudnnDropoutBackward(cudnnHandle_t handle,
1304
+ const cudnnDropoutDescriptor_t dropoutDesc,
1305
+ const cudnnTensorDescriptor_t dydesc,
1306
+ const void *dy,
1307
+ const cudnnTensorDescriptor_t dxdesc,
1308
+ void *dx,
1309
+ void *reserveSpace,
1310
+ size_t reserveSpaceSizeInBytes);
1178
1311
 
1179
1312
  #if defined(__cplusplus)
1180
1313
  }
1181
1314
  #endif
1182
1315
 
1183
- #endif /* CUDNN_OPS_INFER_H_ */
1316
+ #endif /* CUDNN_OPS_H_ */