nvidia-cudnn-cu12 8.9.7.29__py3-none-win_amd64.whl → 9.0.0.312__py3-none-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nvidia/cudnn/bin/cudnn64_9.dll +0 -0
- nvidia/cudnn/bin/{cudnn_adv_infer64_8.dll → cudnn_adv64_9.dll} +0 -0
- nvidia/cudnn/bin/cudnn_cnn64_9.dll +0 -0
- nvidia/cudnn/bin/{cudnn_cnn_infer64_8.dll → cudnn_engines_precompiled64_9.dll} +0 -0
- nvidia/cudnn/bin/cudnn_engines_runtime_compiled64_9.dll +0 -0
- nvidia/cudnn/bin/cudnn_graph64_9.dll +0 -0
- nvidia/cudnn/bin/{cudnn_ops_infer64_8.dll → cudnn_heuristic64_9.dll} +0 -0
- nvidia/cudnn/bin/{cudnn_adv_train64_8.dll → cudnn_ops64_9.dll} +0 -0
- nvidia/cudnn/include/cudnn.h +8 -18
- nvidia/cudnn/include/{cudnn_adv_infer.h → cudnn_adv.h} +265 -252
- nvidia/cudnn/include/cudnn_backend.h +3 -558
- nvidia/cudnn/include/{cudnn_cnn_infer.h → cudnn_cnn.h} +187 -65
- nvidia/cudnn/include/cudnn_graph.h +908 -0
- nvidia/cudnn/include/{cudnn_ops_infer.h → cudnn_ops.h} +469 -336
- nvidia/cudnn/include/cudnn_version.h +4 -43
- nvidia/cudnn/lib/x64/cudnn.lib +0 -0
- nvidia/cudnn/lib/x64/cudnn64_9.lib +0 -0
- nvidia/cudnn/lib/x64/cudnn_adv.lib +0 -0
- nvidia/cudnn/lib/x64/cudnn_adv64_9.lib +0 -0
- nvidia/cudnn/lib/x64/cudnn_cnn.lib +0 -0
- nvidia/cudnn/lib/x64/cudnn_cnn64_9.lib +0 -0
- nvidia/cudnn/lib/x64/cudnn_engines_precompiled.lib +0 -0
- nvidia/cudnn/lib/x64/cudnn_engines_precompiled64_9.lib +0 -0
- nvidia/cudnn/lib/x64/cudnn_engines_runtime_compiled.lib +0 -0
- nvidia/cudnn/lib/x64/cudnn_engines_runtime_compiled64_9.lib +0 -0
- nvidia/cudnn/lib/x64/cudnn_graph.lib +0 -0
- nvidia/cudnn/lib/x64/cudnn_graph64_9.lib +0 -0
- nvidia/cudnn/lib/x64/cudnn_heuristic.lib +0 -0
- nvidia/cudnn/lib/x64/cudnn_heuristic64_9.lib +0 -0
- nvidia/cudnn/lib/x64/cudnn_ops.lib +0 -0
- nvidia/cudnn/lib/x64/cudnn_ops64_9.lib +0 -0
- {nvidia_cudnn_cu12-8.9.7.29.dist-info → nvidia_cudnn_cu12-9.0.0.312.dist-info}/METADATA +1 -1
- nvidia_cudnn_cu12-9.0.0.312.dist-info/RECORD +41 -0
- nvidia/cudnn/bin/cudnn64_8.dll +0 -0
- nvidia/cudnn/bin/cudnn_cnn_train64_8.dll +0 -0
- nvidia/cudnn/bin/cudnn_ops_train64_8.dll +0 -0
- nvidia/cudnn/include/cudnn_adv_train.h +0 -540
- nvidia/cudnn/include/cudnn_cnn_train.h +0 -219
- nvidia/cudnn/include/cudnn_ops_train.h +0 -501
- nvidia/cudnn/lib/x64/cudnn64_8.lib +0 -0
- nvidia/cudnn/lib/x64/cudnn_adv_infer.lib +0 -0
- nvidia/cudnn/lib/x64/cudnn_adv_infer64_8.lib +0 -0
- nvidia/cudnn/lib/x64/cudnn_adv_train.lib +0 -0
- nvidia/cudnn/lib/x64/cudnn_adv_train64_8.lib +0 -0
- nvidia/cudnn/lib/x64/cudnn_cnn_infer.lib +0 -0
- nvidia/cudnn/lib/x64/cudnn_cnn_infer64_8.lib +0 -0
- nvidia/cudnn/lib/x64/cudnn_cnn_train.lib +0 -0
- nvidia/cudnn/lib/x64/cudnn_cnn_train64_8.lib +0 -0
- nvidia/cudnn/lib/x64/cudnn_ops_infer.lib +0 -0
- nvidia/cudnn/lib/x64/cudnn_ops_infer64_8.lib +0 -0
- nvidia/cudnn/lib/x64/cudnn_ops_train.lib +0 -0
- nvidia/cudnn/lib/x64/cudnn_ops_train64_8.lib +0 -0
- nvidia_cudnn_cu12-8.9.7.29.dist-info/RECORD +0 -40
- {nvidia_cudnn_cu12-8.9.7.29.dist-info → nvidia_cudnn_cu12-9.0.0.312.dist-info}/License.txt +0 -0
- {nvidia_cudnn_cu12-8.9.7.29.dist-info → nvidia_cudnn_cu12-9.0.0.312.dist-info}/WHEEL +0 -0
- {nvidia_cudnn_cu12-8.9.7.29.dist-info → nvidia_cudnn_cu12-9.0.0.312.dist-info}/top_level.txt +0 -0
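The headline change for Windows consumers is the library renaming: the monolithic cudnn64_8.dll and the per-domain *_infer/*_train DLLs are gone, replaced by cudnn64_9.dll plus new sub-libraries (graph, ops, cnn, adv, engines_precompiled, engines_runtime_compiled, heuristic). Anything that loads cuDNN by file name must be updated. Below is a minimal sketch in C, assuming the wheel's nvidia/cudnn/bin directory is on the DLL search path; the fallback logic and error handling are illustrative, not part of the package:

    /* Minimal sketch: resolving the renamed cuDNN DLL at runtime on Windows.
     * Assumes nvidia/cudnn/bin from this wheel is on the DLL search path. */
    #include <stdio.h>
    #include <windows.h>

    typedef size_t(__stdcall *cudnnGetVersion_t)(void);

    int main(void) {
        /* cuDNN 9 renamed the main DLL; try it first, then the old 8.x name. */
        HMODULE h = LoadLibraryA("cudnn64_9.dll");
        if (!h)
            h = LoadLibraryA("cudnn64_8.dll");
        if (!h) {
            fprintf(stderr, "no cuDNN DLL found\n");
            return 1;
        }
        cudnnGetVersion_t getVersion =
            (cudnnGetVersion_t)GetProcAddress(h, "cudnnGetVersion");
        if (getVersion)
            printf("loaded cuDNN %zu\n", getVersion());
        FreeLibrary(h);
        return 0;
    }

Code that links at build time instead switches from cudnn64_8.lib to cudnn64_9.lib (or the new per-sub-library import libraries) under nvidia/cudnn/lib/x64.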
nvidia/cudnn/include/{cudnn_ops_infer.h → cudnn_ops.h}

@@ -48,172 +48,41 @@
  */

 /*
- *
+ * cudnn_ops : cuDNN's basic definitions and basic operations.
  */

-#if !defined(
-#define
+#if !defined(CUDNN_OPS_H_)
+#define CUDNN_OPS_H_

-#include <cuda_runtime.h>
 #include <stdint.h>

 #include "cudnn_version.h"
+#include "cudnn_graph.h"

 /* These version numbers are autogenerated, do not edit manually. */
-#define
-#define
-#define
+#define CUDNN_OPS_MAJOR 9
+#define CUDNN_OPS_MINOR 0
+#define CUDNN_OPS_PATCH 0

-#if (
-    (CUDNN_OPS_INFER_PATCH != CUDNN_PATCHLEVEL)
+#if (CUDNN_OPS_MAJOR != CUDNN_MAJOR) || (CUDNN_OPS_MINOR != CUDNN_MINOR) || (CUDNN_OPS_PATCH != CUDNN_PATCHLEVEL)
 #error Version mismatch in cuDNN OPS INFER!!!
 #endif

-#ifndef CUDNNWINAPI
-#ifdef _WIN32
-#define CUDNNWINAPI __stdcall
-#else
-#define CUDNNWINAPI
-#endif
-#endif
-
-/* Warnings for deprecated API-s are enabled using the CUDNN_WARN_DEPRECATED macro */
-#if defined(CUDNN_WARN_DEPRECATED) && (defined(__GNUC__) || defined(__clang__))
-/* GCC, Intel C/C++, Cray C/C++, CLANG, IBM XL C/C++ little endian */
-#define CUDNN_DEPRECATED __attribute__((deprecated))
-#elif defined(CUDNN_WARN_DEPRECATED) && defined(_MSC_VER)
-/* Microsoft Visual C++ */
-#define CUDNN_DEPRECATED __declspec(deprecated)
-#elif defined(CUDNN_WARN_DEPRECATED) && (__cplusplus >= 201402L)
-/* C++14 compilers */
-#define CUDNN_DEPRECATED [[deprecated]]
-#else
-/* No support for the deprecated attribute */
-#define CUDNN_DEPRECATED
-#endif
-
 #if defined(__cplusplus)
 extern "C" {
 #endif

-struct cudnnContext;
-typedef struct cudnnContext *cudnnHandle_t;
-
-size_t CUDNNWINAPI
-cudnnGetVersion(void);
-
-size_t CUDNNWINAPI
-cudnnGetMaxDeviceVersion(void);
-
-/* Returns CUDA Runtime version statically linked against cudnn */
-size_t CUDNNWINAPI
-cudnnGetCudartVersion(void);
-
-/*
- * CUDNN return codes
- */
-typedef enum {
-    CUDNN_STATUS_SUCCESS = 0,
-    CUDNN_STATUS_NOT_INITIALIZED = 1,
-    CUDNN_STATUS_ALLOC_FAILED = 2,
-    CUDNN_STATUS_BAD_PARAM = 3,
-    CUDNN_STATUS_INTERNAL_ERROR = 4,
-    CUDNN_STATUS_INVALID_VALUE = 5,
-    CUDNN_STATUS_ARCH_MISMATCH = 6,
-    CUDNN_STATUS_MAPPING_ERROR = 7,
-    CUDNN_STATUS_EXECUTION_FAILED = 8,
-    CUDNN_STATUS_NOT_SUPPORTED = 9,
-    CUDNN_STATUS_LICENSE_ERROR = 10,
-    CUDNN_STATUS_RUNTIME_PREREQUISITE_MISSING = 11,
-    CUDNN_STATUS_RUNTIME_IN_PROGRESS = 12,
-    CUDNN_STATUS_RUNTIME_FP_OVERFLOW = 13,
-    CUDNN_STATUS_VERSION_MISMATCH = 14,
-} cudnnStatus_t;
-
-/* human-readable error messages */
-const char *CUDNNWINAPI
-cudnnGetErrorString(cudnnStatus_t status);
-
-/* Forward definition in this version only */
-typedef struct cudnnRuntimeTag_t cudnnRuntimeTag_t;
-
-typedef enum {
-    CUDNN_ERRQUERY_RAWCODE = 0,
-    CUDNN_ERRQUERY_NONBLOCKING = 1,
-    CUDNN_ERRQUERY_BLOCKING = 2,
-} cudnnErrQueryMode_t;
-
-cudnnStatus_t CUDNNWINAPI
-cudnnQueryRuntimeError(cudnnHandle_t handle, cudnnStatus_t *rstatus, cudnnErrQueryMode_t mode, cudnnRuntimeTag_t *tag);
-
-#ifndef __LIBRARY_TYPES_H__
-
-typedef enum libraryPropertyType_t { MAJOR_VERSION, MINOR_VERSION, PATCH_LEVEL } libraryPropertyType;
-
-#endif
-
-cudnnStatus_t CUDNNWINAPI
-cudnnGetProperty(libraryPropertyType type, int *value);
-
-cudnnStatus_t CUDNNWINAPI
-cudnnCreate(cudnnHandle_t *handle);
-cudnnStatus_t CUDNNWINAPI
-cudnnDestroy(cudnnHandle_t handle);
-cudnnStatus_t CUDNNWINAPI
-cudnnSetStream(cudnnHandle_t handle, cudaStream_t streamId);
-cudnnStatus_t CUDNNWINAPI
-cudnnGetStream(cudnnHandle_t handle, cudaStream_t *streamId);
-
 /* Data structures to represent Image/Filter and the Neural Network Layer */
 typedef struct cudnnTensorStruct *cudnnTensorDescriptor_t;
-typedef struct cudnnPoolingStruct *cudnnPoolingDescriptor_t;
-typedef struct cudnnFilterStruct *cudnnFilterDescriptor_t;
+typedef struct cudnnPoolingStruct *cudnnPoolingDescriptor_t CUDNN_DEPRECATED;
+typedef struct cudnnFilterStruct *cudnnFilterDescriptor_t CUDNN_DEPRECATED;
 typedef struct cudnnLRNStruct *cudnnLRNDescriptor_t;
-typedef struct cudnnActivationStruct *cudnnActivationDescriptor_t;
+typedef struct cudnnActivationStruct *cudnnActivationDescriptor_t CUDNN_DEPRECATED;
 typedef struct cudnnSpatialTransformerStruct *cudnnSpatialTransformerDescriptor_t;
-typedef struct cudnnOpTensorStruct *cudnnOpTensorDescriptor_t;
-typedef struct cudnnReduceTensorStruct *cudnnReduceTensorDescriptor_t;
+typedef struct cudnnOpTensorStruct *cudnnOpTensorDescriptor_t CUDNN_DEPRECATED;
+typedef struct cudnnReduceTensorStruct *cudnnReduceTensorDescriptor_t CUDNN_DEPRECATED;
 typedef struct cudnnCTCLossStruct *cudnnCTCLossDescriptor_t;
-typedef struct cudnnTensorTransformStruct *cudnnTensorTransformDescriptor_t;
-/*
- * CUDNN data type
- */
-typedef enum {
-    CUDNN_DATA_FLOAT = 0,
-    CUDNN_DATA_DOUBLE = 1,
-    CUDNN_DATA_HALF = 2,
-    CUDNN_DATA_INT8 = 3,
-    CUDNN_DATA_INT32 = 4,
-    CUDNN_DATA_INT8x4 = 5,
-    CUDNN_DATA_UINT8 = 6,
-    CUDNN_DATA_UINT8x4 = 7,
-    CUDNN_DATA_INT8x32 = 8,
-    CUDNN_DATA_BFLOAT16 = 9,
-    CUDNN_DATA_INT64 = 10,
-    CUDNN_DATA_BOOLEAN = 11,
-    CUDNN_DATA_FP8_E4M3 = 12,
-    CUDNN_DATA_FP8_E5M2 = 13,
-    CUDNN_DATA_FAST_FLOAT_FOR_FP8 = 14,
-} cudnnDataType_t;
-
-/*
- * CUDNN math type
- */
-typedef enum {
-    CUDNN_DEFAULT_MATH = 0,
-    CUDNN_TENSOR_OP_MATH = 1,
-    CUDNN_TENSOR_OP_MATH_ALLOW_CONVERSION = 2,
-    CUDNN_FMA_MATH = 3,
-} cudnnMathType_t;
-
-/*
- * CUDNN propagate Nan
- */
-typedef enum {
-    CUDNN_NOT_PROPAGATE_NAN = 0,
-    CUDNN_PROPAGATE_NAN = 1,
-} cudnnNanPropagation_t;
-
+typedef struct cudnnTensorTransformStruct *cudnnTensorTransformDescriptor_t CUDNN_DEPRECATED;
 /*
  * CUDNN Determinism
  */
@@ -222,19 +91,10 @@ typedef enum {
     CUDNN_DETERMINISTIC = 1,
 } cudnnDeterminism_t;

-/* Maximum supported number of tensor dimensions */
-#define CUDNN_DIM_MAX 8
-
 /* Create an instance of a generic Tensor descriptor */
 cudnnStatus_t CUDNNWINAPI
 cudnnCreateTensorDescriptor(cudnnTensorDescriptor_t *tensorDesc);

-typedef enum {
-    CUDNN_TENSOR_NCHW = 0,        /* row major (wStride = 1, hStride = w) */
-    CUDNN_TENSOR_NHWC = 1,        /* feature maps interleaved ( cStride = 1 )*/
-    CUDNN_TENSOR_NCHW_VECT_C = 2, /* each image point is vector of element of C, vector length in data type */
-} cudnnTensorFormat_t;
-
 cudnnStatus_t CUDNNWINAPI
 cudnnSetTensor4dDescriptor(cudnnTensorDescriptor_t tensorDesc,
                            cudnnTensorFormat_t format,
@@ -327,18 +187,18 @@ typedef enum {
 } cudnnFoldingDirection_t;

 /** Create a destination descriptor for cudnnTransformTensor */
-cudnnStatus_t CUDNNWINAPI
+CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
 cudnnInitTransformDest(const cudnnTensorTransformDescriptor_t transformDesc,
                        const cudnnTensorDescriptor_t srcDesc,
                        cudnnTensorDescriptor_t destDesc,
                        size_t *destSizeInBytes);

 /** Create an empty tensor transform descriptor */
-cudnnStatus_t CUDNNWINAPI
+CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
 cudnnCreateTensorTransformDescriptor(cudnnTensorTransformDescriptor_t *transformDesc);

 /** Initialize a previously created tensor transform descriptor. */
-cudnnStatus_t CUDNNWINAPI
+CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
 cudnnSetTensorTransformDescriptor(cudnnTensorTransformDescriptor_t transformDesc,
                                   const uint32_t nbDims,
                                   const cudnnTensorFormat_t destFormat,
@@ -351,7 +211,7 @@ cudnnSetTensorTransformDescriptor(cudnnTensorTransformDescriptor_t transformDesc
  * Retrieves the values stored in a previously initialized tensor transform
  * descriptor.
  */
-cudnnStatus_t CUDNNWINAPI
+CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
 cudnnGetTensorTransformDescriptor(cudnnTensorTransformDescriptor_t transformDesc,
                                   uint32_t nbDimsRequested,
                                   cudnnTensorFormat_t *destFormat,
@@ -363,11 +223,11 @@ cudnnGetTensorTransformDescriptor(cudnnTensorTransformDescriptor_t transformDesc
 /**
  * Destroys a previously created tensor transform descriptor.
  */
-cudnnStatus_t CUDNNWINAPI
+CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
 cudnnDestroyTensorTransformDescriptor(cudnnTensorTransformDescriptor_t transformDesc);

 /* Tensor layout conversion helper (y = alpha * x + beta * y) */
-cudnnStatus_t CUDNNWINAPI
+CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
 cudnnTransformTensor(cudnnHandle_t handle,
                      const void *alpha,
                      const cudnnTensorDescriptor_t xDesc,
@@ -376,7 +236,7 @@ cudnnTransformTensor(cudnnHandle_t handle,
                      const cudnnTensorDescriptor_t yDesc,
                      void *y);

-cudnnStatus_t CUDNNWINAPI
+CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
 cudnnTransformTensorEx(cudnnHandle_t handle,
                        const cudnnTensorTransformDescriptor_t transDesc,
                        const void *alpha,
@@ -387,7 +247,7 @@ cudnnTransformTensorEx(cudnnHandle_t handle,
                        void *destData);

 /* Tensor Bias addition : C = alpha * A + beta * C */
-cudnnStatus_t CUDNNWINAPI
+CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
 cudnnAddTensor(cudnnHandle_t handle,
                const void *alpha,
                const cudnnTensorDescriptor_t aDesc,
@@ -408,27 +268,27 @@ typedef enum {
     CUDNN_OP_TENSOR_NOT = 5,
 } cudnnOpTensorOp_t;

-cudnnStatus_t CUDNNWINAPI
+CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
 cudnnCreateOpTensorDescriptor(cudnnOpTensorDescriptor_t *opTensorDesc);

-cudnnStatus_t CUDNNWINAPI
+CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
 cudnnSetOpTensorDescriptor(cudnnOpTensorDescriptor_t opTensorDesc,
                            cudnnOpTensorOp_t opTensorOp,
                            cudnnDataType_t opTensorCompType,
                            cudnnNanPropagation_t opTensorNanOpt);

-cudnnStatus_t CUDNNWINAPI
+CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
 cudnnGetOpTensorDescriptor(const cudnnOpTensorDescriptor_t opTensorDesc,
                            cudnnOpTensorOp_t *opTensorOp,
                            cudnnDataType_t *opTensorCompType,
                            cudnnNanPropagation_t *opTensorNanOpt);

-cudnnStatus_t CUDNNWINAPI
+CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
 cudnnDestroyOpTensorDescriptor(cudnnOpTensorDescriptor_t opTensorDesc);

 /* Tensor operation : C = op( alpha1 * A, alpha2 * B ) + beta * C */
 /* B tensor is ignored for CUDNN_OP_TENSOR_SQRT, CUDNN_OP_TENSOR_NOT. */
-cudnnStatus_t CUDNNWINAPI
+CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
 cudnnOpTensor(cudnnHandle_t handle,
               const cudnnOpTensorDescriptor_t opTensorDesc,
               const void *alpha1,
@@ -441,28 +301,13 @@ cudnnOpTensor(cudnnHandle_t handle,
               const cudnnTensorDescriptor_t cDesc,
               void *C);

-/*
- * CUDNN ReduceTensor op type
- */
-typedef enum {
-    CUDNN_REDUCE_TENSOR_ADD = 0,
-    CUDNN_REDUCE_TENSOR_MUL = 1,
-    CUDNN_REDUCE_TENSOR_MIN = 2,
-    CUDNN_REDUCE_TENSOR_MAX = 3,
-    CUDNN_REDUCE_TENSOR_AMAX = 4,
-    CUDNN_REDUCE_TENSOR_AVG = 5,
-    CUDNN_REDUCE_TENSOR_NORM1 = 6,
-    CUDNN_REDUCE_TENSOR_NORM2 = 7,
-    CUDNN_REDUCE_TENSOR_MUL_NO_ZEROS = 8,
-} cudnnReduceTensorOp_t;
-
 /*
  * CUDNN ReduceTensor indices type
  */
 typedef enum {
     CUDNN_REDUCE_TENSOR_NO_INDICES = 0,
     CUDNN_REDUCE_TENSOR_FLATTENED_INDICES = 1,
-} cudnnReduceTensorIndices_t;
+} cudnnReduceTensorIndices_t CUDNN_DEPRECATED;

 /*
  * CUDNN tensor indices type size (all unsigned)
@@ -473,12 +318,12 @@ typedef enum {
     CUDNN_64BIT_INDICES = 1,
     CUDNN_16BIT_INDICES = 2,
     CUDNN_8BIT_INDICES = 3,
-} cudnnIndicesType_t;
+} cudnnIndicesType_t CUDNN_DEPRECATED;

-cudnnStatus_t CUDNNWINAPI
+CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
 cudnnCreateReduceTensorDescriptor(cudnnReduceTensorDescriptor_t *reduceTensorDesc);

-cudnnStatus_t CUDNNWINAPI
+CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
 cudnnSetReduceTensorDescriptor(cudnnReduceTensorDescriptor_t reduceTensorDesc,
                                cudnnReduceTensorOp_t reduceTensorOp,
                                cudnnDataType_t reduceTensorCompType,
@@ -486,7 +331,7 @@ cudnnSetReduceTensorDescriptor(cudnnReduceTensorDescriptor_t reduceTensorDesc,
                                cudnnReduceTensorIndices_t reduceTensorIndices,
                                cudnnIndicesType_t reduceTensorIndicesType);

-cudnnStatus_t CUDNNWINAPI
+CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
 cudnnGetReduceTensorDescriptor(const cudnnReduceTensorDescriptor_t reduceTensorDesc,
                                cudnnReduceTensorOp_t *reduceTensorOp,
                                cudnnDataType_t *reduceTensorCompType,
@@ -494,12 +339,12 @@ cudnnGetReduceTensorDescriptor(const cudnnReduceTensorDescriptor_t reduceTensorD
                                cudnnReduceTensorIndices_t *reduceTensorIndices,
                                cudnnIndicesType_t *reduceTensorIndicesType);

-cudnnStatus_t CUDNNWINAPI
+CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
 cudnnDestroyReduceTensorDescriptor(cudnnReduceTensorDescriptor_t reduceTensorDesc);

 /* Helper function to return the minimum size of the index space to be passed to the reduction given the input and
  * output tensors */
-cudnnStatus_t CUDNNWINAPI
+CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
 cudnnGetReductionIndicesSize(cudnnHandle_t handle,
                              const cudnnReduceTensorDescriptor_t reduceTensorDesc,
                              const cudnnTensorDescriptor_t aDesc,
@@ -508,7 +353,7 @@ cudnnGetReductionIndicesSize(cudnnHandle_t handle,

 /* Helper function to return the minimum size of the workspace to be passed to the reduction given the input and output
  * tensors */
-cudnnStatus_t CUDNNWINAPI
+CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
 cudnnGetReductionWorkspaceSize(cudnnHandle_t handle,
                                const cudnnReduceTensorDescriptor_t reduceTensorDesc,
                                const cudnnTensorDescriptor_t aDesc,
@@ -518,7 +363,7 @@ cudnnGetReductionWorkspaceSize(cudnnHandle_t handle,
 /* Tensor operation : C = reduce op( alpha * A ) + beta * C */
 /* The NaN propagation enum applies to only the min and max reduce ops; the other reduce ops propagate NaN as usual. */
 /* The indices space is ignored for reduce ops other than min or max. */
-cudnnStatus_t CUDNNWINAPI
+CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
 cudnnReduceTensor(cudnnHandle_t handle,
                   const cudnnReduceTensorDescriptor_t reduceTensorDesc,
                   void *indices,
@@ -537,14 +382,14 @@ cudnnStatus_t CUDNNWINAPI
 cudnnSetTensor(cudnnHandle_t handle, const cudnnTensorDescriptor_t yDesc, void *y, const void *valuePtr);

 /* Scale all values of a tensor by a given factor : y[i] = alpha * y[i] */
-cudnnStatus_t CUDNNWINAPI
+CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
 cudnnScaleTensor(cudnnHandle_t handle, const cudnnTensorDescriptor_t yDesc, void *y, const void *alpha);

 /* Create an instance of FilterStruct */
-cudnnStatus_t CUDNNWINAPI
+CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
 cudnnCreateFilterDescriptor(cudnnFilterDescriptor_t *filterDesc);

-cudnnStatus_t CUDNNWINAPI
+CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
 cudnnSetFilter4dDescriptor(cudnnFilterDescriptor_t filterDesc,
                            cudnnDataType_t dataType, /* image data type */
                            cudnnTensorFormat_t format,
@@ -553,7 +398,7 @@ cudnnSetFilter4dDescriptor(cudnnFilterDescriptor_t filterDesc,
                            int h,  /* height of each input filter */
                            int w); /* width of each input filter */

-cudnnStatus_t CUDNNWINAPI
+CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
 cudnnGetFilter4dDescriptor(const cudnnFilterDescriptor_t filterDesc,
                            cudnnDataType_t *dataType, /* image data type */
                            cudnnTensorFormat_t *format,
@@ -562,24 +407,24 @@ cudnnGetFilter4dDescriptor(const cudnnFilterDescriptor_t filterDesc,
                            int *h,  /* height of each input filter */
                            int *w); /* width of each input filter */

-cudnnStatus_t CUDNNWINAPI
+CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
 cudnnSetFilterNdDescriptor(cudnnFilterDescriptor_t filterDesc,
                            cudnnDataType_t dataType, /* image data type */
                            cudnnTensorFormat_t format,
                            int nbDims,
                            const int filterDimA[]);

-cudnnStatus_t CUDNNWINAPI
+CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
 cudnnGetFilterNdDescriptor(const cudnnFilterDescriptor_t filterDesc,
                            int nbDimsRequested,
                            cudnnDataType_t *dataType, /* image data type */
                            cudnnTensorFormat_t *format,
                            int *nbDims,
                            int filterDimA[]);
-cudnnStatus_t CUDNNWINAPI
+CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
 cudnnGetFilterSizeInBytes(const cudnnFilterDescriptor_t filterDesc, size_t *size);

-cudnnStatus_t CUDNNWINAPI
+CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
 cudnnTransformFilter(cudnnHandle_t handle,
                      const cudnnTensorTransformDescriptor_t transDesc,
                      const void *alpha,
@@ -589,7 +434,7 @@ cudnnTransformFilter(cudnnHandle_t handle,
                      const cudnnFilterDescriptor_t destDesc,
                      void *destData);

-cudnnStatus_t CUDNNWINAPI
+CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
 cudnnDestroyFilterDescriptor(cudnnFilterDescriptor_t filterDesc);

 /*
@@ -628,13 +473,13 @@ typedef enum {
     CUDNN_POOLING_AVERAGE_COUNT_INCLUDE_PADDING = 1, /* count for average includes padded values */
     CUDNN_POOLING_AVERAGE_COUNT_EXCLUDE_PADDING = 2, /* count for average does not include padded values */
     CUDNN_POOLING_MAX_DETERMINISTIC = 3
-} cudnnPoolingMode_t;
+} cudnnPoolingMode_t CUDNN_DEPRECATED;

 /* Create an instance of pooling descriptor */
-cudnnStatus_t CUDNNWINAPI
+CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
 cudnnCreatePoolingDescriptor(cudnnPoolingDescriptor_t *poolingDesc);

-cudnnStatus_t CUDNNWINAPI
+CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
 cudnnSetPooling2dDescriptor(cudnnPoolingDescriptor_t poolingDesc,
                             cudnnPoolingMode_t mode,
                             cudnnNanPropagation_t maxpoolingNanOpt,
@@ -645,7 +490,7 @@ cudnnSetPooling2dDescriptor(cudnnPoolingDescriptor_t poolingDesc,
                             int verticalStride,
                             int horizontalStride);

-cudnnStatus_t CUDNNWINAPI
+CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
 cudnnGetPooling2dDescriptor(const cudnnPoolingDescriptor_t poolingDesc,
                             cudnnPoolingMode_t *mode,
                             cudnnNanPropagation_t *maxpoolingNanOpt,
@@ -656,7 +501,7 @@ cudnnGetPooling2dDescriptor(const cudnnPoolingDescriptor_t poolingDesc,
                             int *verticalStride,
                             int *horizontalStride);

-cudnnStatus_t CUDNNWINAPI
+CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
 cudnnSetPoolingNdDescriptor(cudnnPoolingDescriptor_t poolingDesc,
                             const cudnnPoolingMode_t mode,
                             const cudnnNanPropagation_t maxpoolingNanOpt,
@@ -665,7 +510,7 @@ cudnnSetPoolingNdDescriptor(cudnnPoolingDescriptor_t poolingDesc,
                             const int paddingA[],
                             const int strideA[]);

-cudnnStatus_t CUDNNWINAPI
+CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
 cudnnGetPoolingNdDescriptor(const cudnnPoolingDescriptor_t poolingDesc,
                             int nbDimsRequested,
                             cudnnPoolingMode_t *mode,
@@ -675,13 +520,13 @@ cudnnGetPoolingNdDescriptor(const cudnnPoolingDescriptor_t poolingDesc,
                             int paddingA[],
                             int strideA[]);

-cudnnStatus_t CUDNNWINAPI
+CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
 cudnnGetPoolingNdForwardOutputDim(const cudnnPoolingDescriptor_t poolingDesc,
                                   const cudnnTensorDescriptor_t inputTensorDesc,
                                   int nbDims,
                                   int outputTensorDimA[]);

-cudnnStatus_t CUDNNWINAPI
+CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
 cudnnGetPooling2dForwardOutputDim(const cudnnPoolingDescriptor_t poolingDesc,
                                   const cudnnTensorDescriptor_t inputTensorDesc,
                                   int *n,
@@ -690,13 +535,13 @@ cudnnGetPooling2dForwardOutputDim(const cudnnPoolingDescriptor_t poolingDesc,
                                   int *w);

 /* Destroy an instance of pooling descriptor */
-cudnnStatus_t CUDNNWINAPI
+CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
 cudnnDestroyPoolingDescriptor(cudnnPoolingDescriptor_t poolingDesc);

 /* Pooling functions: All of the form "output = alpha * Op(inputs) + beta * output" */

 /* Function to perform forward pooling */
-cudnnStatus_t CUDNNWINAPI
+CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
 cudnnPoolingForward(cudnnHandle_t handle,
                     const cudnnPoolingDescriptor_t poolingDesc,
                     const void *alpha,
@@ -706,46 +551,33 @@ cudnnPoolingForward(cudnnHandle_t handle,
                     const cudnnTensorDescriptor_t yDesc,
                     void *y);

-/*
- * activation mode
- */
-typedef enum {
-    CUDNN_ACTIVATION_SIGMOID = 0,
-    CUDNN_ACTIVATION_RELU = 1,
-    CUDNN_ACTIVATION_TANH = 2,
-    CUDNN_ACTIVATION_CLIPPED_RELU = 3,
-    CUDNN_ACTIVATION_ELU = 4,
-    CUDNN_ACTIVATION_IDENTITY = 5,
-    CUDNN_ACTIVATION_SWISH = 6
-} cudnnActivationMode_t;
-
 /* Activation functions: All of the form "output = alpha * Op(inputs) + beta * output" */
-cudnnStatus_t CUDNNWINAPI
+CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
 cudnnCreateActivationDescriptor(cudnnActivationDescriptor_t *activationDesc);

-cudnnStatus_t CUDNNWINAPI
+CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
 cudnnSetActivationDescriptor(cudnnActivationDescriptor_t activationDesc,
                              cudnnActivationMode_t mode,
                              cudnnNanPropagation_t reluNanOpt,
                              double coef); /* ceiling for clipped RELU, alpha for ELU */

-cudnnStatus_t CUDNNWINAPI
+CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
 cudnnGetActivationDescriptor(const cudnnActivationDescriptor_t activationDesc,
                              cudnnActivationMode_t *mode,
                              cudnnNanPropagation_t *reluNanOpt,
                              double *coef); /* ceiling for clipped RELU, alpha for ELU */

-cudnnStatus_t CUDNNWINAPI
+CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
 cudnnSetActivationDescriptorSwishBeta(cudnnActivationDescriptor_t activationDesc, double swish_beta);

-cudnnStatus_t CUDNNWINAPI
+CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
 cudnnGetActivationDescriptorSwishBeta(cudnnActivationDescriptor_t activationDesc, double *swish_beta);

-cudnnStatus_t CUDNNWINAPI
+CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
 cudnnDestroyActivationDescriptor(cudnnActivationDescriptor_t activationDesc);

 /* Function to perform forward activation */
-cudnnStatus_t CUDNNWINAPI
+CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
 cudnnActivationForward(cudnnHandle_t handle,
                        cudnnActivationDescriptor_t activationDesc,
                        const void *alpha,
@@ -835,7 +667,7 @@ typedef enum {
      * May be faster than CUDNN_BATCHNORM_SPATIAL but imposes some limits on the range of values
      */
     CUDNN_BATCHNORM_SPATIAL_PERSISTENT = 2,
-} cudnnBatchNormMode_t;
+} cudnnBatchNormMode_t CUDNN_DEPRECATED;

 #define CUDNN_BN_MIN_EPSILON 0.0 /* Minimum epsilon allowed to be used in the Batch Normalization formula */

@@ -844,7 +676,7 @@ typedef enum {
  * scale, invVariance, bnBias, bnScale tensors. Use this tensor desc for
  * bnScaleBiasMeanVarDesc and bnScaleBiasDiffDesc in Batch Normalization forward and backward functions.
  */
-cudnnStatus_t CUDNNWINAPI
+CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
 cudnnDeriveBNTensorDescriptor(cudnnTensorDescriptor_t derivedBnDesc,
                               const cudnnTensorDescriptor_t xDesc,
                               cudnnBatchNormMode_t mode);
@@ -853,7 +685,7 @@ typedef enum {
     CUDNN_BATCHNORM_OPS_BN = 0,                /* do batch normalization only */
     CUDNN_BATCHNORM_OPS_BN_ACTIVATION = 1,     /* do batchNorm, then activation */
     CUDNN_BATCHNORM_OPS_BN_ADD_ACTIVATION = 2, /* do batchNorm, then elemWiseAdd, then activation */
-} cudnnBatchNormOps_t;
+} cudnnBatchNormOps_t CUDNN_DEPRECATED;

 /*
  * Performs Batch Normalization during Inference:
@@ -862,7 +694,7 @@ typedef enum {
  * according to spatial or per-activation mode. Refer to cudnnBatchNormalizationForwardTraining
  * above for notes on function arguments.
  */
-cudnnStatus_t CUDNNWINAPI
+CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
 cudnnBatchNormalizationForwardInference(cudnnHandle_t handle,
                                         cudnnBatchNormMode_t mode,
                                         const void *alpha, /* alpha[0] = result blend factor */
@@ -884,16 +716,16 @@ typedef enum {

     /* bnScale, bnBias tensor dims are 1xCx1x1 (one value per C-dim normalized over Nx1xHxW subtensors) */
     CUDNN_NORM_PER_CHANNEL = 1,
-} cudnnNormMode_t;
+} cudnnNormMode_t CUDNN_DEPRECATED;

-typedef enum { CUDNN_NORM_ALGO_STANDARD = 0, CUDNN_NORM_ALGO_PERSIST = 1 } cudnnNormAlgo_t;
+typedef enum { CUDNN_NORM_ALGO_STANDARD = 0, CUDNN_NORM_ALGO_PERSIST = 1 } cudnnNormAlgo_t CUDNN_DEPRECATED;

 /*
  * Derives a tensor descriptor from layer data descriptor for Normalization
  * scale, invVariance, bnBias, bnScale tensors. Use this tensor desc for
  * normScaleBiasMeanVarDesc and normScaleBiasDiffDesc in Normalization forward and backward functions.
  */
-cudnnStatus_t CUDNNWINAPI
+CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
 cudnnDeriveNormTensorDescriptor(cudnnTensorDescriptor_t derivedNormScaleBiasDesc,
                                 cudnnTensorDescriptor_t derivedNormMeanVarDesc,
                                 const cudnnTensorDescriptor_t xDesc,
@@ -904,7 +736,7 @@ typedef enum {
     CUDNN_NORM_OPS_NORM = 0,                /* do normalization only */
     CUDNN_NORM_OPS_NORM_ACTIVATION = 1,     /* do Norm, then activation */
     CUDNN_NORM_OPS_NORM_ADD_ACTIVATION = 2, /* do Norm, then elemWiseAdd, then activation */
-} cudnnNormOps_t;
+} cudnnNormOps_t CUDNN_DEPRECATED;

 /*
  * Performs Normalization during Inference:
@@ -913,7 +745,7 @@ typedef enum {
  * according to per-channel or per-activation mode. Refer to cudnnNormalizationForwardTraining
  * above for notes on function arguments.
  */
-cudnnStatus_t CUDNNWINAPI
+CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
 cudnnNormalizationForwardInference(cudnnHandle_t handle,
                                    cudnnNormMode_t mode,
                                    cudnnNormOps_t normOps,
@@ -1021,11 +853,6 @@ cudnnDropoutForward(cudnnHandle_t handle,
                     void *reserveSpace,
                     size_t reserveSpaceSizeInBytes);

-/* TODO: remove */
-
-typedef struct cudnnAlgorithmStruct *cudnnAlgorithmDescriptor_t;
-typedef struct cudnnAlgorithmPerformanceStruct *cudnnAlgorithmPerformance_t;
-
 /* TODO: move these enums out to the appropriate submodule */
 typedef enum {
     CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM = 0,
@@ -1060,124 +887,430 @@ typedef enum {
     CUDNN_CONVOLUTION_BWD_DATA_ALGO_COUNT = 6
 } cudnnConvolutionBwdDataAlgo_t;

-typedef enum {
-    CUDNN_RNN_ALGO_STANDARD = 0,
-    CUDNN_RNN_ALGO_PERSIST_STATIC = 1,
-    CUDNN_RNN_ALGO_PERSIST_DYNAMIC = 2,
-    CUDNN_RNN_ALGO_PERSIST_STATIC_SMALL_H = 3,
-    CUDNN_RNN_ALGO_COUNT = 4,
-} cudnnRNNAlgo_t;
-
 typedef enum { CUDNN_CTC_LOSS_ALGO_DETERMINISTIC = 0, CUDNN_CTC_LOSS_ALGO_NON_DETERMINISTIC = 1 } cudnnCTCLossAlgo_t;

-/*
-
-
-
-
-
-
-
-
-} cudnnAlgorithm_t;
+/*
+ * \brief Cross-library version checker.
+ * This function is implemented differently in each sub-library. Each sublib
+ * checks whether its own version matches that of its dependencies.
+ * \returns CUDNN_STATUS_SUCCESS if the version check passes,
+ *          CUDNN_STATUS_SUBLIBRARY_VERSION_MISMATCH if the versions are inconsistent.
+ */
+cudnnStatus_t CUDNNWINAPI
+cudnnOpsVersionCheck(void);

-
-
+/* Function to perform backward softmax */
+cudnnStatus_t CUDNNWINAPI
+cudnnSoftmaxBackward(cudnnHandle_t handle,
+                     cudnnSoftmaxAlgorithm_t algo,
+                     cudnnSoftmaxMode_t mode,
+                     const void *alpha,
+                     const cudnnTensorDescriptor_t yDesc,
+                     const void *y,
+                     const cudnnTensorDescriptor_t dyDesc,
+                     const void *dy,
+                     const void *beta,
+                     const cudnnTensorDescriptor_t dxDesc,
+                     void *dx);

+/* Function to perform backward pooling */
 CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
-
+cudnnPoolingBackward(cudnnHandle_t handle,
+                     const cudnnPoolingDescriptor_t poolingDesc,
+                     const void *alpha,
+                     const cudnnTensorDescriptor_t yDesc,
+                     const void *y,
+                     const cudnnTensorDescriptor_t dyDesc,
+                     const void *dy,
+                     const cudnnTensorDescriptor_t xDesc,
+                     const void *x,
+                     const void *beta,
+                     const cudnnTensorDescriptor_t dxDesc,
+                     void *dx);

+/* Function to perform backward activation */
 CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
-
+cudnnActivationBackward(cudnnHandle_t handle,
+                        cudnnActivationDescriptor_t activationDesc,
+                        const void *alpha,
+                        const cudnnTensorDescriptor_t yDesc,
+                        const void *y,
+                        const cudnnTensorDescriptor_t dyDesc,
+                        const void *dy,
+                        const cudnnTensorDescriptor_t xDesc,
+                        const void *x,
+                        const void *beta,
+                        const cudnnTensorDescriptor_t dxDesc,
+                        void *dx);
+
+/* LRN cross-channel backward computation. Double parameters cast to tensor data type */
+cudnnStatus_t CUDNNWINAPI
+cudnnLRNCrossChannelBackward(cudnnHandle_t handle,
+                             cudnnLRNDescriptor_t normDesc,
+                             cudnnLRNMode_t lrnMode,
+                             const void *alpha,
+                             const cudnnTensorDescriptor_t yDesc,
+                             const void *y,
+                             const cudnnTensorDescriptor_t dyDesc,
+                             const void *dy,
+                             const cudnnTensorDescriptor_t xDesc,
+                             const void *x,
+                             const void *beta,
+                             const cudnnTensorDescriptor_t dxDesc,
+                             void *dx);
+
+cudnnStatus_t CUDNNWINAPI
+cudnnDivisiveNormalizationBackward(cudnnHandle_t handle,
+                                   cudnnLRNDescriptor_t normDesc,
+                                   cudnnDivNormMode_t mode,
+                                   const void *alpha,
+                                   const cudnnTensorDescriptor_t xDesc, /* same desc for x, means, dy, temp, temp2 */
+                                   const void *x,
+                                   const void *means, /* if NULL, means are assumed to be zero */
+                                   const void *dy,
+                                   void *temp,
+                                   void *temp2,
+                                   const void *beta,
+                                   const cudnnTensorDescriptor_t dXdMeansDesc, /* same desc for dx, dMeans */
+                                   void *dx,                                   /* output x differential */
+                                   void *dMeans); /* output means differential, can be NULL */

 CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
-
+cudnnGetBatchNormalizationForwardTrainingExWorkspaceSize(cudnnHandle_t handle,
+                                                         cudnnBatchNormMode_t mode,
+                                                         cudnnBatchNormOps_t bnOps,
+                                                         const cudnnTensorDescriptor_t xDesc,
+                                                         const cudnnTensorDescriptor_t zDesc,
+                                                         const cudnnTensorDescriptor_t yDesc,
+                                                         const cudnnTensorDescriptor_t bnScaleBiasMeanVarDesc,
+                                                         const cudnnActivationDescriptor_t activationDesc,
+                                                         size_t *sizeInBytes);

 CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
-
+cudnnGetBatchNormalizationBackwardExWorkspaceSize(cudnnHandle_t handle,
+                                                  cudnnBatchNormMode_t mode,
+                                                  cudnnBatchNormOps_t bnOps,
+                                                  const cudnnTensorDescriptor_t xDesc,
+                                                  const cudnnTensorDescriptor_t yDesc,
+                                                  const cudnnTensorDescriptor_t dyDesc,
+                                                  const cudnnTensorDescriptor_t dzDesc,
+                                                  const cudnnTensorDescriptor_t dxDesc,
+                                                  const cudnnTensorDescriptor_t dBnScaleBiasDesc,
+                                                  const cudnnActivationDescriptor_t activationDesc,
+                                                  size_t *sizeInBytes);

 CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
-
-
+cudnnGetBatchNormalizationTrainingExReserveSpaceSize(cudnnHandle_t handle,
+                                                     cudnnBatchNormMode_t mode,
+                                                     cudnnBatchNormOps_t bnOps,
+                                                     const cudnnActivationDescriptor_t activationDesc,
+                                                     const cudnnTensorDescriptor_t xDesc,
+                                                     size_t *sizeInBytes);
+
+/* Computes y = BN(x). Also accumulates moving averages of mean and inverse variances */
 CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
-
-
-
-
-
-
+cudnnBatchNormalizationForwardTraining(
+    cudnnHandle_t handle,
+    cudnnBatchNormMode_t mode,
+
+    const void *alpha, /* alpha[0] = result blend factor */
+    const void *beta,  /* beta[0] = dest layer blend factor */
+
+    const cudnnTensorDescriptor_t xDesc,
+    const void *x, /* NxCxHxW */
+    const cudnnTensorDescriptor_t yDesc,
+    void *y, /* NxCxHxW */
+
+    /* Shared desc for the next 6 tensors in the argument list.
+       Data type to be set as follows:
+       type = (typeOf(x) == double) ? double : float
+       Dimensions for this descriptor depend on normalization mode
+       - Spatial Normalization : tensors are expected to have dims 1xCx1x1
+        (normalization is performed across NxHxW)
+       - Per-Activation Normalization : tensors are expected to have dims of 1xCxHxW
+        (normalization is performed across N) */
+    const cudnnTensorDescriptor_t bnScaleBiasMeanVarDesc,
+
+    /* 'Gamma' and 'Beta' respectively in Ioffe and Szegedy's paper's notation */
+    const void *bnScale,
+    const void *bnBias,
+
+    /* MUST use factor=1 in the very first call of a complete training cycle.
+       Use a factor=1/(1+n) at N-th call to the function to get
+       Cumulative Moving Average (CMA) behavior
+       CMA[n] = (x[1]+...+x[n])/n
+       Since CMA[n+1] = (n*CMA[n]+x[n+1])/(n+1) =
+       ((n+1)*CMA[n]-CMA[n])/(n+1) + x[n+1]/(n+1) =
+       CMA[n]*(1-1/(n+1)) + x[n+1]*1/(n+1) */
+    double exponentialAverageFactor,
+
+    /* Used in Training phase only.
+       runningMean = newMean*factor + runningMean*(1-factor) */
+    void *resultRunningMean,
+    /* Output in training mode, input in inference. Is the moving average
+       of variance[x] (factor is applied in the same way as for runningMean) */
+    void *resultRunningVariance,
+
+    /* Has to be >= CUDNN_BN_MIN_EPSILON. Should be the same in forward and backward functions. */
+    double epsilon,
+
+    /* Optionally save intermediate results from the forward pass here
+       - can be reused to speed up backward pass. NULL if unused */
+    void *resultSaveMean,
+    void *resultSaveInvVariance);
+
+/* Computes y = relu(BN(x) + z). Also accumulates moving averages of mean and inverse variances */
 CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
-
-
-
-
-
-
+cudnnBatchNormalizationForwardTrainingEx(
+    cudnnHandle_t handle,
+    cudnnBatchNormMode_t mode,
+    cudnnBatchNormOps_t bnOps,
+
+    const void *alpha, /* alpha[0] = result blend factor */
+    const void *beta,  /* beta[0] = dest layer blend factor */
+
+    const cudnnTensorDescriptor_t xDesc,
+    const void *xData,
+    const cudnnTensorDescriptor_t zDesc,
+    const void *zData,
+    const cudnnTensorDescriptor_t yDesc,
+    void *yData,
+
+    const cudnnTensorDescriptor_t bnScaleBiasMeanVarDesc,
+    const void *bnScale,
+    const void *bnBias,
+
+    double exponentialAverageFactor,
+    void *resultRunningMean,
+    void *resultRunningVariance,
+
+    /* Has to be >= CUDNN_BN_MIN_EPSILON. Should be the same in forward and backward functions. */
+    double epsilon,
+
+    /* Optionally save intermediate results from the forward pass here
+       - can be reused to speed up backward pass. NULL if unused */
+    void *resultSaveMean,
+    void *resultSaveInvVariance,
+
+    cudnnActivationDescriptor_t activationDesc,
+    void *workspace,
+    size_t workSpaceSizeInBytes,
+    void *reserveSpace,
+    size_t reserveSpaceSizeInBytes);
+
+/* Performs backward pass of Batch Normalization layer. Returns x gradient,
+ * bnScale gradient and bnBias gradient */
 CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
-
+cudnnBatchNormalizationBackward(cudnnHandle_t handle,
+                                cudnnBatchNormMode_t mode,
+                                const void *alphaDataDiff,
+                                const void *betaDataDiff,
+                                const void *alphaParamDiff,
+                                const void *betaParamDiff,
+                                const cudnnTensorDescriptor_t xDesc, /* same desc for x, dx, dy */
+                                const void *x,
+                                const cudnnTensorDescriptor_t dyDesc,
+                                const void *dy,
+                                const cudnnTensorDescriptor_t dxDesc,
+                                void *dx,
+                                /* Shared tensor desc for the 4 tensors below */
+                                const cudnnTensorDescriptor_t dBnScaleBiasDesc,
+                                const void *bnScale, /* bnBias doesn't affect backpropagation */
+                                /* scale and bias diff are not backpropagated below this layer */
+                                void *dBnScaleResult,
+                                void *dBnBiasResult,
+                                /* Same epsilon as forward pass */
+                                double epsilon,
+
+                                /* Optionally cached intermediate results from
+                                   forward pass */
+                                const void *savedMean,
+                                const void *savedInvVariance);

 CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
-
+cudnnBatchNormalizationBackwardEx(cudnnHandle_t handle,
+                                  cudnnBatchNormMode_t mode,
+                                  cudnnBatchNormOps_t bnOps,
+
+                                  const void *alphaDataDiff,
+                                  const void *betaDataDiff,
+                                  const void *alphaParamDiff,
+                                  const void *betaParamDiff,
+                                  const cudnnTensorDescriptor_t xDesc,
+                                  const void *xData,
+                                  const cudnnTensorDescriptor_t yDesc,
+                                  const void *yData,
+                                  const cudnnTensorDescriptor_t dyDesc,
+                                  const void *dyData,
+                                  const cudnnTensorDescriptor_t dzDesc,
+                                  void *dzData,
+                                  const cudnnTensorDescriptor_t dxDesc,
+                                  void *dxData,
+
+                                  /* Shared tensor desc for the 4 tensors below */
+                                  const cudnnTensorDescriptor_t dBnScaleBiasDesc,
+                                  const void *bnScaleData,
+                                  const void *bnBiasData, /* needed if there is activation */
+                                  void *dBnScaleData,
+                                  void *dBnBiasData,
+                                  double epsilon, /* Same epsilon as forward pass */
+
+                                  /* Optionally cached intermediate results from
+                                     forward pass */
+                                  const void *savedMean,
+                                  const void *savedInvVariance,
+                                  cudnnActivationDescriptor_t activationDesc,
+                                  void *workSpace,
+                                  size_t workSpaceSizeInBytes,
+                                  void *reserveSpace,
+                                  size_t reserveSpaceSizeInBytes);

 CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
-
-
-
-
+cudnnGetNormalizationForwardTrainingWorkspaceSize(cudnnHandle_t handle,
+                                                  cudnnNormMode_t mode,
+                                                  cudnnNormOps_t normOps,
+                                                  cudnnNormAlgo_t algo,
+                                                  const cudnnTensorDescriptor_t xDesc,
+                                                  const cudnnTensorDescriptor_t zDesc,
+                                                  const cudnnTensorDescriptor_t yDesc,
+                                                  const cudnnTensorDescriptor_t normScaleBiasDesc,
+                                                  const cudnnActivationDescriptor_t activationDesc,
+                                                  const cudnnTensorDescriptor_t normMeanVarDesc,
+                                                  size_t *sizeInBytes,
+                                                  int groupCnt); /* Place hold for future work, should be set to 1 now*/

 CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-#define CUDNN_SEV_WARNING_EN (1U << CUDNN_SEV_WARNING)
-#define CUDNN_SEV_INFO_EN (1U << CUDNN_SEV_INFO)
-
-/* struct containing useful informaiton for each API call */
-typedef struct cudnnDebugStruct {
-    unsigned cudnn_version;
-    cudnnStatus_t cudnnStatus;
-    unsigned time_sec;      /* epoch time in seconds */
-    unsigned time_usec;     /* microseconds part of epoch time */
-    unsigned time_delta;    /* time since start in seconds */
-    cudnnHandle_t handle;   /* cudnn handle */
-    cudaStream_t stream;    /* cuda stream ID */
-    unsigned long long pid; /* process ID */
-    unsigned long long tid; /* thread ID */
-    int cudaDeviceId;       /* CUDA device ID */
-    int reserved[15];       /* reserved for future use */
-} cudnnDebug_t;
-
-typedef void (*cudnnCallback_t)(cudnnSeverity_t sev, void *udata, const cudnnDebug_t *dbg, const char *msg);
+cudnnGetNormalizationBackwardWorkspaceSize(cudnnHandle_t handle,
+                                           cudnnNormMode_t mode,
+                                           cudnnNormOps_t normOps,
+                                           cudnnNormAlgo_t algo,
+                                           const cudnnTensorDescriptor_t xDesc,
+                                           const cudnnTensorDescriptor_t yDesc,
+                                           const cudnnTensorDescriptor_t dyDesc,
+                                           const cudnnTensorDescriptor_t dzDesc,
+                                           const cudnnTensorDescriptor_t dxDesc,
+                                           const cudnnTensorDescriptor_t dNormScaleBiasDesc,
+                                           const cudnnActivationDescriptor_t activationDesc,
+                                           const cudnnTensorDescriptor_t normMeanVarDesc,
+                                           size_t *sizeInBytes,
+                                           int groupCnt); /* Place hold for future work, should be set to 1 now*/

-cudnnStatus_t CUDNNWINAPI
-
-
-
-
+CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
+cudnnGetNormalizationTrainingReserveSpaceSize(cudnnHandle_t handle,
+                                              cudnnNormMode_t mode,
+                                              cudnnNormOps_t normOps,
+                                              cudnnNormAlgo_t algo,
+                                              const cudnnActivationDescriptor_t activationDesc,
+                                              const cudnnTensorDescriptor_t xDesc,
+                                              size_t *sizeInBytes,
+                                              int groupCnt); /* Place hold for future work, should be set to 1 now*/
+
+/* Computes y = relu(Norm(x) + z). Also accumulates moving averages of mean and inverse variances */
+CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
+cudnnNormalizationForwardTraining(cudnnHandle_t handle,
+                                  cudnnNormMode_t mode,
+                                  cudnnNormOps_t normOps,
+                                  cudnnNormAlgo_t algo,
+                                  const void *alpha, /* alpha[0] = result blend factor */
+                                  const void *beta,  /* beta[0] = dest layer blend factor */
+                                  const cudnnTensorDescriptor_t xDesc,
+                                  const void *xData,
+                                  const cudnnTensorDescriptor_t normScaleBiasDesc,
+                                  const void *normScale,
+                                  const void *normBias,
+                                  double exponentialAverageFactor,
+                                  const cudnnTensorDescriptor_t normMeanVarDesc,
+                                  void *resultRunningMean,
+                                  void *resultRunningVariance,
+                                  /* Has to be >= 0. Should be the same in forward and backward functions. */
+                                  double epsilon,
+                                  /* Optionally save intermediate results from the forward pass here
+                                     - can be reused to speed up backward pass. NULL if unused */
+                                  void *resultSaveMean,
+                                  void *resultSaveInvVariance,
+                                  cudnnActivationDescriptor_t activationDesc,
+                                  const cudnnTensorDescriptor_t zDesc,
+                                  const void *zData,
+                                  const cudnnTensorDescriptor_t yDesc,
+                                  void *yData,
+                                  void *workspace,
+                                  size_t workSpaceSizeInBytes,
+                                  void *reserveSpace,
+                                  size_t reserveSpaceSizeInBytes,
+                                  int groupCnt); /* Place hold for future work, should be set to 1 now*/

-
-
-
-
- *
-
-
-
+CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
+cudnnNormalizationBackward(cudnnHandle_t handle,
+                           cudnnNormMode_t mode,
+                           cudnnNormOps_t normOps,
+                           cudnnNormAlgo_t algo,
+                           const void *alphaDataDiff,
+                           const void *betaDataDiff,
+                           const void *alphaParamDiff,
+                           const void *betaParamDiff,
+                           const cudnnTensorDescriptor_t xDesc,
+                           const void *xData,
+                           const cudnnTensorDescriptor_t yDesc,
+                           const void *yData,
+                           const cudnnTensorDescriptor_t dyDesc,
+                           const void *dyData,
+                           const cudnnTensorDescriptor_t dzDesc,
+                           void *dzData,
+                           const cudnnTensorDescriptor_t dxDesc,
+                           void *dxData,
+                           /* Shared tensor desc for the 4 tensors below */
+                           const cudnnTensorDescriptor_t dNormScaleBiasDesc,
+                           const void *normScaleData,
+                           const void *normBiasData, /* needed if there is activation */
+                           void *dNormScaleData,
+                           void *dNormBiasData,
+                           double epsilon, /* Same epsilon as forward pass */
+                           const cudnnTensorDescriptor_t normMeanVarDesc,
+                           /* Optionally cached intermediate results from
+                              forward pass */
+                           const void *savedMean,
+                           const void *savedInvVariance,
+                           cudnnActivationDescriptor_t activationDesc,
+                           void *workSpace,
+                           size_t workSpaceSizeInBytes,
+                           void *reserveSpace,
+                           size_t reserveSpaceSizeInBytes,
+                           int groupCnt); /* Place hold for future work, should be set to 1 now*/
+
+cudnnStatus_t CUDNNWINAPI
+cudnnSpatialTfGridGeneratorBackward(cudnnHandle_t handle,
+                                    const cudnnSpatialTransformerDescriptor_t stDesc,
+                                    const void *dgrid,
+                                    void *dtheta);
+
+cudnnStatus_t CUDNNWINAPI
+cudnnSpatialTfSamplerBackward(cudnnHandle_t handle,
+                              cudnnSpatialTransformerDescriptor_t stDesc,
+                              const void *alpha,
+                              const cudnnTensorDescriptor_t xDesc,
+                              const void *x,
+                              const void *beta,
+                              const cudnnTensorDescriptor_t dxDesc,
+                              void *dx,
+                              const void *alphaDgrid,
+                              const cudnnTensorDescriptor_t dyDesc,
+                              const void *dy,
+                              const void *grid,
+                              const void *betaDgrid,
+                              void *dgrid);
+
+cudnnStatus_t CUDNNWINAPI
+cudnnDropoutBackward(cudnnHandle_t handle,
+                     const cudnnDropoutDescriptor_t dropoutDesc,
+                     const cudnnTensorDescriptor_t dydesc,
+                     const void *dy,
+                     const cudnnTensorDescriptor_t dxdesc,
+                     void *dx,
+                     void *reserveSpace,
+                     size_t reserveSpaceSizeInBytes);

 #if defined(__cplusplus)
 }
 #endif

-#endif /*
+#endif /* CUDNN_OPS_H_ */
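Beyond the renames, the rewritten header replaces the per-sub-library CUDNN_OPS_INFER_* version macros with CUDNN_OPS_* and adds cudnnOpsVersionCheck() for cross-sub-library consistency checking, while marking the legacy descriptor types and functions CUDNN_DEPRECATED. Below is a minimal sketch of a post-upgrade sanity check in C, assuming the application includes the umbrella cudnn.h from this wheel and links against the v9 import libraries; the main() wrapper and messages are illustrative:

    /* Minimal sketch: verifying the cuDNN 9 ops sub-library after upgrading.
     * cudnnOpsVersionCheck() and the CUDNN_OPS_* macros come from the new
     * cudnn_ops.h shown above. */
    #include <stdio.h>
    #include <cudnn.h>

    int main(void) {
    #if CUDNN_MAJOR >= 9
        /* Runtime cross-check between cudnn_ops and its dependencies. */
        cudnnStatus_t st = cudnnOpsVersionCheck();
        if (st != CUDNN_STATUS_SUCCESS) {
            fprintf(stderr, "ops sublib mismatch: %s\n", cudnnGetErrorString(st));
            return 1;
        }
        printf("cudnn_ops %d.%d.%d consistent with cuDNN %zu\n",
               CUDNN_OPS_MAJOR, CUDNN_OPS_MINOR, CUDNN_OPS_PATCH,
               cudnnGetVersion());
    #else
        printf("built against cuDNN 8.x headers\n");
    #endif
        return 0;
    }

Compiling with -DCUDNN_WARN_DEPRECATED (supported in both the 8.x and 9.x headers) is a convenient way to surface every remaining use of the APIs this release deprecates.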