nvidia-cudnn-cu13 9.12.0.46__py3-none-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1316 @@
1
+ /*
2
+ * Copyright 2014-2023 NVIDIA Corporation. All rights reserved.
3
+ *
4
+ * NOTICE TO LICENSEE:
5
+ *
6
+ * This source code and/or documentation ("Licensed Deliverables") are
7
+ * subject to NVIDIA intellectual property rights under U.S. and
8
+ * international Copyright laws.
9
+ *
10
+ * These Licensed Deliverables contained herein is PROPRIETARY and
11
+ * CONFIDENTIAL to NVIDIA and is being provided under the terms and
12
+ * conditions of a form of NVIDIA software license agreement by and
13
+ * between NVIDIA and Licensee ("License Agreement") or electronically
14
+ * accepted by Licensee. Notwithstanding any terms or conditions to
15
+ * the contrary in the License Agreement, reproduction or disclosure
16
+ * of the Licensed Deliverables to any third party without the express
17
+ * written consent of NVIDIA is prohibited.
18
+ *
19
+ * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
20
+ * LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
21
+ * SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
22
+ * PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
23
+ * NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
24
+ * DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
25
+ * NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
26
+ * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
27
+ * LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
28
+ * SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
29
+ * DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
30
+ * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
31
+ * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
32
+ * OF THESE LICENSED DELIVERABLES.
33
+ *
34
+ * U.S. Government End Users. These Licensed Deliverables are a
35
+ * "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
36
+ * 1995), consisting of "commercial computer software" and "commercial
37
+ * computer software documentation" as such terms are used in 48
38
+ * C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
39
+ * only as a commercial end item. Consistent with 48 C.F.R.12.212 and
40
+ * 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
41
+ * U.S. Government End Users acquire the Licensed Deliverables with
42
+ * only those rights set forth herein.
43
+ *
44
+ * Any use of the Licensed Deliverables in individual and commercial
45
+ * software must include, in the user documentation and internal
46
+ * comments to the code, the above Disclaimer and U.S. Government End
47
+ * Users Notice.
48
+ */
49
+
50
+ /*
51
+ * cudnn_ops : cuDNN's basic definitions and basic operations.
52
+ */
53
+
54
+ #if !defined(CUDNN_OPS_H_)
55
+ #define CUDNN_OPS_H_
56
+
57
+ #include <stdint.h>
58
+
59
+ #include "cudnn_version.h"
60
+ #include "cudnn_graph.h"
61
+
62
+ /* These version numbers are autogenerated, do not edit manually. */
63
+ #define CUDNN_OPS_MAJOR 9
64
+ #define CUDNN_OPS_MINOR 12
65
+ #define CUDNN_OPS_PATCH 0
66
+
67
+ #if (CUDNN_OPS_MAJOR != CUDNN_MAJOR) || (CUDNN_OPS_MINOR != CUDNN_MINOR) || (CUDNN_OPS_PATCH != CUDNN_PATCHLEVEL)
68
+ #error Version mismatch in cuDNN OPS!!!
69
+ #endif
70
+
71
+ #if defined(__cplusplus)
72
+ extern "C" {
73
+ #endif
74
+
75
+ /* Data structures to represent Image/Filter and the Neural Network Layer */
76
+ typedef struct cudnnTensorStruct *cudnnTensorDescriptor_t;
77
+ typedef struct cudnnPoolingStruct *cudnnPoolingDescriptor_t CUDNN_DEPRECATED;
78
+ typedef struct cudnnFilterStruct *cudnnFilterDescriptor_t CUDNN_DEPRECATED;
79
+ typedef struct cudnnLRNStruct *cudnnLRNDescriptor_t;
80
+ typedef struct cudnnActivationStruct *cudnnActivationDescriptor_t CUDNN_DEPRECATED;
81
+ typedef struct cudnnSpatialTransformerStruct *cudnnSpatialTransformerDescriptor_t;
82
+ typedef struct cudnnOpTensorStruct *cudnnOpTensorDescriptor_t CUDNN_DEPRECATED;
83
+ typedef struct cudnnReduceTensorStruct *cudnnReduceTensorDescriptor_t CUDNN_DEPRECATED;
84
+ typedef struct cudnnCTCLossStruct *cudnnCTCLossDescriptor_t;
85
+ typedef struct cudnnTensorTransformStruct *cudnnTensorTransformDescriptor_t CUDNN_DEPRECATED;
86
+ /*
87
+ * CUDNN Determinism
88
+ */
89
+ typedef enum {
90
+ CUDNN_NON_DETERMINISTIC = 0,
91
+ CUDNN_DETERMINISTIC = 1,
92
+ } cudnnDeterminism_t;
93
+
94
+ /* Create an instance of a generic Tensor descriptor */
95
+ cudnnStatus_t CUDNNWINAPI
96
+ cudnnCreateTensorDescriptor(cudnnTensorDescriptor_t *tensorDesc);
97
+
98
+ cudnnStatus_t CUDNNWINAPI
99
+ cudnnSetTensor4dDescriptor(cudnnTensorDescriptor_t tensorDesc,
100
+ cudnnTensorFormat_t format,
101
+ cudnnDataType_t dataType, /* image data type */
102
+ int n, /* number of inputs (batch size) */
103
+ int c, /* number of input feature maps */
104
+ int h, /* height of input section */
105
+ int w); /* width of input section */
106
+
107
+ cudnnStatus_t CUDNNWINAPI
108
+ cudnnSetTensor4dDescriptorEx(cudnnTensorDescriptor_t tensorDesc,
109
+ cudnnDataType_t dataType, /* image data type */
110
+ int n, /* number of inputs (batch size) */
111
+ int c, /* number of input feature maps */
112
+ int h, /* height of input section */
113
+ int w, /* width of input section */
114
+ int nStride,
115
+ int cStride,
116
+ int hStride,
117
+ int wStride);
118
+
119
+ cudnnStatus_t CUDNNWINAPI
120
+ cudnnGetTensor4dDescriptor(const cudnnTensorDescriptor_t tensorDesc,
121
+ cudnnDataType_t *dataType, /* image data type */
122
+ int *n, /* number of inputs (batch size) */
123
+ int *c, /* number of input feature maps */
124
+ int *h, /* height of input section */
125
+ int *w, /* width of input section */
126
+ int *nStride,
127
+ int *cStride,
128
+ int *hStride,
129
+ int *wStride);
130
+
131
+ cudnnStatus_t CUDNNWINAPI
132
+ cudnnSetTensorNdDescriptor(cudnnTensorDescriptor_t tensorDesc,
133
+ cudnnDataType_t dataType,
134
+ int nbDims,
135
+ const int dimA[],
136
+ const int strideA[]);
137
+
138
+ cudnnStatus_t CUDNNWINAPI
139
+ cudnnSetTensorNdDescriptorEx(cudnnTensorDescriptor_t tensorDesc,
140
+ cudnnTensorFormat_t format,
141
+ cudnnDataType_t dataType,
142
+ int nbDims,
143
+ const int dimA[]);
144
+
145
+ cudnnStatus_t CUDNNWINAPI
146
+ cudnnGetTensorNdDescriptor(const cudnnTensorDescriptor_t tensorDesc,
147
+ int nbDimsRequested,
148
+ cudnnDataType_t *dataType,
149
+ int *nbDims,
150
+ int dimA[],
151
+ int strideA[]);
152
+
153
+ cudnnStatus_t CUDNNWINAPI
154
+ cudnnGetTensorSizeInBytes(const cudnnTensorDescriptor_t tensorDesc, size_t *size);
155
+
156
+ /* PixelOffset( n, c, h, w ) = n * input_stride + c * feature_stride + h * h_stride + w * w_stride
157
+
158
+ 1) Example of all images in row-major order, one batch of features after the other (with optional padding on each row)
159
+ input_stride : c x h x h_stride
160
+ feature_stride : h x h_stride
161
+ h_stride : >= w ( h_stride = w if no padding)
162
+ w_stride : 1
163
+
164
+
165
+ 2) Example of all images in row-major order with feature maps interleaved
166
+ input_stride : c x h x h_stride
167
+ feature_stride : 1
168
+ h_stride : w x c
169
+ w_stride : c
170
+
171
+ 3) Example of all images in column-major order, one batch of features after the other (with optional padding on each column)
172
+ input_stride : c x w x w_stride
173
+ feature_stride : w x w_stride
174
+ h_stride : 1
175
+ w_stride : >= h
176
+
177
+ */
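
The stride formula above maps directly onto cudnnSetTensor4dDescriptorEx. Below is a minimal sketch (not part of this header) for case 1), a fully packed NCHW layout; the shape values are arbitrary assumptions and error checking is omitted for brevity.

    #include <cudnn.h>

    /* Packed NCHW layout, case 1) above: w_stride = 1, h_stride = w,
       feature_stride = h * h_stride, input_stride = c * feature_stride. */
    static void packed_nchw_descriptor(void) {
        const int n = 2, c = 3, h = 5, w = 7;

        cudnnTensorDescriptor_t desc;
        cudnnCreateTensorDescriptor(&desc);

        const int wStride = 1;
        const int hStride = w * wStride;
        const int cStride = h * hStride;
        const int nStride = c * cStride;
        cudnnSetTensor4dDescriptorEx(desc, CUDNN_DATA_FLOAT,
                                     n, c, h, w,
                                     nStride, cStride, hStride, wStride);

        size_t bytes = 0; /* for this packed layout: n * c * h * w * sizeof(float) */
        cudnnGetTensorSizeInBytes(desc, &bytes);

        cudnnDestroyTensorDescriptor(desc);
    }
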
178
+
179
+ /* Destroy an instance of Tensor4d descriptor */
180
+ cudnnStatus_t CUDNNWINAPI
181
+ cudnnDestroyTensorDescriptor(cudnnTensorDescriptor_t tensorDesc);
182
+
183
+ /* Fold/unfold transforms */
184
+ typedef enum {
185
+ CUDNN_TRANSFORM_FOLD = 0U,
186
+ CUDNN_TRANSFORM_UNFOLD = 1U,
187
+ } cudnnFoldingDirection_t;
188
+
189
+ /** Create a destination descriptor for cudnnTransformTensor */
190
+ CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
191
+ cudnnInitTransformDest(const cudnnTensorTransformDescriptor_t transformDesc,
192
+ const cudnnTensorDescriptor_t srcDesc,
193
+ cudnnTensorDescriptor_t destDesc,
194
+ size_t *destSizeInBytes);
195
+
196
+ /** Create an empty tensor transform descriptor */
197
+ CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
198
+ cudnnCreateTensorTransformDescriptor(cudnnTensorTransformDescriptor_t *transformDesc);
199
+
200
+ /** Initialize a previously created tensor transform descriptor. */
201
+ CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
202
+ cudnnSetTensorTransformDescriptor(cudnnTensorTransformDescriptor_t transformDesc,
203
+ const uint32_t nbDims,
204
+ const cudnnTensorFormat_t destFormat,
205
+ const int32_t padBeforeA[],
206
+ const int32_t padAfterA[],
207
+ const uint32_t foldA[],
208
+ const cudnnFoldingDirection_t direction);
209
+
210
+ /**
211
+ * Retrieves the values stored in a previously initialized tensor transform
212
+ * descriptor.
213
+ */
214
+ CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
215
+ cudnnGetTensorTransformDescriptor(cudnnTensorTransformDescriptor_t transformDesc,
216
+ uint32_t nbDimsRequested,
217
+ cudnnTensorFormat_t *destFormat,
218
+ int32_t padBeforeA[],
219
+ int32_t padAfterA[],
220
+ uint32_t foldA[],
221
+ cudnnFoldingDirection_t *direction);
222
+
223
+ /**
224
+ * Destroys a previously created tensor transform descriptor.
225
+ */
226
+ CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
227
+ cudnnDestroyTensorTransformDescriptor(cudnnTensorTransformDescriptor_t transformDesc);
228
+
229
+ /* Tensor layout conversion helper (y = alpha * x + beta * y) */
230
+ CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
231
+ cudnnTransformTensor(cudnnHandle_t handle,
232
+ const void *alpha,
233
+ const cudnnTensorDescriptor_t xDesc,
234
+ const void *x,
235
+ const void *beta,
236
+ const cudnnTensorDescriptor_t yDesc,
237
+ void *y);
238
+
239
+ CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
240
+ cudnnTransformTensorEx(cudnnHandle_t handle,
241
+ const cudnnTensorTransformDescriptor_t transDesc,
242
+ const void *alpha,
243
+ const cudnnTensorDescriptor_t srcDesc,
244
+ const void *srcData,
245
+ const void *beta,
246
+ const cudnnTensorDescriptor_t destDesc,
247
+ void *destData);
248
+
249
+ /* Tensor Bias addition : C = alpha * A + beta * C */
250
+ CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
251
+ cudnnAddTensor(cudnnHandle_t handle,
252
+ const void *alpha,
253
+ const cudnnTensorDescriptor_t aDesc,
254
+ const void *A,
255
+ const void *beta,
256
+ const cudnnTensorDescriptor_t cDesc,
257
+ void *C);
258
+
259
+ /*
260
+ * CUDNN OpTensor op type
261
+ */
262
+ typedef enum {
263
+ CUDNN_OP_TENSOR_ADD = 0,
264
+ CUDNN_OP_TENSOR_MUL = 1,
265
+ CUDNN_OP_TENSOR_MIN = 2,
266
+ CUDNN_OP_TENSOR_MAX = 3,
267
+ CUDNN_OP_TENSOR_SQRT = 4,
268
+ CUDNN_OP_TENSOR_NOT = 5,
269
+ } cudnnOpTensorOp_t;
270
+
271
+ CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
272
+ cudnnCreateOpTensorDescriptor(cudnnOpTensorDescriptor_t *opTensorDesc);
273
+
274
+ CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
275
+ cudnnSetOpTensorDescriptor(cudnnOpTensorDescriptor_t opTensorDesc,
276
+ cudnnOpTensorOp_t opTensorOp,
277
+ cudnnDataType_t opTensorCompType,
278
+ cudnnNanPropagation_t opTensorNanOpt);
279
+
280
+ CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
281
+ cudnnGetOpTensorDescriptor(const cudnnOpTensorDescriptor_t opTensorDesc,
282
+ cudnnOpTensorOp_t *opTensorOp,
283
+ cudnnDataType_t *opTensorCompType,
284
+ cudnnNanPropagation_t *opTensorNanOpt);
285
+
286
+ CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
287
+ cudnnDestroyOpTensorDescriptor(cudnnOpTensorDescriptor_t opTensorDesc);
288
+
289
+ /* Tensor operation : C = op( alpha1 * A, alpha2 * B ) + beta * C */
290
+ /* B tensor is ignored for CUDNN_OP_TENSOR_SQRT, CUDNN_OP_TENSOR_NOT. */
291
+ CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
292
+ cudnnOpTensor(cudnnHandle_t handle,
293
+ const cudnnOpTensorDescriptor_t opTensorDesc,
294
+ const void *alpha1,
295
+ const cudnnTensorDescriptor_t aDesc,
296
+ const void *A,
297
+ const void *alpha2,
298
+ const cudnnTensorDescriptor_t bDesc,
299
+ const void *B,
300
+ const void *beta,
301
+ const cudnnTensorDescriptor_t cDesc,
302
+ void *C);
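
As a concrete illustration of the (deprecated) OpTensor path just declared, the sketch below (not part of this header) computes an elementwise maximum, C = max(A, B). The handle, the shared descriptor and the device buffers are assumed to be created by the caller.

    #include <cudnn.h>

    /* C = max(alpha1 * A, alpha2 * B) + beta * C, with alpha1 = alpha2 = 1 and beta = 0. */
    void optensor_max(cudnnHandle_t handle, cudnnTensorDescriptor_t desc,
                      const float *d_a, const float *d_b, float *d_c) {
        cudnnOpTensorDescriptor_t op;
        cudnnCreateOpTensorDescriptor(&op);
        cudnnSetOpTensorDescriptor(op, CUDNN_OP_TENSOR_MAX,
                                   CUDNN_DATA_FLOAT, CUDNN_PROPAGATE_NAN);

        const float alpha1 = 1.0f, alpha2 = 1.0f, beta = 0.0f;
        cudnnOpTensor(handle, op,
                      &alpha1, desc, d_a,
                      &alpha2, desc, d_b,
                      &beta, desc, d_c);

        cudnnDestroyOpTensorDescriptor(op);
    }
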
303
+
304
+ /*
305
+ * CUDNN ReduceTensor indices type
306
+ */
307
+ typedef enum {
308
+ CUDNN_REDUCE_TENSOR_NO_INDICES = 0,
309
+ CUDNN_REDUCE_TENSOR_FLATTENED_INDICES = 1,
310
+ } cudnnReduceTensorIndices_t CUDNN_DEPRECATED;
311
+
312
+ /*
313
+ * CUDNN tensor indices type size (all unsigned)
314
+ * Currently not supported, default is 32 bit unsigned.
315
+ */
316
+ typedef enum {
317
+ CUDNN_32BIT_INDICES = 0,
318
+ CUDNN_64BIT_INDICES = 1,
319
+ CUDNN_16BIT_INDICES = 2,
320
+ CUDNN_8BIT_INDICES = 3,
321
+ } cudnnIndicesType_t CUDNN_DEPRECATED;
322
+
323
+ CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
324
+ cudnnCreateReduceTensorDescriptor(cudnnReduceTensorDescriptor_t *reduceTensorDesc);
325
+
326
+ CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
327
+ cudnnSetReduceTensorDescriptor(cudnnReduceTensorDescriptor_t reduceTensorDesc,
328
+ cudnnReduceTensorOp_t reduceTensorOp,
329
+ cudnnDataType_t reduceTensorCompType,
330
+ cudnnNanPropagation_t reduceTensorNanOpt,
331
+ cudnnReduceTensorIndices_t reduceTensorIndices,
332
+ cudnnIndicesType_t reduceTensorIndicesType);
333
+
334
+ CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
335
+ cudnnGetReduceTensorDescriptor(const cudnnReduceTensorDescriptor_t reduceTensorDesc,
336
+ cudnnReduceTensorOp_t *reduceTensorOp,
337
+ cudnnDataType_t *reduceTensorCompType,
338
+ cudnnNanPropagation_t *reduceTensorNanOpt,
339
+ cudnnReduceTensorIndices_t *reduceTensorIndices,
340
+ cudnnIndicesType_t *reduceTensorIndicesType);
341
+
342
+ CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
343
+ cudnnDestroyReduceTensorDescriptor(cudnnReduceTensorDescriptor_t reduceTensorDesc);
344
+
345
+ /* Helper function to return the minimum size of the index space to be passed to the reduction given the input and
346
+ * output tensors */
347
+ CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
348
+ cudnnGetReductionIndicesSize(cudnnHandle_t handle,
349
+ const cudnnReduceTensorDescriptor_t reduceTensorDesc,
350
+ const cudnnTensorDescriptor_t aDesc,
351
+ const cudnnTensorDescriptor_t cDesc,
352
+ size_t *sizeInBytes);
353
+
354
+ /* Helper function to return the minimum size of the workspace to be passed to the reduction given the input and output
355
+ * tensors */
356
+ CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
357
+ cudnnGetReductionWorkspaceSize(cudnnHandle_t handle,
358
+ const cudnnReduceTensorDescriptor_t reduceTensorDesc,
359
+ const cudnnTensorDescriptor_t aDesc,
360
+ const cudnnTensorDescriptor_t cDesc,
361
+ size_t *sizeInBytes);
362
+
363
+ /* Tensor operation : C = reduce op( alpha * A ) + beta * C */
364
+ /* The NaN propagation enum applies only to the min and max reduce ops; the other reduce ops propagate NaN as usual. */
365
+ /* The indices space is ignored for reduce ops other than min or max. */
366
+ CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
367
+ cudnnReduceTensor(cudnnHandle_t handle,
368
+ const cudnnReduceTensorDescriptor_t reduceTensorDesc,
369
+ void *indices,
370
+ size_t indicesSizeInBytes,
371
+ void *workspace,
372
+ size_t workspaceSizeInBytes,
373
+ const void *alpha,
374
+ const cudnnTensorDescriptor_t aDesc,
375
+ const void *A,
376
+ const void *beta,
377
+ const cudnnTensorDescriptor_t cDesc,
378
+ void *C);
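
A minimal sketch (not part of this header) of the query-then-reduce pattern described above: summing an NxCxHxW tensor A down to an NxCx1x1 tensor C. The handle, descriptors and device buffers are assumed to exist; since the op is ADD, no indices buffer is needed.

    #include <stddef.h>
    #include <cudnn.h>
    #include <cuda_runtime.h>

    void reduce_sum_hw(cudnnHandle_t handle,
                       cudnnTensorDescriptor_t aDesc, const float *d_a,
                       cudnnTensorDescriptor_t cDesc, float *d_c) {
        cudnnReduceTensorDescriptor_t red;
        cudnnCreateReduceTensorDescriptor(&red);
        cudnnSetReduceTensorDescriptor(red, CUDNN_REDUCE_TENSOR_ADD, CUDNN_DATA_FLOAT,
                                       CUDNN_PROPAGATE_NAN,
                                       CUDNN_REDUCE_TENSOR_NO_INDICES,
                                       CUDNN_32BIT_INDICES);

        /* Query, then allocate, the workspace the reduction needs. */
        size_t wsBytes = 0;
        cudnnGetReductionWorkspaceSize(handle, red, aDesc, cDesc, &wsBytes);
        void *d_ws = NULL;
        cudaMalloc(&d_ws, wsBytes);

        const float alpha = 1.0f, beta = 0.0f;
        cudnnReduceTensor(handle, red, /*indices=*/NULL, /*indicesSizeInBytes=*/0,
                          d_ws, wsBytes, &alpha, aDesc, d_a, &beta, cDesc, d_c);

        cudaFree(d_ws);
        cudnnDestroyReduceTensorDescriptor(red);
    }
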
379
+
380
+ /* Set all values of a tensor to a given value : y[i] = value[0] */
381
+ cudnnStatus_t CUDNNWINAPI
382
+ cudnnSetTensor(cudnnHandle_t handle, const cudnnTensorDescriptor_t yDesc, void *y, const void *valuePtr);
383
+
384
+ /* Scale all values of a tensor by a given factor : y[i] = alpha * y[i] */
385
+ CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
386
+ cudnnScaleTensor(cudnnHandle_t handle, const cudnnTensorDescriptor_t yDesc, void *y, const void *alpha);
387
+
388
+ /* Create an instance of FilterStruct */
389
+ CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
390
+ cudnnCreateFilterDescriptor(cudnnFilterDescriptor_t *filterDesc);
391
+
392
+ CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
393
+ cudnnSetFilter4dDescriptor(cudnnFilterDescriptor_t filterDesc,
394
+ cudnnDataType_t dataType, /* image data type */
395
+ cudnnTensorFormat_t format,
396
+ int k, /* number of output feature maps */
397
+ int c, /* number of input feature maps */
398
+ int h, /* height of each input filter */
399
+ int w); /* width of each input filter */
400
+
401
+ CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
402
+ cudnnGetFilter4dDescriptor(const cudnnFilterDescriptor_t filterDesc,
403
+ cudnnDataType_t *dataType, /* image data type */
404
+ cudnnTensorFormat_t *format,
405
+ int *k, /* number of output feature maps */
406
+ int *c, /* number of input feature maps */
407
+ int *h, /* height of each input filter */
408
+ int *w); /* width of each input filter */
409
+
410
+ CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
411
+ cudnnSetFilterNdDescriptor(cudnnFilterDescriptor_t filterDesc,
412
+ cudnnDataType_t dataType, /* image data type */
413
+ cudnnTensorFormat_t format,
414
+ int nbDims,
415
+ const int filterDimA[]);
416
+
417
+ CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
418
+ cudnnGetFilterNdDescriptor(const cudnnFilterDescriptor_t filterDesc,
419
+ int nbDimsRequested,
420
+ cudnnDataType_t *dataType, /* image data type */
421
+ cudnnTensorFormat_t *format,
422
+ int *nbDims,
423
+ int filterDimA[]);
424
+ CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
425
+ cudnnGetFilterSizeInBytes(const cudnnFilterDescriptor_t filterDesc, size_t *size);
426
+
427
+ CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
428
+ cudnnTransformFilter(cudnnHandle_t handle,
429
+ const cudnnTensorTransformDescriptor_t transDesc,
430
+ const void *alpha,
431
+ const cudnnFilterDescriptor_t srcDesc,
432
+ const void *srcData,
433
+ const void *beta,
434
+ const cudnnFilterDescriptor_t destDesc,
435
+ void *destData);
436
+
437
+ CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
438
+ cudnnDestroyFilterDescriptor(cudnnFilterDescriptor_t filterDesc);
439
+
440
+ /*
441
+ * softmax algorithm
442
+ */
443
+ typedef enum {
444
+ CUDNN_SOFTMAX_FAST = 0, /* straightforward implementation */
445
+ CUDNN_SOFTMAX_ACCURATE = 1, /* subtract max from every point to avoid overflow */
446
+ CUDNN_SOFTMAX_LOG = 2
447
+ } cudnnSoftmaxAlgorithm_t;
448
+
449
+ typedef enum {
450
+ CUDNN_SOFTMAX_MODE_INSTANCE = 0, /* compute the softmax over all C, H, W for each N */
451
+ CUDNN_SOFTMAX_MODE_CHANNEL = 1 /* compute the softmax over all C for each H, W, N */
452
+ } cudnnSoftmaxMode_t;
453
+
454
+ /* Softmax functions: All of the form "output = alpha * Op(inputs) + beta * output" */
455
+
456
+ /* Function to perform forward softmax */
457
+ cudnnStatus_t CUDNNWINAPI
458
+ cudnnSoftmaxForward(cudnnHandle_t handle,
459
+ cudnnSoftmaxAlgorithm_t algo,
460
+ cudnnSoftmaxMode_t mode,
461
+ const void *alpha,
462
+ const cudnnTensorDescriptor_t xDesc,
463
+ const void *x,
464
+ const void *beta,
465
+ const cudnnTensorDescriptor_t yDesc,
466
+ void *y);
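
A minimal sketch (not part of this header) of the softmax form quoted above, using alpha = 1 and beta = 0 so that y is simply softmax(x) over the channel dimension. The handle, descriptors and device buffers are assumed to be prepared by the caller.

    #include <cudnn.h>

    void softmax_channel(cudnnHandle_t handle,
                         cudnnTensorDescriptor_t xDesc, const float *d_x,
                         cudnnTensorDescriptor_t yDesc, float *d_y) {
        const float alpha = 1.0f, beta = 0.0f; /* y = 1 * softmax(x) + 0 * y */
        cudnnSoftmaxForward(handle,
                            CUDNN_SOFTMAX_ACCURATE,  /* subtract max for numerical stability */
                            CUDNN_SOFTMAX_MODE_CHANNEL,
                            &alpha, xDesc, d_x,
                            &beta, yDesc, d_y);
    }
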
467
+
468
+ /*
469
+ * pooling mode
470
+ */
471
+ typedef enum {
472
+ CUDNN_POOLING_MAX = 0,
473
+ CUDNN_POOLING_AVERAGE_COUNT_INCLUDE_PADDING = 1, /* count for average includes padded values */
474
+ CUDNN_POOLING_AVERAGE_COUNT_EXCLUDE_PADDING = 2, /* count for average does not include padded values */
475
+ CUDNN_POOLING_MAX_DETERMINISTIC = 3
476
+ } cudnnPoolingMode_t CUDNN_DEPRECATED;
477
+
478
+ /* Create an instance of pooling descriptor */
479
+ CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
480
+ cudnnCreatePoolingDescriptor(cudnnPoolingDescriptor_t *poolingDesc);
481
+
482
+ CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
483
+ cudnnSetPooling2dDescriptor(cudnnPoolingDescriptor_t poolingDesc,
484
+ cudnnPoolingMode_t mode,
485
+ cudnnNanPropagation_t maxpoolingNanOpt,
486
+ int windowHeight,
487
+ int windowWidth,
488
+ int verticalPadding,
489
+ int horizontalPadding,
490
+ int verticalStride,
491
+ int horizontalStride);
492
+
493
+ CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
494
+ cudnnGetPooling2dDescriptor(const cudnnPoolingDescriptor_t poolingDesc,
495
+ cudnnPoolingMode_t *mode,
496
+ cudnnNanPropagation_t *maxpoolingNanOpt,
497
+ int *windowHeight,
498
+ int *windowWidth,
499
+ int *verticalPadding,
500
+ int *horizontalPadding,
501
+ int *verticalStride,
502
+ int *horizontalStride);
503
+
504
+ CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
505
+ cudnnSetPoolingNdDescriptor(cudnnPoolingDescriptor_t poolingDesc,
506
+ const cudnnPoolingMode_t mode,
507
+ const cudnnNanPropagation_t maxpoolingNanOpt,
508
+ int nbDims,
509
+ const int windowDimA[],
510
+ const int paddingA[],
511
+ const int strideA[]);
512
+
513
+ CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
514
+ cudnnGetPoolingNdDescriptor(const cudnnPoolingDescriptor_t poolingDesc,
515
+ int nbDimsRequested,
516
+ cudnnPoolingMode_t *mode,
517
+ cudnnNanPropagation_t *maxpoolingNanOpt,
518
+ int *nbDims,
519
+ int windowDimA[],
520
+ int paddingA[],
521
+ int strideA[]);
522
+
523
+ CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
524
+ cudnnGetPoolingNdForwardOutputDim(const cudnnPoolingDescriptor_t poolingDesc,
525
+ const cudnnTensorDescriptor_t inputTensorDesc,
526
+ int nbDims,
527
+ int outputTensorDimA[]);
528
+
529
+ CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
530
+ cudnnGetPooling2dForwardOutputDim(const cudnnPoolingDescriptor_t poolingDesc,
531
+ const cudnnTensorDescriptor_t inputTensorDesc,
532
+ int *n,
533
+ int *c,
534
+ int *h,
535
+ int *w);
536
+
537
+ /* Destroy an instance of pooling descriptor */
538
+ CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
539
+ cudnnDestroyPoolingDescriptor(cudnnPoolingDescriptor_t poolingDesc);
540
+
541
+ /* Pooling functions: All of the form "output = alpha * Op(inputs) + beta * output" */
542
+
543
+ /* Function to perform forward pooling */
544
+ CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
545
+ cudnnPoolingForward(cudnnHandle_t handle,
546
+ const cudnnPoolingDescriptor_t poolingDesc,
547
+ const void *alpha,
548
+ const cudnnTensorDescriptor_t xDesc,
549
+ const void *x,
550
+ const void *beta,
551
+ const cudnnTensorDescriptor_t yDesc,
552
+ void *y);
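
A minimal sketch (not part of this header) of the deprecated pooling path: 2x2 max pooling with stride 2. The output shape is derived with cudnnGetPooling2dForwardOutputDim before running the forward pass; the input descriptor, the output descriptor handle and the device buffers are assumed to exist.

    #include <cudnn.h>

    void maxpool_2x2(cudnnHandle_t handle,
                     cudnnTensorDescriptor_t xDesc, const float *d_x,
                     cudnnTensorDescriptor_t yDesc, float *d_y) {
        cudnnPoolingDescriptor_t pool;
        cudnnCreatePoolingDescriptor(&pool);
        cudnnSetPooling2dDescriptor(pool, CUDNN_POOLING_MAX, CUDNN_PROPAGATE_NAN,
                                    /*windowH=*/2, /*windowW=*/2,
                                    /*padH=*/0, /*padW=*/0,
                                    /*strideH=*/2, /*strideW=*/2);

        /* Derive the output shape and describe y accordingly. */
        int n, c, h, w;
        cudnnGetPooling2dForwardOutputDim(pool, xDesc, &n, &c, &h, &w);
        cudnnSetTensor4dDescriptor(yDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, n, c, h, w);

        const float alpha = 1.0f, beta = 0.0f;
        cudnnPoolingForward(handle, pool, &alpha, xDesc, d_x, &beta, yDesc, d_y);

        cudnnDestroyPoolingDescriptor(pool);
    }
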
553
+
554
+ /* Activation functions: All of the form "output = alpha * Op(inputs) + beta * output" */
555
+ CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
556
+ cudnnCreateActivationDescriptor(cudnnActivationDescriptor_t *activationDesc);
557
+
558
+ CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
559
+ cudnnSetActivationDescriptor(cudnnActivationDescriptor_t activationDesc,
560
+ cudnnActivationMode_t mode,
561
+ cudnnNanPropagation_t reluNanOpt,
562
+ double coef); /* ceiling for clipped RELU, alpha for ELU */
563
+
564
+ CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
565
+ cudnnGetActivationDescriptor(const cudnnActivationDescriptor_t activationDesc,
566
+ cudnnActivationMode_t *mode,
567
+ cudnnNanPropagation_t *reluNanOpt,
568
+ double *coef); /* ceiling for clipped RELU, alpha for ELU */
569
+
570
+ CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
571
+ cudnnSetActivationDescriptorSwishBeta(cudnnActivationDescriptor_t activationDesc, double swish_beta);
572
+
573
+ CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
574
+ cudnnGetActivationDescriptorSwishBeta(cudnnActivationDescriptor_t activationDesc, double *swish_beta);
575
+
576
+ CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
577
+ cudnnDestroyActivationDescriptor(cudnnActivationDescriptor_t activationDesc);
578
+
579
+ /* Function to perform forward activation */
580
+ CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
581
+ cudnnActivationForward(cudnnHandle_t handle,
582
+ cudnnActivationDescriptor_t activationDesc,
583
+ const void *alpha,
584
+ const cudnnTensorDescriptor_t xDesc,
585
+ const void *x,
586
+ const void *beta,
587
+ const cudnnTensorDescriptor_t yDesc,
588
+ void *y);
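
A minimal sketch (not part of this header) of the deprecated activation path, using the coef argument as the clipping ceiling mentioned above to get y = min(max(x, 0), 6). The handle, descriptor and device buffers are assumed to be set up by the caller.

    #include <cudnn.h>

    void relu6_forward(cudnnHandle_t handle, cudnnTensorDescriptor_t desc,
                       const float *d_x, float *d_y) {
        cudnnActivationDescriptor_t act;
        cudnnCreateActivationDescriptor(&act);
        cudnnSetActivationDescriptor(act, CUDNN_ACTIVATION_CLIPPED_RELU,
                                     CUDNN_PROPAGATE_NAN, /*coef=*/6.0);

        const float alpha = 1.0f, beta = 0.0f;
        cudnnActivationForward(handle, act, &alpha, desc, d_x, &beta, desc, d_y);

        cudnnDestroyActivationDescriptor(act);
    }
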
589
+
590
+ /*
591
+ * Create an instance of LRN (Local Response Normalization) descriptor
592
+ * Uses lrnN=5, lrnAlpha=1e-4, lrnBeta=0.75, lrnK=2.0 as defaults from Krizhevsky'12 ImageNet paper
593
+ */
594
+ cudnnStatus_t CUDNNWINAPI
595
+ cudnnCreateLRNDescriptor(cudnnLRNDescriptor_t *normDesc);
596
+
597
+ #define CUDNN_LRN_MIN_N 1 /* minimum allowed lrnN */
598
+ #define CUDNN_LRN_MAX_N 16 /* maximum allowed lrnN */
599
+ #define CUDNN_LRN_MIN_K 1e-5 /* minimum allowed lrnK */
600
+ #define CUDNN_LRN_MIN_BETA 0.01 /* minimum allowed lrnBeta */
601
+
602
+ /* LRN layer mode */
603
+ typedef enum {
604
+ CUDNN_LRN_CROSS_CHANNEL_DIM1 = 0, /* Normalize across tensor's dimA[1] dimension */
605
+ } cudnnLRNMode_t;
606
+
607
+ /*
608
+ * Uses a window [center-lookBehind, center+lookAhead], where
609
+ * lookBehind = floor( (lrnN-1)/2 ), lookAhead = lrnN-lookBehind-1.
610
+ * Values of double parameters cast to tensor data type.
611
+ */
612
+ cudnnStatus_t CUDNNWINAPI
613
+ cudnnSetLRNDescriptor(cudnnLRNDescriptor_t normDesc, unsigned lrnN, double lrnAlpha, double lrnBeta, double lrnK);
614
+ /*
615
+ * Retrieve the settings currently stored in an LRN layer descriptor
616
+ * Any of the provided pointers can be NULL (no corresponding value will be returned)
617
+ */
618
+ cudnnStatus_t CUDNNWINAPI
619
+ cudnnGetLRNDescriptor(cudnnLRNDescriptor_t normDesc, unsigned *lrnN, double *lrnAlpha, double *lrnBeta, double *lrnK);
620
+
621
+ /* Destroy an instance of LRN descriptor */
622
+ cudnnStatus_t CUDNNWINAPI
623
+ cudnnDestroyLRNDescriptor(cudnnLRNDescriptor_t lrnDesc);
624
+
625
+ /* LRN functions: output = alpha * normalize(x) + beta * old_y */
626
+
627
+ /* LRN cross-channel forward computation. Double parameters cast to tensor data type */
628
+ cudnnStatus_t CUDNNWINAPI
629
+ cudnnLRNCrossChannelForward(cudnnHandle_t handle,
630
+ cudnnLRNDescriptor_t normDesc,
631
+ cudnnLRNMode_t lrnMode,
632
+ const void *alpha,
633
+ const cudnnTensorDescriptor_t xDesc,
634
+ const void *x,
635
+ const void *beta,
636
+ const cudnnTensorDescriptor_t yDesc,
637
+ void *y);
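
A minimal sketch (not part of this header) of cross-channel LRN, using the default parameters quoted above (lrnN=5, lrnAlpha=1e-4, lrnBeta=0.75, lrnK=2.0). The handle, descriptor and device buffers are assumed to exist.

    #include <cudnn.h>

    void lrn_forward(cudnnHandle_t handle, cudnnTensorDescriptor_t desc,
                     const float *d_x, float *d_y) {
        cudnnLRNDescriptor_t lrn;
        cudnnCreateLRNDescriptor(&lrn);
        cudnnSetLRNDescriptor(lrn, /*lrnN=*/5, /*lrnAlpha=*/1e-4, /*lrnBeta=*/0.75, /*lrnK=*/2.0);

        const float alpha = 1.0f, beta = 0.0f;
        cudnnLRNCrossChannelForward(handle, lrn, CUDNN_LRN_CROSS_CHANNEL_DIM1,
                                    &alpha, desc, d_x, &beta, desc, d_y);

        cudnnDestroyLRNDescriptor(lrn);
    }
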
638
+
639
+ typedef enum {
640
+ CUDNN_DIVNORM_PRECOMPUTED_MEANS = 0,
641
+ } cudnnDivNormMode_t;
642
+
643
+ /* LCN/divisive normalization functions: y = alpha * normalize(x) + beta * y */
644
+ cudnnStatus_t CUDNNWINAPI
645
+ cudnnDivisiveNormalizationForward(cudnnHandle_t handle,
646
+ cudnnLRNDescriptor_t normDesc,
647
+ cudnnDivNormMode_t mode,
648
+ const void *alpha,
649
+ const cudnnTensorDescriptor_t xDesc, /* same desc for means, temp, temp2 */
650
+ const void *x,
651
+ const void *means, /* if NULL, means are assumed to be zero */
652
+ void *temp,
653
+ void *temp2,
654
+ const void *beta,
655
+ const cudnnTensorDescriptor_t yDesc,
656
+ void *y);
657
+
658
+ typedef enum {
659
+ /* bnScale, bnBias tensor dims are 1xCxHxWx.. (one value per CHW...-slice, normalized over N slice) */
660
+ CUDNN_BATCHNORM_PER_ACTIVATION = 0,
661
+
662
+ /* bnScale, bnBias tensor dims are 1xCx1x1 (one value per C-dim normalized over Nx1xHxW subtensors) */
663
+ CUDNN_BATCHNORM_SPATIAL = 1,
664
+
665
+ /*
666
+ * bnScale, bnBias tensor dims are 1xCx1x1 (one value per C-dim normalized over Nx1xHxW subtensors).
667
+ * May be faster than CUDNN_BATCHNORM_SPATIAL but imposes some limits on the range of values
668
+ */
669
+ CUDNN_BATCHNORM_SPATIAL_PERSISTENT = 2,
670
+ } cudnnBatchNormMode_t CUDNN_DEPRECATED;
671
+
672
+ #define CUDNN_BN_MIN_EPSILON 0.0 /* Minimum epsilon allowed to be used in the Batch Normalization formula */
673
+
674
+ /*
675
+ * Derives a tensor descriptor from layer data descriptor for BatchNormalization
676
+ * scale, invVariance, bnBias, bnScale tensors. Use this tensor desc for
677
+ * bnScaleBiasMeanVarDesc and bnScaleBiasDiffDesc in Batch Normalization forward and backward functions.
678
+ */
679
+ CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
680
+ cudnnDeriveBNTensorDescriptor(cudnnTensorDescriptor_t derivedBnDesc,
681
+ const cudnnTensorDescriptor_t xDesc,
682
+ cudnnBatchNormMode_t mode);
683
+
684
+ typedef enum {
685
+ CUDNN_BATCHNORM_OPS_BN = 0, /* do batch normalization only */
686
+ CUDNN_BATCHNORM_OPS_BN_ACTIVATION = 1, /* do batchNorm, then activation */
687
+ CUDNN_BATCHNORM_OPS_BN_ADD_ACTIVATION = 2, /* do batchNorm, then elemWiseAdd, then activation */
688
+ } cudnnBatchNormOps_t CUDNN_DEPRECATED;
689
+
690
+ /*
691
+ * Performs Batch Normalization during Inference:
692
+ * y[i] = bnScale[k]*(x[i]-estimatedMean[k])/sqrt(epsilon+estimatedVariance[k]) + bnBias[k]
693
+ * with bnScale, bnBias, runningMean, runningInvVariance tensors indexed
694
+ * according to spatial or per-activation mode. Refer to cudnnBatchNormalizationForwardTraining
695
+ * below for notes on function arguments.
696
+ */
697
+ CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
698
+ cudnnBatchNormalizationForwardInference(cudnnHandle_t handle,
699
+ cudnnBatchNormMode_t mode,
700
+ const void *alpha, /* alpha[0] = result blend factor */
701
+ const void *beta, /* beta[0] = dest layer blend factor */
702
+ const cudnnTensorDescriptor_t xDesc,
703
+ const void *x, /* NxCxHxW */
704
+ const cudnnTensorDescriptor_t yDesc,
705
+ void *y, /* NxCxHxW */
706
+ const cudnnTensorDescriptor_t bnScaleBiasMeanVarDesc,
707
+ const void *bnScale,
708
+ const void *bnBias,
709
+ const void *estimatedMean,
710
+ const void *estimatedVariance,
711
+ double epsilon);
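
A minimal sketch (not part of this header) of the inference formula above in spatial mode: the per-channel parameter descriptor is derived from xDesc with cudnnDeriveBNTensorDescriptor, and the scale/bias/mean/variance buffers are assumed to hold previously trained statistics.

    #include <cudnn.h>

    void bn_inference(cudnnHandle_t handle,
                      cudnnTensorDescriptor_t xDesc, const float *d_x,
                      cudnnTensorDescriptor_t yDesc, float *d_y,
                      const float *d_scale, const float *d_bias,
                      const float *d_mean, const float *d_var) {
        cudnnTensorDescriptor_t bnDesc;
        cudnnCreateTensorDescriptor(&bnDesc);
        /* Derives the 1xCx1x1 descriptor used for the per-channel parameters. */
        cudnnDeriveBNTensorDescriptor(bnDesc, xDesc, CUDNN_BATCHNORM_SPATIAL);

        const float alpha = 1.0f, beta = 0.0f;
        cudnnBatchNormalizationForwardInference(handle, CUDNN_BATCHNORM_SPATIAL,
                                                &alpha, &beta,
                                                xDesc, d_x, yDesc, d_y,
                                                bnDesc, d_scale, d_bias,
                                                d_mean, d_var,
                                                /*epsilon=*/1e-5);

        cudnnDestroyTensorDescriptor(bnDesc);
    }
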
712
+
713
+ typedef enum {
714
+ /* bnScale, bnBias tensor dims are 1xCxHxWx.. (one value per CHW...-slice, normalized over N slice) */
715
+ CUDNN_NORM_PER_ACTIVATION = 0,
716
+
717
+ /* bnScale, bnBias tensor dims are 1xCx1x1 (one value per C-dim normalized over Nx1xHxW subtensors) */
718
+ CUDNN_NORM_PER_CHANNEL = 1,
719
+ } cudnnNormMode_t CUDNN_DEPRECATED;
720
+
721
+ typedef enum { CUDNN_NORM_ALGO_STANDARD = 0, CUDNN_NORM_ALGO_PERSIST = 1 } cudnnNormAlgo_t CUDNN_DEPRECATED;
722
+
723
+ /*
724
+ * Derives a tensor descriptor from layer data descriptor for Normalization
725
+ * scale, invVariance, bnBias, bnScale tensors. Use this tensor desc for
726
+ * normScaleBiasMeanVarDesc and normScaleBiasDiffDesc in Normalization forward and backward functions.
727
+ */
728
+ CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
729
+ cudnnDeriveNormTensorDescriptor(cudnnTensorDescriptor_t derivedNormScaleBiasDesc,
730
+ cudnnTensorDescriptor_t derivedNormMeanVarDesc,
731
+ const cudnnTensorDescriptor_t xDesc,
732
+ cudnnNormMode_t mode,
733
+ int groupCnt); /* Placeholder for future work; should be set to 1 for now */
734
+
735
+ typedef enum {
736
+ CUDNN_NORM_OPS_NORM = 0, /* do normalization only */
737
+ CUDNN_NORM_OPS_NORM_ACTIVATION = 1, /* do Norm, then activation */
738
+ CUDNN_NORM_OPS_NORM_ADD_ACTIVATION = 2, /* do Norm, then elemWiseAdd, then activation */
739
+ } cudnnNormOps_t CUDNN_DEPRECATED;
740
+
741
+ /*
742
+ * Performs Normalization during Inference:
743
+ * y[i] = normScale[k]*(x[i]-estimatedMean[k])/sqrt(epsilon+estimatedVariance[k]) + normBias[k]
744
+ * with normScale, normBias, runningMean, runningInvVariance tensors indexed
745
+ * according to per-channel or per-activation mode. Refer to cudnnNormalizationForwardTraining
746
+ * below for notes on function arguments.
747
+ */
748
+ CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
749
+ cudnnNormalizationForwardInference(cudnnHandle_t handle,
750
+ cudnnNormMode_t mode,
751
+ cudnnNormOps_t normOps,
752
+ cudnnNormAlgo_t algo,
753
+ const void *alpha, /* alpha[0] = result blend factor */
754
+ const void *beta, /* beta[0] = dest layer blend factor */
755
+ const cudnnTensorDescriptor_t xDesc,
756
+ const void *x, /* NxCxHxW */
757
+ const cudnnTensorDescriptor_t normScaleBiasDesc,
758
+ const void *normScale,
759
+ const void *normBias,
760
+ const cudnnTensorDescriptor_t normMeanVarDesc,
761
+ const void *estimatedMean,
762
+ const void *estimatedVariance,
763
+ const cudnnTensorDescriptor_t zDesc,
764
+ const void *z,
765
+ cudnnActivationDescriptor_t activationDesc,
766
+ const cudnnTensorDescriptor_t yDesc,
767
+ void *y, /* NxCxHxW */
768
+ double epsilon,
769
+ int groupCnt); /* Placeholder for future work */
770
+
771
+ /* APIs for spatial transformer network*/
772
+ typedef enum {
773
+ CUDNN_SAMPLER_BILINEAR = 0,
774
+ } cudnnSamplerType_t;
775
+
776
+ cudnnStatus_t CUDNNWINAPI
777
+ cudnnCreateSpatialTransformerDescriptor(cudnnSpatialTransformerDescriptor_t *stDesc);
778
+
779
+ cudnnStatus_t CUDNNWINAPI
780
+ cudnnSetSpatialTransformerNdDescriptor(cudnnSpatialTransformerDescriptor_t stDesc,
781
+ cudnnSamplerType_t samplerType,
782
+ cudnnDataType_t dataType,
783
+ const int nbDims,
784
+ const int dimA[]);
785
+
786
+ cudnnStatus_t CUDNNWINAPI
787
+ cudnnDestroySpatialTransformerDescriptor(cudnnSpatialTransformerDescriptor_t stDesc);
788
+
789
+ cudnnStatus_t CUDNNWINAPI
790
+ cudnnSpatialTfGridGeneratorForward(cudnnHandle_t handle,
791
+ const cudnnSpatialTransformerDescriptor_t stDesc,
792
+ const void *theta,
793
+ void *grid);
794
+
795
+ cudnnStatus_t CUDNNWINAPI
796
+ cudnnSpatialTfSamplerForward(cudnnHandle_t handle,
797
+ cudnnSpatialTransformerDescriptor_t stDesc,
798
+ const void *alpha,
799
+ const cudnnTensorDescriptor_t xDesc,
800
+ const void *x,
801
+ const void *grid,
802
+ const void *beta,
803
+ cudnnTensorDescriptor_t yDesc,
804
+ void *y);
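
A minimal sketch (not part of this header) of the spatial transformer forward pass: theta is turned into a sampling grid, which is then used to bilinearly sample x into y. Here d_theta is assumed to hold n 2x3 affine matrices, d_grid an n*h*w*2 float buffer, and the descriptors/buffers are prepared by the caller.

    #include <cudnn.h>

    void stn_forward(cudnnHandle_t handle, int n, int c, int h, int w,
                     const float *d_theta, float *d_grid,
                     cudnnTensorDescriptor_t xDesc, const float *d_x,
                     cudnnTensorDescriptor_t yDesc, float *d_y) {
        cudnnSpatialTransformerDescriptor_t st;
        cudnnCreateSpatialTransformerDescriptor(&st);

        const int dimA[4] = {n, c, h, w}; /* output (sampled) dimensions */
        cudnnSetSpatialTransformerNdDescriptor(st, CUDNN_SAMPLER_BILINEAR,
                                               CUDNN_DATA_FLOAT, 4, dimA);

        /* 1) theta -> sampling grid; 2) bilinear sampling of x through the grid. */
        cudnnSpatialTfGridGeneratorForward(handle, st, d_theta, d_grid);

        const float alpha = 1.0f, beta = 0.0f;
        cudnnSpatialTfSamplerForward(handle, st, &alpha, xDesc, d_x,
                                     d_grid, &beta, yDesc, d_y);

        cudnnDestroySpatialTransformerDescriptor(st);
    }
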
805
+
806
+ typedef struct cudnnDropoutStruct *cudnnDropoutDescriptor_t;
807
+
808
+ cudnnStatus_t CUDNNWINAPI
809
+ cudnnCreateDropoutDescriptor(cudnnDropoutDescriptor_t *dropoutDesc);
810
+
811
+ cudnnStatus_t CUDNNWINAPI
812
+ cudnnDestroyDropoutDescriptor(cudnnDropoutDescriptor_t dropoutDesc);
813
+
814
+ /* Helper function to determine size of the states to be passed to cudnnSetDropoutDescriptor */
815
+ cudnnStatus_t CUDNNWINAPI
816
+ cudnnDropoutGetStatesSize(cudnnHandle_t handle, size_t *sizeInBytes);
817
+
818
+ /* Helper function to determine size of the reserve space to be passed to dropout forward/backward calls */
819
+ cudnnStatus_t CUDNNWINAPI
820
+ cudnnDropoutGetReserveSpaceSize(cudnnTensorDescriptor_t xdesc, size_t *sizeInBytes);
821
+
822
+ cudnnStatus_t CUDNNWINAPI
823
+ cudnnSetDropoutDescriptor(cudnnDropoutDescriptor_t dropoutDesc,
824
+ cudnnHandle_t handle,
825
+ float dropout,
826
+ void *states,
827
+ size_t stateSizeInBytes,
828
+ unsigned long long seed);
829
+
830
+ /* Restores the dropout descriptor to a previously saved-off state */
831
+ cudnnStatus_t CUDNNWINAPI
832
+ cudnnRestoreDropoutDescriptor(cudnnDropoutDescriptor_t dropoutDesc,
833
+ cudnnHandle_t handle,
834
+ float dropout,
835
+ void *states,
836
+ size_t stateSizeInBytes,
837
+ unsigned long long seed);
838
+
839
+ cudnnStatus_t CUDNNWINAPI
840
+ cudnnGetDropoutDescriptor(cudnnDropoutDescriptor_t dropoutDesc,
841
+ cudnnHandle_t handle,
842
+ float *dropout,
843
+ void **states,
844
+ unsigned long long *seed);
845
+
846
+ cudnnStatus_t CUDNNWINAPI
847
+ cudnnDropoutForward(cudnnHandle_t handle,
848
+ const cudnnDropoutDescriptor_t dropoutDesc,
849
+ const cudnnTensorDescriptor_t xdesc,
850
+ const void *x,
851
+ const cudnnTensorDescriptor_t ydesc,
852
+ void *y,
853
+ void *reserveSpace,
854
+ size_t reserveSpaceSizeInBytes);
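
A minimal sketch (not part of this header) of the dropout workflow just declared: query the state and reserve-space sizes, configure the descriptor, then run the forward pass. cudaMalloc/cudaFree come from the CUDA runtime; the handle, descriptor and device buffers are assumed to match, and the drop rate and seed are arbitrary.

    #include <cudnn.h>
    #include <cuda_runtime.h>

    void dropout_forward(cudnnHandle_t handle, cudnnTensorDescriptor_t desc,
                         const float *d_x, float *d_y) {
        size_t stateBytes = 0, reserveBytes = 0;
        cudnnDropoutGetStatesSize(handle, &stateBytes);
        cudnnDropoutGetReserveSpaceSize(desc, &reserveBytes);

        void *d_states = NULL, *d_reserve = NULL;
        cudaMalloc(&d_states, stateBytes);
        cudaMalloc(&d_reserve, reserveBytes);

        cudnnDropoutDescriptor_t drop;
        cudnnCreateDropoutDescriptor(&drop);
        cudnnSetDropoutDescriptor(drop, handle, /*dropout=*/0.5f,
                                  d_states, stateBytes, /*seed=*/1234ULL);

        cudnnDropoutForward(handle, drop, desc, d_x, desc, d_y,
                            d_reserve, reserveBytes);

        /* In real code the reserve space, states and descriptor must outlive the
           matching cudnnDropoutBackward call; they are released here only because
           this sketch stops after the forward pass. */
        cudnnDestroyDropoutDescriptor(drop);
        cudaFree(d_reserve);
        cudaFree(d_states);
    }
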
855
+
856
+ /* TODO: move these enums out to the appropriate submodule */
857
+ typedef enum {
858
+ CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM = 0,
859
+ CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM = 1,
860
+ CUDNN_CONVOLUTION_FWD_ALGO_GEMM = 2,
861
+ CUDNN_CONVOLUTION_FWD_ALGO_DIRECT = 3,
862
+ CUDNN_CONVOLUTION_FWD_ALGO_FFT = 4,
863
+ CUDNN_CONVOLUTION_FWD_ALGO_FFT_TILING = 5,
864
+ CUDNN_CONVOLUTION_FWD_ALGO_WINOGRAD = 6,
865
+ CUDNN_CONVOLUTION_FWD_ALGO_WINOGRAD_NONFUSED = 7,
866
+ CUDNN_CONVOLUTION_FWD_ALGO_COUNT = 8
867
+ } cudnnConvolutionFwdAlgo_t;
868
+
869
+ typedef enum {
870
+ CUDNN_CONVOLUTION_BWD_FILTER_ALGO_0 = 0, /* non-deterministic */
871
+ CUDNN_CONVOLUTION_BWD_FILTER_ALGO_1 = 1,
872
+ CUDNN_CONVOLUTION_BWD_FILTER_ALGO_FFT = 2,
873
+ CUDNN_CONVOLUTION_BWD_FILTER_ALGO_3 = 3, /* non-deterministic */
874
+ CUDNN_CONVOLUTION_BWD_FILTER_ALGO_WINOGRAD = 4, /* not implemented */
875
+ CUDNN_CONVOLUTION_BWD_FILTER_ALGO_WINOGRAD_NONFUSED = 5,
876
+ CUDNN_CONVOLUTION_BWD_FILTER_ALGO_FFT_TILING = 6,
877
+ CUDNN_CONVOLUTION_BWD_FILTER_ALGO_COUNT = 7
878
+ } cudnnConvolutionBwdFilterAlgo_t;
879
+
880
+ typedef enum {
881
+ CUDNN_CONVOLUTION_BWD_DATA_ALGO_0 = 0, /* non-deterministic */
882
+ CUDNN_CONVOLUTION_BWD_DATA_ALGO_1 = 1,
883
+ CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT = 2,
884
+ CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT_TILING = 3,
885
+ CUDNN_CONVOLUTION_BWD_DATA_ALGO_WINOGRAD = 4,
886
+ CUDNN_CONVOLUTION_BWD_DATA_ALGO_WINOGRAD_NONFUSED = 5,
887
+ CUDNN_CONVOLUTION_BWD_DATA_ALGO_COUNT = 6
888
+ } cudnnConvolutionBwdDataAlgo_t;
889
+
890
+ typedef enum { CUDNN_CTC_LOSS_ALGO_DETERMINISTIC = 0, CUDNN_CTC_LOSS_ALGO_NON_DETERMINISTIC = 1 } cudnnCTCLossAlgo_t;
891
+
892
+ /*
893
+ * \brief Cross-library version checker.
894
+ * This function is implemented differently in each sub-library. Each sublib
895
+ * checks whether its own version matches that of its dependencies.
896
+ * \returns CUDNN_STATUS_SUCCESS if the version check passes,
897
+ * CUDNN_STATUS_SUBLIBRARY_VERSION_MISMATCH if the versions are inconsistent.
898
+ */
899
+ cudnnStatus_t CUDNNWINAPI
900
+ cudnnOpsVersionCheck(void);
901
+
902
+ /* Function to perform backward softmax */
903
+ cudnnStatus_t CUDNNWINAPI
904
+ cudnnSoftmaxBackward(cudnnHandle_t handle,
905
+ cudnnSoftmaxAlgorithm_t algo,
906
+ cudnnSoftmaxMode_t mode,
907
+ const void *alpha,
908
+ const cudnnTensorDescriptor_t yDesc,
909
+ const void *y,
910
+ const cudnnTensorDescriptor_t dyDesc,
911
+ const void *dy,
912
+ const void *beta,
913
+ const cudnnTensorDescriptor_t dxDesc,
914
+ void *dx);
915
+
916
+ /* Function to perform backward pooling */
917
+ CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
918
+ cudnnPoolingBackward(cudnnHandle_t handle,
919
+ const cudnnPoolingDescriptor_t poolingDesc,
920
+ const void *alpha,
921
+ const cudnnTensorDescriptor_t yDesc,
922
+ const void *y,
923
+ const cudnnTensorDescriptor_t dyDesc,
924
+ const void *dy,
925
+ const cudnnTensorDescriptor_t xDesc,
926
+ const void *x,
927
+ const void *beta,
928
+ const cudnnTensorDescriptor_t dxDesc,
929
+ void *dx);
930
+
931
+ /* Function to perform backward activation */
932
+ CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
933
+ cudnnActivationBackward(cudnnHandle_t handle,
934
+ cudnnActivationDescriptor_t activationDesc,
935
+ const void *alpha,
936
+ const cudnnTensorDescriptor_t yDesc,
937
+ const void *y,
938
+ const cudnnTensorDescriptor_t dyDesc,
939
+ const void *dy,
940
+ const cudnnTensorDescriptor_t xDesc,
941
+ const void *x,
942
+ const void *beta,
943
+ const cudnnTensorDescriptor_t dxDesc,
944
+ void *dx);
945
+
946
+ /* LRN cross-channel backward computation. Double parameters cast to tensor data type */
947
+ cudnnStatus_t CUDNNWINAPI
948
+ cudnnLRNCrossChannelBackward(cudnnHandle_t handle,
949
+ cudnnLRNDescriptor_t normDesc,
950
+ cudnnLRNMode_t lrnMode,
951
+ const void *alpha,
952
+ const cudnnTensorDescriptor_t yDesc,
953
+ const void *y,
954
+ const cudnnTensorDescriptor_t dyDesc,
955
+ const void *dy,
956
+ const cudnnTensorDescriptor_t xDesc,
957
+ const void *x,
958
+ const void *beta,
959
+ const cudnnTensorDescriptor_t dxDesc,
960
+ void *dx);
961
+
962
+ cudnnStatus_t CUDNNWINAPI
963
+ cudnnDivisiveNormalizationBackward(cudnnHandle_t handle,
964
+ cudnnLRNDescriptor_t normDesc,
965
+ cudnnDivNormMode_t mode,
966
+ const void *alpha,
967
+ const cudnnTensorDescriptor_t xDesc, /* same desc for x, means, dy, temp, temp2 */
968
+ const void *x,
969
+ const void *means, /* if NULL, means are assumed to be zero */
970
+ const void *dy,
971
+ void *temp,
972
+ void *temp2,
973
+ const void *beta,
974
+ const cudnnTensorDescriptor_t dXdMeansDesc, /* same desc for dx, dMeans */
975
+ void *dx, /* output x differential */
976
+ void *dMeans); /* output means differential, can be NULL */
977
+
978
+ CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
979
+ cudnnGetBatchNormalizationForwardTrainingExWorkspaceSize(cudnnHandle_t handle,
980
+ cudnnBatchNormMode_t mode,
981
+ cudnnBatchNormOps_t bnOps,
982
+ const cudnnTensorDescriptor_t xDesc,
983
+ const cudnnTensorDescriptor_t zDesc,
984
+ const cudnnTensorDescriptor_t yDesc,
985
+ const cudnnTensorDescriptor_t bnScaleBiasMeanVarDesc,
986
+ const cudnnActivationDescriptor_t activationDesc,
987
+ size_t *sizeInBytes);
988
+
989
+ CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
990
+ cudnnGetBatchNormalizationBackwardExWorkspaceSize(cudnnHandle_t handle,
991
+ cudnnBatchNormMode_t mode,
992
+ cudnnBatchNormOps_t bnOps,
993
+ const cudnnTensorDescriptor_t xDesc,
994
+ const cudnnTensorDescriptor_t yDesc,
995
+ const cudnnTensorDescriptor_t dyDesc,
996
+ const cudnnTensorDescriptor_t dzDesc,
997
+ const cudnnTensorDescriptor_t dxDesc,
998
+ const cudnnTensorDescriptor_t dBnScaleBiasDesc,
999
+ const cudnnActivationDescriptor_t activationDesc,
1000
+ size_t *sizeInBytes);
1001
+
1002
+ CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
1003
+ cudnnGetBatchNormalizationTrainingExReserveSpaceSize(cudnnHandle_t handle,
1004
+ cudnnBatchNormMode_t mode,
1005
+ cudnnBatchNormOps_t bnOps,
1006
+ const cudnnActivationDescriptor_t activationDesc,
1007
+ const cudnnTensorDescriptor_t xDesc,
1008
+ size_t *sizeInBytes);
1009
+
1010
+ /* Computes y = BN(x). Also accumulates moving averages of mean and inverse variances */
1011
+ CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
1012
+ cudnnBatchNormalizationForwardTraining(
1013
+ cudnnHandle_t handle,
1014
+ cudnnBatchNormMode_t mode,
1015
+
1016
+ const void *alpha, /* alpha[0] = result blend factor */
1017
+ const void *beta, /* beta[0] = dest layer blend factor */
1018
+
1019
+ const cudnnTensorDescriptor_t xDesc,
1020
+ const void *x, /* NxCxHxW */
1021
+ const cudnnTensorDescriptor_t yDesc,
1022
+ void *y, /* NxCxHxW */
1023
+
1024
+ /* Shared desc for the next 6 tensors in the argument list.
1025
+ Data type to be set as follows:
1026
+ type = (typeOf(x) == double) ? double : float
1027
+ Dimensions for this descriptor depend on normalization mode
1028
+ - Spatial Normalization : tensors are expected to have dims 1xCx1x1
1029
+ (normalization is performed across NxHxW)
1030
+ - Per-Activation Normalization : tensors are expected to have dims of 1xCxHxW
1031
+ (normalization is performed across N) */
1032
+ const cudnnTensorDescriptor_t bnScaleBiasMeanVarDesc,
1033
+
1034
+ /* 'Gamma' and 'Beta' respectively in Ioffe and Szegedy's paper's notation */
1035
+ const void *bnScale,
1036
+ const void *bnBias,
1037
+
1038
+ /* MUST use factor=1 in the very first call of a complete training cycle.
1039
+ Use factor=1/(1+n) on the n-th call to the function to get
1040
+ Cumulative Moving Average (CMA) behavior
1041
+ CMA[n] = (x[1]+...+x[n])/n
1042
+ Since CMA[n+1] = (n*CMA[n]+x[n+1])/(n+1) =
1043
+ ((n+1)*CMA[n]-CMA[n])/(n+1) + x[n+1]/(n+1) =
1044
+ CMA[n]*(1-1/(n+1)) + x[n+1]*1/(n+1) */
1045
+ double exponentialAverageFactor,
1046
+
1047
+ /* Used in Training phase only.
1048
+ runningMean = newMean*factor + runningMean*(1-factor) */
1049
+ void *resultRunningMean,
1050
+ /* Output in training mode, input in inference. This is the moving average
1051
+ of variance[x] (factor is applied in the same way as for runningMean) */
1052
+ void *resultRunningVariance,
1053
+
1054
+ /* Has to be >= CUDNN_BN_MIN_EPSILON. Should be the same in forward and backward functions. */
1055
+ double epsilon,
1056
+
1057
+ /* Optionally save intermediate results from the forward pass here
1058
+ - can be reused to speed up backward pass. NULL if unused */
1059
+ void *resultSaveMean,
1060
+ void *resultSaveInvVariance);
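
The cumulative-moving-average schedule for exponentialAverageFactor spelled out in the comment above reduces to a one-line helper. The sketch below is not part of this header and the name is illustrative; it only shows how to compute the factor to pass on each successive training call.

    #include <stddef.h>

    /* factor = 1 on the first call, then 1/2, 1/3, ..., so the running mean and
       variance become the cumulative moving average CMA[n] = (x[1]+...+x[n])/n. */
    static double bn_cma_factor(size_t callIdx /* 0-based call index */) {
        return 1.0 / (double)(callIdx + 1);
    }
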
1061
+
1062
+ /* Computes y = relu(BN(x) + z). Also accumulates moving averages of mean and inverse variances */
1063
+ CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
1064
+ cudnnBatchNormalizationForwardTrainingEx(
1065
+ cudnnHandle_t handle,
1066
+ cudnnBatchNormMode_t mode,
1067
+ cudnnBatchNormOps_t bnOps,
1068
+
1069
+ const void *alpha, /* alpha[0] = result blend factor */
1070
+ const void *beta, /* beta[0] = dest layer blend factor */
1071
+
1072
+ const cudnnTensorDescriptor_t xDesc,
1073
+ const void *xData,
1074
+ const cudnnTensorDescriptor_t zDesc,
1075
+ const void *zData,
1076
+ const cudnnTensorDescriptor_t yDesc,
1077
+ void *yData,
1078
+
1079
+ const cudnnTensorDescriptor_t bnScaleBiasMeanVarDesc,
1080
+ const void *bnScale,
1081
+ const void *bnBias,
1082
+
1083
+ double exponentialAverageFactor,
1084
+ void *resultRunningMean,
1085
+ void *resultRunningVariance,
1086
+
1087
+ /* Has to be >= CUDNN_BN_MIN_EPSILON. Should be the same in forward and backward functions. */
1088
+ double epsilon,
1089
+
1090
+ /* Optionally save intermediate results from the forward pass here
1091
+ - can be reused to speed up backward pass. NULL if unused */
1092
+ void *resultSaveMean,
1093
+ void *resultSaveInvVariance,
1094
+
1095
+ cudnnActivationDescriptor_t activationDesc,
1096
+ void *workspace,
1097
+ size_t workSpaceSizeInBytes,
1098
+ void *reserveSpace,
1099
+ size_t reserveSpaceSizeInBytes);
1100
+
1101
+ /* Performs backward pass of Batch Normalization layer. Returns x gradient,
1102
+ * bnScale gradient and bnBias gradient */
1103
+ CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
1104
+ cudnnBatchNormalizationBackward(cudnnHandle_t handle,
1105
+ cudnnBatchNormMode_t mode,
1106
+ const void *alphaDataDiff,
1107
+ const void *betaDataDiff,
1108
+ const void *alphaParamDiff,
1109
+ const void *betaParamDiff,
1110
+ const cudnnTensorDescriptor_t xDesc, /* same desc for x, dx, dy */
1111
+ const void *x,
1112
+ const cudnnTensorDescriptor_t dyDesc,
1113
+ const void *dy,
1114
+ const cudnnTensorDescriptor_t dxDesc,
1115
+ void *dx,
1116
+ /* Shared tensor desc for the 4 tensors below */
1117
+ const cudnnTensorDescriptor_t dBnScaleBiasDesc,
1118
+ const void *bnScale, /* bnBias doesn't affect backpropagation */
1119
+ /* scale and bias diff are not backpropagated below this layer */
1120
+ void *dBnScaleResult,
1121
+ void *dBnBiasResult,
1122
+ /* Same epsilon as forward pass */
1123
+ double epsilon,
1124
+
1125
+ /* Optionally cached intermediate results from
1126
+ forward pass */
1127
+ const void *savedMean,
1128
+ const void *savedInvVariance);
1129
+
1130
+ CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
1131
+ cudnnBatchNormalizationBackwardEx(cudnnHandle_t handle,
1132
+ cudnnBatchNormMode_t mode,
1133
+ cudnnBatchNormOps_t bnOps,
1134
+
1135
+ const void *alphaDataDiff,
1136
+ const void *betaDataDiff,
1137
+ const void *alphaParamDiff,
1138
+ const void *betaParamDiff,
1139
+ const cudnnTensorDescriptor_t xDesc,
1140
+ const void *xData,
1141
+ const cudnnTensorDescriptor_t yDesc,
1142
+ const void *yData,
1143
+ const cudnnTensorDescriptor_t dyDesc,
1144
+ const void *dyData,
1145
+ const cudnnTensorDescriptor_t dzDesc,
1146
+ void *dzData,
1147
+ const cudnnTensorDescriptor_t dxDesc,
1148
+ void *dxData,
1149
+
1150
+ /* Shared tensor desc for the 4 tensors below */
1151
+ const cudnnTensorDescriptor_t dBnScaleBiasDesc,
1152
+ const void *bnScaleData,
1153
+ const void *bnBiasData, /* needed if there is activation */
1154
+ void *dBnScaleData,
1155
+ void *dBnBiasData,
1156
+ double epsilon, /* Same epsilon as forward pass */
1157
+
1158
+ /* Optionally cached intermediate results from
1159
+ forward pass */
1160
+ const void *savedMean,
1161
+ const void *savedInvVariance,
1162
+ cudnnActivationDescriptor_t activationDesc,
1163
+ void *workSpace,
1164
+ size_t workSpaceSizeInBytes,
1165
+ void *reserveSpace,
1166
+ size_t reserveSpaceSizeInBytes);
1167
+
1168
+ CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
1169
+ cudnnGetNormalizationForwardTrainingWorkspaceSize(cudnnHandle_t handle,
1170
+ cudnnNormMode_t mode,
1171
+ cudnnNormOps_t normOps,
1172
+ cudnnNormAlgo_t algo,
1173
+ const cudnnTensorDescriptor_t xDesc,
1174
+ const cudnnTensorDescriptor_t zDesc,
1175
+ const cudnnTensorDescriptor_t yDesc,
1176
+ const cudnnTensorDescriptor_t normScaleBiasDesc,
1177
+ const cudnnActivationDescriptor_t activationDesc,
1178
+ const cudnnTensorDescriptor_t normMeanVarDesc,
1179
+ size_t *sizeInBytes,
1180
+ int groupCnt); /* Placeholder for future work; should be set to 1 for now */
1181
+
1182
+ CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
1183
+ cudnnGetNormalizationBackwardWorkspaceSize(cudnnHandle_t handle,
1184
+ cudnnNormMode_t mode,
1185
+ cudnnNormOps_t normOps,
1186
+ cudnnNormAlgo_t algo,
1187
+ const cudnnTensorDescriptor_t xDesc,
1188
+ const cudnnTensorDescriptor_t yDesc,
1189
+ const cudnnTensorDescriptor_t dyDesc,
1190
+ const cudnnTensorDescriptor_t dzDesc,
1191
+ const cudnnTensorDescriptor_t dxDesc,
1192
+ const cudnnTensorDescriptor_t dNormScaleBiasDesc,
1193
+ const cudnnActivationDescriptor_t activationDesc,
1194
+ const cudnnTensorDescriptor_t normMeanVarDesc,
1195
+ size_t *sizeInBytes,
1196
+ int groupCnt); /* Placeholder for future work; should be set to 1 for now */
1197
+
1198
+ CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
1199
+ cudnnGetNormalizationTrainingReserveSpaceSize(cudnnHandle_t handle,
1200
+ cudnnNormMode_t mode,
1201
+ cudnnNormOps_t normOps,
1202
+ cudnnNormAlgo_t algo,
1203
+ const cudnnActivationDescriptor_t activationDesc,
1204
+ const cudnnTensorDescriptor_t xDesc,
1205
+ size_t *sizeInBytes,
1206
+ int groupCnt); /* Placeholder for future work; should be set to 1 for now */
1207
+
1208
+ /* Computes y = relu(Norm(x) + z). Also accumulates moving averages of mean and inverse variances */
1209
+ CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
1210
+ cudnnNormalizationForwardTraining(cudnnHandle_t handle,
1211
+ cudnnNormMode_t mode,
1212
+ cudnnNormOps_t normOps,
1213
+ cudnnNormAlgo_t algo,
1214
+ const void *alpha, /* alpha[0] = result blend factor */
1215
+ const void *beta, /* beta[0] = dest layer blend factor */
1216
+ const cudnnTensorDescriptor_t xDesc,
1217
+ const void *xData,
1218
+ const cudnnTensorDescriptor_t normScaleBiasDesc,
1219
+ const void *normScale,
1220
+ const void *normBias,
1221
+ double exponentialAverageFactor,
1222
+ const cudnnTensorDescriptor_t normMeanVarDesc,
1223
+ void *resultRunningMean,
1224
+ void *resultRunningVariance,
1225
+ /* Has to be >= 0. Should be the same in forward and backward functions. */
1226
+ double epsilon,
1227
+ /* Optionally save intermediate results from the forward pass here
1228
+ - can be reused to speed up backward pass. NULL if unused */
1229
+ void *resultSaveMean,
1230
+ void *resultSaveInvVariance,
1231
+ cudnnActivationDescriptor_t activationDesc,
1232
+ const cudnnTensorDescriptor_t zDesc,
1233
+ const void *zData,
1234
+ const cudnnTensorDescriptor_t yDesc,
1235
+ void *yData,
1236
+ void *workspace,
1237
+ size_t workSpaceSizeInBytes,
1238
+ void *reserveSpace,
1239
+ size_t reserveSpaceSizeInBytes,
1240
+ int groupCnt); /* Placeholder for future work; should be set to 1 for now */
1241
+
1242
+ CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
1243
+ cudnnNormalizationBackward(cudnnHandle_t handle,
1244
+ cudnnNormMode_t mode,
1245
+ cudnnNormOps_t normOps,
1246
+ cudnnNormAlgo_t algo,
1247
+ const void *alphaDataDiff,
1248
+ const void *betaDataDiff,
1249
+ const void *alphaParamDiff,
1250
+ const void *betaParamDiff,
1251
+ const cudnnTensorDescriptor_t xDesc,
1252
+ const void *xData,
1253
+ const cudnnTensorDescriptor_t yDesc,
1254
+ const void *yData,
1255
+ const cudnnTensorDescriptor_t dyDesc,
1256
+ const void *dyData,
1257
+ const cudnnTensorDescriptor_t dzDesc,
1258
+ void *dzData,
1259
+ const cudnnTensorDescriptor_t dxDesc,
1260
+ void *dxData,
1261
+ /* Shared tensor desc for the 4 tensors below */
1262
+ const cudnnTensorDescriptor_t dNormScaleBiasDesc,
1263
+ const void *normScaleData,
1264
+ const void *normBiasData, /* needed if there is activation */
1265
+ void *dNormScaleData,
1266
+ void *dNormBiasData,
1267
+ double epsilon, /* Same epsilon as forward pass */
1268
+ const cudnnTensorDescriptor_t normMeanVarDesc,
1269
+ /* Optionally cached intermediate results from
1270
+ forward pass */
1271
+ const void *savedMean,
1272
+ const void *savedInvVariance,
1273
+ cudnnActivationDescriptor_t activationDesc,
1274
+ void *workSpace,
1275
+ size_t workSpaceSizeInBytes,
1276
+ void *reserveSpace,
1277
+ size_t reserveSpaceSizeInBytes,
1278
+ int groupCnt); /* Placeholder for future work; should be set to 1 for now */
1279
+
1280
+ cudnnStatus_t CUDNNWINAPI
1281
+ cudnnSpatialTfGridGeneratorBackward(cudnnHandle_t handle,
1282
+ const cudnnSpatialTransformerDescriptor_t stDesc,
1283
+ const void *dgrid,
1284
+ void *dtheta);
1285
+
1286
+ cudnnStatus_t CUDNNWINAPI
1287
+ cudnnSpatialTfSamplerBackward(cudnnHandle_t handle,
1288
+ cudnnSpatialTransformerDescriptor_t stDesc,
1289
+ const void *alpha,
1290
+ const cudnnTensorDescriptor_t xDesc,
1291
+ const void *x,
1292
+ const void *beta,
1293
+ const cudnnTensorDescriptor_t dxDesc,
1294
+ void *dx,
1295
+ const void *alphaDgrid,
1296
+ const cudnnTensorDescriptor_t dyDesc,
1297
+ const void *dy,
1298
+ const void *grid,
1299
+ const void *betaDgrid,
1300
+ void *dgrid);
1301
+
1302
+ cudnnStatus_t CUDNNWINAPI
1303
+ cudnnDropoutBackward(cudnnHandle_t handle,
1304
+ const cudnnDropoutDescriptor_t dropoutDesc,
1305
+ const cudnnTensorDescriptor_t dydesc,
1306
+ const void *dy,
1307
+ const cudnnTensorDescriptor_t dxdesc,
1308
+ void *dx,
1309
+ void *reserveSpace,
1310
+ size_t reserveSpaceSizeInBytes);
1311
+
1312
+ #if defined(__cplusplus)
1313
+ }
1314
+ #endif
1315
+
1316
+ #endif /* CUDNN_OPS_H_ */