nvidia-cudnn-cu13 9.12.0.46 (py3-none-win_amd64.whl)
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nvidia/cudnn/bin/cudnn64_9.dll +0 -0
- nvidia/cudnn/bin/cudnn_adv64_9.dll +0 -0
- nvidia/cudnn/bin/cudnn_cnn64_9.dll +0 -0
- nvidia/cudnn/bin/cudnn_engines_precompiled64_9.dll +0 -0
- nvidia/cudnn/bin/cudnn_engines_runtime_compiled64_9.dll +0 -0
- nvidia/cudnn/bin/cudnn_graph64_9.dll +0 -0
- nvidia/cudnn/bin/cudnn_heuristic64_9.dll +0 -0
- nvidia/cudnn/bin/cudnn_ops64_9.dll +0 -0
- nvidia/cudnn/include/cudnn.h +68 -0
- nvidia/cudnn/include/cudnn_adv.h +669 -0
- nvidia/cudnn/include/cudnn_backend.h +60 -0
- nvidia/cudnn/include/cudnn_cnn.h +693 -0
- nvidia/cudnn/include/cudnn_graph.h +996 -0
- nvidia/cudnn/include/cudnn_ops.h +1316 -0
- nvidia/cudnn/include/cudnn_version.h +70 -0
- nvidia_cudnn_cu13-9.12.0.46.dist-info/METADATA +47 -0
- nvidia_cudnn_cu13-9.12.0.46.dist-info/RECORD +20 -0
- nvidia_cudnn_cu13-9.12.0.46.dist-info/WHEEL +5 -0
- nvidia_cudnn_cu13-9.12.0.46.dist-info/licenses/License.txt +154 -0
- nvidia_cudnn_cu13-9.12.0.46.dist-info/top_level.txt +1 -0
@@ -0,0 +1,693 @@
/*
 * Copyright 2014-2023 NVIDIA Corporation. All rights reserved.
 *
 * NOTICE TO LICENSEE:
 *
 * This source code and/or documentation ("Licensed Deliverables") are
 * subject to NVIDIA intellectual property rights under U.S. and
 * international Copyright laws.
 *
 * These Licensed Deliverables contained herein is PROPRIETARY and
 * CONFIDENTIAL to NVIDIA and is being provided under the terms and
 * conditions of a form of NVIDIA software license agreement by and
 * between NVIDIA and Licensee ("License Agreement") or electronically
 * accepted by Licensee. Notwithstanding any terms or conditions to
 * the contrary in the License Agreement, reproduction or disclosure
 * of the Licensed Deliverables to any third party without the express
 * written consent of NVIDIA is prohibited.
 *
 * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
 * LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
 * SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
 * PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
 * NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
 * DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
 * NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
 * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
 * LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
 * SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
 * DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
 * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
 * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
 * OF THESE LICENSED DELIVERABLES.
 *
 * U.S. Government End Users. These Licensed Deliverables are a
 * "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
 * 1995), consisting of "commercial computer software" and "commercial
 * computer software documentation" as such terms are used in 48
 * C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
 * only as a commercial end item. Consistent with 48 C.F.R.12.212 and
 * 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
 * U.S. Government End Users acquire the Licensed Deliverables with
 * only those rights set forth herein.
 *
 * Any use of the Licensed Deliverables in individual and commercial
 * software must include, in the user documentation and internal
 * comments to the code, the above Disclaimer and U.S. Government End
 * Users Notice.
 */

/*
 * cudnn_cnn : cuDNN's basic definitions and CNN functions.
 */

#if !defined(CUDNN_CNN_H_)
#define CUDNN_CNN_H_

#pragma once
#include <stdint.h>

#include "cudnn_version.h"
#include "cudnn_ops.h"

/* These version numbers are autogenerated, do not edit manually. */
#define CUDNN_CNN_MAJOR 9
#define CUDNN_CNN_MINOR 12
#define CUDNN_CNN_PATCH 0

#if (CUDNN_CNN_MAJOR != CUDNN_MAJOR) || (CUDNN_CNN_MINOR != CUDNN_MINOR) || (CUDNN_CNN_PATCH != CUDNN_PATCHLEVEL)
#error Version mismatch in cuDNN CNN INFER!!!
#endif

#if defined(__cplusplus)
extern "C" {
#endif

typedef struct cudnnConvolutionStruct *cudnnConvolutionDescriptor_t CUDNN_DEPRECATED;

typedef struct cudnnConvolutionFwdAlgoPerfStruct {
    cudnnConvolutionFwdAlgo_t algo;
    cudnnStatus_t status;
    float time;
    size_t memory;
    cudnnDeterminism_t determinism;
    cudnnMathType_t mathType;
    int reserved[3];
} cudnnConvolutionFwdAlgoPerf_t CUDNN_DEPRECATED;

/* Create an instance of convolution descriptor */
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnCreateConvolutionDescriptor(cudnnConvolutionDescriptor_t *convDesc);

/* Destroy an instance of convolution descriptor */
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnDestroyConvolutionDescriptor(cudnnConvolutionDescriptor_t convDesc);

CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnSetConvolutionMathType(cudnnConvolutionDescriptor_t convDesc, cudnnMathType_t mathType);

CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnGetConvolutionMathType(cudnnConvolutionDescriptor_t convDesc, cudnnMathType_t *mathType);

CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnSetConvolutionGroupCount(cudnnConvolutionDescriptor_t convDesc, int groupCount);

CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnGetConvolutionGroupCount(cudnnConvolutionDescriptor_t convDesc, int *groupCount);

CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnSetConvolutionReorderType(cudnnConvolutionDescriptor_t convDesc, cudnnReorderType_t reorderType);

CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnGetConvolutionReorderType(cudnnConvolutionDescriptor_t convDesc, cudnnReorderType_t *reorderType);

CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnSetConvolution2dDescriptor(cudnnConvolutionDescriptor_t convDesc,
                                int pad_h,      /* zero-padding height */
                                int pad_w,      /* zero-padding width */
                                int u,          /* vertical filter stride */
                                int v,          /* horizontal filter stride */
                                int dilation_h, /* filter dilation in the vertical dimension */
                                int dilation_w, /* filter dilation in the horizontal dimension */
                                cudnnConvolutionMode_t mode,
                                cudnnDataType_t computeType);

CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnGetConvolution2dDescriptor(const cudnnConvolutionDescriptor_t convDesc,
                                int *pad_h,      /* zero-padding height */
                                int *pad_w,      /* zero-padding width */
                                int *u,          /* vertical filter stride */
                                int *v,          /* horizontal filter stride */
                                int *dilation_h, /* filter dilation in the vertical dimension */
                                int *dilation_w, /* filter dilation in the horizontal dimension */
                                cudnnConvolutionMode_t *mode,
                                cudnnDataType_t *computeType);

CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnSetConvolutionNdDescriptor(cudnnConvolutionDescriptor_t convDesc,
                                int arrayLength, /* nbDims-2 size */
                                const int padA[],
                                const int filterStrideA[],
                                const int dilationA[],
                                cudnnConvolutionMode_t mode,
                                cudnnDataType_t computeType); /* convolution data type */

/* Helper function to return the dimensions of the output tensor given a convolution descriptor */
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnGetConvolutionNdDescriptor(const cudnnConvolutionDescriptor_t convDesc,
                                int arrayLengthRequested,
                                int *arrayLength,
                                int padA[],
                                int strideA[],
                                int dilationA[],
                                cudnnConvolutionMode_t *mode,
                                cudnnDataType_t *computeType); /* convolution data type */

CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnGetConvolution2dForwardOutputDim(const cudnnConvolutionDescriptor_t convDesc,
                                      const cudnnTensorDescriptor_t inputTensorDesc,
                                      const cudnnFilterDescriptor_t filterDesc,
                                      int *n,
                                      int *c,
                                      int *h,
                                      int *w);

/* Helper function to return the dimensions of the output tensor given a convolution descriptor */
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnGetConvolutionNdForwardOutputDim(const cudnnConvolutionDescriptor_t convDesc,
                                      const cudnnTensorDescriptor_t inputTensorDesc,
                                      const cudnnFilterDescriptor_t filterDesc,
                                      int nbDims,
                                      int tensorOuputDimA[]);

/* helper function to provide the convolution forward algo that fit best the requirement */
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnGetConvolutionForwardAlgorithmMaxCount(cudnnHandle_t handle, int *count);

CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnGetConvolutionForwardAlgorithm_v7(cudnnHandle_t handle,
                                       const cudnnTensorDescriptor_t srcDesc,
                                       const cudnnFilterDescriptor_t filterDesc,
                                       const cudnnConvolutionDescriptor_t convDesc,
                                       const cudnnTensorDescriptor_t destDesc,
                                       const int requestedAlgoCount,
                                       int *returnedAlgoCount,
                                       cudnnConvolutionFwdAlgoPerf_t *perfResults);

CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnFindConvolutionForwardAlgorithm(cudnnHandle_t handle,
                                     const cudnnTensorDescriptor_t xDesc,
                                     const cudnnFilterDescriptor_t wDesc,
                                     const cudnnConvolutionDescriptor_t convDesc,
                                     const cudnnTensorDescriptor_t yDesc,
                                     const int requestedAlgoCount,
                                     int *returnedAlgoCount,
                                     cudnnConvolutionFwdAlgoPerf_t *perfResults);

CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnFindConvolutionForwardAlgorithmEx(cudnnHandle_t handle,
                                       const cudnnTensorDescriptor_t xDesc,
                                       const void *x,
                                       const cudnnFilterDescriptor_t wDesc,
                                       const void *w,
                                       const cudnnConvolutionDescriptor_t convDesc,
                                       const cudnnTensorDescriptor_t yDesc,
                                       void *y,
                                       const int requestedAlgoCount,
                                       int *returnedAlgoCount,
                                       cudnnConvolutionFwdAlgoPerf_t *perfResults,
                                       void *workSpace,
                                       size_t workSpaceSizeInBytes);

CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnIm2Col(cudnnHandle_t handle,
            const cudnnTensorDescriptor_t xDesc,
            const void *x,
            const cudnnFilterDescriptor_t wDesc,
            const cudnnConvolutionDescriptor_t convDesc,
            void *colBuffer);

CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnReorderFilterAndBias(cudnnHandle_t handle,
                          const cudnnFilterDescriptor_t filterDesc,
                          cudnnReorderType_t reorderType,
                          const void *filterData,
                          void *reorderedFilterData,
                          int reorderBias,
                          const void *biasData,
                          void *reorderedBiasData);

/* Helper function to return the minimum size of the workspace to be passed to the convolution given an algo*/
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnGetConvolutionForwardWorkspaceSize(cudnnHandle_t handle,
                                        const cudnnTensorDescriptor_t xDesc,
                                        const cudnnFilterDescriptor_t wDesc,
                                        const cudnnConvolutionDescriptor_t convDesc,
                                        const cudnnTensorDescriptor_t yDesc,
                                        cudnnConvolutionFwdAlgo_t algo,
                                        size_t *sizeInBytes);

/* Convolution functions: All of the form "output = alpha * Op(inputs) + beta * output" */

/* Function to perform the forward pass for batch convolution */
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnConvolutionForward(cudnnHandle_t handle,
                        const void *alpha,
                        const cudnnTensorDescriptor_t xDesc,
                        const void *x,
                        const cudnnFilterDescriptor_t wDesc,
                        const void *w,
                        const cudnnConvolutionDescriptor_t convDesc,
                        cudnnConvolutionFwdAlgo_t algo,
                        void *workSpace,
                        size_t workSpaceSizeInBytes,
                        const void *beta,
                        const cudnnTensorDescriptor_t yDesc,
                        void *y);

/* Fused conv/bias/activation operation : y = Act( alpha1 * conv(x) + alpha2 * z + bias ) */
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnConvolutionBiasActivationForward(cudnnHandle_t handle,
                                      const void *alpha1,
                                      const cudnnTensorDescriptor_t xDesc,
                                      const void *x,
                                      const cudnnFilterDescriptor_t wDesc,
                                      const void *w,
                                      const cudnnConvolutionDescriptor_t convDesc,
                                      cudnnConvolutionFwdAlgo_t algo,
                                      void *workSpace,
                                      size_t workSpaceSizeInBytes,
                                      const void *alpha2,
                                      const cudnnTensorDescriptor_t zDesc,
                                      const void *z,
                                      const cudnnTensorDescriptor_t biasDesc,
                                      const void *bias,
                                      const cudnnActivationDescriptor_t activationDesc,
                                      const cudnnTensorDescriptor_t yDesc,
                                      void *y);

/* helper function to provide the convolution backward data algo that fit best the requirement */

typedef struct cudnnConvolutionBwdDataAlgoPerfStruct {
    cudnnConvolutionBwdDataAlgo_t algo;
    cudnnStatus_t status;
    float time;
    size_t memory;
    cudnnDeterminism_t determinism;
    cudnnMathType_t mathType;
    int reserved[3];
} cudnnConvolutionBwdDataAlgoPerf_t CUDNN_DEPRECATED;

CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnGetConvolutionBackwardDataAlgorithmMaxCount(cudnnHandle_t handle, int *count);

CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnFindConvolutionBackwardDataAlgorithm(cudnnHandle_t handle,
                                          const cudnnFilterDescriptor_t wDesc,
                                          const cudnnTensorDescriptor_t dyDesc,
                                          const cudnnConvolutionDescriptor_t convDesc,
                                          const cudnnTensorDescriptor_t dxDesc,
                                          const int requestedAlgoCount,
                                          int *returnedAlgoCount,
                                          cudnnConvolutionBwdDataAlgoPerf_t *perfResults);

CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnFindConvolutionBackwardDataAlgorithmEx(cudnnHandle_t handle,
                                            const cudnnFilterDescriptor_t wDesc,
                                            const void *w,
                                            const cudnnTensorDescriptor_t dyDesc,
                                            const void *dy,
                                            const cudnnConvolutionDescriptor_t convDesc,
                                            const cudnnTensorDescriptor_t dxDesc,
                                            void *dx,
                                            const int requestedAlgoCount,
                                            int *returnedAlgoCount,
                                            cudnnConvolutionBwdDataAlgoPerf_t *perfResults,
                                            void *workSpace,
                                            size_t workSpaceSizeInBytes);

CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnGetConvolutionBackwardDataAlgorithm_v7(cudnnHandle_t handle,
                                            const cudnnFilterDescriptor_t filterDesc,
                                            const cudnnTensorDescriptor_t diffDesc,
                                            const cudnnConvolutionDescriptor_t convDesc,
                                            const cudnnTensorDescriptor_t gradDesc,
                                            const int requestedAlgoCount,
                                            int *returnedAlgoCount,
                                            cudnnConvolutionBwdDataAlgoPerf_t *perfResults);

/*
 * convolution algorithm (which requires potentially some workspace)
 */

/* Helper function to return the minimum size of the workspace to be passed to the convolution given an algo*/
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnGetConvolutionBackwardDataWorkspaceSize(cudnnHandle_t handle,
                                             const cudnnFilterDescriptor_t wDesc,
                                             const cudnnTensorDescriptor_t dyDesc,
                                             const cudnnConvolutionDescriptor_t convDesc,
                                             const cudnnTensorDescriptor_t dxDesc,
                                             cudnnConvolutionBwdDataAlgo_t algo,
                                             size_t *sizeInBytes);

CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnConvolutionBackwardData(cudnnHandle_t handle,
                             const void *alpha,
                             const cudnnFilterDescriptor_t wDesc,
                             const void *w,
                             const cudnnTensorDescriptor_t dyDesc,
                             const void *dy,
                             const cudnnConvolutionDescriptor_t convDesc,
                             cudnnConvolutionBwdDataAlgo_t algo,
                             void *workSpace,
                             size_t workSpaceSizeInBytes,
                             const void *beta,
                             const cudnnTensorDescriptor_t dxDesc,
                             void *dx);

/* Helper function to calculate folding descriptors for dgrad */
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnGetFoldedConvBackwardDataDescriptors(const cudnnHandle_t handle,
                                          const cudnnFilterDescriptor_t filterDesc,
                                          const cudnnTensorDescriptor_t diffDesc,
                                          const cudnnConvolutionDescriptor_t convDesc,
                                          const cudnnTensorDescriptor_t gradDesc,
                                          const cudnnTensorFormat_t transformFormat,
                                          cudnnFilterDescriptor_t foldedFilterDesc,
                                          cudnnTensorDescriptor_t paddedDiffDesc,
                                          cudnnConvolutionDescriptor_t foldedConvDesc,
                                          cudnnTensorDescriptor_t foldedGradDesc,
                                          cudnnTensorTransformDescriptor_t filterFoldTransDesc,
                                          cudnnTensorTransformDescriptor_t diffPadTransDesc,
                                          cudnnTensorTransformDescriptor_t gradFoldTransDesc,
                                          cudnnTensorTransformDescriptor_t gradUnfoldTransDesc);

/* cudnnFusedOps... */
struct cudnnFusedOpsConstParamStruct;
typedef struct cudnnFusedOpsConstParamStruct *cudnnFusedOpsConstParamPack_t CUDNN_DEPRECATED;

struct cudnnFusedOpsVariantParamStruct;
typedef struct cudnnFusedOpsVariantParamStruct *cudnnFusedOpsVariantParamPack_t CUDNN_DEPRECATED;

struct cudnnFusedOpsPlanStruct;
typedef struct cudnnFusedOpsPlanStruct *cudnnFusedOpsPlan_t CUDNN_DEPRECATED;

typedef enum {
    /* each op in [ ] can be disabled by passing NULL ptr */
    /* [per channel scale], [per channel bias], [activation], convolution, [generate BN stats] */
    CUDNN_FUSED_SCALE_BIAS_ACTIVATION_CONV_BNSTATS = 0,
    /* [per channel scale], [per channel bias], [activation], convolutionBackwardWeights */
    CUDNN_FUSED_SCALE_BIAS_ACTIVATION_WGRAD = 1,
    /* utility for BN training in BN-conv fusion */
    /* computes the equivalent scale and bias from ySum ySqSum and learned scale, bias */
    /* optionally update running stats and generate saved stats */
    CUDNN_FUSED_BN_FINALIZE_STATISTICS_TRAINING = 2,
    /* utility for BN inference in BN-conv fusion */
    /* computes the equivalent scale and bias from learned running stats and learned scale, bias */
    CUDNN_FUSED_BN_FINALIZE_STATISTICS_INFERENCE = 3,
    /* reserved for future use: convolution, [per channel scale], [per channel bias], [residual add], [activation] */
    CUDNN_FUSED_CONV_SCALE_BIAS_ADD_ACTIVATION = 4,
    /* reserved for future use: [per channel scale], [per channel bias], [residual add], activation, bitmask */
    CUDNN_FUSED_SCALE_BIAS_ADD_ACTIVATION_GEN_BITMASK = 5,
    /* reserved for future use */
    CUDNN_FUSED_DACTIVATION_FORK_DBATCHNORM = 6,
} cudnnFusedOps_t CUDNN_DEPRECATED;

typedef enum {
    /* set XDESC: pass previously initialized cudnnTensorDescriptor_t */
    /* get XDESC: pass previously created cudnnTensorDescriptor_t */
    CUDNN_PARAM_XDESC = 0,
    /* set/get XDATA_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
    CUDNN_PARAM_XDATA_PLACEHOLDER = 1,
    /* set/get BN_MODE: pass cudnnBatchNormMode_t* */
    CUDNN_PARAM_BN_MODE = 2,
    /* set CUDNN_PARAM_BN_EQSCALEBIAS_DESC: pass previously initialized cudnnTensorDescriptor_t */
    /* get CUDNN_PARAM_BN_EQSCALEBIAS_DESC: pass previously created cudnnTensorDescriptor_t */
    CUDNN_PARAM_BN_EQSCALEBIAS_DESC = 3,
    /* set/get BN_EQSCALE_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
    CUDNN_PARAM_BN_EQSCALE_PLACEHOLDER = 4,
    /* set/get BN_EQBIAS_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
    CUDNN_PARAM_BN_EQBIAS_PLACEHOLDER = 5,
    /* set ACTIVATION_DESC: pass previously initialized cudnnActivationDescriptor_t */
    /* get ACTIVATION_DESC: pass previously created cudnnActivationDescriptor_t */
    CUDNN_PARAM_ACTIVATION_DESC = 6,
    /* set CONV_DESC: pass previously initialized cudnnConvolutionDescriptor_t */
    /* get CONV_DESC: pass previously created cudnnConvolutionDescriptor_t */
    CUDNN_PARAM_CONV_DESC = 7,
    /* set WDESC: pass previously initialized cudnnFilterDescriptor_t */
    /* get WDESC: pass previously created cudnnFilterDescriptor_t */
    CUDNN_PARAM_WDESC = 8,
    /* set/get WDATA_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
    CUDNN_PARAM_WDATA_PLACEHOLDER = 9,
    /* set DWDESC: pass previously initialized cudnnFilterDescriptor_t */
    /* get DWDESC: pass previously created cudnnFilterDescriptor_t */
    CUDNN_PARAM_DWDESC = 10,
    /* set/get DWDATA_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
    CUDNN_PARAM_DWDATA_PLACEHOLDER = 11,
    /* set YDESC: pass previously initialized cudnnTensorDescriptor_t */
    /* get YDESC: pass previously created cudnnTensorDescriptor_t */
    CUDNN_PARAM_YDESC = 12,
    /* set/get YDATA_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
    CUDNN_PARAM_YDATA_PLACEHOLDER = 13,
    /* set DYDESC: pass previously initialized cudnnTensorDescriptor_t */
    /* get DYDESC: pass previously created cudnnTensorDescriptor_t */
    CUDNN_PARAM_DYDESC = 14,
    /* set/get DYDATA_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
    CUDNN_PARAM_DYDATA_PLACEHOLDER = 15,
    /* set YSTATS_DESC: pass previously initialized cudnnTensorDescriptor_t */
    /* get YSTATS_DESC: pass previously created cudnnTensorDescriptor_t */
    CUDNN_PARAM_YSTATS_DESC = 16,
    /* set/get YSUM_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
    CUDNN_PARAM_YSUM_PLACEHOLDER = 17,
    /* set/get YSQSUM_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
    CUDNN_PARAM_YSQSUM_PLACEHOLDER = 18,
    /* set CUDNN_PARAM_BN_SCALEBIAS_MEANVAR_DESC: pass previously initialized cudnnTensorDescriptor_t */
    /* get CUDNN_PARAM_BN_SCALEBIAS_MEANVAR_DESC: pass previously created cudnnTensorDescriptor_t */
    CUDNN_PARAM_BN_SCALEBIAS_MEANVAR_DESC = 19,
    /* set/get CUDNN_PARAM_BN_SCALE_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
    CUDNN_PARAM_BN_SCALE_PLACEHOLDER = 20,
    /* set/get CUDNN_PARAM_BN_BIAS_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
    CUDNN_PARAM_BN_BIAS_PLACEHOLDER = 21,
    /* set/get CUDNN_PARAM_BN_SAVED_MEAN_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
    CUDNN_PARAM_BN_SAVED_MEAN_PLACEHOLDER = 22,
    /* set/get CUDNN_PARAM_BN_SAVED_INVSTD_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
    CUDNN_PARAM_BN_SAVED_INVSTD_PLACEHOLDER = 23,
    /* set/get CUDNN_PARAM_BN_RUNNING_MEAN_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
    CUDNN_PARAM_BN_RUNNING_MEAN_PLACEHOLDER = 24,
    /* set/get CUDNN_PARAM_BN_RUNNING_VAR_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
    CUDNN_PARAM_BN_RUNNING_VAR_PLACEHOLDER = 25,

    /* set ZDESC: pass previously initialized cudnnTensorDescriptor_t */
    /* get ZDESC: pass previously created cudnnTensorDescriptor_t */
    CUDNN_PARAM_ZDESC = 26,
    /* set/get ZDATA_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
    CUDNN_PARAM_ZDATA_PLACEHOLDER = 27,
    /* set BN_Z_EQSCALEBIAS_DESC: pass previously initialized cudnnTensorDescriptor_t */
    /* get BN_Z_EQSCALEBIAS_DESC: pass previously created cudnnTensorDescriptor_t */
    CUDNN_PARAM_BN_Z_EQSCALEBIAS_DESC = 28,
    /* set/get BN_Z_EQSCALE_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
    CUDNN_PARAM_BN_Z_EQSCALE_PLACEHOLDER = 29,
    /* set/get BN_Z_EQBIAS_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
    CUDNN_PARAM_BN_Z_EQBIAS_PLACEHOLDER = 30,

    /* set ACTIVATION_BITMASK_DESC: pass previously initialized cudnnTensorDescriptor_t */
    /* get ACTIVATION_BITMASK_DESC: pass previously created cudnnTensorDescriptor_t */
    CUDNN_PARAM_ACTIVATION_BITMASK_DESC = 31,
    /* set/get ACTIVATION_BITMASK_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
    CUDNN_PARAM_ACTIVATION_BITMASK_PLACEHOLDER = 32,

    /* set DXDESC: pass previously initialized cudnnTensorDescriptor_t */
    /* get DXDESC: pass previously created cudnnTensorDescriptor_t */
    CUDNN_PARAM_DXDESC = 33,
    /* set/get DXDATA_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
    CUDNN_PARAM_DXDATA_PLACEHOLDER = 34,
    /* set DZDESC: pass previously initialized cudnnTensorDescriptor_t */
    /* get DZDESC: pass previously created cudnnTensorDescriptor_t */
    CUDNN_PARAM_DZDESC = 35,
    /* set/get DZDATA_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
    CUDNN_PARAM_DZDATA_PLACEHOLDER = 36,
    /* set/get CUDNN_PARAM_BN_DSCALE_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
    CUDNN_PARAM_BN_DSCALE_PLACEHOLDER = 37,
    /* set/get CUDNN_PARAM_BN_DBIAS_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* */
    CUDNN_PARAM_BN_DBIAS_PLACEHOLDER = 38,
} cudnnFusedOpsConstParamLabel_t CUDNN_DEPRECATED;

typedef enum {
    CUDNN_PTR_NULL = 0,
    CUDNN_PTR_ELEM_ALIGNED = 1,
    CUDNN_PTR_16B_ALIGNED = 2,
} cudnnFusedOpsPointerPlaceHolder_t CUDNN_DEPRECATED;

typedef enum {
    /* set: pass void* pointing to dev memory */
    /* get: pass void** pointing to host memory */
    CUDNN_PTR_XDATA = 0,
    CUDNN_PTR_BN_EQSCALE = 1,
    CUDNN_PTR_BN_EQBIAS = 2,
    CUDNN_PTR_WDATA = 3,
    CUDNN_PTR_DWDATA = 4,
    CUDNN_PTR_YDATA = 5,
    CUDNN_PTR_DYDATA = 6,
    CUDNN_PTR_YSUM = 7,
    CUDNN_PTR_YSQSUM = 8,
    CUDNN_PTR_WORKSPACE = 9,
    CUDNN_PTR_BN_SCALE = 10,
    CUDNN_PTR_BN_BIAS = 11,
    CUDNN_PTR_BN_SAVED_MEAN = 12,
    CUDNN_PTR_BN_SAVED_INVSTD = 13,
    CUDNN_PTR_BN_RUNNING_MEAN = 14,
    CUDNN_PTR_BN_RUNNING_VAR = 15,
    CUDNN_PTR_ZDATA = 16,
    CUDNN_PTR_BN_Z_EQSCALE = 17,
    CUDNN_PTR_BN_Z_EQBIAS = 18,
    CUDNN_PTR_ACTIVATION_BITMASK = 19,
    CUDNN_PTR_DXDATA = 20,
    CUDNN_PTR_DZDATA = 21,
    CUDNN_PTR_BN_DSCALE = 22,
    CUDNN_PTR_BN_DBIAS = 23,

    /* set/get: pass size_t* pointing to host memory */
    CUDNN_SCALAR_SIZE_T_WORKSPACE_SIZE_IN_BYTES = 100,
    /* set/get: pass int64_t* pointing to host memory */
    CUDNN_SCALAR_INT64_T_BN_ACCUMULATION_COUNT = 101,
    /* set/get: pass double* pointing to host memory */
    CUDNN_SCALAR_DOUBLE_BN_EXP_AVG_FACTOR = 102,
    /* set/get: pass double* pointing to host memory */
    CUDNN_SCALAR_DOUBLE_BN_EPSILON = 103,
} cudnnFusedOpsVariantParamLabel_t CUDNN_DEPRECATED;

cudnnStatus_t CUDNNWINAPI
cudnnCnnVersionCheck(void);

/* helper function to provide the convolution backward filter algo that fit best the requirement */

typedef struct cudnnConvolutionBwdFilterAlgoPerfStruct {
    cudnnConvolutionBwdFilterAlgo_t algo;
    cudnnStatus_t status;
    float time;
    size_t memory;
    cudnnDeterminism_t determinism;
    cudnnMathType_t mathType;
    int reserved[3];
} cudnnConvolutionBwdFilterAlgoPerf_t CUDNN_DEPRECATED;

CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnGetConvolutionBackwardFilterAlgorithmMaxCount(cudnnHandle_t handle, int *count);

CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnFindConvolutionBackwardFilterAlgorithm(cudnnHandle_t handle,
                                            const cudnnTensorDescriptor_t xDesc,
                                            const cudnnTensorDescriptor_t dyDesc,
                                            const cudnnConvolutionDescriptor_t convDesc,
                                            const cudnnFilterDescriptor_t dwDesc,
                                            const int requestedAlgoCount,
                                            int *returnedAlgoCount,
                                            cudnnConvolutionBwdFilterAlgoPerf_t *perfResults);

CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnFindConvolutionBackwardFilterAlgorithmEx(cudnnHandle_t handle,
                                              const cudnnTensorDescriptor_t xDesc,
                                              const void *x,
                                              const cudnnTensorDescriptor_t dyDesc,
                                              const void *y,
                                              const cudnnConvolutionDescriptor_t convDesc,
                                              const cudnnFilterDescriptor_t dwDesc,
                                              void *dw,
                                              const int requestedAlgoCount,
                                              int *returnedAlgoCount,
                                              cudnnConvolutionBwdFilterAlgoPerf_t *perfResults,
                                              void *workSpace,
                                              size_t workSpaceSizeInBytes);

CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnGetConvolutionBackwardFilterAlgorithm_v7(cudnnHandle_t handle,
                                              const cudnnTensorDescriptor_t srcDesc,
                                              const cudnnTensorDescriptor_t diffDesc,
                                              const cudnnConvolutionDescriptor_t convDesc,
                                              const cudnnFilterDescriptor_t gradDesc,
                                              const int requestedAlgoCount,
                                              int *returnedAlgoCount,
                                              cudnnConvolutionBwdFilterAlgoPerf_t *perfResults);

/*
 * convolution algorithm (which requires potentially some workspace)
 */

/* Helper function to return the minimum size of the workspace to be passed to the convolution given an algo*/
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnGetConvolutionBackwardFilterWorkspaceSize(cudnnHandle_t handle,
                                               const cudnnTensorDescriptor_t xDesc,
                                               const cudnnTensorDescriptor_t dyDesc,
                                               const cudnnConvolutionDescriptor_t convDesc,
                                               const cudnnFilterDescriptor_t gradDesc,
                                               cudnnConvolutionBwdFilterAlgo_t algo,
                                               size_t *sizeInBytes);

CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnConvolutionBackwardFilter(cudnnHandle_t handle,
                               const void *alpha,
                               const cudnnTensorDescriptor_t xDesc,
                               const void *x,
                               const cudnnTensorDescriptor_t dyDesc,
                               const void *dy,
                               const cudnnConvolutionDescriptor_t convDesc,
                               cudnnConvolutionBwdFilterAlgo_t algo,
                               void *workSpace,
                               size_t workSpaceSizeInBytes,
                               const void *beta,
                               const cudnnFilterDescriptor_t dwDesc,
                               void *dw);

/* Function to compute the bias gradient for batch convolution */
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnConvolutionBackwardBias(cudnnHandle_t handle,
                             const void *alpha,
                             const cudnnTensorDescriptor_t dyDesc,
                             const void *dy,
                             const void *beta,
                             const cudnnTensorDescriptor_t dbDesc,
                             void *db);

CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnCreateFusedOpsConstParamPack(cudnnFusedOpsConstParamPack_t *constPack, cudnnFusedOps_t ops);

CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnDestroyFusedOpsConstParamPack(cudnnFusedOpsConstParamPack_t constPack);

CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnSetFusedOpsConstParamPackAttribute(cudnnFusedOpsConstParamPack_t constPack,
                                        cudnnFusedOpsConstParamLabel_t paramLabel,
                                        const void *param);

CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnGetFusedOpsConstParamPackAttribute(const cudnnFusedOpsConstParamPack_t constPack,
                                        cudnnFusedOpsConstParamLabel_t paramLabel,
                                        void *param,
                                        int *isNULL);

CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnCreateFusedOpsVariantParamPack(cudnnFusedOpsVariantParamPack_t *varPack, cudnnFusedOps_t ops);

CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnDestroyFusedOpsVariantParamPack(cudnnFusedOpsVariantParamPack_t varPack);

CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnSetFusedOpsVariantParamPackAttribute(cudnnFusedOpsVariantParamPack_t varPack,
                                          cudnnFusedOpsVariantParamLabel_t paramLabel,
                                          void *ptr);

CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnGetFusedOpsVariantParamPackAttribute(const cudnnFusedOpsVariantParamPack_t varPack,
                                          cudnnFusedOpsVariantParamLabel_t paramLabel,
                                          void *ptr);

CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnCreateFusedOpsPlan(cudnnFusedOpsPlan_t *plan, cudnnFusedOps_t ops);

CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnDestroyFusedOpsPlan(cudnnFusedOpsPlan_t plan);

CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnMakeFusedOpsPlan(cudnnHandle_t handle,
                      cudnnFusedOpsPlan_t plan,
                      const cudnnFusedOpsConstParamPack_t constPack,
                      size_t *workspaceSizeInBytes);

CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnFusedOpsExecute(cudnnHandle_t handle, const cudnnFusedOpsPlan_t plan, cudnnFusedOpsVariantParamPack_t varPack);

#if defined(__cplusplus)
}
#endif

#endif /* CUDNN_CNN_H_ */
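
For orientation, the sketch below shows one way the legacy convolution entry points declared in cudnn_cnn.h are typically driven: create a convolution descriptor, ask the heuristic for an algorithm, then call cudnnConvolutionForward. It is illustrative only and not part of the wheel contents; the helper name run_conv_forward_sketch, the 3x3/stride-1/pad-1 geometry, the FP32 compute type, and the caller-provided handle, descriptors, device buffers, and workspace are all assumptions for the example. Note that every entry point in this header is marked CUDNN_DEPRECATED in cuDNN 9, which recommends the graph API declared in cudnn_graph.h instead.

/* Illustrative sketch only; not NVIDIA's code and not shipped in this package. */
#include <cudnn.h>

static cudnnStatus_t
run_conv_forward_sketch(cudnnHandle_t handle,
                        const cudnnTensorDescriptor_t xDesc, const void *x,
                        const cudnnFilterDescriptor_t wDesc, const void *w,
                        const cudnnTensorDescriptor_t yDesc, void *y,
                        void *workSpace, size_t workSpaceSizeInBytes)
{
    cudnnConvolutionDescriptor_t convDesc;
    cudnnStatus_t st = cudnnCreateConvolutionDescriptor(&convDesc);
    if (st != CUDNN_STATUS_SUCCESS) return st;

    /* 3x3 convolution: pad 1x1, stride 1x1, dilation 1x1, FP32 accumulation (assumed). */
    st = cudnnSetConvolution2dDescriptor(convDesc, 1, 1, 1, 1, 1, 1,
                                         CUDNN_CROSS_CORRELATION, CUDNN_DATA_FLOAT);

    /* Let the heuristic pick the best forward algorithm for these descriptors. */
    cudnnConvolutionFwdAlgoPerf_t perf;
    int returnedAlgoCount = 0;
    if (st == CUDNN_STATUS_SUCCESS) {
        st = cudnnGetConvolutionForwardAlgorithm_v7(handle, xDesc, wDesc, convDesc, yDesc,
                                                    1, &returnedAlgoCount, &perf);
    }

    /* y = 1 * conv(x, w) + 0 * y, using the caller-provided workspace
       (sized beforehand, e.g. via cudnnGetConvolutionForwardWorkspaceSize). */
    const float alpha = 1.0f, beta = 0.0f;
    if (st == CUDNN_STATUS_SUCCESS && returnedAlgoCount > 0) {
        st = cudnnConvolutionForward(handle, &alpha, xDesc, x, wDesc, w, convDesc,
                                     perf.algo, workSpace, workSpaceSizeInBytes,
                                     &beta, yDesc, y);
    }

    cudnnDestroyConvolutionDescriptor(convDesc);
    return st;
}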