nvidia-cudnn-cu12 8.9.7.29__py3-none-win_amd64.whl → 9.0.0.312__py3-none-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56)
  1. nvidia/cudnn/bin/cudnn64_9.dll +0 -0
  2. nvidia/cudnn/bin/{cudnn_adv_infer64_8.dll → cudnn_adv64_9.dll} +0 -0
  3. nvidia/cudnn/bin/cudnn_cnn64_9.dll +0 -0
  4. nvidia/cudnn/bin/{cudnn_cnn_infer64_8.dll → cudnn_engines_precompiled64_9.dll} +0 -0
  5. nvidia/cudnn/bin/cudnn_engines_runtime_compiled64_9.dll +0 -0
  6. nvidia/cudnn/bin/cudnn_graph64_9.dll +0 -0
  7. nvidia/cudnn/bin/{cudnn_ops_infer64_8.dll → cudnn_heuristic64_9.dll} +0 -0
  8. nvidia/cudnn/bin/{cudnn_adv_train64_8.dll → cudnn_ops64_9.dll} +0 -0
  9. nvidia/cudnn/include/cudnn.h +8 -18
  10. nvidia/cudnn/include/{cudnn_adv_infer.h → cudnn_adv.h} +265 -252
  11. nvidia/cudnn/include/cudnn_backend.h +3 -558
  12. nvidia/cudnn/include/{cudnn_cnn_infer.h → cudnn_cnn.h} +187 -65
  13. nvidia/cudnn/include/cudnn_graph.h +908 -0
  14. nvidia/cudnn/include/{cudnn_ops_infer.h → cudnn_ops.h} +469 -336
  15. nvidia/cudnn/include/cudnn_version.h +4 -43
  16. nvidia/cudnn/lib/x64/cudnn.lib +0 -0
  17. nvidia/cudnn/lib/x64/cudnn64_9.lib +0 -0
  18. nvidia/cudnn/lib/x64/cudnn_adv.lib +0 -0
  19. nvidia/cudnn/lib/x64/cudnn_adv64_9.lib +0 -0
  20. nvidia/cudnn/lib/x64/cudnn_cnn.lib +0 -0
  21. nvidia/cudnn/lib/x64/cudnn_cnn64_9.lib +0 -0
  22. nvidia/cudnn/lib/x64/cudnn_engines_precompiled.lib +0 -0
  23. nvidia/cudnn/lib/x64/cudnn_engines_precompiled64_9.lib +0 -0
  24. nvidia/cudnn/lib/x64/cudnn_engines_runtime_compiled.lib +0 -0
  25. nvidia/cudnn/lib/x64/cudnn_engines_runtime_compiled64_9.lib +0 -0
  26. nvidia/cudnn/lib/x64/cudnn_graph.lib +0 -0
  27. nvidia/cudnn/lib/x64/cudnn_graph64_9.lib +0 -0
  28. nvidia/cudnn/lib/x64/cudnn_heuristic.lib +0 -0
  29. nvidia/cudnn/lib/x64/cudnn_heuristic64_9.lib +0 -0
  30. nvidia/cudnn/lib/x64/cudnn_ops.lib +0 -0
  31. nvidia/cudnn/lib/x64/cudnn_ops64_9.lib +0 -0
  32. {nvidia_cudnn_cu12-8.9.7.29.dist-info → nvidia_cudnn_cu12-9.0.0.312.dist-info}/METADATA +1 -1
  33. nvidia_cudnn_cu12-9.0.0.312.dist-info/RECORD +41 -0
  34. nvidia/cudnn/bin/cudnn64_8.dll +0 -0
  35. nvidia/cudnn/bin/cudnn_cnn_train64_8.dll +0 -0
  36. nvidia/cudnn/bin/cudnn_ops_train64_8.dll +0 -0
  37. nvidia/cudnn/include/cudnn_adv_train.h +0 -540
  38. nvidia/cudnn/include/cudnn_cnn_train.h +0 -219
  39. nvidia/cudnn/include/cudnn_ops_train.h +0 -501
  40. nvidia/cudnn/lib/x64/cudnn64_8.lib +0 -0
  41. nvidia/cudnn/lib/x64/cudnn_adv_infer.lib +0 -0
  42. nvidia/cudnn/lib/x64/cudnn_adv_infer64_8.lib +0 -0
  43. nvidia/cudnn/lib/x64/cudnn_adv_train.lib +0 -0
  44. nvidia/cudnn/lib/x64/cudnn_adv_train64_8.lib +0 -0
  45. nvidia/cudnn/lib/x64/cudnn_cnn_infer.lib +0 -0
  46. nvidia/cudnn/lib/x64/cudnn_cnn_infer64_8.lib +0 -0
  47. nvidia/cudnn/lib/x64/cudnn_cnn_train.lib +0 -0
  48. nvidia/cudnn/lib/x64/cudnn_cnn_train64_8.lib +0 -0
  49. nvidia/cudnn/lib/x64/cudnn_ops_infer.lib +0 -0
  50. nvidia/cudnn/lib/x64/cudnn_ops_infer64_8.lib +0 -0
  51. nvidia/cudnn/lib/x64/cudnn_ops_train.lib +0 -0
  52. nvidia/cudnn/lib/x64/cudnn_ops_train64_8.lib +0 -0
  53. nvidia_cudnn_cu12-8.9.7.29.dist-info/RECORD +0 -40
  54. {nvidia_cudnn_cu12-8.9.7.29.dist-info → nvidia_cudnn_cu12-9.0.0.312.dist-info}/License.txt +0 -0
  55. {nvidia_cudnn_cu12-8.9.7.29.dist-info → nvidia_cudnn_cu12-9.0.0.312.dist-info}/WHEEL +0 -0
  56. {nvidia_cudnn_cu12-8.9.7.29.dist-info → nvidia_cudnn_cu12-9.0.0.312.dist-info}/top_level.txt +0 -0
@@ -1,540 +0,0 @@
1
- /*
2
- * Copyright 2014-2023 NVIDIA Corporation. All rights reserved.
3
- *
4
- * NOTICE TO LICENSEE:
5
- *
6
- * This source code and/or documentation ("Licensed Deliverables") are
7
- * subject to NVIDIA intellectual property rights under U.S. and
8
- * international Copyright laws.
9
- *
10
- * These Licensed Deliverables contained herein is PROPRIETARY and
11
- * CONFIDENTIAL to NVIDIA and is being provided under the terms and
12
- * conditions of a form of NVIDIA software license agreement by and
13
- * between NVIDIA and Licensee ("License Agreement") or electronically
14
- * accepted by Licensee. Notwithstanding any terms or conditions to
15
- * the contrary in the License Agreement, reproduction or disclosure
16
- * of the Licensed Deliverables to any third party without the express
17
- * written consent of NVIDIA is prohibited.
18
- *
19
- * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
20
- * LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
21
- * SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
22
- * PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
23
- * NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
24
- * DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
25
- * NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
26
- * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
27
- * LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
28
- * SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
29
- * DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
30
- * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
31
- * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
32
- * OF THESE LICENSED DELIVERABLES.
33
- *
34
- * U.S. Government End Users. These Licensed Deliverables are a
35
- * "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
36
- * 1995), consisting of "commercial computer software" and "commercial
37
- * computer software documentation" as such terms are used in 48
38
- * C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
39
- * only as a commercial end item. Consistent with 48 C.F.R.12.212 and
40
- * 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
41
- * U.S. Government End Users acquire the Licensed Deliverables with
42
- * only those rights set forth herein.
43
- *
44
- * Any use of the Licensed Deliverables in individual and commercial
45
- * software must include, in the user documentation and internal
46
- * comments to the code, the above Disclaimer and U.S. Government End
47
- * Users Notice.
48
- */
49
-
50
- /* cudnn_adv_train : cuDNN's advanced and experimental features.
51
-
52
- */
53
-
54
- #if !defined(CUDNN_ADV_TRAIN_H_)
55
- #define CUDNN_ADV_TRAIN_H_
56
-
57
- #include <cuda_runtime.h>
58
- #include <stdint.h>
59
-
60
- #include "cudnn_version.h"
61
- #include "cudnn_ops_infer.h"
62
- #include "cudnn_ops_train.h"
63
- #include "cudnn_adv_infer.h"
64
-
65
- /* These version numbers are autogenerated, do not edit manually. */
66
- #define CUDNN_ADV_TRAIN_MAJOR 8
67
- #define CUDNN_ADV_TRAIN_MINOR 9
68
- #define CUDNN_ADV_TRAIN_PATCH 7
69
-
70
- #if (CUDNN_ADV_TRAIN_MAJOR != CUDNN_MAJOR) || (CUDNN_ADV_TRAIN_MINOR != CUDNN_MINOR) || \
71
- (CUDNN_ADV_TRAIN_PATCH != CUDNN_PATCHLEVEL)
72
- #error Version mismatch in cuDNN ADV TRAIN!!!
73
- #endif
74
-
75
- #if defined(__cplusplus)
76
- extern "C" {
77
- #endif
78
-
79
- typedef enum {
80
- CUDNN_WGRAD_MODE_ADD = 0, /* add partial gradients to wgrad output buffers */
81
- CUDNN_WGRAD_MODE_SET = 1, /* write partial gradients to wgrad output buffers */
82
- } cudnnWgradMode_t;
83
-
84
- CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
85
- cudnnRNNForwardTraining(cudnnHandle_t handle,
86
- const cudnnRNNDescriptor_t rnnDesc,
87
- const int seqLength,
88
- const cudnnTensorDescriptor_t *xDesc,
89
- const void *x,
90
- const cudnnTensorDescriptor_t hxDesc,
91
- const void *hx,
92
- const cudnnTensorDescriptor_t cxDesc,
93
- const void *cx,
94
- const cudnnFilterDescriptor_t wDesc,
95
- const void *w,
96
- const cudnnTensorDescriptor_t *yDesc,
97
- void *y,
98
- const cudnnTensorDescriptor_t hyDesc,
99
- void *hy,
100
- const cudnnTensorDescriptor_t cyDesc,
101
- void *cy,
102
- void *workSpace,
103
- size_t workSpaceSizeInBytes,
104
- void *reserveSpace,
105
- size_t reserveSpaceSizeInBytes);
106
-
107
- CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
108
- cudnnRNNBackwardData(cudnnHandle_t handle,
109
- const cudnnRNNDescriptor_t rnnDesc,
110
- const int seqLength,
111
- const cudnnTensorDescriptor_t *yDesc,
112
- const void *y,
113
- const cudnnTensorDescriptor_t *dyDesc,
114
- const void *dy,
115
- const cudnnTensorDescriptor_t dhyDesc,
116
- const void *dhy,
117
- const cudnnTensorDescriptor_t dcyDesc,
118
- const void *dcy,
119
- const cudnnFilterDescriptor_t wDesc,
120
- const void *w,
121
- const cudnnTensorDescriptor_t hxDesc,
122
- const void *hx,
123
- const cudnnTensorDescriptor_t cxDesc,
124
- const void *cx,
125
- const cudnnTensorDescriptor_t *dxDesc,
126
- void *dx,
127
- const cudnnTensorDescriptor_t dhxDesc,
128
- void *dhx,
129
- const cudnnTensorDescriptor_t dcxDesc,
130
- void *dcx,
131
- void *workSpace,
132
- size_t workSpaceSizeInBytes,
133
- void *reserveSpace,
134
- size_t reserveSpaceSizeInBytes);
135
-
136
- cudnnStatus_t CUDNNWINAPI
137
- cudnnRNNBackwardData_v8(cudnnHandle_t handle,
138
- cudnnRNNDescriptor_t rnnDesc,
139
- const int32_t devSeqLengths[],
140
- cudnnRNNDataDescriptor_t yDesc,
141
- const void *y,
142
- const void *dy,
143
- cudnnRNNDataDescriptor_t xDesc,
144
- void *dx,
145
- cudnnTensorDescriptor_t hDesc,
146
- const void *hx,
147
- const void *dhy,
148
- void *dhx,
149
- cudnnTensorDescriptor_t cDesc,
150
- const void *cx,
151
- const void *dcy,
152
- void *dcx,
153
- size_t weightSpaceSize,
154
- const void *weightSpace,
155
- size_t workSpaceSize,
156
- void *workSpace,
157
- size_t reserveSpaceSize,
158
- void *reserveSpace);
159
-
160
- CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
161
- cudnnRNNBackwardWeights(cudnnHandle_t handle,
162
- const cudnnRNNDescriptor_t rnnDesc,
163
- const int seqLength,
164
- const cudnnTensorDescriptor_t *xDesc,
165
- const void *x,
166
- const cudnnTensorDescriptor_t hxDesc,
167
- const void *hx,
168
- const cudnnTensorDescriptor_t *yDesc,
169
- const void *y,
170
- const void *workSpace,
171
- size_t workSpaceSizeInBytes,
172
- const cudnnFilterDescriptor_t dwDesc,
173
- void *dw,
174
- const void *reserveSpace,
175
- size_t reserveSpaceSizeInBytes);
176
-
177
- cudnnStatus_t CUDNNWINAPI
178
- cudnnRNNBackwardWeights_v8(cudnnHandle_t handle,
179
- cudnnRNNDescriptor_t rnnDesc,
180
- cudnnWgradMode_t addGrad,
181
- const int32_t devSeqLengths[],
182
- cudnnRNNDataDescriptor_t xDesc,
183
- const void *x,
184
- cudnnTensorDescriptor_t hDesc,
185
- const void *hx,
186
- cudnnRNNDataDescriptor_t yDesc,
187
- const void *y,
188
- size_t weightSpaceSize,
189
- void *dweightSpace,
190
- size_t workSpaceSize,
191
- void *workSpace,
192
- size_t reserveSpaceSize,
193
- void *reserveSpace);
194
-
195
- /* RNN EX API */
196
-
197
- CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
198
- cudnnRNNForwardTrainingEx(cudnnHandle_t handle,
199
- const cudnnRNNDescriptor_t rnnDesc,
200
- const cudnnRNNDataDescriptor_t xDesc,
201
- const void *x,
202
- const cudnnTensorDescriptor_t hxDesc,
203
- const void *hx,
204
- const cudnnTensorDescriptor_t cxDesc,
205
- const void *cx,
206
- const cudnnFilterDescriptor_t wDesc,
207
- const void *w,
208
- const cudnnRNNDataDescriptor_t yDesc,
209
- void *y,
210
- const cudnnTensorDescriptor_t hyDesc,
211
- void *hy,
212
- const cudnnTensorDescriptor_t cyDesc,
213
- void *cy,
214
- const cudnnRNNDataDescriptor_t kDesc, /* reserved, should pass NULL */
215
- const void *keys, /* reserved, should pass NULL */
216
- const cudnnRNNDataDescriptor_t cDesc, /* reserved, should pass NULL */
217
- void *cAttn, /* reserved, should pass NULL */
218
- const cudnnRNNDataDescriptor_t iDesc, /* reserved, should pass NULL */
219
- void *iAttn, /* reserved, should pass NULL */
220
- const cudnnRNNDataDescriptor_t qDesc, /* reserved, should pass NULL */
221
- void *queries, /* reserved, should pass NULL */
222
- void *workSpace,
223
- size_t workSpaceSizeInBytes,
224
- void *reserveSpace,
225
- size_t reserveSpaceSizeInBytes);
226
-
227
- CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
228
- cudnnRNNBackwardDataEx(cudnnHandle_t handle,
229
- const cudnnRNNDescriptor_t rnnDesc,
230
- const cudnnRNNDataDescriptor_t yDesc,
231
- const void *y,
232
- const cudnnRNNDataDescriptor_t dyDesc,
233
- const void *dy,
234
- const cudnnRNNDataDescriptor_t dcDesc, /* reserved, should pass NULL */
235
- const void *dcAttn, /* reserved, should pass NULL */
236
- const cudnnTensorDescriptor_t dhyDesc,
237
- const void *dhy,
238
- const cudnnTensorDescriptor_t dcyDesc,
239
- const void *dcy,
240
- const cudnnFilterDescriptor_t wDesc,
241
- const void *w,
242
- const cudnnTensorDescriptor_t hxDesc,
243
- const void *hx,
244
- const cudnnTensorDescriptor_t cxDesc,
245
- const void *cx,
246
- const cudnnRNNDataDescriptor_t dxDesc,
247
- void *dx,
248
- const cudnnTensorDescriptor_t dhxDesc,
249
- void *dhx,
250
- const cudnnTensorDescriptor_t dcxDesc,
251
- void *dcx,
252
- const cudnnRNNDataDescriptor_t dkDesc, /* reserved, should pass NULL */
253
- void *dkeys, /* reserved, should pass NULL */
254
- void *workSpace,
255
- size_t workSpaceSizeInBytes,
256
- void *reserveSpace,
257
- size_t reserveSpaceSizeInBytes);
258
-
259
- CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
260
- cudnnRNNBackwardWeightsEx(cudnnHandle_t handle,
261
- const cudnnRNNDescriptor_t rnnDesc,
262
- const cudnnRNNDataDescriptor_t xDesc,
263
- const void *x,
264
- const cudnnTensorDescriptor_t hxDesc,
265
- const void *hx,
266
- const cudnnRNNDataDescriptor_t yDesc,
267
- const void *y,
268
- void *workSpace,
269
- size_t workSpaceSizeInBytes,
270
- const cudnnFilterDescriptor_t dwDesc,
271
- void *dw,
272
- void *reserveSpace,
273
- size_t reserveSpaceSizeInBytes);
274
-
275
- /* RNN FIND API */
276
-
277
- CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
278
- cudnnGetRNNForwardTrainingAlgorithmMaxCount(cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, int *count);
279
-
280
- CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
281
- cudnnFindRNNForwardTrainingAlgorithmEx(cudnnHandle_t handle,
282
- const cudnnRNNDescriptor_t rnnDesc,
283
- const int seqLength,
284
- const cudnnTensorDescriptor_t *xDesc,
285
- const void *x,
286
- const cudnnTensorDescriptor_t hxDesc,
287
- const void *hx,
288
- const cudnnTensorDescriptor_t cxDesc,
289
- const void *cx,
290
- const cudnnFilterDescriptor_t wDesc,
291
- const void *w,
292
- const cudnnTensorDescriptor_t *yDesc,
293
- void *y,
294
- const cudnnTensorDescriptor_t hyDesc,
295
- void *hy,
296
- const cudnnTensorDescriptor_t cyDesc,
297
- void *cy,
298
- const float findIntensity,
299
- const int requestedAlgoCount,
300
- int *returnedAlgoCount,
301
- cudnnAlgorithmPerformance_t *perfResults,
302
- void *workspace,
303
- size_t workSpaceSizeInBytes,
304
- void *reserveSpace,
305
- size_t reserveSpaceSizeInBytes);
306
-
307
- CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
308
- cudnnGetRNNBackwardDataAlgorithmMaxCount(cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, int *count);
309
-
310
- CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
311
- cudnnFindRNNBackwardDataAlgorithmEx(cudnnHandle_t handle,
312
- const cudnnRNNDescriptor_t rnnDesc,
313
- const int seqLength,
314
- const cudnnTensorDescriptor_t *yDesc,
315
- const void *y,
316
- const cudnnTensorDescriptor_t *dyDesc,
317
- const void *dy,
318
- const cudnnTensorDescriptor_t dhyDesc,
319
- const void *dhy,
320
- const cudnnTensorDescriptor_t dcyDesc,
321
- const void *dcy,
322
- const cudnnFilterDescriptor_t wDesc,
323
- const void *w,
324
- const cudnnTensorDescriptor_t hxDesc,
325
- const void *hx,
326
- const cudnnTensorDescriptor_t cxDesc,
327
- const void *cx,
328
- const cudnnTensorDescriptor_t *dxDesc,
329
- void *dx,
330
- const cudnnTensorDescriptor_t dhxDesc,
331
- void *dhx,
332
- const cudnnTensorDescriptor_t dcxDesc,
333
- void *dcx,
334
- const float findIntensity,
335
- const int requestedAlgoCount,
336
- int *returnedAlgoCount,
337
- cudnnAlgorithmPerformance_t *perfResults,
338
- void *workspace,
339
- size_t workSpaceSizeInBytes,
340
- void *reserveSpace,
341
- size_t reserveSpaceSizeInBytes);
342
-
343
- CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
344
- cudnnGetRNNBackwardWeightsAlgorithmMaxCount(cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, int *count);
345
-
346
- CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
347
- cudnnFindRNNBackwardWeightsAlgorithmEx(cudnnHandle_t handle,
348
- const cudnnRNNDescriptor_t rnnDesc,
349
- const int seqLength,
350
- const cudnnTensorDescriptor_t *xDesc,
351
- const void *x,
352
- const cudnnTensorDescriptor_t hxDesc,
353
- const void *hx,
354
- const cudnnTensorDescriptor_t *yDesc,
355
- const void *y,
356
- const float findIntensity,
357
- const int requestedAlgoCount,
358
- int *returnedAlgoCount,
359
- cudnnAlgorithmPerformance_t *perfResults,
360
- const void *workspace,
361
- size_t workSpaceSizeInBytes,
362
- const cudnnFilterDescriptor_t dwDesc,
363
- void *dw,
364
- const void *reserveSpace,
365
- size_t reserveSpaceSizeInBytes);
366
-
367
- cudnnStatus_t CUDNNWINAPI
368
- cudnnMultiHeadAttnBackwardData(cudnnHandle_t handle,
369
- const cudnnAttnDescriptor_t attnDesc,
370
- const int loWinIdx[],
371
- const int hiWinIdx[],
372
- const int devSeqLengthsDQDO[],
373
- const int devSeqLengthsDKDV[],
374
- const cudnnSeqDataDescriptor_t doDesc,
375
- const void *dout,
376
- const cudnnSeqDataDescriptor_t dqDesc,
377
- void *dqueries,
378
- const void *queries,
379
- const cudnnSeqDataDescriptor_t dkDesc,
380
- void *dkeys,
381
- const void *keys,
382
- const cudnnSeqDataDescriptor_t dvDesc,
383
- void *dvalues,
384
- const void *values,
385
- size_t weightSizeInBytes,
386
- const void *weights,
387
- size_t workSpaceSizeInBytes,
388
- void *workSpace,
389
- size_t reserveSpaceSizeInBytes,
390
- void *reserveSpace);
391
-
392
- cudnnStatus_t CUDNNWINAPI
393
- cudnnMultiHeadAttnBackwardWeights(cudnnHandle_t handle,
394
- const cudnnAttnDescriptor_t attnDesc,
395
- cudnnWgradMode_t addGrad,
396
- const cudnnSeqDataDescriptor_t qDesc,
397
- const void *queries,
398
- const cudnnSeqDataDescriptor_t kDesc,
399
- const void *keys,
400
- const cudnnSeqDataDescriptor_t vDesc,
401
- const void *values,
402
- const cudnnSeqDataDescriptor_t doDesc,
403
- const void *dout,
404
- size_t weightSizeInBytes,
405
- const void *weights,
406
- void *dweights,
407
- size_t workSpaceSizeInBytes,
408
- void *workSpace,
409
- size_t reserveSpaceSizeInBytes,
410
- void *reserveSpace);
411
-
412
- /*
413
- * CTC (Connectionist Temporal Classification) loss descriptor create/destroy/set/get functions
414
- */
415
- /* Input normalization mode for loss function */
416
- typedef enum {
417
- CUDNN_LOSS_NORMALIZATION_NONE = 0,
418
- CUDNN_LOSS_NORMALIZATION_SOFTMAX = 1,
419
- } cudnnLossNormalizationMode_t;
420
-
421
- cudnnStatus_t CUDNNWINAPI
422
- cudnnCreateCTCLossDescriptor(cudnnCTCLossDescriptor_t *ctcLossDesc);
423
-
424
- cudnnStatus_t CUDNNWINAPI
425
- cudnnSetCTCLossDescriptor(cudnnCTCLossDescriptor_t ctcLossDesc, cudnnDataType_t compType);
426
-
427
- cudnnStatus_t CUDNNWINAPI
428
- cudnnSetCTCLossDescriptorEx(cudnnCTCLossDescriptor_t ctcLossDesc,
429
- cudnnDataType_t compType,
430
- cudnnLossNormalizationMode_t normMode,
431
- cudnnNanPropagation_t gradMode);
432
-
433
- cudnnStatus_t CUDNNWINAPI
434
- cudnnSetCTCLossDescriptor_v8(cudnnCTCLossDescriptor_t ctcLossDesc,
435
- cudnnDataType_t compType,
436
- cudnnLossNormalizationMode_t normMode,
437
- cudnnNanPropagation_t gradMode,
438
- int maxLabelLength);
439
-
440
- cudnnStatus_t CUDNNWINAPI
441
- cudnnGetCTCLossDescriptor(cudnnCTCLossDescriptor_t ctcLossDesc, cudnnDataType_t *compType);
442
-
443
- cudnnStatus_t CUDNNWINAPI
444
- cudnnGetCTCLossDescriptorEx(cudnnCTCLossDescriptor_t ctcLossDesc,
445
- cudnnDataType_t *compType,
446
- cudnnLossNormalizationMode_t *normMode,
447
- cudnnNanPropagation_t *gradMode);
448
-
449
- cudnnStatus_t CUDNNWINAPI
450
- cudnnGetCTCLossDescriptor_v8(cudnnCTCLossDescriptor_t ctcLossDesc,
451
- cudnnDataType_t *compType,
452
- cudnnLossNormalizationMode_t *normMode,
453
- cudnnNanPropagation_t *gradMode,
454
- int *maxLabelLength);
455
-
456
- cudnnStatus_t CUDNNWINAPI
457
- cudnnDestroyCTCLossDescriptor(cudnnCTCLossDescriptor_t ctcLossDesc);
458
-
459
- /* return the ctc costs and gradients, given the probabilities and labels */
460
- cudnnStatus_t CUDNNWINAPI
461
- cudnnCTCLoss(
462
- cudnnHandle_t handle,
463
- const cudnnTensorDescriptor_t
464
- probsDesc, /* Tensor descriptor for probabilities, the dimensions are T,N,A (T is the timing steps, N is the
465
- mini batch size, A is the alphabet size) */
466
- const void *probs, /* probabilities after softmax, in GPU memory */
467
- const int hostLabels[], /* labels, in CPU memory */
468
- const int hostLabelLengths[], /* the length of each label, in CPU memory */
469
- const int hostInputLengths[], /* the lengths of timing steps in each batch, in CPU memory */
470
- void *costs, /* the returned costs of CTC, in GPU memory */
471
- const cudnnTensorDescriptor_t gradientsDesc, /* Tensor descriptor for gradients, the dimensions are T,N,A */
472
- void *gradients, /* the returned CTC gradients, in GPU memory, to compute costs only, set it to NULL */
473
- cudnnCTCLossAlgo_t algo, /* algorithm selected, supported now 0 and 1 */
474
- cudnnCTCLossDescriptor_t ctcLossDesc,
475
- void *workspace, /* pointer to the workspace, in GPU memory */
476
- size_t workSpaceSizeInBytes); /* size of the workspace */
477
-
478
- /* return the ctc costs and gradients, given the probabilities and labels */
479
- cudnnStatus_t CUDNNWINAPI
480
- cudnnCTCLoss_v8(
481
- cudnnHandle_t handle,
482
- cudnnCTCLossAlgo_t algo, /* algorithm selected, supported now 0 and 1 */
483
- cudnnCTCLossDescriptor_t ctcLossDesc,
484
- const cudnnTensorDescriptor_t
485
- probsDesc, /* Tensor descriptor for probabilities, the dimensions are T,N,A (T is the timing steps, N is the
486
- mini batch size, A is the alphabet size) */
487
- const void *probs, /* probabilities after softmax, in GPU memory */
488
- const int labels[], /* labels, in GPU memory */
489
- const int labelLengths[], /* the length of each label, in GPU memory */
490
- const int inputLengths[], /* the lengths of timing steps in each batch, in GPU memory */
491
- void *costs, /* the returned costs of CTC, in GPU memory */
492
- const cudnnTensorDescriptor_t gradientsDesc, /* Tensor descriptor for gradients, the dimensions are T,N,A */
493
- void *gradients, /* the returned CTC gradients, in GPU memory, to compute costs only, set it to NULL */
494
- size_t workSpaceSizeInBytes, /* size of the workspace */
495
- void *workspace); /* pointer to the workspace, in GPU memory */
496
-
497
- /* return the workspace size needed for ctc */
498
- cudnnStatus_t CUDNNWINAPI
499
- cudnnGetCTCLossWorkspaceSize(
500
- cudnnHandle_t handle,
501
- const cudnnTensorDescriptor_t probsDesc, /* Tensor descriptor for probabilities, the dimensions are T,N,A (T is the
502
- timing steps, N is the mini batch size, A is the alphabet size) */
503
- const cudnnTensorDescriptor_t gradientsDesc, /* Tensor descriptor for gradients, the
504
- dimensions are T,N,A. To compute costs
505
- only, set it to NULL */
506
- const int *labels, /* labels, in CPU memory */
507
- const int *labelLengths, /* the length of each label, in CPU memory */
508
- const int *inputLengths, /* the lengths of timing steps in each batch, in CPU memory */
509
- cudnnCTCLossAlgo_t algo, /* algorithm selected, supported now 0 and 1 */
510
- cudnnCTCLossDescriptor_t ctcLossDesc,
511
- size_t *sizeInBytes); /* pointer to the returned workspace size */
512
-
513
- /* return the workspace size needed for ctc */
514
- cudnnStatus_t CUDNNWINAPI
515
- cudnnGetCTCLossWorkspaceSize_v8(
516
- cudnnHandle_t handle,
517
- cudnnCTCLossAlgo_t algo, /* algorithm selected, supported now 0 and 1 */
518
- cudnnCTCLossDescriptor_t ctcLossDesc,
519
- const cudnnTensorDescriptor_t probsDesc, /* Tensor descriptor for probabilities, the dimensions are T,N,A (T is the
520
- timing steps, N is the mini batch size, A is the alphabet size) */
521
- const cudnnTensorDescriptor_t gradientsDesc, /* Tensor descriptor for gradients, the
522
- dimensions are T,N,A. To compute costs
523
- only, set it to NULL */
524
- size_t *sizeInBytes); /* pointer to the returned workspace size */
525
-
526
- /*
527
- * \brief Cross-library version checker.
528
- * This function is implemented differently in each sub-library. Each sublib
529
- * checks whether its own version matches that of its dependencies.
530
- * \returns CUDNN_STATUS_SUCCESS if the version check passes,
531
- * CUDNN_STATUS_VERSION_MISMATCH if the versions are inconsistent.
532
- */
533
- cudnnStatus_t CUDNNWINAPI
534
- cudnnAdvTrainVersionCheck(void);
535
-
536
- #if defined(__cplusplus)
537
- }
538
- #endif
539
-
540
- #endif /* CUDNN_ADV_TRAIN_H_ */