nvidia-cudnn-cu12 8.9.6.50__py3-none-win_amd64.whl → 9.0.0.312__py3-none-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56)
  1. nvidia/cudnn/bin/cudnn64_9.dll +0 -0
  2. nvidia/cudnn/bin/{cudnn_adv_infer64_8.dll → cudnn_adv64_9.dll} +0 -0
  3. nvidia/cudnn/bin/cudnn_cnn64_9.dll +0 -0
  4. nvidia/cudnn/bin/{cudnn_cnn_infer64_8.dll → cudnn_engines_precompiled64_9.dll} +0 -0
  5. nvidia/cudnn/bin/cudnn_engines_runtime_compiled64_9.dll +0 -0
  6. nvidia/cudnn/bin/cudnn_graph64_9.dll +0 -0
  7. nvidia/cudnn/bin/{cudnn_ops_infer64_8.dll → cudnn_heuristic64_9.dll} +0 -0
  8. nvidia/cudnn/bin/{cudnn_adv_train64_8.dll → cudnn_ops64_9.dll} +0 -0
  9. nvidia/cudnn/include/cudnn.h +8 -18
  10. nvidia/cudnn/include/{cudnn_adv_infer.h → cudnn_adv.h} +265 -252
  11. nvidia/cudnn/include/cudnn_backend.h +3 -558
  12. nvidia/cudnn/include/{cudnn_cnn_infer.h → cudnn_cnn.h} +187 -65
  13. nvidia/cudnn/include/cudnn_graph.h +908 -0
  14. nvidia/cudnn/include/{cudnn_ops_infer.h → cudnn_ops.h} +469 -336
  15. nvidia/cudnn/include/cudnn_version.h +4 -43
  16. nvidia/cudnn/lib/x64/cudnn.lib +0 -0
  17. nvidia/cudnn/lib/x64/cudnn64_9.lib +0 -0
  18. nvidia/cudnn/lib/x64/cudnn_adv.lib +0 -0
  19. nvidia/cudnn/lib/x64/cudnn_adv64_9.lib +0 -0
  20. nvidia/cudnn/lib/x64/cudnn_cnn.lib +0 -0
  21. nvidia/cudnn/lib/x64/cudnn_cnn64_9.lib +0 -0
  22. nvidia/cudnn/lib/x64/cudnn_engines_precompiled.lib +0 -0
  23. nvidia/cudnn/lib/x64/cudnn_engines_precompiled64_9.lib +0 -0
  24. nvidia/cudnn/lib/x64/cudnn_engines_runtime_compiled.lib +0 -0
  25. nvidia/cudnn/lib/x64/cudnn_engines_runtime_compiled64_9.lib +0 -0
  26. nvidia/cudnn/lib/x64/cudnn_graph.lib +0 -0
  27. nvidia/cudnn/lib/x64/cudnn_graph64_9.lib +0 -0
  28. nvidia/cudnn/lib/x64/cudnn_heuristic.lib +0 -0
  29. nvidia/cudnn/lib/x64/cudnn_heuristic64_9.lib +0 -0
  30. nvidia/cudnn/lib/x64/cudnn_ops.lib +0 -0
  31. nvidia/cudnn/lib/x64/cudnn_ops64_9.lib +0 -0
  32. {nvidia_cudnn_cu12-8.9.6.50.dist-info → nvidia_cudnn_cu12-9.0.0.312.dist-info}/METADATA +1 -1
  33. nvidia_cudnn_cu12-9.0.0.312.dist-info/RECORD +41 -0
  34. nvidia/cudnn/bin/cudnn64_8.dll +0 -0
  35. nvidia/cudnn/bin/cudnn_cnn_train64_8.dll +0 -0
  36. nvidia/cudnn/bin/cudnn_ops_train64_8.dll +0 -0
  37. nvidia/cudnn/include/cudnn_adv_train.h +0 -540
  38. nvidia/cudnn/include/cudnn_cnn_train.h +0 -219
  39. nvidia/cudnn/include/cudnn_ops_train.h +0 -501
  40. nvidia/cudnn/lib/x64/cudnn64_8.lib +0 -0
  41. nvidia/cudnn/lib/x64/cudnn_adv_infer.lib +0 -0
  42. nvidia/cudnn/lib/x64/cudnn_adv_infer64_8.lib +0 -0
  43. nvidia/cudnn/lib/x64/cudnn_adv_train.lib +0 -0
  44. nvidia/cudnn/lib/x64/cudnn_adv_train64_8.lib +0 -0
  45. nvidia/cudnn/lib/x64/cudnn_cnn_infer.lib +0 -0
  46. nvidia/cudnn/lib/x64/cudnn_cnn_infer64_8.lib +0 -0
  47. nvidia/cudnn/lib/x64/cudnn_cnn_train.lib +0 -0
  48. nvidia/cudnn/lib/x64/cudnn_cnn_train64_8.lib +0 -0
  49. nvidia/cudnn/lib/x64/cudnn_ops_infer.lib +0 -0
  50. nvidia/cudnn/lib/x64/cudnn_ops_infer64_8.lib +0 -0
  51. nvidia/cudnn/lib/x64/cudnn_ops_train.lib +0 -0
  52. nvidia/cudnn/lib/x64/cudnn_ops_train64_8.lib +0 -0
  53. nvidia_cudnn_cu12-8.9.6.50.dist-info/RECORD +0 -40
  54. {nvidia_cudnn_cu12-8.9.6.50.dist-info → nvidia_cudnn_cu12-9.0.0.312.dist-info}/License.txt +0 -0
  55. {nvidia_cudnn_cu12-8.9.6.50.dist-info → nvidia_cudnn_cu12-9.0.0.312.dist-info}/WHEEL +0 -0
  56. {nvidia_cudnn_cu12-8.9.6.50.dist-info → nvidia_cudnn_cu12-9.0.0.312.dist-info}/top_level.txt +0 -0
@@ -47,26 +47,24 @@
47
47
  * Users Notice.
48
48
  */
49
49
 
50
- /* cudnn_adv_infer : cuDNN's advanced and experimental features.
50
+ /* cudnn_adv : cuDNN's advanced and experimental features.
51
51
 
52
52
  */
53
53
 
54
- #if !defined(CUDNN_ADV_INFER_H_)
55
- #define CUDNN_ADV_INFER_H_
54
+ #if !defined(CUDNN_ADV_H_)
55
+ #define CUDNN_ADV_H_
56
56
 
57
- #include <cuda_runtime.h>
58
57
  #include <stdint.h>
59
58
 
60
59
  #include "cudnn_version.h"
61
- #include "cudnn_ops_infer.h"
60
+ #include "cudnn_ops.h"
62
61
 
63
62
  /* These version numbers are autogenerated, do not edit manually. */
64
- #define CUDNN_ADV_INFER_MAJOR 8
65
- #define CUDNN_ADV_INFER_MINOR 9
66
- #define CUDNN_ADV_INFER_PATCH 6
63
+ #define CUDNN_ADV_MAJOR 9
64
+ #define CUDNN_ADV_MINOR 0
65
+ #define CUDNN_ADV_PATCH 0
67
66
 
68
- #if (CUDNN_ADV_INFER_MAJOR != CUDNN_MAJOR) || (CUDNN_ADV_INFER_MINOR != CUDNN_MINOR) || \
69
- (CUDNN_ADV_INFER_PATCH != CUDNN_PATCHLEVEL)
67
+ #if (CUDNN_ADV_MAJOR != CUDNN_MAJOR) || (CUDNN_ADV_MINOR != CUDNN_MINOR) || (CUDNN_ADV_PATCH != CUDNN_PATCHLEVEL)
70
68
  #error Version mismatch in cuDNN ADV INFER!!!
71
69
  #endif
72
70
 
@@ -76,6 +74,14 @@ extern "C" {
76
74
 
77
75
  /* BASIC RNN API */
78
76
 
77
+ typedef enum {
78
+ CUDNN_RNN_ALGO_STANDARD = 0,
79
+ CUDNN_RNN_ALGO_PERSIST_STATIC = 1,
80
+ CUDNN_RNN_ALGO_PERSIST_DYNAMIC = 2,
81
+ CUDNN_RNN_ALGO_PERSIST_STATIC_SMALL_H = 3,
82
+ CUDNN_RNN_ALGO_COUNT = 4,
83
+ } cudnnRNNAlgo_t;
84
+
79
85
  typedef enum {
80
86
  CUDNN_FWD_MODE_INFERENCE = 0,
81
87
  CUDNN_FWD_MODE_TRAINING = 1,
@@ -116,19 +122,13 @@ typedef enum {
116
122
  CUDNN_RNN_DATA_LAYOUT_BATCH_MAJOR_UNPACKED = 2, /* padded, outer stride from one batch to the next */
117
123
  } cudnnRNNDataLayout_t;
118
124
 
119
- /* Legacy type for backward compatibility */
120
- typedef unsigned cudnnRNNPaddingMode_t;
121
-
122
- /* For auxFlags in cudnnSetRNNDescriptor_v8() and cudnnSetRNNPaddingMode() */
125
+ /* For auxFlags in cudnnSetRNNDescriptor_v8() */
123
126
  #define CUDNN_RNN_PADDED_IO_DISABLED 0
124
127
  #define CUDNN_RNN_PADDED_IO_ENABLED (1U << 0)
125
128
 
126
129
  struct cudnnRNNStruct;
127
130
  typedef struct cudnnRNNStruct *cudnnRNNDescriptor_t;
128
131
 
129
- struct cudnnPersistentRNNPlan;
130
- typedef struct cudnnPersistentRNNPlan *cudnnPersistentRNNPlan_t;
131
-
132
132
  struct cudnnRNNDataStruct;
133
133
  typedef struct cudnnRNNDataStruct *cudnnRNNDataDescriptor_t;
134
134
 
@@ -138,6 +138,13 @@ cudnnCreateRNNDescriptor(cudnnRNNDescriptor_t *rnnDesc);
138
138
  cudnnStatus_t CUDNNWINAPI
139
139
  cudnnDestroyRNNDescriptor(cudnnRNNDescriptor_t rnnDesc);
140
140
 
141
+ /*
142
+ * mathPrec in cudnnSetRNNDescriptor_v8() specifies compute precision.
143
+ * Compute precision is further modified by mathType that sets the
144
+ * preferred option for using NVIDIA Tensor Cores. dataType specifies
145
+ * input/output data type and weight/bias type.
146
+ */
147
+
141
148
  cudnnStatus_t CUDNNWINAPI
142
149
  cudnnSetRNNDescriptor_v8(cudnnRNNDescriptor_t rnnDesc,
143
150
  cudnnRNNAlgo_t algo,
@@ -172,49 +179,7 @@ cudnnGetRNNDescriptor_v8(cudnnRNNDescriptor_t rnnDesc,
172
179
  cudnnDropoutDescriptor_t *dropoutDesc,
173
180
  uint32_t *auxFlags);
174
181
 
175
- /*
176
- * mathPrec in cudnnSetRNNDescriptor_v6() specifies compute precision
177
- * compute precision is further modified by cudnnSetRNNMatrixMathType()
178
- * dataType in cudnnGetRNNParamsSize() and wDesc specify weight storage
179
- * dropout is between RNN layers, not between recurrent steps
180
- */
181
- CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
182
- cudnnSetRNNDescriptor_v6(cudnnHandle_t handle,
183
- cudnnRNNDescriptor_t rnnDesc,
184
- const int hiddenSize,
185
- const int numLayers,
186
- cudnnDropoutDescriptor_t dropoutDesc,
187
- cudnnRNNInputMode_t inputMode,
188
- cudnnDirectionMode_t direction,
189
- cudnnRNNMode_t cellMode,
190
- cudnnRNNAlgo_t algo,
191
- cudnnDataType_t mathPrec);
192
-
193
- CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
194
- cudnnGetRNNDescriptor_v6(cudnnHandle_t handle,
195
- cudnnRNNDescriptor_t rnnDesc,
196
- int *hiddenSize,
197
- int *numLayers,
198
- cudnnDropoutDescriptor_t *dropoutDesc,
199
- cudnnRNNInputMode_t *inputMode,
200
- cudnnDirectionMode_t *direction,
201
- cudnnRNNMode_t *cellMode,
202
- cudnnRNNAlgo_t *algo,
203
- cudnnDataType_t *mathPrec);
204
-
205
- CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
206
- cudnnSetRNNMatrixMathType(cudnnRNNDescriptor_t rnnDesc, cudnnMathType_t mType);
207
-
208
- CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
209
- cudnnGetRNNMatrixMathType(cudnnRNNDescriptor_t rnnDesc, cudnnMathType_t *mType);
210
-
211
- CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
212
- cudnnSetRNNBiasMode(cudnnRNNDescriptor_t rnnDesc, cudnnRNNBiasMode_t biasMode);
213
-
214
182
  CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
215
- cudnnGetRNNBiasMode(cudnnRNNDescriptor_t rnnDesc, cudnnRNNBiasMode_t *biasMode);
216
-
217
- cudnnStatus_t CUDNNWINAPI
218
183
  cudnnRNNSetClip_v8(cudnnRNNDescriptor_t rnnDesc,
219
184
  cudnnRNNClipMode_t clipMode,
220
185
  cudnnNanPropagation_t clipNanOpt,
@@ -222,71 +187,21 @@ cudnnRNNSetClip_v8(cudnnRNNDescriptor_t rnnDesc,
222
187
  double rclip);
223
188
 
224
189
  cudnnStatus_t CUDNNWINAPI
190
+ cudnnRNNSetClip_v9(cudnnRNNDescriptor_t rnnDesc, cudnnRNNClipMode_t clipMode, double lclip, double rclip);
191
+
192
+ CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
225
193
  cudnnRNNGetClip_v8(cudnnRNNDescriptor_t rnnDesc,
226
194
  cudnnRNNClipMode_t *clipMode,
227
195
  cudnnNanPropagation_t *clipNanOpt,
228
196
  double *lclip,
229
197
  double *rclip);
230
198
 
231
- CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
232
- cudnnRNNSetClip(cudnnHandle_t handle,
233
- cudnnRNNDescriptor_t rnnDesc,
234
- cudnnRNNClipMode_t clipMode,
235
- cudnnNanPropagation_t clipNanOpt,
236
- double lclip,
237
- double rclip);
238
-
239
- CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
240
- cudnnRNNGetClip(cudnnHandle_t handle,
241
- cudnnRNNDescriptor_t rnnDesc,
242
- cudnnRNNClipMode_t *clipMode,
243
- cudnnNanPropagation_t *clipNanOpt,
244
- double *lclip,
245
- double *rclip);
246
-
247
- CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
248
- cudnnSetRNNProjectionLayers(cudnnHandle_t handle,
249
- cudnnRNNDescriptor_t rnnDesc,
250
- const int recProjSize,
251
- const int outProjSize);
252
-
253
- CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
254
- cudnnGetRNNProjectionLayers(cudnnHandle_t handle,
255
- const cudnnRNNDescriptor_t rnnDesc,
256
- int *recProjSize,
257
- int *outProjSize);
258
-
259
- /* Expensive. Creates the plan for the specific settings. */
260
- CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
261
- cudnnCreatePersistentRNNPlan(cudnnRNNDescriptor_t rnnDesc,
262
- const int minibatch,
263
- const cudnnDataType_t dataType,
264
- cudnnPersistentRNNPlan_t *plan);
265
-
266
- CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
267
- cudnnDestroyPersistentRNNPlan(cudnnPersistentRNNPlan_t plan);
268
-
269
- CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
270
- cudnnSetPersistentRNNPlan(cudnnRNNDescriptor_t rnnDesc, cudnnPersistentRNNPlan_t plan);
199
+ cudnnStatus_t CUDNNWINAPI
200
+ cudnnRNNGetClip_v9(cudnnRNNDescriptor_t rnnDesc, cudnnRNNClipMode_t *clipMode, double *lclip, double *rclip);
271
201
 
272
202
  cudnnStatus_t CUDNNWINAPI
273
203
  cudnnBuildRNNDynamic(cudnnHandle_t handle, cudnnRNNDescriptor_t rnnDesc, int miniBatch);
274
204
 
275
- /* dataType in weight descriptors and input descriptors is used to describe storage */
276
- CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
277
- cudnnGetRNNWorkspaceSize(cudnnHandle_t handle,
278
- const cudnnRNNDescriptor_t rnnDesc,
279
- const int seqLength,
280
- const cudnnTensorDescriptor_t *xDesc,
281
- size_t *sizeInBytes);
282
-
283
- CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
284
- cudnnGetRNNTrainingReserveSize(cudnnHandle_t handle,
285
- const cudnnRNNDescriptor_t rnnDesc,
286
- const int seqLength,
287
- const cudnnTensorDescriptor_t *xDesc,
288
- size_t *sizeInBytes);
289
-
290
205
  cudnnStatus_t CUDNNWINAPI
291
206
  cudnnGetRNNTempSpaceSizes(cudnnHandle_t handle,
292
207
  cudnnRNNDescriptor_t rnnDesc,
@@ -295,38 +210,9 @@ cudnnGetRNNTempSpaceSizes(cudnnHandle_t handle,
295
210
  size_t *workSpaceSize,
296
211
  size_t *reserveSpaceSize);
297
212
 
298
- CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
299
- cudnnGetRNNParamsSize(cudnnHandle_t handle,
300
- const cudnnRNNDescriptor_t rnnDesc,
301
- const cudnnTensorDescriptor_t xDesc,
302
- size_t *sizeInBytes,
303
- cudnnDataType_t dataType);
304
-
305
213
  cudnnStatus_t CUDNNWINAPI
306
214
  cudnnGetRNNWeightSpaceSize(cudnnHandle_t handle, cudnnRNNDescriptor_t rnnDesc, size_t *weightSpaceSize);
307
215
 
308
- CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
309
- cudnnGetRNNLinLayerMatrixParams(cudnnHandle_t handle,
310
- const cudnnRNNDescriptor_t rnnDesc,
311
- const int pseudoLayer,
312
- const cudnnTensorDescriptor_t xDesc,
313
- const cudnnFilterDescriptor_t wDesc,
314
- const void *w,
315
- const int linLayerID,
316
- cudnnFilterDescriptor_t linLayerMatDesc,
317
- void **linLayerMat);
318
-
319
- CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
320
- cudnnGetRNNLinLayerBiasParams(cudnnHandle_t handle,
321
- const cudnnRNNDescriptor_t rnnDesc,
322
- const int pseudoLayer,
323
- const cudnnTensorDescriptor_t xDesc,
324
- const cudnnFilterDescriptor_t wDesc,
325
- const void *w,
326
- const int linLayerID,
327
- cudnnFilterDescriptor_t linLayerBiasDesc,
328
- void **linLayerBias);
329
-
330
216
  cudnnStatus_t CUDNNWINAPI
331
217
  cudnnGetRNNWeightParams(cudnnHandle_t handle,
332
218
  cudnnRNNDescriptor_t rnnDesc,
@@ -339,35 +225,6 @@ cudnnGetRNNWeightParams(cudnnHandle_t handle,
339
225
  cudnnTensorDescriptor_t bDesc,
340
226
  void **bAddr);
341
227
 
342
- CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
343
- cudnnRNNForwardInference(cudnnHandle_t handle,
344
- const cudnnRNNDescriptor_t rnnDesc,
345
- const int seqLength,
346
- const cudnnTensorDescriptor_t *xDesc,
347
- const void *x,
348
- const cudnnTensorDescriptor_t hxDesc,
349
- const void *hx,
350
- const cudnnTensorDescriptor_t cxDesc,
351
- const void *cx,
352
- const cudnnFilterDescriptor_t wDesc,
353
- const void *w,
354
- const cudnnTensorDescriptor_t *yDesc,
355
- void *y,
356
- const cudnnTensorDescriptor_t hyDesc,
357
- void *hy,
358
- const cudnnTensorDescriptor_t cyDesc,
359
- void *cy,
360
- void *workSpace,
361
- size_t workSpaceSizeInBytes);
362
-
363
- /* RNN EX API */
364
-
365
- CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
366
- cudnnSetRNNPaddingMode(cudnnRNNDescriptor_t rnnDesc, unsigned paddingMode);
367
-
368
- CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
369
- cudnnGetRNNPaddingMode(cudnnRNNDescriptor_t rnnDesc, unsigned *paddingMode);
370
-
371
228
  cudnnStatus_t CUDNNWINAPI
372
229
  cudnnCreateRNNDataDescriptor(cudnnRNNDataDescriptor_t *rnnDataDesc);
373
230
 
@@ -395,34 +252,6 @@ cudnnGetRNNDataDescriptor(cudnnRNNDataDescriptor_t rnnDataDesc,
395
252
  int seqLengthArray[],
396
253
  void *paddingFill);
397
254
 
398
- CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
399
- cudnnRNNForwardInferenceEx(cudnnHandle_t handle,
400
- const cudnnRNNDescriptor_t rnnDesc,
401
- const cudnnRNNDataDescriptor_t xDesc,
402
- const void *x,
403
- const cudnnTensorDescriptor_t hxDesc,
404
- const void *hx,
405
- const cudnnTensorDescriptor_t cxDesc,
406
- const void *cx,
407
- const cudnnFilterDescriptor_t wDesc,
408
- const void *w,
409
- const cudnnRNNDataDescriptor_t yDesc,
410
- void *y,
411
- const cudnnTensorDescriptor_t hyDesc,
412
- void *hy,
413
- const cudnnTensorDescriptor_t cyDesc,
414
- void *cy,
415
- const cudnnRNNDataDescriptor_t kDesc, /* reserved, should pass NULL */
416
- const void *keys, /* reserved, should pass NULL */
417
- const cudnnRNNDataDescriptor_t cDesc, /* reserved, should pass NULL */
418
- void *cAttn, /* reserved, should pass NULL */
419
- const cudnnRNNDataDescriptor_t iDesc, /* reserved, should pass NULL */
420
- void *iAttn, /* reserved, should pass NULL */
421
- const cudnnRNNDataDescriptor_t qDesc, /* reserved, should pass NULL */
422
- void *queries, /* reserved, should pass NULL */
423
- void *workSpace,
424
- size_t workSpaceSizeInBytes);
425
-
426
255
  cudnnStatus_t CUDNNWINAPI
427
256
  cudnnRNNForward(cudnnHandle_t handle,
428
257
  cudnnRNNDescriptor_t rnnDesc,
@@ -445,39 +274,6 @@ cudnnRNNForward(cudnnHandle_t handle,
445
274
  size_t reserveSpaceSize,
446
275
  void *reserveSpace);
447
276
 
448
- /* RNN FIND API */
449
-
450
- CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
451
- cudnnSetRNNAlgorithmDescriptor(cudnnHandle_t handle, cudnnRNNDescriptor_t rnnDesc, cudnnAlgorithmDescriptor_t algoDesc);
452
-
453
- CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
454
- cudnnGetRNNForwardInferenceAlgorithmMaxCount(cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, int *count);
455
-
456
- CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
457
- cudnnFindRNNForwardInferenceAlgorithmEx(cudnnHandle_t handle,
458
- const cudnnRNNDescriptor_t rnnDesc,
459
- const int seqLength,
460
- const cudnnTensorDescriptor_t *xDesc,
461
- const void *x,
462
- const cudnnTensorDescriptor_t hxDesc,
463
- const void *hx,
464
- const cudnnTensorDescriptor_t cxDesc,
465
- const void *cx,
466
- const cudnnFilterDescriptor_t wDesc,
467
- const void *w,
468
- const cudnnTensorDescriptor_t *yDesc,
469
- void *y,
470
- const cudnnTensorDescriptor_t hyDesc,
471
- void *hy,
472
- const cudnnTensorDescriptor_t cyDesc,
473
- void *cy,
474
- const float findIntensity,
475
- const int requestedAlgoCount,
476
- int *returnedAlgoCount,
477
- cudnnAlgorithmPerformance_t *perfResults,
478
- void *workspace,
479
- size_t workSpaceSizeInBytes);
480
-
481
277
  /* Sequence data descriptor */
482
278
 
483
279
  typedef enum {
@@ -488,17 +284,17 @@ typedef enum {
488
284
  } cudnnSeqDataAxis_t;
489
285
 
490
286
  struct cudnnSeqDataStruct;
491
- typedef struct cudnnSeqDataStruct *cudnnSeqDataDescriptor_t;
287
+ typedef struct cudnnSeqDataStruct *cudnnSeqDataDescriptor_t CUDNN_DEPRECATED;
492
288
 
493
289
  #define CUDNN_SEQDATA_DIM_COUNT 4 /* dimension count */
494
290
 
495
- cudnnStatus_t CUDNNWINAPI
291
+ CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
496
292
  cudnnCreateSeqDataDescriptor(cudnnSeqDataDescriptor_t *seqDataDesc);
497
293
 
498
- cudnnStatus_t CUDNNWINAPI
294
+ CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
499
295
  cudnnDestroySeqDataDescriptor(cudnnSeqDataDescriptor_t seqDataDesc);
500
296
 
501
- cudnnStatus_t CUDNNWINAPI
297
+ CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
502
298
  cudnnSetSeqDataDescriptor(cudnnSeqDataDescriptor_t seqDataDesc,
503
299
  cudnnDataType_t dataType,
504
300
  int nbDims,
@@ -508,7 +304,7 @@ cudnnSetSeqDataDescriptor(cudnnSeqDataDescriptor_t seqDataDesc,
508
304
  const int seqLengthArray[],
509
305
  void *paddingFill);
510
306
 
511
- cudnnStatus_t CUDNNWINAPI
307
+ CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
512
308
  cudnnGetSeqDataDescriptor(const cudnnSeqDataDescriptor_t seqDataDesc,
513
309
  cudnnDataType_t *dataType,
514
310
  int *nbDims,
@@ -522,9 +318,6 @@ cudnnGetSeqDataDescriptor(const cudnnSeqDataDescriptor_t seqDataDesc,
522
318
 
523
319
  /* Multihead Attention */
524
320
 
525
- /* Legacy type for backward compatibility */
526
- typedef unsigned cudnnAttnQueryMap_t;
527
-
528
321
  /*
529
322
  * Multi-head attention options passed via 'attnMode' in cudnnSetAttnDescriptor().
530
323
  * Use the bitwise OR operator to combine several settings listed below. Additional
@@ -536,15 +329,15 @@ typedef unsigned cudnnAttnQueryMap_t;
536
329
  #define CUDNN_ATTN_ENABLE_PROJ_BIASES (1U << 1) /* use biases in attention input and output projections */
537
330
 
538
331
  struct cudnnAttnStruct;
539
- typedef struct cudnnAttnStruct *cudnnAttnDescriptor_t;
332
+ typedef struct cudnnAttnStruct *cudnnAttnDescriptor_t CUDNN_DEPRECATED;
540
333
 
541
- cudnnStatus_t CUDNNWINAPI
334
+ CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
542
335
  cudnnCreateAttnDescriptor(cudnnAttnDescriptor_t *attnDesc);
543
336
 
544
- cudnnStatus_t CUDNNWINAPI
337
+ CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
545
338
  cudnnDestroyAttnDescriptor(cudnnAttnDescriptor_t attnDesc);
546
339
 
547
- cudnnStatus_t CUDNNWINAPI
340
+ CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
548
341
  cudnnSetAttnDescriptor(cudnnAttnDescriptor_t attnDesc,
549
342
  unsigned attnMode,
550
343
  int nHeads,
@@ -566,7 +359,7 @@ cudnnSetAttnDescriptor(cudnnAttnDescriptor_t attnDesc,
566
359
  int maxBatchSize,
567
360
  int maxBeamSize);
568
361
 
569
- cudnnStatus_t CUDNNWINAPI
362
+ CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
570
363
  cudnnGetAttnDescriptor(cudnnAttnDescriptor_t attnDesc,
571
364
  unsigned *attnMode,
572
365
  int *nHeads,
@@ -588,7 +381,7 @@ cudnnGetAttnDescriptor(cudnnAttnDescriptor_t attnDesc,
588
381
  int *maxBatchSize,
589
382
  int *maxBeamSize);
590
383
 
591
- cudnnStatus_t CUDNNWINAPI
384
+ CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
592
385
  cudnnGetMultiHeadAttnBuffers(cudnnHandle_t handle,
593
386
  const cudnnAttnDescriptor_t attnDesc,
594
387
  size_t *weightSizeInBytes,
@@ -608,7 +401,7 @@ typedef enum {
608
401
 
609
402
  #define CUDNN_ATTN_WKIND_COUNT 8 /* Number of attention weight/bias tensors */
610
403
 
611
- cudnnStatus_t CUDNNWINAPI
404
+ CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
612
405
  cudnnGetMultiHeadAttnWeights(cudnnHandle_t handle,
613
406
  const cudnnAttnDescriptor_t attnDesc,
614
407
  cudnnMultiHeadAttnWeightKind_t wKind,
@@ -617,7 +410,7 @@ cudnnGetMultiHeadAttnWeights(cudnnHandle_t handle,
617
410
  cudnnTensorDescriptor_t wDesc,
618
411
  void **wAddr);
619
412
 
620
- cudnnStatus_t CUDNNWINAPI
413
+ CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
621
414
  cudnnMultiHeadAttnForward(cudnnHandle_t handle,
622
415
  const cudnnAttnDescriptor_t attnDesc,
623
416
  int currIdx,
@@ -646,13 +439,233 @@ cudnnMultiHeadAttnForward(cudnnHandle_t handle,
646
439
  * This function is implemented differently in each sub-library. Each sublib
647
440
  * checks whether its own version matches that of its dependencies.
648
441
  * \returns CUDNN_STATUS_SUCCESS if the version check passes,
649
- * CUDNN_STATUS_VERSION_MISMATCH if the versions are inconsistent.
442
+ * CUDNN_STATUS_SUBLIBRARY_VERSION_MISMATCH if the versions are inconsistent.
650
443
  */
651
444
  cudnnStatus_t CUDNNWINAPI
652
- cudnnAdvInferVersionCheck(void);
445
+ cudnnAdvVersionCheck(void);
446
+
447
+ typedef enum {
448
+ CUDNN_WGRAD_MODE_ADD = 0, /* add partial gradients to wgrad output buffers */
449
+ CUDNN_WGRAD_MODE_SET = 1, /* write partial gradients to wgrad output buffers */
450
+ } cudnnWgradMode_t;
451
+
452
+ cudnnStatus_t CUDNNWINAPI
453
+ cudnnRNNBackwardData_v8(cudnnHandle_t handle,
454
+ cudnnRNNDescriptor_t rnnDesc,
455
+ const int32_t devSeqLengths[],
456
+ cudnnRNNDataDescriptor_t yDesc,
457
+ const void *y,
458
+ const void *dy,
459
+ cudnnRNNDataDescriptor_t xDesc,
460
+ void *dx,
461
+ cudnnTensorDescriptor_t hDesc,
462
+ const void *hx,
463
+ const void *dhy,
464
+ void *dhx,
465
+ cudnnTensorDescriptor_t cDesc,
466
+ const void *cx,
467
+ const void *dcy,
468
+ void *dcx,
469
+ size_t weightSpaceSize,
470
+ const void *weightSpace,
471
+ size_t workSpaceSize,
472
+ void *workSpace,
473
+ size_t reserveSpaceSize,
474
+ void *reserveSpace);
475
+
476
+ cudnnStatus_t CUDNNWINAPI
477
+ cudnnRNNBackwardWeights_v8(cudnnHandle_t handle,
478
+ cudnnRNNDescriptor_t rnnDesc,
479
+ cudnnWgradMode_t addGrad,
480
+ const int32_t devSeqLengths[],
481
+ cudnnRNNDataDescriptor_t xDesc,
482
+ const void *x,
483
+ cudnnTensorDescriptor_t hDesc,
484
+ const void *hx,
485
+ cudnnRNNDataDescriptor_t yDesc,
486
+ const void *y,
487
+ size_t weightSpaceSize,
488
+ void *dweightSpace,
489
+ size_t workSpaceSize,
490
+ void *workSpace,
491
+ size_t reserveSpaceSize,
492
+ void *reserveSpace);
493
+
494
+ CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
495
+ cudnnMultiHeadAttnBackwardData(cudnnHandle_t handle,
496
+ const cudnnAttnDescriptor_t attnDesc,
497
+ const int loWinIdx[],
498
+ const int hiWinIdx[],
499
+ const int devSeqLengthsDQDO[],
500
+ const int devSeqLengthsDKDV[],
501
+ const cudnnSeqDataDescriptor_t doDesc,
502
+ const void *dout,
503
+ const cudnnSeqDataDescriptor_t dqDesc,
504
+ void *dqueries,
505
+ const void *queries,
506
+ const cudnnSeqDataDescriptor_t dkDesc,
507
+ void *dkeys,
508
+ const void *keys,
509
+ const cudnnSeqDataDescriptor_t dvDesc,
510
+ void *dvalues,
511
+ const void *values,
512
+ size_t weightSizeInBytes,
513
+ const void *weights,
514
+ size_t workSpaceSizeInBytes,
515
+ void *workSpace,
516
+ size_t reserveSpaceSizeInBytes,
517
+ void *reserveSpace);
518
+
519
+ CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
520
+ cudnnMultiHeadAttnBackwardWeights(cudnnHandle_t handle,
521
+ const cudnnAttnDescriptor_t attnDesc,
522
+ cudnnWgradMode_t addGrad,
523
+ const cudnnSeqDataDescriptor_t qDesc,
524
+ const void *queries,
525
+ const cudnnSeqDataDescriptor_t kDesc,
526
+ const void *keys,
527
+ const cudnnSeqDataDescriptor_t vDesc,
528
+ const void *values,
529
+ const cudnnSeqDataDescriptor_t doDesc,
530
+ const void *dout,
531
+ size_t weightSizeInBytes,
532
+ const void *weights,
533
+ void *dweights,
534
+ size_t workSpaceSizeInBytes,
535
+ void *workSpace,
536
+ size_t reserveSpaceSizeInBytes,
537
+ void *reserveSpace);
538
+
539
+ /*
540
+ * CTC (Connectionist Temporal Classification) loss descriptor create/destroy/set/get functions
541
+ */
542
+ /* Input normalization mode for loss function */
543
+ typedef enum {
544
+ CUDNN_LOSS_NORMALIZATION_NONE = 0,
545
+ CUDNN_LOSS_NORMALIZATION_SOFTMAX = 1,
546
+ } cudnnLossNormalizationMode_t;
547
+
548
+ cudnnStatus_t CUDNNWINAPI
549
+ cudnnCreateCTCLossDescriptor(cudnnCTCLossDescriptor_t *ctcLossDesc);
550
+
551
+ CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
552
+ cudnnSetCTCLossDescriptor(cudnnCTCLossDescriptor_t ctcLossDesc, cudnnDataType_t compType);
553
+
554
+ CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
555
+ cudnnSetCTCLossDescriptorEx(cudnnCTCLossDescriptor_t ctcLossDesc,
556
+ cudnnDataType_t compType,
557
+ cudnnLossNormalizationMode_t normMode,
558
+ cudnnNanPropagation_t gradMode);
559
+
560
+ CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
561
+ cudnnSetCTCLossDescriptor_v8(cudnnCTCLossDescriptor_t ctcLossDesc,
562
+ cudnnDataType_t compType,
563
+ cudnnLossNormalizationMode_t normMode,
564
+ cudnnNanPropagation_t gradMode,
565
+ int maxLabelLength);
566
+
567
+ cudnnStatus_t CUDNNWINAPI
568
+ cudnnSetCTCLossDescriptor_v9(cudnnCTCLossDescriptor_t ctcLossDesc,
569
+ cudnnDataType_t compType,
570
+ cudnnLossNormalizationMode_t normMode,
571
+ cudnnCTCGradMode_t ctcGradMode,
572
+ int maxLabelLength);
573
+
574
+ CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
575
+ cudnnGetCTCLossDescriptor(cudnnCTCLossDescriptor_t ctcLossDesc, cudnnDataType_t *compType);
576
+
577
+ CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
578
+ cudnnGetCTCLossDescriptorEx(cudnnCTCLossDescriptor_t ctcLossDesc,
579
+ cudnnDataType_t *compType,
580
+ cudnnLossNormalizationMode_t *normMode,
581
+ cudnnNanPropagation_t *gradMode);
582
+
583
+ CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
584
+ cudnnGetCTCLossDescriptor_v8(cudnnCTCLossDescriptor_t ctcLossDesc,
585
+ cudnnDataType_t *compType,
586
+ cudnnLossNormalizationMode_t *normMode,
587
+ cudnnNanPropagation_t *gradMode,
588
+ int *maxLabelLength);
589
+
590
+ cudnnStatus_t CUDNNWINAPI
591
+ cudnnGetCTCLossDescriptor_v9(cudnnCTCLossDescriptor_t ctcLossDesc,
592
+ cudnnDataType_t *compType,
593
+ cudnnLossNormalizationMode_t *normMode,
594
+ cudnnCTCGradMode_t *ctcGradMode,
595
+ int *maxLabelLength);
596
+
597
+ cudnnStatus_t CUDNNWINAPI
598
+ cudnnDestroyCTCLossDescriptor(cudnnCTCLossDescriptor_t ctcLossDesc);
599
+
600
+ /* return the ctc costs and gradients, given the probabilities and labels */
601
+ cudnnStatus_t CUDNNWINAPI
602
+ cudnnCTCLoss(
603
+ cudnnHandle_t handle,
604
+ const cudnnTensorDescriptor_t
605
+ probsDesc, /* Tensor descriptor for probabilities, the dimensions are T,N,A (T is the timing steps, N is the
606
+ mini batch size, A is the alphabet size) */
607
+ const void *probs, /* probabilities after softmax, in GPU memory */
608
+ const int hostLabels[], /* labels, in CPU memory */
609
+ const int hostLabelLengths[], /* the length of each label, in CPU memory */
610
+ const int hostInputLengths[], /* the lengths of timing steps in each batch, in CPU memory */
611
+ void *costs, /* the returned costs of CTC, in GPU memory */
612
+ const cudnnTensorDescriptor_t gradientsDesc, /* Tensor descriptor for gradients, the dimensions are T,N,A */
613
+ void *gradients, /* the returned CTC gradients, in GPU memory, to compute costs only, set it to NULL */
614
+ cudnnCTCLossAlgo_t algo, /* algorithm selected, supported now 0 and 1 */
615
+ cudnnCTCLossDescriptor_t ctcLossDesc,
616
+ void *workspace, /* pointer to the workspace, in GPU memory */
617
+ size_t workSpaceSizeInBytes); /* size of the workspace */
618
+
619
+ /* return the ctc costs and gradients, given the probabilities and labels */
620
+ cudnnStatus_t CUDNNWINAPI
621
+ cudnnCTCLoss_v8(
622
+ cudnnHandle_t handle,
623
+ cudnnCTCLossAlgo_t algo, /* algorithm selected, supported now 0 and 1 */
624
+ cudnnCTCLossDescriptor_t ctcLossDesc,
625
+ const cudnnTensorDescriptor_t
626
+ probsDesc, /* Tensor descriptor for probabilities, the dimensions are T,N,A (T is the timing steps, N is the
627
+ mini batch size, A is the alphabet size) */
628
+ const void *probs, /* probabilities after softmax, in GPU memory */
629
+ const int labels[], /* labels, in GPU memory */
630
+ const int labelLengths[], /* the length of each label, in GPU memory */
631
+ const int inputLengths[], /* the lengths of timing steps in each batch, in GPU memory */
632
+ void *costs, /* the returned costs of CTC, in GPU memory */
633
+ const cudnnTensorDescriptor_t gradientsDesc, /* Tensor descriptor for gradients, the dimensions are T,N,A */
634
+ void *gradients, /* the returned CTC gradients, in GPU memory, to compute costs only, set it to NULL */
635
+ size_t workSpaceSizeInBytes, /* size of the workspace */
636
+ void *workspace); /* pointer to the workspace, in GPU memory */
637
+
638
+ /* return the workspace size needed for ctc */
639
+ cudnnStatus_t CUDNNWINAPI
640
+ cudnnGetCTCLossWorkspaceSize(
641
+ cudnnHandle_t handle,
642
+ const cudnnTensorDescriptor_t probsDesc, /* Tensor descriptor for probabilities, the dimensions are T,N,A (T is the
643
+ timing steps, N is the mini batch size, A is the alphabet size) */
644
+ const cudnnTensorDescriptor_t gradientsDesc, /* Tensor descriptor for gradients, the
645
+ dimensions are T,N,A. To compute costs
646
+ only, set it to NULL */
647
+ const int *labels, /* labels, in CPU memory */
648
+ const int *labelLengths, /* the length of each label, in CPU memory */
649
+ const int *inputLengths, /* the lengths of timing steps in each batch, in CPU memory */
650
+ cudnnCTCLossAlgo_t algo, /* algorithm selected, supported now 0 and 1 */
651
+ cudnnCTCLossDescriptor_t ctcLossDesc,
652
+ size_t *sizeInBytes); /* pointer to the returned workspace size */
653
+
654
+ /* return the workspace size needed for ctc */
655
+ cudnnStatus_t CUDNNWINAPI
656
+ cudnnGetCTCLossWorkspaceSize_v8(
657
+ cudnnHandle_t handle,
658
+ cudnnCTCLossAlgo_t algo, /* algorithm selected, supported now 0 and 1 */
659
+ cudnnCTCLossDescriptor_t ctcLossDesc,
660
+ const cudnnTensorDescriptor_t probsDesc, /* Tensor descriptor for probabilities, the dimensions are T,N,A (T is the
661
+ timing steps, N is the mini batch size, A is the alphabet size) */
662
+ const cudnnTensorDescriptor_t gradientsDesc, /* Tensor descriptor for gradients, the
663
+ dimensions are T,N,A. To compute costs
664
+ only, set it to NULL */
665
+ size_t *sizeInBytes); /* pointer to the returned workspace size */
653
666
 
654
667
  #if defined(__cplusplus)
655
668
  }
656
669
  #endif
657
670
 
658
- #endif /* CUDNN_ADV_INFER_H_ */
671
+ #endif /* CUDNN_ADV_H_ */