nvidia-cudnn-cu12 8.9.7.29__py3-none-win_amd64.whl → 9.0.0.312__py3-none-win_amd64.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (56)
  1. nvidia/cudnn/bin/cudnn64_9.dll +0 -0
  2. nvidia/cudnn/bin/{cudnn_adv_infer64_8.dll → cudnn_adv64_9.dll} +0 -0
  3. nvidia/cudnn/bin/cudnn_cnn64_9.dll +0 -0
  4. nvidia/cudnn/bin/{cudnn_cnn_infer64_8.dll → cudnn_engines_precompiled64_9.dll} +0 -0
  5. nvidia/cudnn/bin/cudnn_engines_runtime_compiled64_9.dll +0 -0
  6. nvidia/cudnn/bin/cudnn_graph64_9.dll +0 -0
  7. nvidia/cudnn/bin/{cudnn_ops_infer64_8.dll → cudnn_heuristic64_9.dll} +0 -0
  8. nvidia/cudnn/bin/{cudnn_adv_train64_8.dll → cudnn_ops64_9.dll} +0 -0
  9. nvidia/cudnn/include/cudnn.h +8 -18
  10. nvidia/cudnn/include/{cudnn_adv_infer.h → cudnn_adv.h} +265 -252
  11. nvidia/cudnn/include/cudnn_backend.h +3 -558
  12. nvidia/cudnn/include/{cudnn_cnn_infer.h → cudnn_cnn.h} +187 -65
  13. nvidia/cudnn/include/cudnn_graph.h +908 -0
  14. nvidia/cudnn/include/{cudnn_ops_infer.h → cudnn_ops.h} +469 -336
  15. nvidia/cudnn/include/cudnn_version.h +4 -43
  16. nvidia/cudnn/lib/x64/cudnn.lib +0 -0
  17. nvidia/cudnn/lib/x64/cudnn64_9.lib +0 -0
  18. nvidia/cudnn/lib/x64/cudnn_adv.lib +0 -0
  19. nvidia/cudnn/lib/x64/cudnn_adv64_9.lib +0 -0
  20. nvidia/cudnn/lib/x64/cudnn_cnn.lib +0 -0
  21. nvidia/cudnn/lib/x64/cudnn_cnn64_9.lib +0 -0
  22. nvidia/cudnn/lib/x64/cudnn_engines_precompiled.lib +0 -0
  23. nvidia/cudnn/lib/x64/cudnn_engines_precompiled64_9.lib +0 -0
  24. nvidia/cudnn/lib/x64/cudnn_engines_runtime_compiled.lib +0 -0
  25. nvidia/cudnn/lib/x64/cudnn_engines_runtime_compiled64_9.lib +0 -0
  26. nvidia/cudnn/lib/x64/cudnn_graph.lib +0 -0
  27. nvidia/cudnn/lib/x64/cudnn_graph64_9.lib +0 -0
  28. nvidia/cudnn/lib/x64/cudnn_heuristic.lib +0 -0
  29. nvidia/cudnn/lib/x64/cudnn_heuristic64_9.lib +0 -0
  30. nvidia/cudnn/lib/x64/cudnn_ops.lib +0 -0
  31. nvidia/cudnn/lib/x64/cudnn_ops64_9.lib +0 -0
  32. {nvidia_cudnn_cu12-8.9.7.29.dist-info → nvidia_cudnn_cu12-9.0.0.312.dist-info}/METADATA +1 -1
  33. nvidia_cudnn_cu12-9.0.0.312.dist-info/RECORD +41 -0
  34. nvidia/cudnn/bin/cudnn64_8.dll +0 -0
  35. nvidia/cudnn/bin/cudnn_cnn_train64_8.dll +0 -0
  36. nvidia/cudnn/bin/cudnn_ops_train64_8.dll +0 -0
  37. nvidia/cudnn/include/cudnn_adv_train.h +0 -540
  38. nvidia/cudnn/include/cudnn_cnn_train.h +0 -219
  39. nvidia/cudnn/include/cudnn_ops_train.h +0 -501
  40. nvidia/cudnn/lib/x64/cudnn64_8.lib +0 -0
  41. nvidia/cudnn/lib/x64/cudnn_adv_infer.lib +0 -0
  42. nvidia/cudnn/lib/x64/cudnn_adv_infer64_8.lib +0 -0
  43. nvidia/cudnn/lib/x64/cudnn_adv_train.lib +0 -0
  44. nvidia/cudnn/lib/x64/cudnn_adv_train64_8.lib +0 -0
  45. nvidia/cudnn/lib/x64/cudnn_cnn_infer.lib +0 -0
  46. nvidia/cudnn/lib/x64/cudnn_cnn_infer64_8.lib +0 -0
  47. nvidia/cudnn/lib/x64/cudnn_cnn_train.lib +0 -0
  48. nvidia/cudnn/lib/x64/cudnn_cnn_train64_8.lib +0 -0
  49. nvidia/cudnn/lib/x64/cudnn_ops_infer.lib +0 -0
  50. nvidia/cudnn/lib/x64/cudnn_ops_infer64_8.lib +0 -0
  51. nvidia/cudnn/lib/x64/cudnn_ops_train.lib +0 -0
  52. nvidia/cudnn/lib/x64/cudnn_ops_train64_8.lib +0 -0
  53. nvidia_cudnn_cu12-8.9.7.29.dist-info/RECORD +0 -40
  54. {nvidia_cudnn_cu12-8.9.7.29.dist-info → nvidia_cudnn_cu12-9.0.0.312.dist-info}/License.txt +0 -0
  55. {nvidia_cudnn_cu12-8.9.7.29.dist-info → nvidia_cudnn_cu12-9.0.0.312.dist-info}/WHEEL +0 -0
  56. {nvidia_cudnn_cu12-8.9.7.29.dist-info → nvidia_cudnn_cu12-9.0.0.312.dist-info}/top_level.txt +0 -0
@@ -51,565 +51,10 @@
51
51
  #define _CUDNN_BACKEND_H_
52
52
 
53
53
  /*
54
- * The content in this header file is under development to be included in cudnn.h in the future
55
- * Production code should have all include of this header file remove.
54
+ * The content of this header has been moved into cudnn_graph.h.
55
+ * This header is kept for the backward compatibility purpose.
56
56
  */
57
57
 
58
- #include "cudnn_ops_infer.h"
59
- #include "cudnn_cnn_infer.h"
60
-
61
- /* NOTE: definition in extern "C" to be copied later to public header */
62
- #if defined(__cplusplus)
63
- extern "C" {
64
- #endif
65
-
66
- typedef void *cudnnBackendDescriptor_t;
67
-
68
- typedef struct cudnnFractionStruct {
69
- int64_t numerator;
70
- int64_t denominator;
71
- } cudnnFraction_t;
72
-
73
- typedef enum {
74
- CUDNN_POINTWISE_ADD = 0,
75
- CUDNN_POINTWISE_ADD_SQUARE = 5,
76
- CUDNN_POINTWISE_DIV = 6,
77
- CUDNN_POINTWISE_MAX = 3,
78
- CUDNN_POINTWISE_MIN = 2,
79
- CUDNN_POINTWISE_MOD = 7,
80
- CUDNN_POINTWISE_MUL = 1,
81
- CUDNN_POINTWISE_POW = 8,
82
- CUDNN_POINTWISE_SUB = 9,
83
-
84
- CUDNN_POINTWISE_ABS = 10,
85
- CUDNN_POINTWISE_CEIL = 11,
86
- CUDNN_POINTWISE_COS = 12,
87
- CUDNN_POINTWISE_EXP = 13,
88
- CUDNN_POINTWISE_FLOOR = 14,
89
- CUDNN_POINTWISE_LOG = 15,
90
- CUDNN_POINTWISE_NEG = 16,
91
- CUDNN_POINTWISE_RSQRT = 17,
92
- CUDNN_POINTWISE_SIN = 18,
93
- CUDNN_POINTWISE_SQRT = 4,
94
- CUDNN_POINTWISE_TAN = 19,
95
- CUDNN_POINTWISE_ERF = 20,
96
- CUDNN_POINTWISE_IDENTITY = 21,
97
- CUDNN_POINTWISE_RECIPROCAL = 22,
98
-
99
- CUDNN_POINTWISE_RELU_FWD = 100,
100
- CUDNN_POINTWISE_TANH_FWD = 101,
101
- CUDNN_POINTWISE_SIGMOID_FWD = 102,
102
- CUDNN_POINTWISE_ELU_FWD = 103,
103
- CUDNN_POINTWISE_GELU_FWD = 104,
104
- CUDNN_POINTWISE_SOFTPLUS_FWD = 105,
105
- CUDNN_POINTWISE_SWISH_FWD = 106,
106
- CUDNN_POINTWISE_GELU_APPROX_TANH_FWD = 107,
107
-
108
- CUDNN_POINTWISE_RELU_BWD = 200,
109
- CUDNN_POINTWISE_TANH_BWD = 201,
110
- CUDNN_POINTWISE_SIGMOID_BWD = 202,
111
- CUDNN_POINTWISE_ELU_BWD = 203,
112
- CUDNN_POINTWISE_GELU_BWD = 204,
113
- CUDNN_POINTWISE_SOFTPLUS_BWD = 205,
114
- CUDNN_POINTWISE_SWISH_BWD = 206,
115
- CUDNN_POINTWISE_GELU_APPROX_TANH_BWD = 207,
116
-
117
- CUDNN_POINTWISE_CMP_EQ = 300,
118
- CUDNN_POINTWISE_CMP_NEQ = 301,
119
- CUDNN_POINTWISE_CMP_GT = 302,
120
- CUDNN_POINTWISE_CMP_GE = 303,
121
- CUDNN_POINTWISE_CMP_LT = 304,
122
- CUDNN_POINTWISE_CMP_LE = 305,
123
-
124
- CUDNN_POINTWISE_LOGICAL_AND = 400,
125
- CUDNN_POINTWISE_LOGICAL_OR = 401,
126
- CUDNN_POINTWISE_LOGICAL_NOT = 402,
127
-
128
- CUDNN_POINTWISE_GEN_INDEX = 501,
129
-
130
- CUDNN_POINTWISE_BINARY_SELECT = 601,
131
- } cudnnPointwiseMode_t;
132
-
133
- typedef enum {
134
- CUDNN_RESAMPLE_NEAREST = 0,
135
- CUDNN_RESAMPLE_BILINEAR = 1,
136
- CUDNN_RESAMPLE_AVGPOOL = 2,
137
- CUDNN_RESAMPLE_AVGPOOL_INCLUDE_PADDING = 2,
138
- CUDNN_RESAMPLE_AVGPOOL_EXCLUDE_PADDING = 4,
139
- CUDNN_RESAMPLE_MAXPOOL = 3,
140
- } cudnnResampleMode_t;
141
-
142
- typedef enum {
143
- CUDNN_SIGNAL_SET = 0,
144
- CUDNN_SIGNAL_WAIT = 1,
145
- } cudnnSignalMode_t;
146
-
147
- typedef enum {
148
- CUDNN_GENSTATS_SUM_SQSUM = 0,
149
- } cudnnGenStatsMode_t;
150
-
151
- typedef enum {
152
- CUDNN_BN_FINALIZE_STATISTICS_TRAINING = 0,
153
- CUDNN_BN_FINALIZE_STATISTICS_INFERENCE = 1,
154
- } cudnnBnFinalizeStatsMode_t;
155
-
156
- typedef enum {
157
- CUDNN_RNG_DISTRIBUTION_BERNOULLI,
158
- CUDNN_RNG_DISTRIBUTION_UNIFORM,
159
- CUDNN_RNG_DISTRIBUTION_NORMAL,
160
- } cudnnRngDistribution_t;
161
-
162
- typedef enum {
163
- CUDNN_ATTR_POINTWISE_MODE = 0,
164
- CUDNN_ATTR_POINTWISE_MATH_PREC = 1,
165
- CUDNN_ATTR_POINTWISE_NAN_PROPAGATION = 2,
166
- CUDNN_ATTR_POINTWISE_RELU_LOWER_CLIP = 3,
167
- CUDNN_ATTR_POINTWISE_RELU_UPPER_CLIP = 4,
168
- CUDNN_ATTR_POINTWISE_RELU_LOWER_CLIP_SLOPE = 5,
169
- CUDNN_ATTR_POINTWISE_ELU_ALPHA = 6,
170
- CUDNN_ATTR_POINTWISE_SOFTPLUS_BETA = 7,
171
- CUDNN_ATTR_POINTWISE_SWISH_BETA = 8,
172
- CUDNN_ATTR_POINTWISE_AXIS = 9,
173
-
174
- CUDNN_ATTR_CONVOLUTION_COMP_TYPE = 100,
175
- CUDNN_ATTR_CONVOLUTION_CONV_MODE = 101,
176
- CUDNN_ATTR_CONVOLUTION_DILATIONS = 102,
177
- CUDNN_ATTR_CONVOLUTION_FILTER_STRIDES = 103,
178
- CUDNN_ATTR_CONVOLUTION_POST_PADDINGS = 104,
179
- CUDNN_ATTR_CONVOLUTION_PRE_PADDINGS = 105,
180
- CUDNN_ATTR_CONVOLUTION_SPATIAL_DIMS = 106,
181
-
182
- CUDNN_ATTR_ENGINEHEUR_MODE = 200,
183
- CUDNN_ATTR_ENGINEHEUR_OPERATION_GRAPH = 201,
184
- CUDNN_ATTR_ENGINEHEUR_RESULTS = 202,
185
- CUDNN_ATTR_ENGINEHEUR_SM_COUNT_TARGET = 203,
186
-
187
- CUDNN_ATTR_ENGINECFG_ENGINE = 300,
188
- CUDNN_ATTR_ENGINECFG_INTERMEDIATE_INFO = 301,
189
- CUDNN_ATTR_ENGINECFG_KNOB_CHOICES = 302,
190
-
191
- CUDNN_ATTR_EXECUTION_PLAN_HANDLE = 400,
192
- CUDNN_ATTR_EXECUTION_PLAN_ENGINE_CONFIG = 401,
193
- CUDNN_ATTR_EXECUTION_PLAN_WORKSPACE_SIZE = 402,
194
- CUDNN_ATTR_EXECUTION_PLAN_COMPUTED_INTERMEDIATE_UIDS = 403,
195
- CUDNN_ATTR_EXECUTION_PLAN_RUN_ONLY_INTERMEDIATE_UIDS = 404,
196
- CUDNN_ATTR_EXECUTION_PLAN_JSON_REPRESENTATION = 405,
197
-
198
- CUDNN_ATTR_INTERMEDIATE_INFO_UNIQUE_ID = 500,
199
- CUDNN_ATTR_INTERMEDIATE_INFO_SIZE = 501,
200
- CUDNN_ATTR_INTERMEDIATE_INFO_DEPENDENT_DATA_UIDS = 502,
201
- CUDNN_ATTR_INTERMEDIATE_INFO_DEPENDENT_ATTRIBUTES = 503,
202
-
203
- CUDNN_ATTR_KNOB_CHOICE_KNOB_TYPE = 600,
204
- CUDNN_ATTR_KNOB_CHOICE_KNOB_VALUE = 601,
205
-
206
- CUDNN_ATTR_OPERATION_CONVOLUTION_FORWARD_ALPHA = 700,
207
- CUDNN_ATTR_OPERATION_CONVOLUTION_FORWARD_BETA = 701,
208
- CUDNN_ATTR_OPERATION_CONVOLUTION_FORWARD_CONV_DESC = 702,
209
- CUDNN_ATTR_OPERATION_CONVOLUTION_FORWARD_W = 703,
210
- CUDNN_ATTR_OPERATION_CONVOLUTION_FORWARD_X = 704,
211
- CUDNN_ATTR_OPERATION_CONVOLUTION_FORWARD_Y = 705,
212
- CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_DATA_ALPHA = 706,
213
- CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_DATA_BETA = 707,
214
- CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_DATA_CONV_DESC = 708,
215
- CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_DATA_W = 709,
216
- CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_DATA_DX = 710,
217
- CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_DATA_DY = 711,
218
- CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_FILTER_ALPHA = 712,
219
- CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_FILTER_BETA = 713,
220
- CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_FILTER_CONV_DESC = 714,
221
- CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_FILTER_DW = 715,
222
- CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_FILTER_X = 716,
223
- CUDNN_ATTR_OPERATION_CONVOLUTION_BWD_FILTER_DY = 717,
224
-
225
- CUDNN_ATTR_OPERATION_POINTWISE_PW_DESCRIPTOR = 750,
226
- CUDNN_ATTR_OPERATION_POINTWISE_XDESC = 751,
227
- CUDNN_ATTR_OPERATION_POINTWISE_BDESC = 752,
228
- CUDNN_ATTR_OPERATION_POINTWISE_YDESC = 753,
229
- CUDNN_ATTR_OPERATION_POINTWISE_ALPHA1 = 754,
230
- CUDNN_ATTR_OPERATION_POINTWISE_ALPHA2 = 755,
231
- CUDNN_ATTR_OPERATION_POINTWISE_DXDESC = 756,
232
- CUDNN_ATTR_OPERATION_POINTWISE_DYDESC = 757,
233
- CUDNN_ATTR_OPERATION_POINTWISE_TDESC = 758,
234
-
235
- CUDNN_ATTR_OPERATION_GENSTATS_MODE = 770,
236
- CUDNN_ATTR_OPERATION_GENSTATS_MATH_PREC = 771,
237
- CUDNN_ATTR_OPERATION_GENSTATS_XDESC = 772,
238
- CUDNN_ATTR_OPERATION_GENSTATS_SUMDESC = 773,
239
- CUDNN_ATTR_OPERATION_GENSTATS_SQSUMDESC = 774,
240
-
241
- CUDNN_ATTR_OPERATION_BN_FINALIZE_STATS_MODE = 780,
242
- CUDNN_ATTR_OPERATION_BN_FINALIZE_MATH_PREC = 781,
243
- CUDNN_ATTR_OPERATION_BN_FINALIZE_Y_SUM_DESC = 782,
244
- CUDNN_ATTR_OPERATION_BN_FINALIZE_Y_SQ_SUM_DESC = 783,
245
- CUDNN_ATTR_OPERATION_BN_FINALIZE_SCALE_DESC = 784,
246
- CUDNN_ATTR_OPERATION_BN_FINALIZE_BIAS_DESC = 785,
247
- CUDNN_ATTR_OPERATION_BN_FINALIZE_PREV_RUNNING_MEAN_DESC = 786,
248
- CUDNN_ATTR_OPERATION_BN_FINALIZE_PREV_RUNNING_VAR_DESC = 787,
249
- CUDNN_ATTR_OPERATION_BN_FINALIZE_UPDATED_RUNNING_MEAN_DESC = 788,
250
- CUDNN_ATTR_OPERATION_BN_FINALIZE_UPDATED_RUNNING_VAR_DESC = 789,
251
- CUDNN_ATTR_OPERATION_BN_FINALIZE_SAVED_MEAN_DESC = 790,
252
- CUDNN_ATTR_OPERATION_BN_FINALIZE_SAVED_INV_STD_DESC = 791,
253
- CUDNN_ATTR_OPERATION_BN_FINALIZE_EQ_SCALE_DESC = 792,
254
- CUDNN_ATTR_OPERATION_BN_FINALIZE_EQ_BIAS_DESC = 793,
255
- CUDNN_ATTR_OPERATION_BN_FINALIZE_ACCUM_COUNT_DESC = 794,
256
- CUDNN_ATTR_OPERATION_BN_FINALIZE_EPSILON_DESC = 795,
257
- CUDNN_ATTR_OPERATION_BN_FINALIZE_EXP_AVERATE_FACTOR_DESC = 796,
258
-
259
- CUDNN_ATTR_OPERATIONGRAPH_HANDLE = 800,
260
- CUDNN_ATTR_OPERATIONGRAPH_OPS = 801,
261
- CUDNN_ATTR_OPERATIONGRAPH_ENGINE_GLOBAL_COUNT = 802,
262
-
263
- CUDNN_ATTR_TENSOR_BYTE_ALIGNMENT = 900,
264
- CUDNN_ATTR_TENSOR_DATA_TYPE = 901,
265
- CUDNN_ATTR_TENSOR_DIMENSIONS = 902,
266
- CUDNN_ATTR_TENSOR_STRIDES = 903,
267
- CUDNN_ATTR_TENSOR_VECTOR_COUNT = 904,
268
- CUDNN_ATTR_TENSOR_VECTORIZED_DIMENSION = 905,
269
- CUDNN_ATTR_TENSOR_UNIQUE_ID = 906,
270
- CUDNN_ATTR_TENSOR_IS_VIRTUAL = 907,
271
- CUDNN_ATTR_TENSOR_IS_BY_VALUE = 908,
272
- CUDNN_ATTR_TENSOR_REORDERING_MODE = 909,
273
- CUDNN_ATTR_TENSOR_RAGGED_OFFSET_DESC = 913,
274
-
275
- CUDNN_ATTR_VARIANT_PACK_UNIQUE_IDS = 1000,
276
- CUDNN_ATTR_VARIANT_PACK_DATA_POINTERS = 1001,
277
- CUDNN_ATTR_VARIANT_PACK_INTERMEDIATES = 1002,
278
- CUDNN_ATTR_VARIANT_PACK_WORKSPACE = 1003,
279
-
280
- CUDNN_ATTR_LAYOUT_INFO_TENSOR_UID = 1100,
281
- CUDNN_ATTR_LAYOUT_INFO_TYPES = 1101,
282
-
283
- CUDNN_ATTR_KNOB_INFO_TYPE = 1200,
284
- CUDNN_ATTR_KNOB_INFO_MAXIMUM_VALUE = 1201,
285
- CUDNN_ATTR_KNOB_INFO_MINIMUM_VALUE = 1202,
286
- CUDNN_ATTR_KNOB_INFO_STRIDE = 1203,
287
-
288
- CUDNN_ATTR_ENGINE_OPERATION_GRAPH = 1300,
289
- CUDNN_ATTR_ENGINE_GLOBAL_INDEX = 1301,
290
- CUDNN_ATTR_ENGINE_KNOB_INFO = 1302,
291
- CUDNN_ATTR_ENGINE_NUMERICAL_NOTE = 1303,
292
- CUDNN_ATTR_ENGINE_LAYOUT_INFO = 1304,
293
- CUDNN_ATTR_ENGINE_BEHAVIOR_NOTE = 1305,
294
- CUDNN_ATTR_ENGINE_SM_COUNT_TARGET = 1306,
295
-
296
- CUDNN_ATTR_MATMUL_COMP_TYPE = 1500,
297
- CUDNN_ATTR_MATMUL_PADDING_VALUE = 1503,
298
-
299
- CUDNN_ATTR_OPERATION_MATMUL_ADESC = 1520,
300
- CUDNN_ATTR_OPERATION_MATMUL_BDESC = 1521,
301
- CUDNN_ATTR_OPERATION_MATMUL_CDESC = 1522,
302
- CUDNN_ATTR_OPERATION_MATMUL_DESC = 1523,
303
- CUDNN_ATTR_OPERATION_MATMUL_IRREGULARLY_STRIDED_BATCH_COUNT = 1524,
304
- CUDNN_ATTR_OPERATION_MATMUL_GEMM_M_OVERRIDE_DESC = 1525,
305
- CUDNN_ATTR_OPERATION_MATMUL_GEMM_N_OVERRIDE_DESC = 1526,
306
- CUDNN_ATTR_OPERATION_MATMUL_GEMM_K_OVERRIDE_DESC = 1527,
307
-
308
- CUDNN_ATTR_REDUCTION_OPERATOR = 1600,
309
- CUDNN_ATTR_REDUCTION_COMP_TYPE = 1601,
310
-
311
- CUDNN_ATTR_OPERATION_REDUCTION_XDESC = 1610,
312
- CUDNN_ATTR_OPERATION_REDUCTION_YDESC = 1611,
313
- CUDNN_ATTR_OPERATION_REDUCTION_DESC = 1612,
314
-
315
- CUDNN_ATTR_OPERATION_BN_BWD_WEIGHTS_MATH_PREC = 1620,
316
- CUDNN_ATTR_OPERATION_BN_BWD_WEIGHTS_MEAN_DESC = 1621,
317
- CUDNN_ATTR_OPERATION_BN_BWD_WEIGHTS_INVSTD_DESC = 1622,
318
- CUDNN_ATTR_OPERATION_BN_BWD_WEIGHTS_BN_SCALE_DESC = 1623,
319
- CUDNN_ATTR_OPERATION_BN_BWD_WEIGHTS_X_DESC = 1624,
320
- CUDNN_ATTR_OPERATION_BN_BWD_WEIGHTS_DY_DESC = 1625,
321
- CUDNN_ATTR_OPERATION_BN_BWD_WEIGHTS_DBN_SCALE_DESC = 1626,
322
- CUDNN_ATTR_OPERATION_BN_BWD_WEIGHTS_DBN_BIAS_DESC = 1627,
323
- CUDNN_ATTR_OPERATION_BN_BWD_WEIGHTS_EQ_DY_SCALE_DESC = 1628,
324
- CUDNN_ATTR_OPERATION_BN_BWD_WEIGHTS_EQ_X_SCALE_DESC = 1629,
325
- CUDNN_ATTR_OPERATION_BN_BWD_WEIGHTS_EQ_BIAS = 1630,
326
-
327
- CUDNN_ATTR_RESAMPLE_MODE = 1700,
328
- CUDNN_ATTR_RESAMPLE_COMP_TYPE = 1701,
329
- CUDNN_ATTR_RESAMPLE_SPATIAL_DIMS = 1702,
330
- CUDNN_ATTR_RESAMPLE_POST_PADDINGS = 1703,
331
- CUDNN_ATTR_RESAMPLE_PRE_PADDINGS = 1704,
332
- CUDNN_ATTR_RESAMPLE_STRIDES = 1705,
333
- CUDNN_ATTR_RESAMPLE_WINDOW_DIMS = 1706,
334
- CUDNN_ATTR_RESAMPLE_NAN_PROPAGATION = 1707,
335
- CUDNN_ATTR_RESAMPLE_PADDING_MODE = 1708,
336
-
337
- CUDNN_ATTR_OPERATION_RESAMPLE_FWD_XDESC = 1710,
338
- CUDNN_ATTR_OPERATION_RESAMPLE_FWD_YDESC = 1711,
339
- CUDNN_ATTR_OPERATION_RESAMPLE_FWD_IDXDESC = 1712,
340
- CUDNN_ATTR_OPERATION_RESAMPLE_FWD_ALPHA = 1713,
341
- CUDNN_ATTR_OPERATION_RESAMPLE_FWD_BETA = 1714,
342
- CUDNN_ATTR_OPERATION_RESAMPLE_FWD_DESC = 1716,
343
-
344
- CUDNN_ATTR_OPERATION_RESAMPLE_BWD_DXDESC = 1720,
345
- CUDNN_ATTR_OPERATION_RESAMPLE_BWD_DYDESC = 1721,
346
- CUDNN_ATTR_OPERATION_RESAMPLE_BWD_IDXDESC = 1722,
347
- CUDNN_ATTR_OPERATION_RESAMPLE_BWD_ALPHA = 1723,
348
- CUDNN_ATTR_OPERATION_RESAMPLE_BWD_BETA = 1724,
349
- CUDNN_ATTR_OPERATION_RESAMPLE_BWD_DESC = 1725,
350
- CUDNN_ATTR_OPERATION_RESAMPLE_BWD_XDESC = 1726,
351
- CUDNN_ATTR_OPERATION_RESAMPLE_BWD_YDESC = 1727,
352
-
353
- CUDNN_ATTR_OPERATION_CONCAT_AXIS = 1800,
354
- CUDNN_ATTR_OPERATION_CONCAT_INPUT_DESCS = 1801,
355
- CUDNN_ATTR_OPERATION_CONCAT_INPLACE_INDEX = 1802,
356
- CUDNN_ATTR_OPERATION_CONCAT_OUTPUT_DESC = 1803,
357
-
358
- CUDNN_ATTR_OPERATION_SIGNAL_MODE = 1900,
359
- CUDNN_ATTR_OPERATION_SIGNAL_FLAGDESC = 1901,
360
- CUDNN_ATTR_OPERATION_SIGNAL_VALUE = 1902,
361
- CUDNN_ATTR_OPERATION_SIGNAL_XDESC = 1903,
362
- CUDNN_ATTR_OPERATION_SIGNAL_YDESC = 1904,
363
-
364
- CUDNN_ATTR_OPERATION_NORM_FWD_MODE = 2000,
365
- CUDNN_ATTR_OPERATION_NORM_FWD_PHASE = 2001,
366
- CUDNN_ATTR_OPERATION_NORM_FWD_XDESC = 2002,
367
- CUDNN_ATTR_OPERATION_NORM_FWD_MEAN_DESC = 2003,
368
- CUDNN_ATTR_OPERATION_NORM_FWD_INV_VARIANCE_DESC = 2004,
369
- CUDNN_ATTR_OPERATION_NORM_FWD_SCALE_DESC = 2005,
370
- CUDNN_ATTR_OPERATION_NORM_FWD_BIAS_DESC = 2006,
371
- CUDNN_ATTR_OPERATION_NORM_FWD_EPSILON_DESC = 2007,
372
- CUDNN_ATTR_OPERATION_NORM_FWD_EXP_AVG_FACTOR_DESC = 2008,
373
- CUDNN_ATTR_OPERATION_NORM_FWD_INPUT_RUNNING_MEAN_DESC = 2009,
374
- CUDNN_ATTR_OPERATION_NORM_FWD_INPUT_RUNNING_VAR_DESC = 2010,
375
- CUDNN_ATTR_OPERATION_NORM_FWD_OUTPUT_RUNNING_MEAN_DESC = 2011,
376
- CUDNN_ATTR_OPERATION_NORM_FWD_OUTPUT_RUNNING_VAR_DESC = 2012,
377
- CUDNN_ATTR_OPERATION_NORM_FWD_YDESC = 2013,
378
- CUDNN_ATTR_OPERATION_NORM_FWD_PEER_STAT_DESCS = 2014,
379
-
380
- CUDNN_ATTR_OPERATION_NORM_BWD_MODE = 2100,
381
- CUDNN_ATTR_OPERATION_NORM_BWD_XDESC = 2101,
382
- CUDNN_ATTR_OPERATION_NORM_BWD_MEAN_DESC = 2102,
383
- CUDNN_ATTR_OPERATION_NORM_BWD_INV_VARIANCE_DESC = 2103,
384
- CUDNN_ATTR_OPERATION_NORM_BWD_DYDESC = 2104,
385
- CUDNN_ATTR_OPERATION_NORM_BWD_SCALE_DESC = 2105,
386
- CUDNN_ATTR_OPERATION_NORM_BWD_EPSILON_DESC = 2106,
387
- CUDNN_ATTR_OPERATION_NORM_BWD_DSCALE_DESC = 2107,
388
- CUDNN_ATTR_OPERATION_NORM_BWD_DBIAS_DESC = 2108,
389
- CUDNN_ATTR_OPERATION_NORM_BWD_DXDESC = 2109,
390
- CUDNN_ATTR_OPERATION_NORM_BWD_PEER_STAT_DESCS = 2110,
391
-
392
- CUDNN_ATTR_OPERATION_RESHAPE_XDESC = 2200,
393
- CUDNN_ATTR_OPERATION_RESHAPE_YDESC = 2201,
394
-
395
- CUDNN_ATTR_RNG_DISTRIBUTION = 2300,
396
- CUDNN_ATTR_RNG_NORMAL_DIST_MEAN = 2301,
397
- CUDNN_ATTR_RNG_NORMAL_DIST_STANDARD_DEVIATION = 2302,
398
- CUDNN_ATTR_RNG_UNIFORM_DIST_MAXIMUM = 2303,
399
- CUDNN_ATTR_RNG_UNIFORM_DIST_MINIMUM = 2304,
400
- CUDNN_ATTR_RNG_BERNOULLI_DIST_PROBABILITY = 2305,
401
-
402
- CUDNN_ATTR_OPERATION_RNG_YDESC = 2310,
403
- CUDNN_ATTR_OPERATION_RNG_SEED = 2311,
404
- CUDNN_ATTR_OPERATION_RNG_DESC = 2312,
405
- CUDNN_ATTR_OPERATION_RNG_OFFSET_DESC = 2313,
406
-
407
- } cudnnBackendAttributeName_t;
408
-
409
- typedef enum {
410
- CUDNN_TYPE_HANDLE = 0,
411
- CUDNN_TYPE_DATA_TYPE,
412
- CUDNN_TYPE_BOOLEAN,
413
- CUDNN_TYPE_INT64,
414
- CUDNN_TYPE_FLOAT,
415
- CUDNN_TYPE_DOUBLE,
416
- CUDNN_TYPE_VOID_PTR,
417
- CUDNN_TYPE_CONVOLUTION_MODE,
418
- CUDNN_TYPE_HEUR_MODE,
419
- CUDNN_TYPE_KNOB_TYPE,
420
- CUDNN_TYPE_NAN_PROPOGATION,
421
- CUDNN_TYPE_NUMERICAL_NOTE,
422
- CUDNN_TYPE_LAYOUT_TYPE,
423
- CUDNN_TYPE_ATTRIB_NAME,
424
- CUDNN_TYPE_POINTWISE_MODE,
425
- CUDNN_TYPE_BACKEND_DESCRIPTOR,
426
- CUDNN_TYPE_GENSTATS_MODE,
427
- CUDNN_TYPE_BN_FINALIZE_STATS_MODE,
428
- CUDNN_TYPE_REDUCTION_OPERATOR_TYPE,
429
- CUDNN_TYPE_BEHAVIOR_NOTE,
430
- CUDNN_TYPE_TENSOR_REORDERING_MODE,
431
- CUDNN_TYPE_RESAMPLE_MODE,
432
- CUDNN_TYPE_PADDING_MODE,
433
- CUDNN_TYPE_INT32,
434
- CUDNN_TYPE_CHAR,
435
- CUDNN_TYPE_SIGNAL_MODE,
436
- CUDNN_TYPE_FRACTION,
437
- CUDNN_TYPE_NORM_MODE,
438
- CUDNN_TYPE_NORM_FWD_PHASE,
439
- CUDNN_TYPE_RNG_DISTRIBUTION
440
- } cudnnBackendAttributeType_t;
441
-
442
- typedef enum {
443
- CUDNN_BACKEND_POINTWISE_DESCRIPTOR = 0,
444
- CUDNN_BACKEND_CONVOLUTION_DESCRIPTOR,
445
- CUDNN_BACKEND_ENGINE_DESCRIPTOR,
446
- CUDNN_BACKEND_ENGINECFG_DESCRIPTOR,
447
- CUDNN_BACKEND_ENGINEHEUR_DESCRIPTOR,
448
- CUDNN_BACKEND_EXECUTION_PLAN_DESCRIPTOR,
449
- CUDNN_BACKEND_INTERMEDIATE_INFO_DESCRIPTOR,
450
- CUDNN_BACKEND_KNOB_CHOICE_DESCRIPTOR,
451
- CUDNN_BACKEND_KNOB_INFO_DESCRIPTOR,
452
- CUDNN_BACKEND_LAYOUT_INFO_DESCRIPTOR,
453
- CUDNN_BACKEND_OPERATION_CONVOLUTION_FORWARD_DESCRIPTOR,
454
- CUDNN_BACKEND_OPERATION_CONVOLUTION_BACKWARD_FILTER_DESCRIPTOR,
455
- CUDNN_BACKEND_OPERATION_CONVOLUTION_BACKWARD_DATA_DESCRIPTOR,
456
- CUDNN_BACKEND_OPERATION_POINTWISE_DESCRIPTOR,
457
- CUDNN_BACKEND_OPERATION_GEN_STATS_DESCRIPTOR,
458
- CUDNN_BACKEND_OPERATIONGRAPH_DESCRIPTOR,
459
- CUDNN_BACKEND_VARIANT_PACK_DESCRIPTOR,
460
- CUDNN_BACKEND_TENSOR_DESCRIPTOR,
461
- CUDNN_BACKEND_MATMUL_DESCRIPTOR,
462
- CUDNN_BACKEND_OPERATION_MATMUL_DESCRIPTOR,
463
- CUDNN_BACKEND_OPERATION_BN_FINALIZE_STATISTICS_DESCRIPTOR,
464
- CUDNN_BACKEND_REDUCTION_DESCRIPTOR,
465
- CUDNN_BACKEND_OPERATION_REDUCTION_DESCRIPTOR,
466
- CUDNN_BACKEND_OPERATION_BN_BWD_WEIGHTS_DESCRIPTOR,
467
- CUDNN_BACKEND_RESAMPLE_DESCRIPTOR,
468
- CUDNN_BACKEND_OPERATION_RESAMPLE_FWD_DESCRIPTOR,
469
- CUDNN_BACKEND_OPERATION_RESAMPLE_BWD_DESCRIPTOR,
470
- CUDNN_BACKEND_OPERATION_CONCAT_DESCRIPTOR,
471
- CUDNN_BACKEND_OPERATION_SIGNAL_DESCRIPTOR,
472
- CUDNN_BACKEND_OPERATION_NORM_FORWARD_DESCRIPTOR,
473
- CUDNN_BACKEND_OPERATION_NORM_BACKWARD_DESCRIPTOR,
474
- CUDNN_BACKEND_OPERATION_RESHAPE_DESCRIPTOR,
475
- CUDNN_BACKEND_RNG_DESCRIPTOR,
476
- CUDNN_BACKEND_OPERATION_RNG_DESCRIPTOR
477
- } cudnnBackendDescriptorType_t;
478
-
479
- typedef enum {
480
- CUDNN_NUMERICAL_NOTE_TENSOR_CORE = 0,
481
- CUDNN_NUMERICAL_NOTE_DOWN_CONVERT_INPUTS,
482
- CUDNN_NUMERICAL_NOTE_REDUCED_PRECISION_REDUCTION,
483
- CUDNN_NUMERICAL_NOTE_FFT,
484
- CUDNN_NUMERICAL_NOTE_NONDETERMINISTIC,
485
- CUDNN_NUMERICAL_NOTE_WINOGRAD,
486
- CUDNN_NUMERICAL_NOTE_WINOGRAD_TILE_4x4,
487
- CUDNN_NUMERICAL_NOTE_WINOGRAD_TILE_6x6,
488
- CUDNN_NUMERICAL_NOTE_WINOGRAD_TILE_13x13,
489
- CUDNN_NUMERICAL_NOTE_TYPE_COUNT,
490
- } cudnnBackendNumericalNote_t;
491
-
492
- typedef enum {
493
- CUDNN_BEHAVIOR_NOTE_RUNTIME_COMPILATION = 0,
494
- CUDNN_BEHAVIOR_NOTE_REQUIRES_FILTER_INT8x32_REORDER = 1,
495
- CUDNN_BEHAVIOR_NOTE_REQUIRES_BIAS_INT8x32_REORDER = 2,
496
- CUDNN_BEHAVIOR_NOTE_TYPE_COUNT,
497
- } cudnnBackendBehaviorNote_t;
498
-
499
- typedef enum {
500
- CUDNN_KNOB_TYPE_SPLIT_K = 0,
501
- CUDNN_KNOB_TYPE_SWIZZLE = 1,
502
- CUDNN_KNOB_TYPE_TILE_SIZE = 2,
503
- CUDNN_KNOB_TYPE_USE_TEX = 3,
504
- CUDNN_KNOB_TYPE_EDGE = 4,
505
- CUDNN_KNOB_TYPE_KBLOCK = 5,
506
- CUDNN_KNOB_TYPE_LDGA = 6,
507
- CUDNN_KNOB_TYPE_LDGB = 7,
508
- CUDNN_KNOB_TYPE_CHUNK_K = 8,
509
- CUDNN_KNOB_TYPE_SPLIT_H = 9,
510
- CUDNN_KNOB_TYPE_WINO_TILE = 10,
511
- CUDNN_KNOB_TYPE_MULTIPLY = 11,
512
- CUDNN_KNOB_TYPE_SPLIT_K_BUF = 12,
513
- CUDNN_KNOB_TYPE_TILEK = 13,
514
- CUDNN_KNOB_TYPE_STAGES = 14,
515
- CUDNN_KNOB_TYPE_REDUCTION_MODE = 15,
516
- CUDNN_KNOB_TYPE_CTA_SPLIT_K_MODE = 16,
517
- CUDNN_KNOB_TYPE_SPLIT_K_SLC = 17,
518
- CUDNN_KNOB_TYPE_IDX_MODE = 18,
519
- CUDNN_KNOB_TYPE_SLICED = 19,
520
- CUDNN_KNOB_TYPE_SPLIT_RS = 20,
521
- CUDNN_KNOB_TYPE_SINGLEBUFFER = 21,
522
- CUDNN_KNOB_TYPE_LDGC = 22,
523
- CUDNN_KNOB_TYPE_SPECFILT = 23,
524
- CUDNN_KNOB_TYPE_KERNEL_CFG = 24,
525
- CUDNN_KNOB_TYPE_WORKSPACE = 25,
526
- CUDNN_KNOB_TYPE_TILE_CGA = 26,
527
- CUDNN_KNOB_TYPE_TILE_CGA_M = 27,
528
- CUDNN_KNOB_TYPE_TILE_CGA_N = 28,
529
- CUDNN_KNOB_TYPE_BLOCK_SIZE = 29,
530
- CUDNN_KNOB_TYPE_OCCUPANCY = 30,
531
- CUDNN_KNOB_TYPE_ARRAY_SIZE_PER_THREAD = 31,
532
- CUDNN_KNOB_TYPE_NUM_C_PER_BLOCK = 32,
533
- CUDNN_KNOB_TYPE_SPLIT_COLS = 33,
534
- CUDNN_KNOB_TYPE_TILE_ROWS = 34,
535
- CUDNN_KNOB_TYPE_TILE_COLS = 35,
536
- CUDNN_KNOB_TYPE_LOAD_SIZE = 36,
537
- CUDNN_KNOB_TYPE_COUNTS,
538
- } cudnnBackendKnobType_t;
539
-
540
- typedef enum {
541
- CUDNN_LAYOUT_TYPE_PREFERRED_NCHW = 0,
542
- CUDNN_LAYOUT_TYPE_PREFERRED_NHWC = 1,
543
- CUDNN_LAYOUT_TYPE_PREFERRED_PAD4CK = 2,
544
- CUDNN_LAYOUT_TYPE_PREFERRED_PAD8CK = 3,
545
- CUDNN_LAYOUT_TYPE_COUNT = 4,
546
- } cudnnBackendLayoutType_t;
547
-
548
- typedef enum {
549
- CUDNN_HEUR_MODE_INSTANT = 0,
550
- CUDNN_HEUR_MODE_B = 1,
551
- CUDNN_HEUR_MODE_FALLBACK = 2,
552
- CUDNN_HEUR_MODE_A = 3,
553
- CUDNN_HEUR_MODES_COUNT = 4,
554
- } cudnnBackendHeurMode_t;
555
-
556
- typedef enum {
557
- CUDNN_TENSOR_REORDERING_NONE = 0,
558
- CUDNN_TENSOR_REORDERING_INT8x32 = 1,
559
- CUDNN_TENSOR_REORDERING_F16x16 = 2,
560
- } cudnnBackendTensorReordering_t;
561
-
562
- typedef enum {
563
- CUDNN_ZERO_PAD = 0,
564
- CUDNN_NEG_INF_PAD = 1,
565
- CUDNN_EDGE_VAL_PAD = 2,
566
- } cudnnPaddingMode_t;
567
-
568
- typedef enum {
569
- CUDNN_LAYER_NORM = 0,
570
- CUDNN_INSTANCE_NORM = 1,
571
- CUDNN_BATCH_NORM = 2,
572
- CUDNN_GROUP_NORM = 3,
573
- CUDNN_RMS_NORM = 4,
574
- } cudnnBackendNormMode_t;
575
-
576
- typedef enum {
577
- CUDNN_NORM_FWD_INFERENCE = 0,
578
- CUDNN_NORM_FWD_TRAINING = 1,
579
- } cudnnBackendNormFwdPhase_t;
580
-
581
- cudnnStatus_t CUDNNWINAPI
582
- cudnnBackendCreateDescriptor(cudnnBackendDescriptorType_t descriptorType, cudnnBackendDescriptor_t *descriptor);
583
-
584
- cudnnStatus_t CUDNNWINAPI
585
- cudnnBackendDestroyDescriptor(cudnnBackendDescriptor_t descriptor);
586
-
587
- cudnnStatus_t CUDNNWINAPI
588
- cudnnBackendInitialize(cudnnBackendDescriptor_t descriptor);
589
-
590
- cudnnStatus_t CUDNNWINAPI
591
- cudnnBackendFinalize(cudnnBackendDescriptor_t descriptor);
592
-
593
- cudnnStatus_t CUDNNWINAPI
594
- cudnnBackendSetAttribute(cudnnBackendDescriptor_t descriptor,
595
- cudnnBackendAttributeName_t attributeName,
596
- cudnnBackendAttributeType_t attributeType,
597
- int64_t elementCount,
598
- const void *arrayOfElements);
599
-
600
- cudnnStatus_t CUDNNWINAPI
601
- cudnnBackendGetAttribute(cudnnBackendDescriptor_t const descriptor,
602
- cudnnBackendAttributeName_t attributeName,
603
- cudnnBackendAttributeType_t attributeType,
604
- int64_t requestedElementCount,
605
- int64_t *elementCount,
606
- void *arrayOfElements);
607
-
608
- cudnnStatus_t CUDNNWINAPI
609
- cudnnBackendExecute(cudnnHandle_t handle, cudnnBackendDescriptor_t executionPlan, cudnnBackendDescriptor_t variantPack);
610
-
611
- #if defined(__cplusplus)
612
- }
613
- #endif
58
+ #include "cudnn_graph.h"
614
59
 
615
60
  #endif /* _CUDNN_BACKEND_H_ */