mindspore 2.2.0__cp38-cp38-manylinux1_x86_64.whl → 2.2.10__cp38-cp38-manylinux1_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mindspore has been flagged as potentially problematic; the original page links to more details.

Files changed (149)
  1. mindspore/.commit_id +1 -1
  2. mindspore/_akg/akg/composite/build_module.py +9 -15
  3. mindspore/_akg/akg/utils/ascend_profilier/__init__.py +0 -0
  4. mindspore/_akg/akg/utils/ascend_profilier/cann_file_parser.py +76 -0
  5. mindspore/_akg/akg/utils/ascend_profilier/file_manager.py +56 -0
  6. mindspore/_akg/akg/utils/ascend_profilier/op_summary_bean.py +23 -0
  7. mindspore/_akg/akg/utils/ascend_profilier/op_summary_headers.py +8 -0
  8. mindspore/_akg/akg/utils/ascend_profilier/op_summary_parser.py +42 -0
  9. mindspore/_akg/akg/utils/ascend_profilier/path_manager.py +65 -0
  10. mindspore/_akg/akg/utils/kernel_exec.py +41 -15
  11. mindspore/_akg/akg/utils/tbe_codegen_utils.py +27 -6
  12. mindspore/_akg/akg/utils/util.py +38 -0
  13. mindspore/_c_dataengine.cpython-38-x86_64-linux-gnu.so +0 -0
  14. mindspore/_c_expression.cpython-38-x86_64-linux-gnu.so +0 -0
  15. mindspore/_checkparam.py +3 -3
  16. mindspore/_extends/graph_kernel/model/graph_split.py +84 -76
  17. mindspore/_extends/graph_kernel/splitter.py +3 -2
  18. mindspore/_extends/parallel_compile/akg_compiler/build_tbe_kernel.py +83 -66
  19. mindspore/_extends/parallel_compile/akg_compiler/tbe_topi.py +4 -4
  20. mindspore/_extends/parallel_compile/akg_compiler/util.py +10 -7
  21. mindspore/_extends/parallel_compile/tbe_compiler/tbe_helper.py +2 -1
  22. mindspore/_extends/parse/standard_method.py +2 -9
  23. mindspore/_extends/remote/kernel_build_server.py +2 -1
  24. mindspore/_mindspore_offline_debug.cpython-38-x86_64-linux-gnu.so +0 -0
  25. mindspore/bin/cache_admin +0 -0
  26. mindspore/bin/cache_server +0 -0
  27. mindspore/common/api.py +1 -1
  28. mindspore/common/auto_dynamic_shape.py +81 -85
  29. mindspore/common/dump.py +1 -1
  30. mindspore/common/tensor.py +3 -20
  31. mindspore/config/op_info.config +1 -1
  32. mindspore/context.py +11 -4
  33. mindspore/dataset/engine/datasets_standard_format.py +5 -0
  34. mindspore/dataset/vision/transforms.py +21 -21
  35. mindspore/experimental/optim/adam.py +1 -1
  36. mindspore/gen_ops.py +1 -1
  37. mindspore/include/api/model.h +17 -0
  38. mindspore/include/api/status.h +8 -3
  39. mindspore/lib/libdnnl.so.2 +0 -0
  40. mindspore/lib/libmindspore.so +0 -0
  41. mindspore/lib/libmindspore_backend.so +0 -0
  42. mindspore/lib/libmindspore_common.so +0 -0
  43. mindspore/lib/libmindspore_core.so +0 -0
  44. mindspore/lib/libmindspore_glog.so.0 +0 -0
  45. mindspore/lib/libmindspore_gpr.so.15 +0 -0
  46. mindspore/lib/libmindspore_grpc++.so.1 +0 -0
  47. mindspore/lib/libmindspore_grpc.so.15 +0 -0
  48. mindspore/lib/libmindspore_shared_lib.so +0 -0
  49. mindspore/lib/libnnacl.so +0 -0
  50. mindspore/lib/libopencv_core.so.4.5 +0 -0
  51. mindspore/lib/libopencv_imgcodecs.so.4.5 +0 -0
  52. mindspore/lib/libopencv_imgproc.so.4.5 +0 -0
  53. mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/config/ascend310/aic-ascend310-ops-info.json +123 -0
  54. mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/config/ascend310p/aic-ascend310p-ops-info.json +123 -0
  55. mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/config/ascend910/aic-ascend910-ops-info.json +158 -0
  56. mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/config/ascend910b/aic-ascend910b-ops-info.json +37 -0
  57. mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/custom_aicore_ops_impl/add_dsl.py +46 -0
  58. mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/custom_aicore_ops_impl/add_tik.py +51 -0
  59. mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/custom_aicore_ops_impl/kv_cache_mgr.py +241 -0
  60. mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/custom_aicore_ops_impl/matmul_tik.py +212 -0
  61. mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/vector_core/tbe/custom_aicore_ops_impl/add_dsl.py +46 -0
  62. mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/vector_core/tbe/custom_aicore_ops_impl/add_tik.py +51 -0
  63. mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/vector_core/tbe/custom_aicore_ops_impl/kv_cache_mgr.py +241 -0
  64. mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/vector_core/tbe/custom_aicore_ops_impl/matmul_tik.py +212 -0
  65. mindspore/lib/plugin/ascend/custom_aicore_ops/op_proto/libop_proto.so +0 -0
  66. mindspore/lib/plugin/ascend/custom_aicpu_ops/op_impl/cpu/aicpu_kernel/impl/libcust_aicpu_kernels.so +0 -0
  67. mindspore/lib/plugin/ascend/custom_aicpu_ops/op_impl/cpu/aicpu_kernel/impl/libcust_cpu_kernels.so +0 -0
  68. mindspore/lib/plugin/ascend/custom_aicpu_ops/op_impl/cpu/config/cust_aicpu_kernel.json +8 -80
  69. mindspore/lib/plugin/ascend/custom_aicpu_ops/op_proto/libcust_op_proto.so +0 -0
  70. mindspore/lib/plugin/ascend/libakg.so +0 -0
  71. mindspore/lib/plugin/ascend/libhccl_plugin.so +0 -0
  72. mindspore/lib/plugin/ascend/libmindspore_aicpu_kernels.so +0 -0
  73. mindspore/lib/plugin/ascend/libmindspore_cpu_kernels.so +0 -0
  74. mindspore/lib/plugin/cpu/libakg.so +0 -0
  75. mindspore/lib/plugin/gpu/libcuda_ops.so.10 +0 -0
  76. mindspore/lib/plugin/gpu/libcuda_ops.so.11 +0 -0
  77. mindspore/lib/plugin/gpu10.1/libakg.so +0 -0
  78. mindspore/lib/plugin/gpu10.1/libnccl.so.2 +0 -0
  79. mindspore/lib/plugin/gpu11.1/libakg.so +0 -0
  80. mindspore/lib/plugin/gpu11.1/libnccl.so.2 +0 -0
  81. mindspore/lib/plugin/gpu11.6/libakg.so +0 -0
  82. mindspore/lib/plugin/gpu11.6/libnccl.so.2 +0 -0
  83. mindspore/lib/plugin/libmindspore_ascend.so.1 +0 -0
  84. mindspore/lib/plugin/libmindspore_ascend.so.2 +0 -0
  85. mindspore/lib/plugin/libmindspore_gpu.so.10.1 +0 -0
  86. mindspore/lib/plugin/libmindspore_gpu.so.11.1 +0 -0
  87. mindspore/lib/plugin/libmindspore_gpu.so.11.6 +0 -0
  88. mindspore/nn/cell.py +0 -3
  89. mindspore/nn/layer/activation.py +4 -5
  90. mindspore/nn/layer/conv.py +39 -23
  91. mindspore/nn/layer/flash_attention.py +90 -78
  92. mindspore/nn/layer/math.py +3 -7
  93. mindspore/nn/layer/rnn_cells.py +5 -5
  94. mindspore/nn/wrap/cell_wrapper.py +6 -0
  95. mindspore/numpy/utils_const.py +5 -5
  96. mindspore/ops/_grad_experimental/grad_array_ops.py +1 -1
  97. mindspore/ops/_grad_experimental/grad_implementations.py +2 -2
  98. mindspore/ops/_grad_experimental/grad_math_ops.py +19 -18
  99. mindspore/ops/_grad_experimental/grad_sparse_ops.py +3 -3
  100. mindspore/ops/_op_impl/aicpu/add.py +3 -3
  101. mindspore/ops/_utils/utils.py +2 -0
  102. mindspore/ops/composite/multitype_ops/_compile_utils.py +2 -1
  103. mindspore/ops/composite/multitype_ops/getitem_impl.py +2 -2
  104. mindspore/ops/function/array_func.py +10 -7
  105. mindspore/ops/function/grad/grad_func.py +0 -1
  106. mindspore/ops/function/nn_func.py +98 -9
  107. mindspore/ops/function/random_func.py +2 -1
  108. mindspore/ops/op_info_register.py +24 -21
  109. mindspore/ops/operations/__init__.py +3 -2
  110. mindspore/ops/operations/_grad_ops.py +24 -4
  111. mindspore/ops/operations/_inner_ops.py +155 -23
  112. mindspore/ops/operations/array_ops.py +9 -7
  113. mindspore/ops/operations/comm_ops.py +2 -2
  114. mindspore/ops/operations/custom_ops.py +85 -68
  115. mindspore/ops/operations/inner_ops.py +26 -3
  116. mindspore/ops/operations/math_ops.py +4 -3
  117. mindspore/ops/operations/nn_ops.py +109 -28
  118. mindspore/parallel/_parallel_serialization.py +10 -3
  119. mindspore/parallel/_tensor.py +4 -1
  120. mindspore/parallel/checkpoint_transform.py +13 -2
  121. mindspore/parallel/shard.py +17 -10
  122. mindspore/profiler/common/util.py +1 -0
  123. mindspore/profiler/parser/ascend_hccl_generator.py +232 -0
  124. mindspore/profiler/parser/ascend_msprof_exporter.py +86 -43
  125. mindspore/profiler/parser/ascend_msprof_generator.py +196 -9
  126. mindspore/profiler/parser/ascend_op_generator.py +1 -1
  127. mindspore/profiler/parser/ascend_timeline_generator.py +6 -182
  128. mindspore/profiler/parser/base_timeline_generator.py +1 -1
  129. mindspore/profiler/parser/cpu_gpu_timeline_generator.py +2 -2
  130. mindspore/profiler/parser/framework_parser.py +1 -1
  131. mindspore/profiler/parser/profiler_info.py +19 -0
  132. mindspore/profiler/profiling.py +46 -24
  133. mindspore/rewrite/api/pattern_engine.py +1 -1
  134. mindspore/rewrite/parsers/for_parser.py +1 -1
  135. mindspore/rewrite/symbol_tree.py +1 -4
  136. mindspore/run_check/_check_version.py +5 -3
  137. mindspore/safeguard/rewrite_obfuscation.py +52 -28
  138. mindspore/train/callback/_summary_collector.py +1 -1
  139. mindspore/train/dataset_helper.py +1 -0
  140. mindspore/train/model.py +2 -2
  141. mindspore/train/serialization.py +97 -11
  142. mindspore/train/summary/_summary_adapter.py +1 -1
  143. mindspore/train/summary/summary_record.py +23 -7
  144. mindspore/version.py +1 -1
  145. {mindspore-2.2.0.dist-info → mindspore-2.2.10.dist-info}/METADATA +1 -1
  146. {mindspore-2.2.0.dist-info → mindspore-2.2.10.dist-info}/RECORD +149 -129
  147. {mindspore-2.2.0.dist-info → mindspore-2.2.10.dist-info}/WHEEL +0 -0
  148. {mindspore-2.2.0.dist-info → mindspore-2.2.10.dist-info}/entry_points.txt +0 -0
  149. {mindspore-2.2.0.dist-info → mindspore-2.2.10.dist-info}/top_level.txt +0 -0
@@ -136,6 +136,13 @@ class MS_API Model {
136
136
  /// \return Status.
137
137
  Status UpdateWeights(const std::vector<MSTensor> &new_weights);
138
138
 
139
+ /// \brief Change the size and or content of weight tensors
140
+ ///
141
+ /// \param[in] A vector where model constant are arranged in sequence
142
+ ///
143
+ /// \return Status.
144
+ Status UpdateWeights(const std::vector<std::vector<MSTensor>> &new_weights);
145
+
139
146
  /// \brief Inference model API. If use this API in train mode, it's equal to RunStep API.
140
147
  ///
141
148
  /// \param[in] inputs A vector where model inputs are arranged in sequence.
@@ -358,6 +365,13 @@ class MS_API Model {
358
365
 
359
366
  const std::shared_ptr<ModelImpl> impl() const { return impl_; }
360
367
 
368
+ /// \brief Get model info by key
369
+ ///
370
+ /// \param[in] key The key of model info key-value pair
371
+ ///
372
+ /// \return The value of the model info associated with the given key.
373
+ inline std::string GetModelInfo(const std::string &key);
374
+
361
375
  private:
362
376
  friend class Serialization;
363
377
  // api without std::string
@@ -374,6 +388,7 @@ class MS_API Model {
374
388
  const std::vector<char> &cropto_lib_path);
375
389
  Status Build(const std::vector<char> &model_path, ModelType model_type, const std::shared_ptr<Context> &model_context,
376
390
  const Key &dec_key, const std::vector<char> &dec_mode, const std::vector<char> &cropto_lib_path);
391
+ std::vector<char> GetModelInfo(const std::vector<char> &key);
377
392
  std::shared_ptr<ModelImpl> impl_;
378
393
  };
379
394
 
@@ -416,5 +431,7 @@ Status Model::Build(const std::string &model_path, ModelType model_type,
416
431
  const std::shared_ptr<Context> &model_context) {
417
432
  return Build(StringToChar(model_path), model_type, model_context);
418
433
  }
434
+
435
+ inline std::string Model::GetModelInfo(const std::string &key) { return CharToString(GetModelInfo(StringToChar(key))); }
419
436
  } // namespace mindspore
420
437
  #endif // MINDSPORE_INCLUDE_API_MODEL_H
@@ -83,9 +83,14 @@ enum StatusCode : uint32_t {
83
83
  kLiteModelRebuild = kLite | (0x0FFFFFFF & -12), /**< Model has been built. */
84
84
 
85
85
  // Executor error code, range: [-100,-200)
86
- kLiteOutOfTensorRange = kLite | (0x0FFFFFFF & -100), /**< Failed to check range. */
87
- kLiteInputTensorError = kLite | (0x0FFFFFFF & -101), /**< Failed to check input tensor. */
88
- kLiteReentrantError = kLite | (0x0FFFFFFF & -102), /**< Exist executor running. */
86
+ kLiteOutOfTensorRange = kLite | (0x0FFFFFFF & -100), /**< Failed to check range. */
87
+ kLiteInputTensorError = kLite | (0x0FFFFFFF & -101), /**< Failed to check input tensor. */
88
+ kLiteReentrantError = kLite | (0x0FFFFFFF & -102), /**< Exist executor running. */
89
+ kLiteLLMWaitProcessTimeOut = kLite | (0x0FFFFFFF & -103), /**< Wait to be processed time out. */
90
+ kLiteLLMKVCacheNotExist = kLite | (0x0FFFFFFF & -104), /**< KV Cache not exist. */
91
+ kLiteLLMRepeatRequest = kLite | (0x0FFFFFFF & -105), /**< repeat request. */
92
+ kLiteLLMRequestAlreadyCompleted = kLite | (0x0FFFFFFF & -106), /**< request already complete!. */
93
+ kLiteLLMEngineFinalized = kLite | (0x0FFFFFFF & -107), /**< llm engine finalized. */
89
94
 
90
95
  // Graph error code, range: [-200,-300)
91
96
  kLiteGraphFileError = kLite | (0x0FFFFFFF & -200), /**< Failed to verify graph file. */
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
mindspore/lib/libnnacl.so CHANGED
Binary file
Binary file
Binary file
Binary file
@@ -0,0 +1,123 @@
1
+ {
2
+ "AddDSL":{
3
+ "input0":{
4
+ "dtype":"float16",
5
+ "format":"NCHW",
6
+ "name":"x1",
7
+ "paramType":"required"
8
+ },
9
+ "input1":{
10
+ "dtype":"float16",
11
+ "format":"NCHW",
12
+ "name":"x2",
13
+ "paramType":"required"
14
+ },
15
+ "opFile":{
16
+ "value":"add_dsl"
17
+ },
18
+ "opInterface":{
19
+ "value":"add_dsl"
20
+ },
21
+ "output0":{
22
+ "dtype":"float16",
23
+ "format":"NCHW",
24
+ "name":"y",
25
+ "paramType":"required"
26
+ }
27
+ },
28
+ "AddTik":{
29
+ "input0":{
30
+ "dtype":"float16",
31
+ "format":"ND",
32
+ "name":"x1",
33
+ "paramType":"required"
34
+ },
35
+ "input1":{
36
+ "dtype":"float16",
37
+ "format":"ND",
38
+ "name":"x2",
39
+ "paramType":"required"
40
+ },
41
+ "opFile":{
42
+ "value":"add_tik"
43
+ },
44
+ "opInterface":{
45
+ "value":"add_tik"
46
+ },
47
+ "output0":{
48
+ "dtype":"float16",
49
+ "format":"ND",
50
+ "name":"y",
51
+ "paramType":"required"
52
+ }
53
+ },
54
+ "FlashAttention":{
55
+ "input0":{
56
+ "dtype":"float16,float32,int32",
57
+ "format":"ND,ND,ND",
58
+ "name":"q",
59
+ "paramType":"required"
60
+ },
61
+ "input1":{
62
+ "dtype":"float16,float32,int32",
63
+ "format":"ND,ND,ND",
64
+ "name":"k",
65
+ "paramType":"required"
66
+ },
67
+ "input2":{
68
+ "dtype":"float16,float32,int32",
69
+ "format":"ND,ND,ND",
70
+ "name":"v",
71
+ "paramType":"required"
72
+ },
73
+ "input3":{
74
+ "dtype":"float16,float32,int32",
75
+ "format":"ND,ND,ND",
76
+ "name":"attention_mask",
77
+ "paramType":"required"
78
+ },
79
+ "opFile":{
80
+ "value":"flash_attention"
81
+ },
82
+ "opInterface":{
83
+ "value":"flash_attention"
84
+ },
85
+ "output0":{
86
+ "dtype":"float16,float32,int32",
87
+ "format":"ND,ND,ND",
88
+ "name":"y",
89
+ "paramType":"required"
90
+ }
91
+ },
92
+ "MatmulTik":{
93
+ "input0":{
94
+ "dtype":"int8,uint8,float16",
95
+ "format":"ND,ND,ND",
96
+ "name":"x1",
97
+ "needCompile":"false",
98
+ "paramType":"required",
99
+ "shape":"all"
100
+ },
101
+ "input1":{
102
+ "dtype":"int8,int8,float16",
103
+ "format":"ND,ND,ND",
104
+ "name":"x2",
105
+ "needCompile":"false",
106
+ "paramType":"required",
107
+ "shape":"all"
108
+ },
109
+ "opFile":{
110
+ "value":"matmul_tik"
111
+ },
112
+ "opInterface":{
113
+ "value":"matmul_tik"
114
+ },
115
+ "output0":{
116
+ "dtype":"int32,int32,float",
117
+ "format":"ND,ND,ND",
118
+ "name":"y",
119
+ "paramType":"required",
120
+ "shape":"all"
121
+ }
122
+ }
123
+ }
@@ -0,0 +1,123 @@
1
+ {
2
+ "AddDSL":{
3
+ "input0":{
4
+ "dtype":"float16",
5
+ "format":"NCHW",
6
+ "name":"x1",
7
+ "paramType":"required"
8
+ },
9
+ "input1":{
10
+ "dtype":"float16",
11
+ "format":"NCHW",
12
+ "name":"x2",
13
+ "paramType":"required"
14
+ },
15
+ "opFile":{
16
+ "value":"add_dsl"
17
+ },
18
+ "opInterface":{
19
+ "value":"add_dsl"
20
+ },
21
+ "output0":{
22
+ "dtype":"float16",
23
+ "format":"NCHW",
24
+ "name":"y",
25
+ "paramType":"required"
26
+ }
27
+ },
28
+ "AddTik":{
29
+ "input0":{
30
+ "dtype":"float16",
31
+ "format":"ND",
32
+ "name":"x1",
33
+ "paramType":"required"
34
+ },
35
+ "input1":{
36
+ "dtype":"float16",
37
+ "format":"ND",
38
+ "name":"x2",
39
+ "paramType":"required"
40
+ },
41
+ "opFile":{
42
+ "value":"add_tik"
43
+ },
44
+ "opInterface":{
45
+ "value":"add_tik"
46
+ },
47
+ "output0":{
48
+ "dtype":"float16",
49
+ "format":"ND",
50
+ "name":"y",
51
+ "paramType":"required"
52
+ }
53
+ },
54
+ "FlashAttention":{
55
+ "input0":{
56
+ "dtype":"float16,float32,int32",
57
+ "format":"ND,ND,ND",
58
+ "name":"q",
59
+ "paramType":"required"
60
+ },
61
+ "input1":{
62
+ "dtype":"float16,float32,int32",
63
+ "format":"ND,ND,ND",
64
+ "name":"k",
65
+ "paramType":"required"
66
+ },
67
+ "input2":{
68
+ "dtype":"float16,float32,int32",
69
+ "format":"ND,ND,ND",
70
+ "name":"v",
71
+ "paramType":"required"
72
+ },
73
+ "input3":{
74
+ "dtype":"float16,float32,int32",
75
+ "format":"ND,ND,ND",
76
+ "name":"attention_mask",
77
+ "paramType":"required"
78
+ },
79
+ "opFile":{
80
+ "value":"flash_attention"
81
+ },
82
+ "opInterface":{
83
+ "value":"flash_attention"
84
+ },
85
+ "output0":{
86
+ "dtype":"float16,float32,int32",
87
+ "format":"ND,ND,ND",
88
+ "name":"y",
89
+ "paramType":"required"
90
+ }
91
+ },
92
+ "MatmulTik":{
93
+ "input0":{
94
+ "dtype":"int8,uint8,float16",
95
+ "format":"ND,ND,ND",
96
+ "name":"x1",
97
+ "needCompile":"false",
98
+ "paramType":"required",
99
+ "shape":"all"
100
+ },
101
+ "input1":{
102
+ "dtype":"int8,int8,float16",
103
+ "format":"ND,ND,ND",
104
+ "name":"x2",
105
+ "needCompile":"false",
106
+ "paramType":"required",
107
+ "shape":"all"
108
+ },
109
+ "opFile":{
110
+ "value":"matmul_tik"
111
+ },
112
+ "opInterface":{
113
+ "value":"matmul_tik"
114
+ },
115
+ "output0":{
116
+ "dtype":"int32,int32,float",
117
+ "format":"ND,ND,ND",
118
+ "name":"y",
119
+ "paramType":"required",
120
+ "shape":"all"
121
+ }
122
+ }
123
+ }
@@ -0,0 +1,158 @@
1
+ {
2
+ "AddDSL":{
3
+ "input0":{
4
+ "dtype":"float16,float32,int32",
5
+ "format":"ND,ND,ND",
6
+ "name":"x1",
7
+ "paramType":"required"
8
+ },
9
+ "input1":{
10
+ "dtype":"float16,float32,int32",
11
+ "format":"ND,ND,ND",
12
+ "name":"x2",
13
+ "paramType":"required"
14
+ },
15
+ "opFile":{
16
+ "value":"add_dsl"
17
+ },
18
+ "opInterface":{
19
+ "value":"add_dsl"
20
+ },
21
+ "output0":{
22
+ "dtype":"float16,float32,int32",
23
+ "format":"ND,ND,ND",
24
+ "name":"y",
25
+ "paramType":"required"
26
+ }
27
+ },
28
+ "AddTik":{
29
+ "input0":{
30
+ "dtype":"float16,float32,int32",
31
+ "format":"ND,ND,ND",
32
+ "name":"x1",
33
+ "paramType":"required"
34
+ },
35
+ "input1":{
36
+ "dtype":"float16,float32,int32",
37
+ "format":"ND,ND,ND",
38
+ "name":"x2",
39
+ "paramType":"required"
40
+ },
41
+ "opFile":{
42
+ "value":"add_tik"
43
+ },
44
+ "opInterface":{
45
+ "value":"add_tik"
46
+ },
47
+ "output0":{
48
+ "dtype":"float16,float32,int32",
49
+ "format":"ND,ND,ND",
50
+ "name":"y",
51
+ "paramType":"required"
52
+ }
53
+ },
54
+ "FlashAttention":{
55
+ "input0":{
56
+ "dtype":"float16,float32,int32",
57
+ "format":"ND,ND,ND",
58
+ "name":"q",
59
+ "paramType":"required"
60
+ },
61
+ "input1":{
62
+ "dtype":"float16,float32,int32",
63
+ "format":"ND,ND,ND",
64
+ "name":"k",
65
+ "paramType":"required"
66
+ },
67
+ "input2":{
68
+ "dtype":"float16,float32,int32",
69
+ "format":"ND,ND,ND",
70
+ "name":"v",
71
+ "paramType":"required"
72
+ },
73
+ "input3":{
74
+ "dtype":"float16,float32,int32",
75
+ "format":"ND,ND,ND",
76
+ "name":"attention_mask",
77
+ "paramType":"required"
78
+ },
79
+ "opFile":{
80
+ "value":"flash_attention"
81
+ },
82
+ "opInterface":{
83
+ "value":"flash_attention"
84
+ },
85
+ "output0":{
86
+ "dtype":"float16,float32,int32",
87
+ "format":"ND,ND,ND",
88
+ "name":"y",
89
+ "paramType":"required"
90
+ }
91
+ },
92
+ "KVCacheMgr":{
93
+ "input0":{
94
+ "dtype":"float32, int32, uint32, float16, int16, uint16, int8, uint8",
95
+ "format":"ND,ND,ND,ND,ND,ND,ND,ND",
96
+ "name":"past",
97
+ "paramType":"required"
98
+ },
99
+ "input1":{
100
+ "dtype":"float32, int32, uint32, float16, int16, uint16, int8, uint8",
101
+ "format":"ND,ND,ND,ND,ND,ND,ND,ND",
102
+ "name":"cur",
103
+ "paramType":"required"
104
+ },
105
+ "input2":{
106
+ "dtype":"int32,int32,int32,int32,int32,int32,int32,int32",
107
+ "format":"ND,ND,ND,ND,ND,ND,ND,ND",
108
+ "name":"index",
109
+ "paramType":"required"
110
+ },
111
+ "needCheckSupport":{
112
+ "flag":"true"
113
+ },
114
+ "opFile":{
115
+ "value":"kv_cache_mgr"
116
+ },
117
+ "opInterface":{
118
+ "value":"kv_cache_mgr"
119
+ },
120
+ "output0":{
121
+ "dtype":"float32, int32, uint32, float16, int16, uint16, int8, uint8",
122
+ "format":"ND,ND,ND,ND,ND,ND,ND,ND",
123
+ "name":"past",
124
+ "paramType":"required"
125
+ }
126
+ },
127
+ "MatmulTik":{
128
+ "input0":{
129
+ "dtype":"int8,uint8,float16",
130
+ "format":"ND,ND,ND",
131
+ "name":"x1",
132
+ "needCompile":"false",
133
+ "paramType":"required",
134
+ "shape":"all"
135
+ },
136
+ "input1":{
137
+ "dtype":"int8,int8,float16",
138
+ "format":"ND,ND,ND",
139
+ "name":"x2",
140
+ "needCompile":"false",
141
+ "paramType":"required",
142
+ "shape":"all"
143
+ },
144
+ "opFile":{
145
+ "value":"matmul_tik"
146
+ },
147
+ "opInterface":{
148
+ "value":"matmul_tik"
149
+ },
150
+ "output0":{
151
+ "dtype":"int32,int32,float",
152
+ "format":"ND,ND,ND",
153
+ "name":"y",
154
+ "paramType":"required",
155
+ "shape":"all"
156
+ }
157
+ }
158
+ }
@@ -0,0 +1,37 @@
1
+ {
2
+ "KVCacheMgr":{
3
+ "input0":{
4
+ "dtype":"float32, int32, uint32, float16, int16, uint16, int8, uint8",
5
+ "format":"ND,ND,ND,ND,ND,ND,ND,ND",
6
+ "name":"past",
7
+ "paramType":"required"
8
+ },
9
+ "input1":{
10
+ "dtype":"float32, int32, uint32, float16, int16, uint16, int8, uint8",
11
+ "format":"ND,ND,ND,ND,ND,ND,ND,ND",
12
+ "name":"cur",
13
+ "paramType":"required"
14
+ },
15
+ "input2":{
16
+ "dtype":"int32,int32,int32,int32,int32,int32,int32,int32",
17
+ "format":"ND,ND,ND,ND,ND,ND,ND,ND",
18
+ "name":"index",
19
+ "paramType":"required"
20
+ },
21
+ "needCheckSupport":{
22
+ "flag":"true"
23
+ },
24
+ "opFile":{
25
+ "value":"kv_cache_mgr"
26
+ },
27
+ "opInterface":{
28
+ "value":"kv_cache_mgr"
29
+ },
30
+ "output0":{
31
+ "dtype":"float32, int32, uint32, float16, int16, uint16, int8, uint8",
32
+ "format":"ND,ND,ND,ND,ND,ND,ND,ND",
33
+ "name":"past",
34
+ "paramType":"required"
35
+ }
36
+ }
37
+ }
@@ -0,0 +1,46 @@
1
+ # Copyright 2022 Huawei Technologies Co., Ltd
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ # ============================================================================
15
+ """ascend custom op: add by dsl"""
16
+ import tbe.dsl as tbe
17
+ from tbe import tvm
18
+ from tbe.common.register import register_op_compute
19
+ from tbe.common.utils import para_check
20
+
21
+
22
+ @register_op_compute("add_dsl")
23
+ def add_dsl_compute(x1, x2, y, kernel_name="add_dsl"):
24
+ res = tbe.vadd(x1, x2)
25
+ return res
26
+
27
+
28
+ @para_check.check_op_params(para_check.REQUIRED_INPUT, para_check.REQUIRED_INPUT,
29
+ para_check.REQUIRED_OUTPUT, para_check.KERNEL_NAME)
30
+ def add_dsl(x1, x2, y, kernel_name="add_dsl"):
31
+ """add dsl impl function"""
32
+ data_x1 = tvm.placeholder(
33
+ x1.get("shape"), dtype=x1.get("dtype"), name="data_x1")
34
+ data_x2 = tvm.placeholder(
35
+ x2.get("shape"), dtype=x2.get("dtype"), name="data_x2")
36
+
37
+ res = add_dsl_compute(data_x1, data_x2, y, kernel_name)
38
+
39
+ # auto schedule
40
+ with tvm.target.cce():
41
+ schedule = tbe.auto_schedule(res)
42
+
43
+ # operator build
44
+ config = {"name": kernel_name,
45
+ "tensor_list": [data_x1, data_x2, res]}
46
+ tbe.build(schedule, config)
@@ -0,0 +1,51 @@
1
+ # Copyright 2022 Huawei Technologies Co., Ltd
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ # ============================================================================
15
+ """ascend custom op: add by tik"""
16
+ from tbe.common.register import register_op_compute
17
+ from tbe.common.utils import para_check
18
+ from tbe import tik
19
+
20
+
21
+ @register_op_compute("AddTik")
22
+ @para_check.check_op_params(para_check.REQUIRED_INPUT, para_check.REQUIRED_INPUT,
23
+ para_check.REQUIRED_OUTPUT, para_check.KERNEL_NAME)
24
+ def add_tik(x1, x2, y, kernel_name="add_tik"):
25
+ """add dsl impl function"""
26
+ tik_instance = tik.Tik()
27
+ x1_shape = x1.get("shape")
28
+ x2_shape = x2.get("shape")
29
+ y_shape = y.get("shape")
30
+
31
+ data_a = tik_instance.Tensor(
32
+ "float16", x1_shape, name="x1", scope=tik.scope_gm)
33
+ data_b = tik_instance.Tensor(
34
+ "float16", x2_shape, name="x2", scope=tik.scope_gm)
35
+ data_c = tik_instance.Tensor(
36
+ "float16", y_shape, name="y", scope=tik.scope_gm)
37
+ data_a_ub = tik_instance.Tensor(
38
+ "float16", x1_shape, name="data_A_ub", scope=tik.scope_ubuf)
39
+ data_b_ub = tik_instance.Tensor(
40
+ "float16", x2_shape, name="data_B_ub", scope=tik.scope_ubuf)
41
+ data_c_ub = tik_instance.Tensor(
42
+ "float16", y_shape, name="data_C_ub", scope=tik.scope_ubuf)
43
+
44
+ tik_instance.data_move(data_a_ub, data_a, 0, 1, 128 // 16, 0, 0)
45
+ tik_instance.data_move(data_b_ub, data_b, 0, 1, 128 // 16, 0, 0)
46
+ tik_instance.vec_add(
47
+ 128, data_c_ub[0], data_a_ub[0], data_b_ub[0], 1, 8, 8, 8)
48
+ tik_instance.data_move(data_c, data_c_ub, 0, 1, 128 // 16, 0, 0)
49
+ tik_instance.BuildCCE(kernel_name=kernel_name, inputs=[data_a, data_b], outputs=[data_c])
50
+
51
+ return tik_instance