mindspore 2.2.0__cp38-cp38-manylinux1_x86_64.whl → 2.2.10__cp38-cp38-manylinux1_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mindspore might be problematic. Click here for more details.
- mindspore/.commit_id +1 -1
- mindspore/_akg/akg/composite/build_module.py +9 -15
- mindspore/_akg/akg/utils/ascend_profilier/__init__.py +0 -0
- mindspore/_akg/akg/utils/ascend_profilier/cann_file_parser.py +76 -0
- mindspore/_akg/akg/utils/ascend_profilier/file_manager.py +56 -0
- mindspore/_akg/akg/utils/ascend_profilier/op_summary_bean.py +23 -0
- mindspore/_akg/akg/utils/ascend_profilier/op_summary_headers.py +8 -0
- mindspore/_akg/akg/utils/ascend_profilier/op_summary_parser.py +42 -0
- mindspore/_akg/akg/utils/ascend_profilier/path_manager.py +65 -0
- mindspore/_akg/akg/utils/kernel_exec.py +41 -15
- mindspore/_akg/akg/utils/tbe_codegen_utils.py +27 -6
- mindspore/_akg/akg/utils/util.py +38 -0
- mindspore/_c_dataengine.cpython-38-x86_64-linux-gnu.so +0 -0
- mindspore/_c_expression.cpython-38-x86_64-linux-gnu.so +0 -0
- mindspore/_checkparam.py +3 -3
- mindspore/_extends/graph_kernel/model/graph_split.py +84 -76
- mindspore/_extends/graph_kernel/splitter.py +3 -2
- mindspore/_extends/parallel_compile/akg_compiler/build_tbe_kernel.py +83 -66
- mindspore/_extends/parallel_compile/akg_compiler/tbe_topi.py +4 -4
- mindspore/_extends/parallel_compile/akg_compiler/util.py +10 -7
- mindspore/_extends/parallel_compile/tbe_compiler/tbe_helper.py +2 -1
- mindspore/_extends/parse/standard_method.py +2 -9
- mindspore/_extends/remote/kernel_build_server.py +2 -1
- mindspore/_mindspore_offline_debug.cpython-38-x86_64-linux-gnu.so +0 -0
- mindspore/bin/cache_admin +0 -0
- mindspore/bin/cache_server +0 -0
- mindspore/common/api.py +1 -1
- mindspore/common/auto_dynamic_shape.py +81 -85
- mindspore/common/dump.py +1 -1
- mindspore/common/tensor.py +3 -20
- mindspore/config/op_info.config +1 -1
- mindspore/context.py +11 -4
- mindspore/dataset/engine/datasets_standard_format.py +5 -0
- mindspore/dataset/vision/transforms.py +21 -21
- mindspore/experimental/optim/adam.py +1 -1
- mindspore/gen_ops.py +1 -1
- mindspore/include/api/model.h +17 -0
- mindspore/include/api/status.h +8 -3
- mindspore/lib/libdnnl.so.2 +0 -0
- mindspore/lib/libmindspore.so +0 -0
- mindspore/lib/libmindspore_backend.so +0 -0
- mindspore/lib/libmindspore_common.so +0 -0
- mindspore/lib/libmindspore_core.so +0 -0
- mindspore/lib/libmindspore_glog.so.0 +0 -0
- mindspore/lib/libmindspore_gpr.so.15 +0 -0
- mindspore/lib/libmindspore_grpc++.so.1 +0 -0
- mindspore/lib/libmindspore_grpc.so.15 +0 -0
- mindspore/lib/libmindspore_shared_lib.so +0 -0
- mindspore/lib/libnnacl.so +0 -0
- mindspore/lib/libopencv_core.so.4.5 +0 -0
- mindspore/lib/libopencv_imgcodecs.so.4.5 +0 -0
- mindspore/lib/libopencv_imgproc.so.4.5 +0 -0
- mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/config/ascend310/aic-ascend310-ops-info.json +123 -0
- mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/config/ascend310p/aic-ascend310p-ops-info.json +123 -0
- mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/config/ascend910/aic-ascend910-ops-info.json +158 -0
- mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/config/ascend910b/aic-ascend910b-ops-info.json +37 -0
- mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/custom_aicore_ops_impl/add_dsl.py +46 -0
- mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/custom_aicore_ops_impl/add_tik.py +51 -0
- mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/custom_aicore_ops_impl/kv_cache_mgr.py +241 -0
- mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/custom_aicore_ops_impl/matmul_tik.py +212 -0
- mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/vector_core/tbe/custom_aicore_ops_impl/add_dsl.py +46 -0
- mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/vector_core/tbe/custom_aicore_ops_impl/add_tik.py +51 -0
- mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/vector_core/tbe/custom_aicore_ops_impl/kv_cache_mgr.py +241 -0
- mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/vector_core/tbe/custom_aicore_ops_impl/matmul_tik.py +212 -0
- mindspore/lib/plugin/ascend/custom_aicore_ops/op_proto/libop_proto.so +0 -0
- mindspore/lib/plugin/ascend/custom_aicpu_ops/op_impl/cpu/aicpu_kernel/impl/libcust_aicpu_kernels.so +0 -0
- mindspore/lib/plugin/ascend/custom_aicpu_ops/op_impl/cpu/aicpu_kernel/impl/libcust_cpu_kernels.so +0 -0
- mindspore/lib/plugin/ascend/custom_aicpu_ops/op_impl/cpu/config/cust_aicpu_kernel.json +8 -80
- mindspore/lib/plugin/ascend/custom_aicpu_ops/op_proto/libcust_op_proto.so +0 -0
- mindspore/lib/plugin/ascend/libakg.so +0 -0
- mindspore/lib/plugin/ascend/libhccl_plugin.so +0 -0
- mindspore/lib/plugin/ascend/libmindspore_aicpu_kernels.so +0 -0
- mindspore/lib/plugin/ascend/libmindspore_cpu_kernels.so +0 -0
- mindspore/lib/plugin/cpu/libakg.so +0 -0
- mindspore/lib/plugin/gpu/libcuda_ops.so.10 +0 -0
- mindspore/lib/plugin/gpu/libcuda_ops.so.11 +0 -0
- mindspore/lib/plugin/gpu10.1/libakg.so +0 -0
- mindspore/lib/plugin/gpu10.1/libnccl.so.2 +0 -0
- mindspore/lib/plugin/gpu11.1/libakg.so +0 -0
- mindspore/lib/plugin/gpu11.1/libnccl.so.2 +0 -0
- mindspore/lib/plugin/gpu11.6/libakg.so +0 -0
- mindspore/lib/plugin/gpu11.6/libnccl.so.2 +0 -0
- mindspore/lib/plugin/libmindspore_ascend.so.1 +0 -0
- mindspore/lib/plugin/libmindspore_ascend.so.2 +0 -0
- mindspore/lib/plugin/libmindspore_gpu.so.10.1 +0 -0
- mindspore/lib/plugin/libmindspore_gpu.so.11.1 +0 -0
- mindspore/lib/plugin/libmindspore_gpu.so.11.6 +0 -0
- mindspore/nn/cell.py +0 -3
- mindspore/nn/layer/activation.py +4 -5
- mindspore/nn/layer/conv.py +39 -23
- mindspore/nn/layer/flash_attention.py +90 -78
- mindspore/nn/layer/math.py +3 -7
- mindspore/nn/layer/rnn_cells.py +5 -5
- mindspore/nn/wrap/cell_wrapper.py +6 -0
- mindspore/numpy/utils_const.py +5 -5
- mindspore/ops/_grad_experimental/grad_array_ops.py +1 -1
- mindspore/ops/_grad_experimental/grad_implementations.py +2 -2
- mindspore/ops/_grad_experimental/grad_math_ops.py +19 -18
- mindspore/ops/_grad_experimental/grad_sparse_ops.py +3 -3
- mindspore/ops/_op_impl/aicpu/add.py +3 -3
- mindspore/ops/_utils/utils.py +2 -0
- mindspore/ops/composite/multitype_ops/_compile_utils.py +2 -1
- mindspore/ops/composite/multitype_ops/getitem_impl.py +2 -2
- mindspore/ops/function/array_func.py +10 -7
- mindspore/ops/function/grad/grad_func.py +0 -1
- mindspore/ops/function/nn_func.py +98 -9
- mindspore/ops/function/random_func.py +2 -1
- mindspore/ops/op_info_register.py +24 -21
- mindspore/ops/operations/__init__.py +3 -2
- mindspore/ops/operations/_grad_ops.py +24 -4
- mindspore/ops/operations/_inner_ops.py +155 -23
- mindspore/ops/operations/array_ops.py +9 -7
- mindspore/ops/operations/comm_ops.py +2 -2
- mindspore/ops/operations/custom_ops.py +85 -68
- mindspore/ops/operations/inner_ops.py +26 -3
- mindspore/ops/operations/math_ops.py +4 -3
- mindspore/ops/operations/nn_ops.py +109 -28
- mindspore/parallel/_parallel_serialization.py +10 -3
- mindspore/parallel/_tensor.py +4 -1
- mindspore/parallel/checkpoint_transform.py +13 -2
- mindspore/parallel/shard.py +17 -10
- mindspore/profiler/common/util.py +1 -0
- mindspore/profiler/parser/ascend_hccl_generator.py +232 -0
- mindspore/profiler/parser/ascend_msprof_exporter.py +86 -43
- mindspore/profiler/parser/ascend_msprof_generator.py +196 -9
- mindspore/profiler/parser/ascend_op_generator.py +1 -1
- mindspore/profiler/parser/ascend_timeline_generator.py +6 -182
- mindspore/profiler/parser/base_timeline_generator.py +1 -1
- mindspore/profiler/parser/cpu_gpu_timeline_generator.py +2 -2
- mindspore/profiler/parser/framework_parser.py +1 -1
- mindspore/profiler/parser/profiler_info.py +19 -0
- mindspore/profiler/profiling.py +46 -24
- mindspore/rewrite/api/pattern_engine.py +1 -1
- mindspore/rewrite/parsers/for_parser.py +1 -1
- mindspore/rewrite/symbol_tree.py +1 -4
- mindspore/run_check/_check_version.py +5 -3
- mindspore/safeguard/rewrite_obfuscation.py +52 -28
- mindspore/train/callback/_summary_collector.py +1 -1
- mindspore/train/dataset_helper.py +1 -0
- mindspore/train/model.py +2 -2
- mindspore/train/serialization.py +97 -11
- mindspore/train/summary/_summary_adapter.py +1 -1
- mindspore/train/summary/summary_record.py +23 -7
- mindspore/version.py +1 -1
- {mindspore-2.2.0.dist-info → mindspore-2.2.10.dist-info}/METADATA +1 -1
- {mindspore-2.2.0.dist-info → mindspore-2.2.10.dist-info}/RECORD +149 -129
- {mindspore-2.2.0.dist-info → mindspore-2.2.10.dist-info}/WHEEL +0 -0
- {mindspore-2.2.0.dist-info → mindspore-2.2.10.dist-info}/entry_points.txt +0 -0
- {mindspore-2.2.0.dist-info → mindspore-2.2.10.dist-info}/top_level.txt +0 -0
mindspore/include/api/model.h
CHANGED
|
@@ -136,6 +136,13 @@ class MS_API Model {
|
|
|
136
136
|
/// \return Status.
|
|
137
137
|
Status UpdateWeights(const std::vector<MSTensor> &new_weights);
|
|
138
138
|
|
|
139
|
+
/// \brief Change the size and/or content of weight tensors.
|
|
140
|
+
///
|
|
141
|
+
/// \param[in] new_weights A vector where model constants are arranged in sequence.
|
|
142
|
+
///
|
|
143
|
+
/// \return Status.
|
|
144
|
+
Status UpdateWeights(const std::vector<std::vector<MSTensor>> &new_weights);
|
|
145
|
+
|
|
139
146
|
/// \brief Inference model API. If use this API in train mode, it's equal to RunStep API.
|
|
140
147
|
///
|
|
141
148
|
/// \param[in] inputs A vector where model inputs are arranged in sequence.
|
|
@@ -358,6 +365,13 @@ class MS_API Model {
|
|
|
358
365
|
|
|
359
366
|
const std::shared_ptr<ModelImpl> impl() const { return impl_; }
|
|
360
367
|
|
|
368
|
+
/// \brief Get model info by key
|
|
369
|
+
///
|
|
370
|
+
/// \param[in] key The key of model info key-value pair
|
|
371
|
+
///
|
|
372
|
+
/// \return The value of the model info associated with the given key.
|
|
373
|
+
inline std::string GetModelInfo(const std::string &key);
|
|
374
|
+
|
|
361
375
|
private:
|
|
362
376
|
friend class Serialization;
|
|
363
377
|
// api without std::string
|
|
@@ -374,6 +388,7 @@ class MS_API Model {
|
|
|
374
388
|
const std::vector<char> &cropto_lib_path);
|
|
375
389
|
Status Build(const std::vector<char> &model_path, ModelType model_type, const std::shared_ptr<Context> &model_context,
|
|
376
390
|
const Key &dec_key, const std::vector<char> &dec_mode, const std::vector<char> &cropto_lib_path);
|
|
391
|
+
std::vector<char> GetModelInfo(const std::vector<char> &key);
|
|
377
392
|
std::shared_ptr<ModelImpl> impl_;
|
|
378
393
|
};
|
|
379
394
|
|
|
@@ -416,5 +431,7 @@ Status Model::Build(const std::string &model_path, ModelType model_type,
|
|
|
416
431
|
const std::shared_ptr<Context> &model_context) {
|
|
417
432
|
return Build(StringToChar(model_path), model_type, model_context);
|
|
418
433
|
}
|
|
434
|
+
|
|
435
|
+
inline std::string Model::GetModelInfo(const std::string &key) { return CharToString(GetModelInfo(StringToChar(key))); }
|
|
419
436
|
} // namespace mindspore
|
|
420
437
|
#endif // MINDSPORE_INCLUDE_API_MODEL_H
|
mindspore/include/api/status.h
CHANGED
|
@@ -83,9 +83,14 @@ enum StatusCode : uint32_t {
|
|
|
83
83
|
kLiteModelRebuild = kLite | (0x0FFFFFFF & -12), /**< Model has been built. */
|
|
84
84
|
|
|
85
85
|
// Executor error code, range: [-100,-200)
|
|
86
|
-
kLiteOutOfTensorRange = kLite | (0x0FFFFFFF & -100),
|
|
87
|
-
kLiteInputTensorError = kLite | (0x0FFFFFFF & -101),
|
|
88
|
-
kLiteReentrantError = kLite | (0x0FFFFFFF & -102),
|
|
86
|
+
kLiteOutOfTensorRange = kLite | (0x0FFFFFFF & -100), /**< Failed to check range. */
|
|
87
|
+
kLiteInputTensorError = kLite | (0x0FFFFFFF & -101), /**< Failed to check input tensor. */
|
|
88
|
+
kLiteReentrantError = kLite | (0x0FFFFFFF & -102), /**< Exist executor running. */
|
|
89
|
+
kLiteLLMWaitProcessTimeOut = kLite | (0x0FFFFFFF & -103), /**< Wait to be processed time out. */
|
|
90
|
+
kLiteLLMKVCacheNotExist = kLite | (0x0FFFFFFF & -104), /**< KV Cache not exist. */
|
|
91
|
+
kLiteLLMRepeatRequest = kLite | (0x0FFFFFFF & -105), /**< repeat request. */
|
|
92
|
+
kLiteLLMRequestAlreadyCompleted = kLite | (0x0FFFFFFF & -106), /**< Request already completed. */
|
|
93
|
+
kLiteLLMEngineFinalized = kLite | (0x0FFFFFFF & -107), /**< llm engine finalized. */
|
|
89
94
|
|
|
90
95
|
// Graph error code, range: [-200,-300)
|
|
91
96
|
kLiteGraphFileError = kLite | (0x0FFFFFFF & -200), /**< Failed to verify graph file. */
|
mindspore/lib/libdnnl.so.2
CHANGED
|
Binary file
|
mindspore/lib/libmindspore.so
CHANGED
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
mindspore/lib/libnnacl.so
CHANGED
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
@@ -0,0 +1,123 @@
|
|
|
1
|
+
{
|
|
2
|
+
"AddDSL":{
|
|
3
|
+
"input0":{
|
|
4
|
+
"dtype":"float16",
|
|
5
|
+
"format":"NCHW",
|
|
6
|
+
"name":"x1",
|
|
7
|
+
"paramType":"required"
|
|
8
|
+
},
|
|
9
|
+
"input1":{
|
|
10
|
+
"dtype":"float16",
|
|
11
|
+
"format":"NCHW",
|
|
12
|
+
"name":"x2",
|
|
13
|
+
"paramType":"required"
|
|
14
|
+
},
|
|
15
|
+
"opFile":{
|
|
16
|
+
"value":"add_dsl"
|
|
17
|
+
},
|
|
18
|
+
"opInterface":{
|
|
19
|
+
"value":"add_dsl"
|
|
20
|
+
},
|
|
21
|
+
"output0":{
|
|
22
|
+
"dtype":"float16",
|
|
23
|
+
"format":"NCHW",
|
|
24
|
+
"name":"y",
|
|
25
|
+
"paramType":"required"
|
|
26
|
+
}
|
|
27
|
+
},
|
|
28
|
+
"AddTik":{
|
|
29
|
+
"input0":{
|
|
30
|
+
"dtype":"float16",
|
|
31
|
+
"format":"ND",
|
|
32
|
+
"name":"x1",
|
|
33
|
+
"paramType":"required"
|
|
34
|
+
},
|
|
35
|
+
"input1":{
|
|
36
|
+
"dtype":"float16",
|
|
37
|
+
"format":"ND",
|
|
38
|
+
"name":"x2",
|
|
39
|
+
"paramType":"required"
|
|
40
|
+
},
|
|
41
|
+
"opFile":{
|
|
42
|
+
"value":"add_tik"
|
|
43
|
+
},
|
|
44
|
+
"opInterface":{
|
|
45
|
+
"value":"add_tik"
|
|
46
|
+
},
|
|
47
|
+
"output0":{
|
|
48
|
+
"dtype":"float16",
|
|
49
|
+
"format":"ND",
|
|
50
|
+
"name":"y",
|
|
51
|
+
"paramType":"required"
|
|
52
|
+
}
|
|
53
|
+
},
|
|
54
|
+
"FlashAttention":{
|
|
55
|
+
"input0":{
|
|
56
|
+
"dtype":"float16,float32,int32",
|
|
57
|
+
"format":"ND,ND,ND",
|
|
58
|
+
"name":"q",
|
|
59
|
+
"paramType":"required"
|
|
60
|
+
},
|
|
61
|
+
"input1":{
|
|
62
|
+
"dtype":"float16,float32,int32",
|
|
63
|
+
"format":"ND,ND,ND",
|
|
64
|
+
"name":"k",
|
|
65
|
+
"paramType":"required"
|
|
66
|
+
},
|
|
67
|
+
"input2":{
|
|
68
|
+
"dtype":"float16,float32,int32",
|
|
69
|
+
"format":"ND,ND,ND",
|
|
70
|
+
"name":"v",
|
|
71
|
+
"paramType":"required"
|
|
72
|
+
},
|
|
73
|
+
"input3":{
|
|
74
|
+
"dtype":"float16,float32,int32",
|
|
75
|
+
"format":"ND,ND,ND",
|
|
76
|
+
"name":"attention_mask",
|
|
77
|
+
"paramType":"required"
|
|
78
|
+
},
|
|
79
|
+
"opFile":{
|
|
80
|
+
"value":"flash_attention"
|
|
81
|
+
},
|
|
82
|
+
"opInterface":{
|
|
83
|
+
"value":"flash_attention"
|
|
84
|
+
},
|
|
85
|
+
"output0":{
|
|
86
|
+
"dtype":"float16,float32,int32",
|
|
87
|
+
"format":"ND,ND,ND",
|
|
88
|
+
"name":"y",
|
|
89
|
+
"paramType":"required"
|
|
90
|
+
}
|
|
91
|
+
},
|
|
92
|
+
"MatmulTik":{
|
|
93
|
+
"input0":{
|
|
94
|
+
"dtype":"int8,uint8,float16",
|
|
95
|
+
"format":"ND,ND,ND",
|
|
96
|
+
"name":"x1",
|
|
97
|
+
"needCompile":"false",
|
|
98
|
+
"paramType":"required",
|
|
99
|
+
"shape":"all"
|
|
100
|
+
},
|
|
101
|
+
"input1":{
|
|
102
|
+
"dtype":"int8,int8,float16",
|
|
103
|
+
"format":"ND,ND,ND",
|
|
104
|
+
"name":"x2",
|
|
105
|
+
"needCompile":"false",
|
|
106
|
+
"paramType":"required",
|
|
107
|
+
"shape":"all"
|
|
108
|
+
},
|
|
109
|
+
"opFile":{
|
|
110
|
+
"value":"matmul_tik"
|
|
111
|
+
},
|
|
112
|
+
"opInterface":{
|
|
113
|
+
"value":"matmul_tik"
|
|
114
|
+
},
|
|
115
|
+
"output0":{
|
|
116
|
+
"dtype":"int32,int32,float",
|
|
117
|
+
"format":"ND,ND,ND",
|
|
118
|
+
"name":"y",
|
|
119
|
+
"paramType":"required",
|
|
120
|
+
"shape":"all"
|
|
121
|
+
}
|
|
122
|
+
}
|
|
123
|
+
}
|
|
@@ -0,0 +1,123 @@
|
|
|
1
|
+
{
|
|
2
|
+
"AddDSL":{
|
|
3
|
+
"input0":{
|
|
4
|
+
"dtype":"float16",
|
|
5
|
+
"format":"NCHW",
|
|
6
|
+
"name":"x1",
|
|
7
|
+
"paramType":"required"
|
|
8
|
+
},
|
|
9
|
+
"input1":{
|
|
10
|
+
"dtype":"float16",
|
|
11
|
+
"format":"NCHW",
|
|
12
|
+
"name":"x2",
|
|
13
|
+
"paramType":"required"
|
|
14
|
+
},
|
|
15
|
+
"opFile":{
|
|
16
|
+
"value":"add_dsl"
|
|
17
|
+
},
|
|
18
|
+
"opInterface":{
|
|
19
|
+
"value":"add_dsl"
|
|
20
|
+
},
|
|
21
|
+
"output0":{
|
|
22
|
+
"dtype":"float16",
|
|
23
|
+
"format":"NCHW",
|
|
24
|
+
"name":"y",
|
|
25
|
+
"paramType":"required"
|
|
26
|
+
}
|
|
27
|
+
},
|
|
28
|
+
"AddTik":{
|
|
29
|
+
"input0":{
|
|
30
|
+
"dtype":"float16",
|
|
31
|
+
"format":"ND",
|
|
32
|
+
"name":"x1",
|
|
33
|
+
"paramType":"required"
|
|
34
|
+
},
|
|
35
|
+
"input1":{
|
|
36
|
+
"dtype":"float16",
|
|
37
|
+
"format":"ND",
|
|
38
|
+
"name":"x2",
|
|
39
|
+
"paramType":"required"
|
|
40
|
+
},
|
|
41
|
+
"opFile":{
|
|
42
|
+
"value":"add_tik"
|
|
43
|
+
},
|
|
44
|
+
"opInterface":{
|
|
45
|
+
"value":"add_tik"
|
|
46
|
+
},
|
|
47
|
+
"output0":{
|
|
48
|
+
"dtype":"float16",
|
|
49
|
+
"format":"ND",
|
|
50
|
+
"name":"y",
|
|
51
|
+
"paramType":"required"
|
|
52
|
+
}
|
|
53
|
+
},
|
|
54
|
+
"FlashAttention":{
|
|
55
|
+
"input0":{
|
|
56
|
+
"dtype":"float16,float32,int32",
|
|
57
|
+
"format":"ND,ND,ND",
|
|
58
|
+
"name":"q",
|
|
59
|
+
"paramType":"required"
|
|
60
|
+
},
|
|
61
|
+
"input1":{
|
|
62
|
+
"dtype":"float16,float32,int32",
|
|
63
|
+
"format":"ND,ND,ND",
|
|
64
|
+
"name":"k",
|
|
65
|
+
"paramType":"required"
|
|
66
|
+
},
|
|
67
|
+
"input2":{
|
|
68
|
+
"dtype":"float16,float32,int32",
|
|
69
|
+
"format":"ND,ND,ND",
|
|
70
|
+
"name":"v",
|
|
71
|
+
"paramType":"required"
|
|
72
|
+
},
|
|
73
|
+
"input3":{
|
|
74
|
+
"dtype":"float16,float32,int32",
|
|
75
|
+
"format":"ND,ND,ND",
|
|
76
|
+
"name":"attention_mask",
|
|
77
|
+
"paramType":"required"
|
|
78
|
+
},
|
|
79
|
+
"opFile":{
|
|
80
|
+
"value":"flash_attention"
|
|
81
|
+
},
|
|
82
|
+
"opInterface":{
|
|
83
|
+
"value":"flash_attention"
|
|
84
|
+
},
|
|
85
|
+
"output0":{
|
|
86
|
+
"dtype":"float16,float32,int32",
|
|
87
|
+
"format":"ND,ND,ND",
|
|
88
|
+
"name":"y",
|
|
89
|
+
"paramType":"required"
|
|
90
|
+
}
|
|
91
|
+
},
|
|
92
|
+
"MatmulTik":{
|
|
93
|
+
"input0":{
|
|
94
|
+
"dtype":"int8,uint8,float16",
|
|
95
|
+
"format":"ND,ND,ND",
|
|
96
|
+
"name":"x1",
|
|
97
|
+
"needCompile":"false",
|
|
98
|
+
"paramType":"required",
|
|
99
|
+
"shape":"all"
|
|
100
|
+
},
|
|
101
|
+
"input1":{
|
|
102
|
+
"dtype":"int8,int8,float16",
|
|
103
|
+
"format":"ND,ND,ND",
|
|
104
|
+
"name":"x2",
|
|
105
|
+
"needCompile":"false",
|
|
106
|
+
"paramType":"required",
|
|
107
|
+
"shape":"all"
|
|
108
|
+
},
|
|
109
|
+
"opFile":{
|
|
110
|
+
"value":"matmul_tik"
|
|
111
|
+
},
|
|
112
|
+
"opInterface":{
|
|
113
|
+
"value":"matmul_tik"
|
|
114
|
+
},
|
|
115
|
+
"output0":{
|
|
116
|
+
"dtype":"int32,int32,float",
|
|
117
|
+
"format":"ND,ND,ND",
|
|
118
|
+
"name":"y",
|
|
119
|
+
"paramType":"required",
|
|
120
|
+
"shape":"all"
|
|
121
|
+
}
|
|
122
|
+
}
|
|
123
|
+
}
|
|
@@ -0,0 +1,158 @@
|
|
|
1
|
+
{
|
|
2
|
+
"AddDSL":{
|
|
3
|
+
"input0":{
|
|
4
|
+
"dtype":"float16,float32,int32",
|
|
5
|
+
"format":"ND,ND,ND",
|
|
6
|
+
"name":"x1",
|
|
7
|
+
"paramType":"required"
|
|
8
|
+
},
|
|
9
|
+
"input1":{
|
|
10
|
+
"dtype":"float16,float32,int32",
|
|
11
|
+
"format":"ND,ND,ND",
|
|
12
|
+
"name":"x2",
|
|
13
|
+
"paramType":"required"
|
|
14
|
+
},
|
|
15
|
+
"opFile":{
|
|
16
|
+
"value":"add_dsl"
|
|
17
|
+
},
|
|
18
|
+
"opInterface":{
|
|
19
|
+
"value":"add_dsl"
|
|
20
|
+
},
|
|
21
|
+
"output0":{
|
|
22
|
+
"dtype":"float16,float32,int32",
|
|
23
|
+
"format":"ND,ND,ND",
|
|
24
|
+
"name":"y",
|
|
25
|
+
"paramType":"required"
|
|
26
|
+
}
|
|
27
|
+
},
|
|
28
|
+
"AddTik":{
|
|
29
|
+
"input0":{
|
|
30
|
+
"dtype":"float16,float32,int32",
|
|
31
|
+
"format":"ND,ND,ND",
|
|
32
|
+
"name":"x1",
|
|
33
|
+
"paramType":"required"
|
|
34
|
+
},
|
|
35
|
+
"input1":{
|
|
36
|
+
"dtype":"float16,float32,int32",
|
|
37
|
+
"format":"ND,ND,ND",
|
|
38
|
+
"name":"x2",
|
|
39
|
+
"paramType":"required"
|
|
40
|
+
},
|
|
41
|
+
"opFile":{
|
|
42
|
+
"value":"add_tik"
|
|
43
|
+
},
|
|
44
|
+
"opInterface":{
|
|
45
|
+
"value":"add_tik"
|
|
46
|
+
},
|
|
47
|
+
"output0":{
|
|
48
|
+
"dtype":"float16,float32,int32",
|
|
49
|
+
"format":"ND,ND,ND",
|
|
50
|
+
"name":"y",
|
|
51
|
+
"paramType":"required"
|
|
52
|
+
}
|
|
53
|
+
},
|
|
54
|
+
"FlashAttention":{
|
|
55
|
+
"input0":{
|
|
56
|
+
"dtype":"float16,float32,int32",
|
|
57
|
+
"format":"ND,ND,ND",
|
|
58
|
+
"name":"q",
|
|
59
|
+
"paramType":"required"
|
|
60
|
+
},
|
|
61
|
+
"input1":{
|
|
62
|
+
"dtype":"float16,float32,int32",
|
|
63
|
+
"format":"ND,ND,ND",
|
|
64
|
+
"name":"k",
|
|
65
|
+
"paramType":"required"
|
|
66
|
+
},
|
|
67
|
+
"input2":{
|
|
68
|
+
"dtype":"float16,float32,int32",
|
|
69
|
+
"format":"ND,ND,ND",
|
|
70
|
+
"name":"v",
|
|
71
|
+
"paramType":"required"
|
|
72
|
+
},
|
|
73
|
+
"input3":{
|
|
74
|
+
"dtype":"float16,float32,int32",
|
|
75
|
+
"format":"ND,ND,ND",
|
|
76
|
+
"name":"attention_mask",
|
|
77
|
+
"paramType":"required"
|
|
78
|
+
},
|
|
79
|
+
"opFile":{
|
|
80
|
+
"value":"flash_attention"
|
|
81
|
+
},
|
|
82
|
+
"opInterface":{
|
|
83
|
+
"value":"flash_attention"
|
|
84
|
+
},
|
|
85
|
+
"output0":{
|
|
86
|
+
"dtype":"float16,float32,int32",
|
|
87
|
+
"format":"ND,ND,ND",
|
|
88
|
+
"name":"y",
|
|
89
|
+
"paramType":"required"
|
|
90
|
+
}
|
|
91
|
+
},
|
|
92
|
+
"KVCacheMgr":{
|
|
93
|
+
"input0":{
|
|
94
|
+
"dtype":"float32, int32, uint32, float16, int16, uint16, int8, uint8",
|
|
95
|
+
"format":"ND,ND,ND,ND,ND,ND,ND,ND",
|
|
96
|
+
"name":"past",
|
|
97
|
+
"paramType":"required"
|
|
98
|
+
},
|
|
99
|
+
"input1":{
|
|
100
|
+
"dtype":"float32, int32, uint32, float16, int16, uint16, int8, uint8",
|
|
101
|
+
"format":"ND,ND,ND,ND,ND,ND,ND,ND",
|
|
102
|
+
"name":"cur",
|
|
103
|
+
"paramType":"required"
|
|
104
|
+
},
|
|
105
|
+
"input2":{
|
|
106
|
+
"dtype":"int32,int32,int32,int32,int32,int32,int32,int32",
|
|
107
|
+
"format":"ND,ND,ND,ND,ND,ND,ND,ND",
|
|
108
|
+
"name":"index",
|
|
109
|
+
"paramType":"required"
|
|
110
|
+
},
|
|
111
|
+
"needCheckSupport":{
|
|
112
|
+
"flag":"true"
|
|
113
|
+
},
|
|
114
|
+
"opFile":{
|
|
115
|
+
"value":"kv_cache_mgr"
|
|
116
|
+
},
|
|
117
|
+
"opInterface":{
|
|
118
|
+
"value":"kv_cache_mgr"
|
|
119
|
+
},
|
|
120
|
+
"output0":{
|
|
121
|
+
"dtype":"float32, int32, uint32, float16, int16, uint16, int8, uint8",
|
|
122
|
+
"format":"ND,ND,ND,ND,ND,ND,ND,ND",
|
|
123
|
+
"name":"past",
|
|
124
|
+
"paramType":"required"
|
|
125
|
+
}
|
|
126
|
+
},
|
|
127
|
+
"MatmulTik":{
|
|
128
|
+
"input0":{
|
|
129
|
+
"dtype":"int8,uint8,float16",
|
|
130
|
+
"format":"ND,ND,ND",
|
|
131
|
+
"name":"x1",
|
|
132
|
+
"needCompile":"false",
|
|
133
|
+
"paramType":"required",
|
|
134
|
+
"shape":"all"
|
|
135
|
+
},
|
|
136
|
+
"input1":{
|
|
137
|
+
"dtype":"int8,int8,float16",
|
|
138
|
+
"format":"ND,ND,ND",
|
|
139
|
+
"name":"x2",
|
|
140
|
+
"needCompile":"false",
|
|
141
|
+
"paramType":"required",
|
|
142
|
+
"shape":"all"
|
|
143
|
+
},
|
|
144
|
+
"opFile":{
|
|
145
|
+
"value":"matmul_tik"
|
|
146
|
+
},
|
|
147
|
+
"opInterface":{
|
|
148
|
+
"value":"matmul_tik"
|
|
149
|
+
},
|
|
150
|
+
"output0":{
|
|
151
|
+
"dtype":"int32,int32,float",
|
|
152
|
+
"format":"ND,ND,ND",
|
|
153
|
+
"name":"y",
|
|
154
|
+
"paramType":"required",
|
|
155
|
+
"shape":"all"
|
|
156
|
+
}
|
|
157
|
+
}
|
|
158
|
+
}
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
{
|
|
2
|
+
"KVCacheMgr":{
|
|
3
|
+
"input0":{
|
|
4
|
+
"dtype":"float32, int32, uint32, float16, int16, uint16, int8, uint8",
|
|
5
|
+
"format":"ND,ND,ND,ND,ND,ND,ND,ND",
|
|
6
|
+
"name":"past",
|
|
7
|
+
"paramType":"required"
|
|
8
|
+
},
|
|
9
|
+
"input1":{
|
|
10
|
+
"dtype":"float32, int32, uint32, float16, int16, uint16, int8, uint8",
|
|
11
|
+
"format":"ND,ND,ND,ND,ND,ND,ND,ND",
|
|
12
|
+
"name":"cur",
|
|
13
|
+
"paramType":"required"
|
|
14
|
+
},
|
|
15
|
+
"input2":{
|
|
16
|
+
"dtype":"int32,int32,int32,int32,int32,int32,int32,int32",
|
|
17
|
+
"format":"ND,ND,ND,ND,ND,ND,ND,ND",
|
|
18
|
+
"name":"index",
|
|
19
|
+
"paramType":"required"
|
|
20
|
+
},
|
|
21
|
+
"needCheckSupport":{
|
|
22
|
+
"flag":"true"
|
|
23
|
+
},
|
|
24
|
+
"opFile":{
|
|
25
|
+
"value":"kv_cache_mgr"
|
|
26
|
+
},
|
|
27
|
+
"opInterface":{
|
|
28
|
+
"value":"kv_cache_mgr"
|
|
29
|
+
},
|
|
30
|
+
"output0":{
|
|
31
|
+
"dtype":"float32, int32, uint32, float16, int16, uint16, int8, uint8",
|
|
32
|
+
"format":"ND,ND,ND,ND,ND,ND,ND,ND",
|
|
33
|
+
"name":"past",
|
|
34
|
+
"paramType":"required"
|
|
35
|
+
}
|
|
36
|
+
}
|
|
37
|
+
}
|
mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/custom_aicore_ops_impl/add_dsl.py
ADDED
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
# Copyright 2022 Huawei Technologies Co., Ltd
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
# ============================================================================
|
|
15
|
+
"""ascend custom op: add by dsl"""
|
|
16
|
+
import tbe.dsl as tbe
|
|
17
|
+
from tbe import tvm
|
|
18
|
+
from tbe.common.register import register_op_compute
|
|
19
|
+
from tbe.common.utils import para_check
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
@register_op_compute("add_dsl")
|
|
23
|
+
def add_dsl_compute(x1, x2, y, kernel_name="add_dsl"):
|
|
24
|
+
res = tbe.vadd(x1, x2)
|
|
25
|
+
return res
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
@para_check.check_op_params(para_check.REQUIRED_INPUT, para_check.REQUIRED_INPUT,
|
|
29
|
+
para_check.REQUIRED_OUTPUT, para_check.KERNEL_NAME)
|
|
30
|
+
def add_dsl(x1, x2, y, kernel_name="add_dsl"):
|
|
31
|
+
"""add dsl impl function"""
|
|
32
|
+
data_x1 = tvm.placeholder(
|
|
33
|
+
x1.get("shape"), dtype=x1.get("dtype"), name="data_x1")
|
|
34
|
+
data_x2 = tvm.placeholder(
|
|
35
|
+
x2.get("shape"), dtype=x2.get("dtype"), name="data_x2")
|
|
36
|
+
|
|
37
|
+
res = add_dsl_compute(data_x1, data_x2, y, kernel_name)
|
|
38
|
+
|
|
39
|
+
# auto schedule
|
|
40
|
+
with tvm.target.cce():
|
|
41
|
+
schedule = tbe.auto_schedule(res)
|
|
42
|
+
|
|
43
|
+
# operator build
|
|
44
|
+
config = {"name": kernel_name,
|
|
45
|
+
"tensor_list": [data_x1, data_x2, res]}
|
|
46
|
+
tbe.build(schedule, config)
|
mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/custom_aicore_ops_impl/add_tik.py
ADDED
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
# Copyright 2022 Huawei Technologies Co., Ltd
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
# ============================================================================
|
|
15
|
+
"""ascend custom op: add by tik"""
|
|
16
|
+
from tbe.common.register import register_op_compute
|
|
17
|
+
from tbe.common.utils import para_check
|
|
18
|
+
from tbe import tik
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
@register_op_compute("AddTik")
|
|
22
|
+
@para_check.check_op_params(para_check.REQUIRED_INPUT, para_check.REQUIRED_INPUT,
|
|
23
|
+
para_check.REQUIRED_OUTPUT, para_check.KERNEL_NAME)
|
|
24
|
+
def add_tik(x1, x2, y, kernel_name="add_tik"):
|
|
25
|
+
"""add tik impl function"""
|
|
26
|
+
tik_instance = tik.Tik()
|
|
27
|
+
x1_shape = x1.get("shape")
|
|
28
|
+
x2_shape = x2.get("shape")
|
|
29
|
+
y_shape = y.get("shape")
|
|
30
|
+
|
|
31
|
+
data_a = tik_instance.Tensor(
|
|
32
|
+
"float16", x1_shape, name="x1", scope=tik.scope_gm)
|
|
33
|
+
data_b = tik_instance.Tensor(
|
|
34
|
+
"float16", x2_shape, name="x2", scope=tik.scope_gm)
|
|
35
|
+
data_c = tik_instance.Tensor(
|
|
36
|
+
"float16", y_shape, name="y", scope=tik.scope_gm)
|
|
37
|
+
data_a_ub = tik_instance.Tensor(
|
|
38
|
+
"float16", x1_shape, name="data_A_ub", scope=tik.scope_ubuf)
|
|
39
|
+
data_b_ub = tik_instance.Tensor(
|
|
40
|
+
"float16", x2_shape, name="data_B_ub", scope=tik.scope_ubuf)
|
|
41
|
+
data_c_ub = tik_instance.Tensor(
|
|
42
|
+
"float16", y_shape, name="data_C_ub", scope=tik.scope_ubuf)
|
|
43
|
+
|
|
44
|
+
tik_instance.data_move(data_a_ub, data_a, 0, 1, 128 // 16, 0, 0)
|
|
45
|
+
tik_instance.data_move(data_b_ub, data_b, 0, 1, 128 // 16, 0, 0)
|
|
46
|
+
tik_instance.vec_add(
|
|
47
|
+
128, data_c_ub[0], data_a_ub[0], data_b_ub[0], 1, 8, 8, 8)
|
|
48
|
+
tik_instance.data_move(data_c, data_c_ub, 0, 1, 128 // 16, 0, 0)
|
|
49
|
+
tik_instance.BuildCCE(kernel_name=kernel_name, inputs=[data_a, data_b], outputs=[data_c])
|
|
50
|
+
|
|
51
|
+
return tik_instance
|