mindspore 2.3.0__cp39-none-any.whl → 2.3.0rc2__cp39-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mindspore might be problematic. See the advisory on the package's registry listing for more details.

Files changed (423)
  1. mindspore/.commit_id +1 -1
  2. mindspore/Third_Party_Open_Source_Software_Notice +0 -1512
  3. mindspore/__init__.py +1 -2
  4. mindspore/_c_dataengine.cpython-39-aarch64-linux-gnu.so +0 -0
  5. mindspore/_c_expression.cpython-39-aarch64-linux-gnu.so +0 -0
  6. mindspore/_c_mindrecord.cpython-39-aarch64-linux-gnu.so +0 -0
  7. mindspore/_checkparam.py +25 -5
  8. mindspore/_extends/graph_kernel/model/graph_parallel.py +1 -1
  9. mindspore/_extends/parse/__init__.py +2 -2
  10. mindspore/_extends/parse/compile_config.py +0 -29
  11. mindspore/_extends/parse/namespace.py +2 -2
  12. mindspore/_extends/parse/parser.py +5 -21
  13. mindspore/_extends/parse/resources.py +7 -5
  14. mindspore/_extends/parse/standard_method.py +59 -40
  15. mindspore/_mindspore_offline_debug.cpython-39-aarch64-linux-gnu.so +0 -0
  16. mindspore/amp.py +5 -26
  17. mindspore/bin/cache_admin +0 -0
  18. mindspore/bin/cache_server +0 -0
  19. mindspore/boost/adasum.py +1 -1
  20. mindspore/boost/base.py +1 -1
  21. mindspore/boost/boost_cell_wrapper.py +1 -1
  22. mindspore/boost/grad_freeze.py +2 -2
  23. mindspore/boost/less_batch_normalization.py +6 -9
  24. mindspore/common/__init__.py +1 -8
  25. mindspore/common/_register_for_tensor.py +9 -8
  26. mindspore/common/api.py +65 -275
  27. mindspore/common/dtype.py +4 -8
  28. mindspore/common/dump.py +5 -2
  29. mindspore/common/jit_config.py +1 -1
  30. mindspore/common/lazy_inline.py +2 -14
  31. mindspore/common/parameter.py +15 -14
  32. mindspore/common/recompute.py +5 -20
  33. mindspore/common/sparse_tensor.py +6 -21
  34. mindspore/common/tensor.py +52 -100
  35. mindspore/communication/__init__.py +11 -6
  36. mindspore/communication/management.py +94 -92
  37. mindspore/context.py +18 -180
  38. mindspore/dataset/engine/datasets.py +46 -69
  39. mindspore/dataset/engine/datasets_user_defined.py +53 -72
  40. mindspore/dataset/engine/datasets_vision.py +2 -2
  41. mindspore/dataset/engine/queue.py +38 -56
  42. mindspore/dataset/engine/validators.py +5 -11
  43. mindspore/dataset/vision/__init__.py +5 -5
  44. mindspore/dataset/vision/c_transforms.py +5 -5
  45. mindspore/dataset/vision/py_transforms_util.py +1 -1
  46. mindspore/dataset/vision/transforms.py +46 -591
  47. mindspore/dataset/vision/utils.py +1 -121
  48. mindspore/dataset/vision/validators.py +3 -9
  49. mindspore/hal/__init__.py +1 -7
  50. mindspore/hal/device.py +1 -1
  51. mindspore/include/api/model.h +0 -3
  52. mindspore/include/dataset/vision.h +2 -54
  53. mindspore/include/mindapi/base/types.h +0 -1
  54. mindspore/lib/libdnnl.so.2 +0 -0
  55. mindspore/lib/libmindspore.so +0 -0
  56. mindspore/lib/libmindspore_backend.so +0 -0
  57. mindspore/lib/libmindspore_common.so +0 -0
  58. mindspore/lib/libmindspore_core.so +0 -0
  59. mindspore/lib/libmindspore_glog.so.0 +0 -0
  60. mindspore/lib/libmindspore_gpr.so.15 +0 -0
  61. mindspore/lib/libmindspore_grpc++.so.1 +0 -0
  62. mindspore/lib/libmindspore_grpc.so.15 +0 -0
  63. mindspore/lib/libmindspore_shared_lib.so +0 -0
  64. mindspore/lib/libmpi_adapter.so +0 -0
  65. mindspore/lib/libmpi_collective.so +0 -0
  66. mindspore/lib/libnnacl.so +0 -0
  67. mindspore/lib/libopencv_core.so.4.5 +0 -0
  68. mindspore/lib/libps_cache.so +0 -0
  69. mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/config/ascend310p/aic-ascend310p-ops-info.json +0 -35
  70. mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/custom_aicore_ops_impl/kv_cache_mgr.py +0 -2
  71. mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/vector_core/tbe/custom_aicore_ops_impl/kv_cache_mgr.py +0 -2
  72. mindspore/lib/plugin/ascend/custom_aicpu_ops/op_impl/cpu/aicpu_kernel/impl/libcust_cpu_kernels.so +0 -0
  73. mindspore/lib/plugin/ascend/custom_aicpu_ops/op_impl/cpu/config/cust_aicpu_kernel.json +0 -72
  74. mindspore/lib/plugin/ascend/custom_aicpu_ops/op_proto/libcust_op_proto.so +0 -0
  75. mindspore/lib/plugin/ascend/custom_ascendc_ops/op_api/include/{aclnn_all_finite.h → aclnn_add_custom.h} +11 -9
  76. mindspore/lib/plugin/ascend/custom_ascendc_ops/op_api/include/aclnn_decoder_kv_cache.h +1 -1
  77. mindspore/lib/plugin/ascend/custom_ascendc_ops/op_api/include/aclnn_prompt_kv_cache.h +1 -1
  78. mindspore/lib/plugin/ascend/custom_ascendc_ops/op_api/lib/libcust_opapi.so +0 -0
  79. mindspore/lib/plugin/ascend/custom_ascendc_ops/op_impl/ai_core/tbe/config/ascend310p/aic-ascend310p-ops-info.json +12 -184
  80. mindspore/lib/plugin/ascend/custom_ascendc_ops/op_impl/ai_core/tbe/config/ascend910/aic-ascend910-ops-info.json +15 -7
  81. mindspore/lib/plugin/ascend/custom_ascendc_ops/op_impl/ai_core/tbe/config/ascend910b/aic-ascend910b-ops-info.json +15 -7
  82. mindspore/lib/plugin/ascend/custom_ascendc_ops/op_impl/ai_core/tbe/custom_ascendc_ops_impl/dynamic/add_custom.cpp +81 -0
  83. mindspore/lib/plugin/ascend/custom_ascendc_ops/op_impl/ai_core/tbe/custom_ascendc_ops_impl/dynamic/add_custom.py +134 -0
  84. mindspore/lib/plugin/ascend/custom_ascendc_ops/op_impl/ai_core/tbe/custom_ascendc_ops_impl/dynamic/decoder_kv_cache.py +31 -77
  85. mindspore/lib/plugin/ascend/custom_ascendc_ops/op_impl/ai_core/tbe/custom_ascendc_ops_impl/dynamic/prompt_kv_cache.py +31 -77
  86. mindspore/lib/plugin/ascend/custom_ascendc_ops/op_impl/ai_core/tbe/op_tiling/lib/linux/aarch64/libcust_opmaster_rt2.0.so +0 -0
  87. mindspore/lib/plugin/ascend/custom_ascendc_ops/op_impl/ai_core/tbe/op_tiling/liboptiling.so +0 -0
  88. mindspore/lib/plugin/ascend/custom_ascendc_ops/op_proto/inc/op_proto.h +5 -4
  89. mindspore/lib/plugin/ascend/custom_ascendc_ops/op_proto/lib/linux/aarch64/libcust_opsproto_rt2.0.so +0 -0
  90. mindspore/lib/plugin/ascend/libascend_collective.so +0 -0
  91. mindspore/lib/plugin/ascend/libdvpp_utils.so +0 -0
  92. mindspore/lib/plugin/ascend/libhccl_plugin.so +0 -0
  93. mindspore/lib/plugin/ascend/liblowlatency_collective.so +0 -0
  94. mindspore/lib/plugin/ascend/libmindspore_cpu_kernels.so +0 -0
  95. mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/bin/DeviceBin +0 -0
  96. mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/bin/PkgInspect +0 -0
  97. mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/bin/op_man +0 -0
  98. mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/device/ascend910b/bin/ascend910b.bin +286 -275
  99. mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/host/libasdops_cann_host.so +0 -0
  100. mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/host/libasdops_host.so +0 -0
  101. mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/lib/libasdops.so +0 -0
  102. mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/lib/libasdops_static.a +0 -0
  103. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/add/add_impl.h +0 -1
  104. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/apply_rotary_pos_emb/apply_rotary_pos_emb_impl.h +0 -1
  105. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/asdop/asd_op_impl.h +0 -3
  106. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/backend_param.h +0 -5
  107. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/cast/cast_tiling.h +45 -1
  108. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/compare/compare_impl.h +0 -1
  109. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/flash_attention_score/flash_attention_score_impl.h +4 -8
  110. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/flash_attention_score/flash_attention_score_tiling.h +4 -11
  111. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/flash_attention_score/kernel/flash_attention_score_mix_hwsync.h +0 -18
  112. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/internal_kernel.h +0 -6
  113. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/internal_rtbackend.h +75 -1
  114. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/matmul/kernel/matmul.h +5 -5
  115. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/matmul/matmul_impl.h +3 -18
  116. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/matmul_common/pp_matmul_common_tiling.h +5 -5
  117. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/matmul_common/pp_matmul_info.h +2 -2
  118. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/matmul_common/tiling_data.h +3 -36
  119. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/matmul_stridedslice/kernel/matmul_stridedslice_fusion.h +2 -2
  120. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/matmul_stridedslice/matmul_stridedslice_fusion_impl.h +4 -22
  121. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/op_param.h +2 -16
  122. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/paged_attention/kernel/paged_attention_mix_hwsync.h +3 -1
  123. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/paged_attention/paged_attention_impl.h +4 -5
  124. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/paged_attention/paged_attention_tiling.h +4 -9
  125. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/param/attention_param.h +2 -5
  126. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/param/matmul_ext_param.h +0 -1
  127. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/param/matmul_qkv_param.h +4 -10
  128. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/param/sub_param.h +12 -0
  129. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/rms_norm/rms_norm_impl.h +0 -1
  130. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/sub/sub_impl.h +0 -1
  131. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/tune_repo/matmul_table.h +1 -1
  132. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/utils/backend.h +2 -10
  133. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/utils/elewise_utils.h +1 -5
  134. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/utils/log/log.h +0 -1
  135. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/utils/log/log_tiling.h +0 -17
  136. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/utils/math.h +7 -2
  137. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libAdd_impl.so +0 -0
  138. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libSub_impl.so +0 -0
  139. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libadd_layernorm_impl.so +0 -0
  140. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libadd_rms_norm_impl.so +0 -0
  141. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libapply_rotary_pos_emb_impl.so +0 -0
  142. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libcast_impl.so +0 -0
  143. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libgelu_impl.so +0 -0
  144. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libmatmul_impl.so +0 -0
  145. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libmatmul_stridedslice_fusion_impl.so +0 -0
  146. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libms_kernels_internal.so +0 -0
  147. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libnot_equal_impl.so +0 -0
  148. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libreshape_and_cache_impl.so +0 -0
  149. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/librms_norm_impl.so +0 -0
  150. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/BSAttention/flash_attention_score_bf16_bnsd_full_mix.o +0 -0
  151. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/BSAttention/flash_attention_score_bf16_bnsd_tri_mix.o +0 -0
  152. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/BSAttention/flash_attention_score_bf16_bsh_full_mix.o +0 -0
  153. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/BSAttention/flash_attention_score_bf16_bsh_tri_mix.o +0 -0
  154. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/BSAttention/flash_attention_score_fp16_bnsd_full_mix.o +0 -0
  155. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/BSAttention/flash_attention_score_fp16_bnsd_tri_mix.o +0 -0
  156. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/BSAttention/flash_attention_score_fp16_bsh_full_mix.o +0 -0
  157. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/BSAttention/flash_attention_score_fp16_bsh_tri_mix.o +0 -0
  158. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/BSAttention/paged_attention_bf16_bnsd_full_mix.o +0 -0
  159. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/BSAttention/paged_attention_bf16_bsh_full_mix.o +0 -0
  160. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/BSAttention/paged_attention_fp16_bnsd_full_mix.o +0 -0
  161. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/BSAttention/paged_attention_fp16_bsh_full_mix.o +0 -0
  162. mindspore/lib/plugin/ascend/ms_kernels_internal/lccl/lib/liblcal.so +0 -0
  163. mindspore/lib/plugin/ascend/ms_kernels_internal/lccl/lib/liblccl_wrapper.so +0 -0
  164. mindspore/lib/plugin/libmindspore_ascend.so.2 +0 -0
  165. mindspore/mindrecord/filewriter.py +2 -2
  166. mindspore/mint/__init__.py +40 -720
  167. mindspore/mint/nn/__init__.py +7 -89
  168. mindspore/mint/nn/functional.py +16 -165
  169. mindspore/mint/optim/adamw.py +16 -15
  170. mindspore/nn/__init__.py +2 -0
  171. mindspore/nn/cell.py +98 -97
  172. mindspore/nn/extend/basic.py +2 -2
  173. mindspore/nn/extend/embedding.py +1 -1
  174. mindspore/nn/extend/layer/normalization.py +5 -7
  175. mindspore/nn/generator.py +297 -0
  176. mindspore/nn/layer/activation.py +3 -4
  177. mindspore/nn/layer/basic.py +16 -79
  178. mindspore/nn/layer/conv.py +8 -17
  179. mindspore/nn/layer/embedding.py +4 -1
  180. mindspore/nn/layer/math.py +1 -1
  181. mindspore/nn/layer/normalization.py +1 -1
  182. mindspore/nn/layer/pooling.py +0 -5
  183. mindspore/nn/layer/rnn_cells.py +2 -2
  184. mindspore/nn/loss/loss.py +19 -19
  185. mindspore/nn/optim/adasum.py +1 -1
  186. mindspore/nn/optim/sgd.py +2 -3
  187. mindspore/nn/probability/distribution/exponential.py +1 -1
  188. mindspore/nn/probability/distribution/geometric.py +1 -1
  189. mindspore/nn/probability/distribution/logistic.py +1 -1
  190. mindspore/nn/wrap/cell_wrapper.py +1 -25
  191. mindspore/nn/wrap/loss_scale.py +1 -24
  192. mindspore/numpy/array_ops.py +1 -5
  193. mindspore/numpy/dtypes.py +3 -3
  194. mindspore/numpy/math_ops.py +8 -8
  195. mindspore/ops/__init__.py +1 -1
  196. mindspore/ops/_grad_experimental/grad_comm_ops.py +16 -75
  197. mindspore/ops/_vmap/vmap_array_ops.py +0 -27
  198. mindspore/ops/_vmap/vmap_math_ops.py +1 -29
  199. mindspore/ops/_vmap/vmap_nn_ops.py +18 -19
  200. mindspore/ops/auto_generate/cpp_create_prim_instance_helper.py +8 -34
  201. mindspore/ops/auto_generate/gen_arg_dtype_cast.py +9 -2
  202. mindspore/ops/auto_generate/gen_arg_handler.py +0 -26
  203. mindspore/ops/auto_generate/gen_extend_func.py +27 -603
  204. mindspore/ops/auto_generate/gen_ops_def.py +203 -993
  205. mindspore/ops/auto_generate/gen_ops_prim.py +402 -1946
  206. mindspore/ops/auto_generate/pyboost_inner_prim.py +20 -90
  207. mindspore/ops/composite/base.py +6 -3
  208. mindspore/ops/composite/math_ops.py +1 -1
  209. mindspore/ops/composite/multitype_ops/_compile_utils.py +17 -24
  210. mindspore/ops/composite/multitype_ops/_constexpr_utils.py +1 -1
  211. mindspore/ops/extend/__init__.py +3 -2
  212. mindspore/ops/extend/array_func.py +51 -10
  213. mindspore/ops/extend/nn_func.py +78 -2
  214. mindspore/ops/function/__init__.py +13 -8
  215. mindspore/ops/function/array_func.py +179 -455
  216. mindspore/ops/function/clip_func.py +1 -1
  217. mindspore/ops/function/grad/grad_func.py +3 -3
  218. mindspore/ops/function/math_func.py +103 -117
  219. mindspore/ops/function/nn_func.py +163 -275
  220. mindspore/ops/function/other_func.py +2 -2
  221. mindspore/ops/function/random_func.py +69 -202
  222. mindspore/ops/function/sparse_func.py +4 -4
  223. mindspore/ops/functional.py +327 -332
  224. mindspore/ops/operations/__init__.py +3 -13
  225. mindspore/ops/operations/_grad_ops.py +27 -3
  226. mindspore/ops/operations/_inner_ops.py +356 -53
  227. mindspore/ops/operations/_rl_inner_ops.py +2 -2
  228. mindspore/ops/operations/_tensor_array.py +8 -8
  229. mindspore/ops/operations/array_ops.py +65 -82
  230. mindspore/ops/operations/comm_ops.py +93 -784
  231. mindspore/ops/operations/custom_ops.py +28 -51
  232. mindspore/ops/operations/debug_ops.py +4 -4
  233. mindspore/ops/operations/inner_ops.py +2 -2
  234. mindspore/ops/operations/manually_defined/ops_def.py +4 -304
  235. mindspore/ops/operations/math_ops.py +50 -3
  236. mindspore/ops/operations/nn_ops.py +247 -14
  237. mindspore/ops/operations/other_ops.py +3 -3
  238. mindspore/ops/operations/random_ops.py +1 -1
  239. mindspore/ops/operations/sparse_ops.py +1 -1
  240. mindspore/ops/primitive.py +8 -9
  241. mindspore/ops/silent_check.py +5 -5
  242. mindspore/ops_generate/arg_dtype_cast.py +9 -2
  243. mindspore/ops_generate/arg_handler.py +0 -26
  244. mindspore/ops_generate/gen_aclnn_implement.py +4 -1
  245. mindspore/ops_generate/gen_ops.py +4 -26
  246. mindspore/ops_generate/gen_pyboost_func.py +12 -41
  247. mindspore/ops_generate/gen_utils.py +0 -21
  248. mindspore/ops_generate/pyboost_utils.py +2 -7
  249. mindspore/ops_generate/template.py +0 -1
  250. mindspore/parallel/_auto_parallel_context.py +1 -21
  251. mindspore/parallel/_tensor.py +5 -0
  252. mindspore/parallel/_transformer/transformer.py +1 -1
  253. mindspore/parallel/_utils.py +1 -15
  254. mindspore/parallel/algo_parameter_config.py +3 -1
  255. mindspore/parallel/checkpoint_transform.py +9 -12
  256. mindspore/parallel/cluster/process_entity/_api.py +29 -28
  257. mindspore/parallel/cluster/process_entity/_utils.py +3 -13
  258. mindspore/parallel/cluster/run.py +16 -13
  259. mindspore/parallel/parameter_broadcast.py +2 -2
  260. mindspore/parallel/shard.py +17 -31
  261. mindspore/profiler/__init__.py +2 -3
  262. mindspore/profiler/common/util.py +2 -107
  263. mindspore/profiler/envprofiling.py +1 -1
  264. mindspore/profiler/parser/ascend_analysis/constant.py +21 -8
  265. mindspore/profiler/parser/ascend_analysis/file_manager.py +0 -82
  266. mindspore/profiler/parser/ascend_analysis/function_event.py +28 -43
  267. mindspore/profiler/parser/ascend_analysis/fwk_cann_parser.py +27 -49
  268. mindspore/profiler/parser/ascend_analysis/fwk_file_parser.py +10 -15
  269. mindspore/profiler/parser/ascend_analysis/msprof_timeline_parser.py +20 -25
  270. mindspore/profiler/parser/ascend_analysis/profiler_info_parser.py +5 -5
  271. mindspore/profiler/parser/ascend_analysis/trace_event_manager.py +1 -10
  272. mindspore/profiler/parser/ascend_hccl_generator.py +1 -4
  273. mindspore/profiler/parser/ascend_msprof_exporter.py +22 -43
  274. mindspore/profiler/parser/ascend_timeline_generator.py +5 -7
  275. mindspore/profiler/parser/minddata_parser.py +3 -72
  276. mindspore/profiler/profiling.py +59 -176
  277. mindspore/rewrite/api/node.py +1 -1
  278. mindspore/rewrite/common/namespace.py +5 -5
  279. mindspore/rewrite/parsers/assign_parser.py +0 -2
  280. mindspore/rewrite/parsers/class_def_parser.py +4 -8
  281. mindspore/run_check/_check_version.py +1 -1
  282. mindspore/scipy/fft.py +3 -1
  283. mindspore/scipy/linalg.py +3 -2
  284. mindspore/scipy/ops.py +3 -5
  285. mindspore/scipy/optimize/__init__.py +2 -2
  286. mindspore/train/__init__.py +4 -4
  287. mindspore/train/anf_ir_pb2.py +2 -8
  288. mindspore/train/callback/__init__.py +2 -5
  289. mindspore/train/callback/_backup_and_restore.py +2 -2
  290. mindspore/train/callback/_checkpoint.py +16 -104
  291. mindspore/train/callback/_landscape.py +1 -1
  292. mindspore/train/callback/_time_monitor.py +1 -1
  293. mindspore/train/data_sink.py +4 -5
  294. mindspore/train/dataset_helper.py +20 -45
  295. mindspore/train/model.py +38 -266
  296. mindspore/train/serialization.py +105 -256
  297. mindspore/train/summary/_summary_adapter.py +1 -1
  298. mindspore/version.py +1 -1
  299. {mindspore-2.3.0.dist-info → mindspore-2.3.0rc2.dist-info}/METADATA +2 -2
  300. {mindspore-2.3.0.dist-info → mindspore-2.3.0rc2.dist-info}/RECORD +303 -420
  301. mindspore/_extends/pijit/__init__.py +0 -23
  302. mindspore/_extends/pijit/pijit_func_white_list.py +0 -343
  303. mindspore/common/file_system.py +0 -48
  304. mindspore/common/generator.py +0 -260
  305. mindspore/common/no_inline.py +0 -54
  306. mindspore/common/np_dtype.py +0 -25
  307. mindspore/communication/comm_func.py +0 -1140
  308. mindspore/hal/memory.py +0 -326
  309. mindspore/lib/libavcodec.so.59 +0 -0
  310. mindspore/lib/libavdevice.so.59 +0 -0
  311. mindspore/lib/libavfilter.so.8 +0 -0
  312. mindspore/lib/libavformat.so.59 +0 -0
  313. mindspore/lib/libavutil.so.57 +0 -0
  314. mindspore/lib/libmindspore_np_dtype.so +0 -0
  315. mindspore/lib/libswresample.so.4 +0 -0
  316. mindspore/lib/libswscale.so.6 +0 -0
  317. mindspore/lib/plugin/ascend/custom_ascendc_ops/op_impl/ai_core/tbe/custom_ascendc_ops_impl/dynamic/all_finite.cpp +0 -326
  318. mindspore/lib/plugin/ascend/custom_ascendc_ops/op_impl/ai_core/tbe/custom_ascendc_ops_impl/dynamic/all_finite.py +0 -180
  319. mindspore/lib/plugin/ascend/custom_ascendc_ops/op_impl/ai_core/tbe/kernel/ascend910b/all_finite/AllFinite_576ceaeef5870c451cab59af55ea46ad.json +0 -58
  320. mindspore/lib/plugin/ascend/custom_ascendc_ops/op_impl/ai_core/tbe/kernel/ascend910b/all_finite/AllFinite_576ceaeef5870c451cab59af55ea46ad.o +0 -0
  321. mindspore/lib/plugin/ascend/custom_ascendc_ops/op_impl/ai_core/tbe/kernel/ascend910b/all_finite/AllFinite_86a73ff6e28d734c96bb8d3054f7dd18.json +0 -58
  322. mindspore/lib/plugin/ascend/custom_ascendc_ops/op_impl/ai_core/tbe/kernel/ascend910b/all_finite/AllFinite_86a73ff6e28d734c96bb8d3054f7dd18.o +0 -0
  323. mindspore/lib/plugin/ascend/custom_ascendc_ops/op_impl/ai_core/tbe/kernel/ascend910b/all_finite/AllFinite_f55e0ebaad1f2f572e43677336992fa0.json +0 -58
  324. mindspore/lib/plugin/ascend/custom_ascendc_ops/op_impl/ai_core/tbe/kernel/ascend910b/all_finite/AllFinite_f55e0ebaad1f2f572e43677336992fa0.o +0 -0
  325. mindspore/lib/plugin/ascend/custom_ascendc_ops/op_impl/ai_core/tbe/kernel/config/ascend910b/all_finite.json +0 -109
  326. mindspore/lib/plugin/ascend/custom_ascendc_ops/op_impl/ai_core/tbe/kernel/config/ascend910b/binary_info_config.json +0 -38
  327. mindspore/lib/plugin/ascend/custom_compiler/OWNERS +0 -12
  328. mindspore/lib/plugin/ascend/custom_compiler/setup.py +0 -255
  329. mindspore/lib/plugin/ascend/custom_compiler/start.sh +0 -26
  330. mindspore/lib/plugin/ascend/custom_compiler/template.json +0 -40
  331. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/include/acme.h +0 -24
  332. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/include/acme_op.h +0 -69
  333. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/include/base_type.h +0 -133
  334. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/include/op_creator.h +0 -32
  335. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/include/op_param.h +0 -35
  336. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/include/tiling_info.h +0 -60
  337. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/src/core/kernel_register.h +0 -37
  338. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/src/core/platform/platform_configs.h +0 -89
  339. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/src/core/platform/rt_funcs.h +0 -135
  340. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/src/ops/host_src/add_op.h +0 -34
  341. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/src/ops/host_src/asd_backoff_base.h +0 -62
  342. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/src/ops/host_src/asd_elewise_op.h +0 -33
  343. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/src/ops/host_src/asd_ops.h +0 -88
  344. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/src/ops/host_src/asd_pa_op.h +0 -45
  345. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/src/ops/host_src/cast_op.h +0 -52
  346. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/src/ops/host_src/matmul_op.h +0 -95
  347. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/src/utils/asd_utils.h +0 -84
  348. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/src/utils/comm_utils.h +0 -61
  349. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/apply_rotary_pos_emb/kernel/apply_rotary_pos_emb_fp32.h +0 -224
  350. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_binary/and_impl.h +0 -29
  351. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_binary/div_impl.h +0 -29
  352. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_binary/elewise_binary_impl.h +0 -48
  353. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_binary/elewise_binary_tiling.h +0 -25
  354. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_binary/kernel/and_kernel.h +0 -46
  355. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_binary/kernel/div_kernel.h +0 -46
  356. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_binary/kernel/elewise_binary_base.h +0 -260
  357. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_binary/kernel/elewise_binary_kernel.h +0 -35
  358. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_binary/kernel/max_kernel.h +0 -66
  359. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_binary/kernel/min_kernel.h +0 -66
  360. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_binary/kernel/mul_kernel.h +0 -66
  361. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_binary/kernel/or_kernel.h +0 -46
  362. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_binary/max_impl.h +0 -29
  363. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_binary/min_impl.h +0 -29
  364. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_binary/mul_impl.h +0 -29
  365. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_binary/or_impl.h +0 -29
  366. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_unary/abs_impl.h +0 -29
  367. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_unary/elewise_unary_impl.h +0 -47
  368. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_unary/elewise_unary_tiling.h +0 -24
  369. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_unary/exp_impl.h +0 -29
  370. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_unary/kernel/abs_kernel.h +0 -45
  371. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_unary/kernel/elewise_unary_base.h +0 -148
  372. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_unary/kernel/elewise_unary_kernel.h +0 -31
  373. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_unary/kernel/exp_kernel.h +0 -45
  374. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_unary/kernel/ln_kernel.h +0 -45
  375. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_unary/kernel/not_kernel.h +0 -45
  376. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_unary/kernel/reciprocal_kernel.h +0 -45
  377. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_unary/kernel/relu_kernel.h +0 -55
  378. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_unary/kernel/rsqrt_kernel.h +0 -45
  379. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_unary/kernel/sqrt_kernel.h +0 -45
  380. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_unary/ln_impl.h +0 -29
  381. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_unary/not_impl.h +0 -29
  382. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_unary/reciprocal_impl.h +0 -29
  383. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_unary/relu_impl.h +0 -29
  384. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_unary/rsqrt_impl.h +0 -29
  385. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_unary/sqrt_impl.h +0 -29
  386. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/grouped_matmul/grouped_matmul_impl.h +0 -45
  387. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/grouped_matmul/grouped_matmul_tiling.h +0 -187
  388. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/grouped_matmul/kernel/grouped_matmul.h +0 -245
  389. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/grouped_matmul/kernel/grouped_matmul_interface.h +0 -24
  390. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/grouped_matmul/kernel/grouped_matmul_utils.h +0 -111
  391. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/grouped_matmul/tiling_data.h +0 -54
  392. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/param/compare_param.h +0 -31
  393. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/param/elewise_param.h +0 -41
  394. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/param/grouped_matmul_param.h +0 -40
  395. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/profiling_util.h +0 -364
  396. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/utils/log/log_utils.h +0 -69
  397. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/utils/register/kernel_creator.h +0 -39
  398. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/utils/register/kernel_registry.h +0 -114
  399. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/utils/utils.h +0 -98
  400. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/MatMulPostFusionMixTactic/matmul_postfusion_mix.json +0 -19
  401. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/MatMulPostFusionMixTactic/matmul_postfusion_mix.o +0 -0
  402. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/MatMulPostFusionMixTactic/matmul_postfusion_mix_mix_aic_0.o +0 -0
  403. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/MatMulPostFusionMixTactic/matmul_postfusion_mix_mix_aiv_0.o +0 -0
  404. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/MultiMatMulPostFusionMixTactic/multi_matmul_postfusion_mix.json +0 -19
  405. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/MultiMatMulPostFusionMixTactic/multi_matmul_postfusion_mix.o +0 -0
  406. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/MultiMatMulPostFusionMixTactic/multi_matmul_postfusion_mix_mix_aic_0.o +0 -0
  407. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/MultiMatMulPostFusionMixTactic/multi_matmul_postfusion_mix_mix_aiv_0.o +0 -0
  408. mindspore/mint/linalg/__init__.py +0 -22
  409. mindspore/nn/layer/embedding_service.py +0 -531
  410. mindspore/nn/layer/embedding_service_layer.py +0 -393
  411. mindspore/ops/function/reshard_func.py +0 -102
  412. mindspore/ops/operations/_infer_ops.py +0 -19
  413. mindspore/ops/operations/reshard_ops.py +0 -53
  414. mindspore/profiler/common/process_pool.py +0 -41
  415. mindspore/profiler/common/singleton.py +0 -28
  416. mindspore/profiler/parser/ascend_integrate_generator.py +0 -42
  417. mindspore/profiler/parser/ascend_memory_generator.py +0 -185
  418. mindspore/train/callback/_cluster_monitor.py +0 -201
  419. mindspore/train/callback/_flops_collector.py +0 -238
  420. mindspore/train/callback/_mindio_ttp.py +0 -443
  421. {mindspore-2.3.0.dist-info → mindspore-2.3.0rc2.dist-info}/WHEEL +0 -0
  422. {mindspore-2.3.0.dist-info → mindspore-2.3.0rc2.dist-info}/entry_points.txt +0 -0
  423. {mindspore-2.3.0.dist-info → mindspore-2.3.0rc2.dist-info}/top_level.txt +0 -0
@@ -39,7 +39,6 @@ class AddImpl : public InternelKernelImpl {
39
39
  int Tiling(HostRawBuf &tilingBuf) override;
40
40
  std::vector<uint64_t> GetWorkSpaceSize() override;
41
41
  int InferShape(const std::vector<DIMS> &input_shapes, std::vector<DIMS> &output_shapes) override;
42
- bool IsSupported() override;
43
42
 
44
43
  private:
45
44
  void NoBroadCastTiling(AddTilingData *tiling);
@@ -36,7 +36,6 @@ class ApplyRotaryPosEmbImpl : public InternelKernelImpl {
36
36
  int InferShape(const std::vector<DIMS> &input_shapes, std::vector<DIMS> &output_shapes) override;
37
37
 
38
38
  private:
39
- void SetTilingID(RopeTilingData *tiling, int typeKey);
40
39
  DeviceRawBuf tiling_buf_;
41
40
  DeviceRawBuf workSpace_buf_;
42
41
  std::string soc_{"Ascend910B2"};
@@ -40,9 +40,6 @@ class AsdOpsImpl : public InternelKernelImpl {
40
40
  int Tiling(HostRawBuf &tilingBuf) override;
41
41
  std::vector<uint64_t> GetWorkSpaceSize() override;
42
42
  int InferShape(const std::vector<DIMS> &input_shapes, std::vector<DIMS> &output_shapes) override;
43
- std::string GetOpName() override {
44
- return tactic_->GetName();
45
- }
46
43
 
47
44
  private:
48
45
  AsdOps::Tactic *InitAndGetTactic();
@@ -27,7 +27,6 @@ struct HardwareInfo {
27
27
  uint32_t l0cSize{0};
28
28
  uint32_t hbmBandWidth{1};
29
29
  uint32_t l2BandWidth{5};
30
- uint32_t ubSize{0};
31
30
  };
32
31
 
33
32
  static void GetHardwareInfoPPMatmul910B1(HardwareInfo &hwInfo) {
@@ -37,7 +36,6 @@ static void GetHardwareInfoPPMatmul910B1(HardwareInfo &hwInfo) {
37
36
  hwInfo.l0aSize = 65536;
38
37
  hwInfo.l0bSize = 65536;
39
38
  hwInfo.l0cSize = 131072;
40
- hwInfo.ubSize = 196608;
41
39
  }
42
40
 
43
41
  static void GetHardwareInfoPPMatmul910B2(HardwareInfo &hwInfo) {
@@ -47,7 +45,6 @@ static void GetHardwareInfoPPMatmul910B2(HardwareInfo &hwInfo) {
47
45
  hwInfo.l0aSize = 65536;
48
46
  hwInfo.l0bSize = 65536;
49
47
  hwInfo.l0cSize = 131072;
50
- hwInfo.ubSize = 196608;
51
48
  }
52
49
 
53
50
  static void GetHardwareInfoPPMatmul910B3(HardwareInfo &hwInfo) {
@@ -57,7 +54,6 @@ static void GetHardwareInfoPPMatmul910B3(HardwareInfo &hwInfo) {
57
54
  hwInfo.l0aSize = 65536;
58
55
  hwInfo.l0bSize = 65536;
59
56
  hwInfo.l0cSize = 131072;
60
- hwInfo.ubSize = 196608;
61
57
  }
62
58
 
63
59
  static void GetHardwareInfoPPMatmul910B4(HardwareInfo &hwInfo) {
@@ -67,7 +63,6 @@ static void GetHardwareInfoPPMatmul910B4(HardwareInfo &hwInfo) {
67
63
  hwInfo.l0aSize = 65536;
68
64
  hwInfo.l0bSize = 65536;
69
65
  hwInfo.l0cSize = 131072;
70
- hwInfo.ubSize = 196608;
71
66
  }
72
67
  } // namespace internal
73
68
  } // namespace mindspore
@@ -17,6 +17,50 @@
17
17
  #ifndef MS_KERNELS_INTERNAL_KERNEL_ASCENDC_CAST_TILING_H_
18
18
  #define MS_KERNELS_INTERNAL_KERNEL_ASCENDC_CAST_TILING_H_
19
19
 
20
- #include "acme/src/ops/device_src/ascendc/cast/cast_tiling.h"
20
+ enum CastDType : int32_t {
21
+ FLOAT16_TO_FLOAT = 17,
22
+ FLOAT16_TO_UINT8,
23
+ FLOAT16_TO_INT8,
24
+ FLOAT16_TO_INT16,
25
+ FLOAT16_TO_INT32,
26
+ FLOAT16_TO_BF16,
27
+
28
+ FLOAT_TO_FLOAT16 = 33,
29
+ FLOAT_TO_UINT8,
30
+ FLOAT_TO_INT8,
31
+ FLOAT_TO_INT32,
32
+ FLOAT_TO_BF16,
33
+
34
+ INT8_TO_FLOAT16 = 48,
35
+ INT8_TO_FLOAT,
36
+ INT8_TO_BF16,
37
+
38
+ INT32_TO_INT64 = 99,
39
+ INT32_TO_FLOAT,
40
+
41
+ INT64_TO_INT32 = 114,
42
+ INT64_TO_FLOAT,
43
+
44
+ BF16_TO_FLOAT16 = 147,
45
+ BF16_TO_FLOAT,
46
+
47
+ UNSUPPORTED_DTYPE
48
+ };
49
+
50
+ typedef struct CastTilingData {
51
+ uint32_t buffer_num;
52
+ uint32_t cast_dtype;
53
+ uint32_t core_num;
54
+
55
+ uint32_t avg_block_count;
56
+ uint32_t avg_block_ub_num;
57
+ uint32_t avg_block_ub_tail;
58
+ uint32_t avg_block_ub_loop;
59
+
60
+ uint32_t tail_block_count;
61
+ uint32_t tail_block_ub_num;
62
+ uint32_t tail_block_ub_tail;
63
+ uint32_t tail_block_ub_loop;
64
+ } CastTilingData;
21
65
 
22
66
  #endif
@@ -34,7 +34,6 @@ class CompareImpl : public InternelKernelImpl {
34
34
  int Tiling(HostRawBuf &tilingBuf) override;
35
35
  std::vector<uint64_t> GetWorkSpaceSize() override;
36
36
  int InferShape(const std::vector<DIMS> &input_shapes, std::vector<DIMS> &output_shapes) override;
37
- bool IsSupported() override;
38
37
 
39
38
  private:
40
39
  int32_t GetMaxUbCount(uint32_t in_dtype);
@@ -37,6 +37,7 @@ class FlashAttentionScoreImpl : public InternelKernelImpl {
37
37
  virtual ~FlashAttentionScoreImpl() = default;
38
38
  bool Init(const ValidateInfo &info) override;
39
39
  void SetInputs(const std::vector<Tensor *> &inputs) override;
40
+ void SetOutputs(const std::vector<Tensor *> &outputs) override;
40
41
  void SetWorkSpace(const std::vector<DeviceRawBuf> &workspace) override;
41
42
  void SetStream(const void *stream_ptr) override;
42
43
  void SetDeviceTilingBuf(const DeviceRawBuf &tilingBuf) override;
@@ -45,18 +46,13 @@ class FlashAttentionScoreImpl : public InternelKernelImpl {
45
46
  int Tiling(HostRawBuf &tilingBuf) override;
46
47
  std::vector<uint64_t> GetWorkSpaceSize() override;
47
48
  int InferShape(const std::vector<DIMS> &input_shapes, std::vector<DIMS> &output_shapes) override;
48
- bool IsSupported() override;
49
49
 
50
50
  private:
51
- // init val
52
- int head_num_ = 0;
53
- int pre_tokens_ = 2147483647;
54
- int next_tokens_ = 0;
55
- int inner_precise_ = 0;
56
- int sparse_mode_ = 0;
57
- // impl val
58
51
  uint64_t B, N, Q_S, KV_S, D, G, CORE_NUM;
52
+ int inner_precise, pre_tokens, next_tokens, sparse_mode;
59
53
  bool BFLOAT16, BSH, ALIBI, AMASK;
54
+ const std::vector<Tensor *> *inputs_;
55
+ const std::vector<Tensor *> *outputs_;
60
56
  void *stream_ptr_ = nullptr;
61
57
  void *workspace_addr = nullptr;
62
58
  void *tiling_addr_ = nullptr;
@@ -21,7 +21,9 @@ typedef struct {
21
21
  #define ATTENTION_DEBUG false // 开启时会对S/P写入调试数据
22
22
  #define ROWMAX true
23
23
  #define OP_NAME FlashAttentionScore
24
- #define BUFFER_NUM 4 // 核间流水数,暂不支持修改
24
+ #define BUFFER_NUM 2 // 核间流水数,暂不支持修改
25
+ constexpr uint64_t WORKSPACE_MAX_SEQLEN = 16384; // max seqlen
26
+ constexpr uint64_t WORKSPACE_SIZE = 128 * WORKSPACE_MAX_SEQLEN;
25
27
 
26
28
  #if BFLOAT16
27
29
  #define TYPE_NAME _bf16
@@ -59,16 +61,7 @@ typedef struct {
59
61
  // 第四种:全矩阵,LOWER_TRIANGLE、BLOCK_SPARSE和AMASK如果全部关闭,则此attention采用全矩阵运算,不抑制S中的元素
60
62
  // *******************************************//
61
63
 
62
- constexpr uint64_t WORKSPACE_MAX_SEQLEN = 4096;
63
- constexpr uint64_t MAX_ROW = 128;
64
64
  constexpr uint64_t WORKSPACE_MAX_SEQLEN_BLOCK = WORKSPACE_MAX_SEQLEN / 16;
65
- constexpr uint64_t WORKSPACE_SIZE0 = MAX_ROW * WORKSPACE_MAX_SEQLEN; // for s, p
66
- constexpr uint64_t WORKSPACE_SIZE1 = MAX_ROW * MAX_ROW; // for o_tmp
67
- constexpr uint64_t WORKSPACE_SIZE2 = MAX_ROW * MAX_ROW; // for global_o
68
-
69
- constexpr uint64_t WORKSPACE_OFFSET1 = WORKSPACE_SIZE0;
70
- constexpr uint64_t WORKSPACE_OFFSET2 = WORKSPACE_OFFSET1 + WORKSPACE_SIZE1;
71
- constexpr uint64_t WORKSPACE_SIZE = WORKSPACE_SIZE0 + WORKSPACE_SIZE1 + WORKSPACE_SIZE2;
72
- constexpr uint64_t BUFFER_SIZE = WORKSPACE_SIZE * MAX_CORE_NUM * sizeof(uint16_t);
65
+ constexpr uint64_t BUFFER_SIZE = MAX_CORE_NUM * WORKSPACE_SIZE * sizeof(uint16_t);
73
66
 
74
67
  #endif
@@ -32,24 +32,6 @@ inline uint64_t round(uint64_t y, uint64_t x) {
32
32
  return ceil(y, x) * x;
33
33
  }
34
34
 
35
- inline uint64_t get_m(uint64_t D) {
36
- if (D <= 128) {
37
- return D;
38
- } else {
39
- return 64;
40
- }
41
- }
42
-
43
- inline bool isUpperTriangleTask(int32_t m_idx, int32_t kv_split_idx, int32_t m)
44
- {
45
- return (m_idx + 1) * m <= kv_split_idx * WORKSPACE_MAX_SEQLEN;
46
- }
47
-
48
- inline bool isLowerTriangleTask(int32_t m_idx, int32_t kv_split_idx, int32_t m)
49
- {
50
- return m_idx * m >= (kv_split_idx + 1) * WORKSPACE_MAX_SEQLEN;
51
- }
52
-
53
35
  #if BFLOAT16
54
36
  #define CALC_DATA_TYPE bfloat16_t
55
37
  #else
@@ -32,8 +32,6 @@ using HostRawBuf = RawBuf;
32
32
  using DeviceRawBuf = RawBuf;
33
33
 
34
34
  using OpParamPtr = std::shared_ptr<OpParam>;
35
- using DtypesParamPtr = std::shared_ptr<DtypesParam>;
36
-
37
35
  struct ValidateInfo {
38
36
  size_t input_num_;
39
37
  size_t output_num_;
@@ -55,14 +53,11 @@ class InternelKernelImpl {
55
53
  virtual void SetWorkSpace(const std::vector<DeviceRawBuf> &workspace);
56
54
  virtual void SetStream(const void *stream_ptr);
57
55
  virtual void SetDeviceTilingBuf(const DeviceRawBuf &tilingBuf) = 0;
58
- virtual int LaunchWithProfiling();
59
56
  virtual int Launch() = 0;
60
57
  virtual uint64_t GetTilingBufSize() = 0;
61
58
  virtual int Tiling(HostRawBuf &tilingBuf) = 0;
62
59
  virtual std::vector<uint64_t> GetWorkSpaceSize() = 0;
63
60
  virtual int InferShape(const std::vector<DIMS> &input_shapes, std::vector<DIMS> &output_shapes) = 0;
64
- virtual bool IsSupported() { return true; }
65
- virtual std::string GetOpName();
66
61
 
67
62
  virtual CacheInfo &GetCacheInfo() { return cache_info_; }
68
63
 
@@ -86,7 +81,6 @@ class InternelKernelImpl {
86
81
  };
87
82
  using InternalKernelImplPtr = std::shared_ptr<InternelKernelImpl>;
88
83
  InternalKernelImplPtr CreateInternalKernelImpl(const OpParamPtr &param);
89
- bool IsInternalKernelDtypesSupported(const DtypesParamPtr &param);
90
84
  } // namespace internal
91
85
  } // namespace mindspore
92
86
  #endif
@@ -16,6 +16,80 @@
16
16
  #ifndef MS_KERNEL_INTERNAL_INTERNAL_RTBACKEND_H
17
17
  #define MS_KERNEL_INTERNAL_INTERNAL_RTBACKEND_H
18
18
 
19
- #include "acme/src/core/platform/rt_funcs.h"
19
+ #ifdef __cplusplus
20
+ extern "C" {
21
+ #endif
20
22
 
23
+ #define RT_DEV_BINARY_MAGIC_ELF 0x43554245U
24
+ #define RT_DEV_BINARY_MAGIC_ELF_AIVEC 0x41415246U
25
+ #define RT_DEV_BINARY_MAGIC_ELF_AICUBE 0x41494343U
26
+
27
+ typedef void *rtStream_t;
28
+
29
+ typedef enum {
30
+ INTERNAL_RTSUCCESS = 0,
31
+ INTERNAL_RTERROR_NOT_INITIALIZED = -1,
32
+ INTERNAL_RTERROR_NOT_IMPLMENT = -2,
33
+ INTERNAL_RTERROR_ASCEND_ENV_NOT_EXIST = -3,
34
+ INTERNAL_RTERROR_LOAD_RUNTIME_FAIL = -4,
35
+ INTERNAL_RTERROR_FUNC_NOT_EXIST = -5,
36
+ INTERNAL_RTERROR_OPEN_BIN_FILE_FAIL = -6,
37
+ INTERNAL_RTERROR_PARA_CHECK_FAIL = -7,
38
+ } RtError;
39
+
40
+ typedef enum tagRtError {
41
+ RT_ERROR_NONE = 0x0, // success
42
+ RT_ERROR_INVALID_VALUE = 0x1, // invalid value
43
+ RT_ERROR_MEMORY_ALLOCATION = 0x2, // memory allocation fail
44
+ RT_ERROR_INVALID_RESOURCE_HANDLE = 0x3, // invalid handle
45
+ RT_ERROR_INVALID_DEVICE_POINTER = 0x4, // invalid device point
46
+ RT_ERROR_INVALID_MEMCPY_DIRECTION = 0x5, // invalid memory copy dirction
47
+ RT_ERROR_INVALID_DEVICE = 0x6, // invalid device
48
+ RT_ERROR_NO_DEVICE = 0x7, // no valid device
49
+ RT_ERROR_CMD_OCCUPY_FAILURE = 0x8, // command occpuy failure
50
+ RT_ERROR_SET_SIGNAL_FAILURE = 0x9, // set signal failure
51
+ RT_ERROR_UNSET_SIGNAL_FAILURE = 0xA, // unset signal failure
52
+ RT_ERROR_OPEN_FILE_FAILURE = 0xB, // unset signal failure
53
+ RT_ERROR_WRITE_FILE_FAILURE = 0xC,
54
+ RT_ERROR_MEMORY_ADDRESS_UNALIGNED = 0xD,
55
+ RT_ERROR_DRV_ERR = 0xE,
56
+ RT_ERROR_LOST_HEARTBEAT = 0xF,
57
+ RT_ERROR_REPORT_TIMEOUT = 0x10,
58
+ RT_ERROR_NOT_READY = 0x11,
59
+ RT_ERROR_DATA_OPERATION_FAIL = 0x12,
60
+ RT_ERROR_INVALID_L2_INSTR_SIZE = 0x13,
61
+ RT_ERROR_DEVICE_PROC_HANG_OUT = 0x14,
62
+ RT_ERROR_DEVICE_POWER_UP_FAIL = 0x15,
63
+ RT_ERROR_DEVICE_POWER_DOWN_FAIL = 0x16,
64
+ RT_ERROR_FEATURE_NOT_SUPPROT = 0x17,
65
+ RT_ERROR_KERNEL_DUPLICATE = 0x18, // register same kernel repeatly
66
+ RT_ERROR_MODEL_STREAM_EXE_FAILED = 0x91, // the model stream failed
67
+ RT_ERROR_MODEL_LOAD_FAILED = 0x94, // the model stream failed
68
+ RT_ERROR_END_OF_SEQUENCE = 0x95, // end of sequence
69
+ RT_ERROR_NO_STREAM_CB_REG = 0x96, // no callback register info for stream
70
+ RT_ERROR_DATA_DUMP_LOAD_FAILED = 0x97, // data dump load info fail
71
+ RT_ERROR_CALLBACK_THREAD_UNSUBSTRIBE = 0x98, // callback thread unsubstribe
72
+ RT_ERROR_RESERVED
73
+ } rtError_t;
74
+
75
+ // rt kernel
76
+ typedef struct {
77
+ uint32_t magic{0};
78
+ uint32_t version{0};
79
+ const void *data{nullptr};
80
+ uint64_t length{0};
81
+ } RtDevBinary_T;
82
+
83
+ typedef void *rtStream_t;
84
+
85
+ using RtDevBinaryRegisterFunc = rtError_t (*)(const RtDevBinary_T *bin, void **hdl);
86
+ using RtFunctionRegisterFunc = rtError_t (*)(void *binHandle, const void *subFunc, const char *stubName,
87
+ const void *kernelInfoExt, uint32_t funcMode);
88
+ using RtKernelLaunchFunc = rtError_t (*)(const void *stubFunc, uint32_t blockDim, void *args, uint32_t argsSize, void *smDesc,
89
+ rtStream_t sm);
90
+ using RtGetC2cCtrlAddrFunc = rtError_t (*)(uint64_t *addr, uint32_t *len);
91
+
92
+ #ifdef __cplusplus
93
+ }
94
+ #endif
21
95
  #endif // MS_KERNEL_INTERNAL_INTERNAL_RTBACKEND_H
@@ -14,8 +14,8 @@
14
14
  * limitations under the License.
15
15
  */
16
16
 
17
- #ifndef MS_KERNELS_INTERNAL_KERNEL_ASCENDC_MATMUL_KERNEL_H_
18
- #define MS_KERNELS_INTERNAL_KERNEL_ASCENDC_MATMUL_KERNEL_H_
19
- void MatMulOp(uint32_t blockDim, void *l2ctrl, void *stream, uint8_t *gm_a, uint8_t *gm_b, uint8_t *gm_bias, uint8_t *gm_scale,
20
- uint8_t *gm_c, uint8_t *tilingData);
21
- #endif // MS_KERNELS_INTERNAL_KERNEL_ASCENDC_MATMUL_KERNEL_H_
17
+ #ifndef MS_KERNELS_INTERNAL_KERNEL_ASCENDC_MATMUL_STRIDEDSLICE_fUSION_FP16_KERNEL_H_
18
+ #define MS_KERNELS_INTERNAL_KERNEL_ASCENDC_MATMUL_STRIDEDSLICE_fUSION_FP16_KERNEL_H_
19
+ void MatMulOp(uint32_t blockDim, void *l2ctrl, void *stream, uint8_t *gm_a, uint8_t *gm_b, uint8_t *gm_c,
20
+ uint8_t *tilingData);
21
+ #endif // MS_KERNELS_INTERNAL_KERNEL_ASCENDC_MATMUL_STRIDEDSLICE_fUSION_FP16_KERNEL_H_
@@ -42,7 +42,6 @@ namespace internal {
42
42
  using namespace tiling;
43
43
 
44
44
  enum class MatMulAlgo { PP = 0, LLM_CUSTOM = 1 };
45
- enum class MatMulFusionLevel { NONE = 0, CUBE = 1, MIX = 2 };
46
45
 
47
46
  class MatMulImpl : public InternelKernelImpl {
48
47
  public:
@@ -51,33 +50,22 @@ class MatMulImpl : public InternelKernelImpl {
51
50
  bool Init(const ValidateInfo &info) override;
52
51
  void SetDeviceTilingBuf(const DeviceRawBuf &tilingBuf) override;
53
52
  int Launch() override;
54
- int LaunchMix();
55
53
  size_t GetTilingBufSize() override;
56
54
  int Tiling(HostRawBuf &tilingBuf) override;
57
55
  void TilingBasicFromPp(uint32_t &blockDim, PpTilingData &tilingdata);
58
- int TilingPp(HostRawBuf &tilingBuf, uint32_t tilingId, const uint32_t &blockDim, const PpTilingData &tilingdata);
59
- int TilingLLMCustom(HostRawBuf &tilingBuf, uint32_t tilingId, const uint32_t &blockDim,
56
+ int TilingPp(HostRawBuf &tilingBuf, uint64_t tilingId, const uint32_t &blockDim, const PpTilingData &tilingdata);
57
+ int TilingLLMCustom(HostRawBuf &tilingBuf, uint64_t tilingId, const uint32_t &blockDim,
60
58
  const PpTilingData &tilingdata);
61
- void SetWorkSpace(const std::vector<DeviceRawBuf> &workspace) override;
62
59
  std::vector<uint64_t> GetWorkSpaceSize() override;
63
60
  int InferShape(const std::vector<DIMS> &input_shapes, std::vector<DIMS> &output_shapes) override;
64
- bool IsSupported() override;
65
- void RegsiterMixKernels();
66
-
67
61
  bool UseCustomMatMul();
68
62
  void GetTunedKey();
69
63
  void SetTunedValueCustom(const std::vector<int> &tuned_config);
70
- bool GenTilingId(uint32_t &tiling_id);
71
- void SetFusionLevel();
72
- void SetTilingKeyCustom();
73
64
 
74
65
  private:
75
66
  uint32_t m_, k_, n_;
76
67
  const char *func_name_ = "UnknownFunc";
77
68
  MatMulAlgo algo_ = MatMulAlgo::PP;
78
- MatMulFusionLevel fusion_level_ = MatMulFusionLevel::NONE;
79
- uint32_t fusion_type_{0};
80
- std::shared_ptr<MatMulExtParam> mm_ext_param_;
81
69
  DeviceRawBuf tiling_addr_;
82
70
  std::string soc_{"Ascend910B2"};
83
71
  HardwareInfo hwInfo_;
@@ -87,12 +75,9 @@ class MatMulImpl : public InternelKernelImpl {
87
75
  REPO tuningTableCustom_;
88
76
  TensorDType input_dtype_;
89
77
  TensorDType output_dtype_;
78
+ int block_dim_ = 0;
90
79
  bool trans_a_{false};
91
80
  bool trans_b_{true};
92
- bool enable_dequant_{false};
93
- static bool _is_inited;
94
- static std::unordered_map<const char *, const char *> internal_mix_matmul_kernels_map;
95
- void *workspace_addr = nullptr;
96
81
  };
97
82
 
98
83
  } // namespace internal
@@ -150,7 +150,7 @@ void TilingFunc(OpShareType &opShape, TilingType &tilingParam, const HardwareTyp
150
150
 
151
151
  template <typename PpTilingDataType>
152
152
  uint32_t Swizzl(PpTilingDataType &tilingData) {
153
- uint32_t swizzleDirect = 0;
153
+ uint32_t swizzlDirect = 0;
154
154
  uint32_t swizzlCount = 1;
155
155
  float m0 = tilingData.opShape.m0;
156
156
  float n0 = tilingData.opShape.n0;
@@ -164,14 +164,14 @@ uint32_t Swizzl(PpTilingDataType &tilingData) {
164
164
  float cost;
165
165
  // B0 + A < A0 + B
166
166
  if (i * n0 + m < m0 * c + n) {
167
- swizzleDirect = 1; // Nz
167
+ swizzlDirect = 1; // Nz
168
168
  cost = n0 * i + m0 * c;
169
169
  if (cost <= mincost) {
170
170
  mincost = cost;
171
171
  swizzlCount = i;
172
172
  }
173
173
  } else {
174
- swizzleDirect = 0; // Zn
174
+ swizzlDirect = 0; // Zn
175
175
  cost = m0 * i + n0 * c;
176
176
  if (cost < mincost) {
177
177
  mincost = cost;
@@ -179,9 +179,9 @@ uint32_t Swizzl(PpTilingDataType &tilingData) {
179
179
  }
180
180
  }
181
181
  }
182
- tilingData.swizzleDirect = swizzleDirect;
182
+ tilingData.swizzlDirect = swizzlDirect;
183
183
  tilingData.swizzlCount = swizzlCount;
184
- return swizzleDirect;
184
+ return swizzlDirect;
185
185
  }
186
186
 
187
187
  } // namespace tiling
@@ -60,12 +60,12 @@ struct PpTilingData {
60
60
  uint32_t swizzlCount{1};
61
61
  uint32_t tilingKey{0};
62
62
  uint32_t blockDim{1};
63
- uint32_t swizzleDirect{0};
63
+ uint32_t swizzlDirect{0};
64
64
  uint32_t splitk{0};
65
65
 
66
66
  void SetBaseShape(uint32_t batchSize, uint32_t m, uint32_t k, uint32_t n);
67
67
  void SetBaseOp(uint32_t coreNum, uint32_t mBase, uint32_t nBase, uint32_t qkv_n0, uint32_t qkv_n1, uint32_t qkv_n2);
68
- void SetTilingKey(const MatMulInfo &mmInfo, uint32_t swizzleDirect, uint32_t enSplitK);
68
+ void SetTilingKey(const MatMulInfo &mmInfo, uint32_t swizzlDirect, uint32_t enSplitK);
69
69
  uint32_t End(const MatMulInfo &mmInfo);
70
70
  };
71
71
  } // namespace tiling
@@ -37,7 +37,7 @@ struct PpMatmulTilingData {
37
37
  uint32_t swizzlCount{0};
38
38
  uint32_t tilingKey{0};
39
39
  uint32_t blockDim{1};
40
- uint32_t swizzleDirect{0};
40
+ uint32_t swizzlDirect{0};
41
41
  uint32_t splitk{0};
42
42
  uint32_t enShuffleK{0};
43
43
  uint32_t unused0{0};
@@ -48,7 +48,6 @@ struct PpMatmulTilingData {
48
48
  uint32_t unused5{0};
49
49
  uint32_t unused6{0};
50
50
  uint32_t tilingId{0};
51
- uint64_t sync_addr{0};
52
51
  };
53
52
 
54
53
  struct CustomMatmulTilingData {
@@ -77,10 +76,10 @@ struct CustomMatmulTilingData {
77
76
  uint32_t TransB{0};
78
77
  uint32_t shuffleFlag{0};
79
78
  uint32_t tilingId{0};
80
- uint32_t tilingKey{0};
81
- uint64_t sync_addr{0};
82
79
  };
83
80
 
81
+ constexpr size_t maxTilingBufSize = sizeof(CustomMatmulTilingData);
82
+
84
83
  struct MatmulStridedSliceFusionTilingData {
85
84
  uint32_t tilingId{0};
86
85
  uint32_t BlockDimM{0};
@@ -109,40 +108,8 @@ struct MatmulStridedSliceFusionTilingData {
109
108
  uint32_t TransA{0};
110
109
  uint32_t TransB{1};
111
110
  uint32_t shuffleFlag{0};
112
- uint32_t tilingKey{0};
113
- uint64_t sync_addr{0};
114
- uint32_t silu_pos{0};
115
111
  };
116
112
 
117
- // qkv ffn tiling
118
- struct PpMultiMatmulTilingData {
119
- uint32_t tilingId{0};
120
- uint32_t batch{0};
121
- uint32_t m{0};
122
- uint32_t k{0};
123
- uint32_t n{0};
124
- uint32_t m0{0};
125
- uint32_t k0{0};
126
- uint32_t n0{0};
127
- uint32_t mLoop{0};
128
- uint32_t kLoop{0};
129
- uint32_t nLoop{0};
130
- uint32_t coreLoop{0};
131
- uint32_t swizzlCount{0};
132
- uint32_t tilingKey{0};
133
- uint32_t blockDim{1};
134
- uint32_t swizzleDirect{0};
135
- uint32_t splitk{0};
136
- uint32_t enShuffleK{0};
137
- uint32_t mm_n_len_0{0};
138
- uint32_t mm_n_len_1{0};
139
- uint32_t mm_n_len_2{0};
140
- uint64_t sync_addr{0};
141
- uint32_t silu_pos{0};
142
- };
143
-
144
- constexpr size_t maxTilingBufSize = sizeof(uint32_t) * 32;
145
-
146
113
  } // namespace tiling
147
114
  } // namespace internal
148
115
  } // namespace mindspore
@@ -16,7 +16,7 @@
16
16
 
17
17
  #ifndef MS_KERNELS_INTERNAL_KERNEL_ASCENDC_MATMUL_STRIDEDSLICE_fUSION_FP16_KERNEL_H_
18
18
  #define MS_KERNELS_INTERNAL_KERNEL_ASCENDC_MATMUL_STRIDEDSLICE_fUSION_FP16_KERNEL_H_
19
- void MatMulStridedSliceFusion(uint32_t blockDim, void *l2ctrl, void *stream, uint8_t *globalA, uint8_t *globalB,
20
- uint8_t *globalBias, uint8_t *globalScale, uint8_t *globalC0, uint8_t *globalC1,
19
+ void MatMulStridedSliceFusion(uint32_t blockDim, void *l2ctrl, void *stream, uint8_t *globalA, uint8_t *globalB0,
20
+ uint8_t *globalB1, uint8_t *globalB2, uint8_t *globalC0, uint8_t *globalC1,
21
21
  uint8_t *globalC2, uint8_t *tilingData);
22
22
  #endif // MS_KERNELS_INTERNAL_KERNEL_ASCENDC_MATMUL_STRIDEDSLICE_fUSION_FP16_KERNEL_H_
@@ -42,32 +42,19 @@ namespace internal {
42
42
 
43
43
  using namespace tiling;
44
44
 
45
- enum class MultiMatMulAlgo { PP = 0, LLM_CUSTOM = 1 };
46
- enum class MultiMatMulFusionLevel { NONE = 0, CUBE = 1, MIX= 2 };
47
-
48
45
  class MatMulStridedSliceFusionImpl : public InternelKernelImpl {
49
46
  public:
50
47
  MatMulStridedSliceFusionImpl(const OpParamPtr &param) : InternelKernelImpl(param){};
51
48
  virtual ~MatMulStridedSliceFusionImpl() = default;
52
49
  bool Init(const ValidateInfo &info) override;
53
- void RegsiterCceKernels();
54
50
  void SetDeviceTilingBuf(const DeviceRawBuf &tilingBuf) override;
55
51
  int Launch() override;
56
- int LaunchMix();
57
52
  size_t GetTilingBufSize() override;
58
53
  int Tiling(HostRawBuf &tilingBuf) override;
59
54
  void TilingBasicFromPp(uint32_t &blockDim, PpTilingData &tilingdata);
60
- int TilingPp(HostRawBuf &tilingBuf, uint32_t tilingId, const uint32_t &blockDim, const PpTilingData &tilingdata);
61
- int TilingLLMCustom(HostRawBuf &tilingBuf, uint32_t tilingId, const uint32_t &blockDim,
62
- const PpTilingData &tilingdata);
63
- void SetWorkSpace(const std::vector<DeviceRawBuf> &workspace) override;
55
+ int TilingLLMCustom(HostRawBuf &tilingBuf, const uint32_t &blockDim, const PpTilingData &tilingdata, bool has_tuned);
64
56
  std::vector<uint64_t> GetWorkSpaceSize() override;
65
57
  int InferShape(const std::vector<DIMS> &input_shapes, std::vector<DIMS> &output_shapes) override;
66
- bool GenTilingId(uint32_t &tiling_id);
67
- void GetTunedKey();
68
- bool GetPpMatmulTiling(const MatMulInfo &mmInfo, const HardwareInfo &hwInfo_, uint32_t &blockDim,
69
- PpTilingData &tilingData);
70
- uint32_t MixSwizzle(PpTilingData &tilingData);
71
58
 
72
59
  private:
73
60
  std::string soc_{"Ascend910B2"};
@@ -80,16 +67,11 @@ class MatMulStridedSliceFusionImpl : public InternelKernelImpl {
80
67
  int block_dim_ = 0;
81
68
  bool trans_a_{false};
82
69
  bool trans_b_{true};
83
- std::vector<int> tune_key_;
84
- MultiMatMulAlgo algo_ = MultiMatMulAlgo::PP;
85
- MultiMatMulFusionLevel fusion_level_ = MultiMatMulFusionLevel::NONE;
86
- int32_t silu_position_{-1};
87
- uint32_t fusion_type_{0};
88
70
 
89
71
  REPO tuningTable_;
90
- static bool _is_inited;
91
- void *workspace_addr = nullptr;
92
- // static std::unordered_map<const char *, const char *> internal_multi_matmul_kernels_map;
72
+ tiling::MatmulStridedSliceFusionTilingData t_;
73
+ std::vector<int> GetTunedKey();
74
+ void SetTunedValue(const std::vector<int> &tuned_config);
93
75
  };
94
76
 
95
77
  } // namespace internal
@@ -33,30 +33,18 @@
33
33
  #include "asdops/params/sort.h"
34
34
  #include <memory>
35
35
  #include <vector>
36
- #include "types.h"
37
36
  namespace mindspore {
38
37
  namespace internal {
39
- struct DtypesParam {
40
- int op_id_ = 0;
41
- std::vector<int64_t> in_dtypes_;
42
- std::vector<int64_t> out_dtypes_;
43
- };
44
- struct OpParam : public AsdOps::OpDesc {
45
- int dtype_ = 0;
46
- std::vector<int64_t> in_dtypes_;
47
- std::vector<int64_t> out_dtypes_;
48
- std::string op_fullname_;
49
- };
38
+ using OpParam = AsdOps::OpDesc;
50
39
  enum OpId : int {
51
40
  MatMul,
41
+ KVCache,
52
42
  ReshapeAndCache,
53
43
  Slice,
54
44
  Gather,
55
45
  ApplyRotaryPosEmb,
56
46
  Add,
57
47
  Sub,
58
- Exp,
59
- Relu,
60
48
  FlashAttentionScore,
61
49
  PagedAttention,
62
50
  Cast,
@@ -86,8 +74,6 @@ enum OpId : int {
86
74
  ReduceSum,
87
75
  TopK,
88
76
  Tile,
89
- GroupedMatmul,
90
- OpId_END,
91
77
  };
92
78
  using MatMulParam = AsdOps::OpParam::MatMul;
93
79
  using MixParam = AsdOps::OpParam::Mix;
@@ -20,7 +20,9 @@ constexpr uint64_t L0AB_UINT8_BLOCK_SIZE = 32768; // 128 * 128 * 2B
20
20
  constexpr uint64_t L1_MAX_SHARE_NUM = (L1_SIZE - 8 * L0AB_UINT8_BLOCK_SIZE) / L0AB_UINT8_BLOCK_SIZE / 2;
21
21
  constexpr uint64_t SUB_SP_SIZE = 2048 * 8; // 1024*16, 2048*8, 4096*4, 8192*2, 16K*1,五种分块方法
22
22
 
23
- enum class FDMode{on, off};
23
+ enum class L1Mode{load, // 读取数据至L1的share区
24
+ share, // 使用share区的数据
25
+ noshare}; // 不读且不用share区
24
26
 
25
27
  inline uint64_t ceil(uint64_t y, uint64_t x) {
26
28
  return (y + x - 1) / x;