mindspore 2.3.0__cp39-none-any.whl → 2.3.0rc2__cp39-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mindspore might be problematic. Click here for more details.
- mindspore/.commit_id +1 -1
- mindspore/Third_Party_Open_Source_Software_Notice +0 -1512
- mindspore/__init__.py +1 -2
- mindspore/_c_dataengine.cpython-39-aarch64-linux-gnu.so +0 -0
- mindspore/_c_expression.cpython-39-aarch64-linux-gnu.so +0 -0
- mindspore/_c_mindrecord.cpython-39-aarch64-linux-gnu.so +0 -0
- mindspore/_checkparam.py +25 -5
- mindspore/_extends/graph_kernel/model/graph_parallel.py +1 -1
- mindspore/_extends/parse/__init__.py +2 -2
- mindspore/_extends/parse/compile_config.py +0 -29
- mindspore/_extends/parse/namespace.py +2 -2
- mindspore/_extends/parse/parser.py +5 -21
- mindspore/_extends/parse/resources.py +7 -5
- mindspore/_extends/parse/standard_method.py +59 -40
- mindspore/_mindspore_offline_debug.cpython-39-aarch64-linux-gnu.so +0 -0
- mindspore/amp.py +5 -26
- mindspore/bin/cache_admin +0 -0
- mindspore/bin/cache_server +0 -0
- mindspore/boost/adasum.py +1 -1
- mindspore/boost/base.py +1 -1
- mindspore/boost/boost_cell_wrapper.py +1 -1
- mindspore/boost/grad_freeze.py +2 -2
- mindspore/boost/less_batch_normalization.py +6 -9
- mindspore/common/__init__.py +1 -8
- mindspore/common/_register_for_tensor.py +9 -8
- mindspore/common/api.py +65 -275
- mindspore/common/dtype.py +4 -8
- mindspore/common/dump.py +5 -2
- mindspore/common/jit_config.py +1 -1
- mindspore/common/lazy_inline.py +2 -14
- mindspore/common/parameter.py +15 -14
- mindspore/common/recompute.py +5 -20
- mindspore/common/sparse_tensor.py +6 -21
- mindspore/common/tensor.py +52 -100
- mindspore/communication/__init__.py +11 -6
- mindspore/communication/management.py +94 -92
- mindspore/context.py +18 -180
- mindspore/dataset/engine/datasets.py +46 -69
- mindspore/dataset/engine/datasets_user_defined.py +53 -72
- mindspore/dataset/engine/datasets_vision.py +2 -2
- mindspore/dataset/engine/queue.py +38 -56
- mindspore/dataset/engine/validators.py +5 -11
- mindspore/dataset/vision/__init__.py +5 -5
- mindspore/dataset/vision/c_transforms.py +5 -5
- mindspore/dataset/vision/py_transforms_util.py +1 -1
- mindspore/dataset/vision/transforms.py +46 -591
- mindspore/dataset/vision/utils.py +1 -121
- mindspore/dataset/vision/validators.py +3 -9
- mindspore/hal/__init__.py +1 -7
- mindspore/hal/device.py +1 -1
- mindspore/include/api/model.h +0 -3
- mindspore/include/dataset/vision.h +2 -54
- mindspore/include/mindapi/base/types.h +0 -1
- mindspore/lib/libdnnl.so.2 +0 -0
- mindspore/lib/libmindspore.so +0 -0
- mindspore/lib/libmindspore_backend.so +0 -0
- mindspore/lib/libmindspore_common.so +0 -0
- mindspore/lib/libmindspore_core.so +0 -0
- mindspore/lib/libmindspore_glog.so.0 +0 -0
- mindspore/lib/libmindspore_gpr.so.15 +0 -0
- mindspore/lib/libmindspore_grpc++.so.1 +0 -0
- mindspore/lib/libmindspore_grpc.so.15 +0 -0
- mindspore/lib/libmindspore_shared_lib.so +0 -0
- mindspore/lib/libmpi_adapter.so +0 -0
- mindspore/lib/libmpi_collective.so +0 -0
- mindspore/lib/libnnacl.so +0 -0
- mindspore/lib/libopencv_core.so.4.5 +0 -0
- mindspore/lib/libps_cache.so +0 -0
- mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/config/ascend310p/aic-ascend310p-ops-info.json +0 -35
- mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/custom_aicore_ops_impl/kv_cache_mgr.py +0 -2
- mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/vector_core/tbe/custom_aicore_ops_impl/kv_cache_mgr.py +0 -2
- mindspore/lib/plugin/ascend/custom_aicpu_ops/op_impl/cpu/aicpu_kernel/impl/libcust_cpu_kernels.so +0 -0
- mindspore/lib/plugin/ascend/custom_aicpu_ops/op_impl/cpu/config/cust_aicpu_kernel.json +0 -72
- mindspore/lib/plugin/ascend/custom_aicpu_ops/op_proto/libcust_op_proto.so +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_ops/op_api/include/{aclnn_all_finite.h → aclnn_add_custom.h} +11 -9
- mindspore/lib/plugin/ascend/custom_ascendc_ops/op_api/include/aclnn_decoder_kv_cache.h +1 -1
- mindspore/lib/plugin/ascend/custom_ascendc_ops/op_api/include/aclnn_prompt_kv_cache.h +1 -1
- mindspore/lib/plugin/ascend/custom_ascendc_ops/op_api/lib/libcust_opapi.so +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_ops/op_impl/ai_core/tbe/config/ascend310p/aic-ascend310p-ops-info.json +12 -184
- mindspore/lib/plugin/ascend/custom_ascendc_ops/op_impl/ai_core/tbe/config/ascend910/aic-ascend910-ops-info.json +15 -7
- mindspore/lib/plugin/ascend/custom_ascendc_ops/op_impl/ai_core/tbe/config/ascend910b/aic-ascend910b-ops-info.json +15 -7
- mindspore/lib/plugin/ascend/custom_ascendc_ops/op_impl/ai_core/tbe/custom_ascendc_ops_impl/dynamic/add_custom.cpp +81 -0
- mindspore/lib/plugin/ascend/custom_ascendc_ops/op_impl/ai_core/tbe/custom_ascendc_ops_impl/dynamic/add_custom.py +134 -0
- mindspore/lib/plugin/ascend/custom_ascendc_ops/op_impl/ai_core/tbe/custom_ascendc_ops_impl/dynamic/decoder_kv_cache.py +31 -77
- mindspore/lib/plugin/ascend/custom_ascendc_ops/op_impl/ai_core/tbe/custom_ascendc_ops_impl/dynamic/prompt_kv_cache.py +31 -77
- mindspore/lib/plugin/ascend/custom_ascendc_ops/op_impl/ai_core/tbe/op_tiling/lib/linux/aarch64/libcust_opmaster_rt2.0.so +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_ops/op_impl/ai_core/tbe/op_tiling/liboptiling.so +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_ops/op_proto/inc/op_proto.h +5 -4
- mindspore/lib/plugin/ascend/custom_ascendc_ops/op_proto/lib/linux/aarch64/libcust_opsproto_rt2.0.so +0 -0
- mindspore/lib/plugin/ascend/libascend_collective.so +0 -0
- mindspore/lib/plugin/ascend/libdvpp_utils.so +0 -0
- mindspore/lib/plugin/ascend/libhccl_plugin.so +0 -0
- mindspore/lib/plugin/ascend/liblowlatency_collective.so +0 -0
- mindspore/lib/plugin/ascend/libmindspore_cpu_kernels.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/bin/DeviceBin +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/bin/PkgInspect +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/bin/op_man +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/device/ascend910b/bin/ascend910b.bin +286 -275
- mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/host/libasdops_cann_host.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/host/libasdops_host.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/lib/libasdops.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/lib/libasdops_static.a +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/add/add_impl.h +0 -1
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/apply_rotary_pos_emb/apply_rotary_pos_emb_impl.h +0 -1
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/asdop/asd_op_impl.h +0 -3
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/backend_param.h +0 -5
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/cast/cast_tiling.h +45 -1
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/compare/compare_impl.h +0 -1
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/flash_attention_score/flash_attention_score_impl.h +4 -8
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/flash_attention_score/flash_attention_score_tiling.h +4 -11
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/flash_attention_score/kernel/flash_attention_score_mix_hwsync.h +0 -18
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/internal_kernel.h +0 -6
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/internal_rtbackend.h +75 -1
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/matmul/kernel/matmul.h +5 -5
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/matmul/matmul_impl.h +3 -18
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/matmul_common/pp_matmul_common_tiling.h +5 -5
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/matmul_common/pp_matmul_info.h +2 -2
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/matmul_common/tiling_data.h +3 -36
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/matmul_stridedslice/kernel/matmul_stridedslice_fusion.h +2 -2
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/matmul_stridedslice/matmul_stridedslice_fusion_impl.h +4 -22
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/op_param.h +2 -16
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/paged_attention/kernel/paged_attention_mix_hwsync.h +3 -1
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/paged_attention/paged_attention_impl.h +4 -5
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/paged_attention/paged_attention_tiling.h +4 -9
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/param/attention_param.h +2 -5
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/param/matmul_ext_param.h +0 -1
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/param/matmul_qkv_param.h +4 -10
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/param/sub_param.h +12 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/rms_norm/rms_norm_impl.h +0 -1
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/sub/sub_impl.h +0 -1
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/tune_repo/matmul_table.h +1 -1
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/utils/backend.h +2 -10
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/utils/elewise_utils.h +1 -5
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/utils/log/log.h +0 -1
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/utils/log/log_tiling.h +0 -17
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/utils/math.h +7 -2
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libAdd_impl.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libSub_impl.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libadd_layernorm_impl.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libadd_rms_norm_impl.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libapply_rotary_pos_emb_impl.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libcast_impl.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libgelu_impl.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libmatmul_impl.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libmatmul_stridedslice_fusion_impl.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libms_kernels_internal.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libnot_equal_impl.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libreshape_and_cache_impl.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/librms_norm_impl.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/BSAttention/flash_attention_score_bf16_bnsd_full_mix.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/BSAttention/flash_attention_score_bf16_bnsd_tri_mix.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/BSAttention/flash_attention_score_bf16_bsh_full_mix.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/BSAttention/flash_attention_score_bf16_bsh_tri_mix.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/BSAttention/flash_attention_score_fp16_bnsd_full_mix.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/BSAttention/flash_attention_score_fp16_bnsd_tri_mix.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/BSAttention/flash_attention_score_fp16_bsh_full_mix.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/BSAttention/flash_attention_score_fp16_bsh_tri_mix.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/BSAttention/paged_attention_bf16_bnsd_full_mix.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/BSAttention/paged_attention_bf16_bsh_full_mix.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/BSAttention/paged_attention_fp16_bnsd_full_mix.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/BSAttention/paged_attention_fp16_bsh_full_mix.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/lccl/lib/liblcal.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/lccl/lib/liblccl_wrapper.so +0 -0
- mindspore/lib/plugin/libmindspore_ascend.so.2 +0 -0
- mindspore/mindrecord/filewriter.py +2 -2
- mindspore/mint/__init__.py +40 -720
- mindspore/mint/nn/__init__.py +7 -89
- mindspore/mint/nn/functional.py +16 -165
- mindspore/mint/optim/adamw.py +16 -15
- mindspore/nn/__init__.py +2 -0
- mindspore/nn/cell.py +98 -97
- mindspore/nn/extend/basic.py +2 -2
- mindspore/nn/extend/embedding.py +1 -1
- mindspore/nn/extend/layer/normalization.py +5 -7
- mindspore/nn/generator.py +297 -0
- mindspore/nn/layer/activation.py +3 -4
- mindspore/nn/layer/basic.py +16 -79
- mindspore/nn/layer/conv.py +8 -17
- mindspore/nn/layer/embedding.py +4 -1
- mindspore/nn/layer/math.py +1 -1
- mindspore/nn/layer/normalization.py +1 -1
- mindspore/nn/layer/pooling.py +0 -5
- mindspore/nn/layer/rnn_cells.py +2 -2
- mindspore/nn/loss/loss.py +19 -19
- mindspore/nn/optim/adasum.py +1 -1
- mindspore/nn/optim/sgd.py +2 -3
- mindspore/nn/probability/distribution/exponential.py +1 -1
- mindspore/nn/probability/distribution/geometric.py +1 -1
- mindspore/nn/probability/distribution/logistic.py +1 -1
- mindspore/nn/wrap/cell_wrapper.py +1 -25
- mindspore/nn/wrap/loss_scale.py +1 -24
- mindspore/numpy/array_ops.py +1 -5
- mindspore/numpy/dtypes.py +3 -3
- mindspore/numpy/math_ops.py +8 -8
- mindspore/ops/__init__.py +1 -1
- mindspore/ops/_grad_experimental/grad_comm_ops.py +16 -75
- mindspore/ops/_vmap/vmap_array_ops.py +0 -27
- mindspore/ops/_vmap/vmap_math_ops.py +1 -29
- mindspore/ops/_vmap/vmap_nn_ops.py +18 -19
- mindspore/ops/auto_generate/cpp_create_prim_instance_helper.py +8 -34
- mindspore/ops/auto_generate/gen_arg_dtype_cast.py +9 -2
- mindspore/ops/auto_generate/gen_arg_handler.py +0 -26
- mindspore/ops/auto_generate/gen_extend_func.py +27 -603
- mindspore/ops/auto_generate/gen_ops_def.py +203 -993
- mindspore/ops/auto_generate/gen_ops_prim.py +402 -1946
- mindspore/ops/auto_generate/pyboost_inner_prim.py +20 -90
- mindspore/ops/composite/base.py +6 -3
- mindspore/ops/composite/math_ops.py +1 -1
- mindspore/ops/composite/multitype_ops/_compile_utils.py +17 -24
- mindspore/ops/composite/multitype_ops/_constexpr_utils.py +1 -1
- mindspore/ops/extend/__init__.py +3 -2
- mindspore/ops/extend/array_func.py +51 -10
- mindspore/ops/extend/nn_func.py +78 -2
- mindspore/ops/function/__init__.py +13 -8
- mindspore/ops/function/array_func.py +179 -455
- mindspore/ops/function/clip_func.py +1 -1
- mindspore/ops/function/grad/grad_func.py +3 -3
- mindspore/ops/function/math_func.py +103 -117
- mindspore/ops/function/nn_func.py +163 -275
- mindspore/ops/function/other_func.py +2 -2
- mindspore/ops/function/random_func.py +69 -202
- mindspore/ops/function/sparse_func.py +4 -4
- mindspore/ops/functional.py +327 -332
- mindspore/ops/operations/__init__.py +3 -13
- mindspore/ops/operations/_grad_ops.py +27 -3
- mindspore/ops/operations/_inner_ops.py +356 -53
- mindspore/ops/operations/_rl_inner_ops.py +2 -2
- mindspore/ops/operations/_tensor_array.py +8 -8
- mindspore/ops/operations/array_ops.py +65 -82
- mindspore/ops/operations/comm_ops.py +93 -784
- mindspore/ops/operations/custom_ops.py +28 -51
- mindspore/ops/operations/debug_ops.py +4 -4
- mindspore/ops/operations/inner_ops.py +2 -2
- mindspore/ops/operations/manually_defined/ops_def.py +4 -304
- mindspore/ops/operations/math_ops.py +50 -3
- mindspore/ops/operations/nn_ops.py +247 -14
- mindspore/ops/operations/other_ops.py +3 -3
- mindspore/ops/operations/random_ops.py +1 -1
- mindspore/ops/operations/sparse_ops.py +1 -1
- mindspore/ops/primitive.py +8 -9
- mindspore/ops/silent_check.py +5 -5
- mindspore/ops_generate/arg_dtype_cast.py +9 -2
- mindspore/ops_generate/arg_handler.py +0 -26
- mindspore/ops_generate/gen_aclnn_implement.py +4 -1
- mindspore/ops_generate/gen_ops.py +4 -26
- mindspore/ops_generate/gen_pyboost_func.py +12 -41
- mindspore/ops_generate/gen_utils.py +0 -21
- mindspore/ops_generate/pyboost_utils.py +2 -7
- mindspore/ops_generate/template.py +0 -1
- mindspore/parallel/_auto_parallel_context.py +1 -21
- mindspore/parallel/_tensor.py +5 -0
- mindspore/parallel/_transformer/transformer.py +1 -1
- mindspore/parallel/_utils.py +1 -15
- mindspore/parallel/algo_parameter_config.py +3 -1
- mindspore/parallel/checkpoint_transform.py +9 -12
- mindspore/parallel/cluster/process_entity/_api.py +29 -28
- mindspore/parallel/cluster/process_entity/_utils.py +3 -13
- mindspore/parallel/cluster/run.py +16 -13
- mindspore/parallel/parameter_broadcast.py +2 -2
- mindspore/parallel/shard.py +17 -31
- mindspore/profiler/__init__.py +2 -3
- mindspore/profiler/common/util.py +2 -107
- mindspore/profiler/envprofiling.py +1 -1
- mindspore/profiler/parser/ascend_analysis/constant.py +21 -8
- mindspore/profiler/parser/ascend_analysis/file_manager.py +0 -82
- mindspore/profiler/parser/ascend_analysis/function_event.py +28 -43
- mindspore/profiler/parser/ascend_analysis/fwk_cann_parser.py +27 -49
- mindspore/profiler/parser/ascend_analysis/fwk_file_parser.py +10 -15
- mindspore/profiler/parser/ascend_analysis/msprof_timeline_parser.py +20 -25
- mindspore/profiler/parser/ascend_analysis/profiler_info_parser.py +5 -5
- mindspore/profiler/parser/ascend_analysis/trace_event_manager.py +1 -10
- mindspore/profiler/parser/ascend_hccl_generator.py +1 -4
- mindspore/profiler/parser/ascend_msprof_exporter.py +22 -43
- mindspore/profiler/parser/ascend_timeline_generator.py +5 -7
- mindspore/profiler/parser/minddata_parser.py +3 -72
- mindspore/profiler/profiling.py +59 -176
- mindspore/rewrite/api/node.py +1 -1
- mindspore/rewrite/common/namespace.py +5 -5
- mindspore/rewrite/parsers/assign_parser.py +0 -2
- mindspore/rewrite/parsers/class_def_parser.py +4 -8
- mindspore/run_check/_check_version.py +1 -1
- mindspore/scipy/fft.py +3 -1
- mindspore/scipy/linalg.py +3 -2
- mindspore/scipy/ops.py +3 -5
- mindspore/scipy/optimize/__init__.py +2 -2
- mindspore/train/__init__.py +4 -4
- mindspore/train/anf_ir_pb2.py +2 -8
- mindspore/train/callback/__init__.py +2 -5
- mindspore/train/callback/_backup_and_restore.py +2 -2
- mindspore/train/callback/_checkpoint.py +16 -104
- mindspore/train/callback/_landscape.py +1 -1
- mindspore/train/callback/_time_monitor.py +1 -1
- mindspore/train/data_sink.py +4 -5
- mindspore/train/dataset_helper.py +20 -45
- mindspore/train/model.py +38 -266
- mindspore/train/serialization.py +105 -256
- mindspore/train/summary/_summary_adapter.py +1 -1
- mindspore/version.py +1 -1
- {mindspore-2.3.0.dist-info → mindspore-2.3.0rc2.dist-info}/METADATA +2 -2
- {mindspore-2.3.0.dist-info → mindspore-2.3.0rc2.dist-info}/RECORD +303 -420
- mindspore/_extends/pijit/__init__.py +0 -23
- mindspore/_extends/pijit/pijit_func_white_list.py +0 -343
- mindspore/common/file_system.py +0 -48
- mindspore/common/generator.py +0 -260
- mindspore/common/no_inline.py +0 -54
- mindspore/common/np_dtype.py +0 -25
- mindspore/communication/comm_func.py +0 -1140
- mindspore/hal/memory.py +0 -326
- mindspore/lib/libavcodec.so.59 +0 -0
- mindspore/lib/libavdevice.so.59 +0 -0
- mindspore/lib/libavfilter.so.8 +0 -0
- mindspore/lib/libavformat.so.59 +0 -0
- mindspore/lib/libavutil.so.57 +0 -0
- mindspore/lib/libmindspore_np_dtype.so +0 -0
- mindspore/lib/libswresample.so.4 +0 -0
- mindspore/lib/libswscale.so.6 +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_ops/op_impl/ai_core/tbe/custom_ascendc_ops_impl/dynamic/all_finite.cpp +0 -326
- mindspore/lib/plugin/ascend/custom_ascendc_ops/op_impl/ai_core/tbe/custom_ascendc_ops_impl/dynamic/all_finite.py +0 -180
- mindspore/lib/plugin/ascend/custom_ascendc_ops/op_impl/ai_core/tbe/kernel/ascend910b/all_finite/AllFinite_576ceaeef5870c451cab59af55ea46ad.json +0 -58
- mindspore/lib/plugin/ascend/custom_ascendc_ops/op_impl/ai_core/tbe/kernel/ascend910b/all_finite/AllFinite_576ceaeef5870c451cab59af55ea46ad.o +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_ops/op_impl/ai_core/tbe/kernel/ascend910b/all_finite/AllFinite_86a73ff6e28d734c96bb8d3054f7dd18.json +0 -58
- mindspore/lib/plugin/ascend/custom_ascendc_ops/op_impl/ai_core/tbe/kernel/ascend910b/all_finite/AllFinite_86a73ff6e28d734c96bb8d3054f7dd18.o +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_ops/op_impl/ai_core/tbe/kernel/ascend910b/all_finite/AllFinite_f55e0ebaad1f2f572e43677336992fa0.json +0 -58
- mindspore/lib/plugin/ascend/custom_ascendc_ops/op_impl/ai_core/tbe/kernel/ascend910b/all_finite/AllFinite_f55e0ebaad1f2f572e43677336992fa0.o +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_ops/op_impl/ai_core/tbe/kernel/config/ascend910b/all_finite.json +0 -109
- mindspore/lib/plugin/ascend/custom_ascendc_ops/op_impl/ai_core/tbe/kernel/config/ascend910b/binary_info_config.json +0 -38
- mindspore/lib/plugin/ascend/custom_compiler/OWNERS +0 -12
- mindspore/lib/plugin/ascend/custom_compiler/setup.py +0 -255
- mindspore/lib/plugin/ascend/custom_compiler/start.sh +0 -26
- mindspore/lib/plugin/ascend/custom_compiler/template.json +0 -40
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/include/acme.h +0 -24
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/include/acme_op.h +0 -69
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/include/base_type.h +0 -133
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/include/op_creator.h +0 -32
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/include/op_param.h +0 -35
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/include/tiling_info.h +0 -60
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/src/core/kernel_register.h +0 -37
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/src/core/platform/platform_configs.h +0 -89
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/src/core/platform/rt_funcs.h +0 -135
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/src/ops/host_src/add_op.h +0 -34
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/src/ops/host_src/asd_backoff_base.h +0 -62
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/src/ops/host_src/asd_elewise_op.h +0 -33
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/src/ops/host_src/asd_ops.h +0 -88
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/src/ops/host_src/asd_pa_op.h +0 -45
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/src/ops/host_src/cast_op.h +0 -52
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/src/ops/host_src/matmul_op.h +0 -95
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/src/utils/asd_utils.h +0 -84
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/src/utils/comm_utils.h +0 -61
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/apply_rotary_pos_emb/kernel/apply_rotary_pos_emb_fp32.h +0 -224
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_binary/and_impl.h +0 -29
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_binary/div_impl.h +0 -29
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_binary/elewise_binary_impl.h +0 -48
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_binary/elewise_binary_tiling.h +0 -25
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_binary/kernel/and_kernel.h +0 -46
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_binary/kernel/div_kernel.h +0 -46
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_binary/kernel/elewise_binary_base.h +0 -260
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_binary/kernel/elewise_binary_kernel.h +0 -35
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_binary/kernel/max_kernel.h +0 -66
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_binary/kernel/min_kernel.h +0 -66
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_binary/kernel/mul_kernel.h +0 -66
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_binary/kernel/or_kernel.h +0 -46
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_binary/max_impl.h +0 -29
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_binary/min_impl.h +0 -29
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_binary/mul_impl.h +0 -29
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_binary/or_impl.h +0 -29
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_unary/abs_impl.h +0 -29
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_unary/elewise_unary_impl.h +0 -47
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_unary/elewise_unary_tiling.h +0 -24
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_unary/exp_impl.h +0 -29
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_unary/kernel/abs_kernel.h +0 -45
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_unary/kernel/elewise_unary_base.h +0 -148
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_unary/kernel/elewise_unary_kernel.h +0 -31
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_unary/kernel/exp_kernel.h +0 -45
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_unary/kernel/ln_kernel.h +0 -45
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_unary/kernel/not_kernel.h +0 -45
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_unary/kernel/reciprocal_kernel.h +0 -45
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_unary/kernel/relu_kernel.h +0 -55
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_unary/kernel/rsqrt_kernel.h +0 -45
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_unary/kernel/sqrt_kernel.h +0 -45
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_unary/ln_impl.h +0 -29
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_unary/not_impl.h +0 -29
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_unary/reciprocal_impl.h +0 -29
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_unary/relu_impl.h +0 -29
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_unary/rsqrt_impl.h +0 -29
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_unary/sqrt_impl.h +0 -29
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/grouped_matmul/grouped_matmul_impl.h +0 -45
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/grouped_matmul/grouped_matmul_tiling.h +0 -187
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/grouped_matmul/kernel/grouped_matmul.h +0 -245
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/grouped_matmul/kernel/grouped_matmul_interface.h +0 -24
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/grouped_matmul/kernel/grouped_matmul_utils.h +0 -111
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/grouped_matmul/tiling_data.h +0 -54
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/param/compare_param.h +0 -31
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/param/elewise_param.h +0 -41
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/param/grouped_matmul_param.h +0 -40
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/profiling_util.h +0 -364
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/utils/log/log_utils.h +0 -69
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/utils/register/kernel_creator.h +0 -39
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/utils/register/kernel_registry.h +0 -114
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/utils/utils.h +0 -98
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/MatMulPostFusionMixTactic/matmul_postfusion_mix.json +0 -19
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/MatMulPostFusionMixTactic/matmul_postfusion_mix.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/MatMulPostFusionMixTactic/matmul_postfusion_mix_mix_aic_0.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/MatMulPostFusionMixTactic/matmul_postfusion_mix_mix_aiv_0.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/MultiMatMulPostFusionMixTactic/multi_matmul_postfusion_mix.json +0 -19
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/MultiMatMulPostFusionMixTactic/multi_matmul_postfusion_mix.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/MultiMatMulPostFusionMixTactic/multi_matmul_postfusion_mix_mix_aic_0.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/MultiMatMulPostFusionMixTactic/multi_matmul_postfusion_mix_mix_aiv_0.o +0 -0
- mindspore/mint/linalg/__init__.py +0 -22
- mindspore/nn/layer/embedding_service.py +0 -531
- mindspore/nn/layer/embedding_service_layer.py +0 -393
- mindspore/ops/function/reshard_func.py +0 -102
- mindspore/ops/operations/_infer_ops.py +0 -19
- mindspore/ops/operations/reshard_ops.py +0 -53
- mindspore/profiler/common/process_pool.py +0 -41
- mindspore/profiler/common/singleton.py +0 -28
- mindspore/profiler/parser/ascend_integrate_generator.py +0 -42
- mindspore/profiler/parser/ascend_memory_generator.py +0 -185
- mindspore/train/callback/_cluster_monitor.py +0 -201
- mindspore/train/callback/_flops_collector.py +0 -238
- mindspore/train/callback/_mindio_ttp.py +0 -443
- {mindspore-2.3.0.dist-info → mindspore-2.3.0rc2.dist-info}/WHEEL +0 -0
- {mindspore-2.3.0.dist-info → mindspore-2.3.0rc2.dist-info}/entry_points.txt +0 -0
- {mindspore-2.3.0.dist-info → mindspore-2.3.0rc2.dist-info}/top_level.txt +0 -0
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
@@ -39,7 +39,6 @@ class AddImpl : public InternelKernelImpl {
|
|
|
39
39
|
int Tiling(HostRawBuf &tilingBuf) override;
|
|
40
40
|
std::vector<uint64_t> GetWorkSpaceSize() override;
|
|
41
41
|
int InferShape(const std::vector<DIMS> &input_shapes, std::vector<DIMS> &output_shapes) override;
|
|
42
|
-
bool IsSupported() override;
|
|
43
42
|
|
|
44
43
|
private:
|
|
45
44
|
void NoBroadCastTiling(AddTilingData *tiling);
|
|
@@ -36,7 +36,6 @@ class ApplyRotaryPosEmbImpl : public InternelKernelImpl {
|
|
|
36
36
|
int InferShape(const std::vector<DIMS> &input_shapes, std::vector<DIMS> &output_shapes) override;
|
|
37
37
|
|
|
38
38
|
private:
|
|
39
|
-
void SetTilingID(RopeTilingData *tiling, int typeKey);
|
|
40
39
|
DeviceRawBuf tiling_buf_;
|
|
41
40
|
DeviceRawBuf workSpace_buf_;
|
|
42
41
|
std::string soc_{"Ascend910B2"};
|
|
@@ -40,9 +40,6 @@ class AsdOpsImpl : public InternelKernelImpl {
|
|
|
40
40
|
int Tiling(HostRawBuf &tilingBuf) override;
|
|
41
41
|
std::vector<uint64_t> GetWorkSpaceSize() override;
|
|
42
42
|
int InferShape(const std::vector<DIMS> &input_shapes, std::vector<DIMS> &output_shapes) override;
|
|
43
|
-
std::string GetOpName() override {
|
|
44
|
-
return tactic_->GetName();
|
|
45
|
-
}
|
|
46
43
|
|
|
47
44
|
private:
|
|
48
45
|
AsdOps::Tactic *InitAndGetTactic();
|
|
@@ -27,7 +27,6 @@ struct HardwareInfo {
|
|
|
27
27
|
uint32_t l0cSize{0};
|
|
28
28
|
uint32_t hbmBandWidth{1};
|
|
29
29
|
uint32_t l2BandWidth{5};
|
|
30
|
-
uint32_t ubSize{0};
|
|
31
30
|
};
|
|
32
31
|
|
|
33
32
|
static void GetHardwareInfoPPMatmul910B1(HardwareInfo &hwInfo) {
|
|
@@ -37,7 +36,6 @@ static void GetHardwareInfoPPMatmul910B1(HardwareInfo &hwInfo) {
|
|
|
37
36
|
hwInfo.l0aSize = 65536;
|
|
38
37
|
hwInfo.l0bSize = 65536;
|
|
39
38
|
hwInfo.l0cSize = 131072;
|
|
40
|
-
hwInfo.ubSize = 196608;
|
|
41
39
|
}
|
|
42
40
|
|
|
43
41
|
static void GetHardwareInfoPPMatmul910B2(HardwareInfo &hwInfo) {
|
|
@@ -47,7 +45,6 @@ static void GetHardwareInfoPPMatmul910B2(HardwareInfo &hwInfo) {
|
|
|
47
45
|
hwInfo.l0aSize = 65536;
|
|
48
46
|
hwInfo.l0bSize = 65536;
|
|
49
47
|
hwInfo.l0cSize = 131072;
|
|
50
|
-
hwInfo.ubSize = 196608;
|
|
51
48
|
}
|
|
52
49
|
|
|
53
50
|
static void GetHardwareInfoPPMatmul910B3(HardwareInfo &hwInfo) {
|
|
@@ -57,7 +54,6 @@ static void GetHardwareInfoPPMatmul910B3(HardwareInfo &hwInfo) {
|
|
|
57
54
|
hwInfo.l0aSize = 65536;
|
|
58
55
|
hwInfo.l0bSize = 65536;
|
|
59
56
|
hwInfo.l0cSize = 131072;
|
|
60
|
-
hwInfo.ubSize = 196608;
|
|
61
57
|
}
|
|
62
58
|
|
|
63
59
|
static void GetHardwareInfoPPMatmul910B4(HardwareInfo &hwInfo) {
|
|
@@ -67,7 +63,6 @@ static void GetHardwareInfoPPMatmul910B4(HardwareInfo &hwInfo) {
|
|
|
67
63
|
hwInfo.l0aSize = 65536;
|
|
68
64
|
hwInfo.l0bSize = 65536;
|
|
69
65
|
hwInfo.l0cSize = 131072;
|
|
70
|
-
hwInfo.ubSize = 196608;
|
|
71
66
|
}
|
|
72
67
|
} // namespace internal
|
|
73
68
|
} // namespace mindspore
|
|
@@ -17,6 +17,50 @@
|
|
|
17
17
|
#ifndef MS_KERNELS_INTERNAL_KERNEL_ASCENDC_CAST_TILING_H_
|
|
18
18
|
#define MS_KERNELS_INTERNAL_KERNEL_ASCENDC_CAST_TILING_H_
|
|
19
19
|
|
|
20
|
-
|
|
20
|
+
enum CastDType : int32_t {
|
|
21
|
+
FLOAT16_TO_FLOAT = 17,
|
|
22
|
+
FLOAT16_TO_UINT8,
|
|
23
|
+
FLOAT16_TO_INT8,
|
|
24
|
+
FLOAT16_TO_INT16,
|
|
25
|
+
FLOAT16_TO_INT32,
|
|
26
|
+
FLOAT16_TO_BF16,
|
|
27
|
+
|
|
28
|
+
FLOAT_TO_FLOAT16 = 33,
|
|
29
|
+
FLOAT_TO_UINT8,
|
|
30
|
+
FLOAT_TO_INT8,
|
|
31
|
+
FLOAT_TO_INT32,
|
|
32
|
+
FLOAT_TO_BF16,
|
|
33
|
+
|
|
34
|
+
INT8_TO_FLOAT16 = 48,
|
|
35
|
+
INT8_TO_FLOAT,
|
|
36
|
+
INT8_TO_BF16,
|
|
37
|
+
|
|
38
|
+
INT32_TO_INT64 = 99,
|
|
39
|
+
INT32_TO_FLOAT,
|
|
40
|
+
|
|
41
|
+
INT64_TO_INT32 = 114,
|
|
42
|
+
INT64_TO_FLOAT,
|
|
43
|
+
|
|
44
|
+
BF16_TO_FLOAT16 = 147,
|
|
45
|
+
BF16_TO_FLOAT,
|
|
46
|
+
|
|
47
|
+
UNSUPPORTED_DTYPE
|
|
48
|
+
};
|
|
49
|
+
|
|
50
|
+
typedef struct CastTilingData {
|
|
51
|
+
uint32_t buffer_num;
|
|
52
|
+
uint32_t cast_dtype;
|
|
53
|
+
uint32_t core_num;
|
|
54
|
+
|
|
55
|
+
uint32_t avg_block_count;
|
|
56
|
+
uint32_t avg_block_ub_num;
|
|
57
|
+
uint32_t avg_block_ub_tail;
|
|
58
|
+
uint32_t avg_block_ub_loop;
|
|
59
|
+
|
|
60
|
+
uint32_t tail_block_count;
|
|
61
|
+
uint32_t tail_block_ub_num;
|
|
62
|
+
uint32_t tail_block_ub_tail;
|
|
63
|
+
uint32_t tail_block_ub_loop;
|
|
64
|
+
} CastTilingData;
|
|
21
65
|
|
|
22
66
|
#endif
|
mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/compare/compare_impl.h
CHANGED
|
@@ -34,7 +34,6 @@ class CompareImpl : public InternelKernelImpl {
|
|
|
34
34
|
int Tiling(HostRawBuf &tilingBuf) override;
|
|
35
35
|
std::vector<uint64_t> GetWorkSpaceSize() override;
|
|
36
36
|
int InferShape(const std::vector<DIMS> &input_shapes, std::vector<DIMS> &output_shapes) override;
|
|
37
|
-
bool IsSupported() override;
|
|
38
37
|
|
|
39
38
|
private:
|
|
40
39
|
int32_t GetMaxUbCount(uint32_t in_dtype);
|
|
@@ -37,6 +37,7 @@ class FlashAttentionScoreImpl : public InternelKernelImpl {
|
|
|
37
37
|
virtual ~FlashAttentionScoreImpl() = default;
|
|
38
38
|
bool Init(const ValidateInfo &info) override;
|
|
39
39
|
void SetInputs(const std::vector<Tensor *> &inputs) override;
|
|
40
|
+
void SetOutputs(const std::vector<Tensor *> &outputs) override;
|
|
40
41
|
void SetWorkSpace(const std::vector<DeviceRawBuf> &workspace) override;
|
|
41
42
|
void SetStream(const void *stream_ptr) override;
|
|
42
43
|
void SetDeviceTilingBuf(const DeviceRawBuf &tilingBuf) override;
|
|
@@ -45,18 +46,13 @@ class FlashAttentionScoreImpl : public InternelKernelImpl {
|
|
|
45
46
|
int Tiling(HostRawBuf &tilingBuf) override;
|
|
46
47
|
std::vector<uint64_t> GetWorkSpaceSize() override;
|
|
47
48
|
int InferShape(const std::vector<DIMS> &input_shapes, std::vector<DIMS> &output_shapes) override;
|
|
48
|
-
bool IsSupported() override;
|
|
49
49
|
|
|
50
50
|
private:
|
|
51
|
-
// init val
|
|
52
|
-
int head_num_ = 0;
|
|
53
|
-
int pre_tokens_ = 2147483647;
|
|
54
|
-
int next_tokens_ = 0;
|
|
55
|
-
int inner_precise_ = 0;
|
|
56
|
-
int sparse_mode_ = 0;
|
|
57
|
-
// impl val
|
|
58
51
|
uint64_t B, N, Q_S, KV_S, D, G, CORE_NUM;
|
|
52
|
+
int inner_precise, pre_tokens, next_tokens, sparse_mode;
|
|
59
53
|
bool BFLOAT16, BSH, ALIBI, AMASK;
|
|
54
|
+
const std::vector<Tensor *> *inputs_;
|
|
55
|
+
const std::vector<Tensor *> *outputs_;
|
|
60
56
|
void *stream_ptr_ = nullptr;
|
|
61
57
|
void *workspace_addr = nullptr;
|
|
62
58
|
void *tiling_addr_ = nullptr;
|
|
@@ -21,7 +21,9 @@ typedef struct {
|
|
|
21
21
|
#define ATTENTION_DEBUG false // 开启时会对S/P写入调试数据
|
|
22
22
|
#define ROWMAX true
|
|
23
23
|
#define OP_NAME FlashAttentionScore
|
|
24
|
-
#define BUFFER_NUM
|
|
24
|
+
#define BUFFER_NUM 2 // 核间流水数,暂不支持修改
|
|
25
|
+
constexpr uint64_t WORKSPACE_MAX_SEQLEN = 16384; // max seqlen
|
|
26
|
+
constexpr uint64_t WORKSPACE_SIZE = 128 * WORKSPACE_MAX_SEQLEN;
|
|
25
27
|
|
|
26
28
|
#if BFLOAT16
|
|
27
29
|
#define TYPE_NAME _bf16
|
|
@@ -59,16 +61,7 @@ typedef struct {
|
|
|
59
61
|
// 第四种:全矩阵,LOWER_TRIANGLE、BLOCK_SPARSE和AMASK如果全部关闭,则此attention采用全矩阵运算,不抑制S中的元素
|
|
60
62
|
// *******************************************//
|
|
61
63
|
|
|
62
|
-
constexpr uint64_t WORKSPACE_MAX_SEQLEN = 4096;
|
|
63
|
-
constexpr uint64_t MAX_ROW = 128;
|
|
64
64
|
constexpr uint64_t WORKSPACE_MAX_SEQLEN_BLOCK = WORKSPACE_MAX_SEQLEN / 16;
|
|
65
|
-
constexpr uint64_t
|
|
66
|
-
constexpr uint64_t WORKSPACE_SIZE1 = MAX_ROW * MAX_ROW; // for o_tmp
|
|
67
|
-
constexpr uint64_t WORKSPACE_SIZE2 = MAX_ROW * MAX_ROW; // for global_o
|
|
68
|
-
|
|
69
|
-
constexpr uint64_t WORKSPACE_OFFSET1 = WORKSPACE_SIZE0;
|
|
70
|
-
constexpr uint64_t WORKSPACE_OFFSET2 = WORKSPACE_OFFSET1 + WORKSPACE_SIZE1;
|
|
71
|
-
constexpr uint64_t WORKSPACE_SIZE = WORKSPACE_SIZE0 + WORKSPACE_SIZE1 + WORKSPACE_SIZE2;
|
|
72
|
-
constexpr uint64_t BUFFER_SIZE = WORKSPACE_SIZE * MAX_CORE_NUM * sizeof(uint16_t);
|
|
65
|
+
constexpr uint64_t BUFFER_SIZE = MAX_CORE_NUM * WORKSPACE_SIZE * sizeof(uint16_t);
|
|
73
66
|
|
|
74
67
|
#endif
|
|
@@ -32,24 +32,6 @@ inline uint64_t round(uint64_t y, uint64_t x) {
|
|
|
32
32
|
return ceil(y, x) * x;
|
|
33
33
|
}
|
|
34
34
|
|
|
35
|
-
inline uint64_t get_m(uint64_t D) {
|
|
36
|
-
if (D <= 128) {
|
|
37
|
-
return D;
|
|
38
|
-
} else {
|
|
39
|
-
return 64;
|
|
40
|
-
}
|
|
41
|
-
}
|
|
42
|
-
|
|
43
|
-
inline bool isUpperTriangleTask(int32_t m_idx, int32_t kv_split_idx, int32_t m)
|
|
44
|
-
{
|
|
45
|
-
return (m_idx + 1) * m <= kv_split_idx * WORKSPACE_MAX_SEQLEN;
|
|
46
|
-
}
|
|
47
|
-
|
|
48
|
-
inline bool isLowerTriangleTask(int32_t m_idx, int32_t kv_split_idx, int32_t m)
|
|
49
|
-
{
|
|
50
|
-
return m_idx * m >= (kv_split_idx + 1) * WORKSPACE_MAX_SEQLEN;
|
|
51
|
-
}
|
|
52
|
-
|
|
53
35
|
#if BFLOAT16
|
|
54
36
|
#define CALC_DATA_TYPE bfloat16_t
|
|
55
37
|
#else
|
|
@@ -32,8 +32,6 @@ using HostRawBuf = RawBuf;
|
|
|
32
32
|
using DeviceRawBuf = RawBuf;
|
|
33
33
|
|
|
34
34
|
using OpParamPtr = std::shared_ptr<OpParam>;
|
|
35
|
-
using DtypesParamPtr = std::shared_ptr<DtypesParam>;
|
|
36
|
-
|
|
37
35
|
struct ValidateInfo {
|
|
38
36
|
size_t input_num_;
|
|
39
37
|
size_t output_num_;
|
|
@@ -55,14 +53,11 @@ class InternelKernelImpl {
|
|
|
55
53
|
virtual void SetWorkSpace(const std::vector<DeviceRawBuf> &workspace);
|
|
56
54
|
virtual void SetStream(const void *stream_ptr);
|
|
57
55
|
virtual void SetDeviceTilingBuf(const DeviceRawBuf &tilingBuf) = 0;
|
|
58
|
-
virtual int LaunchWithProfiling();
|
|
59
56
|
virtual int Launch() = 0;
|
|
60
57
|
virtual uint64_t GetTilingBufSize() = 0;
|
|
61
58
|
virtual int Tiling(HostRawBuf &tilingBuf) = 0;
|
|
62
59
|
virtual std::vector<uint64_t> GetWorkSpaceSize() = 0;
|
|
63
60
|
virtual int InferShape(const std::vector<DIMS> &input_shapes, std::vector<DIMS> &output_shapes) = 0;
|
|
64
|
-
virtual bool IsSupported() { return true; }
|
|
65
|
-
virtual std::string GetOpName();
|
|
66
61
|
|
|
67
62
|
virtual CacheInfo &GetCacheInfo() { return cache_info_; }
|
|
68
63
|
|
|
@@ -86,7 +81,6 @@ class InternelKernelImpl {
|
|
|
86
81
|
};
|
|
87
82
|
using InternalKernelImplPtr = std::shared_ptr<InternelKernelImpl>;
|
|
88
83
|
InternalKernelImplPtr CreateInternalKernelImpl(const OpParamPtr &param);
|
|
89
|
-
bool IsInternalKernelDtypesSupported(const DtypesParamPtr &param);
|
|
90
84
|
} // namespace internal
|
|
91
85
|
} // namespace mindspore
|
|
92
86
|
#endif
|
mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/internal_rtbackend.h
CHANGED
|
@@ -16,6 +16,80 @@
|
|
|
16
16
|
#ifndef MS_KERNEL_INTERNAL_INTERNAL_RTBACKEND_H
|
|
17
17
|
#define MS_KERNEL_INTERNAL_INTERNAL_RTBACKEND_H
|
|
18
18
|
|
|
19
|
-
#
|
|
19
|
+
#ifdef __cplusplus
|
|
20
|
+
extern "C" {
|
|
21
|
+
#endif
|
|
20
22
|
|
|
23
|
+
#define RT_DEV_BINARY_MAGIC_ELF 0x43554245U
|
|
24
|
+
#define RT_DEV_BINARY_MAGIC_ELF_AIVEC 0x41415246U
|
|
25
|
+
#define RT_DEV_BINARY_MAGIC_ELF_AICUBE 0x41494343U
|
|
26
|
+
|
|
27
|
+
typedef void *rtStream_t;
|
|
28
|
+
|
|
29
|
+
typedef enum {
|
|
30
|
+
INTERNAL_RTSUCCESS = 0,
|
|
31
|
+
INTERNAL_RTERROR_NOT_INITIALIZED = -1,
|
|
32
|
+
INTERNAL_RTERROR_NOT_IMPLMENT = -2,
|
|
33
|
+
INTERNAL_RTERROR_ASCEND_ENV_NOT_EXIST = -3,
|
|
34
|
+
INTERNAL_RTERROR_LOAD_RUNTIME_FAIL = -4,
|
|
35
|
+
INTERNAL_RTERROR_FUNC_NOT_EXIST = -5,
|
|
36
|
+
INTERNAL_RTERROR_OPEN_BIN_FILE_FAIL = -6,
|
|
37
|
+
INTERNAL_RTERROR_PARA_CHECK_FAIL = -7,
|
|
38
|
+
} RtError;
|
|
39
|
+
|
|
40
|
+
typedef enum tagRtError {
|
|
41
|
+
RT_ERROR_NONE = 0x0, // success
|
|
42
|
+
RT_ERROR_INVALID_VALUE = 0x1, // invalid value
|
|
43
|
+
RT_ERROR_MEMORY_ALLOCATION = 0x2, // memory allocation fail
|
|
44
|
+
RT_ERROR_INVALID_RESOURCE_HANDLE = 0x3, // invalid handle
|
|
45
|
+
RT_ERROR_INVALID_DEVICE_POINTER = 0x4, // invalid device point
|
|
46
|
+
RT_ERROR_INVALID_MEMCPY_DIRECTION = 0x5, // invalid memory copy dirction
|
|
47
|
+
RT_ERROR_INVALID_DEVICE = 0x6, // invalid device
|
|
48
|
+
RT_ERROR_NO_DEVICE = 0x7, // no valid device
|
|
49
|
+
RT_ERROR_CMD_OCCUPY_FAILURE = 0x8, // command occpuy failure
|
|
50
|
+
RT_ERROR_SET_SIGNAL_FAILURE = 0x9, // set signal failure
|
|
51
|
+
RT_ERROR_UNSET_SIGNAL_FAILURE = 0xA, // unset signal failure
|
|
52
|
+
RT_ERROR_OPEN_FILE_FAILURE = 0xB, // unset signal failure
|
|
53
|
+
RT_ERROR_WRITE_FILE_FAILURE = 0xC,
|
|
54
|
+
RT_ERROR_MEMORY_ADDRESS_UNALIGNED = 0xD,
|
|
55
|
+
RT_ERROR_DRV_ERR = 0xE,
|
|
56
|
+
RT_ERROR_LOST_HEARTBEAT = 0xF,
|
|
57
|
+
RT_ERROR_REPORT_TIMEOUT = 0x10,
|
|
58
|
+
RT_ERROR_NOT_READY = 0x11,
|
|
59
|
+
RT_ERROR_DATA_OPERATION_FAIL = 0x12,
|
|
60
|
+
RT_ERROR_INVALID_L2_INSTR_SIZE = 0x13,
|
|
61
|
+
RT_ERROR_DEVICE_PROC_HANG_OUT = 0x14,
|
|
62
|
+
RT_ERROR_DEVICE_POWER_UP_FAIL = 0x15,
|
|
63
|
+
RT_ERROR_DEVICE_POWER_DOWN_FAIL = 0x16,
|
|
64
|
+
RT_ERROR_FEATURE_NOT_SUPPROT = 0x17,
|
|
65
|
+
RT_ERROR_KERNEL_DUPLICATE = 0x18, // register same kernel repeatly
|
|
66
|
+
RT_ERROR_MODEL_STREAM_EXE_FAILED = 0x91, // the model stream failed
|
|
67
|
+
RT_ERROR_MODEL_LOAD_FAILED = 0x94, // the model stream failed
|
|
68
|
+
RT_ERROR_END_OF_SEQUENCE = 0x95, // end of sequence
|
|
69
|
+
RT_ERROR_NO_STREAM_CB_REG = 0x96, // no callback register info for stream
|
|
70
|
+
RT_ERROR_DATA_DUMP_LOAD_FAILED = 0x97, // data dump load info fail
|
|
71
|
+
RT_ERROR_CALLBACK_THREAD_UNSUBSTRIBE = 0x98, // callback thread unsubstribe
|
|
72
|
+
RT_ERROR_RESERVED
|
|
73
|
+
} rtError_t;
|
|
74
|
+
|
|
75
|
+
// rt kernel
|
|
76
|
+
typedef struct {
|
|
77
|
+
uint32_t magic{0};
|
|
78
|
+
uint32_t version{0};
|
|
79
|
+
const void *data{nullptr};
|
|
80
|
+
uint64_t length{0};
|
|
81
|
+
} RtDevBinary_T;
|
|
82
|
+
|
|
83
|
+
typedef void *rtStream_t;
|
|
84
|
+
|
|
85
|
+
using RtDevBinaryRegisterFunc = rtError_t (*)(const RtDevBinary_T *bin, void **hdl);
|
|
86
|
+
using RtFunctionRegisterFunc = rtError_t (*)(void *binHandle, const void *subFunc, const char *stubName,
|
|
87
|
+
const void *kernelInfoExt, uint32_t funcMode);
|
|
88
|
+
using RtKernelLaunchFunc = rtError_t (*)(const void *stubFunc, uint32_t blockDim, void *args, uint32_t argsSize, void *smDesc,
|
|
89
|
+
rtStream_t sm);
|
|
90
|
+
using RtGetC2cCtrlAddrFunc = rtError_t (*)(uint64_t *addr, uint32_t *len);
|
|
91
|
+
|
|
92
|
+
#ifdef __cplusplus
|
|
93
|
+
}
|
|
94
|
+
#endif
|
|
21
95
|
#endif // MS_KERNEL_INTERNAL_INTERNAL_RTBACKEND_H
|
mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/matmul/kernel/matmul.h
CHANGED
|
@@ -14,8 +14,8 @@
|
|
|
14
14
|
* limitations under the License.
|
|
15
15
|
*/
|
|
16
16
|
|
|
17
|
-
#ifndef
|
|
18
|
-
#define
|
|
19
|
-
void MatMulOp(uint32_t blockDim, void *l2ctrl, void *stream, uint8_t *gm_a, uint8_t *gm_b, uint8_t *
|
|
20
|
-
uint8_t *
|
|
21
|
-
#endif //
|
|
17
|
+
#ifndef MS_KERNELS_INTERNAL_KERNEL_ASCENDC_MATMUL_STRIDEDSLICE_fUSION_FP16_KERNEL_H_
|
|
18
|
+
#define MS_KERNELS_INTERNAL_KERNEL_ASCENDC_MATMUL_STRIDEDSLICE_fUSION_FP16_KERNEL_H_
|
|
19
|
+
void MatMulOp(uint32_t blockDim, void *l2ctrl, void *stream, uint8_t *gm_a, uint8_t *gm_b, uint8_t *gm_c,
|
|
20
|
+
uint8_t *tilingData);
|
|
21
|
+
#endif // MS_KERNELS_INTERNAL_KERNEL_ASCENDC_MATMUL_STRIDEDSLICE_fUSION_FP16_KERNEL_H_
|
mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/matmul/matmul_impl.h
CHANGED
|
@@ -42,7 +42,6 @@ namespace internal {
|
|
|
42
42
|
using namespace tiling;
|
|
43
43
|
|
|
44
44
|
enum class MatMulAlgo { PP = 0, LLM_CUSTOM = 1 };
|
|
45
|
-
enum class MatMulFusionLevel { NONE = 0, CUBE = 1, MIX = 2 };
|
|
46
45
|
|
|
47
46
|
class MatMulImpl : public InternelKernelImpl {
|
|
48
47
|
public:
|
|
@@ -51,33 +50,22 @@ class MatMulImpl : public InternelKernelImpl {
|
|
|
51
50
|
bool Init(const ValidateInfo &info) override;
|
|
52
51
|
void SetDeviceTilingBuf(const DeviceRawBuf &tilingBuf) override;
|
|
53
52
|
int Launch() override;
|
|
54
|
-
int LaunchMix();
|
|
55
53
|
size_t GetTilingBufSize() override;
|
|
56
54
|
int Tiling(HostRawBuf &tilingBuf) override;
|
|
57
55
|
void TilingBasicFromPp(uint32_t &blockDim, PpTilingData &tilingdata);
|
|
58
|
-
int TilingPp(HostRawBuf &tilingBuf,
|
|
59
|
-
int TilingLLMCustom(HostRawBuf &tilingBuf,
|
|
56
|
+
int TilingPp(HostRawBuf &tilingBuf, uint64_t tilingId, const uint32_t &blockDim, const PpTilingData &tilingdata);
|
|
57
|
+
int TilingLLMCustom(HostRawBuf &tilingBuf, uint64_t tilingId, const uint32_t &blockDim,
|
|
60
58
|
const PpTilingData &tilingdata);
|
|
61
|
-
void SetWorkSpace(const std::vector<DeviceRawBuf> &workspace) override;
|
|
62
59
|
std::vector<uint64_t> GetWorkSpaceSize() override;
|
|
63
60
|
int InferShape(const std::vector<DIMS> &input_shapes, std::vector<DIMS> &output_shapes) override;
|
|
64
|
-
bool IsSupported() override;
|
|
65
|
-
void RegsiterMixKernels();
|
|
66
|
-
|
|
67
61
|
bool UseCustomMatMul();
|
|
68
62
|
void GetTunedKey();
|
|
69
63
|
void SetTunedValueCustom(const std::vector<int> &tuned_config);
|
|
70
|
-
bool GenTilingId(uint32_t &tiling_id);
|
|
71
|
-
void SetFusionLevel();
|
|
72
|
-
void SetTilingKeyCustom();
|
|
73
64
|
|
|
74
65
|
private:
|
|
75
66
|
uint32_t m_, k_, n_;
|
|
76
67
|
const char *func_name_ = "UnknownFunc";
|
|
77
68
|
MatMulAlgo algo_ = MatMulAlgo::PP;
|
|
78
|
-
MatMulFusionLevel fusion_level_ = MatMulFusionLevel::NONE;
|
|
79
|
-
uint32_t fusion_type_{0};
|
|
80
|
-
std::shared_ptr<MatMulExtParam> mm_ext_param_;
|
|
81
69
|
DeviceRawBuf tiling_addr_;
|
|
82
70
|
std::string soc_{"Ascend910B2"};
|
|
83
71
|
HardwareInfo hwInfo_;
|
|
@@ -87,12 +75,9 @@ class MatMulImpl : public InternelKernelImpl {
|
|
|
87
75
|
REPO tuningTableCustom_;
|
|
88
76
|
TensorDType input_dtype_;
|
|
89
77
|
TensorDType output_dtype_;
|
|
78
|
+
int block_dim_ = 0;
|
|
90
79
|
bool trans_a_{false};
|
|
91
80
|
bool trans_b_{true};
|
|
92
|
-
bool enable_dequant_{false};
|
|
93
|
-
static bool _is_inited;
|
|
94
|
-
static std::unordered_map<const char *, const char *> internal_mix_matmul_kernels_map;
|
|
95
|
-
void *workspace_addr = nullptr;
|
|
96
81
|
};
|
|
97
82
|
|
|
98
83
|
} // namespace internal
|
|
@@ -150,7 +150,7 @@ void TilingFunc(OpShareType &opShape, TilingType &tilingParam, const HardwareTyp
|
|
|
150
150
|
|
|
151
151
|
template <typename PpTilingDataType>
|
|
152
152
|
uint32_t Swizzl(PpTilingDataType &tilingData) {
|
|
153
|
-
uint32_t
|
|
153
|
+
uint32_t swizzlDirect = 0;
|
|
154
154
|
uint32_t swizzlCount = 1;
|
|
155
155
|
float m0 = tilingData.opShape.m0;
|
|
156
156
|
float n0 = tilingData.opShape.n0;
|
|
@@ -164,14 +164,14 @@ uint32_t Swizzl(PpTilingDataType &tilingData) {
|
|
|
164
164
|
float cost;
|
|
165
165
|
// B0 + A < A0 + B
|
|
166
166
|
if (i * n0 + m < m0 * c + n) {
|
|
167
|
-
|
|
167
|
+
swizzlDirect = 1; // Nz
|
|
168
168
|
cost = n0 * i + m0 * c;
|
|
169
169
|
if (cost <= mincost) {
|
|
170
170
|
mincost = cost;
|
|
171
171
|
swizzlCount = i;
|
|
172
172
|
}
|
|
173
173
|
} else {
|
|
174
|
-
|
|
174
|
+
swizzlDirect = 0; // Zn
|
|
175
175
|
cost = m0 * i + n0 * c;
|
|
176
176
|
if (cost < mincost) {
|
|
177
177
|
mincost = cost;
|
|
@@ -179,9 +179,9 @@ uint32_t Swizzl(PpTilingDataType &tilingData) {
|
|
|
179
179
|
}
|
|
180
180
|
}
|
|
181
181
|
}
|
|
182
|
-
tilingData.
|
|
182
|
+
tilingData.swizzlDirect = swizzlDirect;
|
|
183
183
|
tilingData.swizzlCount = swizzlCount;
|
|
184
|
-
return
|
|
184
|
+
return swizzlDirect;
|
|
185
185
|
}
|
|
186
186
|
|
|
187
187
|
} // namespace tiling
|
|
@@ -60,12 +60,12 @@ struct PpTilingData {
|
|
|
60
60
|
uint32_t swizzlCount{1};
|
|
61
61
|
uint32_t tilingKey{0};
|
|
62
62
|
uint32_t blockDim{1};
|
|
63
|
-
uint32_t
|
|
63
|
+
uint32_t swizzlDirect{0};
|
|
64
64
|
uint32_t splitk{0};
|
|
65
65
|
|
|
66
66
|
void SetBaseShape(uint32_t batchSize, uint32_t m, uint32_t k, uint32_t n);
|
|
67
67
|
void SetBaseOp(uint32_t coreNum, uint32_t mBase, uint32_t nBase, uint32_t qkv_n0, uint32_t qkv_n1, uint32_t qkv_n2);
|
|
68
|
-
void SetTilingKey(const MatMulInfo &mmInfo, uint32_t
|
|
68
|
+
void SetTilingKey(const MatMulInfo &mmInfo, uint32_t swizzlDirect, uint32_t enSplitK);
|
|
69
69
|
uint32_t End(const MatMulInfo &mmInfo);
|
|
70
70
|
};
|
|
71
71
|
} // namespace tiling
|
mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/matmul_common/tiling_data.h
CHANGED
|
@@ -37,7 +37,7 @@ struct PpMatmulTilingData {
|
|
|
37
37
|
uint32_t swizzlCount{0};
|
|
38
38
|
uint32_t tilingKey{0};
|
|
39
39
|
uint32_t blockDim{1};
|
|
40
|
-
uint32_t
|
|
40
|
+
uint32_t swizzlDirect{0};
|
|
41
41
|
uint32_t splitk{0};
|
|
42
42
|
uint32_t enShuffleK{0};
|
|
43
43
|
uint32_t unused0{0};
|
|
@@ -48,7 +48,6 @@ struct PpMatmulTilingData {
|
|
|
48
48
|
uint32_t unused5{0};
|
|
49
49
|
uint32_t unused6{0};
|
|
50
50
|
uint32_t tilingId{0};
|
|
51
|
-
uint64_t sync_addr{0};
|
|
52
51
|
};
|
|
53
52
|
|
|
54
53
|
struct CustomMatmulTilingData {
|
|
@@ -77,10 +76,10 @@ struct CustomMatmulTilingData {
|
|
|
77
76
|
uint32_t TransB{0};
|
|
78
77
|
uint32_t shuffleFlag{0};
|
|
79
78
|
uint32_t tilingId{0};
|
|
80
|
-
uint32_t tilingKey{0};
|
|
81
|
-
uint64_t sync_addr{0};
|
|
82
79
|
};
|
|
83
80
|
|
|
81
|
+
constexpr size_t maxTilingBufSize = sizeof(CustomMatmulTilingData);
|
|
82
|
+
|
|
84
83
|
struct MatmulStridedSliceFusionTilingData {
|
|
85
84
|
uint32_t tilingId{0};
|
|
86
85
|
uint32_t BlockDimM{0};
|
|
@@ -109,40 +108,8 @@ struct MatmulStridedSliceFusionTilingData {
|
|
|
109
108
|
uint32_t TransA{0};
|
|
110
109
|
uint32_t TransB{1};
|
|
111
110
|
uint32_t shuffleFlag{0};
|
|
112
|
-
uint32_t tilingKey{0};
|
|
113
|
-
uint64_t sync_addr{0};
|
|
114
|
-
uint32_t silu_pos{0};
|
|
115
111
|
};
|
|
116
112
|
|
|
117
|
-
// qkv ffn tiling
|
|
118
|
-
struct PpMultiMatmulTilingData {
|
|
119
|
-
uint32_t tilingId{0};
|
|
120
|
-
uint32_t batch{0};
|
|
121
|
-
uint32_t m{0};
|
|
122
|
-
uint32_t k{0};
|
|
123
|
-
uint32_t n{0};
|
|
124
|
-
uint32_t m0{0};
|
|
125
|
-
uint32_t k0{0};
|
|
126
|
-
uint32_t n0{0};
|
|
127
|
-
uint32_t mLoop{0};
|
|
128
|
-
uint32_t kLoop{0};
|
|
129
|
-
uint32_t nLoop{0};
|
|
130
|
-
uint32_t coreLoop{0};
|
|
131
|
-
uint32_t swizzlCount{0};
|
|
132
|
-
uint32_t tilingKey{0};
|
|
133
|
-
uint32_t blockDim{1};
|
|
134
|
-
uint32_t swizzleDirect{0};
|
|
135
|
-
uint32_t splitk{0};
|
|
136
|
-
uint32_t enShuffleK{0};
|
|
137
|
-
uint32_t mm_n_len_0{0};
|
|
138
|
-
uint32_t mm_n_len_1{0};
|
|
139
|
-
uint32_t mm_n_len_2{0};
|
|
140
|
-
uint64_t sync_addr{0};
|
|
141
|
-
uint32_t silu_pos{0};
|
|
142
|
-
};
|
|
143
|
-
|
|
144
|
-
constexpr size_t maxTilingBufSize = sizeof(uint32_t) * 32;
|
|
145
|
-
|
|
146
113
|
} // namespace tiling
|
|
147
114
|
} // namespace internal
|
|
148
115
|
} // namespace mindspore
|
|
@@ -16,7 +16,7 @@
|
|
|
16
16
|
|
|
17
17
|
#ifndef MS_KERNELS_INTERNAL_KERNEL_ASCENDC_MATMUL_STRIDEDSLICE_fUSION_FP16_KERNEL_H_
|
|
18
18
|
#define MS_KERNELS_INTERNAL_KERNEL_ASCENDC_MATMUL_STRIDEDSLICE_fUSION_FP16_KERNEL_H_
|
|
19
|
-
void MatMulStridedSliceFusion(uint32_t blockDim, void *l2ctrl, void *stream, uint8_t *globalA, uint8_t *
|
|
20
|
-
uint8_t *
|
|
19
|
+
void MatMulStridedSliceFusion(uint32_t blockDim, void *l2ctrl, void *stream, uint8_t *globalA, uint8_t *globalB0,
|
|
20
|
+
uint8_t *globalB1, uint8_t *globalB2, uint8_t *globalC0, uint8_t *globalC1,
|
|
21
21
|
uint8_t *globalC2, uint8_t *tilingData);
|
|
22
22
|
#endif // MS_KERNELS_INTERNAL_KERNEL_ASCENDC_MATMUL_STRIDEDSLICE_fUSION_FP16_KERNEL_H_
|
|
@@ -42,32 +42,19 @@ namespace internal {
|
|
|
42
42
|
|
|
43
43
|
using namespace tiling;
|
|
44
44
|
|
|
45
|
-
enum class MultiMatMulAlgo { PP = 0, LLM_CUSTOM = 1 };
|
|
46
|
-
enum class MultiMatMulFusionLevel { NONE = 0, CUBE = 1, MIX= 2 };
|
|
47
|
-
|
|
48
45
|
class MatMulStridedSliceFusionImpl : public InternelKernelImpl {
|
|
49
46
|
public:
|
|
50
47
|
MatMulStridedSliceFusionImpl(const OpParamPtr &param) : InternelKernelImpl(param){};
|
|
51
48
|
virtual ~MatMulStridedSliceFusionImpl() = default;
|
|
52
49
|
bool Init(const ValidateInfo &info) override;
|
|
53
|
-
void RegsiterCceKernels();
|
|
54
50
|
void SetDeviceTilingBuf(const DeviceRawBuf &tilingBuf) override;
|
|
55
51
|
int Launch() override;
|
|
56
|
-
int LaunchMix();
|
|
57
52
|
size_t GetTilingBufSize() override;
|
|
58
53
|
int Tiling(HostRawBuf &tilingBuf) override;
|
|
59
54
|
void TilingBasicFromPp(uint32_t &blockDim, PpTilingData &tilingdata);
|
|
60
|
-
int
|
|
61
|
-
int TilingLLMCustom(HostRawBuf &tilingBuf, uint32_t tilingId, const uint32_t &blockDim,
|
|
62
|
-
const PpTilingData &tilingdata);
|
|
63
|
-
void SetWorkSpace(const std::vector<DeviceRawBuf> &workspace) override;
|
|
55
|
+
int TilingLLMCustom(HostRawBuf &tilingBuf, const uint32_t &blockDim, const PpTilingData &tilingdata, bool has_tuned);
|
|
64
56
|
std::vector<uint64_t> GetWorkSpaceSize() override;
|
|
65
57
|
int InferShape(const std::vector<DIMS> &input_shapes, std::vector<DIMS> &output_shapes) override;
|
|
66
|
-
bool GenTilingId(uint32_t &tiling_id);
|
|
67
|
-
void GetTunedKey();
|
|
68
|
-
bool GetPpMatmulTiling(const MatMulInfo &mmInfo, const HardwareInfo &hwInfo_, uint32_t &blockDim,
|
|
69
|
-
PpTilingData &tilingData);
|
|
70
|
-
uint32_t MixSwizzle(PpTilingData &tilingData);
|
|
71
58
|
|
|
72
59
|
private:
|
|
73
60
|
std::string soc_{"Ascend910B2"};
|
|
@@ -80,16 +67,11 @@ class MatMulStridedSliceFusionImpl : public InternelKernelImpl {
|
|
|
80
67
|
int block_dim_ = 0;
|
|
81
68
|
bool trans_a_{false};
|
|
82
69
|
bool trans_b_{true};
|
|
83
|
-
std::vector<int> tune_key_;
|
|
84
|
-
MultiMatMulAlgo algo_ = MultiMatMulAlgo::PP;
|
|
85
|
-
MultiMatMulFusionLevel fusion_level_ = MultiMatMulFusionLevel::NONE;
|
|
86
|
-
int32_t silu_position_{-1};
|
|
87
|
-
uint32_t fusion_type_{0};
|
|
88
70
|
|
|
89
71
|
REPO tuningTable_;
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
72
|
+
tiling::MatmulStridedSliceFusionTilingData t_;
|
|
73
|
+
std::vector<int> GetTunedKey();
|
|
74
|
+
void SetTunedValue(const std::vector<int> &tuned_config);
|
|
93
75
|
};
|
|
94
76
|
|
|
95
77
|
} // namespace internal
|
|
@@ -33,30 +33,18 @@
|
|
|
33
33
|
#include "asdops/params/sort.h"
|
|
34
34
|
#include <memory>
|
|
35
35
|
#include <vector>
|
|
36
|
-
#include "types.h"
|
|
37
36
|
namespace mindspore {
|
|
38
37
|
namespace internal {
|
|
39
|
-
|
|
40
|
-
int op_id_ = 0;
|
|
41
|
-
std::vector<int64_t> in_dtypes_;
|
|
42
|
-
std::vector<int64_t> out_dtypes_;
|
|
43
|
-
};
|
|
44
|
-
struct OpParam : public AsdOps::OpDesc {
|
|
45
|
-
int dtype_ = 0;
|
|
46
|
-
std::vector<int64_t> in_dtypes_;
|
|
47
|
-
std::vector<int64_t> out_dtypes_;
|
|
48
|
-
std::string op_fullname_;
|
|
49
|
-
};
|
|
38
|
+
using OpParam = AsdOps::OpDesc;
|
|
50
39
|
enum OpId : int {
|
|
51
40
|
MatMul,
|
|
41
|
+
KVCache,
|
|
52
42
|
ReshapeAndCache,
|
|
53
43
|
Slice,
|
|
54
44
|
Gather,
|
|
55
45
|
ApplyRotaryPosEmb,
|
|
56
46
|
Add,
|
|
57
47
|
Sub,
|
|
58
|
-
Exp,
|
|
59
|
-
Relu,
|
|
60
48
|
FlashAttentionScore,
|
|
61
49
|
PagedAttention,
|
|
62
50
|
Cast,
|
|
@@ -86,8 +74,6 @@ enum OpId : int {
|
|
|
86
74
|
ReduceSum,
|
|
87
75
|
TopK,
|
|
88
76
|
Tile,
|
|
89
|
-
GroupedMatmul,
|
|
90
|
-
OpId_END,
|
|
91
77
|
};
|
|
92
78
|
using MatMulParam = AsdOps::OpParam::MatMul;
|
|
93
79
|
using MixParam = AsdOps::OpParam::Mix;
|
|
@@ -20,7 +20,9 @@ constexpr uint64_t L0AB_UINT8_BLOCK_SIZE = 32768; // 128 * 128 * 2B
|
|
|
20
20
|
constexpr uint64_t L1_MAX_SHARE_NUM = (L1_SIZE - 8 * L0AB_UINT8_BLOCK_SIZE) / L0AB_UINT8_BLOCK_SIZE / 2;
|
|
21
21
|
constexpr uint64_t SUB_SP_SIZE = 2048 * 8; // 1024*16, 2048*8, 4096*4, 8192*2, 16K*1,五种分块方法
|
|
22
22
|
|
|
23
|
-
enum class
|
|
23
|
+
enum class L1Mode{load, // 读取数据至L1的share区
|
|
24
|
+
share, // 使用share区的数据
|
|
25
|
+
noshare}; // 不读且不用share区
|
|
24
26
|
|
|
25
27
|
inline uint64_t ceil(uint64_t y, uint64_t x) {
|
|
26
28
|
return (y + x - 1) / x;
|