mindspore 2.3.0__cp39-none-any.whl → 2.3.0rc2__cp39-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mindspore might be problematic. Click here for more details.
- mindspore/.commit_id +1 -1
- mindspore/Third_Party_Open_Source_Software_Notice +0 -1512
- mindspore/__init__.py +1 -2
- mindspore/_c_dataengine.cpython-39-aarch64-linux-gnu.so +0 -0
- mindspore/_c_expression.cpython-39-aarch64-linux-gnu.so +0 -0
- mindspore/_c_mindrecord.cpython-39-aarch64-linux-gnu.so +0 -0
- mindspore/_checkparam.py +25 -5
- mindspore/_extends/graph_kernel/model/graph_parallel.py +1 -1
- mindspore/_extends/parse/__init__.py +2 -2
- mindspore/_extends/parse/compile_config.py +0 -29
- mindspore/_extends/parse/namespace.py +2 -2
- mindspore/_extends/parse/parser.py +5 -21
- mindspore/_extends/parse/resources.py +7 -5
- mindspore/_extends/parse/standard_method.py +59 -40
- mindspore/_mindspore_offline_debug.cpython-39-aarch64-linux-gnu.so +0 -0
- mindspore/amp.py +5 -26
- mindspore/bin/cache_admin +0 -0
- mindspore/bin/cache_server +0 -0
- mindspore/boost/adasum.py +1 -1
- mindspore/boost/base.py +1 -1
- mindspore/boost/boost_cell_wrapper.py +1 -1
- mindspore/boost/grad_freeze.py +2 -2
- mindspore/boost/less_batch_normalization.py +6 -9
- mindspore/common/__init__.py +1 -8
- mindspore/common/_register_for_tensor.py +9 -8
- mindspore/common/api.py +65 -275
- mindspore/common/dtype.py +4 -8
- mindspore/common/dump.py +5 -2
- mindspore/common/jit_config.py +1 -1
- mindspore/common/lazy_inline.py +2 -14
- mindspore/common/parameter.py +15 -14
- mindspore/common/recompute.py +5 -20
- mindspore/common/sparse_tensor.py +6 -21
- mindspore/common/tensor.py +52 -100
- mindspore/communication/__init__.py +11 -6
- mindspore/communication/management.py +94 -92
- mindspore/context.py +18 -180
- mindspore/dataset/engine/datasets.py +46 -69
- mindspore/dataset/engine/datasets_user_defined.py +53 -72
- mindspore/dataset/engine/datasets_vision.py +2 -2
- mindspore/dataset/engine/queue.py +38 -56
- mindspore/dataset/engine/validators.py +5 -11
- mindspore/dataset/vision/__init__.py +5 -5
- mindspore/dataset/vision/c_transforms.py +5 -5
- mindspore/dataset/vision/py_transforms_util.py +1 -1
- mindspore/dataset/vision/transforms.py +46 -591
- mindspore/dataset/vision/utils.py +1 -121
- mindspore/dataset/vision/validators.py +3 -9
- mindspore/hal/__init__.py +1 -7
- mindspore/hal/device.py +1 -1
- mindspore/include/api/model.h +0 -3
- mindspore/include/dataset/vision.h +2 -54
- mindspore/include/mindapi/base/types.h +0 -1
- mindspore/lib/libdnnl.so.2 +0 -0
- mindspore/lib/libmindspore.so +0 -0
- mindspore/lib/libmindspore_backend.so +0 -0
- mindspore/lib/libmindspore_common.so +0 -0
- mindspore/lib/libmindspore_core.so +0 -0
- mindspore/lib/libmindspore_glog.so.0 +0 -0
- mindspore/lib/libmindspore_gpr.so.15 +0 -0
- mindspore/lib/libmindspore_grpc++.so.1 +0 -0
- mindspore/lib/libmindspore_grpc.so.15 +0 -0
- mindspore/lib/libmindspore_shared_lib.so +0 -0
- mindspore/lib/libmpi_adapter.so +0 -0
- mindspore/lib/libmpi_collective.so +0 -0
- mindspore/lib/libnnacl.so +0 -0
- mindspore/lib/libopencv_core.so.4.5 +0 -0
- mindspore/lib/libps_cache.so +0 -0
- mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/config/ascend310p/aic-ascend310p-ops-info.json +0 -35
- mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/custom_aicore_ops_impl/kv_cache_mgr.py +0 -2
- mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/vector_core/tbe/custom_aicore_ops_impl/kv_cache_mgr.py +0 -2
- mindspore/lib/plugin/ascend/custom_aicpu_ops/op_impl/cpu/aicpu_kernel/impl/libcust_cpu_kernels.so +0 -0
- mindspore/lib/plugin/ascend/custom_aicpu_ops/op_impl/cpu/config/cust_aicpu_kernel.json +0 -72
- mindspore/lib/plugin/ascend/custom_aicpu_ops/op_proto/libcust_op_proto.so +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_ops/op_api/include/{aclnn_all_finite.h → aclnn_add_custom.h} +11 -9
- mindspore/lib/plugin/ascend/custom_ascendc_ops/op_api/include/aclnn_decoder_kv_cache.h +1 -1
- mindspore/lib/plugin/ascend/custom_ascendc_ops/op_api/include/aclnn_prompt_kv_cache.h +1 -1
- mindspore/lib/plugin/ascend/custom_ascendc_ops/op_api/lib/libcust_opapi.so +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_ops/op_impl/ai_core/tbe/config/ascend310p/aic-ascend310p-ops-info.json +12 -184
- mindspore/lib/plugin/ascend/custom_ascendc_ops/op_impl/ai_core/tbe/config/ascend910/aic-ascend910-ops-info.json +15 -7
- mindspore/lib/plugin/ascend/custom_ascendc_ops/op_impl/ai_core/tbe/config/ascend910b/aic-ascend910b-ops-info.json +15 -7
- mindspore/lib/plugin/ascend/custom_ascendc_ops/op_impl/ai_core/tbe/custom_ascendc_ops_impl/dynamic/add_custom.cpp +81 -0
- mindspore/lib/plugin/ascend/custom_ascendc_ops/op_impl/ai_core/tbe/custom_ascendc_ops_impl/dynamic/add_custom.py +134 -0
- mindspore/lib/plugin/ascend/custom_ascendc_ops/op_impl/ai_core/tbe/custom_ascendc_ops_impl/dynamic/decoder_kv_cache.py +31 -77
- mindspore/lib/plugin/ascend/custom_ascendc_ops/op_impl/ai_core/tbe/custom_ascendc_ops_impl/dynamic/prompt_kv_cache.py +31 -77
- mindspore/lib/plugin/ascend/custom_ascendc_ops/op_impl/ai_core/tbe/op_tiling/lib/linux/aarch64/libcust_opmaster_rt2.0.so +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_ops/op_impl/ai_core/tbe/op_tiling/liboptiling.so +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_ops/op_proto/inc/op_proto.h +5 -4
- mindspore/lib/plugin/ascend/custom_ascendc_ops/op_proto/lib/linux/aarch64/libcust_opsproto_rt2.0.so +0 -0
- mindspore/lib/plugin/ascend/libascend_collective.so +0 -0
- mindspore/lib/plugin/ascend/libdvpp_utils.so +0 -0
- mindspore/lib/plugin/ascend/libhccl_plugin.so +0 -0
- mindspore/lib/plugin/ascend/liblowlatency_collective.so +0 -0
- mindspore/lib/plugin/ascend/libmindspore_cpu_kernels.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/bin/DeviceBin +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/bin/PkgInspect +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/bin/op_man +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/device/ascend910b/bin/ascend910b.bin +286 -275
- mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/host/libasdops_cann_host.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/host/libasdops_host.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/lib/libasdops.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/lib/libasdops_static.a +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/add/add_impl.h +0 -1
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/apply_rotary_pos_emb/apply_rotary_pos_emb_impl.h +0 -1
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/asdop/asd_op_impl.h +0 -3
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/backend_param.h +0 -5
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/cast/cast_tiling.h +45 -1
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/compare/compare_impl.h +0 -1
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/flash_attention_score/flash_attention_score_impl.h +4 -8
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/flash_attention_score/flash_attention_score_tiling.h +4 -11
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/flash_attention_score/kernel/flash_attention_score_mix_hwsync.h +0 -18
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/internal_kernel.h +0 -6
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/internal_rtbackend.h +75 -1
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/matmul/kernel/matmul.h +5 -5
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/matmul/matmul_impl.h +3 -18
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/matmul_common/pp_matmul_common_tiling.h +5 -5
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/matmul_common/pp_matmul_info.h +2 -2
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/matmul_common/tiling_data.h +3 -36
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/matmul_stridedslice/kernel/matmul_stridedslice_fusion.h +2 -2
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/matmul_stridedslice/matmul_stridedslice_fusion_impl.h +4 -22
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/op_param.h +2 -16
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/paged_attention/kernel/paged_attention_mix_hwsync.h +3 -1
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/paged_attention/paged_attention_impl.h +4 -5
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/paged_attention/paged_attention_tiling.h +4 -9
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/param/attention_param.h +2 -5
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/param/matmul_ext_param.h +0 -1
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/param/matmul_qkv_param.h +4 -10
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/param/sub_param.h +12 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/rms_norm/rms_norm_impl.h +0 -1
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/sub/sub_impl.h +0 -1
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/tune_repo/matmul_table.h +1 -1
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/utils/backend.h +2 -10
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/utils/elewise_utils.h +1 -5
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/utils/log/log.h +0 -1
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/utils/log/log_tiling.h +0 -17
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/utils/math.h +7 -2
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libAdd_impl.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libSub_impl.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libadd_layernorm_impl.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libadd_rms_norm_impl.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libapply_rotary_pos_emb_impl.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libcast_impl.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libgelu_impl.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libmatmul_impl.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libmatmul_stridedslice_fusion_impl.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libms_kernels_internal.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libnot_equal_impl.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libreshape_and_cache_impl.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/librms_norm_impl.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/BSAttention/flash_attention_score_bf16_bnsd_full_mix.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/BSAttention/flash_attention_score_bf16_bnsd_tri_mix.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/BSAttention/flash_attention_score_bf16_bsh_full_mix.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/BSAttention/flash_attention_score_bf16_bsh_tri_mix.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/BSAttention/flash_attention_score_fp16_bnsd_full_mix.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/BSAttention/flash_attention_score_fp16_bnsd_tri_mix.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/BSAttention/flash_attention_score_fp16_bsh_full_mix.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/BSAttention/flash_attention_score_fp16_bsh_tri_mix.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/BSAttention/paged_attention_bf16_bnsd_full_mix.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/BSAttention/paged_attention_bf16_bsh_full_mix.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/BSAttention/paged_attention_fp16_bnsd_full_mix.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/BSAttention/paged_attention_fp16_bsh_full_mix.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/lccl/lib/liblcal.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/lccl/lib/liblccl_wrapper.so +0 -0
- mindspore/lib/plugin/libmindspore_ascend.so.2 +0 -0
- mindspore/mindrecord/filewriter.py +2 -2
- mindspore/mint/__init__.py +40 -720
- mindspore/mint/nn/__init__.py +7 -89
- mindspore/mint/nn/functional.py +16 -165
- mindspore/mint/optim/adamw.py +16 -15
- mindspore/nn/__init__.py +2 -0
- mindspore/nn/cell.py +98 -97
- mindspore/nn/extend/basic.py +2 -2
- mindspore/nn/extend/embedding.py +1 -1
- mindspore/nn/extend/layer/normalization.py +5 -7
- mindspore/nn/generator.py +297 -0
- mindspore/nn/layer/activation.py +3 -4
- mindspore/nn/layer/basic.py +16 -79
- mindspore/nn/layer/conv.py +8 -17
- mindspore/nn/layer/embedding.py +4 -1
- mindspore/nn/layer/math.py +1 -1
- mindspore/nn/layer/normalization.py +1 -1
- mindspore/nn/layer/pooling.py +0 -5
- mindspore/nn/layer/rnn_cells.py +2 -2
- mindspore/nn/loss/loss.py +19 -19
- mindspore/nn/optim/adasum.py +1 -1
- mindspore/nn/optim/sgd.py +2 -3
- mindspore/nn/probability/distribution/exponential.py +1 -1
- mindspore/nn/probability/distribution/geometric.py +1 -1
- mindspore/nn/probability/distribution/logistic.py +1 -1
- mindspore/nn/wrap/cell_wrapper.py +1 -25
- mindspore/nn/wrap/loss_scale.py +1 -24
- mindspore/numpy/array_ops.py +1 -5
- mindspore/numpy/dtypes.py +3 -3
- mindspore/numpy/math_ops.py +8 -8
- mindspore/ops/__init__.py +1 -1
- mindspore/ops/_grad_experimental/grad_comm_ops.py +16 -75
- mindspore/ops/_vmap/vmap_array_ops.py +0 -27
- mindspore/ops/_vmap/vmap_math_ops.py +1 -29
- mindspore/ops/_vmap/vmap_nn_ops.py +18 -19
- mindspore/ops/auto_generate/cpp_create_prim_instance_helper.py +8 -34
- mindspore/ops/auto_generate/gen_arg_dtype_cast.py +9 -2
- mindspore/ops/auto_generate/gen_arg_handler.py +0 -26
- mindspore/ops/auto_generate/gen_extend_func.py +27 -603
- mindspore/ops/auto_generate/gen_ops_def.py +203 -993
- mindspore/ops/auto_generate/gen_ops_prim.py +402 -1946
- mindspore/ops/auto_generate/pyboost_inner_prim.py +20 -90
- mindspore/ops/composite/base.py +6 -3
- mindspore/ops/composite/math_ops.py +1 -1
- mindspore/ops/composite/multitype_ops/_compile_utils.py +17 -24
- mindspore/ops/composite/multitype_ops/_constexpr_utils.py +1 -1
- mindspore/ops/extend/__init__.py +3 -2
- mindspore/ops/extend/array_func.py +51 -10
- mindspore/ops/extend/nn_func.py +78 -2
- mindspore/ops/function/__init__.py +13 -8
- mindspore/ops/function/array_func.py +179 -455
- mindspore/ops/function/clip_func.py +1 -1
- mindspore/ops/function/grad/grad_func.py +3 -3
- mindspore/ops/function/math_func.py +103 -117
- mindspore/ops/function/nn_func.py +163 -275
- mindspore/ops/function/other_func.py +2 -2
- mindspore/ops/function/random_func.py +69 -202
- mindspore/ops/function/sparse_func.py +4 -4
- mindspore/ops/functional.py +327 -332
- mindspore/ops/operations/__init__.py +3 -13
- mindspore/ops/operations/_grad_ops.py +27 -3
- mindspore/ops/operations/_inner_ops.py +356 -53
- mindspore/ops/operations/_rl_inner_ops.py +2 -2
- mindspore/ops/operations/_tensor_array.py +8 -8
- mindspore/ops/operations/array_ops.py +65 -82
- mindspore/ops/operations/comm_ops.py +93 -784
- mindspore/ops/operations/custom_ops.py +28 -51
- mindspore/ops/operations/debug_ops.py +4 -4
- mindspore/ops/operations/inner_ops.py +2 -2
- mindspore/ops/operations/manually_defined/ops_def.py +4 -304
- mindspore/ops/operations/math_ops.py +50 -3
- mindspore/ops/operations/nn_ops.py +247 -14
- mindspore/ops/operations/other_ops.py +3 -3
- mindspore/ops/operations/random_ops.py +1 -1
- mindspore/ops/operations/sparse_ops.py +1 -1
- mindspore/ops/primitive.py +8 -9
- mindspore/ops/silent_check.py +5 -5
- mindspore/ops_generate/arg_dtype_cast.py +9 -2
- mindspore/ops_generate/arg_handler.py +0 -26
- mindspore/ops_generate/gen_aclnn_implement.py +4 -1
- mindspore/ops_generate/gen_ops.py +4 -26
- mindspore/ops_generate/gen_pyboost_func.py +12 -41
- mindspore/ops_generate/gen_utils.py +0 -21
- mindspore/ops_generate/pyboost_utils.py +2 -7
- mindspore/ops_generate/template.py +0 -1
- mindspore/parallel/_auto_parallel_context.py +1 -21
- mindspore/parallel/_tensor.py +5 -0
- mindspore/parallel/_transformer/transformer.py +1 -1
- mindspore/parallel/_utils.py +1 -15
- mindspore/parallel/algo_parameter_config.py +3 -1
- mindspore/parallel/checkpoint_transform.py +9 -12
- mindspore/parallel/cluster/process_entity/_api.py +29 -28
- mindspore/parallel/cluster/process_entity/_utils.py +3 -13
- mindspore/parallel/cluster/run.py +16 -13
- mindspore/parallel/parameter_broadcast.py +2 -2
- mindspore/parallel/shard.py +17 -31
- mindspore/profiler/__init__.py +2 -3
- mindspore/profiler/common/util.py +2 -107
- mindspore/profiler/envprofiling.py +1 -1
- mindspore/profiler/parser/ascend_analysis/constant.py +21 -8
- mindspore/profiler/parser/ascend_analysis/file_manager.py +0 -82
- mindspore/profiler/parser/ascend_analysis/function_event.py +28 -43
- mindspore/profiler/parser/ascend_analysis/fwk_cann_parser.py +27 -49
- mindspore/profiler/parser/ascend_analysis/fwk_file_parser.py +10 -15
- mindspore/profiler/parser/ascend_analysis/msprof_timeline_parser.py +20 -25
- mindspore/profiler/parser/ascend_analysis/profiler_info_parser.py +5 -5
- mindspore/profiler/parser/ascend_analysis/trace_event_manager.py +1 -10
- mindspore/profiler/parser/ascend_hccl_generator.py +1 -4
- mindspore/profiler/parser/ascend_msprof_exporter.py +22 -43
- mindspore/profiler/parser/ascend_timeline_generator.py +5 -7
- mindspore/profiler/parser/minddata_parser.py +3 -72
- mindspore/profiler/profiling.py +59 -176
- mindspore/rewrite/api/node.py +1 -1
- mindspore/rewrite/common/namespace.py +5 -5
- mindspore/rewrite/parsers/assign_parser.py +0 -2
- mindspore/rewrite/parsers/class_def_parser.py +4 -8
- mindspore/run_check/_check_version.py +1 -1
- mindspore/scipy/fft.py +3 -1
- mindspore/scipy/linalg.py +3 -2
- mindspore/scipy/ops.py +3 -5
- mindspore/scipy/optimize/__init__.py +2 -2
- mindspore/train/__init__.py +4 -4
- mindspore/train/anf_ir_pb2.py +2 -8
- mindspore/train/callback/__init__.py +2 -5
- mindspore/train/callback/_backup_and_restore.py +2 -2
- mindspore/train/callback/_checkpoint.py +16 -104
- mindspore/train/callback/_landscape.py +1 -1
- mindspore/train/callback/_time_monitor.py +1 -1
- mindspore/train/data_sink.py +4 -5
- mindspore/train/dataset_helper.py +20 -45
- mindspore/train/model.py +38 -266
- mindspore/train/serialization.py +105 -256
- mindspore/train/summary/_summary_adapter.py +1 -1
- mindspore/version.py +1 -1
- {mindspore-2.3.0.dist-info → mindspore-2.3.0rc2.dist-info}/METADATA +2 -2
- {mindspore-2.3.0.dist-info → mindspore-2.3.0rc2.dist-info}/RECORD +303 -420
- mindspore/_extends/pijit/__init__.py +0 -23
- mindspore/_extends/pijit/pijit_func_white_list.py +0 -343
- mindspore/common/file_system.py +0 -48
- mindspore/common/generator.py +0 -260
- mindspore/common/no_inline.py +0 -54
- mindspore/common/np_dtype.py +0 -25
- mindspore/communication/comm_func.py +0 -1140
- mindspore/hal/memory.py +0 -326
- mindspore/lib/libavcodec.so.59 +0 -0
- mindspore/lib/libavdevice.so.59 +0 -0
- mindspore/lib/libavfilter.so.8 +0 -0
- mindspore/lib/libavformat.so.59 +0 -0
- mindspore/lib/libavutil.so.57 +0 -0
- mindspore/lib/libmindspore_np_dtype.so +0 -0
- mindspore/lib/libswresample.so.4 +0 -0
- mindspore/lib/libswscale.so.6 +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_ops/op_impl/ai_core/tbe/custom_ascendc_ops_impl/dynamic/all_finite.cpp +0 -326
- mindspore/lib/plugin/ascend/custom_ascendc_ops/op_impl/ai_core/tbe/custom_ascendc_ops_impl/dynamic/all_finite.py +0 -180
- mindspore/lib/plugin/ascend/custom_ascendc_ops/op_impl/ai_core/tbe/kernel/ascend910b/all_finite/AllFinite_576ceaeef5870c451cab59af55ea46ad.json +0 -58
- mindspore/lib/plugin/ascend/custom_ascendc_ops/op_impl/ai_core/tbe/kernel/ascend910b/all_finite/AllFinite_576ceaeef5870c451cab59af55ea46ad.o +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_ops/op_impl/ai_core/tbe/kernel/ascend910b/all_finite/AllFinite_86a73ff6e28d734c96bb8d3054f7dd18.json +0 -58
- mindspore/lib/plugin/ascend/custom_ascendc_ops/op_impl/ai_core/tbe/kernel/ascend910b/all_finite/AllFinite_86a73ff6e28d734c96bb8d3054f7dd18.o +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_ops/op_impl/ai_core/tbe/kernel/ascend910b/all_finite/AllFinite_f55e0ebaad1f2f572e43677336992fa0.json +0 -58
- mindspore/lib/plugin/ascend/custom_ascendc_ops/op_impl/ai_core/tbe/kernel/ascend910b/all_finite/AllFinite_f55e0ebaad1f2f572e43677336992fa0.o +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_ops/op_impl/ai_core/tbe/kernel/config/ascend910b/all_finite.json +0 -109
- mindspore/lib/plugin/ascend/custom_ascendc_ops/op_impl/ai_core/tbe/kernel/config/ascend910b/binary_info_config.json +0 -38
- mindspore/lib/plugin/ascend/custom_compiler/OWNERS +0 -12
- mindspore/lib/plugin/ascend/custom_compiler/setup.py +0 -255
- mindspore/lib/plugin/ascend/custom_compiler/start.sh +0 -26
- mindspore/lib/plugin/ascend/custom_compiler/template.json +0 -40
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/include/acme.h +0 -24
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/include/acme_op.h +0 -69
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/include/base_type.h +0 -133
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/include/op_creator.h +0 -32
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/include/op_param.h +0 -35
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/include/tiling_info.h +0 -60
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/src/core/kernel_register.h +0 -37
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/src/core/platform/platform_configs.h +0 -89
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/src/core/platform/rt_funcs.h +0 -135
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/src/ops/host_src/add_op.h +0 -34
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/src/ops/host_src/asd_backoff_base.h +0 -62
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/src/ops/host_src/asd_elewise_op.h +0 -33
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/src/ops/host_src/asd_ops.h +0 -88
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/src/ops/host_src/asd_pa_op.h +0 -45
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/src/ops/host_src/cast_op.h +0 -52
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/src/ops/host_src/matmul_op.h +0 -95
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/src/utils/asd_utils.h +0 -84
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/src/utils/comm_utils.h +0 -61
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/apply_rotary_pos_emb/kernel/apply_rotary_pos_emb_fp32.h +0 -224
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_binary/and_impl.h +0 -29
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_binary/div_impl.h +0 -29
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_binary/elewise_binary_impl.h +0 -48
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_binary/elewise_binary_tiling.h +0 -25
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_binary/kernel/and_kernel.h +0 -46
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_binary/kernel/div_kernel.h +0 -46
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_binary/kernel/elewise_binary_base.h +0 -260
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_binary/kernel/elewise_binary_kernel.h +0 -35
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_binary/kernel/max_kernel.h +0 -66
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_binary/kernel/min_kernel.h +0 -66
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_binary/kernel/mul_kernel.h +0 -66
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_binary/kernel/or_kernel.h +0 -46
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_binary/max_impl.h +0 -29
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_binary/min_impl.h +0 -29
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_binary/mul_impl.h +0 -29
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_binary/or_impl.h +0 -29
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_unary/abs_impl.h +0 -29
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_unary/elewise_unary_impl.h +0 -47
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_unary/elewise_unary_tiling.h +0 -24
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_unary/exp_impl.h +0 -29
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_unary/kernel/abs_kernel.h +0 -45
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_unary/kernel/elewise_unary_base.h +0 -148
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_unary/kernel/elewise_unary_kernel.h +0 -31
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_unary/kernel/exp_kernel.h +0 -45
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_unary/kernel/ln_kernel.h +0 -45
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_unary/kernel/not_kernel.h +0 -45
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_unary/kernel/reciprocal_kernel.h +0 -45
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_unary/kernel/relu_kernel.h +0 -55
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_unary/kernel/rsqrt_kernel.h +0 -45
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_unary/kernel/sqrt_kernel.h +0 -45
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_unary/ln_impl.h +0 -29
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_unary/not_impl.h +0 -29
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_unary/reciprocal_impl.h +0 -29
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_unary/relu_impl.h +0 -29
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_unary/rsqrt_impl.h +0 -29
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_unary/sqrt_impl.h +0 -29
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/grouped_matmul/grouped_matmul_impl.h +0 -45
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/grouped_matmul/grouped_matmul_tiling.h +0 -187
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/grouped_matmul/kernel/grouped_matmul.h +0 -245
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/grouped_matmul/kernel/grouped_matmul_interface.h +0 -24
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/grouped_matmul/kernel/grouped_matmul_utils.h +0 -111
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/grouped_matmul/tiling_data.h +0 -54
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/param/compare_param.h +0 -31
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/param/elewise_param.h +0 -41
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/param/grouped_matmul_param.h +0 -40
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/profiling_util.h +0 -364
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/utils/log/log_utils.h +0 -69
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/utils/register/kernel_creator.h +0 -39
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/utils/register/kernel_registry.h +0 -114
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/utils/utils.h +0 -98
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/MatMulPostFusionMixTactic/matmul_postfusion_mix.json +0 -19
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/MatMulPostFusionMixTactic/matmul_postfusion_mix.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/MatMulPostFusionMixTactic/matmul_postfusion_mix_mix_aic_0.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/MatMulPostFusionMixTactic/matmul_postfusion_mix_mix_aiv_0.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/MultiMatMulPostFusionMixTactic/multi_matmul_postfusion_mix.json +0 -19
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/MultiMatMulPostFusionMixTactic/multi_matmul_postfusion_mix.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/MultiMatMulPostFusionMixTactic/multi_matmul_postfusion_mix_mix_aic_0.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/MultiMatMulPostFusionMixTactic/multi_matmul_postfusion_mix_mix_aiv_0.o +0 -0
- mindspore/mint/linalg/__init__.py +0 -22
- mindspore/nn/layer/embedding_service.py +0 -531
- mindspore/nn/layer/embedding_service_layer.py +0 -393
- mindspore/ops/function/reshard_func.py +0 -102
- mindspore/ops/operations/_infer_ops.py +0 -19
- mindspore/ops/operations/reshard_ops.py +0 -53
- mindspore/profiler/common/process_pool.py +0 -41
- mindspore/profiler/common/singleton.py +0 -28
- mindspore/profiler/parser/ascend_integrate_generator.py +0 -42
- mindspore/profiler/parser/ascend_memory_generator.py +0 -185
- mindspore/train/callback/_cluster_monitor.py +0 -201
- mindspore/train/callback/_flops_collector.py +0 -238
- mindspore/train/callback/_mindio_ttp.py +0 -443
- {mindspore-2.3.0.dist-info → mindspore-2.3.0rc2.dist-info}/WHEEL +0 -0
- {mindspore-2.3.0.dist-info → mindspore-2.3.0rc2.dist-info}/entry_points.txt +0 -0
- {mindspore-2.3.0.dist-info → mindspore-2.3.0rc2.dist-info}/top_level.txt +0 -0
mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/src/utils/asd_utils.h
DELETED
|
@@ -1,84 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Copyright 2024 Huawei Technologies Co., Ltd
|
|
3
|
-
*
|
|
4
|
-
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
-
* you may not use this file except in compliance with the License.
|
|
6
|
-
* You may obtain a copy of the License at
|
|
7
|
-
*
|
|
8
|
-
* http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
-
*
|
|
10
|
-
* Unless required by applicable law or agreed to in writing, software
|
|
11
|
-
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
-
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
-
* See the License for the specific language governing permissions and
|
|
14
|
-
* limitations under the License.
|
|
15
|
-
*/
|
|
16
|
-
|
|
17
|
-
#ifndef MS_KERNELS_INTERNAL_KERNEL_ACME_SRC_UTILS_ASD_UTILS_H_
|
|
18
|
-
#define MS_KERNELS_INTERNAL_KERNEL_ACME_SRC_UTILS_ASD_UTILS_H_
|
|
19
|
-
|
|
20
|
-
#include "asdops/types.h"
|
|
21
|
-
#include "utils/log/log.h"
|
|
22
|
-
#include "acme/include/base_type.h"
|
|
23
|
-
|
|
24
|
-
namespace mindspore {
|
|
25
|
-
namespace acme {
|
|
26
|
-
inline AsdOps::TensorDType ToAsdType(DataType type) {
|
|
27
|
-
switch (type) {
|
|
28
|
-
case kTypeFloat16:
|
|
29
|
-
return AsdOps::TENSOR_DTYPE_FLOAT16;
|
|
30
|
-
case kTypeFloat32:
|
|
31
|
-
return AsdOps::TENSOR_DTYPE_FLOAT;
|
|
32
|
-
case kTypeFloat64:
|
|
33
|
-
return AsdOps::TENSOR_DTYPE_DOUBLE;
|
|
34
|
-
case kTypeUint8:
|
|
35
|
-
return AsdOps::TENSOR_DTYPE_UINT8;
|
|
36
|
-
case kTypeUint16:
|
|
37
|
-
return AsdOps::TENSOR_DTYPE_UINT16;
|
|
38
|
-
case kTypeUint32:
|
|
39
|
-
return AsdOps::TENSOR_DTYPE_UINT32;
|
|
40
|
-
case kTypeBF16:
|
|
41
|
-
return AsdOps::TENSOR_DTYPE_BF16;
|
|
42
|
-
case kTypeInt8:
|
|
43
|
-
return AsdOps::TENSOR_DTYPE_INT8;
|
|
44
|
-
case kTypeInt16:
|
|
45
|
-
return AsdOps::TENSOR_DTYPE_INT16;
|
|
46
|
-
case kTypeInt32:
|
|
47
|
-
return AsdOps::TENSOR_DTYPE_INT32;
|
|
48
|
-
case kTypeBool:
|
|
49
|
-
// I don't know why, but bool is mapped to INT8.
|
|
50
|
-
return AsdOps::TENSOR_DTYPE_INT8;
|
|
51
|
-
default:
|
|
52
|
-
MSOP_LOG(EXCEPTION) << "Unsupported type: " << type;
|
|
53
|
-
return AsdOps::TENSOR_DTYPE_UNDEFINED;
|
|
54
|
-
}
|
|
55
|
-
}
|
|
56
|
-
|
|
57
|
-
inline AsdOps::TensorFormat ToAsdFormat(TensorFormat format) {
|
|
58
|
-
switch (format) {
|
|
59
|
-
case kFormatUnknown:
|
|
60
|
-
return AsdOps::TENSOR_FORMAT_UNDEFINED;
|
|
61
|
-
case kFormatNCHW:
|
|
62
|
-
return AsdOps::TENSOR_FORMAT_NCHW;
|
|
63
|
-
case kFormatND:
|
|
64
|
-
return AsdOps::TENSOR_FORMAT_ND;
|
|
65
|
-
case kFormatNHWC:
|
|
66
|
-
return AsdOps::TENSOR_FORMAT_NHWC;
|
|
67
|
-
default:
|
|
68
|
-
MSOP_LOG(EXCEPTION) << "Unsupported format: " << format;
|
|
69
|
-
return AsdOps::TENSOR_FORMAT_UNDEFINED;
|
|
70
|
-
}
|
|
71
|
-
}
|
|
72
|
-
|
|
73
|
-
inline AsdOps::SVector<int64_t> ToAsdDims(const ShapeInfo &shape) {
|
|
74
|
-
AsdOps::SVector<int64_t> asd_dims;
|
|
75
|
-
for (auto s : shape) {
|
|
76
|
-
asd_dims.emplace_back(s);
|
|
77
|
-
}
|
|
78
|
-
return asd_dims;
|
|
79
|
-
}
|
|
80
|
-
|
|
81
|
-
} // namespace acme
|
|
82
|
-
} // namespace mindspore
|
|
83
|
-
|
|
84
|
-
#endif // MS_KERNELS_INTERNAL_KERNEL_ACME_SRC_UTILS_ASD_UTILS_H_
|
mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/src/utils/comm_utils.h
DELETED
|
@@ -1,61 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Copyright 2024 Huawei Technologies Co., Ltd
|
|
3
|
-
*
|
|
4
|
-
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
-
* you may not use this file except in compliance with the License.
|
|
6
|
-
* You may obtain a copy of the License at
|
|
7
|
-
*
|
|
8
|
-
* http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
-
*
|
|
10
|
-
* Unless required by applicable law or agreed to in writing, software
|
|
11
|
-
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
-
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
-
* See the License for the specific language governing permissions and
|
|
14
|
-
* limitations under the License.
|
|
15
|
-
*/
|
|
16
|
-
|
|
17
|
-
#ifndef MS_KERNELS_INTERNAL_KERNEL_ACME_SRC_UTILS_COMM_UTILS_H_
|
|
18
|
-
#define MS_KERNELS_INTERNAL_KERNEL_ACME_SRC_UTILS_COMM_UTILS_H_
|
|
19
|
-
|
|
20
|
-
#include <unordered_map>
|
|
21
|
-
#include "acme/include/base_type.h"
|
|
22
|
-
#include "utils/log/log.h"
|
|
23
|
-
|
|
24
|
-
#define CeilDiv(dividend, divisor) (((divisor) == 0) ? 0 : (((dividend) + (divisor)-1) / (divisor)))
|
|
25
|
-
#define UpRound(in, round) ((((in) + (round)-1) / (round)) * (round))
|
|
26
|
-
|
|
27
|
-
namespace mindspore {
|
|
28
|
-
namespace acme {
|
|
29
|
-
inline size_t GetTypeSize(DataType type) {
|
|
30
|
-
static const std::unordered_map<DataType, size_t> kTypeSize = {
|
|
31
|
-
{kTypeFloat16, sizeof(float) / 2},
|
|
32
|
-
{kTypeFloat32, sizeof(float)},
|
|
33
|
-
{kTypeFloat64, sizeof(double)},
|
|
34
|
-
{kTypeInt8, sizeof(int8_t)},
|
|
35
|
-
{kTypeInt16, sizeof(int16_t)},
|
|
36
|
-
{kTypeInt32, sizeof(int32_t)},
|
|
37
|
-
{kTypeInt64, sizeof(int64_t)},
|
|
38
|
-
{kTypeUint8, sizeof(uint8_t)},
|
|
39
|
-
{kTypeUint16, sizeof(uint16_t)},
|
|
40
|
-
{kTypeUint32, sizeof(uint32_t)},
|
|
41
|
-
{kTypeUint64, sizeof(uint64_t)},
|
|
42
|
-
{kTypeBF16, sizeof(float) / 2},
|
|
43
|
-
{kTypeBool, sizeof(bool)},
|
|
44
|
-
{kTypeComplex64, 64},
|
|
45
|
-
{kTypeComplex128, 128},
|
|
46
|
-
};
|
|
47
|
-
|
|
48
|
-
auto it = kTypeSize.find(type);
|
|
49
|
-
if (it == kTypeSize.end()) {
|
|
50
|
-
MSOP_LOG(EXCEPTION) << "Unsupported type: " << type;
|
|
51
|
-
return 0;
|
|
52
|
-
}
|
|
53
|
-
|
|
54
|
-
return it->second;
|
|
55
|
-
}
|
|
56
|
-
|
|
57
|
-
bool IsOpEnabled(const std::string &op_name);
|
|
58
|
-
} // namespace acme
|
|
59
|
-
} // namespace mindspore
|
|
60
|
-
|
|
61
|
-
#endif // MS_KERNELS_INTERNAL_KERNEL_ACME_SRC_UTILS_COMM_UTILS_H_
|
|
@@ -1,224 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Copyright 2024 Huawei Technologies Co., Ltd
|
|
3
|
-
*
|
|
4
|
-
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
-
* you may not use this file except in compliance with the License.
|
|
6
|
-
* You may obtain a copy of the License at
|
|
7
|
-
*
|
|
8
|
-
* http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
-
*
|
|
10
|
-
* Unless required by applicable law or agreed to in writing, software
|
|
11
|
-
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
-
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
-
* See the License for the specific language governing permissions and
|
|
14
|
-
* limitations under the License.
|
|
15
|
-
*/
|
|
16
|
-
#ifndef ROTARY_POS_EMB_FP32
|
|
17
|
-
#define ROTARY_POS_EMB_FP32
|
|
18
|
-
#include "apply_rotary_pos_emb_base.h"
|
|
19
|
-
template <typename QK_DTYPE, typename COS_DTYPE, bool IF_COS_BROADCAST>
|
|
20
|
-
class RopeFp32 : public RopeBase<QK_DTYPE, COS_DTYPE, IF_COS_BROADCAST> {
|
|
21
|
-
public:
|
|
22
|
-
__aicore__ inline RopeFp32(RopeTilingData *tilingData) : RopeBase<QK_DTYPE, COS_DTYPE, IF_COS_BROADCAST>(tilingData) {
|
|
23
|
-
this->repeatSize_ = 64; // 64 = 256B / sizeof(float)
|
|
24
|
-
this->maxProcessNum_ = 3 * MAX_LEN_FP16; // 3 for fp16 space needed
|
|
25
|
-
this->repeatTimesQ_ = (this->tilingData_->hiddenSizeQ + this->repeatSize_ - 1) / this->repeatSize_;
|
|
26
|
-
this->repeatTimesK_ = (this->tilingData_->hiddenSizeK + this->repeatSize_ - 1) / this->repeatSize_;
|
|
27
|
-
headDimAlign_ = ((this->tilingData_->headDim + ELE_NUM_FP32 - 1) / ELE_NUM_FP32) * ELE_NUM_FP32;
|
|
28
|
-
this->alignHalfHeadDim_ = (this->rotateStride_ * NUM_TWO) % ELE_NUM_FP32;
|
|
29
|
-
this->hiddenSizeAlign_ = ((this->hiddenSize_ + this->repeatSize_ - 1) / this->repeatSize_) * this->repeatSize_;
|
|
30
|
-
this->syncOffset_ =
|
|
31
|
-
(this->tilingData_->headDim % ELE_NUM_FP16 == 0) ? this->hiddenSizeAlign_ : this->headNum_ * headDimAlign_;
|
|
32
|
-
this->offsetExtraGm_ = NUM_TWO * block_idx * this->syncOffset_;
|
|
33
|
-
|
|
34
|
-
sliceSizeTmp_ = (SLICE_SIZE / this->tilingData_->headDim) * this->tilingData_->headDim; // 向下取整
|
|
35
|
-
|
|
36
|
-
// fp16
|
|
37
|
-
this->oriPos_ = 0;
|
|
38
|
-
this->removeBefore_ = this->oriPos_ + sliceSizeTmp_;
|
|
39
|
-
this->padBefore_ = this->removeBefore_ + sliceSizeTmp_;
|
|
40
|
-
resOut_ = this->padBefore_ + sliceSizeTmp_;
|
|
41
|
-
|
|
42
|
-
// fp32
|
|
43
|
-
this->cosPad_ = 0;
|
|
44
|
-
this->sinPad_ = this->cosPad_ + sliceSizeTmp_;
|
|
45
|
-
this->negOne_ = this->sinPad_ + sliceSizeTmp_;
|
|
46
|
-
oriPosF32_ = this->negOne_ + sliceSizeTmp_;
|
|
47
|
-
PadBeforeF32_ = oriPosF32_ + sliceSizeTmp_;
|
|
48
|
-
removeBeforeF32_ = PadBeforeF32_ + sliceSizeTmp_;
|
|
49
|
-
posOneF32_ = removeBeforeF32_ + sliceSizeTmp_;
|
|
50
|
-
resOutFp32_ = posOneF32_ + sliceSizeTmp_;
|
|
51
|
-
|
|
52
|
-
this->pipe_.InitBuffer(qkfp32QueueCO2_, 1,
|
|
53
|
-
(this->tilingData_->maxUbSize - this->batchSize_ * NUM_TWO -
|
|
54
|
-
this->maxProcessNum_ * sizeof(QK_DTYPE))); // 留給fp32的
|
|
55
|
-
AscendC::LocalTensor<COS_DTYPE> qkfp32_perloop_ub = qkfp32QueueCO2_.AllocTensor<COS_DTYPE>();
|
|
56
|
-
qkfp32Ubuf_ = (__ubuf__ COS_DTYPE *)qkfp32_perloop_ub.GetPhyAddr();
|
|
57
|
-
this->pipe_.InitBuffer(outQueueCO2_, 1, ((this->maxProcessNum_) * sizeof(QK_DTYPE)));
|
|
58
|
-
AscendC::LocalTensor<QK_DTYPE> cache_perloop_ub1 = outQueueCO2_.AllocTensor<QK_DTYPE>();
|
|
59
|
-
commonUbuf_ = (__ubuf__ QK_DTYPE *)cache_perloop_ub1.GetPhyAddr();
|
|
60
|
-
|
|
61
|
-
// 判断
|
|
62
|
-
if (this->tilingData_->hiddenSizeQ > sliceSizeTmp_) {
|
|
63
|
-
sliceTimeQ_ = (this->tilingData_->hiddenSizeQ + sliceSizeTmp_ - 1) / sliceSizeTmp_; // 向上取整
|
|
64
|
-
lastSliceSizeQ_ = this->tilingData_->hiddenSizeQ - (sliceTimeQ_ - 1) * sliceSizeTmp_; // 向上取整
|
|
65
|
-
} else {
|
|
66
|
-
sliceTimeQ_ = 1;
|
|
67
|
-
lastSliceSizeQ_ = this->tilingData_->hiddenSizeQ;
|
|
68
|
-
}
|
|
69
|
-
|
|
70
|
-
if (this->tilingData_->hiddenSizeK > sliceSizeTmp_) {
|
|
71
|
-
sliceTimeK_ = (this->tilingData_->hiddenSizeK + sliceSizeTmp_ - 1) / sliceSizeTmp_; // 向上取整
|
|
72
|
-
lastSliceSizeK_ = this->tilingData_->hiddenSizeK - (sliceTimeK_ - 1) * sliceSizeTmp_;
|
|
73
|
-
} else {
|
|
74
|
-
sliceTimeK_ = 1;
|
|
75
|
-
lastSliceSizeK_ = this->tilingData_->hiddenSizeK;
|
|
76
|
-
}
|
|
77
|
-
}
|
|
78
|
-
|
|
79
|
-
template <typename T>
|
|
80
|
-
__aicore__ inline void CastQKDType2F32(uint32_t repeatTimes) {
|
|
81
|
-
vconv_f162f32(qkfp32Ubuf_ + oriPosF32_, commonUbuf_ + this->oriPos_, repeatTimes, 1, 1, DEFAULT_REPEAT_STRIDE,
|
|
82
|
-
DEFAULT_REPEAT_STRIDE / NUM_TWO);
|
|
83
|
-
vconv_f162f32(qkfp32Ubuf_ + removeBeforeF32_, commonUbuf_ + this->removeBefore_, repeatTimes, 1, 1,
|
|
84
|
-
DEFAULT_REPEAT_STRIDE, DEFAULT_REPEAT_STRIDE / NUM_TWO);
|
|
85
|
-
vconv_f162f32(qkfp32Ubuf_ + PadBeforeF32_, commonUbuf_ + this->padBefore_, repeatTimes, 1, 1, DEFAULT_REPEAT_STRIDE,
|
|
86
|
-
DEFAULT_REPEAT_STRIDE / NUM_TWO);
|
|
87
|
-
}
|
|
88
|
-
|
|
89
|
-
template <>
|
|
90
|
-
__aicore__ inline void CastQKDType2F32<bfloat16_t>(uint32_t repeatTimes) {
|
|
91
|
-
vconv_bf162f32(qkfp32Ubuf_ + oriPosF32_, commonUbuf_ + this->oriPos_, repeatTimes, 1, 1, DEFAULT_REPEAT_STRIDE,
|
|
92
|
-
DEFAULT_REPEAT_STRIDE / NUM_TWO);
|
|
93
|
-
vconv_bf162f32(qkfp32Ubuf_ + removeBeforeF32_, commonUbuf_ + this->removeBefore_, repeatTimes, 1, 1,
|
|
94
|
-
DEFAULT_REPEAT_STRIDE, DEFAULT_REPEAT_STRIDE / NUM_TWO);
|
|
95
|
-
vconv_bf162f32(qkfp32Ubuf_ + PadBeforeF32_, commonUbuf_ + this->padBefore_, repeatTimes, 1, 1,
|
|
96
|
-
DEFAULT_REPEAT_STRIDE, DEFAULT_REPEAT_STRIDE / NUM_TWO);
|
|
97
|
-
}
|
|
98
|
-
|
|
99
|
-
template <typename T>
|
|
100
|
-
__aicore__ inline void CastF322QKDType(__gm__ QK_DTYPE *dst, __ubuf__ QK_DTYPE *src1, __ubuf__ float *src,
|
|
101
|
-
uint32_t repeatTimes, uint32_t hiddenSize1) {
|
|
102
|
-
vconv_f322f16(src1, src, repeatTimes, 1, 1, DEFAULT_REPEAT_STRIDE / NUM_TWO, DEFAULT_REPEAT_STRIDE);
|
|
103
|
-
set_flag(PIPE_V, PIPE_MTE3, EVENT_ID1);
|
|
104
|
-
|
|
105
|
-
wait_flag(PIPE_V, PIPE_MTE3, EVENT_ID1);
|
|
106
|
-
copy_ubuf_to_gm(dst, src1, 0, 1, hiddenSize1 / ELE_NUM_FP16, 0, 0);
|
|
107
|
-
}
|
|
108
|
-
|
|
109
|
-
template <>
|
|
110
|
-
__aicore__ inline void CastF322QKDType<bfloat16_t>(__gm__ QK_DTYPE *dst, __ubuf__ QK_DTYPE *src1, __ubuf__ float *src,
|
|
111
|
-
uint32_t repeatTimes, uint32_t hiddenSize1) {
|
|
112
|
-
vconv_f322bf16r(src1, src, repeatTimes, 1, 1, DEFAULT_REPEAT_STRIDE / NUM_TWO, DEFAULT_REPEAT_STRIDE);
|
|
113
|
-
set_flag(PIPE_V, PIPE_MTE3, EVENT_ID1);
|
|
114
|
-
|
|
115
|
-
wait_flag(PIPE_V, PIPE_MTE3, EVENT_ID1);
|
|
116
|
-
copy_ubuf_to_gm(dst, src1, 0, 1, hiddenSize1 / ELE_NUM_FP16, 0, 0);
|
|
117
|
-
}
|
|
118
|
-
__aicore__ inline void Process(__gm__ uint8_t *extraGm) {
|
|
119
|
-
if (this->tilingData_->cosFormat == 1) {
|
|
120
|
-
pipe_barrier((PIPE_ALL));
|
|
121
|
-
this->ExpandCosSin(qkfp32Ubuf_, this->cosGm_, (__gm__ COS_DTYPE *)extraGm);
|
|
122
|
-
this->cosGm_ = (__gm__ COS_DTYPE *)extraGm;
|
|
123
|
-
pipe_barrier((PIPE_ALL));
|
|
124
|
-
this->ExpandCosSin(qkfp32Ubuf_, this->sinGm_,
|
|
125
|
-
(__gm__ COS_DTYPE *)extraGm + this->tilingData_->ntokens * this->tilingData_->headDim);
|
|
126
|
-
this->sinGm_ = (__gm__ COS_DTYPE *)extraGm + this->tilingData_->ntokens * this->tilingData_->headDim;
|
|
127
|
-
extraGm =
|
|
128
|
-
extraGm + this->tilingData_->ntokens * this->tilingData_->headDim * 4; // sizeof(uint8_t) * 2 = sizeof(half)
|
|
129
|
-
pipe_barrier((PIPE_ALL));
|
|
130
|
-
}
|
|
131
|
-
uint32_t headNumTempQ = this->tilingData_->hiddenSizeQ > sliceSizeTmp_
|
|
132
|
-
? (sliceSizeTmp_ / this->tilingData_->headDim)
|
|
133
|
-
: this->tilingData_->headNumQ;
|
|
134
|
-
uint32_t dynamicSliceQ =
|
|
135
|
-
this->tilingData_->hiddenSizeQ > sliceSizeTmp_ ? sliceSizeTmp_ : this->tilingData_->hiddenSizeQ;
|
|
136
|
-
uint32_t headNumTempK = this->tilingData_->hiddenSizeK > sliceSizeTmp_
|
|
137
|
-
? (sliceSizeTmp_ / this->tilingData_->headDim)
|
|
138
|
-
: this->tilingData_->headNumK;
|
|
139
|
-
uint32_t dynamicSliceK =
|
|
140
|
-
this->tilingData_->hiddenSizeK > sliceSizeTmp_ ? sliceSizeTmp_ : this->tilingData_->hiddenSizeK;
|
|
141
|
-
uint32_t repeatTemp = (dynamicSliceQ + this->repeatSize_ - 1) / this->repeatSize_;
|
|
142
|
-
this->ExpandNeg(qkfp32Ubuf_, posOneF32_, headNumTempQ, repeatTemp);
|
|
143
|
-
for (uint32_t zz = 0; zz < this->dynamicRound_; ++zz) { // 每个核 核内只需拷贝一次cos sin(每个头cos sin都一样)
|
|
144
|
-
this->CosSinBroadcast(extraGm, zz, qkfp32Ubuf_,
|
|
145
|
-
dynamicSliceQ); // 一次放得下:hiddensize, 一次放不下:sliceSizeTmp
|
|
146
|
-
for (uint32_t perSlice = 0; perSlice < sliceTimeQ_; ++perSlice) { // 核内每块
|
|
147
|
-
uint32_t dynamicSliceQTemp = (perSlice == sliceTimeQ_ - 1) ? lastSliceSizeQ_ : sliceSizeTmp_;
|
|
148
|
-
headNumTempQ = dynamicSliceQTemp / this->tilingData_->headDim;
|
|
149
|
-
uint32_t repeatTimeOnce = (dynamicSliceQTemp + this->repeatSize_ - 1) / this->repeatSize_;
|
|
150
|
-
this->QkComm(this->qGm_ + block_idx * this->nlCoreRun_ * this->tilingData_->hiddenSizeQ +
|
|
151
|
-
zz * this->tilingData_->hiddenSizeQ + perSlice * sliceSizeTmp_,
|
|
152
|
-
extraGm, dynamicSliceQTemp, commonUbuf_, headNumTempQ);
|
|
153
|
-
if (this->alignRotary_ == 0) {
|
|
154
|
-
pipe_barrier((PIPE_V));
|
|
155
|
-
CastQKDType2F32<QK_DTYPE>(repeatTimeOnce);
|
|
156
|
-
|
|
157
|
-
pipe_barrier((PIPE_V));
|
|
158
|
-
this->CalcRopeAlign(qkfp32Ubuf_, repeatTimeOnce, oriPosF32_, removeBeforeF32_, resOutFp32_);
|
|
159
|
-
} else {
|
|
160
|
-
set_flag(PIPE_MTE2, PIPE_V, EVENT_ID1);
|
|
161
|
-
wait_flag(PIPE_MTE2, PIPE_V, EVENT_ID1);
|
|
162
|
-
|
|
163
|
-
CastQKDType2F32<QK_DTYPE>(repeatTimeOnce);
|
|
164
|
-
pipe_barrier((PIPE_V));
|
|
165
|
-
this->CalcRope(qkfp32Ubuf_, repeatTimeOnce, oriPosF32_, removeBeforeF32_, PadBeforeF32_, posOneF32_,
|
|
166
|
-
resOutFp32_);
|
|
167
|
-
|
|
168
|
-
} // 帶PIPE_V
|
|
169
|
-
CastF322QKDType<QK_DTYPE>(this->outQGm_ + block_idx * this->nlCoreRun_ * this->tilingData_->hiddenSizeQ +
|
|
170
|
-
zz * this->tilingData_->hiddenSizeQ + perSlice * sliceSizeTmp_,
|
|
171
|
-
commonUbuf_ + resOut_, qkfp32Ubuf_ + resOutFp32_, repeatTimeOnce, dynamicSliceQTemp);
|
|
172
|
-
pipe_barrier(PIPE_ALL);
|
|
173
|
-
}
|
|
174
|
-
for (uint32_t perSlice = 0; perSlice < sliceTimeK_; ++perSlice) { // 核内每块
|
|
175
|
-
uint32_t dynamicSliceKTemp = (perSlice == sliceTimeK_ - 1) ? lastSliceSizeK_ : sliceSizeTmp_;
|
|
176
|
-
headNumTempK = dynamicSliceKTemp / this->tilingData_->headDim;
|
|
177
|
-
uint32_t repeatTimeOnce = (dynamicSliceKTemp + this->repeatSize_ - 1) / this->repeatSize_;
|
|
178
|
-
this->QkComm(this->kGm_ + block_idx * this->nlCoreRun_ * this->tilingData_->hiddenSizeK +
|
|
179
|
-
zz * this->tilingData_->hiddenSizeK + perSlice * sliceSizeTmp_,
|
|
180
|
-
extraGm, dynamicSliceKTemp, commonUbuf_, headNumTempK);
|
|
181
|
-
if (this->alignRotary_ == 0) {
|
|
182
|
-
pipe_barrier((PIPE_V));
|
|
183
|
-
CastQKDType2F32<QK_DTYPE>(repeatTimeOnce);
|
|
184
|
-
|
|
185
|
-
pipe_barrier((PIPE_V));
|
|
186
|
-
this->CalcRopeAlign(qkfp32Ubuf_, repeatTimeOnce, oriPosF32_, removeBeforeF32_, resOutFp32_);
|
|
187
|
-
} else {
|
|
188
|
-
set_flag(PIPE_MTE2, PIPE_V, EVENT_ID1);
|
|
189
|
-
wait_flag(PIPE_MTE2, PIPE_V, EVENT_ID1);
|
|
190
|
-
CastQKDType2F32<QK_DTYPE>(repeatTimeOnce);
|
|
191
|
-
|
|
192
|
-
pipe_barrier((PIPE_V));
|
|
193
|
-
this->CalcRope(qkfp32Ubuf_, repeatTimeOnce, oriPosF32_, removeBeforeF32_, PadBeforeF32_, posOneF32_,
|
|
194
|
-
resOutFp32_);
|
|
195
|
-
} // 帶PIPE_V
|
|
196
|
-
|
|
197
|
-
CastF322QKDType<QK_DTYPE>(this->outKGm_ + block_idx * this->nlCoreRun_ * this->tilingData_->hiddenSizeK +
|
|
198
|
-
zz * this->tilingData_->hiddenSizeK + perSlice * sliceSizeTmp_,
|
|
199
|
-
commonUbuf_ + resOut_, qkfp32Ubuf_ + resOutFp32_, repeatTimeOnce, dynamicSliceKTemp);
|
|
200
|
-
pipe_barrier(PIPE_ALL);
|
|
201
|
-
}
|
|
202
|
-
}
|
|
203
|
-
}
|
|
204
|
-
|
|
205
|
-
private:
|
|
206
|
-
AscendC::TQue<AscendC::QuePosition::VECIN, 1> qkfp32QueueCO2_;
|
|
207
|
-
AscendC::TQue<AscendC::QuePosition::VECIN, 1> outQueueCO2_;
|
|
208
|
-
__ubuf__ QK_DTYPE *commonUbuf_{nullptr};
|
|
209
|
-
__ubuf__ COS_DTYPE *qkfp32Ubuf_{nullptr};
|
|
210
|
-
uint32_t oriPosF32_{0}; // fp32的buf中qk的位置
|
|
211
|
-
uint32_t PadBeforeF32_{0}; // fp32的buf中保存qk[-x : hiddensize - x]
|
|
212
|
-
uint32_t removeBeforeF32_{0}; // fp32的buf中保存qk[x : hiddensize + x]
|
|
213
|
-
uint32_t posOneF32_{0}; // fp32的buf中0 0 0 1 1 1的位置
|
|
214
|
-
uint32_t headDimAlign_; // 对齐的headDim
|
|
215
|
-
uint32_t sliceTimeQ_; // 切分块的次数
|
|
216
|
-
uint32_t lastSliceSizeQ_; // 最后一块的大小
|
|
217
|
-
uint32_t sliceTimeK_;
|
|
218
|
-
uint32_t lastSliceSizeK_;
|
|
219
|
-
uint32_t sliceSizeTmp_;
|
|
220
|
-
uint32_t resOut_;
|
|
221
|
-
uint32_t resOutFp32_;
|
|
222
|
-
};
|
|
223
|
-
|
|
224
|
-
#endif
|
mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_binary/and_impl.h
DELETED
|
@@ -1,29 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Copyright 2024 Huawei Technologies Co., Ltd
|
|
3
|
-
*
|
|
4
|
-
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
-
* you may not use this file except in compliance with the License.
|
|
6
|
-
* You may obtain a copy of the License at
|
|
7
|
-
*
|
|
8
|
-
* http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
-
*
|
|
10
|
-
* Unless required by applicable law or agreed to in writing, software
|
|
11
|
-
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
-
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
-
* See the License for the specific language governing permissions and
|
|
14
|
-
* limitations under the License.
|
|
15
|
-
*/
|
|
16
|
-
#ifndef MS_KERNELS_INTERNAL_KERNEL_AND_IMPL_H_
|
|
17
|
-
#define MS_KERNELS_INTERNAL_KERNEL_AND_IMPL_H_
|
|
18
|
-
#include "elewise_binary_impl.h"
|
|
19
|
-
namespace mindspore {
|
|
20
|
-
namespace internal {
|
|
21
|
-
class AndImpl : public ElewiseBinaryImpl {
|
|
22
|
-
public:
|
|
23
|
-
AndImpl(const OpParamPtr &param) : ElewiseBinaryImpl(param) {}
|
|
24
|
-
virtual ~AndImpl() {}
|
|
25
|
-
int Launch() override;
|
|
26
|
-
};
|
|
27
|
-
} // namespace internal
|
|
28
|
-
} // namespace mindspore
|
|
29
|
-
#endif // MS_KERNELS_INTERNAL_KERNEL_AND_IMPL_H_
|
mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_binary/div_impl.h
DELETED
|
@@ -1,29 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Copyright 2024 Huawei Technologies Co., Ltd
|
|
3
|
-
*
|
|
4
|
-
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
-
* you may not use this file except in compliance with the License.
|
|
6
|
-
* You may obtain a copy of the License at
|
|
7
|
-
*
|
|
8
|
-
* http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
-
*
|
|
10
|
-
* Unless required by applicable law or agreed to in writing, software
|
|
11
|
-
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
-
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
-
* See the License for the specific language governing permissions and
|
|
14
|
-
* limitations under the License.
|
|
15
|
-
*/
|
|
16
|
-
#ifndef MS_KERNELS_INTERNAL_KERNEL_DIV_IMPL_H_
|
|
17
|
-
#define MS_KERNELS_INTERNAL_KERNEL_DIV_IMPL_H_
|
|
18
|
-
#include "elewise_binary_impl.h"
|
|
19
|
-
namespace mindspore {
|
|
20
|
-
namespace internal {
|
|
21
|
-
class DivImpl : public ElewiseBinaryImpl {
|
|
22
|
-
public:
|
|
23
|
-
DivImpl(const OpParamPtr &param) : ElewiseBinaryImpl(param) {}
|
|
24
|
-
virtual ~DivImpl() {}
|
|
25
|
-
int Launch() override;
|
|
26
|
-
};
|
|
27
|
-
} // namespace internal
|
|
28
|
-
} // namespace mindspore
|
|
29
|
-
#endif // MS_KERNELS_INTERNAL_KERNEL_DIV_IMPL_H_
|
|
@@ -1,48 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Copyright 2024 Huawei Technologies Co., Ltd
|
|
3
|
-
*
|
|
4
|
-
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
-
* you may not use this file except in compliance with the License.
|
|
6
|
-
* You may obtain a copy of the License at
|
|
7
|
-
*
|
|
8
|
-
* http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
-
*
|
|
10
|
-
* Unless required by applicable law or agreed to in writing, software
|
|
11
|
-
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
-
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
-
* See the License for the specific language governing permissions and
|
|
14
|
-
* limitations under the License.
|
|
15
|
-
*/
|
|
16
|
-
#ifndef MS_KERNELS_INTERNAL_KERNEL_ELEWISE_BINARY_IMPL_H_
|
|
17
|
-
#define MS_KERNELS_INTERNAL_KERNEL_ELEWISE_BINARY_IMPL_H_
|
|
18
|
-
|
|
19
|
-
#include <vector>
|
|
20
|
-
#include "include/internal_kernel.h"
|
|
21
|
-
#include "include/param/elewise_param.h"
|
|
22
|
-
|
|
23
|
-
namespace mindspore {
|
|
24
|
-
namespace internal {
|
|
25
|
-
class ElewiseBinaryImpl : public InternelKernelImpl {
|
|
26
|
-
public:
|
|
27
|
-
ElewiseBinaryImpl(const OpParamPtr &param) : InternelKernelImpl(param) {}
|
|
28
|
-
virtual ~ElewiseBinaryImpl() {}
|
|
29
|
-
bool Init(const ValidateInfo &info) override;
|
|
30
|
-
int Launch() { return -1; };
|
|
31
|
-
int Tiling(HostRawBuf &tilingBuf) override;
|
|
32
|
-
void SetStream(const void *stream_ptr) override;
|
|
33
|
-
void SetDeviceTilingBuf(const DeviceRawBuf &tilingBuf) override;
|
|
34
|
-
uint64_t GetTilingBufSize() override;
|
|
35
|
-
std::vector<uint64_t> GetWorkSpaceSize() override;
|
|
36
|
-
int InferShape(const std::vector<DIMS> &input_shapes, std::vector<DIMS> &output_shapes) override;
|
|
37
|
-
virtual int32_t GetMaxUbCount(uint32_t op_dtype);
|
|
38
|
-
bool IsSupported() override;
|
|
39
|
-
|
|
40
|
-
protected:
|
|
41
|
-
void *stream_ptr_ = nullptr;
|
|
42
|
-
uint8_t *device_tiling_ = nullptr;
|
|
43
|
-
uint32_t aligned_factor_ = 128;
|
|
44
|
-
uint32_t ub_dtype = 0;
|
|
45
|
-
};
|
|
46
|
-
} // namespace internal
|
|
47
|
-
} // namespace mindspore
|
|
48
|
-
#endif // MS_KERNELS_INTERNAL_KERNEL_ELEWISE_BINARY_IMPL_H_
|
|
@@ -1,25 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Copyright 2024 Huawei Technologies Co., Ltd
|
|
3
|
-
*
|
|
4
|
-
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
-
* you may not use this file except in compliance with the License.
|
|
6
|
-
* You may obtain a copy of the License at
|
|
7
|
-
*
|
|
8
|
-
* http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
-
*
|
|
10
|
-
* Unless required by applicable law or agreed to in writing, software
|
|
11
|
-
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
-
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
-
* See the License for the specific language governing permissions and
|
|
14
|
-
* limitations under the License.
|
|
15
|
-
*/
|
|
16
|
-
#ifndef MS_KERNELS_INTERNAL_ASCENDC_ELEWISE_BINARY_TILING_H_
|
|
17
|
-
#define MS_KERNELS_INTERNAL_ASCENDC_ELEWISE_BINARY_TILING_H_
|
|
18
|
-
#include "utils/elewise_tiling.h"
|
|
19
|
-
namespace mindspore::internal {
|
|
20
|
-
struct ElewiseBinaryTilingData : public ElewiseTailTilingData {
|
|
21
|
-
uint32_t broadcast_mode_{0};
|
|
22
|
-
uint32_t op_dtype_{0};
|
|
23
|
-
};
|
|
24
|
-
} // namespace mindspore::internal
|
|
25
|
-
#endif // MS_KERNELS_INTERNAL_ASCENDC_ELEWISE_BINARY_TILING_H_
|
|
@@ -1,46 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Copyright 2024 Huawei Technologies Co., Ltd
|
|
3
|
-
*
|
|
4
|
-
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
-
* you may not use this file except in compliance with the License.
|
|
6
|
-
* You may obtain a copy of the License at
|
|
7
|
-
*
|
|
8
|
-
* http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
-
*
|
|
10
|
-
* Unless required by applicable law or agreed to in writing, software
|
|
11
|
-
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
-
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
-
* See the License for the specific language governing permissions and
|
|
14
|
-
* limitations under the License.
|
|
15
|
-
*/
|
|
16
|
-
|
|
17
|
-
#ifndef MS_KERNELS_INTERNAL_KERNEL_ASCENDC_ELEWISE_BINARY_AND_KERNEL_H_
|
|
18
|
-
#define MS_KERNELS_INTERNAL_KERNEL_ASCENDC_ELEWISE_BINARY_AND_KERNEL_H_
|
|
19
|
-
|
|
20
|
-
#include "elewise_binary_base.h"
|
|
21
|
-
|
|
22
|
-
template <typename T = int16_t>
|
|
23
|
-
class AndI16 : public ElewiseBaseWide<T> {
|
|
24
|
-
public:
|
|
25
|
-
__aicore__ inline AndI16() { ElewiseBaseWide<T>::SetBinaryFunc(AscendC::And); }
|
|
26
|
-
};
|
|
27
|
-
|
|
28
|
-
template <typename T = uint16_t>
|
|
29
|
-
class AndU16 : public ElewiseBaseWide<T> {
|
|
30
|
-
public:
|
|
31
|
-
__aicore__ inline AndU16() { ElewiseBaseWide<T>::SetBinaryFunc(AscendC::And); }
|
|
32
|
-
};
|
|
33
|
-
|
|
34
|
-
extern "C" __global__ __aicore__ void and_device(GM_ADDR x1, GM_ADDR x2, GM_ADDR y, GM_ADDR tiling, int32_t dtype) {
|
|
35
|
-
if (dtype == 6) { // int16
|
|
36
|
-
AndI16<int16_t> op;
|
|
37
|
-
op.InitBinary(x1, x2, y, tiling);
|
|
38
|
-
op.ProcessBinary();
|
|
39
|
-
} else if (dtype == 7) { // uint16
|
|
40
|
-
AndU16<uint16_t> op;
|
|
41
|
-
op.InitBinary(x1, x2, y, tiling);
|
|
42
|
-
op.ProcessBinary();
|
|
43
|
-
}
|
|
44
|
-
}
|
|
45
|
-
|
|
46
|
-
#endif // MS_KERNELS_INTERNAL_KERNEL_ASCENDC_ELEWISE_BINARY_AND_KERNEL_H_
|
|
@@ -1,46 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Copyright 2024 Huawei Technologies Co., Ltd
|
|
3
|
-
*
|
|
4
|
-
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
-
* you may not use this file except in compliance with the License.
|
|
6
|
-
* You may obtain a copy of the License at
|
|
7
|
-
*
|
|
8
|
-
* http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
-
*
|
|
10
|
-
* Unless required by applicable law or agreed to in writing, software
|
|
11
|
-
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
-
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
-
* See the License for the specific language governing permissions and
|
|
14
|
-
* limitations under the License.
|
|
15
|
-
*/
|
|
16
|
-
|
|
17
|
-
#ifndef MS_KERNELS_INTERNAL_KERNEL_ASCENDC_ELEWISE_BINARY_DIV_KERNEL_H_
|
|
18
|
-
#define MS_KERNELS_INTERNAL_KERNEL_ASCENDC_ELEWISE_BINARY_DIV_KERNEL_H_
|
|
19
|
-
|
|
20
|
-
#include "elewise_binary_base.h"
|
|
21
|
-
|
|
22
|
-
template <typename T = half>
|
|
23
|
-
class DivFp16 : public ElewiseBaseWide<T> {
|
|
24
|
-
public:
|
|
25
|
-
__aicore__ inline DivFp16() { ElewiseBaseWide<T>::SetBinaryFunc(AscendC::Div); }
|
|
26
|
-
};
|
|
27
|
-
|
|
28
|
-
template <typename T = float>
|
|
29
|
-
class DivFp32 : public ElewiseBaseWide<T> {
|
|
30
|
-
public:
|
|
31
|
-
__aicore__ inline DivFp32() { ElewiseBaseWide<T>::SetBinaryFunc(AscendC::Div); }
|
|
32
|
-
};
|
|
33
|
-
|
|
34
|
-
extern "C" __global__ __aicore__ void div_device(GM_ADDR x1, GM_ADDR x2, GM_ADDR y, GM_ADDR tiling, int32_t dtype) {
|
|
35
|
-
if (dtype == 1) { // fp16
|
|
36
|
-
DivFp16<half> op;
|
|
37
|
-
op.InitBinary(x1, x2, y, tiling);
|
|
38
|
-
op.ProcessBinary();
|
|
39
|
-
} else if (dtype == 0) { // fp32
|
|
40
|
-
DivFp32<float> op;
|
|
41
|
-
op.InitBinary(x1, x2, y, tiling);
|
|
42
|
-
op.ProcessBinary();
|
|
43
|
-
}
|
|
44
|
-
}
|
|
45
|
-
|
|
46
|
-
#endif // MS_KERNELS_INTERNAL_KERNEL_ASCENDC_ELEWISE_BINARY_DIV_KERNEL_H_
|