mindspore 2.3.0-cp39-none-any.whl → 2.3.0rc2-cp39-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mindspore might be problematic.
- mindspore/.commit_id +1 -1
- mindspore/Third_Party_Open_Source_Software_Notice +0 -1512
- mindspore/__init__.py +1 -2
- mindspore/_c_dataengine.cpython-39-aarch64-linux-gnu.so +0 -0
- mindspore/_c_expression.cpython-39-aarch64-linux-gnu.so +0 -0
- mindspore/_c_mindrecord.cpython-39-aarch64-linux-gnu.so +0 -0
- mindspore/_checkparam.py +25 -5
- mindspore/_extends/graph_kernel/model/graph_parallel.py +1 -1
- mindspore/_extends/parse/__init__.py +2 -2
- mindspore/_extends/parse/compile_config.py +0 -29
- mindspore/_extends/parse/namespace.py +2 -2
- mindspore/_extends/parse/parser.py +5 -21
- mindspore/_extends/parse/resources.py +7 -5
- mindspore/_extends/parse/standard_method.py +59 -40
- mindspore/_mindspore_offline_debug.cpython-39-aarch64-linux-gnu.so +0 -0
- mindspore/amp.py +5 -26
- mindspore/bin/cache_admin +0 -0
- mindspore/bin/cache_server +0 -0
- mindspore/boost/adasum.py +1 -1
- mindspore/boost/base.py +1 -1
- mindspore/boost/boost_cell_wrapper.py +1 -1
- mindspore/boost/grad_freeze.py +2 -2
- mindspore/boost/less_batch_normalization.py +6 -9
- mindspore/common/__init__.py +1 -8
- mindspore/common/_register_for_tensor.py +9 -8
- mindspore/common/api.py +65 -275
- mindspore/common/dtype.py +4 -8
- mindspore/common/dump.py +5 -2
- mindspore/common/jit_config.py +1 -1
- mindspore/common/lazy_inline.py +2 -14
- mindspore/common/parameter.py +15 -14
- mindspore/common/recompute.py +5 -20
- mindspore/common/sparse_tensor.py +6 -21
- mindspore/common/tensor.py +52 -100
- mindspore/communication/__init__.py +11 -6
- mindspore/communication/management.py +94 -92
- mindspore/context.py +18 -180
- mindspore/dataset/engine/datasets.py +46 -69
- mindspore/dataset/engine/datasets_user_defined.py +53 -72
- mindspore/dataset/engine/datasets_vision.py +2 -2
- mindspore/dataset/engine/queue.py +38 -56
- mindspore/dataset/engine/validators.py +5 -11
- mindspore/dataset/vision/__init__.py +5 -5
- mindspore/dataset/vision/c_transforms.py +5 -5
- mindspore/dataset/vision/py_transforms_util.py +1 -1
- mindspore/dataset/vision/transforms.py +46 -591
- mindspore/dataset/vision/utils.py +1 -121
- mindspore/dataset/vision/validators.py +3 -9
- mindspore/hal/__init__.py +1 -7
- mindspore/hal/device.py +1 -1
- mindspore/include/api/model.h +0 -3
- mindspore/include/dataset/vision.h +2 -54
- mindspore/include/mindapi/base/types.h +0 -1
- mindspore/lib/libdnnl.so.2 +0 -0
- mindspore/lib/libmindspore.so +0 -0
- mindspore/lib/libmindspore_backend.so +0 -0
- mindspore/lib/libmindspore_common.so +0 -0
- mindspore/lib/libmindspore_core.so +0 -0
- mindspore/lib/libmindspore_glog.so.0 +0 -0
- mindspore/lib/libmindspore_gpr.so.15 +0 -0
- mindspore/lib/libmindspore_grpc++.so.1 +0 -0
- mindspore/lib/libmindspore_grpc.so.15 +0 -0
- mindspore/lib/libmindspore_shared_lib.so +0 -0
- mindspore/lib/libmpi_adapter.so +0 -0
- mindspore/lib/libmpi_collective.so +0 -0
- mindspore/lib/libnnacl.so +0 -0
- mindspore/lib/libopencv_core.so.4.5 +0 -0
- mindspore/lib/libps_cache.so +0 -0
- mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/config/ascend310p/aic-ascend310p-ops-info.json +0 -35
- mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/custom_aicore_ops_impl/kv_cache_mgr.py +0 -2
- mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/vector_core/tbe/custom_aicore_ops_impl/kv_cache_mgr.py +0 -2
- mindspore/lib/plugin/ascend/custom_aicpu_ops/op_impl/cpu/aicpu_kernel/impl/libcust_cpu_kernels.so +0 -0
- mindspore/lib/plugin/ascend/custom_aicpu_ops/op_impl/cpu/config/cust_aicpu_kernel.json +0 -72
- mindspore/lib/plugin/ascend/custom_aicpu_ops/op_proto/libcust_op_proto.so +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_ops/op_api/include/{aclnn_all_finite.h → aclnn_add_custom.h} +11 -9
- mindspore/lib/plugin/ascend/custom_ascendc_ops/op_api/include/aclnn_decoder_kv_cache.h +1 -1
- mindspore/lib/plugin/ascend/custom_ascendc_ops/op_api/include/aclnn_prompt_kv_cache.h +1 -1
- mindspore/lib/plugin/ascend/custom_ascendc_ops/op_api/lib/libcust_opapi.so +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_ops/op_impl/ai_core/tbe/config/ascend310p/aic-ascend310p-ops-info.json +12 -184
- mindspore/lib/plugin/ascend/custom_ascendc_ops/op_impl/ai_core/tbe/config/ascend910/aic-ascend910-ops-info.json +15 -7
- mindspore/lib/plugin/ascend/custom_ascendc_ops/op_impl/ai_core/tbe/config/ascend910b/aic-ascend910b-ops-info.json +15 -7
- mindspore/lib/plugin/ascend/custom_ascendc_ops/op_impl/ai_core/tbe/custom_ascendc_ops_impl/dynamic/add_custom.cpp +81 -0
- mindspore/lib/plugin/ascend/custom_ascendc_ops/op_impl/ai_core/tbe/custom_ascendc_ops_impl/dynamic/add_custom.py +134 -0
- mindspore/lib/plugin/ascend/custom_ascendc_ops/op_impl/ai_core/tbe/custom_ascendc_ops_impl/dynamic/decoder_kv_cache.py +31 -77
- mindspore/lib/plugin/ascend/custom_ascendc_ops/op_impl/ai_core/tbe/custom_ascendc_ops_impl/dynamic/prompt_kv_cache.py +31 -77
- mindspore/lib/plugin/ascend/custom_ascendc_ops/op_impl/ai_core/tbe/op_tiling/lib/linux/aarch64/libcust_opmaster_rt2.0.so +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_ops/op_impl/ai_core/tbe/op_tiling/liboptiling.so +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_ops/op_proto/inc/op_proto.h +5 -4
- mindspore/lib/plugin/ascend/custom_ascendc_ops/op_proto/lib/linux/aarch64/libcust_opsproto_rt2.0.so +0 -0
- mindspore/lib/plugin/ascend/libascend_collective.so +0 -0
- mindspore/lib/plugin/ascend/libdvpp_utils.so +0 -0
- mindspore/lib/plugin/ascend/libhccl_plugin.so +0 -0
- mindspore/lib/plugin/ascend/liblowlatency_collective.so +0 -0
- mindspore/lib/plugin/ascend/libmindspore_cpu_kernels.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/bin/DeviceBin +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/bin/PkgInspect +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/bin/op_man +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/device/ascend910b/bin/ascend910b.bin +286 -275
- mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/host/libasdops_cann_host.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/host/libasdops_host.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/lib/libasdops.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/lib/libasdops_static.a +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/add/add_impl.h +0 -1
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/apply_rotary_pos_emb/apply_rotary_pos_emb_impl.h +0 -1
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/asdop/asd_op_impl.h +0 -3
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/backend_param.h +0 -5
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/cast/cast_tiling.h +45 -1
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/compare/compare_impl.h +0 -1
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/flash_attention_score/flash_attention_score_impl.h +4 -8
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/flash_attention_score/flash_attention_score_tiling.h +4 -11
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/flash_attention_score/kernel/flash_attention_score_mix_hwsync.h +0 -18
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/internal_kernel.h +0 -6
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/internal_rtbackend.h +75 -1
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/matmul/kernel/matmul.h +5 -5
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/matmul/matmul_impl.h +3 -18
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/matmul_common/pp_matmul_common_tiling.h +5 -5
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/matmul_common/pp_matmul_info.h +2 -2
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/matmul_common/tiling_data.h +3 -36
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/matmul_stridedslice/kernel/matmul_stridedslice_fusion.h +2 -2
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/matmul_stridedslice/matmul_stridedslice_fusion_impl.h +4 -22
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/op_param.h +2 -16
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/paged_attention/kernel/paged_attention_mix_hwsync.h +3 -1
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/paged_attention/paged_attention_impl.h +4 -5
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/paged_attention/paged_attention_tiling.h +4 -9
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/param/attention_param.h +2 -5
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/param/matmul_ext_param.h +0 -1
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/param/matmul_qkv_param.h +4 -10
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/param/sub_param.h +12 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/rms_norm/rms_norm_impl.h +0 -1
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/sub/sub_impl.h +0 -1
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/tune_repo/matmul_table.h +1 -1
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/utils/backend.h +2 -10
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/utils/elewise_utils.h +1 -5
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/utils/log/log.h +0 -1
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/utils/log/log_tiling.h +0 -17
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/utils/math.h +7 -2
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libAdd_impl.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libSub_impl.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libadd_layernorm_impl.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libadd_rms_norm_impl.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libapply_rotary_pos_emb_impl.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libcast_impl.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libgelu_impl.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libmatmul_impl.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libmatmul_stridedslice_fusion_impl.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libms_kernels_internal.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libnot_equal_impl.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libreshape_and_cache_impl.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/librms_norm_impl.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/BSAttention/flash_attention_score_bf16_bnsd_full_mix.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/BSAttention/flash_attention_score_bf16_bnsd_tri_mix.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/BSAttention/flash_attention_score_bf16_bsh_full_mix.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/BSAttention/flash_attention_score_bf16_bsh_tri_mix.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/BSAttention/flash_attention_score_fp16_bnsd_full_mix.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/BSAttention/flash_attention_score_fp16_bnsd_tri_mix.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/BSAttention/flash_attention_score_fp16_bsh_full_mix.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/BSAttention/flash_attention_score_fp16_bsh_tri_mix.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/BSAttention/paged_attention_bf16_bnsd_full_mix.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/BSAttention/paged_attention_bf16_bsh_full_mix.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/BSAttention/paged_attention_fp16_bnsd_full_mix.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/BSAttention/paged_attention_fp16_bsh_full_mix.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/lccl/lib/liblcal.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/lccl/lib/liblccl_wrapper.so +0 -0
- mindspore/lib/plugin/libmindspore_ascend.so.2 +0 -0
- mindspore/mindrecord/filewriter.py +2 -2
- mindspore/mint/__init__.py +40 -720
- mindspore/mint/nn/__init__.py +7 -89
- mindspore/mint/nn/functional.py +16 -165
- mindspore/mint/optim/adamw.py +16 -15
- mindspore/nn/__init__.py +2 -0
- mindspore/nn/cell.py +98 -97
- mindspore/nn/extend/basic.py +2 -2
- mindspore/nn/extend/embedding.py +1 -1
- mindspore/nn/extend/layer/normalization.py +5 -7
- mindspore/nn/generator.py +297 -0
- mindspore/nn/layer/activation.py +3 -4
- mindspore/nn/layer/basic.py +16 -79
- mindspore/nn/layer/conv.py +8 -17
- mindspore/nn/layer/embedding.py +4 -1
- mindspore/nn/layer/math.py +1 -1
- mindspore/nn/layer/normalization.py +1 -1
- mindspore/nn/layer/pooling.py +0 -5
- mindspore/nn/layer/rnn_cells.py +2 -2
- mindspore/nn/loss/loss.py +19 -19
- mindspore/nn/optim/adasum.py +1 -1
- mindspore/nn/optim/sgd.py +2 -3
- mindspore/nn/probability/distribution/exponential.py +1 -1
- mindspore/nn/probability/distribution/geometric.py +1 -1
- mindspore/nn/probability/distribution/logistic.py +1 -1
- mindspore/nn/wrap/cell_wrapper.py +1 -25
- mindspore/nn/wrap/loss_scale.py +1 -24
- mindspore/numpy/array_ops.py +1 -5
- mindspore/numpy/dtypes.py +3 -3
- mindspore/numpy/math_ops.py +8 -8
- mindspore/ops/__init__.py +1 -1
- mindspore/ops/_grad_experimental/grad_comm_ops.py +16 -75
- mindspore/ops/_vmap/vmap_array_ops.py +0 -27
- mindspore/ops/_vmap/vmap_math_ops.py +1 -29
- mindspore/ops/_vmap/vmap_nn_ops.py +18 -19
- mindspore/ops/auto_generate/cpp_create_prim_instance_helper.py +8 -34
- mindspore/ops/auto_generate/gen_arg_dtype_cast.py +9 -2
- mindspore/ops/auto_generate/gen_arg_handler.py +0 -26
- mindspore/ops/auto_generate/gen_extend_func.py +27 -603
- mindspore/ops/auto_generate/gen_ops_def.py +203 -993
- mindspore/ops/auto_generate/gen_ops_prim.py +402 -1946
- mindspore/ops/auto_generate/pyboost_inner_prim.py +20 -90
- mindspore/ops/composite/base.py +6 -3
- mindspore/ops/composite/math_ops.py +1 -1
- mindspore/ops/composite/multitype_ops/_compile_utils.py +17 -24
- mindspore/ops/composite/multitype_ops/_constexpr_utils.py +1 -1
- mindspore/ops/extend/__init__.py +3 -2
- mindspore/ops/extend/array_func.py +51 -10
- mindspore/ops/extend/nn_func.py +78 -2
- mindspore/ops/function/__init__.py +13 -8
- mindspore/ops/function/array_func.py +179 -455
- mindspore/ops/function/clip_func.py +1 -1
- mindspore/ops/function/grad/grad_func.py +3 -3
- mindspore/ops/function/math_func.py +103 -117
- mindspore/ops/function/nn_func.py +163 -275
- mindspore/ops/function/other_func.py +2 -2
- mindspore/ops/function/random_func.py +69 -202
- mindspore/ops/function/sparse_func.py +4 -4
- mindspore/ops/functional.py +327 -332
- mindspore/ops/operations/__init__.py +3 -13
- mindspore/ops/operations/_grad_ops.py +27 -3
- mindspore/ops/operations/_inner_ops.py +356 -53
- mindspore/ops/operations/_rl_inner_ops.py +2 -2
- mindspore/ops/operations/_tensor_array.py +8 -8
- mindspore/ops/operations/array_ops.py +65 -82
- mindspore/ops/operations/comm_ops.py +93 -784
- mindspore/ops/operations/custom_ops.py +28 -51
- mindspore/ops/operations/debug_ops.py +4 -4
- mindspore/ops/operations/inner_ops.py +2 -2
- mindspore/ops/operations/manually_defined/ops_def.py +4 -304
- mindspore/ops/operations/math_ops.py +50 -3
- mindspore/ops/operations/nn_ops.py +247 -14
- mindspore/ops/operations/other_ops.py +3 -3
- mindspore/ops/operations/random_ops.py +1 -1
- mindspore/ops/operations/sparse_ops.py +1 -1
- mindspore/ops/primitive.py +8 -9
- mindspore/ops/silent_check.py +5 -5
- mindspore/ops_generate/arg_dtype_cast.py +9 -2
- mindspore/ops_generate/arg_handler.py +0 -26
- mindspore/ops_generate/gen_aclnn_implement.py +4 -1
- mindspore/ops_generate/gen_ops.py +4 -26
- mindspore/ops_generate/gen_pyboost_func.py +12 -41
- mindspore/ops_generate/gen_utils.py +0 -21
- mindspore/ops_generate/pyboost_utils.py +2 -7
- mindspore/ops_generate/template.py +0 -1
- mindspore/parallel/_auto_parallel_context.py +1 -21
- mindspore/parallel/_tensor.py +5 -0
- mindspore/parallel/_transformer/transformer.py +1 -1
- mindspore/parallel/_utils.py +1 -15
- mindspore/parallel/algo_parameter_config.py +3 -1
- mindspore/parallel/checkpoint_transform.py +9 -12
- mindspore/parallel/cluster/process_entity/_api.py +29 -28
- mindspore/parallel/cluster/process_entity/_utils.py +3 -13
- mindspore/parallel/cluster/run.py +16 -13
- mindspore/parallel/parameter_broadcast.py +2 -2
- mindspore/parallel/shard.py +17 -31
- mindspore/profiler/__init__.py +2 -3
- mindspore/profiler/common/util.py +2 -107
- mindspore/profiler/envprofiling.py +1 -1
- mindspore/profiler/parser/ascend_analysis/constant.py +21 -8
- mindspore/profiler/parser/ascend_analysis/file_manager.py +0 -82
- mindspore/profiler/parser/ascend_analysis/function_event.py +28 -43
- mindspore/profiler/parser/ascend_analysis/fwk_cann_parser.py +27 -49
- mindspore/profiler/parser/ascend_analysis/fwk_file_parser.py +10 -15
- mindspore/profiler/parser/ascend_analysis/msprof_timeline_parser.py +20 -25
- mindspore/profiler/parser/ascend_analysis/profiler_info_parser.py +5 -5
- mindspore/profiler/parser/ascend_analysis/trace_event_manager.py +1 -10
- mindspore/profiler/parser/ascend_hccl_generator.py +1 -4
- mindspore/profiler/parser/ascend_msprof_exporter.py +22 -43
- mindspore/profiler/parser/ascend_timeline_generator.py +5 -7
- mindspore/profiler/parser/minddata_parser.py +3 -72
- mindspore/profiler/profiling.py +59 -176
- mindspore/rewrite/api/node.py +1 -1
- mindspore/rewrite/common/namespace.py +5 -5
- mindspore/rewrite/parsers/assign_parser.py +0 -2
- mindspore/rewrite/parsers/class_def_parser.py +4 -8
- mindspore/run_check/_check_version.py +1 -1
- mindspore/scipy/fft.py +3 -1
- mindspore/scipy/linalg.py +3 -2
- mindspore/scipy/ops.py +3 -5
- mindspore/scipy/optimize/__init__.py +2 -2
- mindspore/train/__init__.py +4 -4
- mindspore/train/anf_ir_pb2.py +2 -8
- mindspore/train/callback/__init__.py +2 -5
- mindspore/train/callback/_backup_and_restore.py +2 -2
- mindspore/train/callback/_checkpoint.py +16 -104
- mindspore/train/callback/_landscape.py +1 -1
- mindspore/train/callback/_time_monitor.py +1 -1
- mindspore/train/data_sink.py +4 -5
- mindspore/train/dataset_helper.py +20 -45
- mindspore/train/model.py +38 -266
- mindspore/train/serialization.py +105 -256
- mindspore/train/summary/_summary_adapter.py +1 -1
- mindspore/version.py +1 -1
- {mindspore-2.3.0.dist-info → mindspore-2.3.0rc2.dist-info}/METADATA +2 -2
- {mindspore-2.3.0.dist-info → mindspore-2.3.0rc2.dist-info}/RECORD +303 -420
- mindspore/_extends/pijit/__init__.py +0 -23
- mindspore/_extends/pijit/pijit_func_white_list.py +0 -343
- mindspore/common/file_system.py +0 -48
- mindspore/common/generator.py +0 -260
- mindspore/common/no_inline.py +0 -54
- mindspore/common/np_dtype.py +0 -25
- mindspore/communication/comm_func.py +0 -1140
- mindspore/hal/memory.py +0 -326
- mindspore/lib/libavcodec.so.59 +0 -0
- mindspore/lib/libavdevice.so.59 +0 -0
- mindspore/lib/libavfilter.so.8 +0 -0
- mindspore/lib/libavformat.so.59 +0 -0
- mindspore/lib/libavutil.so.57 +0 -0
- mindspore/lib/libmindspore_np_dtype.so +0 -0
- mindspore/lib/libswresample.so.4 +0 -0
- mindspore/lib/libswscale.so.6 +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_ops/op_impl/ai_core/tbe/custom_ascendc_ops_impl/dynamic/all_finite.cpp +0 -326
- mindspore/lib/plugin/ascend/custom_ascendc_ops/op_impl/ai_core/tbe/custom_ascendc_ops_impl/dynamic/all_finite.py +0 -180
- mindspore/lib/plugin/ascend/custom_ascendc_ops/op_impl/ai_core/tbe/kernel/ascend910b/all_finite/AllFinite_576ceaeef5870c451cab59af55ea46ad.json +0 -58
- mindspore/lib/plugin/ascend/custom_ascendc_ops/op_impl/ai_core/tbe/kernel/ascend910b/all_finite/AllFinite_576ceaeef5870c451cab59af55ea46ad.o +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_ops/op_impl/ai_core/tbe/kernel/ascend910b/all_finite/AllFinite_86a73ff6e28d734c96bb8d3054f7dd18.json +0 -58
- mindspore/lib/plugin/ascend/custom_ascendc_ops/op_impl/ai_core/tbe/kernel/ascend910b/all_finite/AllFinite_86a73ff6e28d734c96bb8d3054f7dd18.o +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_ops/op_impl/ai_core/tbe/kernel/ascend910b/all_finite/AllFinite_f55e0ebaad1f2f572e43677336992fa0.json +0 -58
- mindspore/lib/plugin/ascend/custom_ascendc_ops/op_impl/ai_core/tbe/kernel/ascend910b/all_finite/AllFinite_f55e0ebaad1f2f572e43677336992fa0.o +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_ops/op_impl/ai_core/tbe/kernel/config/ascend910b/all_finite.json +0 -109
- mindspore/lib/plugin/ascend/custom_ascendc_ops/op_impl/ai_core/tbe/kernel/config/ascend910b/binary_info_config.json +0 -38
- mindspore/lib/plugin/ascend/custom_compiler/OWNERS +0 -12
- mindspore/lib/plugin/ascend/custom_compiler/setup.py +0 -255
- mindspore/lib/plugin/ascend/custom_compiler/start.sh +0 -26
- mindspore/lib/plugin/ascend/custom_compiler/template.json +0 -40
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/include/acme.h +0 -24
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/include/acme_op.h +0 -69
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/include/base_type.h +0 -133
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/include/op_creator.h +0 -32
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/include/op_param.h +0 -35
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/include/tiling_info.h +0 -60
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/src/core/kernel_register.h +0 -37
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/src/core/platform/platform_configs.h +0 -89
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/src/core/platform/rt_funcs.h +0 -135
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/src/ops/host_src/add_op.h +0 -34
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/src/ops/host_src/asd_backoff_base.h +0 -62
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/src/ops/host_src/asd_elewise_op.h +0 -33
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/src/ops/host_src/asd_ops.h +0 -88
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/src/ops/host_src/asd_pa_op.h +0 -45
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/src/ops/host_src/cast_op.h +0 -52
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/src/ops/host_src/matmul_op.h +0 -95
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/src/utils/asd_utils.h +0 -84
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/src/utils/comm_utils.h +0 -61
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/apply_rotary_pos_emb/kernel/apply_rotary_pos_emb_fp32.h +0 -224
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_binary/and_impl.h +0 -29
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_binary/div_impl.h +0 -29
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_binary/elewise_binary_impl.h +0 -48
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_binary/elewise_binary_tiling.h +0 -25
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_binary/kernel/and_kernel.h +0 -46
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_binary/kernel/div_kernel.h +0 -46
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_binary/kernel/elewise_binary_base.h +0 -260
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_binary/kernel/elewise_binary_kernel.h +0 -35
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_binary/kernel/max_kernel.h +0 -66
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_binary/kernel/min_kernel.h +0 -66
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_binary/kernel/mul_kernel.h +0 -66
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_binary/kernel/or_kernel.h +0 -46
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_binary/max_impl.h +0 -29
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_binary/min_impl.h +0 -29
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_binary/mul_impl.h +0 -29
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_binary/or_impl.h +0 -29
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_unary/abs_impl.h +0 -29
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_unary/elewise_unary_impl.h +0 -47
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_unary/elewise_unary_tiling.h +0 -24
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_unary/exp_impl.h +0 -29
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_unary/kernel/abs_kernel.h +0 -45
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_unary/kernel/elewise_unary_base.h +0 -148
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_unary/kernel/elewise_unary_kernel.h +0 -31
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_unary/kernel/exp_kernel.h +0 -45
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_unary/kernel/ln_kernel.h +0 -45
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_unary/kernel/not_kernel.h +0 -45
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_unary/kernel/reciprocal_kernel.h +0 -45
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_unary/kernel/relu_kernel.h +0 -55
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_unary/kernel/rsqrt_kernel.h +0 -45
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_unary/kernel/sqrt_kernel.h +0 -45
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_unary/ln_impl.h +0 -29
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_unary/not_impl.h +0 -29
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_unary/reciprocal_impl.h +0 -29
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_unary/relu_impl.h +0 -29
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_unary/rsqrt_impl.h +0 -29
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_unary/sqrt_impl.h +0 -29
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/grouped_matmul/grouped_matmul_impl.h +0 -45
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/grouped_matmul/grouped_matmul_tiling.h +0 -187
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/grouped_matmul/kernel/grouped_matmul.h +0 -245
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/grouped_matmul/kernel/grouped_matmul_interface.h +0 -24
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/grouped_matmul/kernel/grouped_matmul_utils.h +0 -111
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/grouped_matmul/tiling_data.h +0 -54
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/param/compare_param.h +0 -31
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/param/elewise_param.h +0 -41
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/param/grouped_matmul_param.h +0 -40
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/profiling_util.h +0 -364
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/utils/log/log_utils.h +0 -69
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/utils/register/kernel_creator.h +0 -39
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/utils/register/kernel_registry.h +0 -114
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/utils/utils.h +0 -98
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/MatMulPostFusionMixTactic/matmul_postfusion_mix.json +0 -19
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/MatMulPostFusionMixTactic/matmul_postfusion_mix.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/MatMulPostFusionMixTactic/matmul_postfusion_mix_mix_aic_0.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/MatMulPostFusionMixTactic/matmul_postfusion_mix_mix_aiv_0.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/MultiMatMulPostFusionMixTactic/multi_matmul_postfusion_mix.json +0 -19
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/MultiMatMulPostFusionMixTactic/multi_matmul_postfusion_mix.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/MultiMatMulPostFusionMixTactic/multi_matmul_postfusion_mix_mix_aic_0.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/MultiMatMulPostFusionMixTactic/multi_matmul_postfusion_mix_mix_aiv_0.o +0 -0
- mindspore/mint/linalg/__init__.py +0 -22
- mindspore/nn/layer/embedding_service.py +0 -531
- mindspore/nn/layer/embedding_service_layer.py +0 -393
- mindspore/ops/function/reshard_func.py +0 -102
- mindspore/ops/operations/_infer_ops.py +0 -19
- mindspore/ops/operations/reshard_ops.py +0 -53
- mindspore/profiler/common/process_pool.py +0 -41
- mindspore/profiler/common/singleton.py +0 -28
- mindspore/profiler/parser/ascend_integrate_generator.py +0 -42
- mindspore/profiler/parser/ascend_memory_generator.py +0 -185
- mindspore/train/callback/_cluster_monitor.py +0 -201
- mindspore/train/callback/_flops_collector.py +0 -238
- mindspore/train/callback/_mindio_ttp.py +0 -443
- {mindspore-2.3.0.dist-info → mindspore-2.3.0rc2.dist-info}/WHEEL +0 -0
- {mindspore-2.3.0.dist-info → mindspore-2.3.0rc2.dist-info}/entry_points.txt +0 -0
- {mindspore-2.3.0.dist-info → mindspore-2.3.0rc2.dist-info}/top_level.txt +0 -0
--- mindspore/profiler/parser/ascend_integrate_generator.py
+++ /dev/null
@@ -1,42 +0,0 @@
-# Copyright 2024 Huawei Technologies Co., Ltd
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ============================================================================
-"""PROF csv data analyze module"""
-import os
-
-from mindspore.profiler.parser.ascend_analysis.file_manager import FileManager
-
-
-class AscendIntegrateGenerator:
-    """Generate ms profiler output csv"""
-
-    def __init__(self, mindstudio_profiler_output: str, ascend_ms_output_path: str):
-        self._mindstudio_profiler_output = mindstudio_profiler_output
-        self._ascend_ms_output_path = ascend_ms_output_path
-
-    def parse(self):
-        """Generate ms profiler output csv"""
-        self._generate_kernel_details()
-
-    def _generate_kernel_details(self):
-        """Generate kernel_details.csv"""
-        header_map = {
-            "Op Name": "Name", "OP Type": "Type", "Task Type": "Accelerator Core",
-            "Task Start Time(us)": "Start Time(us)", "Task Duration(us)": "Duration(us)",
-            "Task Wait Time(us)": "Wait Time(us)",
-        }
-        op_summary_file_list = FileManager.get_csv_file_list_by_start_name(self._mindstudio_profiler_output,
-                                                                           "op_summary")
-        kernel_details_file = os.path.join(self._ascend_ms_output_path, "kernel_details.csv")
-        FileManager.combine_csv_file(op_summary_file_list, kernel_details_file, header_map)
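
The hunk above removes the kernel-details CSV generator that 2.3.0 carries on top of 2.3.0rc2. A minimal usage sketch follows, assuming a mindspore 2.3.0 install where the module still exists; the two directory paths are illustrative placeholders, and only the constructor and parse() are taken from the hunk itself.

# Hedged sketch: driving the removed AscendIntegrateGenerator (present in 2.3.0 only).
# Both paths are illustrative, not values shipped in the package.
from mindspore.profiler.parser.ascend_integrate_generator import AscendIntegrateGenerator

generator = AscendIntegrateGenerator(
    mindstudio_profiler_output="/tmp/profile/mindstudio_profiler_output",
    ascend_ms_output_path="/tmp/profile/ascend_ms_output",
)
generator.parse()  # merges op_summary*.csv into kernel_details.csv, renaming columns per header_map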
--- mindspore/profiler/parser/ascend_memory_generator.py
+++ /dev/null
@@ -1,185 +0,0 @@
-# Copyright 2024 Huawei Technologies Co., Ltd
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ============================================================================
-"""memory data analyze module"""
-import os
-
-from decimal import Decimal
-
-from mindspore.profiler.parser.ascend_analysis.file_manager import FileManager
-
-
-class AscendMemoryGenerator:
-    """Parsing the memory data of the ascend device"""
-    GE_MEMORY_RECORD_HEADERS = ["Device id", "Component", "Timestamp(us)", "Total Allocated(KB)",
-                                "Total Reserved(KB)", "Device"]
-    MS_MEMORY_RECORD_HEADERS = ["Timestamp(ns)", "Total Allocated(Byte)", "Total Reserved(Byte)", "Total Active(Byte)"]
-    TARGET_MEMORY_RECORD_HEADERS = ["Component", "Timestamp(us)", "Total Allocated(MB)", "Total Reserved(MB)",
-                                    "Total Active(MB)", "Device Type"]
-
-    def __init__(self, source_path: str, rank_id: int, mindstudio_profiler_output: str, ascend_ms_output_path: str):
-        self._source_path = source_path
-        self._rank_id = rank_id
-        self._mindstudio_profiler_output = mindstudio_profiler_output
-        self._ascend_ms_output_path = ascend_ms_output_path
-        self._ge_memory_record = []
-        self._ms_memory_record = []
-
-    def parse(self):
-        """Parse all memory data"""
-        self.copy_npu_module_mem_csv()
-        self.parse_memory_record()
-
-    def copy_npu_module_mem_csv(self):
-        """Generate npu_module_mem.csv"""
-        npu_module_mem_file_list = FileManager.get_csv_file_list_by_start_name(self._mindstudio_profiler_output,
-                                                                               "npu_module_mem")
-        target_file_path = os.path.join(self._ascend_ms_output_path, "npu_module_mem.csv")
-        FileManager.combine_csv_file(npu_module_mem_file_list, target_file_path)
-
-    def parse_memory_record(self):
-        """Generate memory_record.csv"""
-        self._parse_ge_memory_record()
-        self._parse_ms_memory_record()
-        combined_memory_data = self._combine_ge_ms_memory_record()
-        target_file_path = os.path.join(self._ascend_ms_output_path, "memory_record.csv")
-        FileManager.create_csv_file(target_file_path, combined_memory_data, self.TARGET_MEMORY_RECORD_HEADERS)
-
-    def _parse_ge_memory_record(self):
-        """Parse ge memory record data"""
-        memory_record_file_list = FileManager.get_csv_file_list_by_start_name(self._mindstudio_profiler_output,
-                                                                              "memory_record")
-        for file in memory_record_file_list:
-            data = FileManager.read_csv_file(file)
-            if len(data) > 1:
-                self._ge_memory_record.extend(data[1:])
-
-    def _parse_ms_memory_record(self):
-        """Parse mindspore memory record data"""
-        memory_record_file = os.path.join(self._source_path, f"cpu_ms_memory_record_{self._rank_id}.txt")
-        data = FileManager.read_txt_file(memory_record_file)
-        if len(data) > 1:
-            self._ms_memory_record.extend(data[1:])
-
-    def _get_app_reserved_memory(self) -> list:
-        """Get the reserved memory of the application from npu_mem.csv"""
-        npu_module_mem_file_list = FileManager.get_csv_file_list_by_start_name(self._mindstudio_profiler_output,
-                                                                               "npu_mem")
-        app_mems = []
-        for file in npu_module_mem_file_list:
-            md_mems = FileManager.read_csv_file(file)
-            for mem in md_mems:
-                if mem[1] == "APP":
-                    app_mems.append(MemoryRecordBean([mem[1], mem[-1].rstrip('\t'), 0.0, float(mem[4]), 0.0,
-                                                      f"NPU:{self._rank_id}"]).row)
-
-        return app_mems
-
-    def _combine_ge_ms_memory_record(self) -> list:
-        """Combine ge and mindspore memory record data"""
-        memory_records = []
-        for ge_memory in self._ge_memory_record:
-            memory_record = dict(zip(self.GE_MEMORY_RECORD_HEADERS, ge_memory))
-            memory_records.append(
-                MemoryRecordBean([memory_record.get("Component", "GE"), memory_record.get("Timestamp(us)"),
-                                  memory_record.get("Total Allocated(KB)", 0),
-                                  memory_record.get("Total Reserved(KB)", 0),
-                                  0, memory_record.get("Device")]))
-        for ms_memory in self._ms_memory_record:
-            memory_record = dict(zip(self.MS_MEMORY_RECORD_HEADERS, ms_memory))
-            memory_records.append(
-                MemoryRecordBean(["MindSpore", Decimal(memory_record.get("Timestamp(ns)", 0)) / 1000,
-                                  float(memory_record.get("Total Allocated(Byte)", 0)) / 1024,
-                                  float(memory_record.get("Total Reserved(Byte)", 0)) / 1024,
-                                  float(memory_record.get("Total Active(Byte)", 0)) / 1024,
-                                  f"NPU:{self._rank_id}"]))
-        memory_records.sort(key=lambda x: x.time_us)
-        last_ge_memory, last_ms_memory = MemoryRecordBean([0] * 6), MemoryRecordBean([0] * 6)
-        result_data = []
-        for memory_record in memory_records:
-            result_data.append(memory_record.row)
-            last_memory = last_ms_memory if memory_record.is_ge_component() else last_ge_memory
-            combined_mem = MemoryRecordBean(["MindSpore+GE", memory_record.time_us,
-                                             memory_record.total_allocated_kb + last_memory.total_allocated_kb,
-                                             memory_record.total_reserved_kb + last_memory.total_reserved_kb,
-                                             memory_record.total_active_kb + last_memory.total_active_kb,
-                                             f"NPU:{self._rank_id}"])
-            result_data.append(combined_mem.row)
-            if memory_record.is_ge_component():
-                last_ge_memory = memory_record
-            else:
-                last_ms_memory = memory_record
-        return result_data + self._get_app_reserved_memory()
-
-
-class MemoryRecordBean:
-    """Memory Record Struct"""
-    KB_TO_MB = 1000
-    HEADERS = ["Component", "Timestamp(us)", "Total Allocated(KB)", "Total Reserved(KB)",
-               "Total Active(KB)", "Device Type"]
-
-    def __init__(self, data: list):
-        self._data = dict(zip(self.HEADERS, data))
-
-    @property
-    def row(self) -> list:
-        return [self.component, self.time_us_str, self.total_allocated_mb, self.total_reserved_mb,
-                self.total_active_mb, self.device_type]
-
-    @property
-    def component(self) -> str:
-        return self._data.get("Component", "")
-
-    @property
-    def time_us_str(self) -> str:
-        ts_us = self._data.get("Timestamp(us)", 0)
-        return str(ts_us)
-
-    @property
-    def total_allocated_mb(self) -> float:
-        return float(self._data.get("Total Allocated(KB)", 0)) / self.KB_TO_MB
-
-    @property
-    def total_reserved_mb(self) -> float:
-        return float(self._data.get("Total Reserved(KB)", 0)) / self.KB_TO_MB
-
-    @property
-    def total_active_mb(self) -> float:
-        return float(self._data.get("Total Active(KB)", 0)) / self.KB_TO_MB
-
-    @property
-    def device_type(self) -> float:
-        return self._data.get("Device Type", "")
-
-    @property
-    def total_allocated_kb(self) -> float:
-        return float(self._data.get("Total Allocated(KB)", 0))
-
-    @property
-    def total_reserved_kb(self) -> float:
-        return float(self._data.get("Total Reserved(KB)", 0))
-
-    @property
-    def total_active_kb(self) -> float:
-        return float(self._data.get("Total Active(KB)", 0))
-
-    @property
-    def time_us(self) -> Decimal:
-        return Decimal(self._data.get("Timestamp(us)", 0))
-
-    def is_ge_component(self):
-        """
-        Determine if it is GE
-        """
-        return self.component == "GE"
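
The memory generator above juggles three unit systems: GE records arrive in KB with microsecond timestamps, MindSpore records in bytes with nanosecond timestamps, and the output CSV reports MB. A short worked example of the conversion chain, with illustrative values; note that MemoryRecordBean.KB_TO_MB divides by 1000, not 1024.

# Worked example of the conversions performed in _combine_ge_ms_memory_record
# and MemoryRecordBean above; the input values are illustrative.
from decimal import Decimal

ts_ns = Decimal("1717000000123456")    # MindSpore timestamp in ns
ts_us = ts_ns / 1000                   # ns -> Timestamp(us)
allocated_bytes = 2_097_152
allocated_kb = allocated_bytes / 1024  # Byte -> KB before the bean is built
allocated_mb = allocated_kb / 1000     # KB -> MB via MemoryRecordBean.KB_TO_MB
print(ts_us, allocated_kb, allocated_mb)  # 1717000000123.456 2048.0 2.048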
--- mindspore/train/callback/_cluster_monitor.py
+++ /dev/null
@@ -1,201 +0,0 @@
-# Copyright 2024 Huawei Technologies Co., Ltd
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ============================================================================
-"""ClusterMonitor Callback class."""
-from __future__ import absolute_import
-
-import os
-import stat
-import glob
-import time
-from threading import RLock
-
-from mindspore.train.callback._callback import Callback
-from mindspore.communication.management import get_rank, get_local_rank
-from mindspore import log as logger
-from mindspore.parallel._auto_parallel_context import _get_auto_parallel_context
-from mindspore.parallel._utils import _get_device_num
-from mindspore.train._utils import get_parameter_redundancy
-
-_perf_mutex = RLock()
-
-
-def _get_dp_tp_from_redundancy(redundancy_tuple):
-    """From redundancy get dp and tp"""
-    dp = []
-    tp = []
-    for dp_value in redundancy_tuple:
-        dp.append(list(dp_value))
-    for i in range(len(redundancy_tuple[0])):
-        tp.append([v[i] for v in redundancy_tuple])
-    return dp, tp
-
-
-def _get_dp_tp_from_layout(parameter_layout_dict, initial_rank=0):
-    """From layout dict get dp and tp"""
-    tp = []
-    dp = []
-    parameter_redundancy_dict = get_parameter_redundancy(parameter_layout_dict, initial_rank)
-    value_len = 0
-    for _, value in parameter_redundancy_dict.items():
-        if len(value) > value_len:
-            value_len = len(value)
-            dp, tp = _get_dp_tp_from_redundancy(value)
-    return dp, tp
-
-
-def _check_perf_config(perf_config):
-    """Check if the format of perf_config is correct."""
-    enabled = perf_config.get("enable", None)
-    if enabled is None or not isinstance(enabled, bool):
-        raise TypeError(f"For cluster monitor, enabled should be bool, but got {type(enabled)}.")
-    enable_step_time = perf_config.get("steptime", None)
-    if enable_step_time is None or not isinstance(enable_step_time, bool):
-        raise TypeError(f"For cluster monitor, enable_step_time should be bool, but got {type(enable_step_time)}.")
-    enabled_dtp_group = perf_config.get("dtpGroup", None)
-    if enabled_dtp_group is None or not isinstance(enabled_dtp_group, bool):
-        raise TypeError(f"For cluster monitor, enabled_dtp_group should be bool, but got {type(enabled_dtp_group)}.")
-
-
-def _parse_perf_config():
-    """parse perf config"""
-    perf_config = os.getenv("PERF_DUMP_CONFIG")
-    perf_config_dict = {}
-    if perf_config is None:
-        return perf_config_dict
-    pairs = perf_config.split(',')
-    for pair in pairs:
-        key, value = pair.split(':')
-        if value.lower() == 'true':
-            perf_config_dict[key] = True
-        elif value.lower() == 'false':
-            perf_config_dict[key] = False
-        elif value.isdigit():
-            perf_config_dict[key] = int(value)
-        else:
-            perf_config_dict[key] = value
-    _check_perf_config(perf_config_dict)
-    return perf_config_dict
-
-
-def _remove_pre_log():
-    """Remove the previously saved log files."""
-    directory = os.getenv("PERF_DUMP_PATH")
-    device_id = get_local_rank()
-    pattern = os.path.join(directory, f"perf_ms_*_{device_id}.log")
-    files_to_delete = glob.glob(pattern)
-    for file_path in files_to_delete:
-        if os.path.islink(file_path):
-            continue
-        try:
-            os.remove(file_path)
-        except OSError as e:
-            logger.warning(f"When CCAE is opening, {file_path} need to be removed, but failed to remove.")
-            raise e
-
-
-class ClusterMonitor(Callback):
-    """
-    Monitor the cluster in train process.
-    """
-
-    def __init__(self):
-        super(ClusterMonitor, self).__init__()
-        self.perf_config = _parse_perf_config()
-        self.enabled = self.perf_config.get("enable")
-        self.enable_step_time = self.perf_config.get("steptime")
-        self.enabled_dtp_group = self.perf_config.get("dtpGroup")
-        self.data_time_start = None
-        self.data_time_end = None
-        self.frame_work = "MindSpore"
-        self.ms_sched_host = os.getenv("MS_SCHED_HOST", "127.0.0.1")
-        self.ms_sched_port = os.getenv("MS_SCHED_PORT", "8888")
-        self.uuid_value = self.ms_sched_host + "_" + self.ms_sched_port
-        self.global_rank = get_rank()
-        self.process_id = os.getpid()
-        self.device_id = get_local_rank()
-        self.log_name = "perf_ms" + "_" + str(self.process_id) + "_" + str(self.device_id) + ".log"
-        self.log_path = os.getenv("PERF_DUMP_PATH")
-        if not self.log_path.endswith(os.path.sep):
-            self.log_path += os.path.sep
-        self.full_path = self.log_path + self.log_name
-
-        self.write_dp_tp_flag = True
-        self.initial_rank = 0
-
-    def begin(self, run_context):
-        _remove_pre_log()
-        pp_num = _get_auto_parallel_context("pipeline_stages")
-        device_num = _get_device_num()
-
-        original_list = list(range(device_num))
-        chunk_size = device_num // pp_num
-        split_pp_lists = []
-        for i in range(0, device_num, chunk_size):
-            end_index = i + chunk_size if i + chunk_size <= device_num else device_num
-            split_pp_lists.append(original_list[i:end_index])
-
-        self.initial_rank = (self.global_rank // chunk_size) * chunk_size
-        with _perf_mutex:
-            dir_path = os.path.dirname(self.full_path)
-            if not os.path.exists(dir_path):
-                os.makedirs(dir_path)
-            if os.path.exists(self.full_path):
-                os.chmod(self.full_path, stat.S_IWUSR)
-                os.remove(self.full_path)
-            with open(self.full_path, 'w') as file:
-                log_message = f'UUID:{self.uuid_value}\nFRAMEWORK:{self.frame_work}\nGLOBAL RANKID:{self.global_rank}\n'
-                file.write(log_message)
-                for _, split_pp_list in enumerate(split_pp_lists):
-                    file.write(f'PP:{split_pp_list}\n')
-            os.chmod(self.full_path, stat.S_IRUSR)
-
-    def step_begin(self, run_context):
-        """
-        Record time at the beginning of step.
-
-        Args:
-            run_context (RunContext): Context of the process running. For more details,
-                please refer to :class:`mindspore.train.RunContext`.
-        """
-        self.data_time_start = time.time()
-
-    def step_end(self, run_context):
-        """
-        Record time at the end of step.
-
-        Args:
-            run_context (RunContext): Context of the process running. For more details,
-                please refer to :class:`mindspore.train.RunContext`.
-        """
-        self.data_time_end = time.time()
-        if self.enabled and self.enabled_dtp_group and self.write_dp_tp_flag:
-            cb_params = run_context.original_args()
-            param_layout_dict = cb_params.train_network.parameter_layout_dict
-            dp, tp = _get_dp_tp_from_layout(param_layout_dict, self.initial_rank)
-            with _perf_mutex:
-                os.chmod(self.full_path, stat.S_IWUSR)
-                with open(self.full_path, 'a') as file:
-                    for dp_value in dp:
-                        file.write(f'dp:{dp_value}\n')
-                    for tp_value in tp:
-                        file.write(f'tp:{tp_value}\n')
-                os.chmod(self.full_path, stat.S_IRUSR)
-            self.write_dp_tp_flag = False
-        if self.enabled and self.enable_step_time:
-            with _perf_mutex:
-                os.chmod(self.full_path, stat.S_IWUSR)
-                with open(self.full_path, 'a') as file:
-                    file.write(f"STEPTIME:{int(self.data_time_start * 1000)},{int(self.data_time_end * 1000)}\n")
-                os.chmod(self.full_path, stat.S_IRUSR)
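
The callback above is configured entirely through environment variables. A minimal sketch of the setup it expects, mirroring the split logic of _parse_perf_config and the keys checked by _check_perf_config; the dump path is an illustrative placeholder.

# Hedged sketch: the environment the removed ClusterMonitor reads at construction time.
import os

os.environ["PERF_DUMP_CONFIG"] = "enable:true,steptime:true,dtpGroup:false"
os.environ["PERF_DUMP_PATH"] = "/tmp/perf_dump"  # directory for perf_ms_<pid>_<device>.log files

# Same comma/colon parsing the callback performs (int branch omitted for brevity):
config = {}
for pair in os.environ["PERF_DUMP_CONFIG"].split(','):
    key, value = pair.split(':')
    config[key] = value.lower() == 'true' if value.lower() in ('true', 'false') else value
print(config)  # {'enable': True, 'steptime': True, 'dtpGroup': False}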
@@ -1,238 +0,0 @@
|
|
|
1
|
-
# Copyright 2024 Huawei Technologies Co., Ltd
|
|
2
|
-
#
|
|
3
|
-
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
-
# you may not use this file except in compliance with the License.
|
|
5
|
-
# You may obtain a copy of the License at
|
|
6
|
-
#
|
|
7
|
-
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
-
#
|
|
9
|
-
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
-
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
-
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
-
# See the License for the specific language governing permissions and
|
|
13
|
-
# limitations under the License.
|
|
14
|
-
# ============================================================================
|
|
15
|
-
"""FLops Utilization collector Callback class."""
|
|
16
|
-
from __future__ import absolute_import
|
|
17
|
-
|
|
18
|
-
import time
|
|
19
|
-
import os
|
|
20
|
-
import stat
|
|
21
|
-
import hashlib
|
|
22
|
-
|
|
23
|
-
from math import floor
|
|
24
|
-
from mindspore import _checkparam as Validator
|
|
25
|
-
from mindspore import log as logger
|
|
26
|
-
from mindspore.train.callback._callback import Callback
|
|
27
|
-
from mindspore.common.api import flops_collection
|
|
28
|
-
|
|
29
|
-
from mindspore.communication.management import (create_group, get_group_size,
|
|
30
|
-
get_rank)
|
|
31
|
-
from mindspore.parallel._auto_parallel_context import auto_parallel_context
|
|
32
|
-
from mindspore.ops import operations as P
|
|
33
|
-
from mindspore.common import Tensor
|
|
34
|
-
from mindspore import context
|
|
35
|
-
import mindspore.nn as nn
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
class AllReduceNet(nn.Cell):
|
|
39
|
-
"""
|
|
40
|
-
Used to accumulate flops in pipeline parallel.
|
|
41
|
-
"""
|
|
42
|
-
def __init__(self, group_name):
|
|
43
|
-
super(AllReduceNet, self).__init__()
|
|
44
|
-
self.allreduce_sum = P.AllReduce(op=P.ReduceOp.SUM, group=group_name)
|
|
45
|
-
self.add_flags(skip_auto_parallel_compile=True)
|
|
46
|
-
|
|
47
|
-
def construct(self, x):
|
|
48
|
-
return self.allreduce_sum(x)
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
-class FlopsUtilizationCollector(Callback):
-    """
-    The FlopsUtilizationCollector interface counts the model FLOPs utilization (MFU)
-    and the hardware FLOPs utilization (HFU).
-    Currently, the API counts only the forward and backward flops of the MatMul,
-    BatchMatMul, FlashAttentionScore, and Conv2D operators.
-    It can only be used in graph mode with static shapes.
-
-    Args:
-        data_size (int): Number of steps between consecutive prints of the utilization information.
-        computility (int): The peak flops of each compute card. Default: ``1`` .
-        full_flops (bool): Whether to count the full model flops. If full_flops is set to ``False``,
-            FlopsUtilizationCollector counts the shard model flops on each device. Default: ``True`` .
-
-    Raises:
-        TypeError: If data_size is not a positive int.
-        TypeError: If full_flops is not a bool.
-
-    Examples:
-        >>> import numpy as np
-        >>> import mindspore.dataset as ds
-        >>> from mindspore import nn
-        >>> from mindspore.train import Model, FlopsUtilizationCollector
-        >>> from mindspore import context
-        >>> context.set_context(mode=context.GRAPH_MODE)
-        >>> data = {"x": np.float32(np.random.rand(64, 10)), "y": np.random.randint(0, 5, (64,))}
-        >>> train_dataset = ds.NumpySlicesDataset(data=data).batch(32)
-        >>> net = nn.Dense(10, 5)
-        >>> crit = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
-        >>> opt = nn.Momentum(net.trainable_params(), 0.01, 0.9)
-        >>> flops_callback = FlopsUtilizationCollector(train_dataset.get_dataset_size(), computility=10e6)
-        >>> model = Model(network=net, optimizer=opt, loss_fn=crit, metrics={"recall"})
-        >>> model.train(2, train_dataset, callbacks=[flops_callback])
-        Full model flops is 6400, Full hardware flops is 6400, Shard model flops is 6400, Shard hardware flops is 6400
-        Train per step time: 135.572 ms, mfu:0.47% hfu:0.47%
-        Train per step time: 1.317 ms, mfu:48.59% hfu:48.59%
-    """
-    def __init__(self, data_size, computility=1, full_flops=True):
-        super(FlopsUtilizationCollector, self).__init__()
-        self.step_time = time.time()
-        self.computility = computility
-        self.full_mfu = 0.0
-        self.full_hfu = 0.0
-        self.shard_mfu = 0.0
-        self.shard_hfu = 0.0
-        self.full_model_flops = 0.0
-        self.shard_model_flops = 0.0
-        self.full_hardware_flops = 0.0
-        self.shard_hardware_flops = 0.0
-        self.mfu_calculated = False
-        self.data_size = data_size
-        self.time_step_path = ''
-        self.verbose = full_flops
-        self.ma = os.environ.get("ENABLE_FLOPS_UTILIZATION_COLLECTOR") == "1"
-        Validator.check_bool(full_flops, "verbose")
-        Validator.check_positive_int(data_size, "data_size")
-
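The `self.ma` switch and the log directory are driven purely by environment variables, so the on-disk `flops*.txt` / `time_step*.txt` output written in `step_end` can be enabled without code changes. A sketch, assuming the variables are set before the callback is constructed (the directory value is illustrative):

```python
import os

# Both names are read verbatim by the callback: the first toggles file
# output, the second chooses where the metric files land.
os.environ["ENABLE_FLOPS_UTILIZATION_COLLECTOR"] = "1"
os.environ["MA_LOG_DIR"] = "/tmp/ma_logs"  # hypothetical directory
```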
-    def step_begin(self, run_context):
-        """
-        Record the time at the beginning of step.
-
-        Args:
-            run_context (RunContext): Context of the process running. For more details,
-                please refer to :class:`mindspore.train.RunContext`.
-        """
-        self.step_time = time.time()
-
-    def _get_pipeline_group(self):
-        """
-        Calculate the communication group between all pipeline stages.
-        """
-        rank = get_rank()
-        stage_nums = auto_parallel_context().get_pipeline_stages()
-        device_nums = get_group_size()
-        per_stage_device_nums = device_nums // stage_nums
-        local_stage_rank_id = rank % per_stage_device_nums
-        group = range(0, stage_nums)
-        rank_list = [local_stage_rank_id + x *
-                     per_stage_device_nums for x in group]
-        rank_str_list = [str(local_stage_rank_id + x *
-                             per_stage_device_nums) for x in group]
-        rank_list_str = "-".join(rank_str_list)
-        return rank_list, rank_list_str
-
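Concretely: with 8 devices split into 2 pipeline stages, each stage holds 4 devices, rank 3 sits at local position 3 of its stage, and its pipeline group is ranks `[3, 7]` with the name `"3-7"`. A standalone sketch of the same arithmetic with illustrative numbers:

```python
# Recomputes the group layout outside MindSpore, for illustration only.
device_nums, stage_nums, rank = 8, 2, 3
per_stage = device_nums // stage_nums          # 4 devices per stage
local_id = rank % per_stage                    # position 3 within the stage
rank_list = [local_id + s * per_stage for s in range(stage_nums)]
assert rank_list == [3, 7]
assert "-".join(map(str, rank_list)) == "3-7"
```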
-    def step_end(self, run_context):
-        """
-        Print the per-step time together with mfu and hfu at the end of step.
-
-        Args:
-            run_context (RunContext): Context of the process running. For more details,
-                please refer to :class:`mindspore.train.RunContext`.
-        """
-        if context.get_context("mode") != context.GRAPH_MODE:
-            logger.warning("FlopsUtilizationCollector now only support graph mode.")
-            return
-
-        step_seconds = (time.time() - self.step_time) * 1000
-        if not self.mfu_calculated:
-            cb_params = run_context.original_args()
-            if cb_params.mode == 'train':
-                network = cb_params.train_network
-            elif cb_params.mode == 'eval':
-                network = cb_params.eval_network
-            else:
-                logger.warning('FlopsUtilizationCollector only support train and eval mode!')
-                return
-            full_model_flops, full_hardware_flops, shard_model_flops, \
-                shard_hardware_flops, is_dynamic_shape = flops_collection(network.current_phase)
-            if is_dynamic_shape:
-                logger.warning("FlopsUtilizationCollector now do not support dynamic shape.")
-                return
-            self.full_mfu = full_model_flops / self.computility
-            self.full_hfu = full_hardware_flops / self.computility
-
-            self.shard_mfu = shard_model_flops / self.computility
-            self.shard_hfu = shard_hardware_flops / self.computility
-            self.full_model_flops = full_model_flops
-            self.full_hardware_flops = full_hardware_flops
-            self.shard_model_flops = shard_model_flops
-            self.shard_hardware_flops = shard_hardware_flops
-            self.mfu_calculated = True
-            shard_mf_dir = os.path.realpath(os.getenv('MA_LOG_DIR', './'))
-            if self.ma:
-                flops_path = os.path.join(shard_mf_dir, "flops.txt")
-                self.time_step_path = os.path.join(shard_mf_dir, "time_step.txt")
-                if auto_parallel_context().get_parallel_mode() != "stand_alone":
-                    flops_path = os.path.join(
-                        shard_mf_dir, "flops_rank_" + str(get_rank())) + ".txt"
-                    self.time_step_path = os.path.join(
-                        shard_mf_dir, "time_step_rank_" + str(get_rank())) + ".txt"
-                time_stamp = time.time()
-                model_flops_log = "flops{{type=\"model_flops\"}} {} {}\n".\
-                    format(shard_model_flops, int(round(time_stamp * 1000)))
-                hardware_flops_log = "flops{{type=\"hardware_flops\"}} {} {}\n".\
-                    format(shard_hardware_flops, int(round(time_stamp * 1000)))
-                flags = os.O_WRONLY | os.O_CREAT
-                modes = stat.S_IWUSR | stat.S_IRUSR
-                with os.fdopen(os.open(flops_path, flags, modes), 'w') as f:
-                    f.write(model_flops_log)
-                    f.write(hardware_flops_log)
-            if self.verbose:
-                flops_log = f"Full model flops is {full_model_flops}, Full hardware flops is {full_hardware_flops}, " \
-                            f"Shard model flops is {shard_model_flops}, Shard hardware flops is {shard_hardware_flops}."
-                print(flops_log, flush=True)
-            if auto_parallel_context().get_pipeline_stages() > 1:
-                pipeline_group_list, pipeline_group_name = self._get_pipeline_group()
-                auto_parallel_context().set_pipeline_stages(1)
-                hashed = hashlib.md5(
-                    pipeline_group_name.encode()).hexdigest()[:48]
-                pipeline_group_name = str(hashed)
-                create_group(pipeline_group_name, pipeline_group_list)
-                self.full_mfu = AllReduceNet(pipeline_group_name)(
-                    Tensor([self.full_mfu])).asnumpy()[0]
-                self.full_hfu = AllReduceNet(pipeline_group_name)(
-                    Tensor([self.full_hfu])).asnumpy()[0]
-            if auto_parallel_context().get_parallel_mode() != "stand_alone":
-                self.full_mfu = self.full_mfu / get_group_size()
-                self.full_hfu = self.full_hfu / get_group_size()
-
-        step_size = self.data_size
-        cb_params = run_context.original_args()
-        if hasattr(cb_params, "batch_num"):
-            batch_num = cb_params.batch_num
-            if isinstance(batch_num, int) and batch_num > 0:
-                step_size = cb_params.batch_num
-        Validator.check_positive_int(step_size)
-        if cb_params.dataset_sink_mode:
-            step_seconds = step_seconds / step_size
-        time_stamp = time.time()
-        train_log = "time_monitor{{type=\"per_step_time\"}} {} {}".format(step_seconds, int(round(time_stamp * 1000)))
-        if self.ma:
-            flags = os.O_WRONLY | os.O_CREAT
-            modes = stat.S_IWUSR | stat.S_IRUSR
-            with os.fdopen(os.open(self.time_step_path, flags, modes), 'w') as f:
-                f.write(train_log + '\n')
-        train_log = "{} per step time: {:5.3f} ms".format(
-            cb_params.mode.title(), step_seconds)
-        if self.verbose:
-            mfu = 1000 * self.full_mfu / step_seconds
-            hfu = 1000 * self.full_hfu / step_seconds
-
-            def floored_percentage(index, val, digits):
-                val *= 10 ** (digits + 2)
-                return index + '{1:.{0}f}%'.format(digits, floor(val) / 10 ** digits)
-            train_log += floored_percentage(' mfu:', mfu, 2)
-            train_log += floored_percentage(' hfu:', hfu, 2)
-        print(train_log, flush=True)