mindspore 2.3.0__cp39-none-any.whl → 2.3.0rc2__cp39-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mindspore might be problematic.
- mindspore/.commit_id +1 -1
- mindspore/Third_Party_Open_Source_Software_Notice +0 -1512
- mindspore/__init__.py +1 -2
- mindspore/_c_dataengine.cpython-39-aarch64-linux-gnu.so +0 -0
- mindspore/_c_expression.cpython-39-aarch64-linux-gnu.so +0 -0
- mindspore/_c_mindrecord.cpython-39-aarch64-linux-gnu.so +0 -0
- mindspore/_checkparam.py +25 -5
- mindspore/_extends/graph_kernel/model/graph_parallel.py +1 -1
- mindspore/_extends/parse/__init__.py +2 -2
- mindspore/_extends/parse/compile_config.py +0 -29
- mindspore/_extends/parse/namespace.py +2 -2
- mindspore/_extends/parse/parser.py +5 -21
- mindspore/_extends/parse/resources.py +7 -5
- mindspore/_extends/parse/standard_method.py +59 -40
- mindspore/_mindspore_offline_debug.cpython-39-aarch64-linux-gnu.so +0 -0
- mindspore/amp.py +5 -26
- mindspore/bin/cache_admin +0 -0
- mindspore/bin/cache_server +0 -0
- mindspore/boost/adasum.py +1 -1
- mindspore/boost/base.py +1 -1
- mindspore/boost/boost_cell_wrapper.py +1 -1
- mindspore/boost/grad_freeze.py +2 -2
- mindspore/boost/less_batch_normalization.py +6 -9
- mindspore/common/__init__.py +1 -8
- mindspore/common/_register_for_tensor.py +9 -8
- mindspore/common/api.py +65 -275
- mindspore/common/dtype.py +4 -8
- mindspore/common/dump.py +5 -2
- mindspore/common/jit_config.py +1 -1
- mindspore/common/lazy_inline.py +2 -14
- mindspore/common/parameter.py +15 -14
- mindspore/common/recompute.py +5 -20
- mindspore/common/sparse_tensor.py +6 -21
- mindspore/common/tensor.py +52 -100
- mindspore/communication/__init__.py +11 -6
- mindspore/communication/management.py +94 -92
- mindspore/context.py +18 -180
- mindspore/dataset/engine/datasets.py +46 -69
- mindspore/dataset/engine/datasets_user_defined.py +53 -72
- mindspore/dataset/engine/datasets_vision.py +2 -2
- mindspore/dataset/engine/queue.py +38 -56
- mindspore/dataset/engine/validators.py +5 -11
- mindspore/dataset/vision/__init__.py +5 -5
- mindspore/dataset/vision/c_transforms.py +5 -5
- mindspore/dataset/vision/py_transforms_util.py +1 -1
- mindspore/dataset/vision/transforms.py +46 -591
- mindspore/dataset/vision/utils.py +1 -121
- mindspore/dataset/vision/validators.py +3 -9
- mindspore/hal/__init__.py +1 -7
- mindspore/hal/device.py +1 -1
- mindspore/include/api/model.h +0 -3
- mindspore/include/dataset/vision.h +2 -54
- mindspore/include/mindapi/base/types.h +0 -1
- mindspore/lib/libdnnl.so.2 +0 -0
- mindspore/lib/libmindspore.so +0 -0
- mindspore/lib/libmindspore_backend.so +0 -0
- mindspore/lib/libmindspore_common.so +0 -0
- mindspore/lib/libmindspore_core.so +0 -0
- mindspore/lib/libmindspore_glog.so.0 +0 -0
- mindspore/lib/libmindspore_gpr.so.15 +0 -0
- mindspore/lib/libmindspore_grpc++.so.1 +0 -0
- mindspore/lib/libmindspore_grpc.so.15 +0 -0
- mindspore/lib/libmindspore_shared_lib.so +0 -0
- mindspore/lib/libmpi_adapter.so +0 -0
- mindspore/lib/libmpi_collective.so +0 -0
- mindspore/lib/libnnacl.so +0 -0
- mindspore/lib/libopencv_core.so.4.5 +0 -0
- mindspore/lib/libps_cache.so +0 -0
- mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/config/ascend310p/aic-ascend310p-ops-info.json +0 -35
- mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/custom_aicore_ops_impl/kv_cache_mgr.py +0 -2
- mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/vector_core/tbe/custom_aicore_ops_impl/kv_cache_mgr.py +0 -2
- mindspore/lib/plugin/ascend/custom_aicpu_ops/op_impl/cpu/aicpu_kernel/impl/libcust_cpu_kernels.so +0 -0
- mindspore/lib/plugin/ascend/custom_aicpu_ops/op_impl/cpu/config/cust_aicpu_kernel.json +0 -72
- mindspore/lib/plugin/ascend/custom_aicpu_ops/op_proto/libcust_op_proto.so +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_ops/op_api/include/{aclnn_all_finite.h → aclnn_add_custom.h} +11 -9
- mindspore/lib/plugin/ascend/custom_ascendc_ops/op_api/include/aclnn_decoder_kv_cache.h +1 -1
- mindspore/lib/plugin/ascend/custom_ascendc_ops/op_api/include/aclnn_prompt_kv_cache.h +1 -1
- mindspore/lib/plugin/ascend/custom_ascendc_ops/op_api/lib/libcust_opapi.so +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_ops/op_impl/ai_core/tbe/config/ascend310p/aic-ascend310p-ops-info.json +12 -184
- mindspore/lib/plugin/ascend/custom_ascendc_ops/op_impl/ai_core/tbe/config/ascend910/aic-ascend910-ops-info.json +15 -7
- mindspore/lib/plugin/ascend/custom_ascendc_ops/op_impl/ai_core/tbe/config/ascend910b/aic-ascend910b-ops-info.json +15 -7
- mindspore/lib/plugin/ascend/custom_ascendc_ops/op_impl/ai_core/tbe/custom_ascendc_ops_impl/dynamic/add_custom.cpp +81 -0
- mindspore/lib/plugin/ascend/custom_ascendc_ops/op_impl/ai_core/tbe/custom_ascendc_ops_impl/dynamic/add_custom.py +134 -0
- mindspore/lib/plugin/ascend/custom_ascendc_ops/op_impl/ai_core/tbe/custom_ascendc_ops_impl/dynamic/decoder_kv_cache.py +31 -77
- mindspore/lib/plugin/ascend/custom_ascendc_ops/op_impl/ai_core/tbe/custom_ascendc_ops_impl/dynamic/prompt_kv_cache.py +31 -77
- mindspore/lib/plugin/ascend/custom_ascendc_ops/op_impl/ai_core/tbe/op_tiling/lib/linux/aarch64/libcust_opmaster_rt2.0.so +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_ops/op_impl/ai_core/tbe/op_tiling/liboptiling.so +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_ops/op_proto/inc/op_proto.h +5 -4
- mindspore/lib/plugin/ascend/custom_ascendc_ops/op_proto/lib/linux/aarch64/libcust_opsproto_rt2.0.so +0 -0
- mindspore/lib/plugin/ascend/libascend_collective.so +0 -0
- mindspore/lib/plugin/ascend/libdvpp_utils.so +0 -0
- mindspore/lib/plugin/ascend/libhccl_plugin.so +0 -0
- mindspore/lib/plugin/ascend/liblowlatency_collective.so +0 -0
- mindspore/lib/plugin/ascend/libmindspore_cpu_kernels.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/bin/DeviceBin +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/bin/PkgInspect +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/bin/op_man +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/device/ascend910b/bin/ascend910b.bin +286 -275
- mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/host/libasdops_cann_host.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/host/libasdops_host.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/lib/libasdops.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/lib/libasdops_static.a +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/add/add_impl.h +0 -1
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/apply_rotary_pos_emb/apply_rotary_pos_emb_impl.h +0 -1
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/asdop/asd_op_impl.h +0 -3
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/backend_param.h +0 -5
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/cast/cast_tiling.h +45 -1
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/compare/compare_impl.h +0 -1
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/flash_attention_score/flash_attention_score_impl.h +4 -8
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/flash_attention_score/flash_attention_score_tiling.h +4 -11
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/flash_attention_score/kernel/flash_attention_score_mix_hwsync.h +0 -18
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/internal_kernel.h +0 -6
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/internal_rtbackend.h +75 -1
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/matmul/kernel/matmul.h +5 -5
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/matmul/matmul_impl.h +3 -18
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/matmul_common/pp_matmul_common_tiling.h +5 -5
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/matmul_common/pp_matmul_info.h +2 -2
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/matmul_common/tiling_data.h +3 -36
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/matmul_stridedslice/kernel/matmul_stridedslice_fusion.h +2 -2
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/matmul_stridedslice/matmul_stridedslice_fusion_impl.h +4 -22
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/op_param.h +2 -16
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/paged_attention/kernel/paged_attention_mix_hwsync.h +3 -1
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/paged_attention/paged_attention_impl.h +4 -5
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/paged_attention/paged_attention_tiling.h +4 -9
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/param/attention_param.h +2 -5
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/param/matmul_ext_param.h +0 -1
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/param/matmul_qkv_param.h +4 -10
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/param/sub_param.h +12 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/rms_norm/rms_norm_impl.h +0 -1
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/sub/sub_impl.h +0 -1
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/tune_repo/matmul_table.h +1 -1
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/utils/backend.h +2 -10
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/utils/elewise_utils.h +1 -5
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/utils/log/log.h +0 -1
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/utils/log/log_tiling.h +0 -17
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/utils/math.h +7 -2
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libAdd_impl.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libSub_impl.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libadd_layernorm_impl.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libadd_rms_norm_impl.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libapply_rotary_pos_emb_impl.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libcast_impl.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libgelu_impl.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libmatmul_impl.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libmatmul_stridedslice_fusion_impl.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libms_kernels_internal.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libnot_equal_impl.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libreshape_and_cache_impl.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/librms_norm_impl.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/BSAttention/flash_attention_score_bf16_bnsd_full_mix.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/BSAttention/flash_attention_score_bf16_bnsd_tri_mix.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/BSAttention/flash_attention_score_bf16_bsh_full_mix.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/BSAttention/flash_attention_score_bf16_bsh_tri_mix.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/BSAttention/flash_attention_score_fp16_bnsd_full_mix.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/BSAttention/flash_attention_score_fp16_bnsd_tri_mix.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/BSAttention/flash_attention_score_fp16_bsh_full_mix.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/BSAttention/flash_attention_score_fp16_bsh_tri_mix.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/BSAttention/paged_attention_bf16_bnsd_full_mix.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/BSAttention/paged_attention_bf16_bsh_full_mix.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/BSAttention/paged_attention_fp16_bnsd_full_mix.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/BSAttention/paged_attention_fp16_bsh_full_mix.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/lccl/lib/liblcal.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/lccl/lib/liblccl_wrapper.so +0 -0
- mindspore/lib/plugin/libmindspore_ascend.so.2 +0 -0
- mindspore/mindrecord/filewriter.py +2 -2
- mindspore/mint/__init__.py +40 -720
- mindspore/mint/nn/__init__.py +7 -89
- mindspore/mint/nn/functional.py +16 -165
- mindspore/mint/optim/adamw.py +16 -15
- mindspore/nn/__init__.py +2 -0
- mindspore/nn/cell.py +98 -97
- mindspore/nn/extend/basic.py +2 -2
- mindspore/nn/extend/embedding.py +1 -1
- mindspore/nn/extend/layer/normalization.py +5 -7
- mindspore/nn/generator.py +297 -0
- mindspore/nn/layer/activation.py +3 -4
- mindspore/nn/layer/basic.py +16 -79
- mindspore/nn/layer/conv.py +8 -17
- mindspore/nn/layer/embedding.py +4 -1
- mindspore/nn/layer/math.py +1 -1
- mindspore/nn/layer/normalization.py +1 -1
- mindspore/nn/layer/pooling.py +0 -5
- mindspore/nn/layer/rnn_cells.py +2 -2
- mindspore/nn/loss/loss.py +19 -19
- mindspore/nn/optim/adasum.py +1 -1
- mindspore/nn/optim/sgd.py +2 -3
- mindspore/nn/probability/distribution/exponential.py +1 -1
- mindspore/nn/probability/distribution/geometric.py +1 -1
- mindspore/nn/probability/distribution/logistic.py +1 -1
- mindspore/nn/wrap/cell_wrapper.py +1 -25
- mindspore/nn/wrap/loss_scale.py +1 -24
- mindspore/numpy/array_ops.py +1 -5
- mindspore/numpy/dtypes.py +3 -3
- mindspore/numpy/math_ops.py +8 -8
- mindspore/ops/__init__.py +1 -1
- mindspore/ops/_grad_experimental/grad_comm_ops.py +16 -75
- mindspore/ops/_vmap/vmap_array_ops.py +0 -27
- mindspore/ops/_vmap/vmap_math_ops.py +1 -29
- mindspore/ops/_vmap/vmap_nn_ops.py +18 -19
- mindspore/ops/auto_generate/cpp_create_prim_instance_helper.py +8 -34
- mindspore/ops/auto_generate/gen_arg_dtype_cast.py +9 -2
- mindspore/ops/auto_generate/gen_arg_handler.py +0 -26
- mindspore/ops/auto_generate/gen_extend_func.py +27 -603
- mindspore/ops/auto_generate/gen_ops_def.py +203 -993
- mindspore/ops/auto_generate/gen_ops_prim.py +402 -1946
- mindspore/ops/auto_generate/pyboost_inner_prim.py +20 -90
- mindspore/ops/composite/base.py +6 -3
- mindspore/ops/composite/math_ops.py +1 -1
- mindspore/ops/composite/multitype_ops/_compile_utils.py +17 -24
- mindspore/ops/composite/multitype_ops/_constexpr_utils.py +1 -1
- mindspore/ops/extend/__init__.py +3 -2
- mindspore/ops/extend/array_func.py +51 -10
- mindspore/ops/extend/nn_func.py +78 -2
- mindspore/ops/function/__init__.py +13 -8
- mindspore/ops/function/array_func.py +179 -455
- mindspore/ops/function/clip_func.py +1 -1
- mindspore/ops/function/grad/grad_func.py +3 -3
- mindspore/ops/function/math_func.py +103 -117
- mindspore/ops/function/nn_func.py +163 -275
- mindspore/ops/function/other_func.py +2 -2
- mindspore/ops/function/random_func.py +69 -202
- mindspore/ops/function/sparse_func.py +4 -4
- mindspore/ops/functional.py +327 -332
- mindspore/ops/operations/__init__.py +3 -13
- mindspore/ops/operations/_grad_ops.py +27 -3
- mindspore/ops/operations/_inner_ops.py +356 -53
- mindspore/ops/operations/_rl_inner_ops.py +2 -2
- mindspore/ops/operations/_tensor_array.py +8 -8
- mindspore/ops/operations/array_ops.py +65 -82
- mindspore/ops/operations/comm_ops.py +93 -784
- mindspore/ops/operations/custom_ops.py +28 -51
- mindspore/ops/operations/debug_ops.py +4 -4
- mindspore/ops/operations/inner_ops.py +2 -2
- mindspore/ops/operations/manually_defined/ops_def.py +4 -304
- mindspore/ops/operations/math_ops.py +50 -3
- mindspore/ops/operations/nn_ops.py +247 -14
- mindspore/ops/operations/other_ops.py +3 -3
- mindspore/ops/operations/random_ops.py +1 -1
- mindspore/ops/operations/sparse_ops.py +1 -1
- mindspore/ops/primitive.py +8 -9
- mindspore/ops/silent_check.py +5 -5
- mindspore/ops_generate/arg_dtype_cast.py +9 -2
- mindspore/ops_generate/arg_handler.py +0 -26
- mindspore/ops_generate/gen_aclnn_implement.py +4 -1
- mindspore/ops_generate/gen_ops.py +4 -26
- mindspore/ops_generate/gen_pyboost_func.py +12 -41
- mindspore/ops_generate/gen_utils.py +0 -21
- mindspore/ops_generate/pyboost_utils.py +2 -7
- mindspore/ops_generate/template.py +0 -1
- mindspore/parallel/_auto_parallel_context.py +1 -21
- mindspore/parallel/_tensor.py +5 -0
- mindspore/parallel/_transformer/transformer.py +1 -1
- mindspore/parallel/_utils.py +1 -15
- mindspore/parallel/algo_parameter_config.py +3 -1
- mindspore/parallel/checkpoint_transform.py +9 -12
- mindspore/parallel/cluster/process_entity/_api.py +29 -28
- mindspore/parallel/cluster/process_entity/_utils.py +3 -13
- mindspore/parallel/cluster/run.py +16 -13
- mindspore/parallel/parameter_broadcast.py +2 -2
- mindspore/parallel/shard.py +17 -31
- mindspore/profiler/__init__.py +2 -3
- mindspore/profiler/common/util.py +2 -107
- mindspore/profiler/envprofiling.py +1 -1
- mindspore/profiler/parser/ascend_analysis/constant.py +21 -8
- mindspore/profiler/parser/ascend_analysis/file_manager.py +0 -82
- mindspore/profiler/parser/ascend_analysis/function_event.py +28 -43
- mindspore/profiler/parser/ascend_analysis/fwk_cann_parser.py +27 -49
- mindspore/profiler/parser/ascend_analysis/fwk_file_parser.py +10 -15
- mindspore/profiler/parser/ascend_analysis/msprof_timeline_parser.py +20 -25
- mindspore/profiler/parser/ascend_analysis/profiler_info_parser.py +5 -5
- mindspore/profiler/parser/ascend_analysis/trace_event_manager.py +1 -10
- mindspore/profiler/parser/ascend_hccl_generator.py +1 -4
- mindspore/profiler/parser/ascend_msprof_exporter.py +22 -43
- mindspore/profiler/parser/ascend_timeline_generator.py +5 -7
- mindspore/profiler/parser/minddata_parser.py +3 -72
- mindspore/profiler/profiling.py +59 -176
- mindspore/rewrite/api/node.py +1 -1
- mindspore/rewrite/common/namespace.py +5 -5
- mindspore/rewrite/parsers/assign_parser.py +0 -2
- mindspore/rewrite/parsers/class_def_parser.py +4 -8
- mindspore/run_check/_check_version.py +1 -1
- mindspore/scipy/fft.py +3 -1
- mindspore/scipy/linalg.py +3 -2
- mindspore/scipy/ops.py +3 -5
- mindspore/scipy/optimize/__init__.py +2 -2
- mindspore/train/__init__.py +4 -4
- mindspore/train/anf_ir_pb2.py +2 -8
- mindspore/train/callback/__init__.py +2 -5
- mindspore/train/callback/_backup_and_restore.py +2 -2
- mindspore/train/callback/_checkpoint.py +16 -104
- mindspore/train/callback/_landscape.py +1 -1
- mindspore/train/callback/_time_monitor.py +1 -1
- mindspore/train/data_sink.py +4 -5
- mindspore/train/dataset_helper.py +20 -45
- mindspore/train/model.py +38 -266
- mindspore/train/serialization.py +105 -256
- mindspore/train/summary/_summary_adapter.py +1 -1
- mindspore/version.py +1 -1
- {mindspore-2.3.0.dist-info → mindspore-2.3.0rc2.dist-info}/METADATA +2 -2
- {mindspore-2.3.0.dist-info → mindspore-2.3.0rc2.dist-info}/RECORD +303 -420
- mindspore/_extends/pijit/__init__.py +0 -23
- mindspore/_extends/pijit/pijit_func_white_list.py +0 -343
- mindspore/common/file_system.py +0 -48
- mindspore/common/generator.py +0 -260
- mindspore/common/no_inline.py +0 -54
- mindspore/common/np_dtype.py +0 -25
- mindspore/communication/comm_func.py +0 -1140
- mindspore/hal/memory.py +0 -326
- mindspore/lib/libavcodec.so.59 +0 -0
- mindspore/lib/libavdevice.so.59 +0 -0
- mindspore/lib/libavfilter.so.8 +0 -0
- mindspore/lib/libavformat.so.59 +0 -0
- mindspore/lib/libavutil.so.57 +0 -0
- mindspore/lib/libmindspore_np_dtype.so +0 -0
- mindspore/lib/libswresample.so.4 +0 -0
- mindspore/lib/libswscale.so.6 +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_ops/op_impl/ai_core/tbe/custom_ascendc_ops_impl/dynamic/all_finite.cpp +0 -326
- mindspore/lib/plugin/ascend/custom_ascendc_ops/op_impl/ai_core/tbe/custom_ascendc_ops_impl/dynamic/all_finite.py +0 -180
- mindspore/lib/plugin/ascend/custom_ascendc_ops/op_impl/ai_core/tbe/kernel/ascend910b/all_finite/AllFinite_576ceaeef5870c451cab59af55ea46ad.json +0 -58
- mindspore/lib/plugin/ascend/custom_ascendc_ops/op_impl/ai_core/tbe/kernel/ascend910b/all_finite/AllFinite_576ceaeef5870c451cab59af55ea46ad.o +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_ops/op_impl/ai_core/tbe/kernel/ascend910b/all_finite/AllFinite_86a73ff6e28d734c96bb8d3054f7dd18.json +0 -58
- mindspore/lib/plugin/ascend/custom_ascendc_ops/op_impl/ai_core/tbe/kernel/ascend910b/all_finite/AllFinite_86a73ff6e28d734c96bb8d3054f7dd18.o +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_ops/op_impl/ai_core/tbe/kernel/ascend910b/all_finite/AllFinite_f55e0ebaad1f2f572e43677336992fa0.json +0 -58
- mindspore/lib/plugin/ascend/custom_ascendc_ops/op_impl/ai_core/tbe/kernel/ascend910b/all_finite/AllFinite_f55e0ebaad1f2f572e43677336992fa0.o +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_ops/op_impl/ai_core/tbe/kernel/config/ascend910b/all_finite.json +0 -109
- mindspore/lib/plugin/ascend/custom_ascendc_ops/op_impl/ai_core/tbe/kernel/config/ascend910b/binary_info_config.json +0 -38
- mindspore/lib/plugin/ascend/custom_compiler/OWNERS +0 -12
- mindspore/lib/plugin/ascend/custom_compiler/setup.py +0 -255
- mindspore/lib/plugin/ascend/custom_compiler/start.sh +0 -26
- mindspore/lib/plugin/ascend/custom_compiler/template.json +0 -40
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/include/acme.h +0 -24
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/include/acme_op.h +0 -69
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/include/base_type.h +0 -133
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/include/op_creator.h +0 -32
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/include/op_param.h +0 -35
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/include/tiling_info.h +0 -60
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/src/core/kernel_register.h +0 -37
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/src/core/platform/platform_configs.h +0 -89
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/src/core/platform/rt_funcs.h +0 -135
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/src/ops/host_src/add_op.h +0 -34
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/src/ops/host_src/asd_backoff_base.h +0 -62
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/src/ops/host_src/asd_elewise_op.h +0 -33
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/src/ops/host_src/asd_ops.h +0 -88
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/src/ops/host_src/asd_pa_op.h +0 -45
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/src/ops/host_src/cast_op.h +0 -52
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/src/ops/host_src/matmul_op.h +0 -95
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/src/utils/asd_utils.h +0 -84
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/src/utils/comm_utils.h +0 -61
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/apply_rotary_pos_emb/kernel/apply_rotary_pos_emb_fp32.h +0 -224
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_binary/and_impl.h +0 -29
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_binary/div_impl.h +0 -29
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_binary/elewise_binary_impl.h +0 -48
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_binary/elewise_binary_tiling.h +0 -25
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_binary/kernel/and_kernel.h +0 -46
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_binary/kernel/div_kernel.h +0 -46
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_binary/kernel/elewise_binary_base.h +0 -260
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_binary/kernel/elewise_binary_kernel.h +0 -35
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_binary/kernel/max_kernel.h +0 -66
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_binary/kernel/min_kernel.h +0 -66
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_binary/kernel/mul_kernel.h +0 -66
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_binary/kernel/or_kernel.h +0 -46
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_binary/max_impl.h +0 -29
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_binary/min_impl.h +0 -29
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_binary/mul_impl.h +0 -29
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_binary/or_impl.h +0 -29
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_unary/abs_impl.h +0 -29
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_unary/elewise_unary_impl.h +0 -47
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_unary/elewise_unary_tiling.h +0 -24
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_unary/exp_impl.h +0 -29
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_unary/kernel/abs_kernel.h +0 -45
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_unary/kernel/elewise_unary_base.h +0 -148
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_unary/kernel/elewise_unary_kernel.h +0 -31
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_unary/kernel/exp_kernel.h +0 -45
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_unary/kernel/ln_kernel.h +0 -45
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_unary/kernel/not_kernel.h +0 -45
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_unary/kernel/reciprocal_kernel.h +0 -45
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_unary/kernel/relu_kernel.h +0 -55
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_unary/kernel/rsqrt_kernel.h +0 -45
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_unary/kernel/sqrt_kernel.h +0 -45
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_unary/ln_impl.h +0 -29
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_unary/not_impl.h +0 -29
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_unary/reciprocal_impl.h +0 -29
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_unary/relu_impl.h +0 -29
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_unary/rsqrt_impl.h +0 -29
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_unary/sqrt_impl.h +0 -29
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/grouped_matmul/grouped_matmul_impl.h +0 -45
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/grouped_matmul/grouped_matmul_tiling.h +0 -187
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/grouped_matmul/kernel/grouped_matmul.h +0 -245
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/grouped_matmul/kernel/grouped_matmul_interface.h +0 -24
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/grouped_matmul/kernel/grouped_matmul_utils.h +0 -111
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/grouped_matmul/tiling_data.h +0 -54
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/param/compare_param.h +0 -31
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/param/elewise_param.h +0 -41
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/param/grouped_matmul_param.h +0 -40
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/profiling_util.h +0 -364
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/utils/log/log_utils.h +0 -69
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/utils/register/kernel_creator.h +0 -39
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/utils/register/kernel_registry.h +0 -114
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/utils/utils.h +0 -98
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/MatMulPostFusionMixTactic/matmul_postfusion_mix.json +0 -19
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/MatMulPostFusionMixTactic/matmul_postfusion_mix.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/MatMulPostFusionMixTactic/matmul_postfusion_mix_mix_aic_0.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/MatMulPostFusionMixTactic/matmul_postfusion_mix_mix_aiv_0.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/MultiMatMulPostFusionMixTactic/multi_matmul_postfusion_mix.json +0 -19
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/MultiMatMulPostFusionMixTactic/multi_matmul_postfusion_mix.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/MultiMatMulPostFusionMixTactic/multi_matmul_postfusion_mix_mix_aic_0.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/MultiMatMulPostFusionMixTactic/multi_matmul_postfusion_mix_mix_aiv_0.o +0 -0
- mindspore/mint/linalg/__init__.py +0 -22
- mindspore/nn/layer/embedding_service.py +0 -531
- mindspore/nn/layer/embedding_service_layer.py +0 -393
- mindspore/ops/function/reshard_func.py +0 -102
- mindspore/ops/operations/_infer_ops.py +0 -19
- mindspore/ops/operations/reshard_ops.py +0 -53
- mindspore/profiler/common/process_pool.py +0 -41
- mindspore/profiler/common/singleton.py +0 -28
- mindspore/profiler/parser/ascend_integrate_generator.py +0 -42
- mindspore/profiler/parser/ascend_memory_generator.py +0 -185
- mindspore/train/callback/_cluster_monitor.py +0 -201
- mindspore/train/callback/_flops_collector.py +0 -238
- mindspore/train/callback/_mindio_ttp.py +0 -443
- {mindspore-2.3.0.dist-info → mindspore-2.3.0rc2.dist-info}/WHEEL +0 -0
- {mindspore-2.3.0.dist-info → mindspore-2.3.0rc2.dist-info}/entry_points.txt +0 -0
- {mindspore-2.3.0.dist-info → mindspore-2.3.0rc2.dist-info}/top_level.txt +0 -0
@@ -158,7 +158,7 @@ def generate_pyboost_op_source_code(work_path, op_proto, template_paths, convert
     operator_name = converter.functional_name
     call_args_tensor = []
     for type, arg_name in zip(converter.call_args_types, converter.call_args):
-        if type in ("BaseTensorPtr", "std::optional<BaseTensorPtr>"):
+        if type in ("TensorPtr", "std::optional<TensorPtr>"):
             call_args_tensor.append(arg_name)

     for call_tpl, src_tpl, view_tpl, cus_tpl, gen_path in zip(template_paths.op_call_template_path,
@@ -181,7 +181,6 @@ def generate_pyboost_op_source_code(work_path, op_proto, template_paths, convert
     get_cube_math_type = ''
     real_output = ', ' + converter.op_outputs
     proto_operator_name = op_proto.operator_name
-    register_custom_kernel = ''
     if is_ascend and op_proto.ascend != 'default':
         call_impl = cus_tpl.replace(call_args=converter.call_args,
                                     return_values=converter.call_func_outputs,
@@ -196,7 +195,6 @@ def generate_pyboost_op_source_code(work_path, op_proto, template_paths, convert
         )
         customize_include = "#include \"plugin/device/cpu/kernel/pyboost/customize/{}.h\"".format(
             operator_name.lower())
-        register_custom_kernel = "MS_REG_PYBOOST_CPU_CUSTOM_KERNEL({});".format(op_name_str)
     elif is_gpu and op_proto.gpu != 'default':
         call_impl = cus_tpl.replace(call_args=converter.call_args,
                                     return_values=converter.call_func_outputs,
@@ -204,7 +202,6 @@ def generate_pyboost_op_source_code(work_path, op_proto, template_paths, convert
         )
         customize_include = "#include \"plugin/device/gpu/kernel/pyboost/customize/{}.h\"".format(
             operator_name.lower())
-        register_custom_kernel = "MS_REG_PYBOOST_GPU_CUSTOM_KERNEL({});".format(op_name_str)
     elif op_proto.is_view:
         set_output_abs = "SetOutputAbstract();"
         if converter.call_func_outputs == "outputs_":
@@ -254,8 +251,7 @@ def generate_pyboost_op_source_code(work_path, op_proto, template_paths, convert
                                call_args_with_type=converter.call_args_with_types,
                                return_type=converter.cpp_func_return,
                                customize_include=customize_include,
-                               call_impl=call_impl
-                               register_custom_kernel=register_custom_kernel)
+                               call_impl=call_impl)
     op_header_dir_path = os.path.join(work_path, gen_path)
     tmp_op_source_file_path = os.path.join(op_header_dir_path, "tmp_" + operator_name.lower() + ".cc")
     dst_op_source_file_path = os.path.join(op_header_dir_path, operator_name.lower() + ".cc")
@@ -300,16 +296,8 @@ def generate_pyboost_op_return_code(op_proto):
 def generate_pyboost_op_func_return_type(op_proto):
     """ generate_pyboost_op_func_return_type """
     returns_type = []
-    type_convert_to_base = {
-        'std::vector<tensor::TensorPtr>': 'std::vector<tensor::BaseTensorPtr>',
-        'tensor::TensorPtr': 'tensor::BaseTensorPtr'
-    }
     for return_obj in op_proto.returns:
-
-        if temp_return in type_convert_to_base:
-            returns_type.append(type_convert_to_base[temp_return])
-        else:
-            raise Exception("Not return found")
+        returns_type.append(get_return_type(return_obj.arg_dtype))
     if len(returns_type) == 1:
         cpp_func_return = returns_type[0]
     elif len(returns_type) > 1:
@@ -429,7 +417,6 @@ def generate_pyboost_functions(work_path, yaml_data):
     convert_stub_str = ''
     optional_to_value_str = ''
     need_contiguous = 'true'
-    value_str = '_value'
     if op_proto.is_view:
         # view/aclnn op no need to contiguous tensor.
         need_contiguous = 'false'
@@ -445,7 +432,7 @@ def generate_pyboost_functions(work_path, yaml_data):
                                                     need_contiguous=need_contiguous)
         cast_output = cast_str + convert_stub_output_name

-        convert_optional_to_value_name = op_arg.arg_name + value_str
+        convert_optional_to_value_name = op_arg.arg_name + "_value"
         optional_to_value_str += \
             convert_optional_to_value_template.replace(input=cast_output,
                                                        output=convert_optional_to_value_name)
@@ -461,35 +448,19 @@ def generate_pyboost_functions(work_path, yaml_data):
             grad_arg = cast_str + convert_stub_output_name
             cast_arg = grad_arg
         elif pyboost_utils.is_tensor_list(op_arg):
-
-
-
-
-
-
-
-
-            convert_optional_to_value_name = op_arg.arg_name + value_str
-            optional_to_value_str += \
-                convert_optional_to_value_template.replace(input=cast_output,
-                                                           output=convert_optional_to_value_name)
-            call_arg = convert_stub_output_name
-            grad_arg = convert_optional_to_value_name
-            cast_arg = cast_output
-        else:
-            convert_stub_output_name = op_arg.arg_name + "_tensor_list"
-            convert_stub_str += convert_to_tensor_list_template.replace(input=op_arg.arg_name,
-                                                                        output=convert_stub_output_name,
-                                                                        need_contiguous=need_contiguous)
-            call_arg = convert_stub_output_name
-            grad_arg = cast_str + convert_stub_output_name
-            cast_arg = grad_arg
+            convert_stub_output_name = op_arg.arg_name + "_tensor_list"
+            convert_stub_str += convert_to_tensor_list_template.replace(input=op_arg.arg_name,
+                                                                        output=convert_stub_output_name,
+                                                                        need_contiguous=need_contiguous)
+            call_arg = convert_stub_output_name
+            grad_arg = cast_str + convert_stub_output_name
+            cast_arg = grad_arg
         else:
             call_arg = op_arg.arg_name
             grad_arg = cast_str + op_arg.arg_name
             cast_arg = grad_arg
             if is_optional_param(op_arg):
-                convert_optional_to_value_name = op_arg.arg_name + value_str
+                convert_optional_to_value_name = op_arg.arg_name + "_value"
                 optional_to_value_str += \
                     convert_optional_to_value_template.replace(input=call_arg,
                                                                output=convert_optional_to_value_name)
@@ -76,12 +76,10 @@ def get_type_str(type_str):
     'tuple[float]',
     'tuple[bool]',
     'tuple[tensor]',
-    'tuple[str]',
     'list[int]',
     'list[float]',
     'list[bool]',
     'list[tensor]',
-    'list[str]',
     'tensor',
     'type',
 }
@@ -147,25 +145,6 @@ def merge_files(origin_dir, merged_file_path, file_format):
     merge_files_to_one_file(op_yaml_file_names, merged_file_path)


-def merge_files_append(origin_dir, merged_file_path, file_format):
-    """
-    Merge multiple files into one file.
-    origin_dir: indicates the origin file directory.
-    merged_file_path: indicates the merged file path.
-    file_format: indicates the format of regular matching.
-    Files whose names meet the regular matching in 'origin_dir' directory will be merged into one file.
-    """
-    file_paths = glob.glob(os.path.join(origin_dir, file_format))
-    merged_content = ''
-    file_paths.sort()
-    for file_path in file_paths:
-        with open(file_path, 'r') as file:
-            merged_content += file.read()
-            merged_content += '\n'
-    with open(merged_file_path, 'a') as file:
-        file.write(merged_content)
-
-
 def safe_load_yaml(yaml_file_path):
     """
     Load yaml dictionary from file.
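The removed merge_files_append helper is a plain glob-sort-concatenate routine. For readers who want the technique in isolation, here is a self-contained sketch with imports included (merge_matching_files is an illustrative name, not a package API):

import glob
import os

def merge_matching_files(origin_dir, merged_file_path, pattern):
    """Append the contents of every file in origin_dir matching pattern
    (e.g. '*_op.yaml') to merged_file_path, in sorted order."""
    file_paths = sorted(glob.glob(os.path.join(origin_dir, pattern)))
    merged_content = ''
    for file_path in file_paths:
        with open(file_path, 'r') as f:
            merged_content += f.read()
        merged_content += '\n'  # keep a separator between source files
    with open(merged_file_path, 'a') as f:  # 'a': append rather than overwrite
        f.write(merged_content)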
@@ -85,11 +85,9 @@ def get_convert_type_str(dtype: str, optional):
     'tuple[int]': 'ToIntListOptional<py::tuple>',
     'tuple[float]': 'ToFloatListOptional<py::tuple>',
     'tuple[bool]': 'ToBoolListOptional<py::tuple>',
-    'tuple[tensor]': 'ToTensorListOptional<py::tuple>',
     'list[int]': 'ToIntListOptional<py::list>',
     'list[float]': 'ToFloatListOptional<py::list>',
     'list[bool]': 'ToBoolListOptional<py::list>',
-    'list[tensor]': 'ToTensorListOptional<py::list>',
 }
 if optional:
     if dtype in optional_type_convert:
@@ -181,7 +179,7 @@ def get_input_dtype(dtype: str, optional):
     'bool': 'BoolImmPtr',
     'number': 'ScalarPtr',
     'str': 'StringImmPtr',
-    'tensor': 'BaseTensorPtr',
+    'tensor': 'TensorPtr',
     'tuple[int]': value_tuple,
     'tuple[float]': value_tuple,
     'tuple[bool]': value_tuple,
@@ -198,11 +196,10 @@ def get_input_dtype(dtype: str, optional):
     'bool': 'std::optional<BoolImmPtr>',
     'number': 'std::optional<ScalarPtr>',
     'str': 'std::optional<StringImmPtr>',
-    'tensor': 'std::optional<BaseTensorPtr>',
+    'tensor': 'std::optional<TensorPtr>',
     'tuple[int]': value_tuple_optional,
     'tuple[float]': value_tuple_optional,
     'tuple[bool]': value_tuple_optional,
-    'tuple[tensor]': value_tuple_optional,
 }
 if optional:
     if dtype in optional_type_convert:
@@ -284,8 +281,6 @@ def get_tuple_input_convert(arg_name, arg_type):
     :return:
     """
     cpp_type = tuple_input_to_cpp_type(arg_type)
-    if cpp_type == "TensorPtr":
-        cpp_type = "BaseTensorPtr"
     return f"std::vector<{cpp_type}> {arg_name}_vector = ConvertValueTupleToVector<{cpp_type}>({arg_name});\n"

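These generator utilities emit C++ statements as Python f-strings. A minimal standalone sketch of the pattern behind get_tuple_input_convert, with the conversion target passed in directly (the argument and type names in the example are invented for illustration):

def emit_tuple_conversion(arg_name, cpp_type):
    # One line of generated C++: convert a ValueTuple argument to a std::vector.
    return (f"std::vector<{cpp_type}> {arg_name}_vector = "
            f"ConvertValueTupleToVector<{cpp_type}>({arg_name});\n")

print(emit_tuple_conversion("axes", "Int64ImmPtr"))
# std::vector<Int64ImmPtr> axes_vector = ConvertValueTupleToVector<Int64ImmPtr>(axes);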
@@ -242,19 +242,6 @@ class _AutoParallelContext:
         self.check_context_handle()
         return self._context_handle.get_pipeline_stage_split_num()

-    def set_auto_pipeline(self, auto_pipeline):
-        """Set the pipeline stage number to automatic"""
-        if not isinstance(auto_pipeline, bool):
-            raise TypeError("For 'set_auto_parallel_context', the argument 'auto_pipeline' "
-                            "must be bool, but got the type : {}.".format(type(auto_pipeline)))
-        self.check_context_handle()
-        self._context_handle.set_auto_pipeline(auto_pipeline)
-
-    def get_auto_pipeline(self):
-        """Get whether the pipeline stage number is automatic"""
-        self.check_context_handle()
-        return self._context_handle.get_auto_pipeline()
-
     def set_pipeline_result_broadcast(self, pipeline_result_broadcast):
         """
         Set the value of enabling pipeline result broadcast. Default: ``False``.
@@ -584,7 +571,7 @@ class _AutoParallelContext:
         self.check_context_handle()
         dir_path = os.path.dirname(strategy_ckpt_save_file)
         if dir_path and not os.path.exists(dir_path):
-            os.makedirs(dir_path, exist_ok=True)
+            os.makedirs(dir_path)
         self._context_handle.set_strategy_ckpt_save_file(strategy_ckpt_save_file)

     def get_strategy_ckpt_save_file(self):
@@ -1242,7 +1229,6 @@ _set_auto_parallel_context_func_map = {
    "gradient_fp32_sync": auto_parallel_context().set_gradient_fp32_sync,
    "loss_repeated_mean": auto_parallel_context().set_loss_repeated_mean,
    "pipeline_stages": auto_parallel_context().set_pipeline_stages,
-   "auto_pipeline": auto_parallel_context().set_auto_pipeline,
    "pipeline_result_broadcast": auto_parallel_context().set_pipeline_result_broadcast,
    "pipeline_segments": auto_parallel_context().set_pipeline_segments,
    "parallel_mode": auto_parallel_context().set_parallel_mode,
@@ -1275,7 +1261,6 @@ _get_auto_parallel_context_func_map = {
    "gradient_fp32_sync": auto_parallel_context().get_gradient_fp32_sync,
    "loss_repeated_mean": auto_parallel_context().get_loss_repeated_mean,
    "pipeline_stages": auto_parallel_context().get_pipeline_stages,
-   "auto_pipeline": auto_parallel_context().get_auto_pipeline,
    "pipeline_result_broadcast": auto_parallel_context().get_pipeline_result_broadcast,
    "pipeline_interleave": auto_parallel_context().get_pipeline_interleave,
    "pipeline_scheduler": auto_parallel_context().get_pipeline_scheduler,
@@ -1364,9 +1349,6 @@ def _set_auto_parallel_context(**kwargs):
             the devices are distributed alone the pipeline. The total devices will be divided into
             'pipeline_stags' stages. This currently could only be used when
             parallel mode semi_auto_parallel is enabled. Default: 0
-        auto_pipeline (bool): Set the pipeline stage number to automatic. Its value will be selected between 1 and the
-            parameter `pipeline_stages`. This option requires the `parallel_mode` to be ``auto_parallel``
-            and the `search_mode` to be ``recursive_programming``. Default: ``False`` .
         pipeline_result_broadcast (bool): A switch that broadcast the last stage result to all other stage in pipeline
             parallel inference. Default: ``False`` .
         communi_parallel_mode (str): There are tree kinds of communication parallel modes, "all_group_parallel",
@@ -1410,7 +1392,6 @@ def _set_auto_parallel_context(**kwargs):
         and `size`. Config is same as `allgather`.


-
     Raises:
         ValueError: If input key is not attribute in auto parallel context.
     """
@@ -1458,7 +1439,6 @@ def _reset_auto_parallel_context():
     - auto_parallel_search_mode: 'recursive_programming'
     - sharding_propagation: False
     - pipeline_stages: 0
-    - auto_pipeline: False
     - pipeline_result_broadcast: False
     - gradient_accumulation_shard: True
     - fusion_threshold: 64
mindspore/parallel/_tensor.py CHANGED
@@ -19,6 +19,7 @@ from __future__ import absolute_import
 import copy
 import numpy as np
 from mindspore.common.tensor import Tensor
+from mindspore.common import dtype as mstype
 from mindspore.communication.management import get_rank, get_group_size
 from mindspore._c_expression import TensorTransform

@@ -223,6 +224,10 @@ def _load_tensor(tensor, dev_mat, tensor_map, full_shape=None, rank_id=-1):
     rank = rank_id
     tensor_strategy = _get_tensor_strategy(dev_mat, tensor_map)
     tensor_slice_index = _get_tensor_slice_index(dev_mat, tensor_strategy, tensor_map, rank)
+    if tensor.dtype == mstype.bfloat16:
+        from mindspore.ops.operations import Cast
+        cpu_cast = Cast().set_device("CPU")
+        tensor = cpu_cast(tensor, mstype.float32)
     np_tensor = tensor.asnumpy()
     if full_shape:
         np_tensor = np_tensor.reshape(full_shape)
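This branch exists because NumPy has no bfloat16 dtype, so asnumpy() cannot be called on a bfloat16 tensor directly; the tensor is first cast to float32 on the CPU. A minimal sketch of the same workaround, using the operators shown in the hunk (to_numpy is an illustrative name, not a package API):

from mindspore.common import dtype as mstype
from mindspore.ops.operations import Cast

def to_numpy(tensor):
    """Convert a MindSpore tensor to a numpy array, routing bfloat16 through float32."""
    if tensor.dtype == mstype.bfloat16:
        cpu_cast = Cast().set_device("CPU")    # run the cast on the host device
        tensor = cpu_cast(tensor, mstype.float32)  # nearest numpy-representable type
    return tensor.asnumpy()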
@@ -400,7 +400,7 @@ class FeedForward(Cell):
     >>> from mindspore.nn.transformer import FeedForward
     >>> from mindspore import dtype as mstype
     >>> from mindspore import Tensor, nn
->>>
+    >>> import mindspore.ops as ops
     >>> model = FeedForward(hidden_size=15, ffn_hidden_size=30, dropout_rate=0.1)
     >>> tensor = Tensor(np.ones((2, 20, 15)), mstype.float32)
     >>> output = model(tensor)
mindspore/parallel/_utils.py CHANGED
@@ -1,4 +1,4 @@
-# Copyright 2023
+# Copyright 2023 Huawei Technologies Co., Ltd
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -27,8 +27,6 @@ from mindspore.common.seed import get_seed
 from mindspore._c_expression import GraphExecutor_
 from mindspore.parallel._tensor import _load_tensor_by_layout

-SUPPORTED_TUPLE_IN_TUPLE_STRATEGY = ["GroupedMatmul", "FusedInferAttentionScore"]
-

 def _get_parallel_mode():
     """Get parallel mode."""
@@ -94,18 +92,6 @@ def _need_to_full():

 def _slice_parameter(parameter, phase, layout):
     """Slice python parameter obj according to the layout."""
-    is_train_phase = phase.startswith('train')
-    is_prefill_phase = phase.startswith('prefill')
-    if layout is not None and parameter.from_ckpt and not is_train_phase:
-        is_opt_shard_group = layout[5]
-        if not parameter.sliced and is_prefill_phase and is_opt_shard_group:
-            rank = get_rank()
-            new_tensor = _load_tensor_by_layout(parameter, layout, rank)
-            parameter.set_data(new_tensor, True)
-            return
-        layout_shape = layout[2]
-        parameter.shape = tuple(layout_shape)
-        return
     graph_executor = GraphExecutor_.get_instance()
     new_param = parameter.init_data(layout, set_sliced=True)
     parameter = new_param
@@ -227,7 +227,9 @@ get_algo_parameters_config_func_map = {
                 enable_algo_approxi=bool, algo_approxi_epsilon=float)
 def set_algo_parameters(**kwargs):
     """
-    Set parameters in the algorithm for parallel strategy searching.
+    Set parameters in the algorithm for parallel strategy searching. See a typical use in
+    `test_auto_parallel_resnet.py
+    <https://gitee.com/mindspore/mindspore/blob/master/tests/ut/python/parallel/test_auto_parallel_resnet.py>`_.

     Note:
         The attribute name is required. This interface works ONLY in AUTO_PARALLEL mode.
@@ -409,22 +409,19 @@ def transform_checkpoints(src_checkpoints_dir, dst_checkpoints_dir, ckpt_prefix,
     src_layout_map = _extract_layout_map(src_strategy_file)
     dst_layout_map = _extract_layout_map(dst_strategy_file)
     pipeline_stage_num = _extract_pipeline_stage_num(src_strategy_file)
-    dst_stage_num = _extract_pipeline_stage_num(dst_strategy_file)
     if src_layout_map:
-        src_param_keys = {param_name for param_name in src_layout_map if
-                          not param_name.startswith(("accu_grads", "adam_v", "adam_m"))}
+        src_param_keys = {param_name for param_name in src_layout_map if not param_name.startswith("accu_grads")}
     if dst_layout_map:
-        dst_param_keys = {param_name for param_name in dst_layout_map if
-
-
-
-
-
-
+        dst_param_keys = {param_name for param_name in dst_layout_map if not param_name.startswith("accu_grads")}
+    if src_layout_map and dst_layout_map and pipeline_stage_num == 1 \
+            and src_param_keys.issubset(dst_param_keys) and len(src_param_keys) < len(dst_param_keys):
+        dst_stage_num = _extract_pipeline_stage_num(dst_strategy_file)
+        if dst_stage_num > 1:
+            raise NotImplementedError("When using unmerged src strategy, dst strategy doesn't \
+                support strategy with pipeline parallel.")
         _transform_checkpoint_by_stage(src_checkpoints_dir, dst_checkpoints_dir, ckpt_prefix,
                                        src_strategy_file, dst_strategy_file)
     else:
-        ms.log.info("Transform checkpoints by all pipeline stage.")
         _transform_checkpoints(src_checkpoints_dir, dst_checkpoints_dir, ckpt_prefix,
                                src_strategy_file, dst_strategy_file)

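The rewritten guard treats the source strategy as "unmerged" when its parameter names form a strict subset of the destination's. A tiny self-contained illustration of that set test (the parameter names are invented):

src_param_keys = {"layer0.weight", "layer1.weight"}
dst_param_keys = {"layer0.weight", "layer1.weight", "layer2.weight"}

# Strict subset: every src key exists in dst, and dst has extra keys.
is_unmerged_src = (src_param_keys.issubset(dst_param_keys)
                   and len(src_param_keys) < len(dst_param_keys))
print(is_unmerged_src)  # True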
@@ -451,7 +448,7 @@ def _sync_params(name, param, layout):
         self.is_send = is_send
         self.ret = ms.Tensor([0])

-        from mindspore.ops import Send, Receive
+        from mindspore.ops.operations._inner_ops import Send, Receive
         if self.is_send:
             self.send = Send(sr_tag=sr_tag, dest_rank=peer_rank)
         else:
@@ -117,7 +117,6 @@ class _ProcessManager:
         self.join = args.join
         self.cluster_time_out = args.cluster_time_out
         self.bind_core = args.bind_core
-        self.rank_table_file = args.rank_table_file

         self.sim_level = args.sim_level
         self.sim_rank_id = args.sim_rank_id
@@ -128,14 +127,6 @@ class _ProcessManager:
             self.worker_num = 1
             self.local_worker_num = 1
             os.environ["MS_SIMULATION_LEVEL"] = str(self.sim_level)
-        elif os.getenv("MS_SIMULATION_LEVEL"):
-            # If simulation level env is set, load RANK_ID and RANK_SIZE envs.
-            self.worker_num = 1
-            self.local_worker_num = 1
-            self.is_simulation = True
-            self.sim_rank_id = os.getenv("RANK_ID", "0")
-            if os.getenv("RANK_SIZE"):
-                self.exported_rank_size = os.getenv("RANK_SIZE")

         self.cmd = args.task_script
         self.cmd_args = args.task_script_args
@@ -161,10 +152,6 @@ class _ProcessManager:

         """
         os.environ["RANK_SIZE"] = str(self.exported_rank_size)
-        if self.rank_table_file != "":
-            os.environ["RANK_TABLE_FILE"] = self.rank_table_file
-            logger.warning(f"msrun launching distributed job with user configured rank table file path:"
-                           f"{self.rank_table_file}")
         if self.is_scale:
             response_message = _send_scale_num(self.scheduler_url, self.scale_num)
             is_first_manager = response_message
@@ -231,8 +218,8 @@ class _ProcessManager:
             raise RuntimeError("Fail to get cpu number from /proc/cpuinfo.")
         if self.bind_core:
             avg = int(cpu_num) // self.local_worker_num
-            cpu_start = avg * i
-            cpu_end =
+            cpu_start = avg * i + 1
+            cpu_end = avg * (i + 1)
             cmd = _generate_cmd_args_list_with_core(self.cmd, self.cmd_args, cpu_start, cpu_end)
         else:
             cmd = _generate_cmd_args_list(self.cmd, self.cmd_args)
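With bind_core enabled, each local worker now gets a contiguous core range that skips core 0. A quick worked example of the arithmetic (the 96-core count is made up):

cpu_num = 96                        # pretend /proc/cpuinfo reported 96 cores
local_worker_num = 8
avg = cpu_num // local_worker_num   # 12 cores per worker

for i in range(local_worker_num):
    cpu_start = avg * i + 1         # "+ 1" keeps core 0 out of every range
    cpu_end = avg * (i + 1)
    print(f"worker {i}: taskset -c {cpu_start}-{cpu_end}")
# worker 0 -> 1-12, worker 1 -> 13-24, ..., worker 7 -> 85-96

Note that on this hypothetical box the last range ends at 96 even though the highest core index is 95: freeing core 0 shifts every range up by one.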
@@ -328,17 +315,31 @@ class _ProcessManager:
         """
         scheduler_log_path = os.path.join(self.log_dir, "scheduler.log")
         time_out_node_ids = []
-
-
-
-
-
-
-
-
-
-
-
+        with open(scheduler_log_path, "r") as log:
+            scheduler_log = log.read()
+        # Filter out abnormal logs.
+        time_out_node_log = re.findall(r"node: .* is timed out", scheduler_log)
+
+        # Filter out node ids of the processes which exit abnormally.
+        def node_id_splitter(id):
+            return re.split(" is timed out", re.split("node: ", id)[1])[0]
+        for id in time_out_node_log:
+            time_out_node_ids.append(node_id_splitter(id))
+
+        # If 'time_out_node_ids' is not empty, only analyze logs of these time out nodes.
+        # Unless get the error logs of all workers.
+        if time_out_node_ids:
+            os.system(f"cat {scheduler_log_path}|grep -E 'ERROR|CRITICAL|Traceback|Error' -C 5")
             logger.error(f"Time out nodes are {time_out_node_ids}")
-
-
+            # Get the logs which have these timeout node ids.
+            def grepper(id):
+                return subprocess.getoutput(f"grep -rn 'This node {id}' {self.log_dir}"" | awk -F: '{print $1}'")
+            log_names = []
+            for id in time_out_node_ids:
+                log_names.append(grepper(id))
+            for log in log_names:
+                logger.error(f"cat log {log} error info and tail log:"
+                             "==========================")
+                os.system(f"cat {log}|grep -E 'ERROR|CRITICAL|Traceback|Error' -C 5")
+        else:
+            os.system(f"grep -rn -E 'ERROR|CRITICAL|Traceback|Error' -C 5 {self.log_dir}")
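The added analysis extracts node ids by nesting two re.split calls inside the matches found by re.findall. A standalone sketch of just the parsing step, with an invented log snippet:

import re

scheduler_log = (
    "INFO  heartbeat ok\n"
    "ERROR node: worker-3 is timed out after 300s\n"
    "ERROR node: worker-7 is timed out after 300s\n"
)

matches = re.findall(r"node: .* is timed out", scheduler_log)
node_ids = [re.split(" is timed out", re.split("node: ", m)[1])[0] for m in matches]
print(node_ids)  # ['worker-3', 'worker-7']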
@@ -15,7 +15,6 @@
 """Utils for ms_run"""
 import os
 import json
-import socket
 import requests
 import mindspore.log as logger

@@ -37,7 +36,7 @@ def _generate_cmd_args_list(cmd, cmd_args):
     """
     Generates arguments list for 'Popen'. It consists of a binary file name and subsequential arguments.
     """
-    if cmd not in ['python', 'pytest'
+    if cmd not in ['python', 'pytest']:
         # If user don't set binary file name, defaulty use 'python' to launch the job.
         return ['python'] + [cmd] + cmd_args
     return [cmd] + cmd_args
@@ -50,7 +49,7 @@ def _generate_cmd_args_list_with_core(cmd, cmd_args, cpu_start, cpu_end):
     # Bind cpu cores to this process.
     taskset_args = ['taskset'] + ['-c'] + [str(cpu_start) + '-' + str(cpu_end)]
     final_cmd = []
-    if cmd not in ['python', 'pytest'
+    if cmd not in ['python', 'pytest']:
         # If user don't set binary file name, defaulty use 'python' to launch the job.
         final_cmd = taskset_args + ['python'] + [cmd] + cmd_args
     else:
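Combined with the core ranges above, the launcher builds an argv list prefixed with taskset -c. A self-contained sketch of the assembly (build_bound_cmd is an illustrative name):

def build_bound_cmd(cmd, cmd_args, cpu_start, cpu_end):
    """Build a Popen-style argv that pins the process to a core range via taskset."""
    taskset_args = ["taskset", "-c", f"{cpu_start}-{cpu_end}"]
    if cmd not in ("python", "pytest"):
        # No interpreter given: launch the script with python by default.
        return taskset_args + ["python", cmd] + cmd_args
    return taskset_args + [cmd] + cmd_args

print(build_bound_cmd("train.py", ["--epochs", "10"], 1, 12))
# ['taskset', '-c', '1-12', 'python', 'train.py', '--epochs', '10']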
@@ -76,20 +75,11 @@ def _is_local_ip(ip_address):
     p = os.popen("ip -j addr")
     addr_info_str = p.read()
     p.close()
-    if not addr_info_str:
-        # This means this host has no "ip -j addr" command.
-        # We use socket module to get local ip address.
-        s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
-        s.connect((ip_address, 0))
-        current_ip = s.getsockname()[0]
-        s.close()
-        return current_ip == ip_address
-
     addr_infos = json.loads(addr_info_str)
     for info in addr_infos:
         for addr in info["addr_info"]:
             if addr["local"] == ip_address:
-                logger.info(f"IP address found on this node. Address info:{addr}.
+                logger.info(f"IP address found on this node. Address info:{addr}.")
                 return True
     return False

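The deleted fallback used a classic trick: connect() on a UDP socket transmits nothing, but it makes the kernel choose a source address, which getsockname() then reveals. A runnable sketch of the idea; the removed code connected to port 0, which some platforms reject, so this sketch uses 80 (probe_source_ip is an illustrative name):

import socket

def probe_source_ip(target_ip):
    """Return the local source IP the kernel would use to reach target_ip.
    UDP connect() sends no packets; it only selects a route and source address."""
    s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
    try:
        s.connect((target_ip, 80))
        return s.getsockname()[0]
    finally:
        s.close()

# A node can then test whether an address is local:
# probe_source_ip("192.168.1.10") == "192.168.1.10"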
@@ -24,37 +24,47 @@ def get_args():
     """
     parser = ArgumentParser()
     parser.add_argument(
-        "--worker_num",
+        "--worker_num",
+        type=int,
+        default=8,
         help="the total number of nodes participating in the training, an integer variable, "
              "with a default value of 8."
     )
     parser.add_argument(
         "--local_worker_num",
-        type=int,
+        type=int,
+        default=8,
         help="the number of nodes participating in local training, an integer variable, "
              "with a default value of 8."
     )
     parser.add_argument(
         "--master_addr",
-        default="127.0.0.1",
+        default="127.0.0.1",
+        type=str,
         help="specifies the IP address of the scheduler and its data type is string."
              " Allowed values: valid IP addresses."
     )
     parser.add_argument(
-        "--master_port",
+        "--master_port",
+        default=8118,
+        type=int,
         help="specifies the port number of the scheduler, and its data type is integer."
              " Allowed values: port numbers within the range of 1024 to 65535 that are not "
              "already in use."
     )
     parser.add_argument(
-        "--node_rank",
+        "--node_rank",
+        default=-1,
+        type=int,
         help="specifies the rank of current physical node, and its data type is integer."
              " This parameter is used for rank id assignment for each process on the node."
              " If not set, MindSpore will assign rank ids automatically and"
              " rank id of each process on the same node will be continuous."
     )
     parser.add_argument(
-        "--log_dir",
+        "--log_dir",
+        default="",
+        type=str,
         help="specifies the log output file path."
     )
     parser.add_argument(
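The rc2 side pins explicit type= and default= on every msrun flag; without type=int, argparse hands back strings and downstream arithmetic breaks. A short demonstration:

from argparse import ArgumentParser

parser = ArgumentParser()
parser.add_argument("--worker_num", type=int, default=8)
args = parser.parse_args(["--worker_num", "4"])
print(args.worker_num + 1)  # 5 -- parsed as int; without type=int, "4" + 1 raises TypeError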
@@ -95,13 +105,6 @@ def get_args():
         type=int,
         help="specifies simulation process's rank id. Only one process is spawned in simulation scenario."
     )
-    parser.add_argument(
-        "--rank_table_file",
-        default="",
-        type=str,
-        help="specifies rank table file path. This path is not used to initialize distributed job in "
-             "'rank table file manner' but to help support other features."
-    )
     parser.add_argument(
         "task_script",
         type=str,
@@ -131,7 +131,7 @@ def parameter_broadcast(net, layout, cur_rank=0, initial_rank=0):
         param_redundancy_reversed.setdefault(item, []).append(key)
     if not param_redundancy_reversed:
         return
-    if cur_rank not in single_params:
+    if not cur_rank not in single_params:
         return
     net_param_dict = net.parameters_dict()
     ms.set_auto_parallel_context(parallel_mode="hybrid_parallel")
@@ -140,7 +140,7 @@ def parameter_broadcast(net, layout, cur_rank=0, initial_rank=0):
     allreduce_input = []
     for param in params:
         if param not in net_param_dict:
-            raise ValueError(
+            raise ValueError("For parameter broadcast, the param: {param} can not be found.")
         real_param = net_param_dict[param]
         if param not in single_params[cur_rank]:
             real_param.set_data(Tensor(np.zeros(real_param.shape), dtype=real_param.dtype))