mindspore 2.6.0rc1__cp311-cp311-win_amd64.whl → 2.7.0__cp311-cp311-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mindspore might be problematic. Click here for more details.
- mindspore/.commit_id +1 -1
- mindspore/Microsoft.VisualStudio.Telemetry.dll +0 -0
- mindspore/Newtonsoft.Json.dll +0 -0
- mindspore/__init__.py +2 -2
- mindspore/_c_dataengine.cp311-win_amd64.pyd +0 -0
- mindspore/_c_expression.cp311-win_amd64.pyd +0 -0
- mindspore/_c_mindrecord.cp311-win_amd64.pyd +0 -0
- mindspore/_checkparam.py +42 -11
- mindspore/_extends/builtin_operations.py +3 -3
- mindspore/{_deprecated → _extends/optimize}/__init__.py +9 -3
- mindspore/_extends/optimize/cell_utils.py +96 -0
- mindspore/_extends/parallel_compile/akg_compiler/custom.py +1109 -0
- mindspore/_extends/parallel_compile/akg_compiler/gen_custom_op_files.py +1 -1
- mindspore/_extends/parse/__init__.py +3 -3
- mindspore/_extends/parse/compile_config.py +44 -22
- mindspore/_extends/parse/deprecated/deprecated_tensor_method.py +1 -2
- mindspore/_extends/parse/parser.py +65 -84
- mindspore/_extends/parse/resources.py +39 -0
- mindspore/_extends/parse/standard_method.py +58 -14
- mindspore/_extends/parse/trope.py +8 -1
- mindspore/_extends/pijit/__init__.py +1 -2
- mindspore/_extends/pijit/pijit_func_white_list.py +2 -5
- mindspore/amp.py +4 -22
- mindspore/atlprov.dll +0 -0
- mindspore/avcodec-59.dll +0 -0
- mindspore/avdevice-59.dll +0 -0
- mindspore/avfilter-8.dll +0 -0
- mindspore/avformat-59.dll +0 -0
- mindspore/avutil-57.dll +0 -0
- mindspore/boost/adasum.py +1 -1
- mindspore/boost/boost_cell_wrapper.py +4 -4
- mindspore/c1.dll +0 -0
- mindspore/c1xx.dll +0 -0
- mindspore/c2.dll +0 -0
- mindspore/common/__init__.py +43 -12
- mindspore/common/_grad_function.py +2 -1
- mindspore/common/_pijit_context.py +28 -7
- mindspore/common/_stub_tensor.py +1 -209
- mindspore/common/_tensor_cpp_method.py +1 -1
- mindspore/common/_tensor_docs.py +178 -53
- mindspore/common/_utils.py +9 -1
- mindspore/common/api.py +377 -203
- mindspore/common/dtype.py +108 -57
- mindspore/common/dump.py +11 -16
- mindspore/common/dynamic_shape/__init__.py +0 -0
- mindspore/common/{auto_dynamic_shape.py → dynamic_shape/auto_dynamic_shape.py} +17 -23
- mindspore/common/dynamic_shape/enable_dynamic.py +197 -0
- mindspore/common/file_system.py +59 -9
- mindspore/common/generator.py +5 -3
- mindspore/common/hook_handle.py +33 -5
- mindspore/common/jit_config.py +1 -1
- mindspore/common/jit_trace.py +84 -105
- mindspore/common/np_dtype.py +3 -3
- mindspore/common/parameter.py +27 -29
- mindspore/common/recompute.py +5 -7
- mindspore/common/sparse_tensor.py +0 -3
- mindspore/common/symbol.py +0 -1
- mindspore/common/tensor.py +117 -131
- mindspore/communication/_comm_helper.py +46 -4
- mindspore/communication/management.py +79 -7
- mindspore/context.py +67 -55
- mindspore/dataset/__init__.py +1 -1
- mindspore/dataset/audio/transforms.py +1 -1
- mindspore/dataset/core/config.py +38 -4
- mindspore/dataset/engine/datasets.py +350 -322
- mindspore/dataset/engine/datasets_user_defined.py +70 -24
- mindspore/dataset/engine/iterators.py +2 -2
- mindspore/dataset/engine/obs/config_loader.py +2 -2
- mindspore/dataset/engine/obs/obs_mindrecord_dataset.py +8 -0
- mindspore/dataset/transforms/c_transforms.py +2 -2
- mindspore/dataset/transforms/py_transforms.py +7 -3
- mindspore/dataset/transforms/transforms.py +10 -6
- mindspore/dataset/vision/__init__.py +1 -1
- mindspore/dataset/vision/py_transforms.py +8 -8
- mindspore/dataset/vision/transforms.py +17 -5
- mindspore/dataset/vision/utils.py +632 -21
- mindspore/dataset/vision/validators.py +1 -0
- mindspore/device_context/ascend/device.py +1 -1
- mindspore/device_context/ascend/op_tuning.py +35 -1
- mindspore/device_context/gpu/__init__.py +2 -2
- mindspore/device_context/gpu/device.py +1 -1
- mindspore/device_context/gpu/op_precision.py +4 -2
- mindspore/device_context/gpu/op_tuning.py +6 -3
- mindspore/device_manager.py +16 -9
- mindspore/dnnl.dll +0 -0
- mindspore/dpcmi.dll +0 -0
- mindspore/experimental/llm_boost/ascend_native/llama_boost_ascend_native.py +3 -4
- mindspore/experimental/llm_boost/atb/boost_base.py +2 -3
- mindspore/experimental/optim/adadelta.py +13 -20
- mindspore/experimental/optim/adagrad.py +15 -22
- mindspore/experimental/optim/adam.py +17 -24
- mindspore/experimental/optim/adamax.py +14 -22
- mindspore/experimental/optim/adamw.py +28 -34
- mindspore/experimental/optim/asgd.py +15 -25
- mindspore/experimental/optim/lr_scheduler.py +27 -45
- mindspore/experimental/optim/nadam.py +14 -24
- mindspore/experimental/optim/optimizer.py +13 -23
- mindspore/experimental/optim/radam.py +18 -24
- mindspore/experimental/optim/rmsprop.py +14 -25
- mindspore/experimental/optim/rprop.py +15 -26
- mindspore/experimental/optim/sgd.py +9 -19
- mindspore/hal/__init__.py +4 -4
- mindspore/hal/contiguous_tensors_handle.py +2 -2
- mindspore/hal/memory.py +27 -7
- mindspore/include/api/cell.h +65 -5
- mindspore/include/api/cfg.h +24 -7
- mindspore/include/api/context.h +1 -0
- mindspore/include/api/delegate.h +10 -2
- mindspore/include/api/dual_abi_helper.h +100 -19
- mindspore/include/api/graph.h +14 -1
- mindspore/include/api/kernel.h +16 -3
- mindspore/include/api/kernel_api.h +9 -1
- mindspore/include/api/metrics/accuracy.h +9 -0
- mindspore/include/api/model.h +8 -1
- mindspore/include/api/model_group.h +4 -0
- mindspore/include/api/model_parallel_runner.h +2 -0
- mindspore/include/api/status.h +48 -10
- mindspore/include/api/types.h +8 -3
- mindspore/include/c_api/model_c.h +0 -58
- mindspore/include/c_api/tensor_c.h +0 -26
- mindspore/include/dataset/constants.h +9 -0
- mindspore/include/dataset/vision_ascend.h +1 -1
- mindspore/jpeg62.dll +0 -0
- mindspore/mindrecord/tools/cifar10.py +61 -11
- mindspore/mindrecord/tools/cifar10_to_mr.py +5 -0
- mindspore/mindspore_backend_common.dll +0 -0
- mindspore/mindspore_backend_manager.dll +0 -0
- mindspore/mindspore_common.dll +0 -0
- mindspore/mindspore_core.dll +0 -0
- mindspore/mindspore_cpu_res_manager.dll +0 -0
- mindspore/mindspore_dump.dll +0 -0
- mindspore/mindspore_frontend.dll +0 -0
- mindspore/mindspore_glog.dll +0 -0
- mindspore/mindspore_memory_pool.dll +0 -0
- mindspore/mindspore_ms_backend.dll +0 -0
- mindspore/mindspore_ops.dll +0 -0
- mindspore/mindspore_ops_host.dll +0 -0
- mindspore/mindspore_ops_kernel_common.dll +0 -0
- mindspore/mindspore_profiler.dll +0 -0
- mindspore/mindspore_pyboost.dll +0 -0
- mindspore/mindspore_pynative.dll +0 -0
- mindspore/mindspore_res_manager.dll +0 -0
- mindspore/mindspore_runtime_pipeline.dll +0 -0
- mindspore/mint/__init__.py +6 -46
- mindspore/mint/distributed/__init__.py +5 -0
- mindspore/mint/distributed/distributed.py +429 -23
- mindspore/mint/nn/__init__.py +1 -1
- mindspore/mint/nn/functional.py +53 -6
- mindspore/mint/nn/layer/_functions.py +163 -294
- mindspore/mint/nn/layer/activation.py +8 -6
- mindspore/mint/nn/layer/conv.py +140 -104
- mindspore/mint/nn/layer/normalization.py +11 -25
- mindspore/mint/optim/adam.py +19 -18
- mindspore/mint/optim/adamw.py +14 -8
- mindspore/mint/optim/sgd.py +5 -5
- mindspore/msobj140.dll +0 -0
- mindspore/mspdb140.dll +0 -0
- mindspore/mspdbcore.dll +0 -0
- mindspore/mspdbst.dll +0 -0
- mindspore/mspft140.dll +0 -0
- mindspore/msvcdis140.dll +0 -0
- mindspore/msvcp140_1.dll +0 -0
- mindspore/msvcp140_2.dll +0 -0
- mindspore/msvcp140_atomic_wait.dll +0 -0
- mindspore/msvcp140_codecvt_ids.dll +0 -0
- mindspore/nn/cell.py +491 -623
- mindspore/nn/grad/cell_grad.py +11 -12
- mindspore/nn/layer/activation.py +36 -36
- mindspore/nn/layer/basic.py +74 -77
- mindspore/nn/layer/channel_shuffle.py +4 -4
- mindspore/nn/layer/combined.py +4 -2
- mindspore/nn/layer/conv.py +117 -110
- mindspore/nn/layer/dense.py +9 -7
- mindspore/nn/layer/embedding.py +50 -52
- mindspore/nn/layer/image.py +38 -40
- mindspore/nn/layer/math.py +111 -112
- mindspore/nn/layer/normalization.py +56 -44
- mindspore/nn/layer/pooling.py +58 -63
- mindspore/nn/layer/rnn_cells.py +33 -33
- mindspore/nn/layer/rnns.py +56 -56
- mindspore/nn/layer/thor_layer.py +74 -73
- mindspore/nn/layer/transformer.py +11 -1
- mindspore/nn/learning_rate_schedule.py +20 -20
- mindspore/nn/loss/loss.py +79 -81
- mindspore/nn/optim/adam.py +4 -6
- mindspore/nn/optim/adasum.py +2 -2
- mindspore/nn/optim/asgd.py +2 -0
- mindspore/nn/optim/lamb.py +1 -3
- mindspore/nn/optim/optimizer.py +1 -1
- mindspore/nn/optim/tft_wrapper.py +2 -3
- mindspore/nn/optim/thor.py +2 -2
- mindspore/nn/probability/distribution/_utils/utils.py +2 -2
- mindspore/nn/probability/distribution/exponential.py +2 -1
- mindspore/nn/probability/distribution/poisson.py +2 -1
- mindspore/nn/sparse/sparse.py +3 -3
- mindspore/nn/wrap/cell_wrapper.py +73 -42
- mindspore/nn/wrap/grad_reducer.py +37 -52
- mindspore/nn/wrap/loss_scale.py +72 -74
- mindspore/numpy/array_creations.py +7 -7
- mindspore/numpy/fft.py +1 -1
- mindspore/numpy/math_ops.py +5 -5
- mindspore/numpy/utils_const.py +1 -1
- mindspore/opencv_core452.dll +0 -0
- mindspore/opencv_imgcodecs452.dll +0 -0
- mindspore/opencv_imgproc452.dll +0 -0
- mindspore/ops/_grad_experimental/grad_comm_ops.py +51 -13
- mindspore/ops/_grad_experimental/grad_debug_ops.py +14 -0
- mindspore/ops/_grad_experimental/grad_inner_ops.py +0 -9
- mindspore/ops/_op_impl/cpu/__init__.py +1 -0
- mindspore/{experimental/es/__init__.py → ops/_op_impl/cpu/joinedstr_op.py} +12 -6
- mindspore/ops/_vmap/vmap_array_ops.py +31 -13
- mindspore/ops/_vmap/vmap_nn_ops.py +8 -16
- mindspore/ops/auto_generate/cpp_create_prim_instance_helper.py +54 -13
- mindspore/ops/auto_generate/gen_extend_func.py +27 -145
- mindspore/ops/auto_generate/gen_ops_def.py +1027 -347
- mindspore/ops/auto_generate/gen_ops_prim.py +2341 -1117
- mindspore/ops/auto_generate/pyboost_inner_prim.py +31 -1
- mindspore/ops/composite/__init__.py +10 -0
- mindspore/ops/composite/base.py +9 -5
- mindspore/ops/composite/multitype_ops/__init__.py +12 -1
- mindspore/ops/composite/multitype_ops/_compile_utils.py +133 -109
- mindspore/ops/composite/multitype_ops/_constexpr_utils.py +1 -1
- mindspore/ops/composite/multitype_ops/add_impl.py +70 -2
- mindspore/ops/composite/multitype_ops/div_impl.py +49 -0
- mindspore/ops/composite/multitype_ops/floordiv_impl.py +29 -0
- mindspore/ops/composite/multitype_ops/getitem_impl.py +11 -0
- mindspore/ops/composite/multitype_ops/mod_impl.py +5 -3
- mindspore/ops/composite/multitype_ops/mul_impl.py +49 -0
- mindspore/ops/composite/multitype_ops/setitem_impl.py +57 -0
- mindspore/ops/composite/multitype_ops/sub_impl.py +34 -0
- mindspore/ops/composite/multitype_ops/zeros_like_impl.py +14 -0
- mindspore/ops/function/__init__.py +4 -1
- mindspore/ops/function/_add_attr_func.py +11 -6
- mindspore/ops/function/array_func.py +19 -102
- mindspore/ops/function/debug_func.py +8 -5
- mindspore/ops/function/grad/grad_func.py +5 -13
- mindspore/ops/function/math_func.py +77 -572
- mindspore/ops/function/nn_func.py +46 -94
- mindspore/ops/function/other_func.py +4 -1
- mindspore/ops/function/random_func.py +44 -5
- mindspore/ops/function/vmap_func.py +2 -1
- mindspore/ops/functional.py +4 -4
- mindspore/ops/functional_overload.py +594 -18
- mindspore/ops/op_info_register.py +21 -0
- mindspore/ops/operations/__init__.py +16 -11
- mindspore/ops/operations/_custom_ops_utils.py +689 -34
- mindspore/ops/operations/_inner_ops.py +14 -18
- mindspore/ops/operations/_sequence_ops.py +1 -1
- mindspore/ops/operations/array_ops.py +5 -51
- mindspore/ops/operations/comm_ops.py +186 -41
- mindspore/ops/operations/custom_ops.py +303 -177
- mindspore/ops/operations/debug_ops.py +59 -4
- mindspore/ops/operations/image_ops.py +13 -13
- mindspore/ops/operations/manually_defined/ops_def.py +27 -28
- mindspore/ops/operations/math_ops.py +8 -9
- mindspore/ops/operations/nn_ops.py +8 -40
- mindspore/ops/primitive.py +9 -20
- mindspore/ops/tensor_method.py +63 -15
- mindspore/ops_generate/api/cpp_create_prim_instance_helper_generator.py +1 -1
- mindspore/ops_generate/api/functional_map_cpp_generator.py +10 -9
- mindspore/ops_generate/api/functions_cc_generator.py +58 -10
- mindspore/ops_generate/api/tensor_func_reg_cpp_generator.py +1 -1
- mindspore/ops_generate/common/base_generator.py +14 -0
- mindspore/ops_generate/common/gen_constants.py +8 -3
- mindspore/ops_generate/common/gen_utils.py +0 -19
- mindspore/ops_generate/common/op_proto.py +11 -4
- mindspore/ops_generate/common/template.py +88 -11
- mindspore/ops_generate/gen_ops.py +1 -1
- mindspore/ops_generate/op_def/lite_ops_cpp_generator.py +4 -4
- mindspore/ops_generate/op_def/ops_def_cc_generator.py +0 -3
- mindspore/ops_generate/op_def/ops_name_h_generator.py +0 -3
- mindspore/ops_generate/op_def/ops_primitive_h_generator.py +0 -4
- mindspore/ops_generate/op_def_py/op_prim_py_generator.py +5 -2
- mindspore/ops_generate/pyboost/auto_grad_impl_cc_generator.py +49 -8
- mindspore/ops_generate/pyboost/auto_grad_reg_cc_generator.py +2 -2
- mindspore/ops_generate/pyboost/gen_pyboost_func.py +31 -16
- mindspore/ops_generate/pyboost/op_template_parser.py +98 -72
- mindspore/ops_generate/pyboost/pyboost_functions_cpp_generator.py +70 -273
- mindspore/ops_generate/pyboost/pyboost_functions_h_generator.py +14 -6
- mindspore/ops_generate/pyboost/pyboost_functions_impl_cpp_generator.py +316 -0
- mindspore/ops_generate/pyboost/pyboost_functions_py_generator.py +1 -1
- mindspore/ops_generate/pyboost/pyboost_grad_function_cpp_generator.py +5 -3
- mindspore/ops_generate/pyboost/pyboost_inner_prim_generator.py +1 -1
- mindspore/ops_generate/pyboost/pyboost_internal_functions_cpp_generator.py +76 -0
- mindspore/ops_generate/pyboost/pyboost_internal_functions_h_generator.py +76 -0
- mindspore/ops_generate/pyboost/pyboost_internal_kernel_info_adapter_generator.py +125 -0
- mindspore/ops_generate/pyboost/pyboost_native_grad_functions_generator.py +4 -3
- mindspore/ops_generate/pyboost/pyboost_op_cpp_code_generator.py +348 -61
- mindspore/ops_generate/pyboost/pyboost_overload_functions_cpp_generator.py +1 -1
- mindspore/ops_generate/pyboost/pyboost_utils.py +118 -9
- mindspore/ops_generate/tensor_py_cc_generator.py +1 -24
- mindspore/parallel/_auto_parallel_context.py +16 -23
- mindspore/parallel/_cell_wrapper.py +113 -45
- mindspore/parallel/_parallel_serialization.py +4 -3
- mindspore/parallel/_ps_context.py +4 -6
- mindspore/parallel/_tensor.py +167 -12
- mindspore/parallel/_transformer/moe.py +1 -1
- mindspore/parallel/_transformer/transformer.py +17 -12
- mindspore/parallel/_utils.py +5 -11
- mindspore/parallel/auto_parallel.py +35 -14
- mindspore/parallel/checkpoint_convert.py +3 -3
- mindspore/parallel/checkpoint_transform.py +13 -7
- mindspore/parallel/cluster/process_entity/_api.py +88 -49
- mindspore/parallel/cluster/process_entity/_utils.py +95 -7
- mindspore/parallel/cluster/run.py +48 -7
- mindspore/parallel/function/__init__.py +8 -1
- mindspore/parallel/function/reshard_func.py +12 -12
- mindspore/parallel/nn/__init__.py +15 -2
- mindspore/parallel/nn/parallel_cell_wrapper.py +50 -14
- mindspore/parallel/nn/parallel_grad_reducer.py +7 -14
- mindspore/parallel/shard.py +10 -25
- mindspore/parallel/transform_safetensors.py +469 -174
- mindspore/pgodb140.dll +0 -0
- mindspore/pgort140.dll +0 -0
- mindspore/profiler/__init__.py +2 -1
- mindspore/profiler/analysis/parser/timeline_assembly_factory/ascend_timeline_assembler.py +7 -7
- mindspore/profiler/analysis/parser/timeline_assembly_factory/base_timeline_assembler.py +3 -0
- mindspore/profiler/analysis/parser/timeline_assembly_factory/trace_view_container.py +12 -6
- mindspore/profiler/analysis/parser/timeline_creator/cpu_op_timeline_creator.py +3 -3
- mindspore/profiler/analysis/parser/timeline_creator/fwk_timeline_creator.py +3 -3
- mindspore/profiler/analysis/parser/timeline_creator/msprof_timeline_creator.py +4 -4
- mindspore/profiler/analysis/parser/timeline_creator/scope_layer_timeline_creator.py +3 -3
- mindspore/profiler/analysis/parser/timeline_event/fwk_event.py +4 -1
- mindspore/profiler/analysis/parser/timeline_event/timeline_event_pool.py +2 -1
- mindspore/profiler/analysis/task_manager.py +1 -1
- mindspore/profiler/analysis/viewer/ascend_communication_viewer.py +5 -1
- mindspore/profiler/analysis/viewer/ascend_integrate_viewer.py +2 -1
- mindspore/profiler/analysis/viewer/ascend_kernel_details_viewer.py +10 -9
- mindspore/profiler/analysis/viewer/ascend_op_memory_viewer.py +43 -23
- mindspore/profiler/analysis/viewer/ascend_step_trace_time_viewer.py +3 -2
- mindspore/profiler/analysis/viewer/ms_minddata_viewer.py +9 -5
- mindspore/profiler/analysis/viewer/ms_operator_details_viewer.py +132 -0
- mindspore/profiler/common/constant.py +16 -0
- mindspore/profiler/common/msprof_cmd_tool.py +2 -2
- mindspore/profiler/common/path_manager.py +9 -0
- mindspore/profiler/common/profiler_context.py +50 -29
- mindspore/profiler/common/profiler_info.py +0 -16
- mindspore/profiler/common/profiler_meta_data.py +1 -0
- mindspore/profiler/common/profiler_op_analyse.py +239 -0
- mindspore/profiler/common/profiler_output_path.py +23 -8
- mindspore/profiler/common/profiler_parameters.py +128 -35
- mindspore/profiler/dynamic_profile/__init__.py +0 -0
- mindspore/profiler/dynamic_profile/dynamic_monitor_proxy.py +39 -0
- mindspore/profiler/dynamic_profile/dynamic_profiler_config_context.py +666 -0
- mindspore/profiler/dynamic_profile/dynamic_profiler_utils.py +62 -0
- mindspore/profiler/dynamic_profiler.py +374 -338
- mindspore/profiler/envprofiler.py +42 -12
- mindspore/profiler/experimental_config.py +112 -7
- mindspore/profiler/mstx.py +33 -12
- mindspore/profiler/platform/__init__.py +2 -3
- mindspore/profiler/platform/cpu_profiler.py +10 -4
- mindspore/profiler/platform/npu_profiler.py +30 -20
- mindspore/profiler/profiler.py +218 -154
- mindspore/profiler/profiler_action_controller.py +65 -77
- mindspore/profiler/profiler_interface.py +2 -2
- mindspore/profiler/schedule.py +10 -4
- mindspore/rewrite/common/config.py +1 -0
- mindspore/rewrite/common/namer.py +1 -0
- mindspore/rewrite/common/namespace.py +1 -0
- mindspore/rewrite/node/node.py +31 -11
- mindspore/rewrite/parsers/assign_parser.py +1 -1
- mindspore/rewrite/symbol_tree/symbol_tree.py +2 -2
- mindspore/run_check/_check_version.py +7 -10
- mindspore/runtime/__init__.py +8 -6
- mindspore/runtime/event.py +10 -4
- mindspore/runtime/executor.py +87 -45
- mindspore/runtime/memory.py +31 -32
- mindspore/runtime/thread_bind_core.py +299 -165
- mindspore/safeguard/rewrite_obfuscation.py +12 -13
- mindspore/swresample-4.dll +0 -0
- mindspore/swscale-6.dll +0 -0
- mindspore/tbbmalloc.dll +0 -0
- mindspore/tinyxml2.dll +0 -0
- mindspore/train/_utils.py +17 -7
- mindspore/train/amp.py +43 -23
- mindspore/train/callback/__init__.py +5 -5
- mindspore/train/callback/_callback.py +2 -1
- mindspore/train/callback/_checkpoint.py +4 -14
- mindspore/train/callback/_flops_collector.py +11 -7
- mindspore/train/callback/_landscape.py +0 -1
- mindspore/train/callback/_train_fault_tolerance.py +98 -21
- mindspore/train/data_sink.py +15 -6
- mindspore/train/dataset_helper.py +14 -5
- mindspore/train/model.py +133 -69
- mindspore/train/serialization.py +168 -126
- mindspore/train/summary/summary_record.py +13 -2
- mindspore/train/train_thor/model_thor.py +2 -2
- mindspore/turbojpeg.dll +0 -0
- mindspore/utils/__init__.py +3 -2
- mindspore/utils/dryrun.py +0 -6
- mindspore/utils/runtime_execution_order_check.py +163 -77
- mindspore/utils/sdc_detect.py +68 -0
- mindspore/utils/utils.py +14 -17
- mindspore/vcmeta.dll +0 -0
- mindspore/vcruntime140.dll +0 -0
- mindspore/vcruntime140_1.dll +0 -0
- mindspore/version.py +1 -1
- {mindspore-2.6.0rc1.dist-info → mindspore-2.7.0.dist-info}/METADATA +5 -4
- {mindspore-2.6.0rc1.dist-info → mindspore-2.7.0.dist-info}/RECORD +403 -442
- mindspore/_deprecated/jit.py +0 -198
- mindspore/_extends/remote/kernel_build_server_ascend.py +0 -75
- mindspore/communication/_hccl_management.py +0 -297
- mindspore/experimental/es/embedding_service.py +0 -891
- mindspore/experimental/es/embedding_service_layer.py +0 -581
- mindspore/profiler/common/validator/__init__.py +0 -14
- mindspore/profiler/common/validator/validate_path.py +0 -84
- mindspore/profiler/parser/__init__.py +0 -14
- mindspore/profiler/parser/aicpu_data_parser.py +0 -272
- mindspore/profiler/parser/ascend_analysis/__init__.py +0 -14
- mindspore/profiler/parser/ascend_analysis/constant.py +0 -71
- mindspore/profiler/parser/ascend_analysis/file_manager.py +0 -180
- mindspore/profiler/parser/ascend_analysis/function_event.py +0 -185
- mindspore/profiler/parser/ascend_analysis/fwk_cann_parser.py +0 -136
- mindspore/profiler/parser/ascend_analysis/fwk_file_parser.py +0 -131
- mindspore/profiler/parser/ascend_analysis/msprof_timeline_parser.py +0 -104
- mindspore/profiler/parser/ascend_analysis/path_manager.py +0 -313
- mindspore/profiler/parser/ascend_analysis/profiler_info_parser.py +0 -123
- mindspore/profiler/parser/ascend_analysis/tlv_decoder.py +0 -86
- mindspore/profiler/parser/ascend_analysis/trace_event_manager.py +0 -75
- mindspore/profiler/parser/ascend_cluster_generator.py +0 -116
- mindspore/profiler/parser/ascend_communicate_generator.py +0 -314
- mindspore/profiler/parser/ascend_flops_generator.py +0 -116
- mindspore/profiler/parser/ascend_fpbp_generator.py +0 -82
- mindspore/profiler/parser/ascend_hccl_generator.py +0 -271
- mindspore/profiler/parser/ascend_integrate_generator.py +0 -42
- mindspore/profiler/parser/ascend_memory_generator.py +0 -185
- mindspore/profiler/parser/ascend_msprof_exporter.py +0 -282
- mindspore/profiler/parser/ascend_msprof_generator.py +0 -187
- mindspore/profiler/parser/ascend_op_generator.py +0 -334
- mindspore/profiler/parser/ascend_steptrace_generator.py +0 -94
- mindspore/profiler/parser/ascend_timeline_generator.py +0 -545
- mindspore/profiler/parser/base_timeline_generator.py +0 -483
- mindspore/profiler/parser/container.py +0 -229
- mindspore/profiler/parser/cpu_gpu_timeline_generator.py +0 -697
- mindspore/profiler/parser/flops_parser.py +0 -531
- mindspore/profiler/parser/framework_enum.py +0 -111
- mindspore/profiler/parser/framework_parser.py +0 -464
- mindspore/profiler/parser/framework_struct.py +0 -61
- mindspore/profiler/parser/gpu_analysis/__init__.py +0 -14
- mindspore/profiler/parser/gpu_analysis/function_event.py +0 -44
- mindspore/profiler/parser/gpu_analysis/fwk_file_parser.py +0 -89
- mindspore/profiler/parser/gpu_analysis/profiler_info_parser.py +0 -72
- mindspore/profiler/parser/hccl_parser.py +0 -573
- mindspore/profiler/parser/hwts_log_parser.py +0 -122
- mindspore/profiler/parser/integrator.py +0 -526
- mindspore/profiler/parser/memory_usage_parser.py +0 -277
- mindspore/profiler/parser/minddata_analyzer.py +0 -800
- mindspore/profiler/parser/minddata_parser.py +0 -186
- mindspore/profiler/parser/minddata_pipeline_parser.py +0 -299
- mindspore/profiler/parser/op_intermediate_parser.py +0 -149
- mindspore/profiler/parser/optime_parser.py +0 -250
- mindspore/profiler/parser/profiler_info.py +0 -213
- mindspore/profiler/parser/step_trace_parser.py +0 -666
- mindspore/utils/hooks.py +0 -81
- /mindspore/common/{_auto_dynamic.py → dynamic_shape/_auto_dynamic.py} +0 -0
- {mindspore-2.6.0rc1.dist-info → mindspore-2.7.0.dist-info}/WHEEL +0 -0
- {mindspore-2.6.0rc1.dist-info → mindspore-2.7.0.dist-info}/entry_points.txt +0 -0
- {mindspore-2.6.0rc1.dist-info → mindspore-2.7.0.dist-info}/top_level.txt +0 -0
|
@@ -1,72 +0,0 @@
|
|
|
1
|
-
# Copyright 2024 Huawei Technologies Co., Ltd
|
|
2
|
-
#
|
|
3
|
-
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
-
# you may not use this file except in compliance with the License.
|
|
5
|
-
# You may obtain a copy of the License at
|
|
6
|
-
#
|
|
7
|
-
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
-
#
|
|
9
|
-
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
-
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
-
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
-
# See the License for the specific language governing permissions and
|
|
13
|
-
# limitations under the License.
|
|
14
|
-
# ============================================================================
|
|
15
|
-
"""Profiler host information parser"""
|
|
16
|
-
import os
|
|
17
|
-
import json
|
|
18
|
-
from decimal import Decimal
|
|
19
|
-
from mindspore.profiler.common.validator.validate_path import validate_and_normalize_path
|
|
20
|
-
from mindspore.profiler.parser.ascend_analysis.constant import Constant
|
|
21
|
-
from mindspore.profiler.parser.profiler_info import ProfilerInfo
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
class GPUProfilerInfoParser:
|
|
25
|
-
"""Parse files that record information, such as profiler_info.json"""
|
|
26
|
-
|
|
27
|
-
_freq = 2600000000
|
|
28
|
-
_system_time = 0
|
|
29
|
-
_system_cnt = 0
|
|
30
|
-
_s_to_ns = 1e9
|
|
31
|
-
# profiler information related files
|
|
32
|
-
_source_path = None
|
|
33
|
-
_loaded_frequency = False
|
|
34
|
-
_rank_id = 0
|
|
35
|
-
|
|
36
|
-
@classmethod
|
|
37
|
-
def init_source_path(cls, source_path: str):
|
|
38
|
-
"""initialize the path of PROF_* directory."""
|
|
39
|
-
source_path = validate_and_normalize_path(source_path)
|
|
40
|
-
cls._source_path = source_path
|
|
41
|
-
|
|
42
|
-
@classmethod
|
|
43
|
-
def init_rank_id(cls, rank_id: int):
|
|
44
|
-
"""initialize the rank id."""
|
|
45
|
-
cls._rank_id = rank_id
|
|
46
|
-
|
|
47
|
-
@classmethod
|
|
48
|
-
def get_local_time(cls, syscnt: int) -> Decimal:
|
|
49
|
-
"""Convert syscnt to local time."""
|
|
50
|
-
if not cls._loaded_frequency:
|
|
51
|
-
profiler_info_path = os.path.join(cls._source_path, f"profiler_info_{cls._rank_id}.json")
|
|
52
|
-
if not os.path.isfile(profiler_info_path):
|
|
53
|
-
raise RuntimeError(f"Can`t find the file {profiler_info_path}, please check !")
|
|
54
|
-
with os.fdopen(os.open(profiler_info_path, os.O_RDONLY, 0o600),
|
|
55
|
-
'r') as fr:
|
|
56
|
-
profiler_info_data = json.load(fr)
|
|
57
|
-
cls._system_cnt = profiler_info_data.get('system_cnt')
|
|
58
|
-
cls._system_time = profiler_info_data.get('system_time')
|
|
59
|
-
ProfilerInfo.set_system_time(cls._system_cnt)
|
|
60
|
-
ProfilerInfo.set_system_cnt(cls._system_time)
|
|
61
|
-
cls._loaded_frequency = True
|
|
62
|
-
|
|
63
|
-
start_ns = cls._get_timestamp(syscnt)
|
|
64
|
-
return Decimal(start_ns).quantize(Decimal('0.000')) * Decimal(Constant.NS_TO_US).quantize(Decimal('0.000'))
|
|
65
|
-
|
|
66
|
-
@classmethod
|
|
67
|
-
def _get_timestamp(cls, syscnt: int):
|
|
68
|
-
"""Convert syscnt to time stamp."""
|
|
69
|
-
ratio = cls._freq / cls._s_to_ns
|
|
70
|
-
# The unit of timestamp is ns
|
|
71
|
-
timestamp = (syscnt - cls._system_cnt) / ratio + cls._system_time
|
|
72
|
-
return timestamp
|
|
@@ -1,573 +0,0 @@
|
|
|
1
|
-
# Copyright 2021 Huawei Technologies Co., Ltd
|
|
2
|
-
#
|
|
3
|
-
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
-
# you may not use this file except in compliance with the License.
|
|
5
|
-
# You may obtain a copy of the License at
|
|
6
|
-
#
|
|
7
|
-
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
-
#
|
|
9
|
-
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
-
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
-
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
-
# See the License for the specific language governing permissions and
|
|
13
|
-
# limitations under the License.
|
|
14
|
-
# ============================================================================
|
|
15
|
-
"""The parser for parsing hccl files."""
|
|
16
|
-
import csv
|
|
17
|
-
import json
|
|
18
|
-
import os
|
|
19
|
-
import stat
|
|
20
|
-
from enum import Enum
|
|
21
|
-
import numpy as np
|
|
22
|
-
|
|
23
|
-
from mindspore.profiler.common.exceptions.exceptions import \
|
|
24
|
-
ProfilerPathErrorException, ProfilerFileNotFoundException, \
|
|
25
|
-
ProfilerDirNotFoundException, ProfilerRawFileException
|
|
26
|
-
from mindspore import log as logger
|
|
27
|
-
from mindspore.profiler.common.validator.validate_path import \
|
|
28
|
-
validate_and_normalize_path
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
class CommunicationInfo(Enum):
|
|
32
|
-
"""
|
|
33
|
-
Communication related enumeration types.
|
|
34
|
-
|
|
35
|
-
Enum:
|
|
36
|
-
RDMA: Communication link between servers in cluster training.
|
|
37
|
-
SDMA: Communication link inside server in cluster training.
|
|
38
|
-
LOCAL: The operation of this card has no transmission process.
|
|
39
|
-
RDMASEND: Communication operator of RDMA link.
|
|
40
|
-
REDUCE_INLINE: Communication operator of SDMA link.
|
|
41
|
-
MEMCPY: Communication operator of SDMA link.
|
|
42
|
-
NOTIFY_RECORD: Communication operator of SDMA link.
|
|
43
|
-
NOTIFY_WAIT: operator of LOCAL.
|
|
44
|
-
"""
|
|
45
|
-
RDMA = 'RDMA'
|
|
46
|
-
SDMA = 'SDMA'
|
|
47
|
-
LOCAL = 'LOCAL'
|
|
48
|
-
RDMASEND = 'RDMASend'
|
|
49
|
-
REDUCE_INLINE = 'Reduce Inline'
|
|
50
|
-
MEMCPY = 'Memcpy'
|
|
51
|
-
NOTIFY_RECORD = 'Notify Record'
|
|
52
|
-
NOTIFY_WAIT = 'Notify Wait'
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
class HcclParser:
|
|
56
|
-
"""
|
|
57
|
-
The parser for parsing hccl file.
|
|
58
|
-
|
|
59
|
-
Args:
|
|
60
|
-
source_dir (str): The hccl source dir.
|
|
61
|
-
device_id (str): The device ID.
|
|
62
|
-
rank_id (str): The rank ID.
|
|
63
|
-
output_path (str): The directory of the parsed file. Default: `./`.
|
|
64
|
-
|
|
65
|
-
Raises:
|
|
66
|
-
ProfilerPathErrorException: If the hccl file path or the output path is invalid.
|
|
67
|
-
ProfilerFileNotFoundException: If the hccl file or the output dir does not exist.
|
|
68
|
-
"""
|
|
69
|
-
_parsed_hccl_file_name = 'hccl_raw_{}.csv'
|
|
70
|
-
_col_names = ['step_num', 'communication_cost', 'wait_cost', 'link_info', 'communication_operator_cost']
|
|
71
|
-
|
|
72
|
-
def __init__(self, source_dir, device_id, rank_id, output_path):
|
|
73
|
-
self._dev_id = device_id
|
|
74
|
-
self._rank_id = rank_id
|
|
75
|
-
self._source_dir = source_dir
|
|
76
|
-
self._save_path = self._get_save_path(output_path)
|
|
77
|
-
self._step_trace_info = self._get_step_trace_info(output_path)
|
|
78
|
-
self._communication_operator_name_mapping_info = self._get_communication_operator_name_mapping_info()
|
|
79
|
-
|
|
80
|
-
@staticmethod
|
|
81
|
-
def _divide_communication_info_by_thread(trace_events: list):
|
|
82
|
-
"""Divide information by thread."""
|
|
83
|
-
threads_dict = dict()
|
|
84
|
-
for item in trace_events:
|
|
85
|
-
thread_id = item.get("tid")
|
|
86
|
-
if thread_id not in threads_dict.keys():
|
|
87
|
-
threads_dict[thread_id] = [item]
|
|
88
|
-
else:
|
|
89
|
-
threads_dict[thread_id].append(item)
|
|
90
|
-
return threads_dict
|
|
91
|
-
|
|
92
|
-
@staticmethod
def _calculate_adma_link_info(trace_event: list):
    """
    Calculate RDMA link info.

    When the link is RDMA, three consecutive operators RDMASend, RDMASend and Notify Wait are
    matched, and the sum of the time of the three operators is taken as one communication time.

    Args:
        trace_event (list): Trace event dicts of one thread. Each event is read through
            `event["args"]["task type"]`, `event["args"]["size"]` and `event["dur"]`.

    Returns:
        list, [communication time, communication size, bandwidth, wait time]. According to the
        unit notes below: time and wait time in ms, size in KB, bandwidth in KB/s.
        The bandwidth is 0 when either the size or the time is 0.
    """
    def _parse_size(raw_size):
        """Return the size as int; non-int values are parsed as hexadecimal strings."""
        if not raw_size:
            return 0
        return raw_size if isinstance(raw_size, int) else int(raw_size, 16)

    rdma_send = CommunicationInfo.RDMASEND.value
    notify_wait = CommunicationInfo.NOTIFY_WAIT.value

    rdma_communication_time = 0
    rdma_communication_size = 0
    rdma_communication_wait_time = 0
    start_index = 0
    end_index = len(trace_event) - 1
    while start_index < end_index:
        first_task_type = trace_event[start_index].get("args").get("task type")
        # `start_index < end_index - 1` guarantees that two more events follow.
        if first_task_type == rdma_send and start_index < end_index - 1:
            second_event = trace_event[start_index + 1]
            third_event = trace_event[start_index + 2]
            second_task_type = second_event.get("args").get("task type")
            third_task_type = third_event.get("args").get("task type")
            if second_task_type == rdma_send and third_task_type == notify_wait:
                first_send_cost = trace_event[start_index].get("dur", 0)
                second_send_cost = second_event.get("dur", 0)
                notify_wait_cost = third_event.get("dur", 0)
                rdma_communication_time += first_send_cost + second_send_cost + notify_wait_cost
                rdma_communication_wait_time += notify_wait_cost
                rdma_communication_size += (
                    _parse_size(trace_event[start_index].get("args").get("size"))
                    + _parse_size(second_event.get("args").get("size"))
                )
                # Skip the two events just consumed; the common increment below skips the first.
                start_index += 2
        start_index += 1

    # The unit of rdma_communication_wait_time is ms.
    # The unit of rdma_bandwidth is KB/s.
    # The unit of rdma_communication_size is k_byte and The unit of rdma_communication_time is ms.
    rdma_communication_wait_time = rdma_communication_wait_time / 1e3
    rdma_communication_size = rdma_communication_size / 1e3
    rdma_communication_time = rdma_communication_time / 1e3
    # Guard the time as well: matched events without "dur" would otherwise divide by zero.
    if rdma_communication_size and rdma_communication_time:
        rdma_bandwidth = rdma_communication_size / (rdma_communication_time / 1e3)
    else:
        rdma_bandwidth = 0

    return [rdma_communication_time, rdma_communication_size, rdma_bandwidth, rdma_communication_wait_time]
|
|
142
|
-
|
|
143
|
-
@staticmethod
def _calculate_notify_wait_time(trace_event: list):
    """
    Add up the "dur" of every Notify Wait event.

    Args:
        trace_event (list): Trace event dicts; each is read through `event["args"]["task type"]`
            and `event["dur"]` (0 when absent).

    Returns:
        float, the accumulated duration divided by 1e3 (ms according to the unit note below).
    """
    notify_wait = CommunicationInfo.NOTIFY_WAIT.value
    accumulated_wait = 0
    for event in trace_event:
        if event.get("args").get("task type") != notify_wait:
            continue
        accumulated_wait += event.get("dur", 0)
    # The unit of the returned value is ms.
    return accumulated_wait / 1e3
|
|
154
|
-
|
|
155
|
-
@staticmethod
|
|
156
|
-
def _parser_link_dict(result_dict, src_dst_key, src_dst_value):
|
|
157
|
-
"""Parser link info to dict."""
|
|
158
|
-
if src_dst_key not in result_dict.keys():
|
|
159
|
-
result_dict[src_dst_key] = dict()
|
|
160
|
-
for link_key, link_value in src_dst_value.items():
|
|
161
|
-
if link_key not in result_dict[src_dst_key].keys():
|
|
162
|
-
result_dict[src_dst_key][link_key] = list()
|
|
163
|
-
result_dict[src_dst_key][link_key].append(link_value)
|
|
164
|
-
|
|
165
|
-
@staticmethod
|
|
166
|
-
def _calculate_link_value(link_info: list, calculate_type):
|
|
167
|
-
"""Calculate link average or total value."""
|
|
168
|
-
result_dict = dict()
|
|
169
|
-
for item in link_info:
|
|
170
|
-
for src_dst_key, src_dst_value in item.items():
|
|
171
|
-
HcclParser._parser_link_dict(result_dict, src_dst_key, src_dst_value)
|
|
172
|
-
for src_dst_key, src_dst_value in result_dict.items():
|
|
173
|
-
for link_key, _ in src_dst_value.items():
|
|
174
|
-
if calculate_type == 'average':
|
|
175
|
-
result_dict[src_dst_key][link_key] = np.mean(result_dict[src_dst_key][link_key], axis=0).tolist()
|
|
176
|
-
if calculate_type == 'total':
|
|
177
|
-
result_dict[src_dst_key][link_key] = np.sum(result_dict[src_dst_key][link_key], axis=0).tolist()
|
|
178
|
-
|
|
179
|
-
return result_dict
|
|
180
|
-
|
|
181
|
-
def parse(self):
    """Parse the communication info found under the source dir and save the result."""
    hccl_dir = self._source_dir
    self._parse_and_save(hccl_dir)
|
|
184
|
-
|
|
185
|
-
def _parse_communication_cost(self, operators_cost_info, info, operators_dict):
|
|
186
|
-
"""Parse communication cost."""
|
|
187
|
-
for k, v in operators_cost_info.items():
|
|
188
|
-
for item in v:
|
|
189
|
-
# index0:step_num
|
|
190
|
-
if info[0] == item[0]:
|
|
191
|
-
operators_dict[k] = item
|
|
192
|
-
|
|
193
|
-
def _parse_and_save(self, dir_path):
    """
    Parse the communication info under `dir_path` and save it as a csv file at `self._save_path`.

    The file holds the header `self._col_names`, one row per step and, when at least one step
    was found, a final row of average values whose first column is '-'.

    Args:
        dir_path (str): The directory that contains one sub-directory per communication operator.
    """
    operators_cost_info = self._get_communication_operators_cost_info(dir_path)
    communication_info_cache = [item for v in operators_cost_info.values() for item in v]
    communication_info_cache = self._merge_communication_info_by_step_num(communication_info_cache)
    for info in communication_info_cache:
        operators_dict = dict()
        self._parse_communication_cost(operators_cost_info, info, operators_dict)
        info.append(operators_dict)
    # Calculate device communication average.
    device_communication_average_value = self._calculate_communication_average_value(communication_info_cache)
    # Calculate operator communication average.
    operators_average_value = dict()
    for k, v in operators_cost_info.items():
        average_value = self._calculate_communication_average_value(v)
        # The symbol '-' is used to indicate that the line is average information.
        average_value.insert(0, '-')
        operators_average_value[k] = average_value
    device_communication_average_value.append(operators_average_value)
    # The symbol '-' is used to indicate that the line is average information.
    device_communication_average_value.insert(0, '-')
    # Without any step the average helper returns [], so the average row would lack the
    # link-info and operator-info columns indexed below; skip the row in that case.
    has_average_row = bool(communication_info_cache)
    with open(self._save_path, 'w', newline='') as save_file:
        csv_writer = csv.writer(save_file)
        csv_writer.writerow(self._col_names)
        for item in communication_info_cache:
            # item[3]:link_info which is a dictionary that needs to be encoded before it is written to a CSV file.
            # item[4]:it is a dictionary that needs to be encoded before it is written to a CSV file.
            item[3] = json.dumps(item[3])
            item[4] = json.dumps(item[4])
            csv_writer.writerow(item)
        if has_average_row:
            # device_communication_average_value[3]: average value for link info
            # device_communication_average_value[4]: average value for operator info
            device_communication_average_value[3] = json.dumps(device_communication_average_value[3])
            device_communication_average_value[4] = json.dumps(device_communication_average_value[4])
            csv_writer.writerow(device_communication_average_value)
    os.chmod(self._save_path, stat.S_IREAD | stat.S_IWRITE)
|
|
233
|
-
|
|
234
|
-
def _get_save_path(self, output_path):
    """
    Build the path of the parsed hccl file.

    Args:
        output_path (str): The output dir; it is validated with `_validate_dir_path`.

    Returns:
        str, the validated output dir joined with the file name derived from the rank ID.
    """
    checked_dir = self._validate_dir_path(output_path)
    file_name = self._parsed_hccl_file_name.format(self._rank_id)
    return os.path.join(checked_dir, file_name)
|
|
248
|
-
|
|
249
|
-
def _get_step_trace_info(self, source_dir):
    """
    Get the start and end timestamps in a step and communication operators names.

    Args:
        source_dir (str): The directory that contains `step_trace_raw_{rank_id}_detail_time.csv`.

    Returns:
        list, [communication_operators_names, step_timestamps_info], where the names are the header
        columns from index 9 on and every timestamp entry is [step_num, start_point, end_point].

    Raises:
        ProfilerPathErrorException: If the step trace file path is invalid.
        ProfilerFileNotFoundException: If the step trace file does not exist.
    """
    file_path = os.path.join(
        source_dir,
        f'step_trace_raw_{self._rank_id}_detail_time.csv'
    )
    try:
        file_path = validate_and_normalize_path(file_path)
    except RuntimeError as err:
        logger.warning('file path is invalid.')
        raise ProfilerPathErrorException('file path is invalid.') from err
    if not os.path.isfile(file_path):
        logger.warning('The step trace file <%s> not found.', file_path)
        raise ProfilerFileNotFoundException(file_path)

    with open(file_path, 'r') as src_file:
        csv_reader = csv.reader(src_file)
        # The first row of step trace file is like: step_num, start_point,...,communication_operator_name.
        # The position number of the first communication operator name is 9.
        communication_operators_names = next(csv_reader)[9:]

        # index_0:step_num, index_1:start_point, index_2:end_point
        # The unit of time stamp is 10ns. To convert it to μs, you need to divide it by 100.
        # `csv.reader` yields [] for a blank line, so test `info` before indexing it.
        step_timestamps_info = [
            [info[0], float(info[1]) / 100, float(info[2]) / 100]
            for info in csv_reader if info and info[0].isdigit()
        ]

    return [communication_operators_names, step_timestamps_info]
|
|
278
|
-
|
|
279
|
-
def _get_communication_operator_name_mapping_info(self):
    """
    Get the name of communication operators mapping between hccl and step trace.

    Returns:
        dict, maps the operator type taken from the hccl directory names to a list of
        (hccl operator name, step trace operator name) pairs.
    """
    dir_path = self._validate_dir_path(self._source_dir)

    # The name of the operator in hccl is like: operatorName_{Ordered_number}_xx_xx.
    # Group the sub-directory names by operatorName, then order each group by Ordered_number.
    hccl_names_by_type = dict()
    for entry in os.scandir(dir_path):
        if entry.is_dir():
            hccl_names_by_type.setdefault(entry.name.split('_')[0], []).append(entry.name)
    for op_type in hccl_names_by_type:
        hccl_names_by_type[op_type] = sorted(hccl_names_by_type[op_type],
                                             key=lambda name: int(name.split('_')[1]))

    # The op_info in step trace is like: [op_name,op_name_start_point,op_name_end_point]
    # The name of the operator in step trace can be obtained every three.
    # The name of the operator in step trace is like: stream_xx_xx_operatorName-opxx.
    step_trace_names_by_type = dict()
    for full_name in self._step_trace_info[0][::3]:
        op_type = full_name.split('/')[-1].split('-')[0].split('_')[-1]
        step_trace_names_by_type.setdefault(op_type, []).append(full_name)

    communication_operator_mapping_info = dict()
    for hccl_key, hccl_value in hccl_names_by_type.items():
        for step_trace_key, step_trace_value in step_trace_names_by_type.items():
            # the step_trace_key format is: operatorName
            if hccl_key.lower() == step_trace_key.lower().split('/')[-1]:
                communication_operator_mapping_info[hccl_key] = list(zip(hccl_value, step_trace_value))

    logger.info("Communication operator name mapping info is %s", communication_operator_mapping_info)

    return communication_operator_mapping_info
|
|
314
|
-
|
|
315
|
-
def _calculate_the_step_by_timestamp(self, timestamp):
|
|
316
|
-
"""Calculate the step according to the timestamp."""
|
|
317
|
-
# index0:communication_operator_name, index1:step_timestamps_info
|
|
318
|
-
step_timestamps_info = self._step_trace_info[1]
|
|
319
|
-
step_timestamps_len = len(step_timestamps_info)
|
|
320
|
-
# index_0:step_num, index_1:start_point, index_2:end_point
|
|
321
|
-
if timestamp < step_timestamps_info[0][1]:
|
|
322
|
-
step_num = "1"
|
|
323
|
-
elif step_timestamps_info[step_timestamps_len - 1][2] < timestamp:
|
|
324
|
-
step_num = step_timestamps_info[step_timestamps_len - 1][0]
|
|
325
|
-
else:
|
|
326
|
-
for item in step_timestamps_info:
|
|
327
|
-
if item[1] <= timestamp < item[2]:
|
|
328
|
-
step_num = item[0]
|
|
329
|
-
return step_num
|
|
330
|
-
|
|
331
|
-
def _get_communication_operators_cost_info(self, dir_path):
    """
    Obtain time-consuming information of all communication operators.

    Args:
        dir_path (str): The directory holding one sub-directory per communication operator.

    Returns:
        dict, maps the operator name to its per-step cost list. The step trace name is used
        when a mapping exists, otherwise the hccl directory name.
    """
    cost_by_operator = dict()
    checked_dir = self._validate_dir_path(dir_path)
    sub_dir_paths = [os.path.join(checked_dir, entry.name)
                     for entry in os.scandir(checked_dir) if entry.is_dir()]
    for sub_dir_path in sub_dir_paths:
        cost = self._calculate_communication_operator_cost(sub_dir_path)
        hccl_name = os.path.basename(sub_dir_path)
        candidates = self._communication_operator_name_mapping_info.get(hccl_name.split('_')[0], [])
        # index1: operator name in step trace.
        mapped_names = [pair[1] for pair in candidates if pair[0] == hccl_name]
        if mapped_names:
            display_name = mapped_names[0]
        else:
            logger.warning("The mapping relationship between op name in hccl and op name in step trace "
                           "cannot be found. Use op name in hccl to show the name of the communication operator.")
            display_name = hccl_name
        cost_by_operator[display_name] = cost
    return cost_by_operator
|
|
350
|
-
|
|
351
|
-
def _calculate_communication_operator_cost(self, dir_path):
    """
    Calculate communication operator cost. Such as allReduce_1,allReduce_2.

    Args:
        dir_path (str): The directory of one communication operator; every regular file in it
            is parsed as one execution round.

    Returns:
        list, the per-file costs merged by step number.
    """
    checked_dir = self._validate_dir_path(dir_path)
    file_names = [entry.name for entry in os.scandir(checked_dir) if entry.is_file()]
    per_file_cost = []
    for file_name in file_names:
        per_file_cost.append(
            self._calculate_communication_operator_iter_cost(os.path.join(checked_dir, file_name))
        )
    # Add the same step_num merge.
    return self._merge_communication_info_by_step_num(per_file_cost)
|
|
360
|
-
|
|
361
|
-
def _merge_communication_info_by_step_num(self, communication_info: list):
    """
    According to step num to merge communication info.

    Args:
        communication_info (list): Rows shaped
            [step_num, communication_cost, communication_wait_cost, link_info].

    Returns:
        list, one row per numeric step number in ascending order, with the costs summed and the
        link info totalled through `_calculate_link_value`.
    """
    # index0:step_num,index1:communication_cost,index2:communication_wait_cost,index3:link_info
    numeric_steps = sorted({int(row[0]) for row in communication_info if row[0].isdigit()})
    merged_rows = list()
    for step in numeric_steps:
        step_key = str(step)
        rows_of_step = [row for row in communication_info if row[0] == step_key]
        total_cost = sum([row[1] for row in rows_of_step])
        total_wait_cost = sum([row[2] for row in rows_of_step])
        total_link = self._calculate_link_value([row[3] for row in rows_of_step], "total")
        merged_rows.append([step_key, total_cost, total_wait_cost, total_link])
    return merged_rows
|
|
379
|
-
|
|
380
|
-
def _calculate_communication_operator_iter_cost(self, file_path):
    """
    Calculate the time-consuming of communication operator in one execution round.

    Args:
        file_path (str): Path of a json file whose "traceEvents" entry lists the trace events.

    Returns:
        list, [step_id, communication_cost, communication_wait_cost, link_info]. The two costs come
        from the mainstream (largest thread id) only; the link info covers all streams.

    Raises:
        ProfilerRawFileException: If the file is not valid json or holds no trace events.
    """

    def _inner_calculate_communication_operator_iter_cost(events):
        total_notify_wait = HcclParser._calculate_notify_wait_time(events)
        # Divide information by src dst rank_id.
        src_dst_dict = self._divide_communication_info_by_src_dst_rank(events)
        src_dst_link_info = self._calculate_src_dst_link_info(src_dst_dict)
        communication_cost, communication_wait = self._calculate_device_communication_cost(src_dst_link_info)
        total_notify_wait -= communication_wait
        return [communication_cost, total_notify_wait, src_dst_link_info]

    file_path = self._validate_file_path(file_path)
    # Only the json decoding needs the file handle; release it before the calculations.
    with open(file_path, 'r') as src_file:
        try:
            operator_info = json.load(src_file)
        except (json.JSONDecodeError, TypeError) as err:
            logger.warning(err)
            raise ProfilerRawFileException('Fail to parse operator file.') from err

    trace_events = operator_info.get("traceEvents") if isinstance(operator_info, dict) else None
    if not trace_events:
        # A missing or empty event list used to fail below with a bare TypeError/IndexError.
        logger.warning('No trace events found in operator file <%s>.', file_path)
        raise ProfilerRawFileException('Fail to parse operator file.')

    operator_timestamp = trace_events[0].get("ts", 0)
    step_id = self._calculate_the_step_by_timestamp(operator_timestamp)
    # Statistics of communication operators in all streams.
    total_communication_operator_iter_cost = \
        _inner_calculate_communication_operator_iter_cost(trace_events)
    # Statistics of communication operators in mainstream.
    threads_dict = self._divide_communication_info_by_thread(trace_events)
    # The largest value is mainstream.
    major_thread = max(threads_dict)
    major_thread_trace_events = threads_dict.get(major_thread)
    mainstream_communication_operator_iter_cost = \
        _inner_calculate_communication_operator_iter_cost(major_thread_trace_events)
    # index0:communication_cost,index1:communication_wait_cost,index2:link_info
    return [step_id, mainstream_communication_operator_iter_cost[0],
            mainstream_communication_operator_iter_cost[1],
            total_communication_operator_iter_cost[2]]
|
|
416
|
-
|
|
417
|
-
def _divide_communication_info_by_src_dst_rank(self, trace_event: list):
    """
    Divide information by src rank id and dst rank id.

    Events without "src rank" or "dst rank" are dropped. For events whose transport type is
    LOCAL, the two ranks are swapped, both in the event's "args" (modified in place) and in the key.

    Args:
        trace_event (list): Trace event dicts; each is read through `event["args"]`.

    Returns:
        dict, maps "<rank>-<rank>" keys to the list of events of that pair.
    """
    placeholder_rank = int('0xffffffff', 16)
    events_by_pair = dict()
    for event in trace_event:
        args = event.get("args")
        src_rank = args.get("src rank")
        dst_rank = args.get("dst rank")
        if src_rank is None or dst_rank is None:
            continue

        # When the SDMA operation is in the card,
        # the source card or destination card is 0xffffffff, and it needs to be converted to localrank.
        if int(src_rank) == placeholder_rank:
            src_rank = dst_rank
        if int(dst_rank) == placeholder_rank:
            dst_rank = src_rank

        if args.get("transport type") == CommunicationInfo.LOCAL.value:
            args["src rank"] = dst_rank
            args["dst rank"] = src_rank
            first_rank, second_rank = dst_rank, src_rank
        else:
            first_rank, second_rank = src_rank, dst_rank
        pair_key = str(first_rank) + '-' + str(second_rank)

        events_by_pair.setdefault(pair_key, []).append(event)
    return events_by_pair
|
|
446
|
-
|
|
447
|
-
def _divide_communication_info_by_link_type(self, trace_event: list):
    """
    Divide information by link type.

    Only RDMA and SDMA events get their own entry, and Notify Record events of these two
    transport types are skipped. LOCAL events are appended to the RDMA entry, but only when
    that entry already exists. Events of any other or missing transport type are ignored.

    Args:
        trace_event (list): Trace event dicts; each is read through `event["args"]`.

    Returns:
        dict, maps the transport type to the list of its events.
    """
    rdma = CommunicationInfo.RDMA.value
    sdma = CommunicationInfo.SDMA.value
    local = CommunicationInfo.LOCAL.value
    notify_record = CommunicationInfo.NOTIFY_RECORD.value

    events_by_link = dict()
    for event in trace_event:
        args = event.get("args")
        transport = args.get("transport type")
        if transport is None:
            continue
        if transport in (rdma, sdma):
            # Filter out the Notify Record operator, because it does not transmit the actual amount of data.
            if args.get("task type") == notify_record:
                continue
            events_by_link.setdefault(transport, []).append(event)
        elif transport == local:
            rdma_events = events_by_link.get(rdma)
            if rdma_events:
                rdma_events.append(event)
    return events_by_link
|
|
467
|
-
|
|
468
|
-
def _calculate_device_communication_cost(self, src_dst_link_info: dict):
|
|
469
|
-
"""Calculate notify wait time."""
|
|
470
|
-
total_communication_time = 0
|
|
471
|
-
total_wait_time = 0
|
|
472
|
-
for src_dst_value in src_dst_link_info.values():
|
|
473
|
-
for link_type_value in src_dst_value.values():
|
|
474
|
-
# time_cost:0,size_cost:1,brand_width:2,wait_time:3
|
|
475
|
-
total_communication_time += link_type_value[0]
|
|
476
|
-
if len(link_type_value) > 3:
|
|
477
|
-
total_wait_time += link_type_value[3]
|
|
478
|
-
return total_communication_time, total_wait_time
|
|
479
|
-
|
|
480
|
-
def _parse_link_cost(self, result_dict, key, link_type_dict):
    """
    Parse link cost and store it in `result_dict[key]`, one entry per link type.

    Args:
        result_dict (dict): Output dict; `result_dict[key]` must already exist.
        key: The src-dst key whose entry is filled.
        link_type_dict (dict): Maps the link type to its trace events. Only RDMA and SDMA
            entries produce a result.
    """
    rdma = CommunicationInfo.RDMA.value
    sdma = CommunicationInfo.SDMA.value
    for link_type, events in link_type_dict.items():
        if link_type == rdma:
            # Divide information by thread, then total the per-thread results.
            events_by_thread = self._divide_communication_info_by_thread(events)
            per_thread_info = [self._calculate_adma_link_info(thread_events)
                               for thread_events in events_by_thread.values()]
            result_dict[key][link_type] = np.sum(per_thread_info, axis=0).tolist()
        elif link_type == sdma:
            result_dict[key][link_type] = self._calculate_sdma_link_info(events)
|
|
495
|
-
|
|
496
|
-
def _calculate_src_dst_link_info(self, src_dst_dict: dict):
    """
    Calculate src dst link info.

    Args:
        src_dst_dict (dict): Maps the src-dst key to the trace events of that pair.

    Returns:
        dict, {src_dst_key: {link_type: link_cost}}; pairs without any events grouped by
        link type are left out.
    """
    link_info_by_pair = dict()
    for pair_key, events in src_dst_dict.items():
        # Divide information by link type.
        events_by_link = self._divide_communication_info_by_link_type(events)
        if events_by_link:
            link_info_by_pair[pair_key] = dict()
            self._parse_link_cost(link_info_by_pair, pair_key, events_by_link)
    return link_info_by_pair
|
|
507
|
-
|
|
508
|
-
def _calculate_sdma_link_info(self, trace_event: list):
    """
    Calculate SDMA link info.

    When the link is SDMA, the communication time of the primary link is the sum of the execution
    time of Reduce inline and Memcpy operators.

    Args:
        trace_event (list): Trace event dicts; each is read through `event["args"]["task type"]`,
            `event["args"]["size"]` and `event["dur"]`.

    Returns:
        list, [communication time, communication size, bandwidth]. According to the unit notes
        below: time in ms, size in KB, bandwidth in KB/s. The bandwidth is 0 when either the
        size or the time is 0.
    """
    counted_task_types = (CommunicationInfo.REDUCE_INLINE.value, CommunicationInfo.MEMCPY.value)
    sdma_communication_time = 0
    sdma_communication_size = 0

    for item in trace_event:
        if item.get("args").get("task type") not in counted_task_types:
            continue
        sdma_communication_time += item.get("dur", 0)
        sdma_size = item.get("args").get("size")
        if sdma_size:
            # Non-int sizes are parsed as hexadecimal strings.
            sdma_size = sdma_size if isinstance(sdma_size, int) else int(sdma_size, 16)
        else:
            sdma_size = 0
        sdma_communication_size += sdma_size

    # The unit of sdma_bandwidth is KB/s.
    # The unit of sdma_communication_size is k_byte and The unit of sdma_communication_time is ms.
    sdma_communication_time = sdma_communication_time / 1e3
    sdma_communication_size = sdma_communication_size / 1e3
    # Guard the time as well: events without "dur" would otherwise divide by zero.
    if sdma_communication_size and sdma_communication_time:
        sdma_bandwidth = sdma_communication_size / (sdma_communication_time / 1e3)
    else:
        sdma_bandwidth = 0
    return [sdma_communication_time, sdma_communication_size, sdma_bandwidth]
|
|
537
|
-
|
|
538
|
-
def _calculate_communication_average_value(self, communication_info: list):
    """
    Calculate communication average value.

    Args:
        communication_info (list): Rows indexed as index1: communication_cost, index2: wait_cost,
            index3: link_info.

    Returns:
        list, [average communication cost, average wait cost, average link info], or an empty
        list when `communication_info` is empty.
    """
    row_count = len(communication_info)
    if row_count == 0:
        return []
    # index1: communication_cost,index2:wait_cost,index3:link_info
    average_cost = sum([row[1] for row in communication_info]) / row_count
    average_wait_cost = sum([row[2] for row in communication_info]) / row_count
    average_link_info = HcclParser._calculate_link_value([row[3] for row in communication_info], 'average')
    return [average_cost, average_wait_cost, average_link_info]
|
|
550
|
-
|
|
551
|
-
def _validate_file_path(self, file_path):
    """
    Validate file path.

    Args:
        file_path (str): The path to check.

    Returns:
        str, the path as returned by `validate_and_normalize_path`.

    Raises:
        ProfilerPathErrorException: If `validate_and_normalize_path` raises RuntimeError.
        ProfilerFileNotFoundException: If the normalized path is not an existing file.
    """
    try:
        normalized_path = validate_and_normalize_path(file_path)
    except RuntimeError as err:
        logger.warning('file path is invalid.')
        raise ProfilerPathErrorException('file path is invalid.') from err
    if os.path.isfile(normalized_path):
        return normalized_path
    logger.warning('The file <%s> not found.', normalized_path)
    raise ProfilerFileNotFoundException(normalized_path)
|
|
562
|
-
|
|
563
|
-
def _validate_dir_path(self, dir_path):
    """
    Validate dir path.

    Args:
        dir_path (str): The path to check.

    Returns:
        str, the path as returned by `validate_and_normalize_path`.

    Raises:
        ProfilerPathErrorException: If `validate_and_normalize_path` raises RuntimeError.
        ProfilerDirNotFoundException: If the normalized path is not an existing directory.
    """
    try:
        normalized_path = validate_and_normalize_path(dir_path)
    except RuntimeError as err:
        logger.warning('dir path is invalid.')
        raise ProfilerPathErrorException('dir path is invalid.') from err
    if os.path.isdir(normalized_path):
        return normalized_path
    logger.warning('The dir <%s> not found.', normalized_path)
    raise ProfilerDirNotFoundException(normalized_path)
|