mindspore-2.6.0-cp311-cp311-win_amd64.whl → mindspore-2.7.0rc1-cp311-cp311-win_amd64.whl
This diff shows the content of publicly available package versions as released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in the public registry.
Potentially problematic release.
This version of mindspore might be problematic.
- mindspore/.commit_id +1 -1
- mindspore/Microsoft.VisualStudio.Telemetry.dll +0 -0
- mindspore/Newtonsoft.Json.dll +0 -0
- mindspore/__init__.py +1 -1
- mindspore/_c_dataengine.cp311-win_amd64.pyd +0 -0
- mindspore/_c_expression.cp311-win_amd64.pyd +0 -0
- mindspore/_c_mindrecord.cp311-win_amd64.pyd +0 -0
- mindspore/_checkparam.py +40 -9
- mindspore/{_deprecated → _extends/optimize}/__init__.py +9 -3
- mindspore/_extends/optimize/cell_utils.py +96 -0
- mindspore/_extends/parse/__init__.py +2 -2
- mindspore/_extends/parse/compile_config.py +44 -22
- mindspore/_extends/parse/deprecated/deprecated_tensor_method.py +1 -1
- mindspore/_extends/parse/parser.py +36 -61
- mindspore/_extends/parse/resources.py +39 -0
- mindspore/_extends/parse/standard_method.py +32 -13
- mindspore/_extends/parse/trope.py +8 -1
- mindspore/_extends/pijit/__init__.py +1 -2
- mindspore/amp.py +4 -4
- mindspore/atlprov.dll +0 -0
- mindspore/avcodec-59.dll +0 -0
- mindspore/avdevice-59.dll +0 -0
- mindspore/avfilter-8.dll +0 -0
- mindspore/avformat-59.dll +0 -0
- mindspore/avutil-57.dll +0 -0
- mindspore/boost/adasum.py +1 -1
- mindspore/boost/boost_cell_wrapper.py +4 -4
- mindspore/c1.dll +0 -0
- mindspore/c1xx.dll +0 -0
- mindspore/c2.dll +0 -0
- mindspore/common/__init__.py +27 -2
- mindspore/common/_grad_function.py +2 -1
- mindspore/common/_pijit_context.py +28 -7
- mindspore/common/_stub_tensor.py +1 -209
- mindspore/common/_tensor_cpp_method.py +1 -1
- mindspore/common/_tensor_docs.py +76 -15
- mindspore/common/api.py +193 -112
- mindspore/common/dtype.py +21 -11
- mindspore/common/dump.py +10 -15
- mindspore/common/generator.py +2 -3
- mindspore/common/hook_handle.py +11 -2
- mindspore/common/jit_config.py +1 -1
- mindspore/common/jit_trace.py +84 -105
- mindspore/common/parameter.py +26 -12
- mindspore/common/recompute.py +3 -3
- mindspore/common/sparse_tensor.py +0 -3
- mindspore/common/symbol.py +0 -1
- mindspore/common/tensor.py +48 -83
- mindspore/communication/_comm_helper.py +46 -4
- mindspore/communication/management.py +79 -7
- mindspore/context.py +38 -23
- mindspore/dataset/core/config.py +3 -3
- mindspore/dataset/engine/datasets.py +20 -7
- mindspore/dataset/engine/datasets_user_defined.py +32 -2
- mindspore/dataset/engine/iterators.py +2 -2
- mindspore/dataset/engine/obs/config_loader.py +2 -2
- mindspore/dataset/engine/obs/obs_mindrecord_dataset.py +8 -0
- mindspore/dataset/transforms/py_transforms.py +7 -3
- mindspore/dataset/transforms/transforms.py +7 -3
- mindspore/dataset/vision/validators.py +1 -0
- mindspore/device_context/ascend/device.py +1 -1
- mindspore/device_context/gpu/__init__.py +2 -2
- mindspore/device_context/gpu/device.py +1 -1
- mindspore/device_context/gpu/op_precision.py +4 -2
- mindspore/device_context/gpu/op_tuning.py +6 -3
- mindspore/device_manager.py +16 -9
- mindspore/dnnl.dll +0 -0
- mindspore/dpcmi.dll +0 -0
- mindspore/experimental/llm_boost/ascend_native/llama_boost_ascend_native.py +3 -5
- mindspore/experimental/llm_boost/atb/boost_base.py +2 -3
- mindspore/experimental/optim/adadelta.py +13 -20
- mindspore/experimental/optim/adagrad.py +15 -22
- mindspore/experimental/optim/adam.py +17 -24
- mindspore/experimental/optim/adamax.py +14 -22
- mindspore/experimental/optim/adamw.py +28 -34
- mindspore/experimental/optim/asgd.py +15 -25
- mindspore/experimental/optim/lr_scheduler.py +27 -45
- mindspore/experimental/optim/nadam.py +14 -24
- mindspore/experimental/optim/optimizer.py +13 -23
- mindspore/experimental/optim/radam.py +18 -24
- mindspore/experimental/optim/rmsprop.py +14 -25
- mindspore/experimental/optim/rprop.py +15 -26
- mindspore/experimental/optim/sgd.py +9 -19
- mindspore/hal/__init__.py +4 -4
- mindspore/hal/contiguous_tensors_handle.py +2 -2
- mindspore/hal/memory.py +1 -0
- mindspore/include/api/cell.h +37 -1
- mindspore/include/api/delegate.h +10 -0
- mindspore/include/api/model.h +3 -0
- mindspore/include/api/types.h +2 -2
- mindspore/include/c_api/model_c.h +0 -58
- mindspore/include/c_api/tensor_c.h +0 -26
- mindspore/include/dataset/vision_ascend.h +1 -1
- mindspore/jpeg62.dll +0 -0
- mindspore/mindrecord/tools/cifar10.py +60 -11
- mindspore/mindrecord/tools/cifar10_to_mr.py +5 -0
- mindspore/mindspore_backend_common.dll +0 -0
- mindspore/mindspore_backend_manager.dll +0 -0
- mindspore/mindspore_common.dll +0 -0
- mindspore/mindspore_core.dll +0 -0
- mindspore/mindspore_cpu_res_manager.dll +0 -0
- mindspore/mindspore_dump.dll +0 -0
- mindspore/mindspore_frontend.dll +0 -0
- mindspore/mindspore_glog.dll +0 -0
- mindspore/mindspore_memory_pool.dll +0 -0
- mindspore/mindspore_ms_backend.dll +0 -0
- mindspore/mindspore_ops.dll +0 -0
- mindspore/mindspore_ops_host.dll +0 -0
- mindspore/mindspore_ops_kernel_common.dll +0 -0
- mindspore/mindspore_profiler.dll +0 -0
- mindspore/mindspore_pyboost.dll +0 -0
- mindspore/mindspore_pynative.dll +0 -0
- mindspore/mindspore_res_manager.dll +0 -0
- mindspore/mindspore_runtime_pipeline.dll +0 -0
- mindspore/mint/__init__.py +4 -44
- mindspore/mint/distributed/__init__.py +1 -0
- mindspore/mint/distributed/distributed.py +208 -5
- mindspore/mint/nn/__init__.py +1 -1
- mindspore/mint/nn/functional.py +53 -6
- mindspore/mint/nn/layer/_functions.py +164 -294
- mindspore/mint/nn/layer/activation.py +8 -6
- mindspore/mint/nn/layer/conv.py +122 -98
- mindspore/mint/nn/layer/normalization.py +8 -22
- mindspore/mint/optim/adam.py +19 -18
- mindspore/mint/optim/adamw.py +14 -8
- mindspore/mint/optim/sgd.py +5 -5
- mindspore/msobj140.dll +0 -0
- mindspore/mspdb140.dll +0 -0
- mindspore/mspdbcore.dll +0 -0
- mindspore/mspdbst.dll +0 -0
- mindspore/mspft140.dll +0 -0
- mindspore/msvcdis140.dll +0 -0
- mindspore/msvcp140_1.dll +0 -0
- mindspore/msvcp140_2.dll +0 -0
- mindspore/msvcp140_atomic_wait.dll +0 -0
- mindspore/msvcp140_codecvt_ids.dll +0 -0
- mindspore/nn/cell.py +325 -499
- mindspore/nn/grad/cell_grad.py +11 -12
- mindspore/nn/layer/activation.py +32 -34
- mindspore/nn/layer/basic.py +67 -64
- mindspore/nn/layer/channel_shuffle.py +4 -4
- mindspore/nn/layer/combined.py +4 -2
- mindspore/nn/layer/conv.py +86 -85
- mindspore/nn/layer/dense.py +9 -7
- mindspore/nn/layer/embedding.py +50 -52
- mindspore/nn/layer/image.py +37 -39
- mindspore/nn/layer/math.py +111 -112
- mindspore/nn/layer/normalization.py +56 -44
- mindspore/nn/layer/pooling.py +58 -63
- mindspore/nn/layer/rnn_cells.py +33 -33
- mindspore/nn/layer/rnns.py +56 -56
- mindspore/nn/layer/thor_layer.py +74 -73
- mindspore/nn/layer/transformer.py +11 -1
- mindspore/nn/learning_rate_schedule.py +20 -20
- mindspore/nn/loss/loss.py +79 -81
- mindspore/nn/optim/adam.py +1 -1
- mindspore/nn/optim/adasum.py +2 -2
- mindspore/nn/optim/optimizer.py +1 -1
- mindspore/nn/optim/thor.py +2 -2
- mindspore/nn/probability/distribution/exponential.py +2 -1
- mindspore/nn/probability/distribution/poisson.py +2 -1
- mindspore/nn/sparse/sparse.py +3 -3
- mindspore/nn/wrap/cell_wrapper.py +34 -37
- mindspore/nn/wrap/grad_reducer.py +37 -37
- mindspore/nn/wrap/loss_scale.py +72 -74
- mindspore/numpy/array_creations.py +5 -5
- mindspore/numpy/fft.py +1 -1
- mindspore/numpy/math_ops.py +1 -1
- mindspore/opencv_core452.dll +0 -0
- mindspore/opencv_imgcodecs452.dll +0 -0
- mindspore/opencv_imgproc452.dll +0 -0
- mindspore/ops/_grad_experimental/grad_comm_ops.py +51 -13
- mindspore/ops/_grad_experimental/grad_debug_ops.py +14 -0
- mindspore/ops/_vmap/vmap_array_ops.py +6 -13
- mindspore/ops/_vmap/vmap_nn_ops.py +8 -16
- mindspore/ops/auto_generate/cpp_create_prim_instance_helper.py +17 -8
- mindspore/ops/auto_generate/gen_extend_func.py +1 -51
- mindspore/ops/auto_generate/gen_ops_def.py +463 -257
- mindspore/ops/auto_generate/gen_ops_prim.py +1127 -885
- mindspore/ops/auto_generate/pyboost_inner_prim.py +31 -1
- mindspore/ops/composite/__init__.py +10 -0
- mindspore/ops/composite/base.py +8 -4
- mindspore/ops/composite/multitype_ops/__init__.py +12 -1
- mindspore/ops/composite/multitype_ops/_compile_utils.py +132 -108
- mindspore/ops/composite/multitype_ops/add_impl.py +70 -2
- mindspore/ops/composite/multitype_ops/div_impl.py +49 -0
- mindspore/ops/composite/multitype_ops/floordiv_impl.py +29 -0
- mindspore/ops/composite/multitype_ops/getitem_impl.py +11 -0
- mindspore/ops/composite/multitype_ops/mod_impl.py +5 -3
- mindspore/ops/composite/multitype_ops/mul_impl.py +49 -0
- mindspore/ops/composite/multitype_ops/setitem_impl.py +57 -0
- mindspore/ops/composite/multitype_ops/sub_impl.py +34 -0
- mindspore/ops/composite/multitype_ops/zeros_like_impl.py +14 -0
- mindspore/ops/function/__init__.py +3 -1
- mindspore/ops/function/_add_attr_func.py +11 -6
- mindspore/ops/function/array_func.py +7 -94
- mindspore/ops/function/debug_func.py +4 -3
- mindspore/ops/function/grad/grad_func.py +1 -1
- mindspore/ops/function/math_func.py +21 -367
- mindspore/ops/function/nn_func.py +26 -41
- mindspore/ops/function/other_func.py +4 -1
- mindspore/ops/function/random_func.py +31 -4
- mindspore/ops/functional.py +0 -2
- mindspore/ops/functional_overload.py +463 -6
- mindspore/ops/op_info_register.py +21 -0
- mindspore/ops/operations/__init__.py +5 -2
- mindspore/ops/operations/_custom_ops_utils.py +675 -8
- mindspore/ops/operations/_inner_ops.py +3 -6
- mindspore/ops/operations/_sequence_ops.py +1 -1
- mindspore/ops/operations/comm_ops.py +185 -26
- mindspore/ops/operations/custom_ops.py +235 -172
- mindspore/ops/operations/debug_ops.py +55 -4
- mindspore/ops/operations/image_ops.py +13 -13
- mindspore/ops/operations/manually_defined/ops_def.py +15 -16
- mindspore/ops/operations/math_ops.py +3 -4
- mindspore/ops/operations/nn_ops.py +5 -6
- mindspore/ops/primitive.py +6 -10
- mindspore/ops/tensor_method.py +36 -4
- mindspore/ops_generate/api/cpp_create_prim_instance_helper_generator.py +1 -1
- mindspore/ops_generate/api/functional_map_cpp_generator.py +10 -9
- mindspore/ops_generate/api/functions_cc_generator.py +58 -10
- mindspore/ops_generate/api/tensor_func_reg_cpp_generator.py +1 -1
- mindspore/ops_generate/common/base_generator.py +14 -0
- mindspore/ops_generate/common/gen_constants.py +7 -2
- mindspore/ops_generate/common/gen_utils.py +0 -19
- mindspore/ops_generate/common/op_proto.py +11 -4
- mindspore/ops_generate/common/template.py +88 -11
- mindspore/ops_generate/gen_ops.py +1 -1
- mindspore/ops_generate/op_def/lite_ops_cpp_generator.py +4 -4
- mindspore/ops_generate/op_def/ops_name_h_generator.py +0 -3
- mindspore/ops_generate/op_def/ops_primitive_h_generator.py +0 -4
- mindspore/ops_generate/op_def_py/op_prim_py_generator.py +5 -2
- mindspore/ops_generate/pyboost/auto_grad_impl_cc_generator.py +49 -8
- mindspore/ops_generate/pyboost/auto_grad_reg_cc_generator.py +2 -2
- mindspore/ops_generate/pyboost/gen_pyboost_func.py +31 -0
- mindspore/ops_generate/pyboost/op_template_parser.py +98 -72
- mindspore/ops_generate/pyboost/pyboost_functions_cpp_generator.py +70 -273
- mindspore/ops_generate/pyboost/pyboost_functions_h_generator.py +14 -6
- mindspore/ops_generate/pyboost/pyboost_functions_impl_cpp_generator.py +316 -0
- mindspore/ops_generate/pyboost/pyboost_functions_py_generator.py +1 -1
- mindspore/ops_generate/pyboost/pyboost_grad_function_cpp_generator.py +5 -3
- mindspore/ops_generate/pyboost/pyboost_inner_prim_generator.py +1 -1
- mindspore/ops_generate/pyboost/pyboost_internal_functions_cpp_generator.py +76 -0
- mindspore/ops_generate/pyboost/pyboost_internal_functions_h_generator.py +76 -0
- mindspore/ops_generate/pyboost/pyboost_internal_kernel_info_adapter_generator.py +125 -0
- mindspore/ops_generate/pyboost/pyboost_native_grad_functions_generator.py +4 -3
- mindspore/ops_generate/pyboost/pyboost_op_cpp_code_generator.py +348 -61
- mindspore/ops_generate/pyboost/pyboost_overload_functions_cpp_generator.py +1 -1
- mindspore/ops_generate/pyboost/pyboost_utils.py +118 -9
- mindspore/ops_generate/tensor_py_cc_generator.py +1 -24
- mindspore/parallel/_auto_parallel_context.py +4 -2
- mindspore/parallel/_cell_wrapper.py +106 -40
- mindspore/parallel/_parallel_serialization.py +1 -1
- mindspore/parallel/_ps_context.py +4 -6
- mindspore/parallel/_tensor.py +167 -12
- mindspore/parallel/_transformer/moe.py +1 -1
- mindspore/parallel/_transformer/transformer.py +13 -8
- mindspore/parallel/auto_parallel.py +12 -5
- mindspore/parallel/checkpoint_convert.py +3 -3
- mindspore/parallel/checkpoint_transform.py +3 -1
- mindspore/parallel/cluster/process_entity/_api.py +84 -48
- mindspore/parallel/cluster/process_entity/_utils.py +95 -7
- mindspore/parallel/cluster/run.py +43 -4
- mindspore/parallel/function/__init__.py +8 -1
- mindspore/parallel/function/reshard_func.py +1 -1
- mindspore/parallel/nn/__init__.py +15 -2
- mindspore/parallel/nn/parallel_cell_wrapper.py +9 -10
- mindspore/parallel/nn/parallel_grad_reducer.py +7 -6
- mindspore/parallel/shard.py +2 -2
- mindspore/parallel/transform_safetensors.py +462 -174
- mindspore/pgodb140.dll +0 -0
- mindspore/pgort140.dll +0 -0
- mindspore/profiler/__init__.py +2 -1
- mindspore/profiler/analysis/parser/timeline_assembly_factory/ascend_timeline_assembler.py +7 -7
- mindspore/profiler/analysis/parser/timeline_assembly_factory/base_timeline_assembler.py +3 -0
- mindspore/profiler/analysis/parser/timeline_assembly_factory/trace_view_container.py +3 -0
- mindspore/profiler/analysis/parser/timeline_creator/cpu_op_timeline_creator.py +3 -3
- mindspore/profiler/analysis/parser/timeline_creator/fwk_timeline_creator.py +3 -3
- mindspore/profiler/analysis/parser/timeline_creator/msprof_timeline_creator.py +4 -4
- mindspore/profiler/analysis/parser/timeline_creator/scope_layer_timeline_creator.py +3 -3
- mindspore/profiler/analysis/parser/timeline_event/fwk_event.py +4 -1
- mindspore/profiler/analysis/parser/timeline_event/timeline_event_pool.py +2 -1
- mindspore/profiler/analysis/task_manager.py +1 -1
- mindspore/profiler/analysis/viewer/ascend_communication_viewer.py +5 -1
- mindspore/profiler/analysis/viewer/ascend_integrate_viewer.py +2 -1
- mindspore/profiler/analysis/viewer/ascend_op_memory_viewer.py +42 -22
- mindspore/profiler/analysis/viewer/ascend_step_trace_time_viewer.py +3 -2
- mindspore/profiler/analysis/viewer/ms_minddata_viewer.py +9 -5
- mindspore/profiler/analysis/viewer/ms_operator_details_viewer.py +132 -0
- mindspore/profiler/common/constant.py +16 -0
- mindspore/profiler/common/profiler_context.py +25 -27
- mindspore/profiler/common/profiler_info.py +0 -16
- mindspore/profiler/common/profiler_op_analyse.py +235 -0
- mindspore/profiler/common/profiler_output_path.py +23 -8
- mindspore/profiler/common/profiler_parameters.py +128 -35
- mindspore/profiler/dynamic_profile/__init__.py +0 -0
- mindspore/profiler/dynamic_profile/dynamic_monitor_proxy.py +39 -0
- mindspore/profiler/dynamic_profile/dynamic_profiler_config_context.py +666 -0
- mindspore/profiler/dynamic_profile/dynamic_profiler_utils.py +62 -0
- mindspore/profiler/dynamic_profiler.py +305 -314
- mindspore/profiler/envprofiler.py +12 -7
- mindspore/profiler/experimental_config.py +96 -6
- mindspore/profiler/mstx.py +33 -12
- mindspore/profiler/platform/__init__.py +2 -3
- mindspore/profiler/platform/npu_profiler.py +29 -19
- mindspore/profiler/profiler.py +35 -19
- mindspore/profiler/profiler_action_controller.py +64 -76
- mindspore/profiler/schedule.py +10 -4
- mindspore/rewrite/common/config.py +1 -0
- mindspore/rewrite/common/namer.py +1 -0
- mindspore/rewrite/common/namespace.py +1 -0
- mindspore/rewrite/node/node.py +31 -11
- mindspore/rewrite/parsers/assign_parser.py +1 -1
- mindspore/rewrite/symbol_tree/symbol_tree.py +1 -1
- mindspore/run_check/_check_version.py +7 -10
- mindspore/runtime/__init__.py +5 -5
- mindspore/runtime/event.py +10 -4
- mindspore/runtime/executor.py +60 -45
- mindspore/runtime/memory.py +21 -30
- mindspore/runtime/thread_bind_core.py +298 -164
- mindspore/safeguard/rewrite_obfuscation.py +12 -13
- mindspore/swresample-4.dll +0 -0
- mindspore/swscale-6.dll +0 -0
- mindspore/tbbmalloc.dll +0 -0
- mindspore/tinyxml2.dll +0 -0
- mindspore/train/_utils.py +6 -2
- mindspore/train/amp.py +43 -20
- mindspore/train/callback/__init__.py +5 -5
- mindspore/train/callback/_checkpoint.py +3 -6
- mindspore/train/callback/_flops_collector.py +1 -1
- mindspore/train/callback/_landscape.py +0 -1
- mindspore/train/callback/_train_fault_tolerance.py +71 -13
- mindspore/train/data_sink.py +11 -2
- mindspore/train/dataset_helper.py +9 -0
- mindspore/train/model.py +51 -33
- mindspore/train/serialization.py +133 -111
- mindspore/train/summary/summary_record.py +13 -2
- mindspore/turbojpeg.dll +0 -0
- mindspore/utils/__init__.py +3 -2
- mindspore/utils/dryrun.py +0 -6
- mindspore/utils/runtime_execution_order_check.py +162 -78
- mindspore/utils/sdc_detect.py +68 -0
- mindspore/utils/utils.py +6 -9
- mindspore/vcmeta.dll +0 -0
- mindspore/vcruntime140.dll +0 -0
- mindspore/vcruntime140_1.dll +0 -0
- mindspore/version.py +1 -1
- {mindspore-2.6.0.dist-info → mindspore-2.7.0rc1.dist-info}/METADATA +5 -4
- {mindspore-2.6.0.dist-info → mindspore-2.7.0rc1.dist-info}/RECORD +352 -390
- mindspore/_deprecated/jit.py +0 -198
- mindspore/experimental/es/__init__.py +0 -22
- mindspore/experimental/es/embedding_service.py +0 -891
- mindspore/experimental/es/embedding_service_layer.py +0 -581
- mindspore/profiler/parser/__init__.py +0 -14
- mindspore/profiler/parser/aicpu_data_parser.py +0 -272
- mindspore/profiler/parser/ascend_analysis/__init__.py +0 -14
- mindspore/profiler/parser/ascend_analysis/constant.py +0 -71
- mindspore/profiler/parser/ascend_analysis/file_manager.py +0 -180
- mindspore/profiler/parser/ascend_analysis/function_event.py +0 -185
- mindspore/profiler/parser/ascend_analysis/fwk_cann_parser.py +0 -136
- mindspore/profiler/parser/ascend_analysis/fwk_file_parser.py +0 -131
- mindspore/profiler/parser/ascend_analysis/msprof_timeline_parser.py +0 -104
- mindspore/profiler/parser/ascend_analysis/path_manager.py +0 -313
- mindspore/profiler/parser/ascend_analysis/profiler_info_parser.py +0 -123
- mindspore/profiler/parser/ascend_analysis/tlv_decoder.py +0 -86
- mindspore/profiler/parser/ascend_analysis/trace_event_manager.py +0 -75
- mindspore/profiler/parser/ascend_cluster_generator.py +0 -116
- mindspore/profiler/parser/ascend_communicate_generator.py +0 -314
- mindspore/profiler/parser/ascend_flops_generator.py +0 -116
- mindspore/profiler/parser/ascend_fpbp_generator.py +0 -82
- mindspore/profiler/parser/ascend_hccl_generator.py +0 -271
- mindspore/profiler/parser/ascend_integrate_generator.py +0 -42
- mindspore/profiler/parser/ascend_memory_generator.py +0 -185
- mindspore/profiler/parser/ascend_msprof_exporter.py +0 -282
- mindspore/profiler/parser/ascend_msprof_generator.py +0 -187
- mindspore/profiler/parser/ascend_op_generator.py +0 -334
- mindspore/profiler/parser/ascend_steptrace_generator.py +0 -94
- mindspore/profiler/parser/ascend_timeline_generator.py +0 -545
- mindspore/profiler/parser/base_timeline_generator.py +0 -483
- mindspore/profiler/parser/container.py +0 -229
- mindspore/profiler/parser/cpu_gpu_timeline_generator.py +0 -697
- mindspore/profiler/parser/flops_parser.py +0 -531
- mindspore/profiler/parser/framework_enum.py +0 -111
- mindspore/profiler/parser/framework_parser.py +0 -464
- mindspore/profiler/parser/framework_struct.py +0 -61
- mindspore/profiler/parser/gpu_analysis/__init__.py +0 -14
- mindspore/profiler/parser/gpu_analysis/function_event.py +0 -44
- mindspore/profiler/parser/gpu_analysis/fwk_file_parser.py +0 -89
- mindspore/profiler/parser/gpu_analysis/profiler_info_parser.py +0 -72
- mindspore/profiler/parser/hccl_parser.py +0 -573
- mindspore/profiler/parser/hwts_log_parser.py +0 -122
- mindspore/profiler/parser/integrator.py +0 -526
- mindspore/profiler/parser/memory_usage_parser.py +0 -277
- mindspore/profiler/parser/minddata_analyzer.py +0 -800
- mindspore/profiler/parser/minddata_parser.py +0 -186
- mindspore/profiler/parser/minddata_pipeline_parser.py +0 -299
- mindspore/profiler/parser/op_intermediate_parser.py +0 -149
- mindspore/profiler/parser/optime_parser.py +0 -250
- mindspore/profiler/parser/profiler_info.py +0 -213
- mindspore/profiler/parser/step_trace_parser.py +0 -666
- {mindspore-2.6.0.dist-info → mindspore-2.7.0rc1.dist-info}/WHEEL +0 -0
- {mindspore-2.6.0.dist-info → mindspore-2.7.0rc1.dist-info}/entry_points.txt +0 -0
- {mindspore-2.6.0.dist-info → mindspore-2.7.0rc1.dist-info}/top_level.txt +0 -0
--- a/mindspore/profiler/parser/ascend_timeline_generator.py
+++ /dev/null
@@ -1,545 +0,0 @@
-# Copyright 2022 Huawei Technologies Co., Ltd
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ============================================================================
-"""The integrator for integrating parsed profiling files."""
-
-import os.path
-import glob
-import json
-import stat
-from decimal import Decimal
-import numpy as np
-from mindspore import log as logger
-from mindspore.profiler.common.exceptions.exceptions import ProfilerIOException
-from mindspore.profiler.parser.base_timeline_generator import BaseTimelineGenerator
-from mindspore.profiler.parser.integrator import DeviceTarget
-from mindspore.profiler.parser.ascend_analysis.fwk_cann_parser import FwkCANNParser
-from mindspore.profiler.common.util import get_newest_file
-from mindspore.profiler.parser.ascend_analysis.constant import Constant
-
-
-class AscendTimelineGenerator(BaseTimelineGenerator):
-    """Generate ascend Timeline data from file."""
-    _timeline_display_filename = 'ascend_timeline_display_{}.json'
-    _timeline_summary_filename = 'ascend_timeline_summary_{}.json'
-    _cluster_analyse_filename = 'ascend_cluster_analyse_{}_{}_{}_{}.csv'
-
-    def __init__(self, profiling_dir, source_path, mindstudio_profiler_output, rank_id, rank_size, mode,
-                 step_list=None):
-        super().__init__(DeviceTarget.ASCEND.value, mode)
-        self._profiling_dir = profiling_dir
-        self._source_path = source_path
-        self._mindstudio_profiler_output = mindstudio_profiler_output
-        self._rank_id = rank_id
-        self._rank_size = rank_size
-        self._timeline_display_filename = self._timeline_display_filename.format(rank_id)
-        self._timeline_summary_filename = self._timeline_summary_filename.format(rank_id)
-        self._timeline_data = []
-        self._step_list = step_list
-
-        self.step_time_list_df = np.dtype(
-            [('Iteration ID', object), ('Steps', object), ('Iteration Start', float), ('Iteration Time', float)])
-
-        self.aicpu_time_list_dt = np.dtype(
-            [('Op Name', object), ('Stream ID', int), ('Task Start Time', float), ('Task Duration', float)])
-
-    def parse_cluster_data(self, op_summary, steptrace):
-        """
-        Parse cluster data and timeline summary data.
-
-        Args:
-            op_summary: op data
-            steptrace: step data
-        """
-
-        logger.info('parse cluster data...')
-        if isinstance(op_summary, np.ndarray) and op_summary.shape[0] == 0 or \
-                not isinstance(op_summary, np.ndarray) and not op_summary:
-            return
-        timeline_list = op_summary[~np.isin(op_summary['Task Type'], ['AI_CPU', 'HCCL'])][
-            ['Op Name', 'Stream ID', 'Task Start Time', 'Task Duration']]
-
-        timeline_list = timeline_list.tolist()
-
-        if not isinstance(steptrace, np.ndarray) or steptrace.shape[0] == 0 or not steptrace.tolist():
-            iteration_time = op_summary[-1]['Task Start Time'] - op_summary[0]['Task Start Time'] + op_summary[-1][
-                'Task Duration'] + op_summary[-1]['Task Wait Time']
-            step_time_list = [['1', 'Steps', op_summary[0]['Task Start Time'], iteration_time]]
-        else:
-            step_time_list = np.empty((len(steptrace),), dtype=self.step_time_list_df)
-            step_time_list['Iteration ID'] = \
-                np.char.add("Model ID: ",
-                            np.char.add(steptrace['Model ID'].astype(str),
-                                        np.char.add(" Iteration ID: ",
-                                                    steptrace['Iteration ID'].astype(str))))
-            step_time_list['Steps'] = 'Steps'
-            step_time_list['Iteration Start'] = steptrace['Iteration End'] - steptrace['Iteration Time']
-            step_time_list['Iteration Time'] = steptrace['Iteration Time']
-            step_time_list = step_time_list.tolist()
-
-        # Add AI CPU data into timeline temp list and sort by start time.
-        aicpu_op = op_summary[op_summary['Task Type'] == 'AI_CPU']
-        if aicpu_op.size:
-            aicpu_time_list = np.empty((len(aicpu_op),), dtype=self.aicpu_time_list_dt)
-            aicpu_time_list['Op Name'] = aicpu_op['Op Name']
-            aicpu_time_list['Stream ID'] = aicpu_op['Stream ID']
-            aicpu_time_list['Task Start Time'] = aicpu_op['Task Start Time']
-            aicpu_time_list['Task Duration'] = aicpu_op['Task Duration'] + aicpu_op['Task Wait Time']
-            aicpu_time_list = aicpu_time_list.tolist()
-            timeline_list.extend(aicpu_time_list)
-            timeline_list.sort(key=lambda x: float(x[self._start_time_idx]))
-
-        # Add AllReduce info to timeline temp list and sort by start time.
-        communication_info = op_summary[op_summary['Task Type'] == 'HCCL'][
-            ['Op Name', 'Stream ID', 'Task Start Time', 'Task Duration']]
-        if communication_info.size:
-            communication_info = communication_info.tolist()
-            communication_info.sort(key=lambda x: float(x[self._start_time_idx]))
-            logger.debug('AllReduce info found. Start adding info into timeline...')
-            self._get_cluster_timeline(timeline_list, communication_info, step_time_list)
-
-        # Update timeline summary info
-        timeline_summary = op_summary[['Op Name', 'Stream ID', 'Task Duration']]
-        self._timeline_summary['total_time'] = np.sum(timeline_summary['Task Duration'])
-        self._timeline_summary['num_of_streams'] = int(
-            len(np.unique(timeline_summary['Stream ID'], return_counts=True)[0]))
-        self._timeline_summary['num_of_ops'] = int(len(np.unique(timeline_summary['Op Name'], return_counts=True)[0]))
-        self._timeline_summary['op_exe_times'] = int(len(timeline_summary))
-        if self._timeline_summary['op_exe_times'] != 0:
-            self._timeline_summary['max_scope_name_num'] = int(np.max(
-                [len(x) for x in np.char.split(timeline_summary['Op Name'].astype(str), sep='/')]))
-        else:
-            self._timeline_summary['max_scope_name_num'] = 0
-        logger.info('Finished parse cluster data...')
-
-    def write_timeline_display(self):
-        """Write timeline display"""
-        logger.info('Writing timeline file...')
-        display_file_path = os.path.join(
-            self._profiling_dir,
-            self._timeline_display_filename
-        )
-        try:
-            with os.fdopen(os.open(display_file_path, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600), 'w') as fw:
-                json.dump(self._timeline_data, fw, indent=self.indent)
-            os.chmod(display_file_path, stat.S_IREAD | stat.S_IWRITE)
-            logger.info('Finished file writing!')
-        except (IOError, OSError) as err:
-            logger.critical('Error occurred when write timeline display file: %s', err)
-            raise ProfilerIOException() from err
-
-    def parse_timeline_data(self, pretty=False):
-        """
-        Get detail timeline
-        Returns:
-            json, the content of timeline data.
-        """
-        logger.info("Start parse timeline data...")
-        self._pretty = pretty
-        timeline_data = []
-        all_scope_data = []
-
-        # get msprof data
-        msprof_file_name = fr'{self._mindstudio_profiler_output}/msprof_*.json'
-        file_list_msprof = glob.glob(msprof_file_name)
-        msprof_timeline = []
-        if not file_list_msprof:
-            logger.error('Could not find msprof_*.json file in %s', self._mindstudio_profiler_output)
-        else:
-            msprof_timeline = self._parse_msprof_data(get_newest_file(file_list_msprof))
-
-        # get cpu op
-        cpu_op_file_name = fr'{self._profiling_dir}/cpu_op_execute_timestamp_{self._rank_id}.txt'
-        file_list = glob.glob(cpu_op_file_name)
-        if not file_list:
-            logger.warning('Could not find cpu op file in %s', self._profiling_dir)
-        else:
-            cpu_timeline, scope_data = self.parse_cpu_timeline(file_list)
-            timeline_data.extend(cpu_timeline)
-            all_scope_data.extend(scope_data)
-
-        oprange_name = self._op_range_name.format(self._rank_id)
-        fwk_file_path = fr'{self._profiling_dir}/{self._framework_dir}/{oprange_name}'
-        if os.path.exists(fwk_file_path):
-            # It is faster not to submit to the pool
-            result = self._parse_fwk_device_data(msprof_timeline)
-            all_scope_data.extend(result.get('scope_data', []))
-            timeline_data.extend(result.get("trace_data", []))
-            self._kernel_events = result.get("kernels", [])
-        else:
-            # get Ascend Hardware for scope
-            scope_data = self._parse_ascend_hardware_scope(msprof_timeline)
-            all_scope_data.extend(scope_data)
-        timeline_data.extend(msprof_timeline)
-
-        # parse scope info
-        scope_timeline = self._parse_scope_info(all_scope_data)
-        timeline_data.extend(scope_timeline)
-
-        logger.info("All timeline data parse complete.")
-        self._timeline_data = timeline_data
-        return timeline_data
-
-    def parse_cpu_timeline(self, file_list):
-        """Load cpu operator data from file"""
-        ms_to_us = 1e3
-        ns_to_us = 1e-3
-        new_pid = Constant.CPU_OP
-        process_list = [{"name": "process_name",
-                         "pid": new_pid,
-                         "args": {
-                             "name": f"CPU OP"
-                         },
-                         "ph": "M"
-                         }, {"name": "process_sort_index", "pid": new_pid,
-                             "args": {"sort_index": new_pid}, "ph": "M"}
-                        ]
-        tid_set = set()
-        thread_list = []
-        new_timeline = []
-        scope_data = []
-        try:
-            flags = os.O_RDONLY
-            for file_path in file_list:
-                with os.fdopen(os.open(file_path, flags, 0o400), 'r') as fr:
-                    for line in fr:
-                        op_list = line.strip().split(';')
-                        op_full_name = op_list[0]
-                        time_arr = op_list[-1]
-                        time_arr = time_arr.split(" ")
-                        for time in time_arr:
-                            ts, dur, tid = time.split(",")  # origin unit of ts is ns and dur is ms.
-                            ts = Decimal(ts).quantize(Decimal('0.000')) * Decimal(ns_to_us).quantize(
-                                Decimal('0.000'))  # cast to us
-                            dur = Decimal(dur).quantize(Decimal('0.000')) * Decimal(ms_to_us).quantize(
-                                Decimal('0.000'))  # cast to us
-
-                            if op_full_name and op_full_name.startswith(Constant.TOP_SCOPE_NAMES):
-                                te = ts + dur
-                                scope_data.append((op_full_name.split('/')[:-1], ts, te))
-
-                            if int(tid) not in tid_set:
-                                tid_set.add(int(tid))
-                                thread_list.append({"name": "thread_name",
-                                                    "pid": new_pid,
-                                                    "tid": int(tid),
-                                                    "ph": "M",
-                                                    'args': {'name': f'thread {tid}'}
-                                                    })
-
-                            new_timeline.append({'name': op_list[0],
-                                                 'pid': new_pid,
-                                                 'tid': int(tid),
-                                                 'ph': 'X',
-                                                 'ts': str(ts),
-                                                 'dur': float(dur) * ms_to_us,
-                                                 'args':
-                                                     {'type': op_list[1]}
-                                                 })
-                            break
-
-            return process_list + thread_list + new_timeline, scope_data
-
-        except (IOError, OSError, json.JSONDecodeError) as err:
-            logger.error('parse_cann_data failed! please check. detail: %s', err)
-            return []
-
-    def _parse_fwk_device_data(self, cann_kernel_data):
-        """
-        Get framework op range trace data, flow events and hardware kernel events
-        """
-        fwkcann_parser = FwkCANNParser(self._source_path, cann_kernel_data, self._rank_id, self._step_list)
-        fwk_link_data = fwkcann_parser.generate_trace_data()
-        kernels = fwkcann_parser.kernels
-        scope_data = fwkcann_parser.scope_data_with_flow + fwkcann_parser.scope_data_without_flow
-        result = {"trace_data": fwk_link_data, "kernels": kernels, "scope_data": scope_data}
-        return result
-
-    def _parse_msprof_data(self, file_list):
-        """
-        parse msprof.json file
-        :param file_list:
-        :return:
-        """
-        flags = os.O_RDONLY
-        raw_data = []
-        try:
-            for file_path in file_list:
-                with os.fdopen(os.open(file_path, flags, 0o400), 'r') as fr:
-                    raw_data.extend(json.load(fr))
-
-            if not raw_data:
-                logger.error('Could not find msprof data in file list: %s .', file_list)
-
-            return raw_data
-
-        except (IOError, OSError, json.JSONDecodeError) as err:
-            logger.error('_parse_msprof_data failed! please check. detail: %s', err)
-            return []
-
-    def _parse_ascend_hardware_scope(self, msprof_timeline):
-        """
-        parse ascend hardware scope
-        """
-        scope_data = []
-        try:
-            for event in msprof_timeline:
-                if event.get("ph") == "M":
-                    continue
-
-                op_full_name = event.get('name')
-                if op_full_name and op_full_name.startswith(Constant.TOP_SCOPE_NAMES):
-                    ts = Decimal(event.get('ts')).quantize(Decimal('0.000'))
-                    te = ts + Decimal(event.get('dur')).quantize(Decimal('0.000'))
-                    scope_data.append((op_full_name.split('/')[:-1], ts, te))
-
-            return scope_data
-
-        except (IOError, OSError, json.JSONDecodeError) as err:
-            logger.error('_parse_ascend_hardware_scope failed! please check. detail: %s', err)
-            return []
-
-    def _parse_scope_info(self, scope_data):
-        """Parse scope info of op"""
-        if not scope_data:
-            return []
-        new_pid = Constant.SCOPE_LAYLER
-        scope_data.sort(key=lambda x: x[1])
-        process_list = [
-            {"name": "process_name",
-             "pid": new_pid,
-             "args": {
-                 "name": f"Scope Layer"
-             },
-             "ph": "M"},
-            {"name": "process_sort_index",
-             "pid": new_pid,
-             "args": {"sort_index": new_pid},
-             "ph": "M"}
-        ]
-
-        new_events = []
-        layer_stack = []
-        for layer_name in scope_data[0][0]:
-            layer_stack.append([layer_name, scope_data[0][1], scope_data[0][2]])
-
-        for op in scope_data[1:]:
-            if op[1] < layer_stack[0][2]:
-                # For parallel operators, keep only the earlier one.
-                continue
-            flag = True  # Whether the upper layer merged; if it did not, lower layers are not merged either.
-            for layer_depth, layer_name in enumerate(op[0]):
-                if layer_depth >= len(layer_stack):
-                    layer_stack.append([layer_name, op[1], op[2]])
-                else:
-                    if layer_stack[layer_depth][0] == layer_name and flag:
-                        layer_stack[layer_depth][2] = op[2]  # merge
-                    else:
-                        ts = layer_stack[layer_depth][1]
-                        new_events.append({
-                            "name": layer_stack[layer_depth][0],
-                            "pid": new_pid,
-                            "tid": layer_depth,
-                            "ph": "X",
-                            "ts": str(ts),
-                            "dur": float(layer_stack[layer_depth][2] - layer_stack[layer_depth][1])
-                        })
-                        layer_stack[layer_depth] = [layer_name, op[1], op[2]]
-                        flag = False
-
-        thread_list = []
-        for index, layer in enumerate(layer_stack):
-            thread_list.extend([{
-                "name": "thread_name",
-                "pid": new_pid,
-                "tid": index,
-                "args": {
-                    "name": f"layer{index}"
-                },
-                "ph": "M"
-            }, {
-                "name": "thread_sort_index",
-                "pid": new_pid,
-                "tid": index,
-                "args": {"sort_index": index},
-                "ph": "M"
-            }])
-            if layer:
-                ts = layer[1]
-                new_events.append({
-                    "name": layer[0],
-                    "pid": new_pid,
-                    "tid": index,
-                    "ph": "X",
-                    "ts": str(ts),
-                    "dur": float(layer[2] - layer[1])
-                })
-
-        return process_list + thread_list + new_events
-
-    def _produce_two_separated_timeline(self, timeline, op_name):
-        """Produce two separated timeline based on op_name."""
-        timeline_include_op_name = []
-        timeline_exclude_op_name = []
-        for time_item in timeline:
-            if op_name in time_item[self._op_name_idx]:
-                timeline_include_op_name.append(time_item)
-            else:
-                timeline_exclude_op_name.append(time_item)
-        return timeline_include_op_name, timeline_exclude_op_name
-
-    def _get_cluster_timeline(self, aicore_info, comm_info, step_info):
-        """
-        Analyse the cluster communication and computation data, and write result to file.
-
-        To analyse the cluster performance bottleneck based on timeline, define the time of a training
-        step as "t_total" and propose five metrics as follows:
-        1) The time that "receive" operators are not overlapped by others (t1)
-        2) The time that is consumed inside the stage (t_total - t1)
-        3) The time that "communication" operators are not overlapped by others (t2)
-        4) The time that is consumed by computation (t_total - t2)
-        5) The time that "collective communication" operators are not overlapped by others (t3)
-        In pipeline parallel mode, we can locate the slow stage based on t_total - t1. Inside each stage,
-        we can locate the slow card based on t_total - t2. The value of t1 indicates the degree to which
-        communication time between stages slows down the training. The value of t3 indicates the degree
-        to which communication inside each stage slows down the training.
-        """
-        is_pipeline_parallel = False
-        comm_timeline = self._get_merged_time_list(
-            comm_info, display_name="communication"
-        )
-        aicore_timeline = self._get_merged_time_list(
-            aicore_info, get_interval_time=True
-        )
-        # Consider if the overlap will be 0 or not.
-        comm_not_overlapped_timeline = self._get_intersection_time(
-            aicore_timeline[0], comm_timeline[0]
-        )
-
-        # Process receive part.
-        all_timeline = aicore_info + comm_info
-        all_timeline.sort(key=lambda x: float(x[self._start_time_idx]))
-        receive_timeline = self._produce_two_separated_timeline(
-            all_timeline, "Receive-op"
-        )
-        if receive_timeline[0]:
-            is_pipeline_parallel = True
-        receive_op_merged_timeline = self._get_merged_time_list(receive_timeline[0])[0]
-        timeline_exclude_receive_op_interval = self._get_merged_time_list(
-            receive_timeline[1], get_interval_time=True
-        )[0]
-        receive_op_not_overlapped_timeline = self._get_intersection_time(
-            timeline_exclude_receive_op_interval, receive_op_merged_timeline
-        )
-
-        # Process collective communication part.
-        collective_comm_timeline = self._produce_two_separated_timeline(
-            comm_info, "Receive-op"
-        )[-1]
-
-        collective_comm_not_overlapped_timeline = self._get_intersection_time(
-            aicore_timeline[0], self._get_merged_time_list(collective_comm_timeline)[0]
-        )
-
-        self._parse_cluster_metrices(step_info, receive_op_not_overlapped_timeline, comm_not_overlapped_timeline,
-                                     collective_comm_not_overlapped_timeline, is_pipeline_parallel)
-
-    def _parse_cluster_metrices(self, step_info, receive_op_not_overlapped_timeline, comm_not_overlapped_timeline,
-                                collective_comm_not_overlapped_timeline, is_pipeline_parallel):
-        """Write the cluster metrices"""
-        # Compute these five metrics mentioned above per step.
-        recieve_alone_time = self._compute_time_inside_step(receive_op_not_overlapped_timeline, step_info)
-        time_info = {"stage_time": [], "computation_time": []}
-        comm_alone_time = self._compute_time_inside_step(comm_not_overlapped_timeline, step_info)
-        collective_comm_alone_time = self._compute_time_inside_step(
-            collective_comm_not_overlapped_timeline, step_info
-        )
-        step_num = len(step_info)
-        for step in range(step_num):
-            try:
-                if is_pipeline_parallel:
-                    time_info.get("stage_time").append(step_info[step][self._duration_idx] - recieve_alone_time[step])
-            except IndexError as err:
-                logger.error(err)
-
-            try:
-                time_info.get("computation_time").append(step_info[step][self._duration_idx] - comm_alone_time[step])
-            except IndexError as err:
-                logger.error(err)
-
-        metrices_per_step_list = [
-            time_info.get("computation_time"), comm_alone_time, time_info.get("stage_time"),
-            recieve_alone_time, collective_comm_alone_time
-        ]
-        if step_num > 1:
-            for metric in metrices_per_step_list:
-                metric.append(sum(metric[1:]) / (step_num - 1))
-
-        try:
-            self._write_cluster_metrices(metrices_per_step_list, is_pipeline_parallel, "Ascend", self._rank_id)
-        except (IOError, OSError) as err:
-            logger.warning(err)
-            raise ProfilerIOException from err
-
-    def _compute_time_inside_step(self, metric_timeline, step_time_list):
-        """Compute per step time of metric_timeline."""
-        per_step_time_list = [0 for _ in range(len(step_time_list))]
-        step = 0
-        step_end_time = step_time_list[step][self._start_time_idx] + step_time_list[step][self._duration_idx]
-        for time_item in metric_timeline:
-            start_time = time_item[self._start_time_idx]
-            if start_time > step_end_time:
-                step += 1
-                if step >= len(step_time_list):
-                    logger.warning("Compute profiler compute_time_inside_step time, "
-                                   "find the data length is more than step count, "
-                                   "maybe current graph has multi sub graph, skip the last data.")
-                    break
-                step_end_time = step_time_list[step][self._start_time_idx] + step_time_list[step][self._duration_idx]
-            per_step_time_list[step] += time_item[self._duration_idx]
-
-        return per_step_time_list
-
-    def _get_intersection_time(self, first_time_list, second_time_list,
-                               display_name="communication_not_overlapped"):
-        """Get intersection time of two time list."""
-        first_list_idx, second_list_idx = 0, 0
-        first_list_len = len(first_time_list)
-        second_list_len = len(second_time_list)
-        intersection_segment_display_list = []
-
-        while first_list_idx < first_list_len and \
-                second_list_idx < second_list_len:
-            intersection_start = max(
-                first_time_list[first_list_idx][self._start_time_idx],
-                second_time_list[second_list_idx][self._start_time_idx]
-            )
-            intersection_end = min(
-                first_time_list[first_list_idx][self._duration_idx],
-                second_time_list[second_list_idx][self._duration_idx]
-            )
-            if intersection_start < intersection_end:
-                tid = self._tid_dict.get(display_name, [0, 0])
-                intersection_segment_display_list.append(
-                    [display_name, tid[0],
-                     intersection_start, intersection_end - intersection_start, tid[1]]
-                )
-            if first_time_list[first_list_idx][self._duration_idx] >= \
-                    second_time_list[second_list_idx][self._duration_idx]:
-                second_list_idx += 1
-            else:
-                first_list_idx += 1
-
-        return intersection_segment_display_list