mindspore 2.6.0rc1__cp310-cp310-win_amd64.whl → 2.7.0rc1__cp310-cp310-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mindspore might be problematic. Click here for more details.
- mindspore/.commit_id +1 -1
- mindspore/Microsoft.VisualStudio.Telemetry.dll +0 -0
- mindspore/Newtonsoft.Json.dll +0 -0
- mindspore/__init__.py +1 -1
- mindspore/_c_dataengine.cp310-win_amd64.pyd +0 -0
- mindspore/_c_expression.cp310-win_amd64.pyd +0 -0
- mindspore/_c_mindrecord.cp310-win_amd64.pyd +0 -0
- mindspore/_checkparam.py +40 -9
- mindspore/{_deprecated → _extends/optimize}/__init__.py +9 -3
- mindspore/_extends/optimize/cell_utils.py +96 -0
- mindspore/_extends/parse/__init__.py +2 -2
- mindspore/_extends/parse/compile_config.py +44 -22
- mindspore/_extends/parse/deprecated/deprecated_tensor_method.py +1 -1
- mindspore/_extends/parse/parser.py +37 -62
- mindspore/_extends/parse/resources.py +39 -0
- mindspore/_extends/parse/standard_method.py +43 -13
- mindspore/_extends/parse/trope.py +8 -1
- mindspore/_extends/pijit/__init__.py +1 -2
- mindspore/amp.py +4 -4
- mindspore/atlprov.dll +0 -0
- mindspore/avcodec-59.dll +0 -0
- mindspore/avdevice-59.dll +0 -0
- mindspore/avfilter-8.dll +0 -0
- mindspore/avformat-59.dll +0 -0
- mindspore/avutil-57.dll +0 -0
- mindspore/boost/adasum.py +1 -1
- mindspore/boost/boost_cell_wrapper.py +4 -4
- mindspore/c1.dll +0 -0
- mindspore/c1xx.dll +0 -0
- mindspore/c2.dll +0 -0
- mindspore/common/__init__.py +27 -2
- mindspore/common/_grad_function.py +2 -1
- mindspore/common/_pijit_context.py +28 -7
- mindspore/common/_stub_tensor.py +1 -209
- mindspore/common/_tensor_cpp_method.py +1 -1
- mindspore/common/_tensor_docs.py +77 -16
- mindspore/common/api.py +238 -113
- mindspore/common/dtype.py +21 -11
- mindspore/common/dump.py +10 -15
- mindspore/common/generator.py +5 -3
- mindspore/common/hook_handle.py +11 -2
- mindspore/common/jit_config.py +1 -1
- mindspore/common/jit_trace.py +84 -105
- mindspore/common/parameter.py +26 -12
- mindspore/common/recompute.py +3 -3
- mindspore/common/sparse_tensor.py +0 -3
- mindspore/common/symbol.py +0 -1
- mindspore/common/tensor.py +81 -81
- mindspore/communication/_comm_helper.py +46 -4
- mindspore/communication/management.py +79 -7
- mindspore/context.py +58 -40
- mindspore/dataset/core/config.py +3 -3
- mindspore/dataset/engine/datasets.py +20 -7
- mindspore/dataset/engine/datasets_user_defined.py +33 -3
- mindspore/dataset/engine/iterators.py +2 -2
- mindspore/dataset/engine/obs/config_loader.py +2 -2
- mindspore/dataset/engine/obs/obs_mindrecord_dataset.py +8 -0
- mindspore/dataset/transforms/py_transforms.py +7 -3
- mindspore/dataset/transforms/transforms.py +7 -3
- mindspore/dataset/vision/validators.py +1 -0
- mindspore/device_context/ascend/device.py +1 -1
- mindspore/device_context/gpu/__init__.py +2 -2
- mindspore/device_context/gpu/device.py +1 -1
- mindspore/device_context/gpu/op_precision.py +4 -2
- mindspore/device_context/gpu/op_tuning.py +6 -3
- mindspore/device_manager.py +16 -9
- mindspore/dnnl.dll +0 -0
- mindspore/dpcmi.dll +0 -0
- mindspore/experimental/llm_boost/ascend_native/llama_boost_ascend_native.py +3 -7
- mindspore/experimental/llm_boost/atb/boost_base.py +2 -3
- mindspore/experimental/optim/adadelta.py +13 -20
- mindspore/experimental/optim/adagrad.py +15 -22
- mindspore/experimental/optim/adam.py +17 -24
- mindspore/experimental/optim/adamax.py +14 -22
- mindspore/experimental/optim/adamw.py +28 -34
- mindspore/experimental/optim/asgd.py +15 -25
- mindspore/experimental/optim/lr_scheduler.py +27 -45
- mindspore/experimental/optim/nadam.py +14 -24
- mindspore/experimental/optim/optimizer.py +13 -23
- mindspore/experimental/optim/radam.py +18 -24
- mindspore/experimental/optim/rmsprop.py +14 -25
- mindspore/experimental/optim/rprop.py +15 -26
- mindspore/experimental/optim/sgd.py +9 -19
- mindspore/hal/__init__.py +4 -4
- mindspore/hal/contiguous_tensors_handle.py +2 -2
- mindspore/hal/memory.py +27 -7
- mindspore/include/api/cell.h +37 -1
- mindspore/include/api/delegate.h +10 -0
- mindspore/include/api/model.h +3 -0
- mindspore/include/api/types.h +2 -2
- mindspore/include/c_api/model_c.h +0 -58
- mindspore/include/c_api/tensor_c.h +0 -26
- mindspore/include/dataset/vision_ascend.h +1 -1
- mindspore/jpeg62.dll +0 -0
- mindspore/mindrecord/tools/cifar10.py +60 -11
- mindspore/mindrecord/tools/cifar10_to_mr.py +5 -0
- mindspore/mindspore_backend_common.dll +0 -0
- mindspore/mindspore_backend_manager.dll +0 -0
- mindspore/mindspore_common.dll +0 -0
- mindspore/mindspore_core.dll +0 -0
- mindspore/mindspore_cpu_res_manager.dll +0 -0
- mindspore/mindspore_dump.dll +0 -0
- mindspore/mindspore_frontend.dll +0 -0
- mindspore/mindspore_glog.dll +0 -0
- mindspore/mindspore_memory_pool.dll +0 -0
- mindspore/mindspore_ms_backend.dll +0 -0
- mindspore/mindspore_ops.dll +0 -0
- mindspore/mindspore_ops_host.dll +0 -0
- mindspore/mindspore_ops_kernel_common.dll +0 -0
- mindspore/mindspore_profiler.dll +0 -0
- mindspore/mindspore_pyboost.dll +0 -0
- mindspore/mindspore_pynative.dll +0 -0
- mindspore/mindspore_res_manager.dll +0 -0
- mindspore/mindspore_runtime_pipeline.dll +0 -0
- mindspore/mint/__init__.py +6 -46
- mindspore/mint/distributed/__init__.py +1 -0
- mindspore/mint/distributed/distributed.py +212 -9
- mindspore/mint/nn/__init__.py +1 -1
- mindspore/mint/nn/functional.py +53 -6
- mindspore/mint/nn/layer/_functions.py +164 -294
- mindspore/mint/nn/layer/activation.py +8 -6
- mindspore/mint/nn/layer/conv.py +137 -101
- mindspore/mint/nn/layer/normalization.py +8 -22
- mindspore/mint/optim/adam.py +19 -18
- mindspore/mint/optim/adamw.py +14 -8
- mindspore/mint/optim/sgd.py +5 -5
- mindspore/msobj140.dll +0 -0
- mindspore/mspdb140.dll +0 -0
- mindspore/mspdbcore.dll +0 -0
- mindspore/mspdbst.dll +0 -0
- mindspore/mspft140.dll +0 -0
- mindspore/msvcdis140.dll +0 -0
- mindspore/msvcp140_1.dll +0 -0
- mindspore/msvcp140_2.dll +0 -0
- mindspore/msvcp140_atomic_wait.dll +0 -0
- mindspore/msvcp140_codecvt_ids.dll +0 -0
- mindspore/nn/cell.py +328 -502
- mindspore/nn/grad/cell_grad.py +11 -12
- mindspore/nn/layer/activation.py +32 -34
- mindspore/nn/layer/basic.py +67 -64
- mindspore/nn/layer/channel_shuffle.py +4 -4
- mindspore/nn/layer/combined.py +4 -2
- mindspore/nn/layer/conv.py +117 -110
- mindspore/nn/layer/dense.py +9 -7
- mindspore/nn/layer/embedding.py +50 -52
- mindspore/nn/layer/image.py +37 -39
- mindspore/nn/layer/math.py +111 -112
- mindspore/nn/layer/normalization.py +56 -44
- mindspore/nn/layer/pooling.py +58 -63
- mindspore/nn/layer/rnn_cells.py +33 -33
- mindspore/nn/layer/rnns.py +56 -56
- mindspore/nn/layer/thor_layer.py +74 -73
- mindspore/nn/layer/transformer.py +11 -1
- mindspore/nn/learning_rate_schedule.py +20 -20
- mindspore/nn/loss/loss.py +79 -81
- mindspore/nn/optim/adam.py +3 -3
- mindspore/nn/optim/adasum.py +2 -2
- mindspore/nn/optim/asgd.py +2 -0
- mindspore/nn/optim/optimizer.py +1 -1
- mindspore/nn/optim/thor.py +2 -2
- mindspore/nn/probability/distribution/exponential.py +2 -1
- mindspore/nn/probability/distribution/poisson.py +2 -1
- mindspore/nn/sparse/sparse.py +3 -3
- mindspore/nn/wrap/cell_wrapper.py +34 -37
- mindspore/nn/wrap/grad_reducer.py +37 -37
- mindspore/nn/wrap/loss_scale.py +72 -74
- mindspore/numpy/array_creations.py +5 -5
- mindspore/numpy/fft.py +1 -1
- mindspore/numpy/math_ops.py +5 -5
- mindspore/opencv_core452.dll +0 -0
- mindspore/opencv_imgcodecs452.dll +0 -0
- mindspore/opencv_imgproc452.dll +0 -0
- mindspore/ops/_grad_experimental/grad_comm_ops.py +51 -13
- mindspore/ops/_grad_experimental/grad_debug_ops.py +14 -0
- mindspore/ops/_vmap/vmap_array_ops.py +31 -13
- mindspore/ops/_vmap/vmap_nn_ops.py +8 -16
- mindspore/ops/auto_generate/cpp_create_prim_instance_helper.py +42 -11
- mindspore/ops/auto_generate/gen_extend_func.py +23 -141
- mindspore/ops/auto_generate/gen_ops_def.py +727 -321
- mindspore/ops/auto_generate/gen_ops_prim.py +1721 -984
- mindspore/ops/auto_generate/pyboost_inner_prim.py +31 -1
- mindspore/ops/composite/__init__.py +10 -0
- mindspore/ops/composite/base.py +8 -4
- mindspore/ops/composite/multitype_ops/__init__.py +12 -1
- mindspore/ops/composite/multitype_ops/_compile_utils.py +133 -109
- mindspore/ops/composite/multitype_ops/add_impl.py +70 -2
- mindspore/ops/composite/multitype_ops/div_impl.py +49 -0
- mindspore/ops/composite/multitype_ops/floordiv_impl.py +29 -0
- mindspore/ops/composite/multitype_ops/getitem_impl.py +11 -0
- mindspore/ops/composite/multitype_ops/mod_impl.py +5 -3
- mindspore/ops/composite/multitype_ops/mul_impl.py +49 -0
- mindspore/ops/composite/multitype_ops/setitem_impl.py +57 -0
- mindspore/ops/composite/multitype_ops/sub_impl.py +34 -0
- mindspore/ops/composite/multitype_ops/zeros_like_impl.py +14 -0
- mindspore/ops/function/__init__.py +3 -1
- mindspore/ops/function/_add_attr_func.py +11 -6
- mindspore/ops/function/array_func.py +9 -96
- mindspore/ops/function/debug_func.py +4 -3
- mindspore/ops/function/grad/grad_func.py +1 -1
- mindspore/ops/function/math_func.py +33 -540
- mindspore/ops/function/nn_func.py +28 -74
- mindspore/ops/function/other_func.py +4 -1
- mindspore/ops/function/random_func.py +44 -5
- mindspore/ops/function/vmap_func.py +2 -1
- mindspore/ops/functional.py +2 -3
- mindspore/ops/functional_overload.py +571 -6
- mindspore/ops/op_info_register.py +21 -0
- mindspore/ops/operations/__init__.py +16 -11
- mindspore/ops/operations/_custom_ops_utils.py +689 -34
- mindspore/ops/operations/_inner_ops.py +3 -6
- mindspore/ops/operations/_sequence_ops.py +1 -1
- mindspore/ops/operations/array_ops.py +2 -2
- mindspore/ops/operations/comm_ops.py +185 -26
- mindspore/ops/operations/custom_ops.py +294 -174
- mindspore/ops/operations/debug_ops.py +59 -4
- mindspore/ops/operations/image_ops.py +13 -13
- mindspore/ops/operations/manually_defined/ops_def.py +15 -16
- mindspore/ops/operations/math_ops.py +3 -4
- mindspore/ops/operations/nn_ops.py +7 -39
- mindspore/ops/primitive.py +6 -10
- mindspore/ops/tensor_method.py +47 -8
- mindspore/ops_generate/api/cpp_create_prim_instance_helper_generator.py +1 -1
- mindspore/ops_generate/api/functional_map_cpp_generator.py +10 -9
- mindspore/ops_generate/api/functions_cc_generator.py +58 -10
- mindspore/ops_generate/api/tensor_func_reg_cpp_generator.py +1 -1
- mindspore/ops_generate/common/base_generator.py +14 -0
- mindspore/ops_generate/common/gen_constants.py +8 -3
- mindspore/ops_generate/common/gen_utils.py +0 -19
- mindspore/ops_generate/common/op_proto.py +11 -4
- mindspore/ops_generate/common/template.py +88 -11
- mindspore/ops_generate/gen_ops.py +1 -1
- mindspore/ops_generate/op_def/lite_ops_cpp_generator.py +4 -4
- mindspore/ops_generate/op_def/ops_def_cc_generator.py +0 -3
- mindspore/ops_generate/op_def/ops_name_h_generator.py +0 -3
- mindspore/ops_generate/op_def/ops_primitive_h_generator.py +0 -4
- mindspore/ops_generate/op_def_py/op_prim_py_generator.py +5 -2
- mindspore/ops_generate/pyboost/auto_grad_impl_cc_generator.py +49 -8
- mindspore/ops_generate/pyboost/auto_grad_reg_cc_generator.py +2 -2
- mindspore/ops_generate/pyboost/gen_pyboost_func.py +31 -0
- mindspore/ops_generate/pyboost/op_template_parser.py +98 -72
- mindspore/ops_generate/pyboost/pyboost_functions_cpp_generator.py +70 -273
- mindspore/ops_generate/pyboost/pyboost_functions_h_generator.py +14 -6
- mindspore/ops_generate/pyboost/pyboost_functions_impl_cpp_generator.py +316 -0
- mindspore/ops_generate/pyboost/pyboost_functions_py_generator.py +1 -1
- mindspore/ops_generate/pyboost/pyboost_grad_function_cpp_generator.py +5 -3
- mindspore/ops_generate/pyboost/pyboost_inner_prim_generator.py +1 -1
- mindspore/ops_generate/pyboost/pyboost_internal_functions_cpp_generator.py +76 -0
- mindspore/ops_generate/pyboost/pyboost_internal_functions_h_generator.py +76 -0
- mindspore/ops_generate/pyboost/pyboost_internal_kernel_info_adapter_generator.py +125 -0
- mindspore/ops_generate/pyboost/pyboost_native_grad_functions_generator.py +4 -3
- mindspore/ops_generate/pyboost/pyboost_op_cpp_code_generator.py +348 -61
- mindspore/ops_generate/pyboost/pyboost_overload_functions_cpp_generator.py +1 -1
- mindspore/ops_generate/pyboost/pyboost_utils.py +118 -9
- mindspore/ops_generate/tensor_py_cc_generator.py +1 -24
- mindspore/parallel/_auto_parallel_context.py +11 -8
- mindspore/parallel/_cell_wrapper.py +113 -45
- mindspore/parallel/_parallel_serialization.py +1 -1
- mindspore/parallel/_ps_context.py +4 -6
- mindspore/parallel/_tensor.py +167 -12
- mindspore/parallel/_transformer/moe.py +1 -1
- mindspore/parallel/_transformer/transformer.py +13 -8
- mindspore/parallel/auto_parallel.py +14 -7
- mindspore/parallel/checkpoint_convert.py +3 -3
- mindspore/parallel/checkpoint_transform.py +11 -7
- mindspore/parallel/cluster/process_entity/_api.py +84 -48
- mindspore/parallel/cluster/process_entity/_utils.py +95 -7
- mindspore/parallel/cluster/run.py +43 -4
- mindspore/parallel/function/__init__.py +8 -1
- mindspore/parallel/function/reshard_func.py +6 -7
- mindspore/parallel/nn/__init__.py +15 -2
- mindspore/parallel/nn/parallel_cell_wrapper.py +9 -10
- mindspore/parallel/nn/parallel_grad_reducer.py +7 -6
- mindspore/parallel/shard.py +3 -4
- mindspore/parallel/transform_safetensors.py +463 -174
- mindspore/pgodb140.dll +0 -0
- mindspore/pgort140.dll +0 -0
- mindspore/profiler/__init__.py +2 -1
- mindspore/profiler/analysis/parser/timeline_assembly_factory/ascend_timeline_assembler.py +7 -7
- mindspore/profiler/analysis/parser/timeline_assembly_factory/base_timeline_assembler.py +3 -0
- mindspore/profiler/analysis/parser/timeline_assembly_factory/trace_view_container.py +12 -6
- mindspore/profiler/analysis/parser/timeline_creator/cpu_op_timeline_creator.py +3 -3
- mindspore/profiler/analysis/parser/timeline_creator/fwk_timeline_creator.py +3 -3
- mindspore/profiler/analysis/parser/timeline_creator/msprof_timeline_creator.py +4 -4
- mindspore/profiler/analysis/parser/timeline_creator/scope_layer_timeline_creator.py +3 -3
- mindspore/profiler/analysis/parser/timeline_event/fwk_event.py +4 -1
- mindspore/profiler/analysis/parser/timeline_event/timeline_event_pool.py +2 -1
- mindspore/profiler/analysis/task_manager.py +1 -1
- mindspore/profiler/analysis/viewer/ascend_communication_viewer.py +5 -1
- mindspore/profiler/analysis/viewer/ascend_integrate_viewer.py +2 -1
- mindspore/profiler/analysis/viewer/ascend_op_memory_viewer.py +42 -22
- mindspore/profiler/analysis/viewer/ascend_step_trace_time_viewer.py +3 -2
- mindspore/profiler/analysis/viewer/ms_minddata_viewer.py +9 -5
- mindspore/profiler/analysis/viewer/ms_operator_details_viewer.py +132 -0
- mindspore/profiler/common/constant.py +16 -0
- mindspore/profiler/common/profiler_context.py +25 -27
- mindspore/profiler/common/profiler_info.py +0 -16
- mindspore/profiler/common/profiler_op_analyse.py +235 -0
- mindspore/profiler/common/profiler_output_path.py +23 -8
- mindspore/profiler/common/profiler_parameters.py +128 -35
- mindspore/profiler/dynamic_profile/__init__.py +0 -0
- mindspore/profiler/dynamic_profile/dynamic_monitor_proxy.py +39 -0
- mindspore/profiler/dynamic_profile/dynamic_profiler_config_context.py +666 -0
- mindspore/profiler/dynamic_profile/dynamic_profiler_utils.py +62 -0
- mindspore/profiler/dynamic_profiler.py +305 -314
- mindspore/profiler/envprofiler.py +12 -7
- mindspore/profiler/experimental_config.py +96 -6
- mindspore/profiler/mstx.py +33 -12
- mindspore/profiler/platform/__init__.py +2 -3
- mindspore/profiler/platform/npu_profiler.py +29 -19
- mindspore/profiler/profiler.py +35 -19
- mindspore/profiler/profiler_action_controller.py +64 -76
- mindspore/profiler/schedule.py +10 -4
- mindspore/rewrite/common/config.py +1 -0
- mindspore/rewrite/common/namer.py +1 -0
- mindspore/rewrite/common/namespace.py +1 -0
- mindspore/rewrite/node/node.py +31 -11
- mindspore/rewrite/parsers/assign_parser.py +1 -1
- mindspore/rewrite/symbol_tree/symbol_tree.py +1 -1
- mindspore/run_check/_check_version.py +7 -10
- mindspore/runtime/__init__.py +5 -5
- mindspore/runtime/event.py +10 -4
- mindspore/runtime/executor.py +60 -45
- mindspore/runtime/memory.py +30 -32
- mindspore/runtime/thread_bind_core.py +298 -164
- mindspore/safeguard/rewrite_obfuscation.py +12 -13
- mindspore/swresample-4.dll +0 -0
- mindspore/swscale-6.dll +0 -0
- mindspore/tbbmalloc.dll +0 -0
- mindspore/tinyxml2.dll +0 -0
- mindspore/train/_utils.py +14 -4
- mindspore/train/amp.py +43 -20
- mindspore/train/callback/__init__.py +5 -5
- mindspore/train/callback/_checkpoint.py +3 -6
- mindspore/train/callback/_flops_collector.py +1 -1
- mindspore/train/callback/_landscape.py +0 -1
- mindspore/train/callback/_train_fault_tolerance.py +97 -16
- mindspore/train/data_sink.py +11 -2
- mindspore/train/dataset_helper.py +9 -0
- mindspore/train/model.py +135 -55
- mindspore/train/serialization.py +133 -111
- mindspore/train/summary/summary_record.py +13 -2
- mindspore/turbojpeg.dll +0 -0
- mindspore/utils/__init__.py +3 -2
- mindspore/utils/dryrun.py +0 -6
- mindspore/utils/runtime_execution_order_check.py +163 -77
- mindspore/utils/sdc_detect.py +68 -0
- mindspore/utils/utils.py +6 -9
- mindspore/vcmeta.dll +0 -0
- mindspore/vcruntime140.dll +0 -0
- mindspore/vcruntime140_1.dll +0 -0
- mindspore/version.py +1 -1
- {mindspore-2.6.0rc1.dist-info → mindspore-2.7.0rc1.dist-info}/METADATA +5 -4
- {mindspore-2.6.0rc1.dist-info → mindspore-2.7.0rc1.dist-info}/RECORD +356 -394
- mindspore/_deprecated/jit.py +0 -198
- mindspore/experimental/es/__init__.py +0 -22
- mindspore/experimental/es/embedding_service.py +0 -891
- mindspore/experimental/es/embedding_service_layer.py +0 -581
- mindspore/profiler/parser/__init__.py +0 -14
- mindspore/profiler/parser/aicpu_data_parser.py +0 -272
- mindspore/profiler/parser/ascend_analysis/__init__.py +0 -14
- mindspore/profiler/parser/ascend_analysis/constant.py +0 -71
- mindspore/profiler/parser/ascend_analysis/file_manager.py +0 -180
- mindspore/profiler/parser/ascend_analysis/function_event.py +0 -185
- mindspore/profiler/parser/ascend_analysis/fwk_cann_parser.py +0 -136
- mindspore/profiler/parser/ascend_analysis/fwk_file_parser.py +0 -131
- mindspore/profiler/parser/ascend_analysis/msprof_timeline_parser.py +0 -104
- mindspore/profiler/parser/ascend_analysis/path_manager.py +0 -313
- mindspore/profiler/parser/ascend_analysis/profiler_info_parser.py +0 -123
- mindspore/profiler/parser/ascend_analysis/tlv_decoder.py +0 -86
- mindspore/profiler/parser/ascend_analysis/trace_event_manager.py +0 -75
- mindspore/profiler/parser/ascend_cluster_generator.py +0 -116
- mindspore/profiler/parser/ascend_communicate_generator.py +0 -314
- mindspore/profiler/parser/ascend_flops_generator.py +0 -116
- mindspore/profiler/parser/ascend_fpbp_generator.py +0 -82
- mindspore/profiler/parser/ascend_hccl_generator.py +0 -271
- mindspore/profiler/parser/ascend_integrate_generator.py +0 -42
- mindspore/profiler/parser/ascend_memory_generator.py +0 -185
- mindspore/profiler/parser/ascend_msprof_exporter.py +0 -282
- mindspore/profiler/parser/ascend_msprof_generator.py +0 -187
- mindspore/profiler/parser/ascend_op_generator.py +0 -334
- mindspore/profiler/parser/ascend_steptrace_generator.py +0 -94
- mindspore/profiler/parser/ascend_timeline_generator.py +0 -545
- mindspore/profiler/parser/base_timeline_generator.py +0 -483
- mindspore/profiler/parser/container.py +0 -229
- mindspore/profiler/parser/cpu_gpu_timeline_generator.py +0 -697
- mindspore/profiler/parser/flops_parser.py +0 -531
- mindspore/profiler/parser/framework_enum.py +0 -111
- mindspore/profiler/parser/framework_parser.py +0 -464
- mindspore/profiler/parser/framework_struct.py +0 -61
- mindspore/profiler/parser/gpu_analysis/__init__.py +0 -14
- mindspore/profiler/parser/gpu_analysis/function_event.py +0 -44
- mindspore/profiler/parser/gpu_analysis/fwk_file_parser.py +0 -89
- mindspore/profiler/parser/gpu_analysis/profiler_info_parser.py +0 -72
- mindspore/profiler/parser/hccl_parser.py +0 -573
- mindspore/profiler/parser/hwts_log_parser.py +0 -122
- mindspore/profiler/parser/integrator.py +0 -526
- mindspore/profiler/parser/memory_usage_parser.py +0 -277
- mindspore/profiler/parser/minddata_analyzer.py +0 -800
- mindspore/profiler/parser/minddata_parser.py +0 -186
- mindspore/profiler/parser/minddata_pipeline_parser.py +0 -299
- mindspore/profiler/parser/op_intermediate_parser.py +0 -149
- mindspore/profiler/parser/optime_parser.py +0 -250
- mindspore/profiler/parser/profiler_info.py +0 -213
- mindspore/profiler/parser/step_trace_parser.py +0 -666
- {mindspore-2.6.0rc1.dist-info → mindspore-2.7.0rc1.dist-info}/WHEEL +0 -0
- {mindspore-2.6.0rc1.dist-info → mindspore-2.7.0rc1.dist-info}/entry_points.txt +0 -0
- {mindspore-2.6.0rc1.dist-info → mindspore-2.7.0rc1.dist-info}/top_level.txt +0 -0
|
@@ -1,697 +0,0 @@
|
|
|
1
|
-
# Copyright 2022 Huawei Technologies Co., Ltd
|
|
2
|
-
#
|
|
3
|
-
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
-
# you may not use this file except in compliance with the License.
|
|
5
|
-
# You may obtain a copy of the License at
|
|
6
|
-
#
|
|
7
|
-
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
-
#
|
|
9
|
-
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
-
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
-
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
-
# See the License for the specific language governing permissions and
|
|
13
|
-
# limitations under the License.
|
|
14
|
-
# ============================================================================
|
|
15
|
-
"""The integrator for integrating parsed profiling files."""
|
|
16
|
-
import json
|
|
17
|
-
import os
|
|
18
|
-
import csv
|
|
19
|
-
|
|
20
|
-
from mindspore import log as logger
|
|
21
|
-
from mindspore.profiler.common.exceptions.exceptions import ProfilerIOException, ProfilerFileNotFoundException, \
|
|
22
|
-
ProfilerParamValueErrorException
|
|
23
|
-
from mindspore.profiler.parser.container import TimelineContainer
|
|
24
|
-
from mindspore.profiler.parser.base_timeline_generator import BaseTimelineGenerator
|
|
25
|
-
from mindspore.profiler.parser.integrator import DeviceTarget
|
|
26
|
-
from mindspore.profiler.common.validator.validate_path import validate_and_normalize_path
|
|
27
|
-
from mindspore.profiler.parser.gpu_analysis.fwk_file_parser import GPUFwkFileParser
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
class GpuTimelineGenerator(BaseTimelineGenerator):
|
|
31
|
-
"""Generate gpu Timeline data from file."""
|
|
32
|
-
_display_filename = 'gpu_timeline_display_{}.json'
|
|
33
|
-
_timeline_summary_filename = 'gpu_timeline_summary_{}.json'
|
|
34
|
-
_output_op_execute_time_file_path = "gpu_op_execute_timestamp_{}.txt"
|
|
35
|
-
_output_activity_execute_time_file_path = "activity_execute_timestamp_{}.txt"
|
|
36
|
-
_output_gpu_activity_info_file_path = "gpu_activity_data_{}.csv"
|
|
37
|
-
_step_trace_original_filename = 'step_trace_profiling_{}.txt'
|
|
38
|
-
_cluster_analyse_filename = 'gpu_cluster_analyse_{}_{}_{}_{}.csv'
|
|
39
|
-
_activity_keys_list = []
|
|
40
|
-
|
|
41
|
-
def __init__(self, profiling_dir, device_id, rank_size, model):
|
|
42
|
-
super().__init__(DeviceTarget.GPU.value, model)
|
|
43
|
-
self._device_id = device_id
|
|
44
|
-
self._rank_size = rank_size
|
|
45
|
-
self._profiling_dir = profiling_dir
|
|
46
|
-
self._timeline_meta = []
|
|
47
|
-
self._display_filename = self._display_filename.format(device_id)
|
|
48
|
-
self._timeline_summary_filename = self._timeline_summary_filename.format(device_id)
|
|
49
|
-
self._tid_dict = {
|
|
50
|
-
"receive_op_not_overlapped": (self._RECEIVE_ALONE, self._OP_OVERLAP_PID),
|
|
51
|
-
"exclude_receive_op": (self._ALLREDUCE_ALONE, self._OP_OVERLAP_PID),
|
|
52
|
-
"computation_op": (self._MERGED_COMPUTATION_TID, self._OP_OVERLAP_PID),
|
|
53
|
-
"communication_not_overlapped": (self._PURE_COMMUNICATION_TID, self._OP_OVERLAP_PID),
|
|
54
|
-
"communication": (self._MERGED_COMMUNICATION_TID, self._OP_OVERLAP_PID),
|
|
55
|
-
"free_time": (self._FREE_TIME_TID, self._OP_OVERLAP_PID)
|
|
56
|
-
}
|
|
57
|
-
|
|
58
|
-
def init_timeline(self, reduce_op_type):
|
|
59
|
-
"""Init timeline metadata, adding all collected info."""
|
|
60
|
-
timeline_list = self._load_timeline_data(reduce_op_type)
|
|
61
|
-
|
|
62
|
-
# Init a dict for counting the num of streams.
|
|
63
|
-
stream_count_dict = {}
|
|
64
|
-
for timeline in timeline_list:
|
|
65
|
-
self._parse_timeline_data(timeline, 0)
|
|
66
|
-
# Updating the collection of streams.
|
|
67
|
-
if len(timeline) == 4:
|
|
68
|
-
self._update_num_of_streams(timeline, stream_count_dict)
|
|
69
|
-
|
|
70
|
-
# Update timeline summary info
|
|
71
|
-
self._timeline_summary['num_of_streams'] += len(stream_count_dict)
|
|
72
|
-
|
|
73
|
-
def check_op_name(self, op_name):
|
|
74
|
-
"""
|
|
75
|
-
Check whether the operator name exists.
|
|
76
|
-
|
|
77
|
-
Args:
|
|
78
|
-
op_name (str): The operator name or operator name prefix.
|
|
79
|
-
|
|
80
|
-
Returns:
|
|
81
|
-
bool, `True` if the operator name does exist, else `False`.
|
|
82
|
-
"""
|
|
83
|
-
if not op_name:
|
|
84
|
-
raise ProfilerParamValueErrorException('The op_name should exist.')
|
|
85
|
-
for op_time_info in self._timeline_meta:
|
|
86
|
-
full_op_name = op_time_info['name']
|
|
87
|
-
if full_op_name and full_op_name.startswith(op_name):
|
|
88
|
-
return True
|
|
89
|
-
return False
|
|
90
|
-
|
|
91
|
-
def is_gpu_kernel_async_launch(self):
|
|
92
|
-
"""Recognize the solution that launch the gpu kernel async."""
|
|
93
|
-
step_trace_profiling_path = self._get_and_validate_path(
|
|
94
|
-
self._step_trace_original_filename
|
|
95
|
-
)
|
|
96
|
-
try:
|
|
97
|
-
with open(step_trace_profiling_path, 'r') as f_obj:
|
|
98
|
-
line = next(f_obj)
|
|
99
|
-
first_string = line.strip().split()[0]
|
|
100
|
-
# the data format of launch the gpu kernel async is "Default/op1,160123 op-name"
|
|
101
|
-
# otherwise, the data format is "Default/op1 160123,12 "
|
|
102
|
-
return bool(len(first_string.split(',')) == 2)
|
|
103
|
-
except (IOError, OSError) as err:
|
|
104
|
-
logger.critical(f'Error occurred when read {step_trace_profiling_path}: {err}')
|
|
105
|
-
raise ProfilerIOException() from err
|
|
106
|
-
except StopIteration:
|
|
107
|
-
logger.warning('No step trace data exists.')
|
|
108
|
-
return False
|
|
109
|
-
|
|
110
|
-
def parse_fwk_data(self):
|
|
111
|
-
"""
|
|
112
|
-
Get framework op range trace data
|
|
113
|
-
"""
|
|
114
|
-
|
|
115
|
-
fwk_parser = GPUFwkFileParser(self._profiling_dir, self._device_id)
|
|
116
|
-
fwk_data = fwk_parser.get_op_range_data()
|
|
117
|
-
self._fwk_json = fwk_parser.get_fwk_trace_data(fwk_data)
|
|
118
|
-
|
|
119
|
-
def write_fwk_timeline(self):
|
|
120
|
-
display_file_path = os.path.join(self._profiling_dir, self._display_filename)
|
|
121
|
-
timeline_data = self._fwk_json
|
|
122
|
-
if os.path.exists(display_file_path):
|
|
123
|
-
with os.fdopen(os.open(display_file_path, os.O_RDONLY, 0o600), 'r') as fr:
|
|
124
|
-
device_data = fr.read()
|
|
125
|
-
timeline_data.extend(json.loads(device_data))
|
|
126
|
-
|
|
127
|
-
with os.fdopen(os.open(display_file_path, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600), 'w') as fw:
|
|
128
|
-
json.dump(timeline_data, fw)
|
|
129
|
-
|
|
130
|
-
def _get_and_validate_path(self, file_name):
|
|
131
|
-
"""Generate op or activity file path from file name, and validate this path."""
|
|
132
|
-
file_path = os.path.join(
|
|
133
|
-
self._profiling_dir,
|
|
134
|
-
file_name.format(self._device_id)
|
|
135
|
-
)
|
|
136
|
-
file_path = validate_and_normalize_path(file_path)
|
|
137
|
-
if not os.path.exists(file_path):
|
|
138
|
-
logger.critical(f"Failed to find parsed timeline file {file_path}.")
|
|
139
|
-
raise ProfilerFileNotFoundException('parsed timeline file')
|
|
140
|
-
|
|
141
|
-
return file_path
|
|
142
|
-
|
|
143
|
-
def _parse_timeline_data(self, timeline, min_cycle_counter):
|
|
144
|
-
"""Parse timeline data."""
|
|
145
|
-
# factor to convert the time unit of start_time(ts) from 1ns to 1us for timeline display
|
|
146
|
-
factor = 1000
|
|
147
|
-
op_meta = TimelineContainer(timeline)
|
|
148
|
-
timeline_dict = {}
|
|
149
|
-
timeline_dict['name'] = op_meta.op_name.split('/')[-1]
|
|
150
|
-
timeline_dict['ph'] = 'X'
|
|
151
|
-
timeline_dict['tid'] = op_meta.stream_id
|
|
152
|
-
timeline_dict['ts'] = (op_meta.start_time - min_cycle_counter) / factor
|
|
153
|
-
dur = op_meta.duration
|
|
154
|
-
timeline_dict['dur'] = dur # unit is us
|
|
155
|
-
if op_meta.pid is None:
|
|
156
|
-
timeline_dict['pid'] = int(f'2{self._device_id}')
|
|
157
|
-
else:
|
|
158
|
-
timeline_dict['pid'] = op_meta.pid
|
|
159
|
-
if op_meta.stream_id == "Scope Name":
|
|
160
|
-
# remove the level of scope name which has a format like "0-conv2-Conv2d".
|
|
161
|
-
timeline_dict['name'] = "-".join(op_meta.op_name.split('-')[1:])
|
|
162
|
-
timeline_dict['scope_level'] = int(op_meta.op_name.split('-')[0])
|
|
163
|
-
elif op_meta.stream_id[:len(self._host_cpu_op_label)] == self._host_cpu_op_label:
|
|
164
|
-
timeline_dict['pid'] = self._HOST_CPU_PID
|
|
165
|
-
|
|
166
|
-
if len(timeline) > 4:
|
|
167
|
-
# len(timeline) > 4 refers to activity data, else op data.
|
|
168
|
-
# Add args for activity data
|
|
169
|
-
args_dict = {}
|
|
170
|
-
for ix, value in enumerate(timeline[4:]):
|
|
171
|
-
args_dict[self._activity_keys_list[ix]] = value
|
|
172
|
-
timeline_dict['args'] = args_dict
|
|
173
|
-
timeline_dict['tid'] = f"Stream #{timeline_dict.get('tid', '0')}"
|
|
174
|
-
elif op_meta.stream_id not in ["Scope Name", "Steps"]:
|
|
175
|
-
# Update total time of operator execution.
|
|
176
|
-
self._timeline_summary['total_time'] += dur / factor
|
|
177
|
-
self._timeline_summary['op_exe_times'] += 1
|
|
178
|
-
|
|
179
|
-
self._update_format_meta_data(timeline_dict)
|
|
180
|
-
self._timeline_meta.append(timeline_dict)
|
|
181
|
-
|
|
182
|
-
def _load_timeline_data(self, reduce_op_type):
|
|
183
|
-
"""Load timeline data from file."""
|
|
184
|
-
op_file_path = self._get_and_validate_path(
|
|
185
|
-
self._output_op_execute_time_file_path)
|
|
186
|
-
|
|
187
|
-
timeline_list, communication_info = self._load_op_data(op_file_path, reduce_op_type)
|
|
188
|
-
communication_info.sort(key=lambda x: float(x[2]))
|
|
189
|
-
# Add host cpu op timeline.
|
|
190
|
-
cpu_timeline_generator = CpuTimelineGenerator(self._profiling_dir, self._device_id, self._model)
|
|
191
|
-
cpu_timeline_list = cpu_timeline_generator.load_cpu_op_data()
|
|
192
|
-
if cpu_timeline_list:
|
|
193
|
-
self._clock_synchronize_to_gpu(cpu_timeline_list)
|
|
194
|
-
timeline_list.extend(cpu_timeline_list)
|
|
195
|
-
timeline_list.sort(key=lambda x: float(x[2]))
|
|
196
|
-
self._max_scope_name_num = self._get_max_scope_name_num(timeline_list)
|
|
197
|
-
self._timeline_summary['max_scope_name_num'] = self._max_scope_name_num
|
|
198
|
-
|
|
199
|
-
# Generate step time.
|
|
200
|
-
factor_start_time_uint_to_duration = 1e-3
|
|
201
|
-
self._set_step_start_and_end_op_name(timeline_list)
|
|
202
|
-
# Fit gpu kernel async launch solution.
|
|
203
|
-
if self.is_gpu_kernel_async_launch():
|
|
204
|
-
step_time_list = self._get_step_time_list_from_step_trace()
|
|
205
|
-
else:
|
|
206
|
-
step_time_list = self._get_step_time_list(timeline_list, factor_start_time_uint_to_duration)
|
|
207
|
-
|
|
208
|
-
# Add Scope Name.
|
|
209
|
-
default_scope_name_time_list = self._get_scope_name_time_list(timeline_list, "Default",
|
|
210
|
-
factor_start_time_uint_to_duration)
|
|
211
|
-
gradient_scope_name_time_list = self._get_scope_name_time_list(timeline_list, "Gradients",
|
|
212
|
-
factor_start_time_uint_to_duration)
|
|
213
|
-
recompute_scope_name_time_list = self._get_scope_name_time_list(timeline_list, "recompute_Default",
|
|
214
|
-
factor_start_time_uint_to_duration)
|
|
215
|
-
cuda_op_timeline = self._load_activity_data()
|
|
216
|
-
|
|
217
|
-
# Add AllReduce info to timeline temp list and sort by start time.
|
|
218
|
-
if communication_info:
|
|
219
|
-
logger.debug('Allreduce info found, Start adding info to timeline...')
|
|
220
|
-
cluster_related_timeline = self._get_cluster_timeline(
|
|
221
|
-
timeline_list, cuda_op_timeline[1], communication_info, step_time_list)
|
|
222
|
-
timeline_list.extend(cluster_related_timeline)
|
|
223
|
-
timeline_list.extend(communication_info)
|
|
224
|
-
timeline_list.sort(key=lambda x: float(x[self._start_time_idx]))
|
|
225
|
-
|
|
226
|
-
timeline_list.extend(default_scope_name_time_list)
|
|
227
|
-
timeline_list.extend(gradient_scope_name_time_list)
|
|
228
|
-
timeline_list.extend(recompute_scope_name_time_list)
|
|
229
|
-
timeline_list.extend(step_time_list)
|
|
230
|
-
|
|
231
|
-
timeline_list.sort(key=lambda x: (float(x[self._start_time_idx])))
|
|
232
|
-
|
|
233
|
-
# Add cuda activity timeline.
|
|
234
|
-
timeline_list.extend(cuda_op_timeline[0])
|
|
235
|
-
timeline_list.sort(key=lambda x: float(x[2]))
|
|
236
|
-
|
|
237
|
-
return timeline_list
|
|
238
|
-
|
|
239
|
-
def _clock_synchronize_to_gpu(self, timeline_list):
|
|
240
|
-
"""Synchronize the timestamp from device to host."""
|
|
241
|
-
start_time_file_path = os.path.join(self._profiling_dir, f"start_time_{self._device_id}.txt")
|
|
242
|
-
|
|
243
|
-
try:
|
|
244
|
-
with open(start_time_file_path) as f_obj:
|
|
245
|
-
lines = f_obj.readlines()
|
|
246
|
-
# lines[0] stores the host monotonic time of start training.
|
|
247
|
-
host_monotonic_start_time = int(lines[0].strip().split(':')[-1])
|
|
248
|
-
# lines[1] stores the gpu time of start training.
|
|
249
|
-
gpu_start_time = int(lines[1].strip().split(':')[-1])
|
|
250
|
-
except (IOError, OSError) as err:
|
|
251
|
-
logger.critical(f'Error occurred when read {start_time_file_path}: {err}')
|
|
252
|
-
raise ProfilerIOException() from err
|
|
253
|
-
|
|
254
|
-
time_diff = gpu_start_time - host_monotonic_start_time
|
|
255
|
-
for idx, time_item in enumerate(timeline_list):
|
|
256
|
-
timeline_list[idx][self._start_time_idx] = int(time_item[self._start_time_idx]) + time_diff
|
|
257
|
-
|
|
258
|
-
def _load_op_data(self, op_file_path, reduce_op_type):
|
|
259
|
-
"""Load operator data from file"""
|
|
260
|
-
op_timeline_list = []
|
|
261
|
-
communication_info = []
|
|
262
|
-
try:
|
|
263
|
-
with open(op_file_path, 'r') as f_obj:
|
|
264
|
-
for line in f_obj:
|
|
265
|
-
self._timeline_summary['num_of_ops'] += 1
|
|
266
|
-
op_list = line.strip('\n').strip().split(';')
|
|
267
|
-
time_arr = op_list[-1]
|
|
268
|
-
time_arr = time_arr.split(" ")
|
|
269
|
-
for time in time_arr:
|
|
270
|
-
time = time.split(",")
|
|
271
|
-
line_list = op_list[:2] + time
|
|
272
|
-
communication_op_name = line_list[0].strip().split('/')[-1]
|
|
273
|
-
if communication_op_name not in reduce_op_type:
|
|
274
|
-
op_timeline_list.append(line_list)
|
|
275
|
-
else:
|
|
276
|
-
communication_info.append(line_list)
|
|
277
|
-
except (IOError, OSError) as err:
|
|
278
|
-
logger.critical('Error occurred when load operator timeline data intermediate file: %s', err)
|
|
279
|
-
raise ProfilerIOException() from err
|
|
280
|
-
|
|
281
|
-
return op_timeline_list, communication_info
|
|
282
|
-
|
|
283
|
-
def _load_activity_data(self):
|
|
284
|
-
"""Load activity data from file"""
|
|
285
|
-
activity_timeline_list = []
|
|
286
|
-
cuda_compute_ops_timeline_list = []
|
|
287
|
-
args_dict = {}
|
|
288
|
-
activity_file_path = self._get_and_validate_path(
|
|
289
|
-
self._output_activity_execute_time_file_path)
|
|
290
|
-
activity_args_file_path = self._get_and_validate_path(
|
|
291
|
-
self._output_gpu_activity_info_file_path)
|
|
292
|
-
|
|
293
|
-
if not os.path.exists(activity_args_file_path):
|
|
294
|
-
logger.error(f'The file {activity_args_file_path} does not exist.')
|
|
295
|
-
raise ProfilerFileNotFoundException(activity_args_file_path)
|
|
296
|
-
with open(activity_args_file_path, 'r') as args_file:
|
|
297
|
-
csv_reader = csv.reader(args_file)
|
|
298
|
-
keys_list = next(csv_reader)
|
|
299
|
-
# keys_list format is: name, type, op_full_name, stream_id, block_dim, grid_dim, ...
|
|
300
|
-
self._activity_keys_list = keys_list[1:3] + keys_list[4:6]
|
|
301
|
-
for info in csv_reader:
|
|
302
|
-
args_dict[info[0]] = info[1:3] + info[4:6]
|
|
303
|
-
|
|
304
|
-
if not os.path.exists(activity_file_path):
|
|
305
|
-
logger.error(f'The file {activity_file_path} does not exist.')
|
|
306
|
-
raise ProfilerFileNotFoundException(activity_file_path)
|
|
307
|
-
with open(activity_file_path, 'r') as f_obj:
|
|
308
|
-
for line in f_obj:
|
|
309
|
-
line_list = line.strip('\n').split(';')
|
|
310
|
-
# concat activity args info.
|
|
311
|
-
line_list += args_dict.get(line_list[0])
|
|
312
|
-
if not line_list[0].startswith('nccl'):
|
|
313
|
-
cuda_compute_ops_timeline_list.append(line_list)
|
|
314
|
-
activity_timeline_list.append(line_list)
|
|
315
|
-
|
|
316
|
-
return activity_timeline_list, cuda_compute_ops_timeline_list
|
|
317
|
-
|
|
318
|
-
def _get_step_time_list_from_step_trace(self):
|
|
319
|
-
"""Produce the time of each step based on step_trace_profiling file."""
|
|
320
|
-
# Record the time of each step.
|
|
321
|
-
step_time_list = []
|
|
322
|
-
step_start_op_name = []
|
|
323
|
-
step_end_op_name = []
|
|
324
|
-
step_num = 1
|
|
325
|
-
tid = "Steps"
|
|
326
|
-
step_trace_profiling_path = self._get_and_validate_path(
|
|
327
|
-
self._step_trace_original_filename
|
|
328
|
-
)
|
|
329
|
-
|
|
330
|
-
try:
|
|
331
|
-
with open(step_trace_profiling_path, 'r') as f_obj:
|
|
332
|
-
for line in f_obj:
|
|
333
|
-
line = line.strip().split()
|
|
334
|
-
step_start_op_name.append(line[0].split(',')[0])
|
|
335
|
-
step_end_op_name.append(line[3].split(',')[0])
|
|
336
|
-
cur_step_start_time = float(line[0].split(',')[1])
|
|
337
|
-
cur_step_end_time = float(line[3].split(',')[1])
|
|
338
|
-
# convert duration time unit from ns to us.
|
|
339
|
-
cur_step_duration_time = (cur_step_end_time - cur_step_start_time) / 1e3
|
|
340
|
-
step_time_item = [str(step_num), tid, cur_step_start_time, cur_step_duration_time]
|
|
341
|
-
step_time_list.append(step_time_item)
|
|
342
|
-
step_num += 1
|
|
343
|
-
except (IOError, OSError) as err:
|
|
344
|
-
logger.critical(f'Error occurred when read {step_trace_profiling_path}: {err}')
|
|
345
|
-
raise ProfilerIOException() from err
|
|
346
|
-
|
|
347
|
-
return step_time_list
|
|
348
|
-
|
|
349
|
-
def _get_cluster_timeline(self, timeline, activity_info, comm_info, step_info):
    """
    Analyse the cluster communication and computation data, and write result to file.

    With the time of one training step defined as "t_total", five metrics are
    produced per step:
        1) The time that "receive" operators are not overlapped by others (t1)
        2) The time that is consumed inside the stage (t_total - t1)
        3) The time that "communication" operators are not overlapped by others (t2)
        4) The time that is consumed by computation (t_total - t2)
        5) The time that "collective communication" operators are not overlapped by others (t3)
    In pipeline parallel mode a slow stage can be located with t_total - t1, and
    inside each stage a slow card with t_total - t2. t1 indicates how much the
    communication between stages slows down the training, t3 how much the
    communication inside each stage does.

    Args:
        timeline (list): Operator timeline records.
        activity_info (list): Activity records, concatenated with ``timeline``
            to form the computation timeline.
        comm_info (list): Communication operator records.
        step_info (list): Per-step records; the field at ``self._duration_idx``
            is used as t_total.

    Returns:
        list, the not-overlapped communication segments followed by element 2
        of the merged computation result, element 2 of the merged
        communication result and the free-time segments.

    Raises:
        ProfilerIOException: If writing the cluster metrics fails.
    """
    def by_start_time(item):
        return float(item[self._start_time_idx])

    comm_timeline = self._get_merged_time_list(
        comm_info, display_name="communication", factor=1e-3)

    compute_op_timeline = timeline + activity_info
    compute_op_timeline.sort(key=by_start_time)
    compute_timeline = self._get_merged_time_list(
        compute_op_timeline, get_interval_time=True, factor=1e-3)

    # Consider if the overlap will be 0 or not.
    comm_not_overlapped_timeline = self._get_intersection_time(
        compute_timeline[0], comm_timeline[0])

    # Process receive part.
    op_and_comm_timeline = timeline + comm_info
    op_and_comm_timeline.sort(key=by_start_time)
    receive_op_timeline = self._produce_two_separated_timeline(
        op_and_comm_timeline, "Receive-op")[0]
    # Any "Receive-op" record means the job runs in pipeline parallel mode.
    is_pipeline_parallel = bool(receive_op_timeline)
    # NOTE(review): the merge/intersection below runs even without receive
    # operators, because its result is read unconditionally further down.
    receive_op_merged_timeline = self._get_merged_time_list(
        receive_op_timeline, factor=1e-3)[0]
    receive_op_not_overlapped_timeline = self._get_intersection_time(
        compute_timeline[0],
        receive_op_merged_timeline,
        display_name="receive_op_not_overlapped"
    )

    # Process collective communication part.
    collective_comm_timeline = self._produce_two_separated_timeline(
        comm_info, "Receive-op")[-1]
    collective_comm_merged_timeline = self._get_merged_time_list(
        collective_comm_timeline, factor=1e-3)[0]
    collective_comm_not_overlapped_timeline = self._get_intersection_time(
        compute_timeline[0],
        collective_comm_merged_timeline,
        display_name="exclude_receive_op"
    )

    # Generate free time that exclude computation and communication time.
    busy_timeline = compute_op_timeline + comm_info
    busy_timeline.sort(key=by_start_time)
    free_timeline = self._get_merged_time_list(
        busy_timeline, get_interval_time=True, display_name="free_time", factor=1e-3)[1]

    # Compute these five metrics mentioned above per step.
    receive_alone_time = self._compute_time_inside_step(
        receive_op_not_overlapped_timeline, step_info)
    comm_alone_time = self._compute_time_inside_step(
        comm_not_overlapped_timeline, step_info)
    collective_comm_alone_time = self._compute_time_inside_step(
        collective_comm_not_overlapped_timeline, step_info)
    stage_time = []
    computation_time = []

    step_num = len(step_info)
    for step in range(step_num):
        try:
            if is_pipeline_parallel:
                stage_time.append(
                    step_info[step][self._duration_idx] - receive_alone_time[step])
        except IndexError as e:
            logger.error(e)
        try:
            computation_time.append(
                step_info[step][self._duration_idx] - comm_alone_time[step])
        except IndexError as e:
            logger.error(e)

    metrices_per_step_list = [
        computation_time, comm_alone_time,
        stage_time, receive_alone_time,
        collective_comm_alone_time
    ]
    if step_num > 1:
        # Append the average over all steps but the first one.
        for metric in metrices_per_step_list:
            metric.append(sum(metric[1:]) / (step_num - 1))
    try:
        self._write_cluster_metrices(metrices_per_step_list, is_pipeline_parallel, "Gpu", self._device_id)
    except (IOError, OSError) as err:
        logger.warning(err)
        raise ProfilerIOException from err

    return [
        *comm_not_overlapped_timeline,
        *compute_timeline[2],
        *comm_timeline[2],
        *free_timeline,
    ]
|
|
472
|
-
|
|
473
|
-
def _compute_time_inside_step(self, metric_timeline, step_time_list):
|
|
474
|
-
"""Compute per step time of metric_timeline."""
|
|
475
|
-
per_step_time_list = []
|
|
476
|
-
step = 0
|
|
477
|
-
cur_step_metric_time = 0
|
|
478
|
-
factor_us_to_ns = 1e3
|
|
479
|
-
step_end_time = step_time_list[step][self._start_time_idx] + \
|
|
480
|
-
step_time_list[step][self._duration_idx] * factor_us_to_ns
|
|
481
|
-
for time_item in metric_timeline:
|
|
482
|
-
start_time = time_item[self._start_time_idx]
|
|
483
|
-
if start_time > step_end_time:
|
|
484
|
-
per_step_time_list.append(cur_step_metric_time)
|
|
485
|
-
step += 1
|
|
486
|
-
if step >= len(step_time_list):
|
|
487
|
-
logger.warning("Compute profiler compute_time_inside_step time, "
|
|
488
|
-
"find the data length is more than step count, "
|
|
489
|
-
"maybe current graph has multi sub graph, skip the last data.")
|
|
490
|
-
break
|
|
491
|
-
step_end_time = step_time_list[step][self._start_time_idx] + \
|
|
492
|
-
step_time_list[step][self._duration_idx] * factor_us_to_ns
|
|
493
|
-
cur_step_metric_time = 0
|
|
494
|
-
cur_step_metric_time += time_item[self._duration_idx]
|
|
495
|
-
per_step_time_list.append(cur_step_metric_time)
|
|
496
|
-
|
|
497
|
-
return per_step_time_list
|
|
498
|
-
|
|
499
|
-
def _get_intersection_time(self, first_time_list, second_time_list,
|
|
500
|
-
display_name="communication_not_overlapped"):
|
|
501
|
-
"""Get intersection time of two time list."""
|
|
502
|
-
first_list_idx, second_list_idx = 0, 0
|
|
503
|
-
first_list_len = len(first_time_list)
|
|
504
|
-
second_list_len = len(second_time_list)
|
|
505
|
-
intersection_segment_display_list = []
|
|
506
|
-
factor_ns_to_us = 1e-3
|
|
507
|
-
while first_list_idx < first_list_len and second_list_idx < second_list_len:
|
|
508
|
-
intersection_start = max(
|
|
509
|
-
first_time_list[first_list_idx][self._start_time_idx],
|
|
510
|
-
second_time_list[second_list_idx][self._start_time_idx]
|
|
511
|
-
)
|
|
512
|
-
intersection_end = min(
|
|
513
|
-
first_time_list[first_list_idx][self._duration_idx],
|
|
514
|
-
second_time_list[second_list_idx][self._duration_idx]
|
|
515
|
-
)
|
|
516
|
-
if intersection_start < intersection_end:
|
|
517
|
-
intersection_segment_display_list.append(
|
|
518
|
-
[display_name, self._tid_dict.get(display_name, ('',))[0],
|
|
519
|
-
intersection_start, (intersection_end - intersection_start) * factor_ns_to_us,
|
|
520
|
-
self._tid_dict.get(display_name, ('', ''))[1]]
|
|
521
|
-
)
|
|
522
|
-
if first_time_list[first_list_idx][self._duration_idx] >= \
|
|
523
|
-
second_time_list[second_list_idx][self._duration_idx]:
|
|
524
|
-
second_list_idx += 1
|
|
525
|
-
else:
|
|
526
|
-
first_list_idx += 1
|
|
527
|
-
|
|
528
|
-
return intersection_segment_display_list
|
|
529
|
-
|
|
530
|
-
def _produce_two_separated_timeline(self, timeline, op_name):
|
|
531
|
-
"""Produce two separated timeline based on op_name."""
|
|
532
|
-
timeline_include_op_name = []
|
|
533
|
-
timeline_exclude_op_name = []
|
|
534
|
-
for time_item in timeline:
|
|
535
|
-
if op_name in time_item[self._op_name_idx]:
|
|
536
|
-
timeline_include_op_name.append(time_item)
|
|
537
|
-
else:
|
|
538
|
-
timeline_exclude_op_name.append(time_item)
|
|
539
|
-
return timeline_include_op_name, timeline_exclude_op_name
|
|
540
|
-
|
|
541
|
-
|
|
542
|
-
class CpuTimelineGenerator(GpuTimelineGenerator):
|
|
543
|
-
"""Generate cpu Timeline data from file."""
|
|
544
|
-
_output_op_execute_time_file_path = "cpu_op_execute_timestamp_{}.txt"
|
|
545
|
-
_display_filename = 'cpu_timeline_display_{}.json'
|
|
546
|
-
_timeline_summary_filename = 'cpu_timeline_summary_{}.json'
|
|
547
|
-
|
|
548
|
-
def __init__(self, profiling_dir, device_id, model):
    """
    Initialise the CPU timeline generator.

    Args:
        profiling_dir: Forwarded to the parent constructor.
        device_id: Forwarded to the parent constructor.
        model: Forwarded to the parent constructor.
    """
    # The third positional argument of the parent constructor is fixed to 0.
    # NOTE(review): presumably the rank size; confirm against the parent class.
    super(CpuTimelineGenerator, self).__init__(profiling_dir, device_id, 0, model)
    self._device_target = DeviceTarget.CPU.value
|
|
551
|
-
|
|
552
|
-
def get_timeline_data(self):
    """
    Get the CPU operator timeline with both time fields rescaled.

    The records returned by ``load_cpu_op_data`` are modified in place: the
    start time is divided by 1e6 (``factor_ns_to_ms``) and the duration by 1e3
    (``factor_us_to_ms``).

    Returns:
        list, the rescaled timeline records.
    """
    records = self.load_cpu_op_data()
    factor_ns_to_ms = 1e6
    factor_us_to_ms = 1e3
    for record in records:
        start_raw = record[self._start_time_idx]
        record[self._start_time_idx] = float(start_raw) / factor_ns_to_ms
        duration_raw = record[self._duration_idx]
        record[self._duration_idx] = float(duration_raw) / factor_us_to_ms

    return records
|
|
562
|
-
|
|
563
|
-
def init_timeline(self, pretty=False):
    """
    Init timeline metadata, adding all collected info.

    Every loaded record is parsed with a minimum cycle counter of 0. Records
    with exactly four fields also take part in the stream count, which is
    added to ``self._timeline_summary['num_of_streams']``.

    Args:
        pretty (bool): Stored in ``self._pretty``. Default: ``False``.
    """
    self._pretty = pretty
    records = self._load_timeline_data()

    # Collects the streams seen so far.
    streams_seen = {}
    for record in records:
        self._parse_timeline_data(record, 0)
        if len(record) != 4:
            continue
        self._update_num_of_streams(record, streams_seen)

    # Update timeline summary info
    self._timeline_summary['num_of_streams'] += len(streams_seen)
|
|
578
|
-
|
|
579
|
-
def load_cpu_op_data(self):
    """
    Load cpu operator data from file.

    Returns:
        list, the operator records with the field at ``self._duration_idx``
        converted to ``float`` and divided by 1e-3 (``factor_ms_to_us``).
        An empty list when the operator file does not exist.
    """
    op_file_path = self._get_and_validate_path(self._output_op_execute_time_file_path)
    if not os.path.exists(op_file_path):
        logger.info("No cpu operator info.")
        return []

    records = self._load_op_data(op_file_path)
    factor_ms_to_us = 1e-3
    for record in records:
        raw_duration = record[self._duration_idx]
        record[self._duration_idx] = float(raw_duration) / factor_ms_to_us

    return records
|
|
592
|
-
|
|
593
|
-
def _get_and_validate_path(self, file_name):
    """
    Build the path of an op or activity file and validate it.

    Args:
        file_name (str): File name template; ``self._device_id`` is substituted
            through ``str.format``.

    Returns:
        The result of ``validate_and_normalize_path`` for the file inside
        ``self._profiling_dir``.
    """
    raw_path = os.path.join(self._profiling_dir, file_name.format(self._device_id))
    return validate_and_normalize_path(raw_path)
|
|
602
|
-
|
|
603
|
-
def _load_op_data(self, op_file_path):
|
|
604
|
-
"""Load operator data from file"""
|
|
605
|
-
op_timeline_list = []
|
|
606
|
-
try:
|
|
607
|
-
with open(op_file_path, 'r') as f_obj:
|
|
608
|
-
for line in f_obj:
|
|
609
|
-
self._timeline_summary['num_of_ops'] += 1
|
|
610
|
-
op_list = line.strip('\n').strip().split(';')
|
|
611
|
-
time_arr = op_list[-1]
|
|
612
|
-
time_arr = time_arr.split(" ")
|
|
613
|
-
for time in time_arr:
|
|
614
|
-
time = time.split(",")
|
|
615
|
-
if len(time) == 3:
|
|
616
|
-
# for time value is [start_timestamp, duration, tid]
|
|
617
|
-
# line_list[1] would be like "HostCpuOps" + str(tid)
|
|
618
|
-
line_list = op_list[:1] + [op_list[1] + str(time[-1])] + time[:-1]
|
|
619
|
-
else:
|
|
620
|
-
# for time value is [start_timestamp, duration]
|
|
621
|
-
line_list = op_list[:2] + time
|
|
622
|
-
op_timeline_list.append(line_list)
|
|
623
|
-
except (IOError, OSError) as err:
|
|
624
|
-
logger.critical('Error occurred when load operator timeline data intermediate file: %s', err)
|
|
625
|
-
raise ProfilerIOException() from err
|
|
626
|
-
|
|
627
|
-
return op_timeline_list
|
|
628
|
-
|
|
629
|
-
def _load_timeline_data(self):
    """
    Load the CPU timeline and enrich it with derived records.

    The operator records are sorted by start time, then step, scope-name,
    merged-computation and free-time records are derived from them. Scope and
    step records are merged into the sorted list; the merged-computation and
    free-time records are appended after the final sort.

    Returns:
        list, the complete timeline records.
    """
    timeline_list = self.load_cpu_op_data()

    timeline_list.sort(key=lambda item: float(item[self._start_time_idx]))
    self._max_scope_name_num = self._get_max_scope_name_num(timeline_list)
    self._timeline_summary['max_scope_name_num'] = self._max_scope_name_num

    # Generate step time.
    factor_start_time_uint_to_duration = 1e-3
    self._set_step_start_and_end_op_name(timeline_list)
    step_time_list = self._get_step_time_list(timeline_list, factor_start_time_uint_to_duration)

    # Add merge compute time and free time
    merge_compute_timeline = self._get_merged_time_list(
        timeline_list, False, "computation_op", factor_start_time_uint_to_duration)[2]
    free_time_timeline = self._get_merged_time_list(
        timeline_list, True, "free_time", factor_start_time_uint_to_duration)[1]

    # Add Scope Name. All scope lists are computed before any of them is
    # merged into timeline_list.
    scope_time_lists = [
        self._get_scope_name_time_list(timeline_list, scope_name,
                                       factor_start_time_uint_to_duration)
        for scope_name in ("Default", "Gradients", "recompute_Default")
    ]
    for scope_time_list in scope_time_lists:
        timeline_list.extend(scope_time_list)
    timeline_list.extend(step_time_list)

    timeline_list.sort(key=lambda item: (float(item[self._start_time_idx]), item[self._tid_idx]))
    timeline_list.sort(key=lambda item: float(item[2]))
    timeline_list.extend(merge_compute_timeline)
    timeline_list.extend(free_time_timeline)

    return timeline_list
|
|
667
|
-
|
|
668
|
-
def _parse_timeline_data(self, timeline, min_cycle_counter):
    """
    Convert one timeline record into a display entry and store it.

    The entry is appended to ``self._timeline_meta``. For records that are
    neither five fields long nor on the "Scope Name" / "Steps" rows, the
    summary counters ``total_time`` and ``op_exe_times`` are updated as well.

    Args:
        timeline (list): One timeline record, wrapped in ``TimelineContainer``.
        min_cycle_counter: Value subtracted from the start time before scaling.
    """
    # factor to convert the time unit of start_time(ts) from 1ns to 1us for timeline display
    factor = 1000
    op_meta = TimelineContainer(timeline)
    timeline_info = {
        'name': op_meta.op_name.split('/')[-1],
        'ph': 'X',
        'tid': op_meta.stream_id,
        'ts': (op_meta.start_time - min_cycle_counter) / factor,
        'dur': op_meta.duration,
        'pid': int(self._device_id),
    }
    dur = timeline_info['dur']

    stream_id = op_meta.stream_id
    if stream_id == "Scope Name":
        # remove the level of scope name which has a format like "0-conv2-Conv2d".
        scope_parts = op_meta.op_name.split('-')
        timeline_info['name'] = "-".join(scope_parts[1:])
        timeline_info['scope_level'] = int(scope_parts[0])
    elif stream_id.startswith(self._host_cpu_op_label):
        timeline_info['pid'] = self._HOST_CPU_PID

    if len(timeline) == 5:
        # len(timeline) == 5 refers to analyse data.
        timeline_info["pid"] = op_meta.pid
    elif stream_id not in ("Scope Name", "Steps"):
        # Update total time of operator execution.
        self._timeline_summary['total_time'] += dur / factor
        self._timeline_summary['op_exe_times'] += 1

    self._update_format_meta_data(timeline_info)
    self._timeline_meta.append(timeline_info)
|