mindspore-2.6.0-cp39-cp39-win_amd64.whl → mindspore-2.7.0rc1-cp39-cp39-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mindspore might be problematic.
- mindspore/.commit_id +1 -1
- mindspore/__init__.py +1 -1
- mindspore/_c_dataengine.cp39-win_amd64.pyd +0 -0
- mindspore/_c_expression.cp39-win_amd64.pyd +0 -0
- mindspore/_c_mindrecord.cp39-win_amd64.pyd +0 -0
- mindspore/_checkparam.py +40 -9
- mindspore/{_deprecated → _extends/optimize}/__init__.py +9 -3
- mindspore/_extends/optimize/cell_utils.py +96 -0
- mindspore/_extends/parse/__init__.py +2 -2
- mindspore/_extends/parse/compile_config.py +44 -22
- mindspore/_extends/parse/deprecated/deprecated_tensor_method.py +1 -1
- mindspore/_extends/parse/parser.py +36 -61
- mindspore/_extends/parse/resources.py +39 -0
- mindspore/_extends/parse/standard_method.py +32 -13
- mindspore/_extends/parse/trope.py +8 -1
- mindspore/_extends/pijit/__init__.py +1 -2
- mindspore/amp.py +4 -4
- mindspore/avcodec-59.dll +0 -0
- mindspore/avdevice-59.dll +0 -0
- mindspore/avfilter-8.dll +0 -0
- mindspore/avformat-59.dll +0 -0
- mindspore/avutil-57.dll +0 -0
- mindspore/boost/adasum.py +1 -1
- mindspore/boost/boost_cell_wrapper.py +4 -4
- mindspore/common/__init__.py +27 -2
- mindspore/common/_grad_function.py +2 -1
- mindspore/common/_pijit_context.py +28 -7
- mindspore/common/_stub_tensor.py +1 -209
- mindspore/common/_tensor_cpp_method.py +1 -1
- mindspore/common/_tensor_docs.py +76 -15
- mindspore/common/api.py +193 -112
- mindspore/common/dtype.py +21 -11
- mindspore/common/dump.py +10 -15
- mindspore/common/generator.py +2 -3
- mindspore/common/hook_handle.py +11 -2
- mindspore/common/jit_config.py +1 -1
- mindspore/common/jit_trace.py +84 -105
- mindspore/common/parameter.py +26 -12
- mindspore/common/recompute.py +3 -3
- mindspore/common/sparse_tensor.py +0 -3
- mindspore/common/symbol.py +0 -1
- mindspore/common/tensor.py +48 -83
- mindspore/communication/_comm_helper.py +46 -4
- mindspore/communication/management.py +79 -7
- mindspore/context.py +38 -23
- mindspore/dataset/core/config.py +3 -3
- mindspore/dataset/engine/datasets.py +20 -7
- mindspore/dataset/engine/datasets_user_defined.py +32 -2
- mindspore/dataset/engine/iterators.py +2 -2
- mindspore/dataset/engine/obs/config_loader.py +2 -2
- mindspore/dataset/engine/obs/obs_mindrecord_dataset.py +8 -0
- mindspore/dataset/transforms/py_transforms.py +7 -3
- mindspore/dataset/transforms/transforms.py +7 -3
- mindspore/dataset/vision/validators.py +1 -0
- mindspore/device_context/ascend/device.py +1 -1
- mindspore/device_context/gpu/__init__.py +2 -2
- mindspore/device_context/gpu/device.py +1 -1
- mindspore/device_context/gpu/op_precision.py +4 -2
- mindspore/device_context/gpu/op_tuning.py +6 -3
- mindspore/device_manager.py +16 -9
- mindspore/dnnl.dll +0 -0
- mindspore/experimental/llm_boost/ascend_native/llama_boost_ascend_native.py +3 -5
- mindspore/experimental/llm_boost/atb/boost_base.py +2 -3
- mindspore/experimental/optim/adadelta.py +13 -20
- mindspore/experimental/optim/adagrad.py +15 -22
- mindspore/experimental/optim/adam.py +17 -24
- mindspore/experimental/optim/adamax.py +14 -22
- mindspore/experimental/optim/adamw.py +28 -34
- mindspore/experimental/optim/asgd.py +15 -25
- mindspore/experimental/optim/lr_scheduler.py +27 -45
- mindspore/experimental/optim/nadam.py +14 -24
- mindspore/experimental/optim/optimizer.py +13 -23
- mindspore/experimental/optim/radam.py +18 -24
- mindspore/experimental/optim/rmsprop.py +14 -25
- mindspore/experimental/optim/rprop.py +15 -26
- mindspore/experimental/optim/sgd.py +9 -19
- mindspore/hal/__init__.py +4 -4
- mindspore/hal/contiguous_tensors_handle.py +2 -2
- mindspore/hal/memory.py +1 -0
- mindspore/include/api/cell.h +37 -1
- mindspore/include/api/delegate.h +10 -0
- mindspore/include/api/model.h +3 -0
- mindspore/include/api/types.h +2 -2
- mindspore/include/c_api/model_c.h +0 -58
- mindspore/include/c_api/tensor_c.h +0 -26
- mindspore/include/dataset/vision_ascend.h +1 -1
- mindspore/jpeg62.dll +0 -0
- mindspore/mindrecord/tools/cifar10.py +60 -11
- mindspore/mindrecord/tools/cifar10_to_mr.py +5 -0
- mindspore/mindspore_backend_common.dll +0 -0
- mindspore/mindspore_backend_manager.dll +0 -0
- mindspore/mindspore_common.dll +0 -0
- mindspore/mindspore_core.dll +0 -0
- mindspore/mindspore_cpu_res_manager.dll +0 -0
- mindspore/mindspore_dump.dll +0 -0
- mindspore/mindspore_frontend.dll +0 -0
- mindspore/mindspore_glog.dll +0 -0
- mindspore/mindspore_memory_pool.dll +0 -0
- mindspore/mindspore_ms_backend.dll +0 -0
- mindspore/mindspore_ops.dll +0 -0
- mindspore/mindspore_ops_host.dll +0 -0
- mindspore/mindspore_ops_kernel_common.dll +0 -0
- mindspore/mindspore_profiler.dll +0 -0
- mindspore/mindspore_pyboost.dll +0 -0
- mindspore/mindspore_pynative.dll +0 -0
- mindspore/mindspore_res_manager.dll +0 -0
- mindspore/mindspore_runtime_pipeline.dll +0 -0
- mindspore/mint/__init__.py +4 -44
- mindspore/mint/distributed/__init__.py +1 -0
- mindspore/mint/distributed/distributed.py +208 -5
- mindspore/mint/nn/__init__.py +1 -1
- mindspore/mint/nn/functional.py +53 -6
- mindspore/mint/nn/layer/_functions.py +164 -294
- mindspore/mint/nn/layer/activation.py +8 -6
- mindspore/mint/nn/layer/conv.py +122 -98
- mindspore/mint/nn/layer/normalization.py +8 -22
- mindspore/mint/optim/adam.py +19 -18
- mindspore/mint/optim/adamw.py +14 -8
- mindspore/mint/optim/sgd.py +5 -5
- mindspore/nn/cell.py +325 -499
- mindspore/nn/grad/cell_grad.py +11 -12
- mindspore/nn/layer/activation.py +32 -34
- mindspore/nn/layer/basic.py +67 -64
- mindspore/nn/layer/channel_shuffle.py +4 -4
- mindspore/nn/layer/combined.py +4 -2
- mindspore/nn/layer/conv.py +86 -85
- mindspore/nn/layer/dense.py +9 -7
- mindspore/nn/layer/embedding.py +50 -52
- mindspore/nn/layer/image.py +37 -39
- mindspore/nn/layer/math.py +111 -112
- mindspore/nn/layer/normalization.py +56 -44
- mindspore/nn/layer/pooling.py +58 -63
- mindspore/nn/layer/rnn_cells.py +33 -33
- mindspore/nn/layer/rnns.py +56 -56
- mindspore/nn/layer/thor_layer.py +74 -73
- mindspore/nn/layer/transformer.py +11 -1
- mindspore/nn/learning_rate_schedule.py +20 -20
- mindspore/nn/loss/loss.py +79 -81
- mindspore/nn/optim/adam.py +1 -1
- mindspore/nn/optim/adasum.py +2 -2
- mindspore/nn/optim/optimizer.py +1 -1
- mindspore/nn/optim/thor.py +2 -2
- mindspore/nn/probability/distribution/exponential.py +2 -1
- mindspore/nn/probability/distribution/poisson.py +2 -1
- mindspore/nn/sparse/sparse.py +3 -3
- mindspore/nn/wrap/cell_wrapper.py +34 -37
- mindspore/nn/wrap/grad_reducer.py +37 -37
- mindspore/nn/wrap/loss_scale.py +72 -74
- mindspore/numpy/array_creations.py +5 -5
- mindspore/numpy/fft.py +1 -1
- mindspore/numpy/math_ops.py +1 -1
- mindspore/opencv_core452.dll +0 -0
- mindspore/opencv_imgcodecs452.dll +0 -0
- mindspore/opencv_imgproc452.dll +0 -0
- mindspore/ops/_grad_experimental/grad_comm_ops.py +51 -13
- mindspore/ops/_grad_experimental/grad_debug_ops.py +14 -0
- mindspore/ops/_vmap/vmap_array_ops.py +6 -13
- mindspore/ops/_vmap/vmap_nn_ops.py +8 -16
- mindspore/ops/auto_generate/cpp_create_prim_instance_helper.py +17 -8
- mindspore/ops/auto_generate/gen_extend_func.py +1 -51
- mindspore/ops/auto_generate/gen_ops_def.py +463 -257
- mindspore/ops/auto_generate/gen_ops_prim.py +1127 -885
- mindspore/ops/auto_generate/pyboost_inner_prim.py +31 -1
- mindspore/ops/composite/__init__.py +10 -0
- mindspore/ops/composite/base.py +8 -4
- mindspore/ops/composite/multitype_ops/__init__.py +12 -1
- mindspore/ops/composite/multitype_ops/_compile_utils.py +132 -108
- mindspore/ops/composite/multitype_ops/add_impl.py +70 -2
- mindspore/ops/composite/multitype_ops/div_impl.py +49 -0
- mindspore/ops/composite/multitype_ops/floordiv_impl.py +29 -0
- mindspore/ops/composite/multitype_ops/getitem_impl.py +11 -0
- mindspore/ops/composite/multitype_ops/mod_impl.py +5 -3
- mindspore/ops/composite/multitype_ops/mul_impl.py +49 -0
- mindspore/ops/composite/multitype_ops/setitem_impl.py +57 -0
- mindspore/ops/composite/multitype_ops/sub_impl.py +34 -0
- mindspore/ops/composite/multitype_ops/zeros_like_impl.py +14 -0
- mindspore/ops/function/__init__.py +3 -1
- mindspore/ops/function/_add_attr_func.py +11 -6
- mindspore/ops/function/array_func.py +7 -94
- mindspore/ops/function/debug_func.py +4 -3
- mindspore/ops/function/grad/grad_func.py +1 -1
- mindspore/ops/function/math_func.py +21 -367
- mindspore/ops/function/nn_func.py +26 -41
- mindspore/ops/function/other_func.py +4 -1
- mindspore/ops/function/random_func.py +31 -4
- mindspore/ops/functional.py +0 -2
- mindspore/ops/functional_overload.py +463 -6
- mindspore/ops/op_info_register.py +21 -0
- mindspore/ops/operations/__init__.py +5 -2
- mindspore/ops/operations/_custom_ops_utils.py +675 -8
- mindspore/ops/operations/_inner_ops.py +3 -6
- mindspore/ops/operations/_sequence_ops.py +1 -1
- mindspore/ops/operations/comm_ops.py +185 -26
- mindspore/ops/operations/custom_ops.py +235 -172
- mindspore/ops/operations/debug_ops.py +55 -4
- mindspore/ops/operations/image_ops.py +13 -13
- mindspore/ops/operations/manually_defined/ops_def.py +15 -16
- mindspore/ops/operations/math_ops.py +3 -4
- mindspore/ops/operations/nn_ops.py +5 -6
- mindspore/ops/primitive.py +6 -10
- mindspore/ops/tensor_method.py +36 -4
- mindspore/ops_generate/api/cpp_create_prim_instance_helper_generator.py +1 -1
- mindspore/ops_generate/api/functional_map_cpp_generator.py +10 -9
- mindspore/ops_generate/api/functions_cc_generator.py +58 -10
- mindspore/ops_generate/api/tensor_func_reg_cpp_generator.py +1 -1
- mindspore/ops_generate/common/base_generator.py +14 -0
- mindspore/ops_generate/common/gen_constants.py +7 -2
- mindspore/ops_generate/common/gen_utils.py +0 -19
- mindspore/ops_generate/common/op_proto.py +11 -4
- mindspore/ops_generate/common/template.py +88 -11
- mindspore/ops_generate/gen_ops.py +1 -1
- mindspore/ops_generate/op_def/lite_ops_cpp_generator.py +4 -4
- mindspore/ops_generate/op_def/ops_name_h_generator.py +0 -3
- mindspore/ops_generate/op_def/ops_primitive_h_generator.py +0 -4
- mindspore/ops_generate/op_def_py/op_prim_py_generator.py +5 -2
- mindspore/ops_generate/pyboost/auto_grad_impl_cc_generator.py +49 -8
- mindspore/ops_generate/pyboost/auto_grad_reg_cc_generator.py +2 -2
- mindspore/ops_generate/pyboost/gen_pyboost_func.py +31 -0
- mindspore/ops_generate/pyboost/op_template_parser.py +98 -72
- mindspore/ops_generate/pyboost/pyboost_functions_cpp_generator.py +70 -273
- mindspore/ops_generate/pyboost/pyboost_functions_h_generator.py +14 -6
- mindspore/ops_generate/pyboost/pyboost_functions_impl_cpp_generator.py +316 -0
- mindspore/ops_generate/pyboost/pyboost_functions_py_generator.py +1 -1
- mindspore/ops_generate/pyboost/pyboost_grad_function_cpp_generator.py +5 -3
- mindspore/ops_generate/pyboost/pyboost_inner_prim_generator.py +1 -1
- mindspore/ops_generate/pyboost/pyboost_internal_functions_cpp_generator.py +76 -0
- mindspore/ops_generate/pyboost/pyboost_internal_functions_h_generator.py +76 -0
- mindspore/ops_generate/pyboost/pyboost_internal_kernel_info_adapter_generator.py +125 -0
- mindspore/ops_generate/pyboost/pyboost_native_grad_functions_generator.py +4 -3
- mindspore/ops_generate/pyboost/pyboost_op_cpp_code_generator.py +348 -61
- mindspore/ops_generate/pyboost/pyboost_overload_functions_cpp_generator.py +1 -1
- mindspore/ops_generate/pyboost/pyboost_utils.py +118 -9
- mindspore/ops_generate/tensor_py_cc_generator.py +1 -24
- mindspore/parallel/_auto_parallel_context.py +4 -2
- mindspore/parallel/_cell_wrapper.py +106 -40
- mindspore/parallel/_parallel_serialization.py +1 -1
- mindspore/parallel/_ps_context.py +4 -6
- mindspore/parallel/_tensor.py +167 -12
- mindspore/parallel/_transformer/moe.py +1 -1
- mindspore/parallel/_transformer/transformer.py +13 -8
- mindspore/parallel/auto_parallel.py +12 -5
- mindspore/parallel/checkpoint_convert.py +3 -3
- mindspore/parallel/checkpoint_transform.py +3 -1
- mindspore/parallel/cluster/process_entity/_api.py +84 -48
- mindspore/parallel/cluster/process_entity/_utils.py +95 -7
- mindspore/parallel/cluster/run.py +43 -4
- mindspore/parallel/function/__init__.py +8 -1
- mindspore/parallel/function/reshard_func.py +1 -1
- mindspore/parallel/nn/__init__.py +15 -2
- mindspore/parallel/nn/parallel_cell_wrapper.py +9 -10
- mindspore/parallel/nn/parallel_grad_reducer.py +7 -6
- mindspore/parallel/shard.py +2 -2
- mindspore/parallel/transform_safetensors.py +462 -174
- mindspore/profiler/__init__.py +2 -1
- mindspore/profiler/analysis/parser/timeline_assembly_factory/ascend_timeline_assembler.py +7 -7
- mindspore/profiler/analysis/parser/timeline_assembly_factory/base_timeline_assembler.py +3 -0
- mindspore/profiler/analysis/parser/timeline_assembly_factory/trace_view_container.py +3 -0
- mindspore/profiler/analysis/parser/timeline_creator/cpu_op_timeline_creator.py +3 -3
- mindspore/profiler/analysis/parser/timeline_creator/fwk_timeline_creator.py +3 -3
- mindspore/profiler/analysis/parser/timeline_creator/msprof_timeline_creator.py +4 -4
- mindspore/profiler/analysis/parser/timeline_creator/scope_layer_timeline_creator.py +3 -3
- mindspore/profiler/analysis/parser/timeline_event/fwk_event.py +4 -1
- mindspore/profiler/analysis/parser/timeline_event/timeline_event_pool.py +2 -1
- mindspore/profiler/analysis/task_manager.py +1 -1
- mindspore/profiler/analysis/viewer/ascend_communication_viewer.py +5 -1
- mindspore/profiler/analysis/viewer/ascend_integrate_viewer.py +2 -1
- mindspore/profiler/analysis/viewer/ascend_op_memory_viewer.py +42 -22
- mindspore/profiler/analysis/viewer/ascend_step_trace_time_viewer.py +3 -2
- mindspore/profiler/analysis/viewer/ms_minddata_viewer.py +9 -5
- mindspore/profiler/analysis/viewer/ms_operator_details_viewer.py +132 -0
- mindspore/profiler/common/constant.py +16 -0
- mindspore/profiler/common/profiler_context.py +25 -27
- mindspore/profiler/common/profiler_info.py +0 -16
- mindspore/profiler/common/profiler_op_analyse.py +235 -0
- mindspore/profiler/common/profiler_output_path.py +23 -8
- mindspore/profiler/common/profiler_parameters.py +128 -35
- mindspore/profiler/dynamic_profile/__init__.py +0 -0
- mindspore/profiler/dynamic_profile/dynamic_monitor_proxy.py +39 -0
- mindspore/profiler/dynamic_profile/dynamic_profiler_config_context.py +666 -0
- mindspore/profiler/dynamic_profile/dynamic_profiler_utils.py +62 -0
- mindspore/profiler/dynamic_profiler.py +305 -314
- mindspore/profiler/envprofiler.py +12 -7
- mindspore/profiler/experimental_config.py +96 -6
- mindspore/profiler/mstx.py +33 -12
- mindspore/profiler/platform/__init__.py +2 -3
- mindspore/profiler/platform/npu_profiler.py +29 -19
- mindspore/profiler/profiler.py +35 -19
- mindspore/profiler/profiler_action_controller.py +64 -76
- mindspore/profiler/schedule.py +10 -4
- mindspore/rewrite/common/config.py +1 -0
- mindspore/rewrite/common/namer.py +1 -0
- mindspore/rewrite/common/namespace.py +1 -0
- mindspore/rewrite/node/node.py +31 -11
- mindspore/rewrite/parsers/assign_parser.py +1 -1
- mindspore/rewrite/symbol_tree/symbol_tree.py +1 -1
- mindspore/run_check/_check_version.py +7 -10
- mindspore/runtime/__init__.py +5 -5
- mindspore/runtime/event.py +10 -4
- mindspore/runtime/executor.py +60 -45
- mindspore/runtime/memory.py +21 -30
- mindspore/runtime/thread_bind_core.py +298 -164
- mindspore/safeguard/rewrite_obfuscation.py +12 -13
- mindspore/swresample-4.dll +0 -0
- mindspore/swscale-6.dll +0 -0
- mindspore/tinyxml2.dll +0 -0
- mindspore/train/_utils.py +6 -2
- mindspore/train/amp.py +43 -20
- mindspore/train/callback/__init__.py +5 -5
- mindspore/train/callback/_checkpoint.py +3 -6
- mindspore/train/callback/_flops_collector.py +1 -1
- mindspore/train/callback/_landscape.py +0 -1
- mindspore/train/callback/_train_fault_tolerance.py +71 -13
- mindspore/train/data_sink.py +11 -2
- mindspore/train/dataset_helper.py +9 -0
- mindspore/train/model.py +51 -33
- mindspore/train/serialization.py +133 -111
- mindspore/train/summary/summary_record.py +13 -2
- mindspore/turbojpeg.dll +0 -0
- mindspore/utils/__init__.py +3 -2
- mindspore/utils/dryrun.py +0 -6
- mindspore/utils/runtime_execution_order_check.py +162 -78
- mindspore/utils/sdc_detect.py +68 -0
- mindspore/utils/utils.py +6 -9
- mindspore/version.py +1 -1
- {mindspore-2.6.0.dist-info → mindspore-2.7.0rc1.dist-info}/METADATA +5 -4
- {mindspore-2.6.0.dist-info → mindspore-2.7.0rc1.dist-info}/RECORD +329 -367
- mindspore/_deprecated/jit.py +0 -198
- mindspore/experimental/es/__init__.py +0 -22
- mindspore/experimental/es/embedding_service.py +0 -891
- mindspore/experimental/es/embedding_service_layer.py +0 -581
- mindspore/profiler/parser/__init__.py +0 -14
- mindspore/profiler/parser/aicpu_data_parser.py +0 -272
- mindspore/profiler/parser/ascend_analysis/__init__.py +0 -14
- mindspore/profiler/parser/ascend_analysis/constant.py +0 -71
- mindspore/profiler/parser/ascend_analysis/file_manager.py +0 -180
- mindspore/profiler/parser/ascend_analysis/function_event.py +0 -185
- mindspore/profiler/parser/ascend_analysis/fwk_cann_parser.py +0 -136
- mindspore/profiler/parser/ascend_analysis/fwk_file_parser.py +0 -131
- mindspore/profiler/parser/ascend_analysis/msprof_timeline_parser.py +0 -104
- mindspore/profiler/parser/ascend_analysis/path_manager.py +0 -313
- mindspore/profiler/parser/ascend_analysis/profiler_info_parser.py +0 -123
- mindspore/profiler/parser/ascend_analysis/tlv_decoder.py +0 -86
- mindspore/profiler/parser/ascend_analysis/trace_event_manager.py +0 -75
- mindspore/profiler/parser/ascend_cluster_generator.py +0 -116
- mindspore/profiler/parser/ascend_communicate_generator.py +0 -314
- mindspore/profiler/parser/ascend_flops_generator.py +0 -116
- mindspore/profiler/parser/ascend_fpbp_generator.py +0 -82
- mindspore/profiler/parser/ascend_hccl_generator.py +0 -271
- mindspore/profiler/parser/ascend_integrate_generator.py +0 -42
- mindspore/profiler/parser/ascend_memory_generator.py +0 -185
- mindspore/profiler/parser/ascend_msprof_exporter.py +0 -282
- mindspore/profiler/parser/ascend_msprof_generator.py +0 -187
- mindspore/profiler/parser/ascend_op_generator.py +0 -334
- mindspore/profiler/parser/ascend_steptrace_generator.py +0 -94
- mindspore/profiler/parser/ascend_timeline_generator.py +0 -545
- mindspore/profiler/parser/base_timeline_generator.py +0 -483
- mindspore/profiler/parser/container.py +0 -229
- mindspore/profiler/parser/cpu_gpu_timeline_generator.py +0 -697
- mindspore/profiler/parser/flops_parser.py +0 -531
- mindspore/profiler/parser/framework_enum.py +0 -111
- mindspore/profiler/parser/framework_parser.py +0 -464
- mindspore/profiler/parser/framework_struct.py +0 -61
- mindspore/profiler/parser/gpu_analysis/__init__.py +0 -14
- mindspore/profiler/parser/gpu_analysis/function_event.py +0 -44
- mindspore/profiler/parser/gpu_analysis/fwk_file_parser.py +0 -89
- mindspore/profiler/parser/gpu_analysis/profiler_info_parser.py +0 -72
- mindspore/profiler/parser/hccl_parser.py +0 -573
- mindspore/profiler/parser/hwts_log_parser.py +0 -122
- mindspore/profiler/parser/integrator.py +0 -526
- mindspore/profiler/parser/memory_usage_parser.py +0 -277
- mindspore/profiler/parser/minddata_analyzer.py +0 -800
- mindspore/profiler/parser/minddata_parser.py +0 -186
- mindspore/profiler/parser/minddata_pipeline_parser.py +0 -299
- mindspore/profiler/parser/op_intermediate_parser.py +0 -149
- mindspore/profiler/parser/optime_parser.py +0 -250
- mindspore/profiler/parser/profiler_info.py +0 -213
- mindspore/profiler/parser/step_trace_parser.py +0 -666
- {mindspore-2.6.0.dist-info → mindspore-2.7.0rc1.dist-info}/WHEEL +0 -0
- {mindspore-2.6.0.dist-info → mindspore-2.7.0rc1.dist-info}/entry_points.txt +0 -0
- {mindspore-2.6.0.dist-info → mindspore-2.7.0rc1.dist-info}/top_level.txt +0 -0
--- a/mindspore/profiler/parser/base_timeline_generator.py
+++ /dev/null
@@ -1,483 +0,0 @@
-# Copyright 2022 Huawei Technologies Co., Ltd
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ============================================================================
-"""The integrator for integrating parsed profiling files."""
-import os
-import stat
-import csv
-import json
-
-from mindspore import context
-from mindspore import log as logger
-from mindspore.context import get_auto_parallel_context
-from mindspore.profiler.common.exceptions.exceptions import ProfilerIOException
-from mindspore.profiler.parser.integrator import DeviceTarget
-from mindspore.profiler.common.validator.validate_path import validate_and_normalize_path
-
-SIZE_LIMIT_DEFAULT = 20 * 1024 * 1024  # 20MB
-
-
-class BaseTimelineGenerator:
-    """
-    Analyse timeline data from file.
-    """
-    # AI Core Op pid is device_id
-    _AI_CPU_PID = 9000
-    _COMMUNICATION_OP_PID = 10000
-    _HOST_CPU_PID = 11000
-    _OP_OVERLAP_PID = 12000
-
-    _OP_GPU_ACTIVITY_PID = 13000
-
-    _RECEIVE_ALONE = 7997
-    _ALLREDUCE_ALONE = 7998
-    _MERGED_COMPUTATION_TID = 7999
-    _PURE_COMMUNICATION_TID = 8000
-    _MERGED_COMMUNICATION_TID = 8001
-    _FREE_TIME_TID = 8002
-    _STEPS_TID = 100000
-    _SCOPE_NAME_TID = 100001
-    _GPU_OP_TID = 100002
-    _HOST_CPU_OP_TID = 100003
-    _SINGLE_TID = 0
-
-    _STEPS_SORT_INDEX = -4
-
-    _output_timeline_data_file_path = 'output_timeline_data_{}.txt'
-    _timeline_meta = []
-    _format_meta_data_list = []
-    _thread_processed_list = []
-
-    _map_tid_name_to_int = {
-        "Steps": (-4, _STEPS_TID),
-        "Scope Name": (-3, _SCOPE_NAME_TID),
-        "GpuOps": (-2, _GPU_OP_TID),
-        "HostCpuOps": (-1, _HOST_CPU_OP_TID)
-    }
-    _timeline_summary = {
-        'total_time': 0,
-        'num_of_streams': 0,
-        'num_of_ops': 0,
-        'op_exe_times': 0,
-        'max_scope_name_num': 0,
-    }
-    _op_name_idx, _tid_idx, _start_time_idx, _duration_idx = 0, 1, 2, 3
-    _max_scope_name_num = 0
-    _host_cpu_op_label = 'Host CPU OP'
-    _gpu_op_label = "GPU Op"
-    _ascend_op_label = "Ascend Op"
-    _aicore_op_label = "AICORE OP"
-    _aicpu_op_label = "AICPU OP"
-
-    _device_id = 0
-    _rank_size = 1
-    _profiling_dir = ""
-    _timeline_summary_filename = ""
-    _display_filename = ""
-    _op_name_list = []
-    _device_target = DeviceTarget.ASCEND.value
-    _model = context.GRAPH_MODE
-    _framework_dir = "FRAMEWORK"
-    _op_range_name = "op_range_{}"
-
-    _col_names = ['op_name', 'stream_id', 'start_time', 'duration']
-
-    def __init__(self, device_target, model):
-        self._tid_dict = {
-            "computation_op": (self._MERGED_COMPUTATION_TID, self._OP_OVERLAP_PID),
-            "communication_not_overlapped": (self._PURE_COMMUNICATION_TID, self._OP_OVERLAP_PID),
-            "communication": (self._MERGED_COMMUNICATION_TID, self._OP_OVERLAP_PID),
-            "free_time": (self._FREE_TIME_TID, self._OP_OVERLAP_PID)
-        }
-        self._device_target = str(device_target).lower()
-        self._model = model
-        self._step_start_op_name = ""
-        self._step_end_op_name = ""
-        self._kernel_events = []
-        self._pretty = False
-
-    def get_kernel_event_list(self):
-        return self._kernel_events
-
-    @property
-    def indent(self):
-        indent = 1 if self._pretty else None
-        return indent
-
-    @staticmethod
-    def get_parallel_context():
-        """Get parallel context."""
-        try:
-            parallel_mode, stage_num = get_auto_parallel_context("parallel_mode"), get_auto_parallel_context(
-                "pipeline_stages")
-        except RuntimeError:
-            logger.warning("[profiler] the feature of cluster bottleneck analyse "
-                           "is not supported in offline parse mode.")
-            parallel_mode = "data_parallel"
-            stage_num = 1
-        if stage_num > 1:
-            parallel_mode = "pipeline-parallel"
-        elif parallel_mode != "data_parallel":
-            parallel_mode = "model-parallel"
-        else:
-            parallel_mode = "data-parallel"
-        return parallel_mode, stage_num
-
-    @staticmethod
-    def _update_num_of_streams(timeline, stream_count_dict):
-        """Update number of streams."""
-        stream_id = timeline[1]
-        if stream_id in ["Steps", "Scope Name"]:
-            return
-        if stream_id not in stream_count_dict.keys():
-            stream_count_dict[stream_id] = 1
-        else:
-            stream_count_dict[stream_id] += 1
-
-    def get_thread_label_name(self):
-        """Get process and thread config."""
-        device_process_label = self._get_device_process_label()
-        return [
-            {"name": "process_labels", "ph": "M", "pid": f'2{self._device_id}',
-             "args": {"labels": device_process_label}},
-            {"name": "process_labels", "ph": "M", "pid": self._AI_CPU_PID, "args": {"labels": self._aicpu_op_label}},
-            {"name": "process_labels", "ph": "M", "pid": self._COMMUNICATION_OP_PID,
-             "args": {"labels": "Communication Op"}},
-            {"name": "process_labels", "ph": "M", "pid": self._HOST_CPU_PID,
-             "args": {"labels": self._host_cpu_op_label}},
-            {"name": "process_labels", "ph": "M", "pid": self._OP_OVERLAP_PID,
-             "args": {"labels": "Op Overlap Analyse"}},
-            {"name": "process_labels", "ph": "M", "pid": self._OP_GPU_ACTIVITY_PID,
-             "args": {"labels": "Activity Op"}},
-
-            {"name": "process_sort_index", "ph": "M", "pid": f'2{self._device_id}', "args": {"sort_index": 2}},
-            {"name": "process_sort_index", "ph": "M", "pid": self._AI_CPU_PID, "args": {"sort_index": 10}},
-            {"name": "process_sort_index", "ph": "M", "pid": self._COMMUNICATION_OP_PID, "args": {"sort_index": 20}},
-            {"name": "process_sort_index", "ph": "M", "pid": self._HOST_CPU_PID, "args": {"sort_index": 30}},
-            {"name": "process_sort_index", "ph": "M", "pid": self._OP_OVERLAP_PID, "args": {"sort_index": 40}},
-
-            {"name": "thread_name", "ph": "M", "pid": self._HOST_CPU_PID, "tid": self._HOST_CPU_OP_TID,
-             "args": {"name": "Host CPU Op"}},
-            {"name": "thread_name", "ph": "M", "pid": self._OP_OVERLAP_PID, "tid": self._MERGED_COMPUTATION_TID,
-             "args": {"name": "Merged Computation Op"}},
-            {"name": "thread_name", "ph": "M", "pid": self._OP_OVERLAP_PID, "tid": self._PURE_COMMUNICATION_TID,
-             "args": {"name": "Pure Communication Op"}},
-            {"name": "thread_name", "ph": "M", "pid": self._OP_OVERLAP_PID, "tid": self._MERGED_COMMUNICATION_TID,
-             "args": {"name": "Merged Communication Op"}},
-            {"name": "thread_name", "ph": "M", "pid": self._OP_OVERLAP_PID, "tid": self._FREE_TIME_TID,
-             "args": {"name": "Free Time"}},
-            {"name": "thread_name", "ph": "M", "pid": f'2{self._device_id}', "tid": self._STEPS_TID,
-             "args": {"name": "Steps"}},
-            {"name": "thread_name", "ph": "M", "pid": f'2{self._device_id}', "tid": self._SINGLE_TID,
-             "args": {"name": "Ops"}},
-
-            {"name": "thread_sort_index", "ph": "M", "pid": self._OP_OVERLAP_PID, "tid": self._MERGED_COMPUTATION_TID,
-             "args": {"sort_index": self._MERGED_COMPUTATION_TID}},
-            {"name": "thread_sort_index", "ph": "M", "pid": self._OP_OVERLAP_PID, "tid": self._PURE_COMMUNICATION_TID,
-             "args": {"sort_index": self._PURE_COMMUNICATION_TID}},
-            {"name": "thread_sort_index", "ph": "M", "pid": self._OP_OVERLAP_PID, "tid": self._MERGED_COMMUNICATION_TID,
-             "args": {"sort_index": self._MERGED_COMMUNICATION_TID}},
-            {"name": "thread_sort_index", "ph": "M", "pid": self._OP_OVERLAP_PID, "tid": self._FREE_TIME_TID,
-             "args": {"sort_index": self._FREE_TIME_TID}},
-            {"name": "thread_sort_index", "ph": "M", "pid": f'2{self._device_id}', "tid": self._STEPS_TID,
-             "args": {"sort_index": self._STEPS_SORT_INDEX}},
-        ]
-
-    def write_timeline(self):
-        """Load data according to the parsed profiling files."""
-        # Write timeline to file.
-        logger.info('Writing timeline file...')
-        timeline_meta = self.write_timeline_to_json_by_limitation()
-        logger.info('Finished file writing!')
-        return timeline_meta
-
-    def write_timeline_to_json_by_limitation(self):
-        """Write timeline to json by limitation."""
-        display_file_path = os.path.join(
-            self._profiling_dir,
-            self._display_filename
-        )
-        display_file_path = validate_and_normalize_path(display_file_path)
-
-        try:
-            timeline_data = self.get_thread_label_name()
-            for data in self._timeline_meta:
-                timeline_data.append(data)
-                if "scope_level" in data.keys():
-                    self._max_scope_name_num = max(
-                        self._max_scope_name_num, data["scope_level"] + 1)
-
-            with os.fdopen(os.open(display_file_path, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600), 'w') as json_file:
-                json.dump(timeline_data, json_file, indent=self.indent)
-            os.chmod(display_file_path, stat.S_IREAD | stat.S_IWRITE)
-            return self._timeline_meta
-        except (IOError, OSError) as err:
-            logger.critical('Error occurred when write timeline display file: %s', err)
-            raise ProfilerIOException() from err
-
-    def write_timeline_summary(self):
-        """Write timeline summary to json."""
-        timeline_summary_file_path = os.path.join(
-            self._profiling_dir,
-            self._timeline_summary_filename
-        )
-
-        timeline_summary_file_path = validate_and_normalize_path(timeline_summary_file_path)
-
-        try:
-            with os.fdopen(os.open(timeline_summary_file_path,
-                                   os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600), 'w') as json_file:
-                json.dump(self._timeline_summary, json_file, indent=self.indent)
-        except (IOError, OSError) as err:
-            logger.critical('Error occurred when write timeline summary file: %s', err)
-            raise ProfilerIOException() from err
-        if os.path.exists(timeline_summary_file_path):
-            os.chmod(timeline_summary_file_path, stat.S_IREAD | stat.S_IWRITE)
-
-    def _get_device_process_label(self):
-        """Get device process label."""
-        device_process_label = self._aicore_op_label
-        if self._device_target == DeviceTarget.ASCEND.value:
-            if self._model == context.GRAPH_MODE:
-                device_process_label = self._aicore_op_label
-            elif self._model == context.PYNATIVE_MODE:
-                device_process_label = self._ascend_op_label
-        elif self._device_target == DeviceTarget.GPU.value:
-            device_process_label = self._gpu_op_label
-        elif self._device_target == DeviceTarget.CPU.value:
-            device_process_label = self._host_cpu_op_label
-        return device_process_label
-
-    def _get_merged_time_list(self, time_list, get_interval_time=False, display_name="computation_op", factor=1):
-        """
-        Get merged time segment list.
-
-        The process of merge is, for example, there is a list [[1,5], [2,6], [7,8]],
-        each items in this list contains a start_time and end_time,
-        the merged result is [[1,6], [7,8]].
-        """
-        time_merged_segment_list = []
-        tid = self._tid_dict.get(display_name, (0, 0))[0]
-        pid = self._tid_dict.get(display_name, (0, 0))[1]
-        for time_item in time_list:
-            time_segment = list(map(float, time_item[self._start_time_idx:self._duration_idx + 1]))
-            time_segment[1] = time_segment[0] + time_segment[1] / factor
-            if not time_merged_segment_list or \
-                    time_segment[0] > time_merged_segment_list[-1]:
-                time_merged_segment_list.extend(time_segment)
-            else:
-                time_merged_segment_list[-1] = max(
-                    time_merged_segment_list[-1],
-                    time_segment[1]
-                )
-
-        # merged_display_list data used for ui page.
-        merged_display_list = []
-        for i in range(len(time_merged_segment_list) // 2):
-            merged_display_list.append([display_name, tid, time_merged_segment_list[i * 2],
-                                        (time_merged_segment_list[i * 2 + 1] - time_merged_segment_list[
-                                            i * 2]) * factor, pid])
-
-        if get_interval_time:
-            time_merged_segment_list = time_merged_segment_list[1:-1]
-
-        # merged_res_list data used to compute overlap with other time_list.
-        merged_res_list = []
-        for i in range(len(time_merged_segment_list) // 2):
-            merged_res_list.append(
-                [display_name, tid, time_merged_segment_list[i * 2], time_merged_segment_list[i * 2 + 1], pid])
-
-        # interval_display_list is interval time used for ui page.
-        interval_display_list = []
-        for i in range(len(time_merged_segment_list) // 2):
-            interval_display_list.append([display_name, tid, time_merged_segment_list[i * 2],
-                                          (time_merged_segment_list[i * 2 + 1] - time_merged_segment_list[
-                                              i * 2]) * factor, pid])
-
-        return merged_res_list, interval_display_list, merged_display_list
-
-    def _update_format_meta_data(self, timeline_dict):
-        """Update format meta data which control the display arrange and map the thread name."""
-        thread_name_meta_data = {
-            "name": "thread_name",
-            "pid": int(f'2{self._device_id}'),
-            "tid": 100000,
-            "ts": 0,
-            "ph": "M",
-            "cat": "__metadata",
-            "args": {
-                "name": "Steps"
-            }
-        }
-        tid_name = timeline_dict['tid']
-        sort_index = 0
-
-        if tid_name in self._map_tid_name_to_int:
-            sort_index, tid = self._map_tid_name_to_int.get(tid_name)
-        elif tid_name.startswith("Stream"):
-            tid = int(tid_name.split("#")[-1])
-            sort_index = tid
-        else:
-            return
-
-        if self._host_cpu_op_label == tid_name[:len(self._host_cpu_op_label)]:
-            thread_name_meta_data['pid'] = self._HOST_CPU_PID
-
-        thread_name_meta_data["tid"] = tid
-        thread_name_meta_data.get("args")["name"] = tid_name
-        self._format_meta_data_list.append(thread_name_meta_data)
-
-        thread_name_meta_data['name'] = "thread_sort_index"
-        thread_name_meta_data["args"] = {"sort_index": sort_index}
-        self._format_meta_data_list.append(thread_name_meta_data)
-        timeline_dict["tid"] = tid
-
-        if tid_name in self._thread_processed_list:
-            return
-        self._thread_processed_list.append(tid_name)
-
-    def _get_max_scope_name_num(self, timeline_list):
-        """Get the max number of scope level from all operator."""
-        max_scope_name_num = 0
-        for time_item in timeline_list:
-            cur_scope_name_num = len(time_item[self._op_name_idx].split('/')) - 1
-            max_scope_name_num = max(cur_scope_name_num, max_scope_name_num)
-
-        return max_scope_name_num
-
-    def _get_scope_name_time_list(self, timeline_list, subgraph, factor_start_time_to_duration=1):
-        """Produce the timeline of hierarchical scope name."""
-        # the key of scope_name_start_duration_dict is scope name, the value is a dict which store the
-        # start and end index of time_item in timeline_list.
-        scope_name_start_duration_dict = {}
-        scope_name_time_list = []
-        sort_idx = {"op_full_name_idx": 0, "scope_name_idx": 0, "invalid_idx": -1}
-        for idx, time_item in enumerate(timeline_list):
-            scope_name_list = time_item[sort_idx.get("op_full_name_idx")].split('/')[:-1]
-            # skip Default/InitDataSetQueue operator.
-            if time_item[sort_idx.get("op_full_name_idx")].startswith("Default/InitDataSetQueue"):
-                scope_name_list = []
-            # process scope name of subgraph(Default/Gradients/recompute_Default) only.
-            if scope_name_list and scope_name_list[0] != subgraph:
-                scope_name_list = []
-            # add the level of scope name, used to distinguish the same name at different scope level.
-            scope_name_list = [f"{scope_level}-{scope_name}"
-                               for scope_level, scope_name in enumerate(scope_name_list)]
-
-            # update the start and end index of time_item according to current scope_name
-            for scope_name in scope_name_list:
-                if scope_name not in scope_name_start_duration_dict:
-                    scope_name_start_duration_dict[scope_name] = {'start_item_idx': idx, 'end_item_idx': idx}
-                if scope_name_start_duration_dict.get(scope_name)['start_item_idx'] == sort_idx.get("invalid_idx"):
-                    scope_name_start_duration_dict[scope_name] = {'start_item_idx': idx, 'end_item_idx': idx}
-                else:
-                    scope_name_start_duration_dict.get(scope_name)['end_item_idx'] = idx
-            # if the key(scope name) in scope_name_start_duration_dict does not appear in scope_name_list,
-            # it means this key(scope name) is end and it is append to scope_name_time_list.
-            for key, val in scope_name_start_duration_dict.items():
-                if val['start_item_idx'] == sort_idx.get("invalid_idx"):
-                    continue
-                if (key not in scope_name_list) \
-                        or idx == (len(timeline_list) - 1) \
-                        or time_item[sort_idx.get("op_full_name_idx")] == self._step_end_op_name:
-                    start_time = timeline_list[val['start_item_idx']][self._start_time_idx]
-                    duration = (float(timeline_list[val['end_item_idx']][self._start_time_idx]) - float(start_time)) * \
-                               factor_start_time_to_duration + \
-                               float(timeline_list[val['end_item_idx']][self._duration_idx])
-                    scope_name_time_list.append([key, "Scope Name", start_time, duration])
-                    scope_name_start_duration_dict.get(key)['start_item_idx'] = sort_idx.get("invalid_idx")
-
-        # x[scope_name_idx] is a scope name like "0-Default".
-        # if two element in scope_name_time_list have the same start time,
-        # the previous element in list will displayed at the higher line in UI page.
-        scope_name_time_list.sort(
-            key=lambda x: (float(x[self._start_time_idx]), int(x[sort_idx.get("scope_name_idx")].split('-')[0]))
-        )
-
-        return scope_name_time_list
-
-    def _set_step_start_and_end_op_name(self, timeline_list):
-        """Set the start and end operator full name of each step."""
-        if not timeline_list:
-            return
-        start_op_idx = 0
-        if timeline_list[0][self._op_name_idx].startswith("Default/InitDataSetQueue"):
-            start_op_idx = 1
-        self._step_start_op_name = timeline_list[start_op_idx][self._op_name_idx]
-        self._step_end_op_name = self._step_start_op_name
-        if len(timeline_list) > (start_op_idx + 1):
-            for time_item in timeline_list[start_op_idx + 1:]:
-                if time_item[self._op_name_idx] != self._step_start_op_name:
-                    self._step_end_op_name = time_item[self._op_name_idx]
-                else:
-                    break
-
-    def _get_step_time_list(self, timeline_list, factor_start_time_to_duration=1):
-        """Produce the time of each step."""
-        # Record the time of each step.
-        step_time_list = []
-        step_num = 1
-        tid = "Steps"
-        cur_step_start_time, cur_step_duration_time = 0, 0
-        for time_item in timeline_list:
-            if time_item[self._op_name_idx] == self._step_start_op_name:
-                cur_step_start_time = time_item[self._start_time_idx]
-            if time_item[self._op_name_idx] == self._step_end_op_name:
-                cur_step_duration_time = (float(time_item[self._start_time_idx]) - float(cur_step_start_time)) * \
-                                         float(factor_start_time_to_duration) + float(time_item[self._duration_idx])
-                step_time_item = [str(step_num), tid, float(cur_step_start_time), cur_step_duration_time]
-                step_time_list.append(step_time_item)
-                step_num += 1
-
-        return step_time_list
-
-    def _write_cluster_metrices(self, metrices, is_pipeline_parallel, device_target, dev_id):
-        """Write cluster metric."""
-        # Note that the feature of cluster bottleneck analyse is not supported in offline parse mode,
-        # due to that parallel context is not set.
-        if context.get_context("mode") == context.PYNATIVE_MODE:
-            return
-        parallel_mode, stage_num = BaseTimelineGenerator.get_parallel_context()
-
-        unit = 1 if device_target == "Ascend" else 1e3
-        time_decimal_digits = 4
-        cluster_analyse_file_path = os.path.join(
-            self._profiling_dir,
-            self._cluster_analyse_filename.format(parallel_mode, stage_num, self._rank_size, dev_id)
-        )
-        cluster_analyse_file_path = validate_and_normalize_path(cluster_analyse_file_path)
-
-        with os.fdopen(os.open(cluster_analyse_file_path, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600),
-                       'w') as file_handle:
-            csv_writer = csv.writer(file_handle)
-            if is_pipeline_parallel:
-                header = [
-                    'computation_time', 'communication_alone_time', 'stage_time',
-                    'receive_alone_time', 'collective_communication_alone_time'
-                ]
-                zip_metrices = zip(metrices[0], metrices[1], metrices[2], metrices[3], metrices[4])
-            else:
-                header = ['computation_time', 'communication_alone_time']
-                zip_metrices = zip(metrices[0], metrices[1])
-            csv_writer.writerow(header)
-            for row_data in zip_metrices:
-                row_data = [round(val / unit, time_decimal_digits) for val in row_data]
-                csv_writer.writerow(row_data)
-        os.chmod(cluster_analyse_file_path, stat.S_IREAD | stat.S_IWRITE)
-
-    def _register_op_name(self, timeline_list):
-        """Register op name to op name list."""
-        for timeline in timeline_list:
-            if timeline and timeline[self._op_name_idx] not in self._op_name_list:
-                self._op_name_list.append(timeline[self._op_name_idx])