mindspore 2.6.0__cp310-cp310-win_amd64.whl → 2.7.0__cp310-cp310-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mindspore might be problematic. Click here for more details.
- mindspore/.commit_id +1 -1
- mindspore/Microsoft.VisualStudio.Telemetry.dll +0 -0
- mindspore/Newtonsoft.Json.dll +0 -0
- mindspore/__init__.py +2 -2
- mindspore/_c_dataengine.cp310-win_amd64.pyd +0 -0
- mindspore/_c_expression.cp310-win_amd64.pyd +0 -0
- mindspore/_c_mindrecord.cp310-win_amd64.pyd +0 -0
- mindspore/_checkparam.py +42 -11
- mindspore/_extends/builtin_operations.py +3 -3
- mindspore/{_deprecated → _extends/optimize}/__init__.py +9 -3
- mindspore/_extends/optimize/cell_utils.py +96 -0
- mindspore/_extends/parallel_compile/akg_compiler/custom.py +1109 -0
- mindspore/_extends/parallel_compile/akg_compiler/gen_custom_op_files.py +1 -1
- mindspore/_extends/parse/__init__.py +3 -3
- mindspore/_extends/parse/compile_config.py +44 -22
- mindspore/_extends/parse/deprecated/deprecated_tensor_method.py +1 -2
- mindspore/_extends/parse/parser.py +64 -83
- mindspore/_extends/parse/resources.py +39 -0
- mindspore/_extends/parse/standard_method.py +47 -14
- mindspore/_extends/parse/trope.py +8 -1
- mindspore/_extends/pijit/__init__.py +1 -2
- mindspore/_extends/pijit/pijit_func_white_list.py +2 -5
- mindspore/amp.py +4 -22
- mindspore/atlprov.dll +0 -0
- mindspore/avcodec-59.dll +0 -0
- mindspore/avdevice-59.dll +0 -0
- mindspore/avfilter-8.dll +0 -0
- mindspore/avformat-59.dll +0 -0
- mindspore/avutil-57.dll +0 -0
- mindspore/boost/adasum.py +1 -1
- mindspore/boost/boost_cell_wrapper.py +4 -4
- mindspore/c1.dll +0 -0
- mindspore/c1xx.dll +0 -0
- mindspore/c2.dll +0 -0
- mindspore/common/__init__.py +43 -12
- mindspore/common/_grad_function.py +2 -1
- mindspore/common/_pijit_context.py +28 -7
- mindspore/common/_stub_tensor.py +1 -209
- mindspore/common/_tensor_cpp_method.py +1 -1
- mindspore/common/_tensor_docs.py +177 -52
- mindspore/common/_utils.py +9 -1
- mindspore/common/api.py +338 -208
- mindspore/common/dtype.py +108 -57
- mindspore/common/dump.py +11 -16
- mindspore/common/dynamic_shape/__init__.py +0 -0
- mindspore/common/{auto_dynamic_shape.py → dynamic_shape/auto_dynamic_shape.py} +17 -23
- mindspore/common/dynamic_shape/enable_dynamic.py +197 -0
- mindspore/common/file_system.py +59 -9
- mindspore/common/generator.py +2 -3
- mindspore/common/hook_handle.py +33 -5
- mindspore/common/jit_config.py +1 -1
- mindspore/common/jit_trace.py +84 -105
- mindspore/common/np_dtype.py +3 -3
- mindspore/common/parameter.py +27 -29
- mindspore/common/recompute.py +5 -7
- mindspore/common/sparse_tensor.py +0 -3
- mindspore/common/symbol.py +0 -1
- mindspore/common/tensor.py +84 -133
- mindspore/communication/_comm_helper.py +46 -4
- mindspore/communication/management.py +79 -7
- mindspore/context.py +47 -38
- mindspore/dataset/__init__.py +1 -1
- mindspore/dataset/audio/transforms.py +1 -1
- mindspore/dataset/core/config.py +38 -4
- mindspore/dataset/engine/datasets.py +350 -322
- mindspore/dataset/engine/datasets_user_defined.py +69 -23
- mindspore/dataset/engine/iterators.py +2 -2
- mindspore/dataset/engine/obs/config_loader.py +2 -2
- mindspore/dataset/engine/obs/obs_mindrecord_dataset.py +8 -0
- mindspore/dataset/transforms/c_transforms.py +2 -2
- mindspore/dataset/transforms/py_transforms.py +7 -3
- mindspore/dataset/transforms/transforms.py +10 -6
- mindspore/dataset/vision/__init__.py +1 -1
- mindspore/dataset/vision/py_transforms.py +8 -8
- mindspore/dataset/vision/transforms.py +17 -5
- mindspore/dataset/vision/utils.py +632 -21
- mindspore/dataset/vision/validators.py +1 -0
- mindspore/device_context/ascend/device.py +1 -1
- mindspore/device_context/ascend/op_tuning.py +35 -1
- mindspore/device_context/gpu/__init__.py +2 -2
- mindspore/device_context/gpu/device.py +1 -1
- mindspore/device_context/gpu/op_precision.py +4 -2
- mindspore/device_context/gpu/op_tuning.py +6 -3
- mindspore/device_manager.py +16 -9
- mindspore/dnnl.dll +0 -0
- mindspore/dpcmi.dll +0 -0
- mindspore/experimental/llm_boost/ascend_native/llama_boost_ascend_native.py +5 -4
- mindspore/experimental/llm_boost/atb/boost_base.py +2 -3
- mindspore/experimental/optim/adadelta.py +13 -20
- mindspore/experimental/optim/adagrad.py +15 -22
- mindspore/experimental/optim/adam.py +17 -24
- mindspore/experimental/optim/adamax.py +14 -22
- mindspore/experimental/optim/adamw.py +28 -34
- mindspore/experimental/optim/asgd.py +15 -25
- mindspore/experimental/optim/lr_scheduler.py +27 -45
- mindspore/experimental/optim/nadam.py +14 -24
- mindspore/experimental/optim/optimizer.py +13 -23
- mindspore/experimental/optim/radam.py +18 -24
- mindspore/experimental/optim/rmsprop.py +14 -25
- mindspore/experimental/optim/rprop.py +15 -26
- mindspore/experimental/optim/sgd.py +9 -19
- mindspore/hal/__init__.py +4 -4
- mindspore/hal/contiguous_tensors_handle.py +2 -2
- mindspore/hal/memory.py +1 -0
- mindspore/include/api/cell.h +65 -5
- mindspore/include/api/cfg.h +24 -7
- mindspore/include/api/context.h +1 -0
- mindspore/include/api/delegate.h +10 -2
- mindspore/include/api/dual_abi_helper.h +100 -19
- mindspore/include/api/graph.h +14 -1
- mindspore/include/api/kernel.h +16 -3
- mindspore/include/api/kernel_api.h +9 -1
- mindspore/include/api/metrics/accuracy.h +9 -0
- mindspore/include/api/model.h +8 -1
- mindspore/include/api/model_group.h +4 -0
- mindspore/include/api/model_parallel_runner.h +2 -0
- mindspore/include/api/status.h +48 -10
- mindspore/include/api/types.h +8 -3
- mindspore/include/c_api/model_c.h +0 -58
- mindspore/include/c_api/tensor_c.h +0 -26
- mindspore/include/dataset/constants.h +9 -0
- mindspore/include/dataset/vision_ascend.h +1 -1
- mindspore/jpeg62.dll +0 -0
- mindspore/mindrecord/tools/cifar10.py +61 -11
- mindspore/mindrecord/tools/cifar10_to_mr.py +5 -0
- mindspore/mindspore_backend_common.dll +0 -0
- mindspore/mindspore_backend_manager.dll +0 -0
- mindspore/mindspore_common.dll +0 -0
- mindspore/mindspore_core.dll +0 -0
- mindspore/mindspore_cpu_res_manager.dll +0 -0
- mindspore/mindspore_dump.dll +0 -0
- mindspore/mindspore_frontend.dll +0 -0
- mindspore/mindspore_glog.dll +0 -0
- mindspore/mindspore_memory_pool.dll +0 -0
- mindspore/mindspore_ms_backend.dll +0 -0
- mindspore/mindspore_ops.dll +0 -0
- mindspore/mindspore_ops_host.dll +0 -0
- mindspore/mindspore_ops_kernel_common.dll +0 -0
- mindspore/mindspore_profiler.dll +0 -0
- mindspore/mindspore_pyboost.dll +0 -0
- mindspore/mindspore_pynative.dll +0 -0
- mindspore/mindspore_res_manager.dll +0 -0
- mindspore/mindspore_runtime_pipeline.dll +0 -0
- mindspore/mint/__init__.py +4 -44
- mindspore/mint/distributed/__init__.py +5 -0
- mindspore/mint/distributed/distributed.py +425 -19
- mindspore/mint/nn/__init__.py +1 -1
- mindspore/mint/nn/functional.py +53 -6
- mindspore/mint/nn/layer/_functions.py +163 -294
- mindspore/mint/nn/layer/activation.py +8 -6
- mindspore/mint/nn/layer/conv.py +125 -101
- mindspore/mint/nn/layer/normalization.py +11 -25
- mindspore/mint/optim/adam.py +19 -18
- mindspore/mint/optim/adamw.py +14 -8
- mindspore/mint/optim/sgd.py +5 -5
- mindspore/msobj140.dll +0 -0
- mindspore/mspdb140.dll +0 -0
- mindspore/mspdbcore.dll +0 -0
- mindspore/mspdbst.dll +0 -0
- mindspore/mspft140.dll +0 -0
- mindspore/msvcdis140.dll +0 -0
- mindspore/msvcp140_1.dll +0 -0
- mindspore/msvcp140_2.dll +0 -0
- mindspore/msvcp140_atomic_wait.dll +0 -0
- mindspore/msvcp140_codecvt_ids.dll +0 -0
- mindspore/nn/cell.py +488 -620
- mindspore/nn/grad/cell_grad.py +11 -12
- mindspore/nn/layer/activation.py +36 -36
- mindspore/nn/layer/basic.py +74 -77
- mindspore/nn/layer/channel_shuffle.py +4 -4
- mindspore/nn/layer/combined.py +4 -2
- mindspore/nn/layer/conv.py +86 -85
- mindspore/nn/layer/dense.py +9 -7
- mindspore/nn/layer/embedding.py +50 -52
- mindspore/nn/layer/image.py +38 -40
- mindspore/nn/layer/math.py +111 -112
- mindspore/nn/layer/normalization.py +56 -44
- mindspore/nn/layer/pooling.py +58 -63
- mindspore/nn/layer/rnn_cells.py +33 -33
- mindspore/nn/layer/rnns.py +56 -56
- mindspore/nn/layer/thor_layer.py +74 -73
- mindspore/nn/layer/transformer.py +11 -1
- mindspore/nn/learning_rate_schedule.py +20 -20
- mindspore/nn/loss/loss.py +79 -81
- mindspore/nn/optim/adam.py +2 -4
- mindspore/nn/optim/adasum.py +2 -2
- mindspore/nn/optim/lamb.py +1 -3
- mindspore/nn/optim/optimizer.py +1 -1
- mindspore/nn/optim/tft_wrapper.py +2 -3
- mindspore/nn/optim/thor.py +2 -2
- mindspore/nn/probability/distribution/_utils/utils.py +2 -2
- mindspore/nn/probability/distribution/exponential.py +2 -1
- mindspore/nn/probability/distribution/poisson.py +2 -1
- mindspore/nn/sparse/sparse.py +3 -3
- mindspore/nn/wrap/cell_wrapper.py +73 -42
- mindspore/nn/wrap/grad_reducer.py +37 -52
- mindspore/nn/wrap/loss_scale.py +72 -74
- mindspore/numpy/array_creations.py +7 -7
- mindspore/numpy/fft.py +1 -1
- mindspore/numpy/math_ops.py +1 -1
- mindspore/numpy/utils_const.py +1 -1
- mindspore/opencv_core452.dll +0 -0
- mindspore/opencv_imgcodecs452.dll +0 -0
- mindspore/opencv_imgproc452.dll +0 -0
- mindspore/ops/_grad_experimental/grad_comm_ops.py +51 -13
- mindspore/ops/_grad_experimental/grad_debug_ops.py +14 -0
- mindspore/ops/_grad_experimental/grad_inner_ops.py +0 -9
- mindspore/ops/_op_impl/cpu/__init__.py +1 -0
- mindspore/{experimental/es/__init__.py → ops/_op_impl/cpu/joinedstr_op.py} +12 -6
- mindspore/ops/_vmap/vmap_array_ops.py +6 -13
- mindspore/ops/_vmap/vmap_nn_ops.py +8 -16
- mindspore/ops/auto_generate/cpp_create_prim_instance_helper.py +29 -10
- mindspore/ops/auto_generate/gen_extend_func.py +5 -55
- mindspore/ops/auto_generate/gen_ops_def.py +753 -273
- mindspore/ops/auto_generate/gen_ops_prim.py +1687 -958
- mindspore/ops/auto_generate/pyboost_inner_prim.py +31 -1
- mindspore/ops/composite/__init__.py +10 -0
- mindspore/ops/composite/base.py +9 -5
- mindspore/ops/composite/multitype_ops/__init__.py +12 -1
- mindspore/ops/composite/multitype_ops/_compile_utils.py +132 -108
- mindspore/ops/composite/multitype_ops/_constexpr_utils.py +1 -1
- mindspore/ops/composite/multitype_ops/add_impl.py +70 -2
- mindspore/ops/composite/multitype_ops/div_impl.py +49 -0
- mindspore/ops/composite/multitype_ops/floordiv_impl.py +29 -0
- mindspore/ops/composite/multitype_ops/getitem_impl.py +11 -0
- mindspore/ops/composite/multitype_ops/mod_impl.py +5 -3
- mindspore/ops/composite/multitype_ops/mul_impl.py +49 -0
- mindspore/ops/composite/multitype_ops/setitem_impl.py +57 -0
- mindspore/ops/composite/multitype_ops/sub_impl.py +34 -0
- mindspore/ops/composite/multitype_ops/zeros_like_impl.py +14 -0
- mindspore/ops/function/__init__.py +4 -1
- mindspore/ops/function/_add_attr_func.py +11 -6
- mindspore/ops/function/array_func.py +17 -100
- mindspore/ops/function/debug_func.py +8 -5
- mindspore/ops/function/grad/grad_func.py +5 -13
- mindspore/ops/function/math_func.py +65 -399
- mindspore/ops/function/nn_func.py +44 -61
- mindspore/ops/function/other_func.py +4 -1
- mindspore/ops/function/random_func.py +31 -4
- mindspore/ops/functional.py +2 -3
- mindspore/ops/functional_overload.py +486 -18
- mindspore/ops/op_info_register.py +21 -0
- mindspore/ops/operations/__init__.py +5 -2
- mindspore/ops/operations/_custom_ops_utils.py +675 -8
- mindspore/ops/operations/_inner_ops.py +14 -18
- mindspore/ops/operations/_sequence_ops.py +1 -1
- mindspore/ops/operations/array_ops.py +4 -50
- mindspore/ops/operations/comm_ops.py +186 -41
- mindspore/ops/operations/custom_ops.py +244 -175
- mindspore/ops/operations/debug_ops.py +55 -4
- mindspore/ops/operations/image_ops.py +13 -13
- mindspore/ops/operations/manually_defined/ops_def.py +27 -28
- mindspore/ops/operations/math_ops.py +8 -9
- mindspore/ops/operations/nn_ops.py +6 -7
- mindspore/ops/primitive.py +9 -20
- mindspore/ops/tensor_method.py +52 -11
- mindspore/ops_generate/api/cpp_create_prim_instance_helper_generator.py +1 -1
- mindspore/ops_generate/api/functional_map_cpp_generator.py +10 -9
- mindspore/ops_generate/api/functions_cc_generator.py +58 -10
- mindspore/ops_generate/api/tensor_func_reg_cpp_generator.py +1 -1
- mindspore/ops_generate/common/base_generator.py +14 -0
- mindspore/ops_generate/common/gen_constants.py +7 -2
- mindspore/ops_generate/common/gen_utils.py +0 -19
- mindspore/ops_generate/common/op_proto.py +11 -4
- mindspore/ops_generate/common/template.py +88 -11
- mindspore/ops_generate/gen_ops.py +1 -1
- mindspore/ops_generate/op_def/lite_ops_cpp_generator.py +4 -4
- mindspore/ops_generate/op_def/ops_name_h_generator.py +0 -3
- mindspore/ops_generate/op_def/ops_primitive_h_generator.py +0 -4
- mindspore/ops_generate/op_def_py/op_prim_py_generator.py +5 -2
- mindspore/ops_generate/pyboost/auto_grad_impl_cc_generator.py +49 -8
- mindspore/ops_generate/pyboost/auto_grad_reg_cc_generator.py +2 -2
- mindspore/ops_generate/pyboost/gen_pyboost_func.py +31 -16
- mindspore/ops_generate/pyboost/op_template_parser.py +98 -72
- mindspore/ops_generate/pyboost/pyboost_functions_cpp_generator.py +70 -273
- mindspore/ops_generate/pyboost/pyboost_functions_h_generator.py +14 -6
- mindspore/ops_generate/pyboost/pyboost_functions_impl_cpp_generator.py +316 -0
- mindspore/ops_generate/pyboost/pyboost_functions_py_generator.py +1 -1
- mindspore/ops_generate/pyboost/pyboost_grad_function_cpp_generator.py +5 -3
- mindspore/ops_generate/pyboost/pyboost_inner_prim_generator.py +1 -1
- mindspore/ops_generate/pyboost/pyboost_internal_functions_cpp_generator.py +76 -0
- mindspore/ops_generate/pyboost/pyboost_internal_functions_h_generator.py +76 -0
- mindspore/ops_generate/pyboost/pyboost_internal_kernel_info_adapter_generator.py +125 -0
- mindspore/ops_generate/pyboost/pyboost_native_grad_functions_generator.py +4 -3
- mindspore/ops_generate/pyboost/pyboost_op_cpp_code_generator.py +348 -61
- mindspore/ops_generate/pyboost/pyboost_overload_functions_cpp_generator.py +1 -1
- mindspore/ops_generate/pyboost/pyboost_utils.py +118 -9
- mindspore/ops_generate/tensor_py_cc_generator.py +1 -24
- mindspore/parallel/_auto_parallel_context.py +9 -17
- mindspore/parallel/_cell_wrapper.py +106 -40
- mindspore/parallel/_parallel_serialization.py +4 -3
- mindspore/parallel/_ps_context.py +4 -6
- mindspore/parallel/_tensor.py +167 -12
- mindspore/parallel/_transformer/moe.py +1 -1
- mindspore/parallel/_transformer/transformer.py +17 -12
- mindspore/parallel/_utils.py +5 -11
- mindspore/parallel/auto_parallel.py +33 -12
- mindspore/parallel/checkpoint_convert.py +3 -3
- mindspore/parallel/checkpoint_transform.py +5 -1
- mindspore/parallel/cluster/process_entity/_api.py +88 -49
- mindspore/parallel/cluster/process_entity/_utils.py +95 -7
- mindspore/parallel/cluster/run.py +48 -7
- mindspore/parallel/function/__init__.py +8 -1
- mindspore/parallel/function/reshard_func.py +7 -6
- mindspore/parallel/nn/__init__.py +15 -2
- mindspore/parallel/nn/parallel_cell_wrapper.py +50 -14
- mindspore/parallel/nn/parallel_grad_reducer.py +7 -14
- mindspore/parallel/shard.py +9 -23
- mindspore/parallel/transform_safetensors.py +468 -174
- mindspore/pgodb140.dll +0 -0
- mindspore/pgort140.dll +0 -0
- mindspore/profiler/__init__.py +2 -1
- mindspore/profiler/analysis/parser/timeline_assembly_factory/ascend_timeline_assembler.py +7 -7
- mindspore/profiler/analysis/parser/timeline_assembly_factory/base_timeline_assembler.py +3 -0
- mindspore/profiler/analysis/parser/timeline_assembly_factory/trace_view_container.py +3 -0
- mindspore/profiler/analysis/parser/timeline_creator/cpu_op_timeline_creator.py +3 -3
- mindspore/profiler/analysis/parser/timeline_creator/fwk_timeline_creator.py +3 -3
- mindspore/profiler/analysis/parser/timeline_creator/msprof_timeline_creator.py +4 -4
- mindspore/profiler/analysis/parser/timeline_creator/scope_layer_timeline_creator.py +3 -3
- mindspore/profiler/analysis/parser/timeline_event/fwk_event.py +4 -1
- mindspore/profiler/analysis/parser/timeline_event/timeline_event_pool.py +2 -1
- mindspore/profiler/analysis/task_manager.py +1 -1
- mindspore/profiler/analysis/viewer/ascend_communication_viewer.py +5 -1
- mindspore/profiler/analysis/viewer/ascend_integrate_viewer.py +2 -1
- mindspore/profiler/analysis/viewer/ascend_kernel_details_viewer.py +10 -9
- mindspore/profiler/analysis/viewer/ascend_op_memory_viewer.py +43 -23
- mindspore/profiler/analysis/viewer/ascend_step_trace_time_viewer.py +3 -2
- mindspore/profiler/analysis/viewer/ms_minddata_viewer.py +9 -5
- mindspore/profiler/analysis/viewer/ms_operator_details_viewer.py +132 -0
- mindspore/profiler/common/constant.py +16 -0
- mindspore/profiler/common/msprof_cmd_tool.py +2 -2
- mindspore/profiler/common/path_manager.py +9 -0
- mindspore/profiler/common/profiler_context.py +50 -29
- mindspore/profiler/common/profiler_info.py +0 -16
- mindspore/profiler/common/profiler_meta_data.py +1 -0
- mindspore/profiler/common/profiler_op_analyse.py +239 -0
- mindspore/profiler/common/profiler_output_path.py +23 -8
- mindspore/profiler/common/profiler_parameters.py +128 -35
- mindspore/profiler/dynamic_profile/__init__.py +0 -0
- mindspore/profiler/dynamic_profile/dynamic_monitor_proxy.py +39 -0
- mindspore/profiler/dynamic_profile/dynamic_profiler_config_context.py +666 -0
- mindspore/profiler/dynamic_profile/dynamic_profiler_utils.py +62 -0
- mindspore/profiler/dynamic_profiler.py +374 -338
- mindspore/profiler/envprofiler.py +42 -12
- mindspore/profiler/experimental_config.py +112 -7
- mindspore/profiler/mstx.py +33 -12
- mindspore/profiler/platform/__init__.py +2 -3
- mindspore/profiler/platform/cpu_profiler.py +10 -4
- mindspore/profiler/platform/npu_profiler.py +30 -20
- mindspore/profiler/profiler.py +218 -154
- mindspore/profiler/profiler_action_controller.py +65 -77
- mindspore/profiler/profiler_interface.py +2 -2
- mindspore/profiler/schedule.py +10 -4
- mindspore/rewrite/common/config.py +1 -0
- mindspore/rewrite/common/namer.py +1 -0
- mindspore/rewrite/common/namespace.py +1 -0
- mindspore/rewrite/node/node.py +31 -11
- mindspore/rewrite/parsers/assign_parser.py +1 -1
- mindspore/rewrite/symbol_tree/symbol_tree.py +2 -2
- mindspore/run_check/_check_version.py +7 -10
- mindspore/runtime/__init__.py +8 -6
- mindspore/runtime/event.py +10 -4
- mindspore/runtime/executor.py +87 -45
- mindspore/runtime/memory.py +22 -30
- mindspore/runtime/thread_bind_core.py +299 -165
- mindspore/safeguard/rewrite_obfuscation.py +12 -13
- mindspore/swresample-4.dll +0 -0
- mindspore/swscale-6.dll +0 -0
- mindspore/tbbmalloc.dll +0 -0
- mindspore/tinyxml2.dll +0 -0
- mindspore/train/_utils.py +9 -5
- mindspore/train/amp.py +43 -23
- mindspore/train/callback/__init__.py +5 -5
- mindspore/train/callback/_callback.py +2 -1
- mindspore/train/callback/_checkpoint.py +4 -14
- mindspore/train/callback/_flops_collector.py +11 -7
- mindspore/train/callback/_landscape.py +0 -1
- mindspore/train/callback/_train_fault_tolerance.py +72 -18
- mindspore/train/data_sink.py +15 -6
- mindspore/train/dataset_helper.py +14 -5
- mindspore/train/model.py +49 -47
- mindspore/train/serialization.py +168 -126
- mindspore/train/summary/summary_record.py +13 -2
- mindspore/train/train_thor/model_thor.py +2 -2
- mindspore/turbojpeg.dll +0 -0
- mindspore/utils/__init__.py +3 -2
- mindspore/utils/dryrun.py +0 -6
- mindspore/utils/runtime_execution_order_check.py +162 -78
- mindspore/utils/sdc_detect.py +68 -0
- mindspore/utils/utils.py +14 -17
- mindspore/vcmeta.dll +0 -0
- mindspore/vcruntime140.dll +0 -0
- mindspore/vcruntime140_1.dll +0 -0
- mindspore/version.py +1 -1
- {mindspore-2.6.0.dist-info → mindspore-2.7.0.dist-info}/METADATA +5 -4
- {mindspore-2.6.0.dist-info → mindspore-2.7.0.dist-info}/RECORD +400 -439
- mindspore/_deprecated/jit.py +0 -198
- mindspore/_extends/remote/kernel_build_server_ascend.py +0 -75
- mindspore/communication/_hccl_management.py +0 -297
- mindspore/experimental/es/embedding_service.py +0 -891
- mindspore/experimental/es/embedding_service_layer.py +0 -581
- mindspore/profiler/common/validator/__init__.py +0 -14
- mindspore/profiler/common/validator/validate_path.py +0 -84
- mindspore/profiler/parser/__init__.py +0 -14
- mindspore/profiler/parser/aicpu_data_parser.py +0 -272
- mindspore/profiler/parser/ascend_analysis/__init__.py +0 -14
- mindspore/profiler/parser/ascend_analysis/constant.py +0 -71
- mindspore/profiler/parser/ascend_analysis/file_manager.py +0 -180
- mindspore/profiler/parser/ascend_analysis/function_event.py +0 -185
- mindspore/profiler/parser/ascend_analysis/fwk_cann_parser.py +0 -136
- mindspore/profiler/parser/ascend_analysis/fwk_file_parser.py +0 -131
- mindspore/profiler/parser/ascend_analysis/msprof_timeline_parser.py +0 -104
- mindspore/profiler/parser/ascend_analysis/path_manager.py +0 -313
- mindspore/profiler/parser/ascend_analysis/profiler_info_parser.py +0 -123
- mindspore/profiler/parser/ascend_analysis/tlv_decoder.py +0 -86
- mindspore/profiler/parser/ascend_analysis/trace_event_manager.py +0 -75
- mindspore/profiler/parser/ascend_cluster_generator.py +0 -116
- mindspore/profiler/parser/ascend_communicate_generator.py +0 -314
- mindspore/profiler/parser/ascend_flops_generator.py +0 -116
- mindspore/profiler/parser/ascend_fpbp_generator.py +0 -82
- mindspore/profiler/parser/ascend_hccl_generator.py +0 -271
- mindspore/profiler/parser/ascend_integrate_generator.py +0 -42
- mindspore/profiler/parser/ascend_memory_generator.py +0 -185
- mindspore/profiler/parser/ascend_msprof_exporter.py +0 -282
- mindspore/profiler/parser/ascend_msprof_generator.py +0 -187
- mindspore/profiler/parser/ascend_op_generator.py +0 -334
- mindspore/profiler/parser/ascend_steptrace_generator.py +0 -94
- mindspore/profiler/parser/ascend_timeline_generator.py +0 -545
- mindspore/profiler/parser/base_timeline_generator.py +0 -483
- mindspore/profiler/parser/container.py +0 -229
- mindspore/profiler/parser/cpu_gpu_timeline_generator.py +0 -697
- mindspore/profiler/parser/flops_parser.py +0 -531
- mindspore/profiler/parser/framework_enum.py +0 -111
- mindspore/profiler/parser/framework_parser.py +0 -464
- mindspore/profiler/parser/framework_struct.py +0 -61
- mindspore/profiler/parser/gpu_analysis/__init__.py +0 -14
- mindspore/profiler/parser/gpu_analysis/function_event.py +0 -44
- mindspore/profiler/parser/gpu_analysis/fwk_file_parser.py +0 -89
- mindspore/profiler/parser/gpu_analysis/profiler_info_parser.py +0 -72
- mindspore/profiler/parser/hccl_parser.py +0 -573
- mindspore/profiler/parser/hwts_log_parser.py +0 -122
- mindspore/profiler/parser/integrator.py +0 -526
- mindspore/profiler/parser/memory_usage_parser.py +0 -277
- mindspore/profiler/parser/minddata_analyzer.py +0 -800
- mindspore/profiler/parser/minddata_parser.py +0 -186
- mindspore/profiler/parser/minddata_pipeline_parser.py +0 -299
- mindspore/profiler/parser/op_intermediate_parser.py +0 -149
- mindspore/profiler/parser/optime_parser.py +0 -250
- mindspore/profiler/parser/profiler_info.py +0 -213
- mindspore/profiler/parser/step_trace_parser.py +0 -666
- mindspore/utils/hooks.py +0 -81
- /mindspore/common/{_auto_dynamic.py → dynamic_shape/_auto_dynamic.py} +0 -0
- {mindspore-2.6.0.dist-info → mindspore-2.7.0.dist-info}/WHEEL +0 -0
- {mindspore-2.6.0.dist-info → mindspore-2.7.0.dist-info}/entry_points.txt +0 -0
- {mindspore-2.6.0.dist-info → mindspore-2.7.0.dist-info}/top_level.txt +0 -0
|
@@ -1,800 +0,0 @@
|
|
|
1
|
-
# Copyright 2021 Huawei Technologies Co., Ltd
|
|
2
|
-
#
|
|
3
|
-
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
-
# you may not use this file except in compliance with the License.
|
|
5
|
-
# You may obtain a copy of the License at
|
|
6
|
-
#
|
|
7
|
-
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
-
#
|
|
9
|
-
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
-
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
-
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
-
# See the License for the specific language governing permissions and
|
|
13
|
-
# limitations under the License.
|
|
14
|
-
# ============================================================================
|
|
15
|
-
"""The analyzer for MindData profiling files."""
|
|
16
|
-
import copy
|
|
17
|
-
import csv
|
|
18
|
-
import json
|
|
19
|
-
import os
|
|
20
|
-
import stat
|
|
21
|
-
|
|
22
|
-
from mindspore.profiler.common.exceptions.exceptions import \
|
|
23
|
-
ProfilerPathErrorException, ProfilerFileNotFoundException, \
|
|
24
|
-
ProfilerDirNotFoundException, ProfilerRawFileException
|
|
25
|
-
from mindspore import log as logger
|
|
26
|
-
from mindspore.profiler.common.validator.validate_path import validate_and_normalize_path
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
class MinddataProfilingAnalyzer:
|
|
30
|
-
"""
|
|
31
|
-
The analyzer for MindData profiling files.
|
|
32
|
-
|
|
33
|
-
Args:
|
|
34
|
-
source_dir (str): The source directory for MindData profiling input files.
|
|
35
|
-
device_id (str): The device ID.
|
|
36
|
-
output_path (str): The target directory for the analyzed summary. Default: `./`.
|
|
37
|
-
|
|
38
|
-
Raises:
|
|
39
|
-
ProfilerPathErrorException: If the source directory or the output path is invalid.
|
|
40
|
-
ProfilerDirNotFoundException: If the source directory or the output path does not exist.
|
|
41
|
-
ProfilerFileNotFoundException: If any of the MindData profiling input files do not exist.
|
|
42
|
-
"""
|
|
43
|
-
|
|
44
|
-
def __init__(self, source_dir, device_id, output_path='./', pretty=False):
|
|
45
|
-
# Validate and save input parameters
|
|
46
|
-
self._device_id = device_id
|
|
47
|
-
self._source_dir = self._validate_directory(source_dir, 'Source directory')
|
|
48
|
-
self._output_path = self._validate_directory(output_path, 'Output path')
|
|
49
|
-
|
|
50
|
-
# Get MindData profiling input filenames
|
|
51
|
-
self._pipeline_path_filename = self._get_pipeline_path_filename(source_dir)
|
|
52
|
-
self._cpu_utilization_path_filename = self._get_cpu_utilization_path_filename(source_dir)
|
|
53
|
-
self._device_trace_path_filename, self._device_queue_file_found = \
|
|
54
|
-
self._get_device_trace_path_filename(source_dir)
|
|
55
|
-
|
|
56
|
-
# Save output filename
|
|
57
|
-
self._save_path = self._get_save_path(output_path)
|
|
58
|
-
self._pretty = pretty
|
|
59
|
-
|
|
60
|
-
@property
|
|
61
|
-
def indent(self):
|
|
62
|
-
indent = 1 if self._pretty else None
|
|
63
|
-
return indent
|
|
64
|
-
|
|
65
|
-
@property
|
|
66
|
-
def save_path(self):
|
|
67
|
-
"""
|
|
68
|
-
The property of save path.
|
|
69
|
-
|
|
70
|
-
Returns:
|
|
71
|
-
str, the save path.
|
|
72
|
-
"""
|
|
73
|
-
return self._save_path
|
|
74
|
-
|
|
75
|
-
@staticmethod
|
|
76
|
-
def _validate_directory(dir_name, dir_type):
|
|
77
|
-
"""
|
|
78
|
-
Validate the input directory.
|
|
79
|
-
|
|
80
|
-
Args:
|
|
81
|
-
dir_name (str): The directory name.
|
|
82
|
-
dir_type (str): The type of directory. (Should begin with capital since is used for output messages.)
|
|
83
|
-
"""
|
|
84
|
-
try:
|
|
85
|
-
validated_dir = validate_and_normalize_path(dir_name)
|
|
86
|
-
except RuntimeError as path_error:
|
|
87
|
-
logger.warning('<%s> is invalid.', dir_type)
|
|
88
|
-
raise ProfilerPathErrorException(dir_type + ' is invalid.') from path_error
|
|
89
|
-
|
|
90
|
-
if not os.path.isdir(validated_dir):
|
|
91
|
-
logger.warning('<%s> <%s> not found.', dir_type, validated_dir)
|
|
92
|
-
raise ProfilerDirNotFoundException(validated_dir)
|
|
93
|
-
return validated_dir
|
|
94
|
-
|
|
95
|
-
@staticmethod
|
|
96
|
-
def _parse_pipeline_metrics_info(metrics):
|
|
97
|
-
"""
|
|
98
|
-
Parse and process the pipeline profiling metrics information for a given op.
|
|
99
|
-
|
|
100
|
-
Args:
|
|
101
|
-
metrics (dict): The pipeline profiling metrics information for a given op.
|
|
102
|
-
|
|
103
|
-
Returns:
|
|
104
|
-
List with the following analyzed metrics information:
|
|
105
|
-
output queue size
|
|
106
|
-
output queue length
|
|
107
|
-
output queue average size,
|
|
108
|
-
output queue utilization percentage
|
|
109
|
-
output queue empty frequency percentage
|
|
110
|
-
"""
|
|
111
|
-
# Note: Some ops like DeviceQueue and inline ops do not have metrics information
|
|
112
|
-
queue_size = -1
|
|
113
|
-
queue_length = -1
|
|
114
|
-
queue_average_size = -1
|
|
115
|
-
queue_utilization_pct = -1
|
|
116
|
-
queue_empty_freq_pct = -1
|
|
117
|
-
if metrics and metrics['output_queue']:
|
|
118
|
-
queue_size = metrics['output_queue']['size']
|
|
119
|
-
queue_length = metrics['output_queue']['length']
|
|
120
|
-
queue_average_size = round(sum(queue_size) / len(queue_size), 2) if queue_size else -1
|
|
121
|
-
queue_utilization_pct = round(100 * queue_average_size / queue_length, 2) if queue_length else -1
|
|
122
|
-
# Compute percentage of time queue is empty
|
|
123
|
-
empty_count = 0
|
|
124
|
-
for q_size in queue_size:
|
|
125
|
-
if q_size == 0:
|
|
126
|
-
empty_count += 1
|
|
127
|
-
queue_empty_freq_pct = round(100 * empty_count / len(queue_size), 2) if queue_size else -1
|
|
128
|
-
return [queue_size, queue_length, queue_average_size, queue_utilization_pct, queue_empty_freq_pct]
|
|
129
|
-
|
|
130
|
-
@staticmethod
|
|
131
|
-
def _parse_cpu_util_info(cpu_util_info):
|
|
132
|
-
"""
|
|
133
|
-
Parse and process the CPU profiling information.
|
|
134
|
-
|
|
135
|
-
Args:
|
|
136
|
-
cpu_util_info (dict): The CPU utilization profiling information.
|
|
137
|
-
|
|
138
|
-
Returns:
|
|
139
|
-
Dictionary with analyzed summary output information
|
|
140
|
-
Dictionary consists of:
|
|
141
|
-
avg_cpu_pct: Average CPU utilization percentage for each op, a list ordered by increasing op id
|
|
142
|
-
|
|
143
|
-
Raises:
|
|
144
|
-
ProfilerRawFileException: If the format of the input is wrong.
|
|
145
|
-
"""
|
|
146
|
-
# Perform sanity checks for CPU utilization information
|
|
147
|
-
cpu_processor_num = cpu_util_info.get('cpu_processor_num')
|
|
148
|
-
cpu_op_info = cpu_util_info.get('op_info')
|
|
149
|
-
if cpu_processor_num is None or not cpu_op_info:
|
|
150
|
-
raise ProfilerRawFileException('The format of MindData CPU utilization JSON file is wrong.')
|
|
151
|
-
|
|
152
|
-
for item in cpu_op_info:
|
|
153
|
-
if not item:
|
|
154
|
-
raise ProfilerRawFileException('The contents of MindData CPU utilization JSON file is wrong.')
|
|
155
|
-
|
|
156
|
-
# Parse and process the following CPU utilization information:
|
|
157
|
-
# - overage cpu utilization for each op
|
|
158
|
-
dict_opid_cpuutil = {}
|
|
159
|
-
for op in cpu_util_info["op_info"]:
|
|
160
|
-
# Note: The CPU utilization data may have an extra entry with op_id=-1
|
|
161
|
-
# Omit info for op_id=1
|
|
162
|
-
if op["op_id"] != -1:
|
|
163
|
-
op_sys, op_usr = op["metrics"]["sys_utilization"], op["metrics"]["user_utilization"]
|
|
164
|
-
dict_opid_cpuutil[op["op_id"]] = [op_sys[i] + op_usr[i] for i in range(len(op_sys))]
|
|
165
|
-
|
|
166
|
-
# Initialize oplist_avg_cpu_pct with -1 for each pipeline op, since
|
|
167
|
-
# CPU utilization data may not have information for each pipeline op
|
|
168
|
-
oplist_avg_cpu_pct = [-1] * len(dict_opid_cpuutil)
|
|
169
|
-
total_cpu = 0
|
|
170
|
-
for op_id, cpu in dict_opid_cpuutil.items():
|
|
171
|
-
op_avg_cpu_pct = sum(cpu) / len(cpu) if cpu else 0
|
|
172
|
-
oplist_avg_cpu_pct[op_id] = round(op_avg_cpu_pct, 2)
|
|
173
|
-
total_cpu += op_avg_cpu_pct
|
|
174
|
-
|
|
175
|
-
return_dict = {}
|
|
176
|
-
return_dict['avg_cpu_pct'] = oplist_avg_cpu_pct
|
|
177
|
-
return return_dict
|
|
178
|
-
|
|
179
|
-
@staticmethod
|
|
180
|
-
def _compute_composite_info(summary_dict):
|
|
181
|
-
"""
|
|
182
|
-
Compute composite analysis information from the current summary pipeline data.
|
|
183
|
-
|
|
184
|
-
Args:
|
|
185
|
-
summary_dict (dict): Input summary pipeline information.
|
|
186
|
-
|
|
187
|
-
Returns:
|
|
188
|
-
Dictionary with composite analysis output information
|
|
189
|
-
Dictionary consists of:
|
|
190
|
-
avg_cpu_pct_per_worker: Average CPU utilization percentage per worker
|
|
191
|
-
"""
|
|
192
|
-
return_dict = {}
|
|
193
|
-
|
|
194
|
-
# Build list: average CPU utilization percentage per worker - for each op
|
|
195
|
-
avg_cpu_pct_per_worker = []
|
|
196
|
-
for c, n in zip(summary_dict.get('avg_cpu_pct'), summary_dict.get('num_workers')):
|
|
197
|
-
avg_cpu_pct_per_worker.append(round(c / n if (n != 0 and c >= 0) else -1, 2))
|
|
198
|
-
return_dict['avg_cpu_pct_per_worker'] = avg_cpu_pct_per_worker
|
|
199
|
-
|
|
200
|
-
return return_dict
|
|
201
|
-
|
|
202
|
-
@staticmethod
def _analyze_for_bottleneck_op(summary_dict):
    """
    Run the bottleneck analysis over the MindData summary information.

    Args:
        summary_dict (dict): Input summary pipeline information.

    Returns:
        dict, the result of BottleneckAnalyzer.analyze() for the given summary, i.e. where
        applicable:
            - CPU utilization analysis
            - queue utilization analysis
            - bottleneck warning: information on the bottleneck op
              (only present if a potential bottleneck is identified)
            - bottleneck suggestion: the reason why the op is identified as a potential
              bottleneck, plus a suggestion on how to resolve it
              (only present if a potential bottleneck is identified)
        An empty dict is returned when the analysis hits an IndexError.
    """
    try:
        return BottleneckAnalyzer(summary_dict).analyze()
    except IndexError:
        # The per-op lists are indexed by op id; an op id outside a list aborts the
        # analysis and is reported as "nothing to add".
        return {}
|
|
228
|
-
|
|
229
|
-
def analyze(self):
    """
    Read the MindData profiling files, build the pipeline summary (including any potential
    bottleneck operator) and save the result to disk.

    Returns:
        dict, Analyzed MindData pipeline summary information, which is also written to disk in
        JSON file 'minddata_pipeline_summary_<device_id>.json' and
        CSV file 'minddata_pipeline_summary_<device_id>.csv'.

    Raises:
        ProfilerRawFileException: If a MindData profiling file cannot be loaded or is empty.
    """

    def _load_json(path_filename, load_failure_msg, empty_log_format, empty_msg):
        # Load one JSON profiling file; content that fails to decode or is empty is rejected.
        with open(path_filename, 'r') as json_file:
            try:
                content = json.load(json_file)
            except (json.JSONDecodeError, TypeError) as path_filename_error:
                logger.warning(path_filename_error)
                raise ProfilerRawFileException(load_failure_msg) from path_filename_error
            if not content:
                logger.warning(empty_log_format, path_filename)
                raise ProfilerRawFileException(empty_msg)
        return content

    # MindData pipeline file
    pipeline_info = _load_json(self._pipeline_path_filename,
                               'Failed to find the MindData pipeline profiling file.',
                               'The MindData pipeline file <%s> is empty.',
                               'The MindData pipeline file is empty.')

    # CPU utilization file
    cpu_util_info = _load_json(self._cpu_utilization_path_filename,
                               'Failed to find the MindData CPU utilization file.',
                               'The MindData CPU utilization file <%s> is empty.',
                               'The MindData CPU utilization file is empty.')

    # Device queue or dataset iterator trace profiling file (plain text, one record per line)
    with open(self._device_trace_path_filename, 'r') as device_trace_file:
        try:
            device_trace_info = device_trace_file.readlines()
        except (TypeError) as path_filename_error:
            logger.warning(path_filename_error)
            raise ProfilerRawFileException(
                'Failed to find the MindData trace profiling file.') from path_filename_error
        if not device_trace_info:
            logger.warning('The MindData trace profiling file <%s> is empty.', self._device_trace_path_filename)
            raise ProfilerRawFileException('The MindData trace profiling file is empty.')

    # Analyze the loaded profiling information and save the result
    return self._analyze_and_save(pipeline_info, cpu_util_info, device_trace_info)
|
|
282
|
-
|
|
283
|
-
def _get_pipeline_path_filename(self, source_dir):
    """
    Build and check the full path of the MindData pipeline profiling file.
    The filename is 'pipeline_profiling_<device_id>.json'.

    Args:
        source_dir (str): The source directory for MindData profiling files.

    Returns:
        str, the MindData pipeline full path filename.

    Raises:
        ProfilerPathErrorException: If the path is rejected by validate_and_normalize_path.
        ProfilerFileNotFoundException: If the normalized path is not an existing file.
    """
    raw_path = os.path.join(source_dir, 'pipeline_profiling_{}.json'.format(self._device_id))

    try:
        normalized_path = validate_and_normalize_path(raw_path)
    except RuntimeError as path_filename_error:
        logger.warning('The MindData pipeline path %s is invalid.', raw_path)
        raise ProfilerPathErrorException('The MindData pipeline path is invalid.') from path_filename_error

    if os.path.isfile(normalized_path):
        return normalized_path

    logger.warning('The MindData pipeline file <%s> is not found.', normalized_path)
    raise ProfilerFileNotFoundException(normalized_path)
|
|
311
|
-
|
|
312
|
-
def _get_cpu_utilization_path_filename(self, source_dir):
    """
    Build and check the full path of the MindData CPU utilization file.
    The filename is 'minddata_cpu_utilization_<device_id>.json'.

    Args:
        source_dir (str): The source directory for MindData profiling files.

    Returns:
        str, the MindData CPU utilization full path filename.

    Raises:
        ProfilerPathErrorException: If the path is rejected by validate_and_normalize_path.
        ProfilerFileNotFoundException: If the normalized path is not an existing file.
    """
    raw_path = os.path.join(source_dir, 'minddata_cpu_utilization_{}.json'.format(self._device_id))

    try:
        normalized_path = validate_and_normalize_path(raw_path)
    except RuntimeError as path_filename_error:
        logger.warning('The MindData CPU utilization path <%s> is invalid.', raw_path)
        raise ProfilerPathErrorException('The MindData CPU utilization path is invalid.') from path_filename_error

    if os.path.isfile(normalized_path):
        return normalized_path

    logger.warning('The MindData CPU utilization file <%s> is not found.', normalized_path)
    raise ProfilerFileNotFoundException(normalized_path)
|
|
339
|
-
|
|
340
|
-
def _get_device_trace_path_filename(self, source_dir):
|
|
341
|
-
"""
|
|
342
|
-
Get the MindData device trace profiling full path filename.
|
|
343
|
-
File search order:
|
|
344
|
-
1) 'device_queue_profiling_<device_id>.txt' and then
|
|
345
|
-
2) 'dataset_iterator_profiling_<device_id>.txt'.
|
|
346
|
-
|
|
347
|
-
Args:
|
|
348
|
-
source_dir (str): The source directory for MindData profiling files.
|
|
349
|
-
|
|
350
|
-
Returns:
|
|
351
|
-
str, the MindData device trace profiling full path filename.
|
|
352
|
-
bool, flag which indicates if 'device_queue_profiling_<device_id>.txt' has been found or not
|
|
353
|
-
"""
|
|
354
|
-
# Initialize variable for MindData device trace profiling filename
|
|
355
|
-
device_trace_path_filename = ''
|
|
356
|
-
# Initialize flag that 'device_queue_profiling_<device_id>.txt' has not yet been found
|
|
357
|
-
device_queue_file_found = False
|
|
358
|
-
|
|
359
|
-
txt_names = [os.path.join(source_dir, txt_name.format(self._device_id))
|
|
360
|
-
for txt_name in ('device_queue_profiling_{}.txt', 'dataset_iterator_profiling_{}.txt')]
|
|
361
|
-
|
|
362
|
-
# Search for a device trace profiling file
|
|
363
|
-
if os.path.exists(txt_names[0]):
|
|
364
|
-
device_trace_path_filename = txt_names[0]
|
|
365
|
-
device_queue_file_found = True
|
|
366
|
-
elif os.path.exists(txt_names[1]):
|
|
367
|
-
device_trace_path_filename = txt_names[1]
|
|
368
|
-
else:
|
|
369
|
-
logger.warning('A MindData device trace profiling file <%s> nor <%s> cannot be found.',
|
|
370
|
-
txt_names[0], txt_names[1])
|
|
371
|
-
raise ProfilerPathErrorException('A MindData device trace profiling file cannot be found.')
|
|
372
|
-
|
|
373
|
-
if not os.path.isfile(device_trace_path_filename):
|
|
374
|
-
logger.warning('The MindData device trace profiling file <%s> is not found.', device_trace_path_filename)
|
|
375
|
-
raise ProfilerFileNotFoundException(device_trace_path_filename)
|
|
376
|
-
|
|
377
|
-
return device_trace_path_filename, device_queue_file_found
|
|
378
|
-
|
|
379
|
-
def _get_save_path(self, output_path):
    """
    Build the full pathname of the JSON file that receives the analyzed MindData pipeline summary.
    The output filename is 'minddata_pipeline_summary_<device_id>.json'.

    Args:
        output_path (str): The output directory.

    Returns:
        str, the save path.

    Raises:
        ProfilerPathErrorException: If the output path is rejected by validate_and_normalize_path.
        ProfilerDirNotFoundException: If the normalized output path is not an existing directory.
    """
    try:
        output_dir = validate_and_normalize_path(output_path)
    except RuntimeError as path_error:
        logger.warning('Output path <%s> is invalid.', output_path)
        raise ProfilerPathErrorException('Output path is invalid.') from path_error

    if os.path.isdir(output_dir):
        summary_filename = 'minddata_pipeline_summary_{}.json'.format(self._device_id)
        return os.path.join(output_dir, summary_filename)

    logger.warning('The output directory <%s> not found.', output_dir)
    raise ProfilerDirNotFoundException(output_dir)
|
|
402
|
-
|
|
403
|
-
def _parse_pipeline_info(self, pipeline_info):
|
|
404
|
-
"""
|
|
405
|
-
Parse and process the pipeline profiling information.
|
|
406
|
-
|
|
407
|
-
Args:
|
|
408
|
-
pipeline_info (dict): The pipeline profiling information.
|
|
409
|
-
|
|
410
|
-
Returns:
|
|
411
|
-
Dictionary with analyzed summary output information
|
|
412
|
-
For the following key-value pairs, each value is a list ordered by increasing op id
|
|
413
|
-
pipeline_ops: operator name and operator id, a string, with example format Batch(id=0)
|
|
414
|
-
op_names: operator name, a string
|
|
415
|
-
op_ids: operator id, an integer
|
|
416
|
-
num_workers: number of workers for the op, an integer
|
|
417
|
-
queue_average_size: average queue size for the op, a float
|
|
418
|
-
queue_utilization_pct: average percentage of time queue is used for op, a float from 0.00 to 1.00
|
|
419
|
-
queue_empty_freq_pct: percentage of time queue is empty for op, a float from 0.00 to 1.00
|
|
420
|
-
children_ids: children op ids of op; list if empty [] if op has no children
|
|
421
|
-
parent_id: parent id of op
|
|
422
|
-
|
|
423
|
-
Raises:
|
|
424
|
-
ProfilerRawFileException: If the format of the input is wrong.
|
|
425
|
-
"""
|
|
426
|
-
# Perform sanity checks for pipeline information
|
|
427
|
-
pipeline_op_info = pipeline_info.get('op_info')
|
|
428
|
-
for item in pipeline_op_info:
|
|
429
|
-
if not item:
|
|
430
|
-
raise ProfilerRawFileException('The contents of MindData pipeline JSON file is wrong.')
|
|
431
|
-
|
|
432
|
-
# Parse and process pipeline information
|
|
433
|
-
# Obtain the following for each op (and build a list), ordered by increasing op id
|
|
434
|
-
# - op id (handy for user output)
|
|
435
|
-
# - op name (needed for basic processing)
|
|
436
|
-
# - op name with op id (handy for user output)
|
|
437
|
-
# - num_workers
|
|
438
|
-
# - various queue information
|
|
439
|
-
# - children op ids
|
|
440
|
-
# - parent op id
|
|
441
|
-
dict_opid_pipeline_ops = {}
|
|
442
|
-
dict_opid_opname = {}
|
|
443
|
-
dict_opid_numworkers = {}
|
|
444
|
-
dict_opid_queue_info = {}
|
|
445
|
-
dict_opid_children_ids = {}
|
|
446
|
-
dict_opid_parent_id = {}
|
|
447
|
-
# Note: Will process the input pipeline ops in "reversed" order since typically they are ordered
|
|
448
|
-
# from largest op id (usually leaf/source op) to smallest op id (usually root).
|
|
449
|
-
# However, since there may be non-linear pipelines, the processed op info needs to be sorted
|
|
450
|
-
# before final output is produced and saved.
|
|
451
|
-
for op_info in reversed(pipeline_info['op_info']):
|
|
452
|
-
op_id = op_info.get('op_id')
|
|
453
|
-
op_name = op_info.get('op_type')[0:-2]
|
|
454
|
-
dict_opid_pipeline_ops[op_id] = '{}(id={})'.format(op_name, str(op_id))
|
|
455
|
-
dict_opid_opname[op_id] = op_name
|
|
456
|
-
dict_opid_numworkers[op_id] = op_info.get('num_workers')
|
|
457
|
-
|
|
458
|
-
# Obtain the output queue metrics information for the current op
|
|
459
|
-
dict_opid_queue_info[op_id] = self._parse_pipeline_metrics_info(op_info.get('metrics'))
|
|
460
|
-
|
|
461
|
-
# For current op, initialize parent_id=-1, in case after processing all children in pipeline,
|
|
462
|
-
# it is determined that current op has no parent
|
|
463
|
-
if dict_opid_parent_id.get(op_id) is None:
|
|
464
|
-
dict_opid_parent_id[op_id] = -1
|
|
465
|
-
|
|
466
|
-
children_ids = op_info.get('children')
|
|
467
|
-
if children_ids:
|
|
468
|
-
# Set children op ids for current op
|
|
469
|
-
dict_opid_children_ids[op_id] = children_ids
|
|
470
|
-
# For each child op, set parent op to be current op
|
|
471
|
-
for child_op_id in children_ids:
|
|
472
|
-
dict_opid_parent_id[child_op_id] = op_id
|
|
473
|
-
else:
|
|
474
|
-
dict_opid_children_ids[op_id] = []
|
|
475
|
-
|
|
476
|
-
# Build resultant dictionary
|
|
477
|
-
return_dict = {}
|
|
478
|
-
|
|
479
|
-
return_dict['pipeline_ops'] = [x[1] for x in sorted(dict_opid_pipeline_ops.items())]
|
|
480
|
-
return_dict['op_names'] = [x[1] for x in sorted(dict_opid_opname.items())]
|
|
481
|
-
return_dict['op_ids'] = sorted(dict_opid_opname.keys())
|
|
482
|
-
return_dict['num_workers'] = [x[1] for x in sorted(dict_opid_numworkers.items())]
|
|
483
|
-
|
|
484
|
-
queue_info_items = [x[1] for x in sorted(dict_opid_queue_info.items())]
|
|
485
|
-
return_dict['queue_average_size'] = [y[2] for y in queue_info_items]
|
|
486
|
-
return_dict['queue_utilization_pct'] = [y[3] for y in queue_info_items]
|
|
487
|
-
return_dict['queue_empty_freq_pct'] = [y[4] for y in queue_info_items]
|
|
488
|
-
|
|
489
|
-
return_dict['children_ids'] = [x[1] for x in sorted(dict_opid_children_ids.items())]
|
|
490
|
-
return_dict['parent_id'] = [x[1] for x in sorted(dict_opid_parent_id.items())]
|
|
491
|
-
|
|
492
|
-
return return_dict
|
|
493
|
-
|
|
494
|
-
def _parse_device_trace_info(self, device_trace_info):
|
|
495
|
-
"""
|
|
496
|
-
Parse and process the device trace profiling information.
|
|
497
|
-
|
|
498
|
-
Args:
|
|
499
|
-
device_trace_info: The device trace profiling information in text format, one line per record.
|
|
500
|
-
|
|
501
|
-
Returns:
|
|
502
|
-
Dictionary with analyzed summary output information
|
|
503
|
-
Dictionary consists of:
|
|
504
|
-
per_batch_time: Average per batch time for pipeline in milliseconds
|
|
505
|
-
per_pipeline_time: Average per pipeline time in milliseconds
|
|
506
|
-
per_push_queue_time: Average per queue push time in milliseconds
|
|
507
|
-
"""
|
|
508
|
-
# Information on the format of the device tracing profiling information.
|
|
509
|
-
# Format is: type extra-info batch-num value timestamp
|
|
510
|
-
# 0) type: 0: time, 1: connector size
|
|
511
|
-
# 1) extra-info: if type is 0 - 0: pipeline time, 1: push tdt time, 2: batch time
|
|
512
|
-
# if type is 1 - connector capacity
|
|
513
|
-
# 2) batch-num: batch number
|
|
514
|
-
# 3) value: if type is 0 - value is time(ms)
|
|
515
|
-
# if type is 1 - value is connector size
|
|
516
|
-
# 4) timestamp
|
|
517
|
-
# Examples:
|
|
518
|
-
# 0 0 20 10 xxx - The 20th batch took 10ms to get data from pipeline.
|
|
519
|
-
# 1 64 20 5 yyy - Connector size is 5 when get the 20th batch.Connector capacity is 64.
|
|
520
|
-
|
|
521
|
-
prev_time = 0
|
|
522
|
-
q_time = [[], [], []] # pipeline time, push TDT time, batch time
|
|
523
|
-
|
|
524
|
-
# Parse each record
|
|
525
|
-
for line_data in device_trace_info:
|
|
526
|
-
record = [int(d) for d in line_data.split(" ")][0:5]
|
|
527
|
-
if record[2] < 2: # skip 1st batch
|
|
528
|
-
prev_time = record[4]
|
|
529
|
-
continue
|
|
530
|
-
|
|
531
|
-
if record[0] == 0: # type 0: time record
|
|
532
|
-
q_time[record[1]].append(record[3])
|
|
533
|
-
elif record[0] == 1: # type 1: connector size record
|
|
534
|
-
# Check if dataset_iterator trace profiling file was found
|
|
535
|
-
if not self._device_queue_file_found:
|
|
536
|
-
q_time[2].append(record[4] - prev_time)
|
|
537
|
-
prev_time = record[4]
|
|
538
|
-
|
|
539
|
-
# Compute average queue times
|
|
540
|
-
avg_pipeline_time = sum(q_time[0]) / len(q_time[0]) if q_time[0] else -1
|
|
541
|
-
avg_push_queue_time = sum(q_time[1]) / len(q_time[1]) if q_time[1] else -1
|
|
542
|
-
avg_batch_time = sum(q_time[2]) / len(q_time[2]) if q_time[2] else -1
|
|
543
|
-
|
|
544
|
-
return_dict = {}
|
|
545
|
-
return_dict['per_batch_time'] = [round(avg_batch_time, 3)]
|
|
546
|
-
return_dict['per_pipeline_time'] = [round(avg_pipeline_time, 3)]
|
|
547
|
-
return_dict['per_push_queue_time'] = [round(avg_push_queue_time, 3)]
|
|
548
|
-
|
|
549
|
-
return return_dict
|
|
550
|
-
|
|
551
|
-
def _save_as_csv_file(self, data_dict):
|
|
552
|
-
"""
|
|
553
|
-
Save data dictionary information to CSV file.
|
|
554
|
-
|
|
555
|
-
Args:
|
|
556
|
-
data_dict (dict): Input data dictionary information.
|
|
557
|
-
|
|
558
|
-
Returns:
|
|
559
|
-
Data dictionary information is saved to CSV file named 'minddata_pipeline_summary_<device_id>.csv'.
|
|
560
|
-
"""
|
|
561
|
-
|
|
562
|
-
summary_templatename = 'minddata_pipeline_summary_{}.csv'
|
|
563
|
-
output_csv_path_filename = os.path.join(self._output_path, summary_templatename.format(self._device_id))
|
|
564
|
-
|
|
565
|
-
# Open file for writing
|
|
566
|
-
with os.fdopen(os.open(output_csv_path_filename, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600),
|
|
567
|
-
'w') as data_file:
|
|
568
|
-
|
|
569
|
-
# Create CSV writer object
|
|
570
|
-
csv_writer = csv.writer(data_file)
|
|
571
|
-
|
|
572
|
-
# Write the dictionary information to CSV file
|
|
573
|
-
# Create deepcopy of input data_dict so zip processing in this function does NOT change the data_dict
|
|
574
|
-
temp_dict = copy.deepcopy(data_dict)
|
|
575
|
-
for data_key, data_value in zip(temp_dict.keys(), temp_dict.values()):
|
|
576
|
-
# Begin/prefix the data value with the data key
|
|
577
|
-
data_value.insert(0, data_key)
|
|
578
|
-
csv_writer.writerow(data_value)
|
|
579
|
-
|
|
580
|
-
# Update file permissions
|
|
581
|
-
os.chmod(output_csv_path_filename, stat.S_IREAD | stat.S_IWRITE)
|
|
582
|
-
|
|
583
|
-
def _analyze_and_save(self, pipeline_info, cpu_util_info, device_trace_info):
    """
    Build the MindData summary information from the loaded profiling data and save it to file.

    Args:
        pipeline_info (dict): The pipeline information read from the input JSON file.
        cpu_util_info (dict): The CPU utilization information read from the input JSON file.
        device_trace_info (text): The dataset iterator (CPU) or device queue (GPU, Ascend) trace profiling
            text file. Value is None if such file could not be identified.

    Returns:
        summary_dict (dict): Analyzed summary information.
        The summary dictionary information is doubly saved to a JSON file and a CSV file
        (so that these different formats are available to the users).
    """
    # Start from the parsed pipeline information, then merge in the CPU utilization information
    summary_dict = dict(self._parse_pipeline_info(pipeline_info))
    summary_dict.update(self._parse_cpu_util_info(cpu_util_info))

    # The device queue / dataset iterator trace information is optional
    if device_trace_info is not None:
        summary_dict.update(self._parse_device_trace_info(device_trace_info))

    # The composite and bottleneck analyses are only run when the pipeline data and the
    # CPU utilization data describe the same number of ops
    num_pipeline_ops = len(summary_dict.get('pipeline_ops'))
    num_cpu_util_ops = len(summary_dict.get('avg_cpu_pct'))
    if num_pipeline_ops != num_cpu_util_ops:
        warning_msg = ''.join(['Number of ops for pipeline data: ', str(num_pipeline_ops),
                               ' does not match number of ops for CPU utilization data: ',
                               str(num_cpu_util_ops)])
        logger.warning(warning_msg)
    else:
        # Compute composite analysis information
        summary_dict.update(self._compute_composite_info(summary_dict))

        # Analyze pipeline info for potential bottleneck op
        bottleneck_dict = self._analyze_for_bottleneck_op(summary_dict)
        if bottleneck_dict:
            summary_dict.update(bottleneck_dict)

    # Format#1: JSON output file, created with owner-only permissions
    json_fd = os.open(self._save_path, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
    with os.fdopen(json_fd, 'w') as save_file:
        json.dump(summary_dict, save_file, indent=self.indent)

    os.chmod(self._save_path, stat.S_IREAD | stat.S_IWRITE)

    # Format#2: CSV output file
    self._save_as_csv_file(summary_dict)

    # Format#3: the summary dictionary itself
    return summary_dict
|
|
641
|
-
|
|
642
|
-
|
|
643
|
-
class BottleneckAnalyzer:
    """
    Analyzer for bottlenecks in the MindData pipeline.

    It works on the per-op lists of the pipeline summary dictionary. Every list is indexed by
    op id, so an op id outside a list raises IndexError.
    """

    # These are the threshold values used in the pipeline bottleneck analyzer algorithm
    # CPU utilization per worker above/below which an op is reported
    _AVG_CPU_UTIL_PCT_PER_WORKER_MAXIMUM = 75.0
    _AVG_CPU_UTIL_PCT_PER_WORKER_MINIMUM = 20.0
    # A leaf op whose output queue utilization is below this value is reported
    _LEAF_OUTPUT_QUEUE_EMPTY_FREQ_PCT_MAXIMUM = 50
    # A DeviceQueue op whose input queue empty frequency is above this value is reported
    _DEVICEQUEUE_INPUT_QUEUE_EMPTY_FREQ_PCT_MAXIMUM = 60
    # Largest tolerated difference between input and output queue utilization of an op
    _IN_OUT_QUEUE_UTIL_PCT_DIFF_MAXIMUM = 50
    # Input queue utilization below which a low-CPU op is considered a bottleneck
    _IN_QUEUE_UTIL_PCT_MAXIMUM = 10

    def __init__(self, summary_dict):
        """
        Constructor for BottleneckAnalyzer.

        Args:
            summary_dict (dict): Summary pipeline information. The keys read below must all
                be present, otherwise KeyError is raised.
        """
        self.pipeline_ops = summary_dict["pipeline_ops"]
        self.op_names = summary_dict["op_names"]
        self.op_ids = summary_dict["op_ids"]
        self.num_workers = summary_dict["num_workers"]
        self.queue_average_size = summary_dict["queue_average_size"]
        self.queue_utilization_pct = summary_dict["queue_utilization_pct"]
        self.queue_empty_freq_pct = summary_dict["queue_empty_freq_pct"]
        self.children_ids = summary_dict["children_ids"]
        self.parent_id = summary_dict["parent_id"]
        self.avg_cpu_pct = summary_dict["avg_cpu_pct"]
        self.avg_cpu_pct_per_worker = summary_dict["avg_cpu_pct_per_worker"]

        # Sentinel values meaning "no such op" / "no queue usage information"
        self.op_id_not_exist = -1
        self.queue_usage_not_exist = -1
        # Ops with these names are skipped by the CPU, queue and bottleneck analyses
        self.non_multithreaded_ops = {"Barrier",
                                      "Concat",
                                      "EpochCtrl",
                                      "Rename",
                                      "Repeat",
                                      "Shuffle",
                                      "Skip",
                                      "Take",
                                      "Zip"}

    def analyze(self):
        """
        Analyze the usage of all ops.

        Returns:
            dict, with the following keys, each present only when there is something to report:
                cpu_analysis_details: list of CPU usage findings
                queue_analysis_details: list of queue usage findings
                bottleneck_warning: single-element list with the bottleneck op
                bottleneck_suggestion: single-element list with the matching suggestion
        """
        detailed_analysis = {}

        cpu_analysis = self.analyze_cpu_usage()
        queue_analysis = self.analyze_queue_usage()

        if cpu_analysis:
            detailed_analysis["cpu_analysis_details"] = cpu_analysis

        if queue_analysis:
            detailed_analysis["queue_analysis_details"] = queue_analysis

        bottleneck, suggestion = self.analyze_bottleneck()

        # An empty string means that no bottleneck op was identified
        if bottleneck[0]:
            detailed_analysis["bottleneck_warning"] = bottleneck
            detailed_analysis["bottleneck_suggestion"] = suggestion

        return detailed_analysis

    def analyze_cpu_usage(self):
        """
        Analyze the CPU usage of each op.

        Returns:
            list, one message for each analyzed op whose CPU utilization per worker is above
            the maximum threshold, or below the minimum threshold while using more than one worker.
        """
        cpu_usage_analysis = []
        for op_id in self.op_ids:
            if op_id == self.op_id_not_exist or self.op_names[op_id] in self.non_multithreaded_ops:
                continue

            if self.avg_cpu_pct_per_worker[op_id] > self._AVG_CPU_UTIL_PCT_PER_WORKER_MAXIMUM and \
                    self.op_names[op_id]:
                cpu_usage_analysis.append(
                    ("{} is using {}% CPU per worker."
                     " Setting num_parallel_workers"
                     ">{} might bring extra performance.").format(self.pipeline_ops[op_id],
                                                                  self.avg_cpu_pct_per_worker[op_id],
                                                                  self.num_workers[op_id]))
            elif self.avg_cpu_pct_per_worker[op_id] < self._AVG_CPU_UTIL_PCT_PER_WORKER_MINIMUM and \
                    self.num_workers[op_id] > 1:
                cpu_usage_analysis.append(
                    ("{} is using {}% CPU per worker. Using num_parallel_workers={} might not bring as much benefit"
                     " due to low CPU usage per worker.").format(self.pipeline_ops[op_id],
                                                                 self.avg_cpu_pct_per_worker[op_id],
                                                                 self.num_workers[op_id]))
        return cpu_usage_analysis

    def analyze_queue_usage(self):
        """
        Analyze the queue usage of each op.

        The input queue of an op is the output queue of its nearest non-inline child.

        Returns:
            list, one message for each analyzed op with a noteworthy queue usage.
        """
        queue_usage_analysis = []
        for op_id in self.op_ids:
            if op_id == self.op_id_not_exist or self.op_names[op_id] in self.non_multithreaded_ops:
                continue

            if self.op_names[op_id] == "Batch":
                continue
            in_op_id = self.__get_non_inline_child_recur(op_id)
            out_q = self.queue_utilization_pct[op_id]
            if in_op_id == self.op_id_not_exist and out_q != self.queue_usage_not_exist:
                # This is a leaf node since input queue does not exist and output queue exists
                if out_q < self._LEAF_OUTPUT_QUEUE_EMPTY_FREQ_PCT_MAXIMUM:
                    # The second sentence starts with a space so the two sentences do not run together
                    queue_usage_analysis.append(("Leaf op {} is using {}% of its output queue."
                                                 " Setting num_parallel_workers"
                                                 ">{} might speed up I/O.").format(self.pipeline_ops[op_id],
                                                                                   out_q,
                                                                                   self.num_workers[op_id]))
            elif self.op_names[op_id] == "DeviceQueue" and in_op_id != self.op_id_not_exist:
                # For the device queue op, check how often its input queue is empty
                if self.queue_empty_freq_pct[in_op_id] > self._DEVICEQUEUE_INPUT_QUEUE_EMPTY_FREQ_PCT_MAXIMUM:
                    queue_usage_analysis.append(
                        f"{self.pipeline_ops[op_id]}'s input queue is empty {self.queue_empty_freq_pct[in_op_id]}% "
                        f"of the time. This might indicate dataset bottlenecks. Hence host cannot keep up with "
                        f"the device {self.queue_empty_freq_pct[in_op_id]}% of the time. "
                        f"Device waits whenever input queue is empty.")
            elif in_op_id != self.op_id_not_exist and out_q != self.queue_usage_not_exist:
                in_q = self.queue_utilization_pct[in_op_id]
                if in_q != self.queue_usage_not_exist and in_q - out_q > self._IN_OUT_QUEUE_UTIL_PCT_DIFF_MAXIMUM:
                    queue_usage_analysis.append(
                        f"{self.pipeline_ops[op_id]}'s input queue usage={in_q}% is greater output queue "
                        f"usage={out_q}%. This indicates child op {self.pipeline_ops[in_op_id]} "
                        f"might be producing faster than its parent {self.pipeline_ops[op_id]} can consume. "
                        f"If this op has low CPU utilization, try increasing "
                        f"prefetch_size or increasing num_workers.")
        return queue_usage_analysis

    def analyze_bottleneck(self):
        """
        Analyze the bottleneck by using both CPU and queue usage.

        The ops are visited from the last op id to the first; when several ops qualify, the
        one visited last (i.e. the qualifying op that comes first in op_ids) is reported.

        Returns:
            tuple of two single-element lists: the bottleneck op and the suggestion for it.
            Both elements are empty strings when no bottleneck is identified.
        """
        bottleneck, suggestion = "", ""
        for op_id in reversed(self.op_ids):
            # Skip ops that are not analyzed before reading any per-op data for them
            if op_id == self.op_id_not_exist or \
                    self.op_names[op_id] in self.non_multithreaded_ops \
                    or self.op_names[op_id] == "DeviceQueue":
                continue

            wkr_cpu = self.avg_cpu_pct_per_worker[op_id]
            if wkr_cpu > self._AVG_CPU_UTIL_PCT_PER_WORKER_MAXIMUM:
                bottleneck = self.pipeline_ops[op_id]
                suggestion = "{} has high CPU utilization per worker of {}%".format(
                    self.pipeline_ops[op_id], wkr_cpu)
                suggestion += " Try increasing num_parallel_workers above {}.".format(self.num_workers[op_id])
            elif wkr_cpu < self._AVG_CPU_UTIL_PCT_PER_WORKER_MINIMUM:
                in_op_id = self.__get_non_inline_child_recur(op_id)
                if in_op_id == self.op_id_not_exist:
                    # No input queue to compare against
                    continue
                in_q_usage = self.queue_utilization_pct[in_op_id]
                out_q = self.queue_utilization_pct[op_id]
                if in_q_usage < self._IN_QUEUE_UTIL_PCT_MAXIMUM or \
                        out_q - in_q_usage > self._IN_OUT_QUEUE_UTIL_PCT_DIFF_MAXIMUM:
                    bottleneck = self.pipeline_ops[op_id]
                    suggestion = "{} has low CPU utilization per worker of {}%".format(
                        self.pipeline_ops[op_id], wkr_cpu)
                    suggestion += " and abnormal queue usage. Try increasing prefetch_size."

        return [bottleneck], [suggestion]

    def __get_non_inline_child_recur(self, cur_op_id):
        """
        Get the id of the nearest descendant of the current op which isn't an inline op.

        Only the first child of each op is followed; an op counts as inline when its
        queue_average_size is -1.

        Returns:
            int, the op id found, or op_id_not_exist if there is no such descendant.
        """
        if cur_op_id == self.op_id_not_exist or not self.children_ids[cur_op_id]:
            return self.op_id_not_exist
        cur_child_id = self.children_ids[cur_op_id][0]
        if self.queue_average_size[cur_child_id] != -1:
            return cur_child_id
        return self.__get_non_inline_child_recur(cur_child_id)
|