mindspore 2.6.0-cp311-cp311-win_amd64.whl → 2.7.0-cp311-cp311-win_amd64.whl
This diff shows the changes between the two publicly released package versions as they appear in their public registry. It is provided for informational purposes only.
Potentially problematic release.
This version of mindspore might be problematic.
- mindspore/.commit_id +1 -1
- mindspore/Microsoft.VisualStudio.Telemetry.dll +0 -0
- mindspore/Newtonsoft.Json.dll +0 -0
- mindspore/__init__.py +2 -2
- mindspore/_c_dataengine.cp311-win_amd64.pyd +0 -0
- mindspore/_c_expression.cp311-win_amd64.pyd +0 -0
- mindspore/_c_mindrecord.cp311-win_amd64.pyd +0 -0
- mindspore/_checkparam.py +42 -11
- mindspore/_extends/builtin_operations.py +3 -3
- mindspore/{_deprecated → _extends/optimize}/__init__.py +9 -3
- mindspore/_extends/optimize/cell_utils.py +96 -0
- mindspore/_extends/parallel_compile/akg_compiler/custom.py +1109 -0
- mindspore/_extends/parallel_compile/akg_compiler/gen_custom_op_files.py +1 -1
- mindspore/_extends/parse/__init__.py +3 -3
- mindspore/_extends/parse/compile_config.py +44 -22
- mindspore/_extends/parse/deprecated/deprecated_tensor_method.py +1 -2
- mindspore/_extends/parse/parser.py +64 -83
- mindspore/_extends/parse/resources.py +39 -0
- mindspore/_extends/parse/standard_method.py +47 -14
- mindspore/_extends/parse/trope.py +8 -1
- mindspore/_extends/pijit/__init__.py +1 -2
- mindspore/_extends/pijit/pijit_func_white_list.py +2 -5
- mindspore/amp.py +4 -22
- mindspore/atlprov.dll +0 -0
- mindspore/avcodec-59.dll +0 -0
- mindspore/avdevice-59.dll +0 -0
- mindspore/avfilter-8.dll +0 -0
- mindspore/avformat-59.dll +0 -0
- mindspore/avutil-57.dll +0 -0
- mindspore/boost/adasum.py +1 -1
- mindspore/boost/boost_cell_wrapper.py +4 -4
- mindspore/c1.dll +0 -0
- mindspore/c1xx.dll +0 -0
- mindspore/c2.dll +0 -0
- mindspore/common/__init__.py +43 -12
- mindspore/common/_grad_function.py +2 -1
- mindspore/common/_pijit_context.py +28 -7
- mindspore/common/_stub_tensor.py +1 -209
- mindspore/common/_tensor_cpp_method.py +1 -1
- mindspore/common/_tensor_docs.py +177 -52
- mindspore/common/_utils.py +9 -1
- mindspore/common/api.py +338 -208
- mindspore/common/dtype.py +108 -57
- mindspore/common/dump.py +11 -16
- mindspore/common/dynamic_shape/__init__.py +0 -0
- mindspore/common/{auto_dynamic_shape.py → dynamic_shape/auto_dynamic_shape.py} +17 -23
- mindspore/common/dynamic_shape/enable_dynamic.py +197 -0
- mindspore/common/file_system.py +59 -9
- mindspore/common/generator.py +2 -3
- mindspore/common/hook_handle.py +33 -5
- mindspore/common/jit_config.py +1 -1
- mindspore/common/jit_trace.py +84 -105
- mindspore/common/np_dtype.py +3 -3
- mindspore/common/parameter.py +27 -29
- mindspore/common/recompute.py +5 -7
- mindspore/common/sparse_tensor.py +0 -3
- mindspore/common/symbol.py +0 -1
- mindspore/common/tensor.py +84 -133
- mindspore/communication/_comm_helper.py +46 -4
- mindspore/communication/management.py +79 -7
- mindspore/context.py +47 -38
- mindspore/dataset/__init__.py +1 -1
- mindspore/dataset/audio/transforms.py +1 -1
- mindspore/dataset/core/config.py +38 -4
- mindspore/dataset/engine/datasets.py +350 -322
- mindspore/dataset/engine/datasets_user_defined.py +69 -23
- mindspore/dataset/engine/iterators.py +2 -2
- mindspore/dataset/engine/obs/config_loader.py +2 -2
- mindspore/dataset/engine/obs/obs_mindrecord_dataset.py +8 -0
- mindspore/dataset/transforms/c_transforms.py +2 -2
- mindspore/dataset/transforms/py_transforms.py +7 -3
- mindspore/dataset/transforms/transforms.py +10 -6
- mindspore/dataset/vision/__init__.py +1 -1
- mindspore/dataset/vision/py_transforms.py +8 -8
- mindspore/dataset/vision/transforms.py +17 -5
- mindspore/dataset/vision/utils.py +632 -21
- mindspore/dataset/vision/validators.py +1 -0
- mindspore/device_context/ascend/device.py +1 -1
- mindspore/device_context/ascend/op_tuning.py +35 -1
- mindspore/device_context/gpu/__init__.py +2 -2
- mindspore/device_context/gpu/device.py +1 -1
- mindspore/device_context/gpu/op_precision.py +4 -2
- mindspore/device_context/gpu/op_tuning.py +6 -3
- mindspore/device_manager.py +16 -9
- mindspore/dnnl.dll +0 -0
- mindspore/dpcmi.dll +0 -0
- mindspore/experimental/llm_boost/ascend_native/llama_boost_ascend_native.py +5 -4
- mindspore/experimental/llm_boost/atb/boost_base.py +2 -3
- mindspore/experimental/optim/adadelta.py +13 -20
- mindspore/experimental/optim/adagrad.py +15 -22
- mindspore/experimental/optim/adam.py +17 -24
- mindspore/experimental/optim/adamax.py +14 -22
- mindspore/experimental/optim/adamw.py +28 -34
- mindspore/experimental/optim/asgd.py +15 -25
- mindspore/experimental/optim/lr_scheduler.py +27 -45
- mindspore/experimental/optim/nadam.py +14 -24
- mindspore/experimental/optim/optimizer.py +13 -23
- mindspore/experimental/optim/radam.py +18 -24
- mindspore/experimental/optim/rmsprop.py +14 -25
- mindspore/experimental/optim/rprop.py +15 -26
- mindspore/experimental/optim/sgd.py +9 -19
- mindspore/hal/__init__.py +4 -4
- mindspore/hal/contiguous_tensors_handle.py +2 -2
- mindspore/hal/memory.py +1 -0
- mindspore/include/api/cell.h +65 -5
- mindspore/include/api/cfg.h +24 -7
- mindspore/include/api/context.h +1 -0
- mindspore/include/api/delegate.h +10 -2
- mindspore/include/api/dual_abi_helper.h +100 -19
- mindspore/include/api/graph.h +14 -1
- mindspore/include/api/kernel.h +16 -3
- mindspore/include/api/kernel_api.h +9 -1
- mindspore/include/api/metrics/accuracy.h +9 -0
- mindspore/include/api/model.h +8 -1
- mindspore/include/api/model_group.h +4 -0
- mindspore/include/api/model_parallel_runner.h +2 -0
- mindspore/include/api/status.h +48 -10
- mindspore/include/api/types.h +8 -3
- mindspore/include/c_api/model_c.h +0 -58
- mindspore/include/c_api/tensor_c.h +0 -26
- mindspore/include/dataset/constants.h +9 -0
- mindspore/include/dataset/vision_ascend.h +1 -1
- mindspore/jpeg62.dll +0 -0
- mindspore/mindrecord/tools/cifar10.py +61 -11
- mindspore/mindrecord/tools/cifar10_to_mr.py +5 -0
- mindspore/mindspore_backend_common.dll +0 -0
- mindspore/mindspore_backend_manager.dll +0 -0
- mindspore/mindspore_common.dll +0 -0
- mindspore/mindspore_core.dll +0 -0
- mindspore/mindspore_cpu_res_manager.dll +0 -0
- mindspore/mindspore_dump.dll +0 -0
- mindspore/mindspore_frontend.dll +0 -0
- mindspore/mindspore_glog.dll +0 -0
- mindspore/mindspore_memory_pool.dll +0 -0
- mindspore/mindspore_ms_backend.dll +0 -0
- mindspore/mindspore_ops.dll +0 -0
- mindspore/mindspore_ops_host.dll +0 -0
- mindspore/mindspore_ops_kernel_common.dll +0 -0
- mindspore/mindspore_profiler.dll +0 -0
- mindspore/mindspore_pyboost.dll +0 -0
- mindspore/mindspore_pynative.dll +0 -0
- mindspore/mindspore_res_manager.dll +0 -0
- mindspore/mindspore_runtime_pipeline.dll +0 -0
- mindspore/mint/__init__.py +4 -44
- mindspore/mint/distributed/__init__.py +5 -0
- mindspore/mint/distributed/distributed.py +425 -19
- mindspore/mint/nn/__init__.py +1 -1
- mindspore/mint/nn/functional.py +53 -6
- mindspore/mint/nn/layer/_functions.py +163 -294
- mindspore/mint/nn/layer/activation.py +8 -6
- mindspore/mint/nn/layer/conv.py +125 -101
- mindspore/mint/nn/layer/normalization.py +11 -25
- mindspore/mint/optim/adam.py +19 -18
- mindspore/mint/optim/adamw.py +14 -8
- mindspore/mint/optim/sgd.py +5 -5
- mindspore/msobj140.dll +0 -0
- mindspore/mspdb140.dll +0 -0
- mindspore/mspdbcore.dll +0 -0
- mindspore/mspdbst.dll +0 -0
- mindspore/mspft140.dll +0 -0
- mindspore/msvcdis140.dll +0 -0
- mindspore/msvcp140_1.dll +0 -0
- mindspore/msvcp140_2.dll +0 -0
- mindspore/msvcp140_atomic_wait.dll +0 -0
- mindspore/msvcp140_codecvt_ids.dll +0 -0
- mindspore/nn/cell.py +488 -620
- mindspore/nn/grad/cell_grad.py +11 -12
- mindspore/nn/layer/activation.py +36 -36
- mindspore/nn/layer/basic.py +74 -77
- mindspore/nn/layer/channel_shuffle.py +4 -4
- mindspore/nn/layer/combined.py +4 -2
- mindspore/nn/layer/conv.py +86 -85
- mindspore/nn/layer/dense.py +9 -7
- mindspore/nn/layer/embedding.py +50 -52
- mindspore/nn/layer/image.py +38 -40
- mindspore/nn/layer/math.py +111 -112
- mindspore/nn/layer/normalization.py +56 -44
- mindspore/nn/layer/pooling.py +58 -63
- mindspore/nn/layer/rnn_cells.py +33 -33
- mindspore/nn/layer/rnns.py +56 -56
- mindspore/nn/layer/thor_layer.py +74 -73
- mindspore/nn/layer/transformer.py +11 -1
- mindspore/nn/learning_rate_schedule.py +20 -20
- mindspore/nn/loss/loss.py +79 -81
- mindspore/nn/optim/adam.py +2 -4
- mindspore/nn/optim/adasum.py +2 -2
- mindspore/nn/optim/lamb.py +1 -3
- mindspore/nn/optim/optimizer.py +1 -1
- mindspore/nn/optim/tft_wrapper.py +2 -3
- mindspore/nn/optim/thor.py +2 -2
- mindspore/nn/probability/distribution/_utils/utils.py +2 -2
- mindspore/nn/probability/distribution/exponential.py +2 -1
- mindspore/nn/probability/distribution/poisson.py +2 -1
- mindspore/nn/sparse/sparse.py +3 -3
- mindspore/nn/wrap/cell_wrapper.py +73 -42
- mindspore/nn/wrap/grad_reducer.py +37 -52
- mindspore/nn/wrap/loss_scale.py +72 -74
- mindspore/numpy/array_creations.py +7 -7
- mindspore/numpy/fft.py +1 -1
- mindspore/numpy/math_ops.py +1 -1
- mindspore/numpy/utils_const.py +1 -1
- mindspore/opencv_core452.dll +0 -0
- mindspore/opencv_imgcodecs452.dll +0 -0
- mindspore/opencv_imgproc452.dll +0 -0
- mindspore/ops/_grad_experimental/grad_comm_ops.py +51 -13
- mindspore/ops/_grad_experimental/grad_debug_ops.py +14 -0
- mindspore/ops/_grad_experimental/grad_inner_ops.py +0 -9
- mindspore/ops/_op_impl/cpu/__init__.py +1 -0
- mindspore/{experimental/es/__init__.py → ops/_op_impl/cpu/joinedstr_op.py} +12 -6
- mindspore/ops/_vmap/vmap_array_ops.py +6 -13
- mindspore/ops/_vmap/vmap_nn_ops.py +8 -16
- mindspore/ops/auto_generate/cpp_create_prim_instance_helper.py +29 -10
- mindspore/ops/auto_generate/gen_extend_func.py +5 -55
- mindspore/ops/auto_generate/gen_ops_def.py +753 -273
- mindspore/ops/auto_generate/gen_ops_prim.py +1687 -958
- mindspore/ops/auto_generate/pyboost_inner_prim.py +31 -1
- mindspore/ops/composite/__init__.py +10 -0
- mindspore/ops/composite/base.py +9 -5
- mindspore/ops/composite/multitype_ops/__init__.py +12 -1
- mindspore/ops/composite/multitype_ops/_compile_utils.py +132 -108
- mindspore/ops/composite/multitype_ops/_constexpr_utils.py +1 -1
- mindspore/ops/composite/multitype_ops/add_impl.py +70 -2
- mindspore/ops/composite/multitype_ops/div_impl.py +49 -0
- mindspore/ops/composite/multitype_ops/floordiv_impl.py +29 -0
- mindspore/ops/composite/multitype_ops/getitem_impl.py +11 -0
- mindspore/ops/composite/multitype_ops/mod_impl.py +5 -3
- mindspore/ops/composite/multitype_ops/mul_impl.py +49 -0
- mindspore/ops/composite/multitype_ops/setitem_impl.py +57 -0
- mindspore/ops/composite/multitype_ops/sub_impl.py +34 -0
- mindspore/ops/composite/multitype_ops/zeros_like_impl.py +14 -0
- mindspore/ops/function/__init__.py +4 -1
- mindspore/ops/function/_add_attr_func.py +11 -6
- mindspore/ops/function/array_func.py +17 -100
- mindspore/ops/function/debug_func.py +8 -5
- mindspore/ops/function/grad/grad_func.py +5 -13
- mindspore/ops/function/math_func.py +65 -399
- mindspore/ops/function/nn_func.py +44 -61
- mindspore/ops/function/other_func.py +4 -1
- mindspore/ops/function/random_func.py +31 -4
- mindspore/ops/functional.py +2 -3
- mindspore/ops/functional_overload.py +486 -18
- mindspore/ops/op_info_register.py +21 -0
- mindspore/ops/operations/__init__.py +5 -2
- mindspore/ops/operations/_custom_ops_utils.py +675 -8
- mindspore/ops/operations/_inner_ops.py +14 -18
- mindspore/ops/operations/_sequence_ops.py +1 -1
- mindspore/ops/operations/array_ops.py +4 -50
- mindspore/ops/operations/comm_ops.py +186 -41
- mindspore/ops/operations/custom_ops.py +244 -175
- mindspore/ops/operations/debug_ops.py +55 -4
- mindspore/ops/operations/image_ops.py +13 -13
- mindspore/ops/operations/manually_defined/ops_def.py +27 -28
- mindspore/ops/operations/math_ops.py +8 -9
- mindspore/ops/operations/nn_ops.py +6 -7
- mindspore/ops/primitive.py +9 -20
- mindspore/ops/tensor_method.py +52 -11
- mindspore/ops_generate/api/cpp_create_prim_instance_helper_generator.py +1 -1
- mindspore/ops_generate/api/functional_map_cpp_generator.py +10 -9
- mindspore/ops_generate/api/functions_cc_generator.py +58 -10
- mindspore/ops_generate/api/tensor_func_reg_cpp_generator.py +1 -1
- mindspore/ops_generate/common/base_generator.py +14 -0
- mindspore/ops_generate/common/gen_constants.py +7 -2
- mindspore/ops_generate/common/gen_utils.py +0 -19
- mindspore/ops_generate/common/op_proto.py +11 -4
- mindspore/ops_generate/common/template.py +88 -11
- mindspore/ops_generate/gen_ops.py +1 -1
- mindspore/ops_generate/op_def/lite_ops_cpp_generator.py +4 -4
- mindspore/ops_generate/op_def/ops_name_h_generator.py +0 -3
- mindspore/ops_generate/op_def/ops_primitive_h_generator.py +0 -4
- mindspore/ops_generate/op_def_py/op_prim_py_generator.py +5 -2
- mindspore/ops_generate/pyboost/auto_grad_impl_cc_generator.py +49 -8
- mindspore/ops_generate/pyboost/auto_grad_reg_cc_generator.py +2 -2
- mindspore/ops_generate/pyboost/gen_pyboost_func.py +31 -16
- mindspore/ops_generate/pyboost/op_template_parser.py +98 -72
- mindspore/ops_generate/pyboost/pyboost_functions_cpp_generator.py +70 -273
- mindspore/ops_generate/pyboost/pyboost_functions_h_generator.py +14 -6
- mindspore/ops_generate/pyboost/pyboost_functions_impl_cpp_generator.py +316 -0
- mindspore/ops_generate/pyboost/pyboost_functions_py_generator.py +1 -1
- mindspore/ops_generate/pyboost/pyboost_grad_function_cpp_generator.py +5 -3
- mindspore/ops_generate/pyboost/pyboost_inner_prim_generator.py +1 -1
- mindspore/ops_generate/pyboost/pyboost_internal_functions_cpp_generator.py +76 -0
- mindspore/ops_generate/pyboost/pyboost_internal_functions_h_generator.py +76 -0
- mindspore/ops_generate/pyboost/pyboost_internal_kernel_info_adapter_generator.py +125 -0
- mindspore/ops_generate/pyboost/pyboost_native_grad_functions_generator.py +4 -3
- mindspore/ops_generate/pyboost/pyboost_op_cpp_code_generator.py +348 -61
- mindspore/ops_generate/pyboost/pyboost_overload_functions_cpp_generator.py +1 -1
- mindspore/ops_generate/pyboost/pyboost_utils.py +118 -9
- mindspore/ops_generate/tensor_py_cc_generator.py +1 -24
- mindspore/parallel/_auto_parallel_context.py +9 -17
- mindspore/parallel/_cell_wrapper.py +106 -40
- mindspore/parallel/_parallel_serialization.py +4 -3
- mindspore/parallel/_ps_context.py +4 -6
- mindspore/parallel/_tensor.py +167 -12
- mindspore/parallel/_transformer/moe.py +1 -1
- mindspore/parallel/_transformer/transformer.py +17 -12
- mindspore/parallel/_utils.py +5 -11
- mindspore/parallel/auto_parallel.py +33 -12
- mindspore/parallel/checkpoint_convert.py +3 -3
- mindspore/parallel/checkpoint_transform.py +5 -1
- mindspore/parallel/cluster/process_entity/_api.py +88 -49
- mindspore/parallel/cluster/process_entity/_utils.py +95 -7
- mindspore/parallel/cluster/run.py +48 -7
- mindspore/parallel/function/__init__.py +8 -1
- mindspore/parallel/function/reshard_func.py +7 -6
- mindspore/parallel/nn/__init__.py +15 -2
- mindspore/parallel/nn/parallel_cell_wrapper.py +50 -14
- mindspore/parallel/nn/parallel_grad_reducer.py +7 -14
- mindspore/parallel/shard.py +9 -23
- mindspore/parallel/transform_safetensors.py +468 -174
- mindspore/pgodb140.dll +0 -0
- mindspore/pgort140.dll +0 -0
- mindspore/profiler/__init__.py +2 -1
- mindspore/profiler/analysis/parser/timeline_assembly_factory/ascend_timeline_assembler.py +7 -7
- mindspore/profiler/analysis/parser/timeline_assembly_factory/base_timeline_assembler.py +3 -0
- mindspore/profiler/analysis/parser/timeline_assembly_factory/trace_view_container.py +3 -0
- mindspore/profiler/analysis/parser/timeline_creator/cpu_op_timeline_creator.py +3 -3
- mindspore/profiler/analysis/parser/timeline_creator/fwk_timeline_creator.py +3 -3
- mindspore/profiler/analysis/parser/timeline_creator/msprof_timeline_creator.py +4 -4
- mindspore/profiler/analysis/parser/timeline_creator/scope_layer_timeline_creator.py +3 -3
- mindspore/profiler/analysis/parser/timeline_event/fwk_event.py +4 -1
- mindspore/profiler/analysis/parser/timeline_event/timeline_event_pool.py +2 -1
- mindspore/profiler/analysis/task_manager.py +1 -1
- mindspore/profiler/analysis/viewer/ascend_communication_viewer.py +5 -1
- mindspore/profiler/analysis/viewer/ascend_integrate_viewer.py +2 -1
- mindspore/profiler/analysis/viewer/ascend_kernel_details_viewer.py +10 -9
- mindspore/profiler/analysis/viewer/ascend_op_memory_viewer.py +43 -23
- mindspore/profiler/analysis/viewer/ascend_step_trace_time_viewer.py +3 -2
- mindspore/profiler/analysis/viewer/ms_minddata_viewer.py +9 -5
- mindspore/profiler/analysis/viewer/ms_operator_details_viewer.py +132 -0
- mindspore/profiler/common/constant.py +16 -0
- mindspore/profiler/common/msprof_cmd_tool.py +2 -2
- mindspore/profiler/common/path_manager.py +9 -0
- mindspore/profiler/common/profiler_context.py +50 -29
- mindspore/profiler/common/profiler_info.py +0 -16
- mindspore/profiler/common/profiler_meta_data.py +1 -0
- mindspore/profiler/common/profiler_op_analyse.py +239 -0
- mindspore/profiler/common/profiler_output_path.py +23 -8
- mindspore/profiler/common/profiler_parameters.py +128 -35
- mindspore/profiler/dynamic_profile/__init__.py +0 -0
- mindspore/profiler/dynamic_profile/dynamic_monitor_proxy.py +39 -0
- mindspore/profiler/dynamic_profile/dynamic_profiler_config_context.py +666 -0
- mindspore/profiler/dynamic_profile/dynamic_profiler_utils.py +62 -0
- mindspore/profiler/dynamic_profiler.py +374 -338
- mindspore/profiler/envprofiler.py +42 -12
- mindspore/profiler/experimental_config.py +112 -7
- mindspore/profiler/mstx.py +33 -12
- mindspore/profiler/platform/__init__.py +2 -3
- mindspore/profiler/platform/cpu_profiler.py +10 -4
- mindspore/profiler/platform/npu_profiler.py +30 -20
- mindspore/profiler/profiler.py +218 -154
- mindspore/profiler/profiler_action_controller.py +65 -77
- mindspore/profiler/profiler_interface.py +2 -2
- mindspore/profiler/schedule.py +10 -4
- mindspore/rewrite/common/config.py +1 -0
- mindspore/rewrite/common/namer.py +1 -0
- mindspore/rewrite/common/namespace.py +1 -0
- mindspore/rewrite/node/node.py +31 -11
- mindspore/rewrite/parsers/assign_parser.py +1 -1
- mindspore/rewrite/symbol_tree/symbol_tree.py +2 -2
- mindspore/run_check/_check_version.py +7 -10
- mindspore/runtime/__init__.py +8 -6
- mindspore/runtime/event.py +10 -4
- mindspore/runtime/executor.py +87 -45
- mindspore/runtime/memory.py +22 -30
- mindspore/runtime/thread_bind_core.py +299 -165
- mindspore/safeguard/rewrite_obfuscation.py +12 -13
- mindspore/swresample-4.dll +0 -0
- mindspore/swscale-6.dll +0 -0
- mindspore/tbbmalloc.dll +0 -0
- mindspore/tinyxml2.dll +0 -0
- mindspore/train/_utils.py +9 -5
- mindspore/train/amp.py +43 -23
- mindspore/train/callback/__init__.py +5 -5
- mindspore/train/callback/_callback.py +2 -1
- mindspore/train/callback/_checkpoint.py +4 -14
- mindspore/train/callback/_flops_collector.py +11 -7
- mindspore/train/callback/_landscape.py +0 -1
- mindspore/train/callback/_train_fault_tolerance.py +72 -18
- mindspore/train/data_sink.py +15 -6
- mindspore/train/dataset_helper.py +14 -5
- mindspore/train/model.py +49 -47
- mindspore/train/serialization.py +168 -126
- mindspore/train/summary/summary_record.py +13 -2
- mindspore/train/train_thor/model_thor.py +2 -2
- mindspore/turbojpeg.dll +0 -0
- mindspore/utils/__init__.py +3 -2
- mindspore/utils/dryrun.py +0 -6
- mindspore/utils/runtime_execution_order_check.py +162 -78
- mindspore/utils/sdc_detect.py +68 -0
- mindspore/utils/utils.py +14 -17
- mindspore/vcmeta.dll +0 -0
- mindspore/vcruntime140.dll +0 -0
- mindspore/vcruntime140_1.dll +0 -0
- mindspore/version.py +1 -1
- {mindspore-2.6.0.dist-info → mindspore-2.7.0.dist-info}/METADATA +5 -4
- {mindspore-2.6.0.dist-info → mindspore-2.7.0.dist-info}/RECORD +400 -439
- mindspore/_deprecated/jit.py +0 -198
- mindspore/_extends/remote/kernel_build_server_ascend.py +0 -75
- mindspore/communication/_hccl_management.py +0 -297
- mindspore/experimental/es/embedding_service.py +0 -891
- mindspore/experimental/es/embedding_service_layer.py +0 -581
- mindspore/profiler/common/validator/__init__.py +0 -14
- mindspore/profiler/common/validator/validate_path.py +0 -84
- mindspore/profiler/parser/__init__.py +0 -14
- mindspore/profiler/parser/aicpu_data_parser.py +0 -272
- mindspore/profiler/parser/ascend_analysis/__init__.py +0 -14
- mindspore/profiler/parser/ascend_analysis/constant.py +0 -71
- mindspore/profiler/parser/ascend_analysis/file_manager.py +0 -180
- mindspore/profiler/parser/ascend_analysis/function_event.py +0 -185
- mindspore/profiler/parser/ascend_analysis/fwk_cann_parser.py +0 -136
- mindspore/profiler/parser/ascend_analysis/fwk_file_parser.py +0 -131
- mindspore/profiler/parser/ascend_analysis/msprof_timeline_parser.py +0 -104
- mindspore/profiler/parser/ascend_analysis/path_manager.py +0 -313
- mindspore/profiler/parser/ascend_analysis/profiler_info_parser.py +0 -123
- mindspore/profiler/parser/ascend_analysis/tlv_decoder.py +0 -86
- mindspore/profiler/parser/ascend_analysis/trace_event_manager.py +0 -75
- mindspore/profiler/parser/ascend_cluster_generator.py +0 -116
- mindspore/profiler/parser/ascend_communicate_generator.py +0 -314
- mindspore/profiler/parser/ascend_flops_generator.py +0 -116
- mindspore/profiler/parser/ascend_fpbp_generator.py +0 -82
- mindspore/profiler/parser/ascend_hccl_generator.py +0 -271
- mindspore/profiler/parser/ascend_integrate_generator.py +0 -42
- mindspore/profiler/parser/ascend_memory_generator.py +0 -185
- mindspore/profiler/parser/ascend_msprof_exporter.py +0 -282
- mindspore/profiler/parser/ascend_msprof_generator.py +0 -187
- mindspore/profiler/parser/ascend_op_generator.py +0 -334
- mindspore/profiler/parser/ascend_steptrace_generator.py +0 -94
- mindspore/profiler/parser/ascend_timeline_generator.py +0 -545
- mindspore/profiler/parser/base_timeline_generator.py +0 -483
- mindspore/profiler/parser/container.py +0 -229
- mindspore/profiler/parser/cpu_gpu_timeline_generator.py +0 -697
- mindspore/profiler/parser/flops_parser.py +0 -531
- mindspore/profiler/parser/framework_enum.py +0 -111
- mindspore/profiler/parser/framework_parser.py +0 -464
- mindspore/profiler/parser/framework_struct.py +0 -61
- mindspore/profiler/parser/gpu_analysis/__init__.py +0 -14
- mindspore/profiler/parser/gpu_analysis/function_event.py +0 -44
- mindspore/profiler/parser/gpu_analysis/fwk_file_parser.py +0 -89
- mindspore/profiler/parser/gpu_analysis/profiler_info_parser.py +0 -72
- mindspore/profiler/parser/hccl_parser.py +0 -573
- mindspore/profiler/parser/hwts_log_parser.py +0 -122
- mindspore/profiler/parser/integrator.py +0 -526
- mindspore/profiler/parser/memory_usage_parser.py +0 -277
- mindspore/profiler/parser/minddata_analyzer.py +0 -800
- mindspore/profiler/parser/minddata_parser.py +0 -186
- mindspore/profiler/parser/minddata_pipeline_parser.py +0 -299
- mindspore/profiler/parser/op_intermediate_parser.py +0 -149
- mindspore/profiler/parser/optime_parser.py +0 -250
- mindspore/profiler/parser/profiler_info.py +0 -213
- mindspore/profiler/parser/step_trace_parser.py +0 -666
- mindspore/utils/hooks.py +0 -81
- /mindspore/common/{_auto_dynamic.py → dynamic_shape/_auto_dynamic.py} +0 -0
- {mindspore-2.6.0.dist-info → mindspore-2.7.0.dist-info}/WHEEL +0 -0
- {mindspore-2.6.0.dist-info → mindspore-2.7.0.dist-info}/entry_points.txt +0 -0
- {mindspore-2.6.0.dist-info → mindspore-2.7.0.dist-info}/top_level.txt +0 -0
mindspore/profiler/dynamic_profiler.py +374 -338

@@ -15,36 +15,25 @@
 """Dynamic Profile Monitor"""
 import os
 import sys
+import json
 import time
 import stat
-import json
 import atexit
-import struct
 import random
 import multiprocessing

 from mindspore import log as logger
 from mindspore.train import Callback
-from mindspore.profiler import
-from mindspore.
-from mindspore.profiler.
-from mindspore.profiler.
-from mindspore.profiler.
-from mindspore.profiler.
-
-
-    AicoreMetrics,
-    ExportType,
-)
+from mindspore.profiler import tensorboard_trace_handler, schedule
+from mindspore.profiler.profiler import Profile
+from mindspore.profiler.experimental_config import _ExperimentalConfig
+from mindspore.profiler.common.file_manager import FileManager
+from mindspore.profiler.common.path_manager import PathManager
+from mindspore.profiler.dynamic_profile.dynamic_profiler_config_context import DynamicProfilerConfigContext
+from mindspore.profiler.dynamic_profile.dynamic_monitor_proxy import MsDynamicMonitorProxySingleton
+from mindspore.profiler.dynamic_profile.dynamic_profiler_utils import DynamicProfilerUtils
 from mindspore.profiler.common.util import no_exception_func
-
-
-def get_real_rank():
-    """get rank id"""
-    try:
-        return get_rank()
-    except RuntimeError:
-        return int(os.getenv("RANK_ID", "0"))
+from mindspore.profiler.profiler_interface import ProfilerInterface


 def print_msg(msg):
@@ -52,210 +41,21 @@ def print_msg(msg):
     print("[Dynamic Profiler] " + msg, flush=True)


-class DynamicProfilerArgs:
-    """
-    Data class for dynamic profile config.
-    """
-    FMT = "i" * 7 + "?" * 6
-    SIZE = struct.calcsize(FMT)
-
-    def __init__(self,
-                 start_step: int = -1,
-                 stop_step: int = -1,
-                 aic_metrics: int = -1,
-                 profiler_level: int = 0,
-                 analyse_mode: int = -1,
-                 activities: int = 0,
-                 export_type: int = 0,
-                 profile_memory: bool = False,
-                 mstx: bool = False,
-                 parallel_strategy: bool = False,
-                 with_stack: bool = False,
-                 data_simplification: bool = True,
-                 is_valid: bool = False,
-                 **kwargs):
-        self._start_step = start_step
-        self._stop_step = stop_step
-        self._aic_metrics = aic_metrics
-        self._profiler_level = profiler_level
-        self._analyse_mode = analyse_mode
-        self._activities = activities
-        self._export_type = export_type
-        self._profile_memory = profile_memory
-        self._mstx = mstx
-        self._parallel_strategy = parallel_strategy
-        self._with_stack = with_stack
-        self._data_simplification = data_simplification
-        self._is_valid = is_valid
-        self._check_params_type()
-
-    def _check_params_type(self):
-        """Check and enforce parameter types with lower complexity."""
-        # Define a parameter check rule. {Parameter name: (expected type, default value)}
-        param_rules = {
-            '_start_step': (int, -1),
-            '_stop_step': (int, -1),
-            '_aic_metrics': (int, -1),
-            '_profiler_level': (int, 0),
-            '_analyse_mode': (int, -1),
-            '_activities': (int, 0),
-            '_export_type': (int, 0),
-            '_profile_memory': (bool, False),
-            '_mstx': (bool, False),
-            '_parallel_strategy': (bool, False),
-            '_with_stack': (bool, False),
-            '_data_simplification': (bool, True),
-            '_is_valid': (bool, False)
-        }
-
-        def _is_valid_type(value, expected_type):
-            """Helper method for type checking."""
-            if expected_type is int and isinstance(value, bool):
-                return False
-            return isinstance(value, expected_type)
-
-        for param, (expected_type, default) in param_rules.items():
-            value = getattr(self, param)
-            if not _is_valid_type(value, expected_type):
-                logger.warning(
-                    f"{param[1:]} should be {expected_type.__name__} type, "
-                    f"will be reset to {default}."
-                )
-                setattr(self, param, default)
-
-    @property
-    def start_step(self):
-        """ get start step value."""
-        return self._start_step
-
-    @property
-    def stop_step(self):
-        """ get stop step value."""
-        return self._stop_step
-
-    @property
-    def is_valid(self):
-        """ get json valid value."""
-        return self._is_valid
-
-    @is_valid.setter
-    def is_valid(self, value):
-        """ set json valid value."""
-        self._is_valid = value
-
-    @property
-    def analyse_mode(self):
-        """ get analyse mode value."""
-        return self._convert_analyse_mode(self._analyse_mode)
-
-    @property
-    def vars(self):
-        """ get all values in DynamicProfilerArgs."""
-        not_supported_args = ['_is_valid']
-        res = {}
-        for key, value in self.__dict__.items():
-            if key not in not_supported_args:
-                res[key.replace('_', '', 1)] = value
-        return res
-
-    @property
-    def args(self):
-        """ get all args in DynamicProfilerArgs."""
-        self._profiler_level = self._convert_profiler_level(self._profiler_level)
-        self._activities = self._convert_activities(self._activities)
-        self._aic_metrics = self._convert_aic_metrics(self._aic_metrics)
-        self._export_type = self._convert_export_type(self._export_type)
-        not_supported_args = ['_start_step', '_stop_step', '_analyse_mode', '_is_valid']
-        res = {}
-        for key, value in self.__dict__.items():
-            if key not in not_supported_args:
-                res[key.replace('_', '', 1)] = value
-        return res
-
-    @classmethod
-    def from_bytes(cls, byte_data):
-        """ unpack bytes to DynamicProfilerArgs."""
-        unpacked = struct.unpack(cls.FMT, byte_data)
-        return cls(*unpacked)
-
-    def to_bytes(self):
-        """ pack DynamicProfilerArgs to bytes."""
-        instance_vars = tuple(self.__dict__.values())
-        if len(instance_vars) != len(self.FMT):
-            raise ValueError("Number of variables does not match format string.")
-        return struct.pack(DynamicProfilerArgs.FMT, *instance_vars)
-
-    def _convert_analyse_mode(self, analyse_mode: int) -> str:
-        """ convert analyse_mode to real args in Profiler."""
-        if analyse_mode == 0:
-            return 'sync'
-        if analyse_mode == 1:
-            return 'async'
-        return None
-
-    def _convert_profiler_level(self, profiler_level: int) -> ProfilerLevel:
-        """ convert profiler_level to real args in Profiler."""
-        if profiler_level == -1:
-            return ProfilerLevel.LevelNone
-        if profiler_level == 0:
-            return ProfilerLevel.Level0
-        if profiler_level == 1:
-            return ProfilerLevel.Level1
-        if profiler_level == 2:
-            return ProfilerLevel.Level2
-        return ProfilerLevel.Level0
-
-    def _convert_activities(self, activities: int) -> ProfilerLevel:
-        """ convert activities to real args in Profiler."""
-        if activities == 0:
-            return [ProfilerActivity.CPU, ProfilerActivity.NPU]
-        if activities == 1:
-            return [ProfilerActivity.CPU]
-        if activities == 2:
-            return [ProfilerActivity.NPU]
-        return [ProfilerActivity.CPU, ProfilerActivity.NPU]
-
-    def _convert_aic_metrics(self, aic_metrics: int) -> AicoreMetrics:
-        """ convert aic_metrics to real args in Profiler."""
-        if aic_metrics == -1:
-            return AicoreMetrics.AiCoreNone
-        if aic_metrics == 0:
-            return AicoreMetrics.PipeUtilization
-        if aic_metrics == 1:
-            return AicoreMetrics.ArithmeticUtilization
-        if aic_metrics == 2:
-            return AicoreMetrics.Memory
-        if aic_metrics == 3:
-            return AicoreMetrics.MemoryL0
-        if aic_metrics == 4:
-            return AicoreMetrics.MemoryUB
-        if aic_metrics == 5:
-            return AicoreMetrics.ResourceConflictRatio
-        if aic_metrics == 6:
-            return AicoreMetrics.L2Cache
-        if aic_metrics == 7:
-            return AicoreMetrics.MemoryAccess
-        return AicoreMetrics.AiCoreNone
-
-    def _convert_export_type(self, export_type: int) -> ExportType:
-        """ convert export_type to real args in Profiler."""
-        if export_type == 0:
-            return [ExportType.Text]
-        if export_type == 1:
-            return [ExportType.Db]
-        if export_type == 2:
-            return [ExportType.Text, ExportType.Db]
-        return [ExportType.Text]
-
 class DynamicProfilerMonitorBase(Callback):
     """
-    Dynamic
+    Dynamic profiler callback base class implementing the dynamic profiler functionality.
     """

-
-
-
-        self.
+    NPU_MONITOR_START = "NPU_MONITOR_START"
+
+    def __init__(self, cfg_path=None, output_path=None, poll_interval=2, **kwargs):
+        self._is_dyno = DynamicProfilerUtils.is_dyno_mode()
+        self._rank_id = DynamicProfilerUtils.get_real_rank()
+        if not self._is_dyno:
+            self._cfg_path = cfg_path
+            self._cfg_json_path = os.path.join(self._cfg_path, "profiler_config.json")
+            self._cfg_json_path = os.path.realpath(self._cfg_json_path)
+            self._init_cfg_json()
         self._output_path = "dyn_profile_data" if output_path is None else output_path
         self._poll_interval = poll_interval
         if not isinstance(self._poll_interval, int):
@@ -268,7 +68,6 @@ class DynamicProfilerMonitorBase(Callback):

         self._kwargs = kwargs
         self._shm_name = time.strftime("DynamicProfileShm%Y%m%d%H", time.localtime())
-        self._rank_id = get_real_rank()
         self._shared_loop_flag = multiprocessing.Value('b', True)
         self._shm = None
         self._process = None
@@ -282,26 +81,35 @@
         self._step_num = 0

         self._check_shm_for_killed()
-        self._init_cfg_json()
         self._create_shm()
         self._create_process()
         atexit.register(self._clean_resource)
+        if self._is_dyno:
+            atexit.register(self._finalize_dynolog)

     @no_exception_func()
     def step_begin(self, run_context):
         """
-        Start
+        Start profiler at the begin of step.

         Args:
             run_context (RunContext): Context of the train running.
         """
-
+        prof_json = self._get_prof_args()
+        if not prof_json:
+            return
+        if self._is_dyno:
+            # Dyno monitor process
+            if self.NPU_MONITOR_START in prof_json:
+                self._call_dyno_monitor(prof_json)
+                return

+        prof_args = DynamicProfilerConfigContext(prof_json)
         if not prof_args.is_valid:
-            logger.error("Dynamic
+            logger.error("Dynamic profiler json is not valid, please check the json file.")
             return

-        if prof_args.start_step
+        if prof_args.start_step in (-1, self._last_start_step):
             return

         cb_params = run_context.original_args()
@@ -311,16 +119,22 @@
         # Prevent repeated calls of the start function within a complete interval
         if step_num == start_step:
             if self._is_started:
-                logger.error("Dynamic
-                             "please wait the first
+                logger.error("Dynamic profiler is already started at step %d, "
+                             "please wait the first profiler finished at step %d.",
                              self._last_start_step, self._last_stop_step)
                 return

         if self._profiler is None:
-
+            output_path = prof_args.prof_path if prof_args.prof_path != "./" else self._output_path
+            prof_path = os.path.join(
+                output_path,
+                f"rank{self._rank_id}_start{start_step}_stop{stop_step}"
+            )
             PathManager.check_input_directory_path(prof_path)
-
-
+            profiler_config = self._get_prof_config(prof_args, prof_path, start_step, stop_step,
+                                                    start_profile=False,
+                                                    skip_first=0)
+            self._profiler = Profile(**profiler_config)
         print_msg(f"Rank {self._rank_id} create output path {prof_path}")

         self._profiler.start()
@@ -330,37 +144,82 @@
         print_msg(f"Rank {self._rank_id} Dynamic profiler start at step {start_step}, "
                   f"will stop at step {stop_step}")

+    @staticmethod
+    def _get_prof_config(prof_args, prof_path, start_step, stop_step, start_profile, skip_first):
+        """
+        Get profiler config.
+
+        Args:
+            prof_args: Profiler config.
+            prof_path: Profiler output path.
+            start_step: Start step.
+            stop_step: Stop step.
+            start_profile: enable start_profile.
+            skip_first: skip first step.
+        """
+        profiler_config = {
+            "activities": prof_args.args.get("activities"),
+            "with_stack": prof_args.args.get("with_stack"),
+            "profile_memory": prof_args.args.get("profile_memory"),
+            "parallel_strategy": prof_args.args.get("parallel_strategy"),
+            "start_profile": start_profile,
+            "record_shapes": prof_args.args.get("record_shapes"),
+            "schedule": schedule(
+                wait=0,
+                warmup=0,
+                active=stop_step - start_step + 1,
+                repeat=1,
+                skip_first=skip_first
+            ),
+            "on_trace_ready": tensorboard_trace_handler(
+                dir_name=prof_path,
+                analyse_flag=prof_args.analyse,
+                async_mode=prof_args.analyse_mode == "async",
+            ),
+            "experimental_config": _ExperimentalConfig(
+                profiler_level=prof_args.args.get("profiler_level"),
+                aic_metrics=prof_args.args.get("aic_metrics"),
+                l2_cache=prof_args.args.get("l2_cache"),
+                mstx=prof_args.args.get("mstx"),
+                data_simplification=prof_args.args.get("data_simplification"),
+                export_type=prof_args.args.get("export_type"),
+                mstx_domain_include=prof_args.args.get("mstx_domain_include"),
+                mstx_domain_exclude=prof_args.args.get("mstx_domain_exclude"),
+                sys_io=prof_args.args.get("sys_io"),
+                sys_interconnection=prof_args.args.get("sys_interconnection"),
+                host_sys=prof_args.args.get("host_sys")
+            )
+        }
+        return profiler_config
+
     @no_exception_func()
     def step_end(self, run_context):
         """
-        Stop
+        Stop profiler at the end of step.

         Args:
             run_context (RunContext): Context of the train running.
         """
-
+        prof_json = self._get_prof_args()
+        prof_args = DynamicProfilerConfigContext(prof_json)

         if not prof_args.is_valid:
-            logger.error("Dynamic
+            logger.error("Dynamic profiler json is not valid, please check the json file.")
             return

         if prof_args.stop_step == -1:
             return

+        if self._profiler:
+            self._profiler.step()
+
         cb_params = run_context.original_args()
         step_num = cb_params.cur_step_num

         if step_num == self._last_stop_step and self._is_started:
-
-
-
-            self._profiler.analyse(mode=prof_args.analyse_mode)
-            else:
-                ProfilerInterface.finalize()
-                ProfilerInterface.clear()
-            self._profiler = None
-            self._is_started = False
-            print_msg(f"Rank {self._rank_id} Dynamic profiler stop at step {step_num}")
+            self._profiler = None
+            self._is_started = False
+            print_msg(f"Rank {self._rank_id} Dynamic profiler stop at step {step_num}")

     @no_exception_func()
     def step(self):
@@ -415,19 +274,28 @@
             ...     context.set_context(mode=mindspore.PYNATIVE_MODE)
             ...     mindspore.set_device("Ascend")
             ...     data_cfg = {
-            ...
-            ...
-            ...
-            ...
-            ...
-            ...
-            ...
-            ...
-            ...
-            ...
-            ...
-            ...
-            ...
+            ...         "start_step": 2,
+            ...         "stop_step": 5,
+            ...         "aic_metrics": "AiCoreNone",
+            ...         "profiler_level": "Level0",
+            ...         "analyse_mode": 0,
+            ...         "activities": ["CPU", "NPU"],
+            ...         "export_type": ["text"],
+            ...         "profile_memory": False,
+            ...         "mstx": False,
+            ...         "parallel_strategy": False,
+            ...         "with_stack": False,
+            ...         "data_simplification": True,
+            ...         "l2_cache": False,
+            ...         "analyse": True,
+            ...         "record_shape": False,
+            ...         "prof_path": "./data",
+            ...         "mstx_domain_include": [],
+            ...         "mstx_domain_exclude": [],
+            ...         "host_sys": [],
+            ...         "sys_io": False,
+            ...         "sys_interconnection": False
+            ...     }
             ...     output_path = "./cfg_path"
             ...     cfg_path = os.path.join(output_path, "profiler_config.json")
             ...     os.makedirs(output_path, exist_ok=True)
@@ -442,7 +310,8 @@
             ...     for i in range(STEP_NUM):
             ...         print(f"step {i}")
             ...         train(net)
-            ...         # Modify the configuration file after step 7
+            ...         # Modify the configuration file after step 7
+            ...         # For example, change start_step to 8 and stop_step to 10
             ...         if i == 5:
             ...             # Modify parameters in the JSON file
             ...             change_cfg_json(os.path.join(output_path, "profiler_config.json"))
@@ -451,43 +320,81 @@
         """

         self._step_num += 1
-
+        prof_json = self._get_prof_args()
+        if not prof_json:
+            return
+        if self._is_dyno:
+            # Dyno monitor process
+            if self.NPU_MONITOR_START in prof_json:
+                self._call_dyno_monitor(prof_json)
+                return

+        prof_args = DynamicProfilerConfigContext(prof_json)
         if not prof_args.is_valid:
-            logger.error("Dynamic
+            logger.error("Dynamic profiler config is not valid, please check the json or dyno config.")
             return
+        self._handle_profiler_setup(prof_args)

-        if
-
+        if self._profiler:
+            self._profiler.step()

-
-
+    def _handle_profiler_setup(self, args):
+        """Common handler for profiler setup logic shared between dyno and non-dyno paths."""
+        start_step = args.start_step
+        stop_step = args.stop_step
+
+        if not self._is_valid_start_stop_step(self._step_num, start_step, stop_step):
             return

-        if self._start_step !=
-
-            self.
-
-            if
-                prof_path = os.path.join(self._output_path,
-                                         f"rank{self._rank_id}_start{self._start_step}_stop{self._stop_step}")
-                print_msg(f"Rank {self._rank_id} create output path {prof_path}")
-                print_msg(f"Rank {self._rank_id} Dynamic profile start at step {self._start_step}, "
-                          f"will stop at step {self._stop_step}")
-                self._profiler = Profiler(schedule=schedule(wait=0, warmup=0,
-                                                            active=self._stop_step - self._start_step + 1,
-                                                            repeat=1,
-                                                            skip_first=1),
-                                          on_trace_ready=tensorboard_trace_handler(dir_name=prof_path),
-                                          **prof_args.args)
-            else:
+        if self._start_step != start_step or self._stop_step != stop_step:
+            self._start_step = start_step
+            self._stop_step = stop_step
+
+            if not (start_step >= 0 and 0 <= start_step <= stop_step):
                 self._profiler = None
-                logger.error(
-
-
+                logger.error(
+                    "Rank %d Dynamic profiler start at step %d and stop at step %d must be "
+                    "greater than or equal to 0, and stop step should not be less than start step",
+                    self._rank_id, start_step, stop_step
+                )
+                return

-
-        self.
+            # Setup profiler configuration
+            output_path = args.prof_path if args.prof_path != "./" else self._output_path
+            prof_path = os.path.join(
+                output_path,
+                f"rank{self._rank_id}_start{start_step}_stop{stop_step}"
+            )
+            print_msg(f"Rank {self._rank_id} create output path {prof_path}")
+            print_msg(
+                f"Rank {self._rank_id} Dynamic profiler start at step {start_step}, "
+                f"will stop at step {stop_step}"
+            )
+            profiler_config = self._get_prof_config(args, prof_path, start_step, stop_step, start_profile=True,
+                                                    skip_first=1)
+            self._profiler = Profile(**profiler_config)
+
+    def _is_valid_start_stop_step(self, step_num, start_step, stop_step):
+        """Verify whether start_step and stop_step are valid parameters."""
+        if start_step < 0 or stop_step < 0:
+            return False
+
+        if step_num < start_step:
+            return False
+
+        if step_num > stop_step != self._stop_step:
+            logger.warning("stop_step must be greater than step_num, "
+                           "but get start_step = %d, stop_step = %d, step_num = %d", start_step, stop_step, step_num)
+            return False
+
+        return True
+
+    @no_exception_func()
+    def _call_dyno_monitor(self, dyno_args):
+        if "is_valid" in dyno_args:
+            del dyno_args["is_valid"]
+        dyno_monitor_proxy = MsDynamicMonitorProxySingleton().get_proxy()
+        dyno_monitor_proxy.enable_dyno_npu_monitor(dyno_args)

     @no_exception_func()
     def on_train_end(self, run_context):
@@ -502,12 +409,16 @@
     def _get_prof_args(self):
         """ Get prof_args """
         logger.error("Dynamic profiler _get_prof_args is not implemented")
-        return
+        return {}

     def _clean_resource(self):
         """Clean resource"""
         logger.error("Dynamic profiler _clean_resource is not implemented")

+    def _finalize_dynolog(self):
+        """finalize dynolog"""
+        logger.error("Dynolog monitor _finalize_dynolog is not implemented")
+
     def _check_step(self, start_step, stop_step, step_num):
         """Check step valid"""
         if start_step <= 0 or stop_step <= 0:
@@ -535,9 +446,11 @@
         """Init config json file"""
         if self._rank_id == 0:
             if not os.path.exists(self._cfg_json_path):
-                logger.
-
-
+                logger.info("cfg_path is not exist, create default cfg json")
+                default_dy_config_context = DynamicProfilerConfigContext({})
+                PathManager.make_dir_safety(self._cfg_path)
+                config_file_path = os.path.join(self._cfg_path, "profiler_config.json")
+                FileManager.create_json_file(config_file_path, default_dy_config_context.vars, indent=4)
         else:
             logger.info("rank_id is not 0, skip init cfg json")
         print_msg(f"Init config json file: {self._cfg_json_path}")
@@ -550,10 +463,12 @@
     def _create_process(self):
         """Create json monitor process, one process will be created at one worker"""
         if self._is_create_process:
+            args = [self._shared_loop_flag, self._poll_interval, self._shm, self._rank_id] if self._is_dyno else \
+                [self._shared_loop_flag, self._poll_interval, self._shm, self._cfg_json_path]
             # daemon need to be set to True, otherwise the process will not be killed when the main process exits.
-            self._process = multiprocessing.Process(target=worker_func,
-
-
+            self._process = multiprocessing.Process(target=worker_dyno_func if self._is_dyno else worker_func,
+                                                    daemon=True,
+                                                    args=args)
             self._process.start()
             logger.info("Config monitor process has been created by rank %d.", self._rank_id)
         else:
@@ -573,7 +488,7 @@
         if not os.path.exists(shm_path):
             return

-        MAX_TIME_DIFF =
+        MAX_TIME_DIFF = 60  # seconds
         time_shm = os.stat(shm_path).st_ctime
         cur_proc_time = self._get_pid_st_ctime(os.getpid())

@@ -584,7 +499,7 @@
     def _get_pid_st_ctime(self, pid):
         """Get pid st_ctime"""
         try:
-            fd = os.open(
+            fd = os.open(os.path.join('/proc', str(pid)), os.O_RDONLY, stat.S_IRUSR | stat.S_IRGRP)
             stat_ino = os.fstat(fd)
             os.close(fd)
             create_time = stat_ino.st_ctime
@@ -593,7 +508,7 @@
             logger.error("Process with PID %d does not exist.", pid)
         except PermissionError:
             logger.error("Permission denied when accessing PID %d.", pid)
-        except Exception as ex:
+        except Exception as ex:  # pylint: disable=W0703
             logger.error("An error occurred while getting creation time for PID %d: %s", pid, str(ex))


@@ -601,7 +516,8 @@ if sys.version_info >= (3, 8):
     @no_exception_func()
     def write_bytes(shm, byte_data):
         """Write bytes to shared memory"""
-        shm.buf[:
+        shm.buf[:] = b'\x00' * len(shm.buf)
+        shm.buf[:len(byte_data)] = byte_data
 else:
     @no_exception_func()
     def write_bytes(shm, byte_data):
@@ -624,15 +540,13 @@ def worker_func(loop_flag, poll_interval, shm, cfg_path):
                 with open(cfg_path, 'r') as f:
                     data = json.load(f)

-
-                prof_args = DynamicProfilerArgs(**data)
-                prof_args.is_valid = True
+                data['is_valid'] = True
                 logger.info("Dynamic profiler process load json success")
             except json.JSONDecodeError as e:
-
-                prof_args.is_valid = False
+                data = {'is_valid': False}
                 logger.error("Dynamic profiler process load json failed: %s", e)
-
+            # convert json to bytes
+            byte_data = DynamicProfilerConfigContext.json_to_bytes(data)
             write_bytes(shm, byte_data)
         else:
             logger.error("Dynamic profiler cfg json not exists")
@@ -640,6 +554,36 @@
     logger.info("Dynamic profiler process done")


+@no_exception_func()
+def worker_dyno_func(loop_flag, poll_interval, shm, rank_id):
+    """ dyno monitor process worker function python version >= 3.8"""
+    proxy = MsDynamicMonitorProxySingleton().get_proxy()
+    ret = proxy.init_dyno(rank_id)
+
+    if not ret:
+        logger.warning("Rank %d init dynolog failed !")
+        return
+    print_msg("Init dynolog success !")
+
+    while loop_flag.value:
+        try:
+            res = proxy.poll_dyno()
+            if not res:
+                continue
+            data = DynamicProfilerUtils.dyno_str_to_dict(res)
+        except Exception as e:  # pylint: disable=broad-except
+            data = {'is_valid': False}
+            logger.error("Dynolog process load config failed: %s", e)
+        else:
+            data['is_valid'] = True
+
+        # convert dyno config json to bytes
+        byte_data = DynamicProfilerConfigContext.json_to_bytes(data)
+        write_bytes(shm, byte_data)
+        time.sleep(poll_interval)
+    logger.info("Dynolog process done")
+
+
 if sys.version_info >= (3, 8):
     from multiprocessing import shared_memory
     from unittest.mock import patch
@@ -647,10 +591,10 @@

     class DynamicProfilerMonitor(DynamicProfilerMonitorBase):
         r"""
-        This class to enable the dynamic
+        This class to enable the dynamic profiler monitoring of MindSpore neural networks.

         Args:
-            cfg_path (str): (Ascend only) Dynamic
+            cfg_path (str): (Ascend only) Dynamic profiler json config file directory. The requirement is a shared path
                 that can be accessed by all nodes. The parameters of the json configuration file are as follows:

                 - start_step (int, required) - Sets the step number at which the Profiler starts collecting data.
@@ -660,27 +604,46 @@
                   a relative value, with the first step of training being 1. The stop_step must be greater than or
                   equal to start_step. The default value is -1, indicating that data collection will not start during
                   the entire training process.
-                - aic_metrics (int, optional) -
-
-
-
-
-
-
-
-                -
-
-
-
-
+                - aic_metrics (int/str, optional) - Set the collection of AI Core metric data. The current version can
+                  pass in either type int or str. Later, it will be updated to only pass in the str type.
+                  Here, ``0`` and ``"PipeUtilization"`` represent PipeUtilization; ``1`` and ``"ArithmeticUtilization"``
+                  represent ArithmeticUtilization; ``2`` and ``"Memory"`` represent Memory; ``3`` and ``"MemoryL0"``
+                  represent MemoryL0; ``4`` and ``"MemoryUB"`` stand for MemoryUB; ``5`` and ``"ResourceConflictRatio"``
+                  represent ResourceConflictRatio; ``6`` and ``"L2Cache"`` represent L2Cache; ``7`` and
+                  ``"MemoryAccess"`` stand for MemoryAccess. The default value ``"AiCoreNone"`` indicates that the
+                  AI Core metric is not collected.
+                - profiler_level (int/str, optional) - Set the level for collecting performance data. The current
+                  version can pass in either type int or str, and it will be updated to only pass in str type
+                  in the future. Among them, ``-1`` and ``"LevelNone"`` represent ProfilerLevel.LevelNone, ``0``
+                  and ``"Level0"`` represent ProfilerLevel.Level0, and ``1`` and ``"Level1"`` represent
+                  ProfilerLevel.Level1. ``2`` and ``"Level2"`` stand for Profile Level.Level2.
+                  The default value ``"Level0"`` indicates the collection level of ProfilerLevel.Level0.
+                - activities (int/list, optional) - Set the device for collecting performance data.
+                  The current version can pass in either type int or list. Later, it will be updated to only
+                  pass in the list type. Among them, ``0`` and ``["CPU","NPU"]`` represent CPU+NPU, ``1`` and
+                  ``["CPU"]`` represent CPU, and ``2`` and ``["NPU"]`` represent NPU. The default values
+                  ``["CPU","NPU"]`` indicate the collection of performance data of CPU+NPU.
+                - export_type (int/list, optional) - Set the type of the exported performance data.
+                  The current version can pass in either type int or list, and it will be updated later
+                  to only pass in the list type. Among them, ``0`` and ``["text"]`` represent text, ``1`` and ``["db"]``
+                  represent db, and ``2`` and ``["text","db"]`` represent text and db respectively. The default value
+                  ``["text"]`` indicates that only performance data of the text type is exported.
                 - profile_memory (bool, optional) - Set whether to collect memory performance data, true indicates that
                   memory performance data is collected, false indicates that memory performance data is not collected.
                   The default value is false, indicating that memory performance data is not collected.
                 - mstx (bool, optional) - Set whether to enable mstx, true indicates that mstx is enabled, false
                   indicates that mstx is disabled. The default value is false, indicating that mstx is not enabled.
-                -
-
-
+                - analyse (bool, optional) - Set whether to enable online analysis. True indicates that online analysis
+                  is enabled, while false indicates that online analysis is disabled. The default value is false,
+                  indicating that online analysis is not enabled. This parameter has a higher priority than the
+                  `analyse_mode` parameter. When this parameter is set to false, the setting of the `analyse_mode`
+                  parameter does not take effect. When this parameter is set to true,
+                  setting the `analyse_mode` parameter to -1 does not take effect.
+                - analyse_mode (int, optional) - Sets the mode for online analysis,
+                  where 0 represents "sync" and 1 represents "async". The default value is -1,
+                  indicating that online analysis is not used. This parameter has a lower priority than the `analyse`
+                  parameter. When the `analyse` parameter is set to false, the setting of this parameter does not take
+                  effect. When the `analyse` parameter is set to true, setting it to -1 does not take effect.
                 - parallel_strategy (bool, optional) - Sets whether to collect parallel strategy performance data,
                   where true means to collect and false means not to collect. The default value is false, indicating
                   that parallel strategy performance data is not collected.
@@ -690,6 +653,44 @@
                 - data_simplification (bool, optional) - Sets whether to enable data simplification, where true means
                   to enable and false means not to enable. The default value is true, indicating that data
                   simplification is enabled.
+                - record_shapes (bool, optional) - Sets whether to collect operator input tensor shapes data, where true
+                  means that the shape data is collected and false means that the shape data is not collected. The
|
|
658
|
+
default value is false, indicating that input tensor shapes data is not collected.
|
|
659
|
+
- mstx_domain_include (list, optional) - Set the set of enabled domain names when the mstx switch
|
|
660
|
+
is turned on. The name must be of str type. Default value: ``[]``, indicating that this parameter
|
|
661
|
+
is not used to control the domain. This parameter is mutually exclusive with the mstx_domain_exclude
|
|
662
|
+
parameter and cannot be set. simultaneously. If both are set, only the mstx_domain_include parameter
|
|
663
|
+
takes effect.
|
|
664
|
+
- mstx_domain_exclude (list, optional) - Set the set of domain names that are not enabled when the
|
|
665
|
+
mstx switch is turned on. The name must be of str type. Default value: ``[]``, indicating that this
|
|
666
|
+
parameter is not used to control the domain.
|
|
667
|
+
- prof_path (str, optional) - Output data path of the dynamic profiler. It is the same as the interface
|
|
668
|
+
parameter `output_path`. When both are set, `prof_path` takes effect. Default value:
|
|
669
|
+
``"./"`` .
|
|
670
|
+
- sys_io (bool, optional) - Set whether to collect NIC and RoCE data. Default value: ``False`` ,
|
|
671
|
+
indicating that these data are not collected.
|
|
672
|
+
- sys_interconnection (bool, optional) - Set whether to collect system interconnection data,
|
|
673
|
+
including aggregate collective communication statistics (HCCS), PCIe data, and inter-chip transmission
|
|
674
|
+
bandwidth information. Default value: ``False`` , indicating that these data are not collected.
|
|
675
|
+
- host_sys (list, optional) - Collect the data of system class calls, storage classes and cpu usage
|
|
676
|
+
rate on the host side, and pass in the list type. It supports passing in one or more of ``"cpu"``,
|
|
677
|
+
``"mem"``, ``"disk"``, ``"network"`` and ``"osrt"``. Among them, ``"cpu"`` represents the cpu
|
|
678
|
+
utilization at the process level, ``"mem"`` represents the memory utilization at the process level,
|
|
679
|
+
``"disk"`` represents the disk I/O utilization at the process level, and ``"network"`` represents the
|
|
680
|
+
network I/O utilization at the system level. ``"osrt"`` represents system-level syscall and
|
|
681
|
+
pthreadcall. Default value: ``[]``, indicating that system class data on the host side is
|
|
682
|
+
not collected. When collecting DISK or OSRT data, it is necessary to install the iotop, perf,
|
|
683
|
+
and ltrace third-party tools in advance. For detailed steps, please refer to
|
|
684
|
+
`Installing Third-party Tools <https://www.hiascend.com/document/detail/zh/mindstudio/80RC1/T&ITools/
|
|
685
|
+
Profiling/atlasprofiling_16_0136.html>`_ .
|
|
686
|
+
After the third-party tool is successfully installed, user permissions need to be configured. For
|
|
687
|
+
detailed steps, please refer to `Configure User Permissions <https://www.hiascend.com/document/
|
|
688
|
+
detail/zh/mindstudio/80RC1/T&ITools/Profiling/atlasprofiling_16_0137.
|
|
689
|
+
html>`_ .
|
|
690
|
+
Note that in step 3 of configuring user permissions, the content in the msprof_data_collection.sh
|
|
691
|
+
script needs to be replaced with `msprof_data_collection.sh
|
|
692
|
+
<https://gitee.com/mindspore/mindspore/blob/master/docs/api/api_python/mindspore/script/
|
|
693
|
+
msprof_data_collection.sh>`_.
|
|
693
694
|
|
|
694
695
|
output_path (str, optional): (Ascend only) Output data path. Default: ``"./dyn_profile_data"`` .
|
|
695
696
|
poll_interval (int, optional): (Ascend only) The polling period of the monitoring process, in seconds.
|
|
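The keys documented above live in the JSON file that `cfg_path` points at. As a rough illustration only, a config could be written like this; the file name, the chosen key subset, and the flat top-level layout are assumptions on my part, not taken from the diff (the exact schema is owned by `DynamicProfilerConfigContext`):

import json
import os

# Illustrative subset of the documented keys; values mirror the examples above.
profiler_cfg = {
    "start_step": 10,
    "stop_step": 12,
    "aic_metrics": "PipeUtilization",
    "profiler_level": "Level0",
    "activities": ["CPU", "NPU"],
    "export_type": ["text"],
    "profile_memory": False,
    "mstx": False,
    "analyse": True,
    "analyse_mode": 0,
    "record_shapes": False,
    "prof_path": "./dyn_prof_output",
    "host_sys": ["cpu", "mem"],
}

cfg_dir = "./dyn_prof_cfg"  # must be a shared directory visible to all nodes
os.makedirs(cfg_dir, exist_ok=True)
with open(os.path.join(cfg_dir, "profiler_config.json"), "w") as f:  # file name assumed
    json.dump(profiler_cfg, f, indent=4)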
@@ -729,9 +730,13 @@
             ... model.train(10, data, callbacks=[dynprof_cb])
         """
 
-        def __init__(self, cfg_path, output_path="./dyn_profile_data", poll_interval=2, **kwargs):
-            if
-
+        def __init__(self, cfg_path=None, output_path="./dyn_profile_data", poll_interval=2, **kwargs):
+            if DynamicProfilerUtils.is_dyno_mode() and cfg_path is not None:
+                logger.warning("If you export 'MSMONITOR_USE_DAEMON=1', your 'cfg_path' parameter will be invalid!")
+                cfg_path = None
+
+            if not DynamicProfilerUtils.is_dyno_mode() and not isinstance(cfg_path, str):
+                raise TypeError("If you set 'MSMONITOR_USE_DAEMON' to not 1, The cfg_path must be a string.")
             if not isinstance(output_path, str):
                 logger.warning(f"The output_path must be a string, "
                                f"but got type {type(output_path)}, it will be set to './dyn_profile_data'.")
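For orientation, the two ways of constructing the monitor after this change would look roughly as follows. The behaviour (cfg_path required as a str in config-file mode, ignored with a warning when MSMONITOR_USE_DAEMON=1) comes from the hunk above; the import path is the commonly documented one and should be checked against the installed MindSpore version:

import os
from mindspore.profiler import DynamicProfilerMonitor  # import path assumed

# Config-file mode: MSMONITOR_USE_DAEMON unset (or != 1), cfg_path must be a str.
monitor = DynamicProfilerMonitor(cfg_path="/shared/dyn_prof_cfg",
                                 output_path="./dyn_profile_data")

# Daemon (dynolog) mode: cfg_path is ignored, and a warning is logged if it is passed.
os.environ["MSMONITOR_USE_DAEMON"] = "1"
monitor = DynamicProfilerMonitor(output_path="./dyn_profile_data")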
@@ -740,7 +745,21 @@
 
         def _get_prof_args(self):
             """ Get prof_args py38"""
-
+            byte_length = self._get_shm_byte_length()
+
+            if byte_length == 0:
+                return {}
+
+            valid_bytes = self._shm.buf[:byte_length]
+            return DynamicProfilerConfigContext.bytes_to_json(bytes(valid_bytes))
+
+        def _get_shm_byte_length(self):
+            byte_length = 0
+            for i, byte in enumerate(self._shm.buf):
+                if byte == 0:
+                    byte_length = i
+                    break
+            return byte_length
 
         @no_exception_func()
         def _clean_resource(self):
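The reader side added above treats the shared-memory buffer as a zero-terminated byte string: scan for the first zero byte, then decode everything before it. A self-contained sketch of that convention using only the standard library is shown below; `json` stands in for `DynamicProfilerConfigContext`, and the buffer size is a made-up stand-in for `DynamicProfilerUtils.CFG_BUFFER_SIZE`:

import json
from multiprocessing import shared_memory

BUF_SIZE = 1024  # stand-in for DynamicProfilerUtils.CFG_BUFFER_SIZE

shm = shared_memory.SharedMemory(create=True, size=BUF_SIZE)
try:
    # Writer: serialize the config and keep the tail of the buffer zeroed.
    payload = json.dumps({"start_step": 10, "stop_step": 12}).encode("utf-8")
    shm.buf[:len(payload)] = payload
    shm.buf[len(payload):BUF_SIZE] = b"\x00" * (BUF_SIZE - len(payload))

    # Reader: find the first zero byte, then decode the valid prefix.
    byte_length = 0
    for i, byte in enumerate(shm.buf):
        if byte == 0:
            byte_length = i
            break
    cfg = json.loads(bytes(shm.buf[:byte_length])) if byte_length else {}
    print(cfg)
finally:
    shm.close()
    shm.unlink()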
@@ -770,6 +789,12 @@ if sys.version_info >= (3, 8):
             logger.warning("Rank %s unlink shm failed, may be removed", self._rank_id)
             self._shm = None
 
+        @no_exception_func()
+        def _finalize_dynolog(self):
+            dyno_monitor_proxy = MsDynamicMonitorProxySingleton().get_proxy()
+            dyno_monitor_proxy.finalize_dyno()
+            logger.info("Rank %d finalize dynolog success !", self._rank_id)
+
         @no_exception_func()
         def _create_shm(self):
             """Create a json monitor process based on whether the SharedMemory is successfully created py38"""
@@ -789,7 +814,7 @@
             try:
                 # Step 2: only one process can create shm successfully.
                 self._shm = shared_memory.SharedMemory(name=self._shm_name,
-                                                       create=True, size=
+                                                       create=True, size=DynamicProfilerUtils.CFG_BUFFER_SIZE)
                 self._is_create_process = True
                 logger.info("Rank %d shared memory is created.", self._rank_id)
                 break
@@ -799,7 +824,7 @@
                 logger.warning("Rank %d shared memory create failed, "
                                "retry times = %d.", self._rank_id, try_times)
                 time.sleep(random.uniform(0, 0.02))  # sleep 0 ~ 20 ms
-            except Exception as e:
+            except Exception as e:  # pylint: disable=W0703
                 # shm open failed because of other process create shm not finished
                 try_times -= 1
                 logger.warning("Rank %d shared memory open failed, error: %s, retry times = %d",
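The create path above relies on exactly one rank winning the `create=True` call while the others retry briefly and then attach to the buffer that the winner created. A minimal stand-alone version of that create-or-attach pattern follows; the function name, retry budget, and sleep window are illustrative, not the module's own constants:

import random
import time
from multiprocessing import shared_memory

def create_or_attach(name, size, retries=10):
    """Return (shm, created); created is True only for the single creating process."""
    for _ in range(retries):
        try:
            return shared_memory.SharedMemory(name=name, create=True, size=size), True
        except FileExistsError:
            # Another process created it first; try to attach instead.
            try:
                return shared_memory.SharedMemory(name=name), False
            except FileNotFoundError:
                # The other process has not finished creating it yet; retry shortly.
                time.sleep(random.uniform(0, 0.02))
    raise RuntimeError(f"Could not create or attach shared memory '{name}'")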
@@ -815,10 +840,10 @@ else:
 
     class DynamicProfilerMonitor(DynamicProfilerMonitorBase):
         r"""
-        This class to enable the dynamic
+        This class to enable the dynamic profiler monitoring of MindSpore neural networks.
 
         Args:
-            cfg_path (str): Dynamic
+            cfg_path (str): Dynamic profiler json config file directory. The requirement is a shared path
                 that can be accessed by all nodes.
             output_path (str, optional): Output data path. Default: ``"./dyn_profile_data"`` .
             poll_interval (int, optional): The polling period of the monitoring process, in seconds.
@@ -858,16 +883,24 @@ else:
             ... model.train(10, data, callbacks=[dynprof_cb])
         """
 
-        def __init__(self, cfg_path, output_path="./dyn_profile_data", poll_interval=2, **kwargs):
-            if
-
+        def __init__(self, cfg_path=None, output_path="./dyn_profile_data", poll_interval=2, **kwargs):
+            if DynamicProfilerUtils.is_dyno_mode() and cfg_path is not None:
+                logger.warning("If you export 'MSMONITOR_USE_DAEMON=1', your 'cfg_path' parameter will be invalid!")
+                cfg_path = None
+
+            if not DynamicProfilerUtils.is_dyno_mode() and not isinstance(cfg_path, str):
+                raise TypeError("If you set 'MSMONITOR_USE_DAEMON' to not 1, The cfg_path must be a string.")
+
             if not isinstance(output_path, str):
                 logger.warning(f"The output_path must be a string, "
                                f"but got type {type(output_path)}, it will be set to './dyn_profile_data'.")
                 output_path = "./dyn_profile_data"
             self._cfg_path = cfg_path
             self._shm_name = time.strftime("DynamicProfileShm%Y%m%d%H", time.localtime())
-            self._shm_dir =
+            self._shm_dir = (
+                "/dev/shm" if DynamicProfilerUtils.is_dyno_mode()
+                else os.path.join(self._cfg_path, "shm")
+            )
             PathManager.make_dir_safety(self._shm_dir)
             self._shm_path = os.path.realpath(os.path.join(self._shm_dir, self._shm_name))
 
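On Python versions below 3.8 there is no `multiprocessing.shared_memory`, so this branch maps a fixed-size file from a shared directory instead ("/dev/shm" in dyno mode, otherwise `<cfg_path>/shm`, per the hunk above). A rough sketch of such a file-backed buffer follows; the path and size are hypothetical, error handling and the original's permission flags are omitted, and the POSIX path will not exist on Windows:

import mmap
import os

SHM_FILE = "/dev/shm/DynamicProfileShmExample"  # hypothetical path
BUF_SIZE = 1024                                 # stand-in for CFG_BUFFER_SIZE

# One process initializes the file to the full buffer size ...
with open(SHM_FILE, "wb") as f:
    f.write(b"\x00" * BUF_SIZE)

# ... and every process maps the same file to read the current config bytes.
fd = os.open(SHM_FILE, os.O_RDWR)
with os.fdopen(fd, "rb") as mapped_file:
    shm = mmap.mmap(mapped_file.fileno(), length=BUF_SIZE)
    shm.seek(0)
    raw = shm.read(BUF_SIZE)
    shm.close()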
@@ -878,7 +911,8 @@ else:
         def _get_prof_args(self):
             """ Get prof_args py37"""
             self._shm.seek(0)
-            return
+            return DynamicProfilerConfigContext.bytes_to_json(
+                bytes(self._shm.read(DynamicProfilerUtils.CFG_BUFFER_SIZE)))
 
         @no_exception_func()
         def _clean_resource(self):
@@ -923,7 +957,8 @@ else:
                 self.fd = os.open(self._shm_path, os.O_EXCL | os.O_RDWR,
                                   stat.S_IWUSR | stat.S_IRUSR | stat.S_IRGRP)
                 self._memory_mapped_file = os.fdopen(self.fd, 'rb')
-                self._shm = mmap.mmap(self._memory_mapped_file.fileno(),
+                self._shm = mmap.mmap(self._memory_mapped_file.fileno(),
+                                      length=DynamicProfilerUtils.CFG_BUFFER_SIZE)
                 self._is_create_process = False
                 logger.info("Rank %d shared memory is connected.", self._rank_id)
                 break
@@ -937,7 +972,7 @@ else:
 
             # Init mmap file need to write data
             with os.fdopen(fd, 'wb') as f:
-                data_instance =
+                data_instance = DynamicProfilerConfigContext({})
                 byte_data = data_instance.to_bytes()
                 f.write(byte_data)
@@ -945,7 +980,8 @@ else:
                 self.fd = os.open(self._shm_path, os.O_EXCL | os.O_RDWR,
                                   stat.S_IWUSR | stat.S_IRUSR | stat.S_IRGRP)
                 self._memory_mapped_file = os.fdopen(self.fd, 'rb')
-                self._shm = mmap.mmap(self._memory_mapped_file.fileno(), length=
+                self._shm = mmap.mmap(self._memory_mapped_file.fileno(), length=DynamicProfilerUtils.
+                                      CFG_BUFFER_SIZE)
                 self._is_create_process = True
                 logger.info("Rank %d shared memory is created.", self._rank_id)
                 break