mindspore 2.4.10__cp310-cp310-win_amd64.whl → 2.5.0__cp310-cp310-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mindspore might be problematic. Click here for more details.
- mindspore/.commit_id +1 -1
- mindspore/__init__.py +8 -3
- mindspore/_c_dataengine.cp310-win_amd64.pyd +0 -0
- mindspore/_c_expression.cp310-win_amd64.pyd +0 -0
- mindspore/_c_mindrecord.cp310-win_amd64.pyd +0 -0
- mindspore/_checkparam.py +0 -5
- mindspore/_extends/parallel_compile/akg_compiler/gen_custom_op_files.py +1 -1
- mindspore/_extends/parse/compile_config.py +64 -0
- mindspore/_extends/parse/deprecated/__init__.py +0 -0
- mindspore/_extends/parse/deprecated/deprecated_tensor_method.py +375 -0
- mindspore/_extends/parse/parser.py +23 -5
- mindspore/_extends/parse/standard_method.py +123 -27
- mindspore/_extends/pijit/pijit_func_white_list.py +1 -1
- mindspore/amp.py +7 -1
- mindspore/avcodec-59.dll +0 -0
- mindspore/avdevice-59.dll +0 -0
- mindspore/avfilter-8.dll +0 -0
- mindspore/avformat-59.dll +0 -0
- mindspore/avutil-57.dll +0 -0
- mindspore/boost/boost_cell_wrapper.py +136 -41
- mindspore/common/__init__.py +3 -1
- mindspore/common/_register_for_tensor.py +0 -1
- mindspore/common/_stub_tensor.py +25 -4
- mindspore/common/_tensor_cpp_method.py +17 -0
- mindspore/common/_tensor_docs.py +6132 -0
- mindspore/common/api.py +98 -21
- mindspore/common/dtype.py +34 -34
- mindspore/common/dump.py +2 -1
- mindspore/common/file_system.py +8 -3
- mindspore/common/generator.py +2 -0
- mindspore/common/hook_handle.py +3 -1
- mindspore/common/initializer.py +3 -4
- mindspore/common/lazy_inline.py +8 -2
- mindspore/common/mindir_util.py +10 -2
- mindspore/common/parameter.py +31 -15
- mindspore/common/tensor.py +713 -1337
- mindspore/communication/__init__.py +1 -1
- mindspore/communication/_comm_helper.py +5 -0
- mindspore/communication/comm_func.py +215 -173
- mindspore/communication/management.py +23 -20
- mindspore/context.py +285 -191
- mindspore/dataset/__init__.py +23 -19
- mindspore/dataset/callback/ds_callback.py +2 -1
- mindspore/dataset/core/config.py +84 -3
- mindspore/dataset/engine/cache_admin.py +3 -3
- mindspore/dataset/engine/cache_client.py +5 -4
- mindspore/dataset/engine/datasets.py +192 -149
- mindspore/dataset/engine/datasets_audio.py +14 -0
- mindspore/dataset/engine/datasets_standard_format.py +11 -11
- mindspore/dataset/engine/datasets_text.py +38 -1
- mindspore/dataset/engine/datasets_user_defined.py +100 -66
- mindspore/dataset/engine/datasets_vision.py +81 -8
- mindspore/dataset/engine/iterators.py +281 -63
- mindspore/dataset/engine/obs/util.py +8 -0
- mindspore/dataset/engine/queue.py +40 -0
- mindspore/dataset/engine/samplers.py +26 -2
- mindspore/dataset/engine/serializer_deserializer.py +1 -1
- mindspore/dataset/engine/validators.py +43 -11
- mindspore/dataset/transforms/py_transforms_util.py +17 -0
- mindspore/dataset/transforms/transforms.py +29 -12
- mindspore/dataset/vision/validators.py +1 -2
- mindspore/device_context/__init__.py +21 -0
- mindspore/device_context/ascend/__init__.py +25 -0
- mindspore/device_context/ascend/device.py +72 -0
- mindspore/device_context/ascend/op_debug.py +94 -0
- mindspore/device_context/ascend/op_precision.py +193 -0
- mindspore/device_context/ascend/op_tuning.py +127 -0
- mindspore/device_context/cpu/__init__.py +25 -0
- mindspore/device_context/cpu/device.py +62 -0
- mindspore/device_context/cpu/op_tuning.py +43 -0
- mindspore/device_context/gpu/__init__.py +21 -0
- mindspore/device_context/gpu/device.py +70 -0
- mindspore/device_context/gpu/op_precision.py +67 -0
- mindspore/device_context/gpu/op_tuning.py +175 -0
- mindspore/device_manager.py +134 -0
- mindspore/dnnl.dll +0 -0
- mindspore/experimental/llm_boost/__init__.py +1 -0
- mindspore/experimental/llm_boost/ascend_native/__init__.py +22 -0
- mindspore/experimental/llm_boost/ascend_native/llama_boost_ascend_native.py +211 -0
- mindspore/experimental/llm_boost/ascend_native/llm_boost.py +52 -0
- mindspore/experimental/llm_boost/atb/boost_base.py +2 -3
- mindspore/experimental/llm_boost/atb/llama_boost.py +6 -1
- mindspore/experimental/llm_boost/register.py +1 -0
- mindspore/experimental/optim/adadelta.py +26 -22
- mindspore/experimental/optim/adam.py +3 -0
- mindspore/experimental/optim/lr_scheduler.py +33 -24
- mindspore/experimental/optim/radam.py +33 -30
- mindspore/hal/device.py +28 -0
- mindspore/hal/event.py +17 -0
- mindspore/hal/memory.py +94 -3
- mindspore/hal/stream.py +91 -6
- mindspore/include/api/context.h +0 -1
- mindspore/jpeg62.dll +0 -0
- mindspore/log.py +12 -0
- mindspore/mindrecord/__init__.py +1 -1
- mindspore/mindrecord/config.py +17 -316
- mindspore/mindrecord/filereader.py +1 -9
- mindspore/mindrecord/filewriter.py +5 -15
- mindspore/mindrecord/mindpage.py +1 -9
- mindspore/mindspore_backend.dll +0 -0
- mindspore/mindspore_common.dll +0 -0
- mindspore/mindspore_core.dll +0 -0
- mindspore/mindspore_glog.dll +0 -0
- mindspore/mindspore_ops.dll +0 -0
- mindspore/mint/__init__.py +824 -218
- mindspore/mint/distributed/__init__.py +66 -4
- mindspore/mint/distributed/distributed.py +2594 -44
- mindspore/mint/linalg/__init__.py +6 -0
- mindspore/mint/nn/__init__.py +473 -14
- mindspore/mint/nn/functional.py +486 -11
- mindspore/mint/nn/layer/__init__.py +17 -4
- mindspore/mint/nn/layer/_functions.py +330 -0
- mindspore/mint/nn/layer/activation.py +169 -1
- mindspore/mint/nn/layer/basic.py +123 -0
- mindspore/mint/nn/layer/conv.py +727 -0
- mindspore/mint/nn/layer/normalization.py +215 -19
- mindspore/mint/nn/layer/padding.py +797 -0
- mindspore/mint/nn/layer/pooling.py +170 -0
- mindspore/mint/optim/__init__.py +2 -1
- mindspore/mint/optim/adam.py +223 -0
- mindspore/mint/optim/adamw.py +26 -19
- mindspore/mint/special/__init__.py +2 -1
- mindspore/multiprocessing/__init__.py +5 -0
- mindspore/nn/cell.py +126 -19
- mindspore/nn/dynamic_lr.py +2 -1
- mindspore/nn/layer/activation.py +6 -6
- mindspore/nn/layer/basic.py +35 -25
- mindspore/nn/layer/channel_shuffle.py +3 -3
- mindspore/nn/layer/embedding.py +3 -3
- mindspore/nn/layer/normalization.py +8 -7
- mindspore/nn/layer/padding.py +4 -3
- mindspore/nn/layer/pooling.py +47 -13
- mindspore/nn/layer/rnn_cells.py +1 -1
- mindspore/nn/layer/rnns.py +2 -1
- mindspore/nn/layer/timedistributed.py +5 -5
- mindspore/nn/layer/transformer.py +48 -26
- mindspore/nn/learning_rate_schedule.py +5 -3
- mindspore/nn/loss/loss.py +31 -36
- mindspore/nn/optim/ada_grad.py +1 -0
- mindspore/nn/optim/adadelta.py +2 -2
- mindspore/nn/optim/adam.py +1 -1
- mindspore/nn/optim/lars.py +1 -4
- mindspore/nn/optim/optimizer.py +1 -1
- mindspore/nn/optim/rprop.py +2 -2
- mindspore/nn/optim/thor.py +2 -1
- mindspore/nn/utils/init.py +13 -11
- mindspore/nn/wrap/cell_wrapper.py +4 -6
- mindspore/nn/wrap/loss_scale.py +3 -4
- mindspore/numpy/array_creations.py +60 -62
- mindspore/numpy/array_ops.py +148 -143
- mindspore/numpy/logic_ops.py +41 -42
- mindspore/numpy/math_ops.py +361 -359
- mindspore/numpy/utils.py +16 -16
- mindspore/numpy/utils_const.py +4 -4
- mindspore/opencv_core452.dll +0 -0
- mindspore/opencv_imgcodecs452.dll +0 -0
- mindspore/opencv_imgproc452.dll +0 -0
- mindspore/ops/__init__.py +2 -1
- mindspore/ops/_grad_experimental/grad_comm_ops.py +94 -13
- mindspore/ops/_grad_experimental/grad_debug_ops.py +6 -1
- mindspore/ops/_grad_experimental/grad_inner_ops.py +9 -0
- mindspore/ops/_grad_experimental/grad_math_ops.py +2 -1
- mindspore/ops/_op_impl/cpu/__init__.py +1 -0
- mindspore/ops/_op_impl/cpu/raise_op.py +28 -0
- mindspore/ops/_vmap/vmap_array_ops.py +20 -19
- mindspore/ops/_vmap/vmap_base.py +0 -2
- mindspore/ops/_vmap/vmap_grad_nn_ops.py +19 -13
- mindspore/ops/_vmap/vmap_math_ops.py +11 -9
- mindspore/ops/_vmap/vmap_nn_ops.py +20 -34
- mindspore/ops/auto_generate/cpp_create_prim_instance_helper.py +149 -12
- mindspore/ops/auto_generate/gen_arg_handler.py +0 -61
- mindspore/ops/auto_generate/gen_extend_func.py +554 -60
- mindspore/ops/auto_generate/gen_ops_def.py +1621 -115
- mindspore/ops/auto_generate/gen_ops_prim.py +8024 -3409
- mindspore/ops/auto_generate/pyboost_inner_prim.py +183 -79
- mindspore/ops/composite/base.py +1 -1
- mindspore/ops/composite/multitype_ops/_compile_utils.py +229 -30
- mindspore/ops/composite/multitype_ops/pow_impl.py +0 -29
- mindspore/ops/function/__init__.py +12 -0
- mindspore/ops/function/array_func.py +561 -159
- mindspore/ops/function/clip_func.py +64 -0
- mindspore/ops/function/debug_func.py +28 -20
- mindspore/ops/function/image_func.py +1 -1
- mindspore/ops/function/linalg_func.py +5 -4
- mindspore/ops/function/math_func.py +1659 -290
- mindspore/ops/function/nn_func.py +988 -317
- mindspore/ops/function/parameter_func.py +3 -56
- mindspore/ops/function/random_func.py +243 -33
- mindspore/ops/function/sparse_unary_func.py +1 -1
- mindspore/ops/functional.py +18 -5
- mindspore/ops/functional_overload.py +897 -0
- mindspore/ops/operations/__init__.py +3 -2
- mindspore/ops/operations/_embedding_cache_ops.py +4 -4
- mindspore/ops/operations/_grad_ops.py +2 -34
- mindspore/ops/operations/_infer_ops.py +2 -1
- mindspore/ops/operations/_inner_ops.py +38 -8
- mindspore/ops/operations/array_ops.py +45 -303
- mindspore/ops/operations/comm_ops.py +19 -16
- mindspore/ops/operations/custom_ops.py +11 -55
- mindspore/ops/operations/debug_ops.py +42 -47
- mindspore/ops/operations/inner_ops.py +6 -4
- mindspore/ops/operations/linalg_ops.py +3 -2
- mindspore/ops/operations/manually_defined/ops_def.py +185 -104
- mindspore/ops/operations/math_ops.py +11 -216
- mindspore/ops/operations/nn_ops.py +146 -308
- mindspore/ops/primitive.py +23 -21
- mindspore/ops/tensor_method.py +1669 -0
- mindspore/ops_generate/aclnn_kernel_register_auto_cc_generator.py +110 -0
- mindspore/ops_generate/add_tensor_docs_generator.py +54 -0
- mindspore/ops_generate/arg_handler.py +0 -61
- mindspore/ops_generate/auto_grad_impl_cc_generator.py +135 -0
- mindspore/ops_generate/auto_grad_reg_cc_generator.py +93 -0
- mindspore/ops_generate/base_generator.py +11 -0
- mindspore/ops_generate/cpp_create_prim_instance_helper_generator.py +108 -0
- mindspore/ops_generate/functional_map_cpp_generator.py +491 -0
- mindspore/ops_generate/functional_overload_py_generator.py +110 -0
- mindspore/ops_generate/functions_cc_generator.py +233 -0
- mindspore/ops_generate/gen_aclnn_implement.py +110 -114
- mindspore/ops_generate/gen_constants.py +157 -3
- mindspore/ops_generate/gen_ops.py +245 -990
- mindspore/ops_generate/gen_pyboost_func.py +97 -998
- mindspore/ops_generate/gen_utils.py +119 -33
- mindspore/ops_generate/lite_ops_cpp_generator.py +155 -0
- mindspore/ops_generate/op_api_proto.py +206 -0
- mindspore/ops_generate/op_def_py_generator.py +131 -0
- mindspore/ops_generate/op_prim_py_generator.py +480 -0
- mindspore/ops_generate/op_proto.py +373 -108
- mindspore/ops_generate/op_template_parser.py +436 -0
- mindspore/ops_generate/ops_def_cc_generator.py +288 -0
- mindspore/ops_generate/ops_def_h_generator.py +74 -0
- mindspore/ops_generate/ops_name_h_generator.py +68 -0
- mindspore/ops_generate/ops_primitive_h_generator.py +81 -0
- mindspore/ops_generate/pyboost_functions_cpp_generator.py +370 -0
- mindspore/ops_generate/pyboost_functions_h_generator.py +68 -0
- mindspore/ops_generate/pyboost_functions_py_generator.py +148 -0
- mindspore/ops_generate/pyboost_grad_function_cpp_generator.py +154 -0
- mindspore/ops_generate/pyboost_inner_prim_generator.py +131 -0
- mindspore/ops_generate/pyboost_native_grad_functions_generator.py +268 -0
- mindspore/ops_generate/pyboost_op_cpp_code_generator.py +851 -0
- mindspore/ops_generate/pyboost_overload_functions_cpp_generator.py +344 -0
- mindspore/ops_generate/pyboost_utils.py +92 -33
- mindspore/ops_generate/template.py +294 -44
- mindspore/ops_generate/tensor_func_reg_cpp_generator.py +422 -0
- mindspore/parallel/__init__.py +3 -3
- mindspore/parallel/_auto_parallel_context.py +24 -33
- mindspore/parallel/_parallel_serialization.py +13 -2
- mindspore/parallel/_utils.py +4 -1
- mindspore/parallel/algo_parameter_config.py +1 -1
- mindspore/parallel/checkpoint_transform.py +44 -0
- mindspore/parallel/cluster/process_entity/_api.py +131 -37
- mindspore/parallel/cluster/process_entity/_utils.py +41 -6
- mindspore/parallel/cluster/run.py +20 -3
- mindspore/parallel/parameter_broadcast.py +1 -1
- mindspore/parallel/shard.py +3 -0
- mindspore/parallel/transform_safetensors.py +119 -253
- mindspore/profiler/__init__.py +17 -4
- mindspore/profiler/analysis/__init__.py +0 -0
- mindspore/profiler/analysis/parser/__init__.py +0 -0
- mindspore/profiler/analysis/parser/ascend_cann_parser.py +166 -0
- mindspore/profiler/analysis/parser/base_parser.py +158 -0
- mindspore/profiler/analysis/parser/framework_cann_relation_parser.py +45 -0
- mindspore/profiler/analysis/parser/ms_framework_parser.py +142 -0
- mindspore/profiler/analysis/parser/ms_minddata_parser.py +145 -0
- mindspore/profiler/analysis/parser/timeline_assembly_factory/__init__.py +0 -0
- mindspore/profiler/analysis/parser/timeline_assembly_factory/ascend_timeline_assembler.py +261 -0
- mindspore/profiler/analysis/parser/timeline_assembly_factory/base_timeline_assembler.py +40 -0
- mindspore/profiler/analysis/parser/timeline_assembly_factory/trace_view_container.py +84 -0
- mindspore/profiler/analysis/parser/timeline_creator/__init__.py +0 -0
- mindspore/profiler/analysis/parser/timeline_creator/base_timeline_creator.py +44 -0
- mindspore/profiler/analysis/parser/timeline_creator/cpu_op_timeline_creator.py +90 -0
- mindspore/profiler/analysis/parser/timeline_creator/fwk_timeline_creator.py +76 -0
- mindspore/profiler/analysis/parser/timeline_creator/msprof_timeline_creator.py +103 -0
- mindspore/profiler/analysis/parser/timeline_creator/scope_layer_timeline_creator.py +134 -0
- mindspore/profiler/analysis/parser/timeline_event/__init__.py +0 -0
- mindspore/profiler/analysis/parser/timeline_event/base_event.py +233 -0
- mindspore/profiler/analysis/parser/timeline_event/cpu_op_event.py +47 -0
- mindspore/profiler/analysis/parser/timeline_event/flow_event.py +36 -0
- mindspore/profiler/analysis/parser/timeline_event/fwk_event.py +260 -0
- mindspore/profiler/analysis/parser/timeline_event/msprof_event.py +73 -0
- mindspore/profiler/analysis/parser/timeline_event/scope_layer_event.py +53 -0
- mindspore/profiler/analysis/parser/timeline_event/timeline_event_pool.py +146 -0
- mindspore/profiler/analysis/task_manager.py +131 -0
- mindspore/profiler/analysis/time_converter.py +84 -0
- mindspore/profiler/analysis/viewer/__init__.py +0 -0
- mindspore/profiler/analysis/viewer/ascend_communication_viewer.py +333 -0
- mindspore/profiler/analysis/viewer/ascend_integrate_viewer.py +87 -0
- mindspore/profiler/analysis/viewer/ascend_kernel_details_viewer.py +252 -0
- mindspore/profiler/analysis/viewer/ascend_memory_viewer.py +313 -0
- mindspore/profiler/analysis/viewer/ascend_op_memory_viewer.py +322 -0
- mindspore/profiler/analysis/viewer/ascend_step_trace_time_viewer.py +265 -0
- mindspore/profiler/analysis/viewer/ascend_timeline_viewer.py +58 -0
- mindspore/profiler/analysis/viewer/base_viewer.py +26 -0
- mindspore/profiler/analysis/viewer/ms_dataset_viewer.py +97 -0
- mindspore/profiler/analysis/viewer/ms_minddata_viewer.py +581 -0
- mindspore/profiler/analysis/work_flow.py +73 -0
- mindspore/profiler/common/ascend_msprof_exporter.py +138 -0
- mindspore/profiler/common/command_executor.py +90 -0
- mindspore/profiler/common/constant.py +174 -3
- mindspore/profiler/common/file_manager.py +208 -0
- mindspore/profiler/common/log.py +130 -0
- mindspore/profiler/common/msprof_cmd_tool.py +202 -0
- mindspore/profiler/common/path_manager.py +371 -0
- mindspore/profiler/common/process_bar.py +168 -0
- mindspore/profiler/common/process_pool.py +9 -3
- mindspore/profiler/common/profiler_context.py +476 -0
- mindspore/profiler/common/profiler_info.py +304 -0
- mindspore/profiler/common/profiler_output_path.py +284 -0
- mindspore/profiler/common/profiler_parameters.py +210 -0
- mindspore/profiler/common/profiler_path_manager.py +120 -0
- mindspore/profiler/common/record_function.py +76 -0
- mindspore/profiler/common/tlv_decoder.py +76 -0
- mindspore/profiler/common/util.py +75 -2
- mindspore/profiler/dynamic_profiler.py +270 -37
- mindspore/profiler/envprofiler.py +138 -0
- mindspore/profiler/mstx.py +199 -0
- mindspore/profiler/platform/__init__.py +21 -0
- mindspore/profiler/platform/base_profiler.py +40 -0
- mindspore/profiler/platform/cpu_profiler.py +124 -0
- mindspore/profiler/platform/gpu_profiler.py +74 -0
- mindspore/profiler/platform/npu_profiler.py +309 -0
- mindspore/profiler/profiler.py +580 -93
- mindspore/profiler/profiler_action_controller.py +187 -0
- mindspore/profiler/profiler_interface.py +114 -0
- mindspore/profiler/schedule.py +208 -0
- mindspore/rewrite/api/symbol_tree.py +1 -2
- mindspore/run_check/_check_version.py +2 -6
- mindspore/runtime/__init__.py +37 -0
- mindspore/runtime/device.py +27 -0
- mindspore/runtime/event.py +209 -0
- mindspore/runtime/executor.py +148 -0
- mindspore/runtime/memory.py +392 -0
- mindspore/runtime/stream.py +460 -0
- mindspore/runtime/thread_bind_core.py +401 -0
- mindspore/swresample-4.dll +0 -0
- mindspore/swscale-6.dll +0 -0
- mindspore/tinyxml2.dll +0 -0
- mindspore/train/__init__.py +2 -2
- mindspore/train/_utils.py +53 -18
- mindspore/train/amp.py +8 -4
- mindspore/train/callback/_checkpoint.py +32 -18
- mindspore/train/callback/_early_stop.py +1 -1
- mindspore/train/callback/_flops_collector.py +105 -69
- mindspore/train/callback/_history.py +1 -1
- mindspore/train/callback/_summary_collector.py +44 -6
- mindspore/train/callback/_tft_register.py +31 -10
- mindspore/train/dataset_helper.py +11 -11
- mindspore/train/metrics/precision.py +4 -5
- mindspore/train/mind_ir_pb2.py +167 -46
- mindspore/train/model.py +13 -15
- mindspore/train/serialization.py +462 -76
- mindspore/train/summary/summary_record.py +1 -2
- mindspore/train/train_thor/model_thor.py +1 -1
- mindspore/turbojpeg.dll +0 -0
- mindspore/utils/__init__.py +4 -2
- mindspore/utils/dryrun.py +138 -0
- mindspore/utils/runtime_execution_order_check.py +550 -0
- mindspore/version.py +1 -1
- {mindspore-2.4.10.dist-info → mindspore-2.5.0.dist-info}/METADATA +2 -3
- {mindspore-2.4.10.dist-info → mindspore-2.5.0.dist-info}/RECORD +362 -238
- {mindspore-2.4.10.dist-info → mindspore-2.5.0.dist-info}/entry_points.txt +1 -1
- mindspore/common/_tensor_overload.py +0 -139
- mindspore/mindspore_np_dtype.dll +0 -0
- mindspore/profiler/envprofiling.py +0 -254
- mindspore/profiler/profiling.py +0 -1926
- {mindspore-2.4.10.dist-info → mindspore-2.5.0.dist-info}/WHEEL +0 -0
- {mindspore-2.4.10.dist-info → mindspore-2.5.0.dist-info}/top_level.txt +0 -0
mindspore/profiler/profiler.py
CHANGED
|
@@ -13,98 +13,319 @@
|
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
# ============================================================================
|
|
15
15
|
"""Profiling api file."""
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
from
|
|
19
|
-
from
|
|
16
|
+
import os
|
|
17
|
+
import json
|
|
18
|
+
from typing import Optional, Dict
|
|
19
|
+
from sys import getsizeof
|
|
20
|
+
from concurrent.futures import ProcessPoolExecutor, as_completed
|
|
20
21
|
|
|
22
|
+
from mindspore import log as logger
|
|
23
|
+
from mindspore.profiler.common.constant import ProfilerStepNameConstant, DeviceTarget
|
|
24
|
+
from mindspore.profiler.common.profiler_context import ProfilerContext
|
|
25
|
+
from mindspore.profiler.platform.npu_profiler import NPUProfilerAnalysis
|
|
26
|
+
from mindspore.profiler.profiler_action_controller import ProfilerActionController
|
|
27
|
+
from mindspore.profiler.profiler_interface import ProfilerInterface
|
|
28
|
+
from mindspore.profiler.schedule import _default_schedule_fn, ProfilerAction
|
|
29
|
+
from mindspore.profiler.common.record_function import RecordFunction
|
|
30
|
+
from mindspore.profiler.common.path_manager import PathManager
|
|
31
|
+
from mindspore.profiler.common.file_manager import FileManager
|
|
32
|
+
from mindspore.profiler.common.profiler_path_manager import ProfilerPathManager
|
|
21
33
|
|
|
22
|
-
|
|
34
|
+
|
|
35
|
+
def tensor_board_trace_handler():
|
|
23
36
|
"""
|
|
24
|
-
|
|
37
|
+
For each step in dynamic graph mode, call this method for online analysis.
|
|
38
|
+
|
|
39
|
+
Examples:
|
|
40
|
+
>>> import numpy as np
|
|
41
|
+
>>> import mindspore as ms
|
|
42
|
+
>>> import mindspore.dataset as ds
|
|
43
|
+
>>> from mindspore import context, nn, Profiler
|
|
44
|
+
>>> from mindspore.profiler import schedule, tensor_board_trace_handler
|
|
45
|
+
>>>
|
|
46
|
+
>>> class Net(nn.Cell):
|
|
47
|
+
... def __init__(self):
|
|
48
|
+
... super(Net, self).__init__()
|
|
49
|
+
... self.fc = nn.Dense(2, 2)
|
|
50
|
+
...
|
|
51
|
+
... def construct(self, x):
|
|
52
|
+
... return self.fc(x)
|
|
53
|
+
>>>
|
|
54
|
+
>>> def generator_net():
|
|
55
|
+
... for _ in range(2):
|
|
56
|
+
... yield np.ones([2, 2]).astype(np.float32), np.ones([2]).astype(np.int32)
|
|
57
|
+
>>>
|
|
58
|
+
>>> def train(test_net):
|
|
59
|
+
... optimizer = nn.Momentum(test_net.trainable_params(), 1, 0.9)
|
|
60
|
+
... loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True)
|
|
61
|
+
... data = ds.GeneratorDataset(generator_net(), ["data", "label"])
|
|
62
|
+
... model = ms.train.Model(test_net, loss, optimizer)
|
|
63
|
+
... model.train(1, data)
|
|
64
|
+
>>>
|
|
65
|
+
>>> if __name__ == '__main__':
|
|
66
|
+
... context.set_context(mode=ms.PYNATIVE_MODE, device_target="Ascend")
|
|
67
|
+
...
|
|
68
|
+
... net = Net()
|
|
69
|
+
... STEP_NUM = 15
|
|
70
|
+
...
|
|
71
|
+
... with Profiler(schedule=schedule(wait=1, warmup=1, active=2, repeat=1, skip_first=2),
|
|
72
|
+
... on_trace_ready=tensor_board_trace_handler) as prof:
|
|
73
|
+
... for i in range(STEP_NUM):
|
|
74
|
+
... train(net)
|
|
75
|
+
... prof.step()
|
|
25
76
|
"""
|
|
26
77
|
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
profile_memory: bool = False,
|
|
34
|
-
parallel_strategy: bool = False,
|
|
35
|
-
start_profile: bool = True,
|
|
36
|
-
aicore_metrics: int = 0,
|
|
37
|
-
l2_cache: bool = False,
|
|
38
|
-
hbm_ddr: bool = False,
|
|
39
|
-
pcie: bool = False,
|
|
40
|
-
sync_enable: bool = True,
|
|
41
|
-
data_process: bool = False,
|
|
42
|
-
timeline_limit: int = 500,
|
|
43
|
-
profile_framework: str = None,
|
|
44
|
-
with_stack: bool = False,
|
|
45
|
-
data_simplification: bool = True,
|
|
46
|
-
**kwargs) -> None:
|
|
47
|
-
|
|
48
|
-
self._prof_context = ProfContext(
|
|
49
|
-
output_path=output_path,
|
|
50
|
-
profiler_level=profiler_level,
|
|
51
|
-
op_time=op_time,
|
|
52
|
-
profile_communication=profile_communication,
|
|
53
|
-
profile_memory=profile_memory,
|
|
54
|
-
parallel_strategy=parallel_strategy,
|
|
55
|
-
start_profile=start_profile,
|
|
56
|
-
aicore_metrics=aicore_metrics,
|
|
57
|
-
l2_cache=l2_cache,
|
|
58
|
-
hbm_ddr=hbm_ddr,
|
|
59
|
-
pcie=pcie,
|
|
60
|
-
sync_enable=sync_enable,
|
|
61
|
-
data_process=data_process,
|
|
62
|
-
timeline_limit=timeline_limit,
|
|
63
|
-
profile_framework=profile_framework,
|
|
64
|
-
with_stack=with_stack,
|
|
65
|
-
data_simplification=data_simplification
|
|
66
|
-
)
|
|
78
|
+
try:
|
|
79
|
+
NPUProfilerAnalysis.online_analyse()
|
|
80
|
+
if ProfilerContext().data_simplification:
|
|
81
|
+
ProfilerPathManager().simplify_data()
|
|
82
|
+
except Exception as e: # pylint: disable=W0703
|
|
83
|
+
logger.error("Call tensorboard_trace_handler failed. Exception: %s", str(e))
|
|
67
84
|
|
|
68
|
-
self._has_started = False
|
|
69
85
|
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
86
|
+
class Profiler:
|
|
87
|
+
r"""
|
|
88
|
+
This class enables the profiling of MindSpore neural networks.
|
|
89
|
+
MindSpore users can import the mindspore.Profiler, initialize the Profiler object to start profiling,
|
|
90
|
+
and use Profiler.analyse() to stop profiling and analyse the results.
|
|
91
|
+
Users can visualize the results using the `MindStudio Insight
|
|
92
|
+
<https://www.hiascend.com/developer/download/community/result?module=pt+sto+cann>`_ tool.
|
|
93
|
+
Now, Profiler supports AICORE operator, AICPU operator, HostCPU operator, memory,
|
|
94
|
+
correspondence, cluster, and other data analysis.
|
|
95
|
+
|
|
96
|
+
Args:
|
|
97
|
+
start_profile (bool, optional): The start_profile parameter controls whether to enable or disable performance
|
|
98
|
+
data collection based on conditions. Default: ``True`` .
|
|
99
|
+
output_path (str, optional): Output data path. Default: ``"./data"`` .
|
|
100
|
+
profiler_level (ProfilerLevel, optional): (Ascend only) The level of profiling.
|
|
101
|
+
Default: ``ProfilerLevel.Level0``.
|
|
102
|
+
|
|
103
|
+
- ProfilerLevel.Level0: Leanest level of profiling data collection, collects information about the elapsed
|
|
104
|
+
time of the computational operators on the NPU and communication large operator information.
|
|
105
|
+
- ProfilerLevel.Level1: Collect more CANN layer AscendCL data and AICore performance metrics and
|
|
106
|
+
communication mini operator information based on Level0.
|
|
107
|
+
- ProfilerLevel.Level2: Collect GE and Runtime information in CANN layer on top of Level1
|
|
108
|
+
activities (list, optional): The activities to collect.
|
|
109
|
+
Default: ``[ProfilerActivity.CPU, ProfilerActivity.NPU]``.
|
|
110
|
+
|
|
111
|
+
- ProfilerActivity.CPU: Collect MindSpore framework data.
|
|
112
|
+
- ProfilerActivity.NPU: Collect CANN software stack and NPU data.
|
|
113
|
+
- ProfilerActivity.GPU: Collect GPU data.
|
|
114
|
+
schedule (schedule, optional): Sets the action strategy for the capture, defined by the schedule class,
|
|
115
|
+
to be used with the step interface. Default: ``None``.
|
|
116
|
+
on_trace_ready (Callable, optional): Sets the callback function to be executed when the performance data
|
|
117
|
+
is collected. Default: ``None``.
|
|
118
|
+
profile_memory (bool, optional): (Ascend only) Whether to collect tensor memory data, collect when ``True`` .
|
|
119
|
+
When using this parameter, `activities` must be set to ``[ProfilerActivity.CPU, ProfilerActivity.NPU]``.
|
|
120
|
+
Collecting operator memory data when the graph compilation level is O2 requires collecting from the
|
|
121
|
+
first step. Default: ``False`` . The operator name currently collected by this parameter is incomplete.
|
|
122
|
+
This issue will be resolved in later versions. It is recommended to use the environment variable
|
|
123
|
+
``MS_ALLOC_CONF`` instead.
|
|
124
|
+
aicore_metrics (AicoreMetrics, optional): (Ascend only) Types of AICORE performance data collected,
|
|
125
|
+
when using this parameter, `activities` must include ``ProfilerActivity.NPU`` , and the value
|
|
126
|
+
must be a member of AicoreMetrics. Default: ``AicoreMetrics.AiCoreNone`` .
|
|
127
|
+
The data items contained in each metric are as follows:
|
|
78
128
|
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
129
|
+
- AicoreMetrics.AiCoreNone: Does not collect AICORE data.
|
|
130
|
+
- AicoreMetrics.ArithmeticUtilization: ArithmeticUtilization contains mac_fp16/int8_ratio,
|
|
131
|
+
vec_fp32/fp16/int32_ratio, vec_misc_ratio etc.
|
|
132
|
+
- AicoreMetrics.PipeUtilization: PipeUtilization contains vec_ratio, mac_ratio, scalar_ratio,
|
|
133
|
+
mte1/mte2/mte3_ratio, icache_miss_rate etc.
|
|
134
|
+
- AicoreMetrics.Memory: Memory contains ub_read/write_bw, l1_read/write_bw, l2_read/write_bw,
|
|
135
|
+
main_mem_read/write_bw etc.
|
|
136
|
+
- AicoreMetrics.MemoryL0: MemoryL0 contains l0a_read/write_bw, l0b_read/write_bw, l0c_read/write_bw etc.
|
|
137
|
+
- AicoreMetrics.ResourceConflictRatio: ResourceConflictRatio contains vec_bankgroup/bank/resc_cflt_ratio
|
|
138
|
+
etc.
|
|
139
|
+
- AicoreMetrics.MemoryUB: MemoryUB contains ub_read/write_bw_mte, ub_read/write_bw_vector,
|
|
140
|
+
ub_read/write_bw_scalar etc.
|
|
141
|
+
- AicoreMetrics.L2Cache: L2Cache contains write_cache_hit, write_cache_miss_allocate, r0_read_cache_hit,
|
|
142
|
+
r1_read_cache_hit etc. This function only supports Atlas A2 training series products.
|
|
143
|
+
with_stack (bool, optional): (Ascend) Whether to collect frame host call stack data on the Python side. This
|
|
144
|
+
data is presented in the form of a flame graph in the timeline. When using this parameter, `activities` must
|
|
145
|
+
include ``ProfilerActivity.CPU``. Default value: ``False`` .
|
|
146
|
+
data_simplification (bool, optional): (Ascend only) Whether to remove FRAMEWORK data and other redundant data.
|
|
147
|
+
If set to True, only the delivery of profiler and the original performance data in the PROF_XXX
|
|
148
|
+
directory are retained to save disk space.
|
|
149
|
+
Default value: ``True`` .
|
|
150
|
+
l2_cache (bool, optional): (Ascend only) Whether to collect l2 cache data, collect when True.
|
|
151
|
+
Default: ``False`` .
|
|
152
|
+
hbm_ddr (bool, optional): (Ascend only) Whether to collect On-Chip Memory/DDR read and write rate data,
|
|
153
|
+
collect when True. Default: ``False`` .
|
|
154
|
+
pcie (bool, optional): (Ascend only) Whether to collect PCIe bandwidth data, collect when True.
|
|
155
|
+
Default: ``False`` .
|
|
156
|
+
data_process (bool, optional): (Ascend/GPU) Whether to collect data to prepare performance data.
|
|
157
|
+
Default value: ``False`` .
|
|
158
|
+
parallel_strategy (bool, optional): (Ascend only) Whether to collect parallel policy performance data.
|
|
159
|
+
Default value: ``False`` .
|
|
160
|
+
sync_enable (bool, optional): (GPU only) Whether the profiler collects operators in a synchronous way.
|
|
161
|
+
Default: ``True`` .
|
|
162
|
+
|
|
163
|
+
- True: The synchronous way. Before sending the operator to the GPU, the CPU records the start timestamp.
|
|
164
|
+
Then the operator is returned to the CPU after execution, and the end timestamp is recorded.
|
|
165
|
+
The duration of the operator is the difference between the two timestamps.
|
|
166
|
+
- False: The asynchronous way. The duration of the operator is that of sending from the CPU to the GPU.
|
|
167
|
+
This method can reduce the impact of adding profiler on overall training time.
|
|
168
|
+
Raises:
|
|
169
|
+
RuntimeError: When the version of CANN does not match the version of MindSpore,
|
|
170
|
+
MindSpore cannot parse the generated ascend_job_id directory structure.
|
|
171
|
+
|
|
172
|
+
Supported Platforms:
|
|
173
|
+
``Ascend`` ``GPU``
|
|
174
|
+
|
|
175
|
+
Examples:
|
|
176
|
+
>>> import numpy as np
|
|
177
|
+
>>> import mindspore as ms
|
|
178
|
+
>>> from mindspore import nn
|
|
179
|
+
>>> import mindspore.dataset as ds
|
|
180
|
+
>>> from mindspore import Profiler
|
|
181
|
+
>>> from mindspore.profiler import ProfilerLevel, ProfilerActivity, AicoreMetrics
|
|
182
|
+
>>>
|
|
183
|
+
>>> class Net(nn.Cell):
|
|
184
|
+
... def __init__(self):
|
|
185
|
+
... super(Net, self).__init__()
|
|
186
|
+
... self.fc = nn.Dense(2,2)
|
|
187
|
+
... def construct(self, x):
|
|
188
|
+
... return self.fc(x)
|
|
189
|
+
>>>
|
|
190
|
+
>>> def generator():
|
|
191
|
+
... for i in range(2):
|
|
192
|
+
... yield (np.ones([2, 2]).astype(np.float32), np.ones([2]).astype(np.int32))
|
|
193
|
+
>>>
|
|
194
|
+
>>> def train(net):
|
|
195
|
+
... optimizer = nn.Momentum(net.trainable_params(), 1, 0.9)
|
|
196
|
+
... loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True)
|
|
197
|
+
... data = ds.GeneratorDataset(generator, ["data", "label"])
|
|
198
|
+
... model = ms.train.Model(net, loss, optimizer)
|
|
199
|
+
... model.train(1, data)
|
|
200
|
+
>>>
|
|
201
|
+
>>> if __name__ == '__main__':
|
|
202
|
+
... # If the device_target is GPU, set the device_target to "GPU"
|
|
203
|
+
... ms.set_context(mode=ms.GRAPH_MODE, device_target="Ascend")
|
|
204
|
+
...
|
|
205
|
+
... # Init Profiler
|
|
206
|
+
... # Note that the Profiler should be initialized before model.train
|
|
207
|
+
... profiler = Profiler(profiler_level=ProfilerLevel.Level0,
|
|
208
|
+
... activities=[ProfilerActivity.CPU, ProfilerActivity.NPU],
|
|
209
|
+
... aicore_metrics=AicoreMetrics.AiCoreNone)
|
|
210
|
+
...
|
|
211
|
+
... # Train Model
|
|
212
|
+
... net = Net()
|
|
213
|
+
... train(net)
|
|
214
|
+
...
|
|
215
|
+
... # Profiler end
|
|
216
|
+
... profiler.analyse()
|
|
217
|
+
"""
|
|
218
|
+
MAX_META_SIZE = 100 * 1024 * 1024 # 100MB
|
|
219
|
+
|
|
220
|
+
def __init__(self, **kwargs) -> None:
    """
    Build the profiler state from keyword arguments and optionally start collecting.

    Args:
        **kwargs: Profiler options. They are forwarded unchanged to the
            profiler context via ``set_params``; the ``schedule`` entry is
            additionally read here to decide whether step recording is enabled.
    """
    # Assign the lifecycle flag before any call that may raise: __del__ reads it,
    # and finalizers also run for instances whose construction failed part-way.
    self._has_started: bool = False
    self._metadata: Dict[str, str] = {}
    self._prof_context: ProfilerContext = ProfilerContext()
    self._prof_context.set_params(**kwargs)
    self.schedule_arg = kwargs.get('schedule')
    if self.schedule_arg is not None:
        # A schedule was supplied: use the one held by the context and record steps.
        self.schedule = self._prof_context.schedule
        self._record_steps: bool = True
        # Stays True until step() has been called once; stop() is a no-op until then.
        self._schedule_no_use_step = True
    else:
        self.schedule = _default_schedule_fn
        self._record_steps: bool = False
        self._schedule_no_use_step = None
    self._step_rec_fn: Optional[RecordFunction] = None
    self.step_num = 0
    self.current_action: ProfilerAction = self.schedule(self.step_num)
    self.action_controller = ProfilerActionController(ProfilerInterface, self._prof_context.on_trace_ready)
    if self._prof_context.start_profile:
        self.start()
|
|
83
240
|
|
|
84
241
|
def start(self) -> None:
    """
    Turn on Profiler data collection. Profiler can be turned on by condition.

    If collection is already running, a warning is logged and nothing else
    happens. Otherwise the action controller is moved from
    ``ProfilerAction.NONE`` to the current action and, when step recording is
    enabled, a step record named after the current step number is opened.

    Examples:
        >>> from mindspore.train import Callback
        >>> from mindspore import Profiler
        >>> class StopAtStep(Callback):
        ...     def __init__(self, start_step, stop_step):
        ...         super(StopAtStep, self).__init__()
        ...         self.start_step = start_step
        ...         self.stop_step = stop_step
        ...         self.profiler = Profiler(start_profile=False)
        ...
        ...     def step_begin(self, run_context):
        ...         cb_params = run_context.original_args()
        ...         step_num = cb_params.cur_step_num
        ...         if step_num == self.start_step:
        ...             self.profiler.start()
        ...
        ...     def step_end(self, run_context):
        ...         cb_params = run_context.original_args()
        ...         step_num = cb_params.cur_step_num
        ...         if step_num == self.stop_step:
        ...             self.profiler.stop()
        ...
        ...     def end(self, run_context):
        ...         self.profiler.analyse()
    """
    if self._has_started:
        logger.warning("The profiler has already started. Do not turn on again in the open state.")
        return

    self._has_started = True
    self.action_controller.transit_action(ProfilerAction.NONE, self.current_action)

    if not self._record_steps:
        return

    # Open the record for the step that is about to run.
    step_label = ProfilerStepNameConstant.PROFILER_STEP + str(self.step_num)
    self._step_rec_fn = RecordFunction(step_label)
    self._step_rec_fn.start()
|
|
95
282
|
|
|
96
283
|
def stop(self) -> None:
    """
    Turn off Profiler data collection. Profiler can be turned off by condition.

    The call only logs a message and returns when a schedule was configured but
    ``step()`` has not been called yet, or when the profiler is not running.
    Otherwise the open step record (if any) is closed, collection is stopped
    and the collected metadata is dumped.

    Examples:
        >>> from mindspore.train import Callback
        >>> from mindspore import Profiler
        >>> class StopAtEpoch(Callback):
        ...     def __init__(self, start_epoch, stop_epoch):
        ...         super(StopAtEpoch, self).__init__()
        ...         self.start_epoch = start_epoch
        ...         self.stop_epoch = stop_epoch
        ...         self.profiler = Profiler(start_profile=False)
        ...
        ...     def epoch_begin(self, run_context):
        ...         cb_params = run_context.original_args()
        ...         epoch_num = cb_params.cur_epoch_num
        ...         if epoch_num == self.start_epoch:
        ...             self.profiler.start()
        ...
        ...     def epoch_end(self, run_context):
        ...         cb_params = run_context.original_args()
        ...         epoch_num = cb_params.cur_epoch_num
        ...         if epoch_num == self.stop_epoch:
        ...             self.profiler.stop()
        ...
        ...     def end(self, run_context):
        ...         self.profiler.analyse()
    """
    if self._schedule_no_use_step:
        logger.warning("The profiler has schedule. Please use step() to collect data.")
        return

    if not self._has_started:
        logger.error("The profiler has not started. Do not turn off again in the closed state.")
        return

    self._has_started = False

    # Close the step record that start()/step() left open.
    if self._record_steps:
        if self._step_rec_fn:
            self._step_rec_fn.stop()

    if not self.schedule_arg:
        ProfilerInterface.stop()
    else:
        # With a schedule, the controller performs the transition out of the current action.
        self.action_controller.transit_action(self.current_action, None)

    self._dump_metadata()
|
|
108
329
|
|
|
109
330
|
def analyse(self, offline_path=None, pretty=False, step_list=None, mode="sync") -> None:
    """
    Collect and analyse the performance data gathered by this profiler.

    If collection is still running it is stopped first and the pending
    metadata is written out. When the profiler was created with a schedule,
    no analysis is performed here: a warning is logged and the call returns.

    Args:
        offline_path (str, optional): Deprecated. Offline mode is used in abnormal exit scenario.
            A truthy value only triggers a warning that points to ``Profiler.offline_analyse()``.
            This parameter should be set to ``None`` for online mode. Default: ``None``.
        pretty (bool, optional): Whether to pretty json files. Default: ``False``.
        step_list (list, optional): A list of steps that need to be analyzed, the steps must be
            consecutive integers. Default: ``None``. By default, all steps will be analyzed.
        mode (str, optional): Analysis mode, it must be one of ["sync", "async"]. Default: ``sync``.

            - sync: analyse data in current process, it will block the current process.
            - async: analyse data in subprocess, it will not block the current process. Since the parsing process
              will take up extra CPU resources, please enable this mode according to the actual resource situation.

    Examples:
        >>> from mindspore.train import Callback
        >>> from mindspore import Profiler
        >>> class StopAtStep(Callback):
        ...     def __init__(self, start_step=1, stop_step=5):
        ...         super(StopAtStep, self).__init__()
        ...         self.start_step = start_step
        ...         self.stop_step = stop_step
        ...         self.profiler = Profiler(start_profile=False)
        ...
        ...     def step_begin(self, run_context):
        ...         cb_params = run_context.original_args()
        ...         step_num = cb_params.cur_step_num
        ...         if step_num == self.start_step:
        ...             self.profiler.start()
        ...
        ...     def step_end(self, run_context):
        ...         cb_params = run_context.original_args()
        ...         step_num = cb_params.cur_step_num
        ...         if step_num == self.stop_step:
        ...             self.profiler.stop()
        ...
        ...     def end(self, run_context):
        ...         self.profiler.analyse(step_list=[2,3,4], mode="sync")
    """
    if self._has_started:
        ProfilerInterface.stop()
        self._has_started = False
        # stop() dumps metadata after stopping; do the same on this implicit-stop
        # path, otherwise entries added via add_metadata()/add_metadata_json()
        # are never written when the caller goes straight to analyse().
        self._dump_metadata()

    if self.schedule_arg:
        logger.warning("The profiler has schedule. Please use 'on_trace_ready' to analyse data.")
        return

    if offline_path:
        logger.warning("The parameter 'offline_path' for Profiler.analyse() is deprecated, "
                       "please use Profiler.offline_analyse() instead.")

    # Hand the analysis options to the shared context before the interface runs.
    self._prof_context.pretty = pretty
    self._prof_context.step_list = step_list
    self._prof_context.mode = mode

    ProfilerInterface.finalize()
    ProfilerInterface.analyse()
    ProfilerInterface.clear()
|
|
140
391
|
|
|
141
392
|
@classmethod
def offline_analyse(cls, path: str, pretty=False, step_list=None, data_simplification=True) -> None:
    """
    Analyze training performance data offline, which is invoked after performance data collection is completed.

    One analysis task is submitted per ``*_ascend_ms_*`` directory found under
    ``path``; the tasks run in a process pool. A failure of one task is logged
    and does not abort the others. If no such directory is found, an error is
    logged and the call returns.

    Args:
        path (str): The profiling data path which need to be analyzed offline.
            There needs to be a profiler directory in this path.
        pretty (bool, optional): Whether to pretty json files. Default: ``False``.
        step_list (list, optional): A list of steps that need to be analyzed, the steps must be
            consecutive integers. Default: ``None``. By default, all steps will be analyzed.
        data_simplification (bool, optional): Whether to enable data simplification. Default: ``True``.

    Examples:
        >>> from mindspore import Profiler
        >>> Profiler.offline_analyse("./profiling_path")
    """
    real_path = PathManager.get_real_path(path)
    PathManager.check_input_directory_path(real_path)
    ascend_ms_path_list = PathManager.get_ascend_ms_path_list(real_path)

    if not ascend_ms_path_list:
        msg = (f"Invalid path: {real_path}. Expected a *_ascend_ms_* directory "
               "or a parent directory of multiple *_ascend_ms_*")
        logger.error(msg)
        return

    # os.cpu_count() may return None, and halving a count of 1 yields 0;
    # ProcessPoolExecutor rejects max_workers <= 0, so keep at least one worker.
    half_cpus = (os.cpu_count() or 1) // 2
    worker_number = max(1, min(half_cpus, len(ascend_ms_path_list)))
    with ProcessPoolExecutor(max_workers=worker_number) as executor:
        futures = [
            executor.submit(
                NPUProfilerAnalysis.offline_analyse,
                ascend_ms_path,
                pretty,
                step_list,
                data_simplification
            ) for ascend_ms_path in ascend_ms_path_list
        ]
        # Wait for all tasks to complete
        for future in as_completed(futures):
            try:
                future.result()
            except Exception as e:  # pylint: disable=W0703
                logger.error("offline analysis failed: %s", str(e))
|
|
436
|
+
|
|
437
|
+
def step(self) -> None:
    """
    Mark the end of one step and advance the schedule (used for Ascend together
    with ``schedule`` and ``on_trace_ready``).

    The call only logs an error and returns when no schedule was given or when
    the profiler is not running. Otherwise the open step record is closed, the
    step counter is incremented, the schedule is asked for the next action, the
    action controller performs the transition, and a record for the new step is
    opened.

    Examples:
        >>> import numpy as np
        >>> import mindspore as ms
        >>> import mindspore.dataset as ds
        >>> from mindspore import context, nn, Profiler
        >>> from mindspore.profiler import schedule, tensor_board_trace_handler
        >>>
        >>> class Net(nn.Cell):
        ...     def __init__(self):
        ...         super(Net, self).__init__()
        ...         self.fc = nn.Dense(2, 2)
        ...
        ...     def construct(self, x):
        ...         return self.fc(x)
        >>>
        >>> def generator_net():
        ...     for _ in range(2):
        ...         yield np.ones([2, 2]).astype(np.float32), np.ones([2]).astype(np.int32)
        >>>
        >>> def train(test_net):
        ...     optimizer = nn.Momentum(test_net.trainable_params(), 1, 0.9)
        ...     loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True)
        ...     data = ds.GeneratorDataset(generator_net(), ["data", "label"])
        ...     model = ms.train.Model(test_net, loss, optimizer)
        ...     model.train(1, data)
        >>>
        >>> if __name__ == '__main__':
        ...     context.set_context(mode=ms.PYNATIVE_MODE, device_target="Ascend")
        ...
        ...     net = Net()
        ...     STEP_NUM = 15
        ...
        ...     with Profiler(schedule=schedule(wait=1, warmup=1, active=2, repeat=1, skip_first=2),
        ...                   on_trace_ready=tensor_board_trace_handler) as prof:
        ...         for i in range(STEP_NUM):
        ...             train(net)
        ...             prof.step()
    """
    if self.schedule_arg is None:
        logger.error("With no schedule in the Profiler, step takes no effect!")
        return

    if not self._has_started:
        logger.error("Profiler is stopped, step takes no effect!")
        return

    # Close the record of the step that just finished.
    if self._step_rec_fn:
        self._step_rec_fn.stop()

    outgoing_action = self.current_action
    self.step_num += 1
    self.current_action = self.schedule(self.step_num)
    self.action_controller.transit_action(outgoing_action, self.current_action)

    # Open the record for the step that starts now.
    next_label = ProfilerStepNameConstant.PROFILER_STEP + str(self.step_num)
    self._step_rec_fn = RecordFunction(next_label)
    self._step_rec_fn.start()

    # From here on stop() is allowed to act (it is a no-op while this flag is truthy).
    self._schedule_no_use_step = False
|
|
498
|
+
|
|
499
|
+
def add_metadata(self, key: str, value: str):
    """
    Report custom metadata key-value pair data.

    Non-string arguments and entries that would exceed ``MAX_META_SIZE`` are
    skipped with a warning. Re-using an existing key replaces its value and
    logs a warning.

    Args:
        key (str): The key to the metadata.
        value (str): The value to the metadata.

    Examples:
        >>> from mindspore import Profiler
        >>> # Profiler init.
        >>> profiler = Profiler()
        >>> # Call Profiler add_metadata
        >>> profiler.add_metadata("test_key", "test_value")
        >>> # Profiler end
        >>> profiler.analyse()
    """
    both_are_str = isinstance(key, str) and isinstance(value, str)
    if not both_are_str:
        logger.warning("The key and value of metadata must be string. Skip this metadata.")
        return

    # NOTE(review): getsizeof() on the dict is shallow, so this bound does not
    # account for the entries already stored - confirm whether that is intended.
    projected_size = getsizeof(self._metadata) + (getsizeof(key) + getsizeof(value))
    if projected_size >= self.MAX_META_SIZE:
        logger.warning("Too many metadata added. Skip this metadata")
        return

    if key in self._metadata:
        logger.warning(f"{key} is already saved as metadata, override it.")
    self._metadata[key] = value
|
|
527
|
+
|
|
528
|
+
def add_metadata_json(self, key: str, value: str):
    """
    Report custom metadata key-value pair data with the value as a JSON string data.

    The JSON string is parsed and the parsed object is stored. Non-string
    arguments, entries that would exceed ``MAX_META_SIZE`` and values that are
    not valid JSON are skipped with a warning. Re-using an existing key
    replaces its value and logs a warning.

    Args:
        key (str): The key to the metadata.
        value (str): The json str format value to the metadata.

    Examples:
        >>> import json
        >>> from mindspore import Profiler
        >>> # Profiler init.
        >>> profiler = Profiler()
        >>> # Call Profiler add_metadata_json
        >>> profiler.add_metadata_json("test_key", json.dumps({"key1": 1, "key2": 2}))
        >>> # Profiler end, metadata will be saved in profiler_metadata.json
        >>> profiler.analyse()
    """
    if not isinstance(key, str) or not isinstance(value, str):
        logger.warning("The key and value of metadata must be string. Skip this metadata.")
        return

    # NOTE(review): getsizeof() on the dict is shallow, so this bound does not
    # account for the entries already stored - confirm whether that is intended.
    add_size = getsizeof(key) + getsizeof(value)
    if getsizeof(self._metadata) + add_size >= self.MAX_META_SIZE:
        logger.warning("Too many metadata added. Skip this metadata")
        return

    # Parse before announcing an override, so the "override it" warning is only
    # logged when the stored value really is replaced.
    try:
        parsed_value = json.loads(value)
    except ValueError:
        logger.warning("The metadata value must be json format string. Skip this metadata")
        return

    if key in self._metadata:
        logger.warning(f"{key} is already saved as metadata, override it.")
    self._metadata[key] = parsed_value
|
|
560
|
+
|
|
561
|
+
def op_analyse(self, op_name, device_id=None):
    """
    Profiler users can use this interface to obtain operator performance data.

    Args:
        op_name (str or list): The primitive operator name to query.
        device_id (int, optional): ID of the target device. This parameter is optional during network training or
            inference, and users can use device_id parameter to specify which card operator performance data to
            parse. If this interface is used for offline data parsing, the default value is ``None`` .
            When ``None``, the device id held by the profiler context is used.

    Returns:
        The result of ``GpuFrameWorkParser.parse()`` for the requested operators.

    Raises:
        TypeError: If the `op_name` parameter type is incorrect or `op_name` is empty.
        TypeError: If the `device_id` parameter type is incorrect.
        RuntimeError: If MindSpore runs on Ascend, this interface cannot be used.

    Supported Platforms:
        ``GPU`` ``CPU``

    Examples:
        >>> from mindspore import Profiler
        >>> from mindspore import nn
        >>> from mindspore import Model
        >>> # Profiler init.
        >>> profiler = Profiler()
        >>> # Train Model or eval Model, taking LeNet5 as an example.
        >>> # Refer to https://gitee.com/mindspore/docs/blob/master/docs/mindspore/code/lenet.py
        >>> net = LeNet5()
        >>> optimizer = nn.Momentum(net.trainable_params(), learning_rate=0.1, momentum=0.9)
        >>> loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True)
        >>> # Create the dataset taking MNIST as an example.
        >>> # Refer to https://gitee.com/mindspore/docs/blob/master/docs/mindspore/code/mnist.py
        >>> dataloader = create_dataset()
        >>> model = Model(net, loss, optimizer)
        >>> model.train(5, dataloader, dataset_sink_mode=False)
        >>>
        >>> # Profiler end
        >>> profiler.analyse()
        >>>
        >>> profiler.op_analyse(op_name=["BiasAdd", "Conv2D"])
    """
    if self._prof_context.device_target == DeviceTarget.NPU.value:
        raise RuntimeError("The Interface 'Profiler.op_analyse()' is not supported on Ascend currently.")

    # Compare against None rather than truthiness: falsy non-int values such as
    # "" or 0.0 must be rejected, because anything that is not None is used below.
    if device_id is not None and not isinstance(device_id, int):
        raise TypeError(f"For 'Profiler.op_analyse()', the parameter device_id must be int, "
                        f"but got type {type(device_id)}")

    if not isinstance(op_name, (str, list)):
        raise TypeError(f"For 'Profiler.op_analyse()', the parameter op_name must be str or list, "
                        f"but got type {type(op_name)}")
    if not op_name:
        raise TypeError("For 'Profiler.op_analyse()', the parameter op_name cannot be \"\", '' or [].")

    # Imported here, as in the original, rather than at module import time.
    from mindspore.profiler.parser.framework_parser import GpuFrameWorkParser
    dev_id = self._prof_context.device_id if device_id is None else device_id
    parser = GpuFrameWorkParser(self._prof_context.framework_path, dev_id, op_name)
    return parser.parse()
|
|
619
|
+
|
|
620
|
+
def _dump_metadata(self):
    """Write the collected metadata to ``profiler_metadata.json`` and empty the in-memory store."""
    pending = self._metadata
    if pending:
        target_file = os.path.join(self._prof_context.ascend_ms_dir, "profiler_metadata.json")
        FileManager.create_json_file(target_file, pending, indent=4)
        # Entries are dropped once written.
        pending.clear()
|
|
627
|
+
|
|
628
|
+
def __enter__(self) -> 'Profiler':
    """Enter the ``with`` block: start collection unless it is already running, and return the profiler."""
    if self._has_started:
        return self
    self.start()
    return self
|
|
632
|
+
|
|
633
|
+
def __exit__(self, exc_type, exc_value, traceback) -> None:
    """Leave the ``with`` block: stop collection if it is running. Exceptions are not suppressed."""
    if not self._has_started:
        return
    self.stop()
|
|
636
|
+
|
|
637
|
+
def __del__(self):
    """Stop collection, with a warning, if the profiler is still running when it is finalized."""
    # The finalizer also runs for instances whose __init__ raised before
    # `_has_started` was assigned, so read the flag with a default.
    if getattr(self, "_has_started", False):
        self.stop()
        logger.warning("Profiler is stopped at the end of the program.")
|