mindspore 2.3.0__cp39-cp39-win_amd64.whl → 2.4.1__cp39-cp39-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mindspore might be problematic.
- mindspore/.commit_id +1 -1
- mindspore/__init__.py +3 -1
- mindspore/_c_dataengine.cp39-win_amd64.pyd +0 -0
- mindspore/_c_expression.cp39-win_amd64.pyd +0 -0
- mindspore/_c_mindrecord.cp39-win_amd64.pyd +0 -0
- mindspore/_checkparam.py +50 -9
- mindspore/_extends/parse/compile_config.py +41 -0
- mindspore/_extends/parse/parser.py +9 -7
- mindspore/_extends/parse/standard_method.py +52 -14
- mindspore/_extends/pijit/pijit_func_white_list.py +350 -24
- mindspore/amp.py +24 -10
- mindspore/avcodec-59.dll +0 -0
- mindspore/avdevice-59.dll +0 -0
- mindspore/avfilter-8.dll +0 -0
- mindspore/avformat-59.dll +0 -0
- mindspore/avutil-57.dll +0 -0
- mindspore/common/__init__.py +6 -4
- mindspore/common/_pijit_context.py +190 -0
- mindspore/common/_register_for_tensor.py +2 -1
- mindspore/common/_tensor_overload.py +139 -0
- mindspore/common/api.py +102 -87
- mindspore/common/dump.py +5 -6
- mindspore/common/generator.py +1 -7
- mindspore/common/hook_handle.py +14 -26
- mindspore/common/initializer.py +51 -15
- mindspore/common/mindir_util.py +2 -2
- mindspore/common/parameter.py +62 -15
- mindspore/common/recompute.py +39 -9
- mindspore/common/sparse_tensor.py +7 -3
- mindspore/common/tensor.py +183 -37
- mindspore/communication/__init__.py +1 -1
- mindspore/communication/_comm_helper.py +38 -3
- mindspore/communication/comm_func.py +315 -60
- mindspore/communication/management.py +14 -14
- mindspore/context.py +132 -22
- mindspore/dataset/__init__.py +1 -1
- mindspore/dataset/audio/__init__.py +1 -1
- mindspore/dataset/core/config.py +7 -0
- mindspore/dataset/core/validator_helpers.py +7 -0
- mindspore/dataset/engine/cache_client.py +1 -1
- mindspore/dataset/engine/datasets.py +72 -44
- mindspore/dataset/engine/datasets_audio.py +7 -7
- mindspore/dataset/engine/datasets_standard_format.py +53 -3
- mindspore/dataset/engine/datasets_text.py +20 -20
- mindspore/dataset/engine/datasets_user_defined.py +174 -104
- mindspore/dataset/engine/datasets_vision.py +33 -33
- mindspore/dataset/engine/iterators.py +29 -0
- mindspore/dataset/engine/obs/util.py +7 -0
- mindspore/dataset/engine/queue.py +114 -60
- mindspore/dataset/engine/serializer_deserializer.py +2 -2
- mindspore/dataset/engine/validators.py +34 -14
- mindspore/dataset/text/__init__.py +1 -4
- mindspore/dataset/transforms/__init__.py +0 -3
- mindspore/dataset/utils/line_reader.py +2 -0
- mindspore/dataset/vision/__init__.py +1 -4
- mindspore/dataset/vision/utils.py +1 -1
- mindspore/dataset/vision/validators.py +2 -1
- mindspore/dnnl.dll +0 -0
- mindspore/{nn/extend → experimental/es}/__init__.py +4 -11
- mindspore/experimental/es/embedding_service.py +883 -0
- mindspore/{nn/layer → experimental/es}/embedding_service_layer.py +218 -30
- mindspore/experimental/llm_boost/__init__.py +21 -0
- mindspore/{nn/extend/layer → experimental/llm_boost/atb}/__init__.py +4 -8
- mindspore/experimental/llm_boost/atb/boost_base.py +211 -0
- mindspore/experimental/llm_boost/atb/llama_boost.py +115 -0
- mindspore/experimental/llm_boost/atb/qwen_boost.py +101 -0
- mindspore/experimental/llm_boost/register.py +129 -0
- mindspore/experimental/llm_boost/utils.py +31 -0
- mindspore/experimental/optim/adamw.py +85 -0
- mindspore/experimental/optim/optimizer.py +3 -0
- mindspore/hal/__init__.py +3 -3
- mindspore/hal/contiguous_tensors_handle.py +175 -0
- mindspore/hal/stream.py +18 -0
- mindspore/include/api/model_group.h +13 -1
- mindspore/include/api/types.h +10 -10
- mindspore/include/dataset/config.h +2 -2
- mindspore/include/dataset/constants.h +2 -2
- mindspore/include/dataset/execute.h +2 -2
- mindspore/include/dataset/vision.h +4 -0
- mindspore/jpeg62.dll +0 -0
- mindspore/log.py +1 -1
- mindspore/mindrecord/filewriter.py +68 -51
- mindspore/mindspore_backend.dll +0 -0
- mindspore/mindspore_common.dll +0 -0
- mindspore/mindspore_core.dll +0 -0
- mindspore/mindspore_glog.dll +0 -0
- mindspore/mindspore_np_dtype.dll +0 -0
- mindspore/mindspore_ops.dll +0 -0
- mindspore/mint/__init__.py +983 -46
- mindspore/mint/distributed/__init__.py +31 -0
- mindspore/mint/distributed/distributed.py +254 -0
- mindspore/mint/nn/__init__.py +268 -23
- mindspore/mint/nn/functional.py +125 -19
- mindspore/mint/nn/layer/__init__.py +39 -0
- mindspore/mint/nn/layer/activation.py +133 -0
- mindspore/mint/nn/layer/normalization.py +477 -0
- mindspore/mint/nn/layer/pooling.py +110 -0
- mindspore/mint/optim/adamw.py +26 -13
- mindspore/mint/special/__init__.py +63 -0
- mindspore/multiprocessing/__init__.py +2 -1
- mindspore/nn/__init__.py +0 -1
- mindspore/nn/cell.py +276 -96
- mindspore/nn/layer/activation.py +211 -44
- mindspore/nn/layer/basic.py +137 -10
- mindspore/nn/layer/embedding.py +137 -2
- mindspore/nn/layer/normalization.py +101 -5
- mindspore/nn/layer/padding.py +34 -48
- mindspore/nn/layer/pooling.py +161 -7
- mindspore/nn/layer/transformer.py +3 -3
- mindspore/nn/loss/__init__.py +2 -2
- mindspore/nn/loss/loss.py +84 -6
- mindspore/nn/optim/__init__.py +2 -1
- mindspore/nn/optim/adadelta.py +1 -1
- mindspore/nn/optim/adam.py +1 -1
- mindspore/nn/optim/lamb.py +1 -1
- mindspore/nn/optim/tft_wrapper.py +124 -0
- mindspore/nn/wrap/cell_wrapper.py +12 -23
- mindspore/nn/wrap/grad_reducer.py +5 -5
- mindspore/nn/wrap/loss_scale.py +17 -3
- mindspore/numpy/__init__.py +1 -1
- mindspore/numpy/array_creations.py +65 -68
- mindspore/numpy/array_ops.py +64 -60
- mindspore/numpy/fft.py +610 -75
- mindspore/numpy/logic_ops.py +11 -10
- mindspore/numpy/math_ops.py +85 -84
- mindspore/numpy/utils_const.py +4 -4
- mindspore/opencv_core452.dll +0 -0
- mindspore/opencv_imgcodecs452.dll +0 -0
- mindspore/opencv_imgproc452.dll +0 -0
- mindspore/ops/__init__.py +6 -4
- mindspore/ops/_grad_experimental/grad_array_ops.py +0 -11
- mindspore/ops/_grad_experimental/grad_comm_ops.py +67 -4
- mindspore/ops/_grad_experimental/grad_math_ops.py +0 -22
- mindspore/ops/_vmap/vmap_array_ops.py +2 -4
- mindspore/ops/_vmap/vmap_math_ops.py +17 -1
- mindspore/ops/_vmap/vmap_nn_ops.py +43 -2
- mindspore/ops/auto_generate/cpp_create_prim_instance_helper.py +91 -7
- mindspore/ops/auto_generate/gen_arg_dtype_cast.py +2 -0
- mindspore/ops/auto_generate/gen_extend_func.py +767 -13
- mindspore/ops/auto_generate/gen_ops_def.py +2452 -364
- mindspore/ops/auto_generate/gen_ops_prim.py +5442 -1756
- mindspore/ops/auto_generate/pyboost_inner_prim.py +176 -56
- mindspore/ops/composite/base.py +85 -48
- mindspore/ops/composite/multitype_ops/_compile_utils.py +1 -0
- mindspore/ops/composite/multitype_ops/not_in_impl.py +2 -2
- mindspore/ops/function/__init__.py +22 -0
- mindspore/ops/function/array_func.py +492 -153
- mindspore/ops/function/debug_func.py +113 -1
- mindspore/ops/function/fft_func.py +15 -2
- mindspore/ops/function/grad/grad_func.py +3 -2
- mindspore/ops/function/math_func.py +564 -207
- mindspore/ops/function/nn_func.py +817 -383
- mindspore/ops/function/other_func.py +3 -2
- mindspore/ops/function/random_func.py +402 -12
- mindspore/ops/function/reshard_func.py +13 -11
- mindspore/ops/function/sparse_unary_func.py +1 -1
- mindspore/ops/function/vmap_func.py +3 -2
- mindspore/ops/functional.py +24 -14
- mindspore/ops/op_info_register.py +3 -3
- mindspore/ops/operations/__init__.py +7 -2
- mindspore/ops/operations/_grad_ops.py +2 -76
- mindspore/ops/operations/_infer_ops.py +1 -1
- mindspore/ops/operations/_inner_ops.py +71 -94
- mindspore/ops/operations/array_ops.py +14 -146
- mindspore/ops/operations/comm_ops.py +63 -53
- mindspore/ops/operations/custom_ops.py +83 -19
- mindspore/ops/operations/debug_ops.py +42 -10
- mindspore/ops/operations/manually_defined/_inner.py +12 -0
- mindspore/ops/operations/manually_defined/ops_def.py +273 -20
- mindspore/ops/operations/math_ops.py +12 -223
- mindspore/ops/operations/nn_ops.py +20 -114
- mindspore/ops/operations/other_ops.py +7 -4
- mindspore/ops/operations/random_ops.py +46 -1
- mindspore/ops/primitive.py +18 -6
- mindspore/ops_generate/arg_dtype_cast.py +2 -0
- mindspore/ops_generate/gen_aclnn_implement.py +11 -11
- mindspore/ops_generate/gen_constants.py +36 -0
- mindspore/ops_generate/gen_ops.py +67 -52
- mindspore/ops_generate/gen_ops_inner_prim.py +1 -1
- mindspore/ops_generate/gen_pyboost_func.py +131 -47
- mindspore/ops_generate/op_proto.py +10 -3
- mindspore/ops_generate/pyboost_utils.py +14 -1
- mindspore/ops_generate/template.py +43 -21
- mindspore/parallel/__init__.py +3 -1
- mindspore/parallel/_auto_parallel_context.py +31 -9
- mindspore/parallel/_cell_wrapper.py +85 -0
- mindspore/parallel/_parallel_serialization.py +47 -19
- mindspore/parallel/_tensor.py +127 -13
- mindspore/parallel/_utils.py +53 -22
- mindspore/parallel/algo_parameter_config.py +5 -5
- mindspore/parallel/checkpoint_transform.py +46 -39
- mindspore/parallel/cluster/process_entity/__init__.py +1 -1
- mindspore/parallel/cluster/process_entity/_api.py +31 -23
- mindspore/parallel/cluster/process_entity/_utils.py +2 -27
- mindspore/parallel/parameter_broadcast.py +3 -4
- mindspore/parallel/shard.py +162 -31
- mindspore/parallel/transform_safetensors.py +1146 -0
- mindspore/profiler/__init__.py +2 -1
- mindspore/profiler/common/constant.py +29 -0
- mindspore/profiler/common/registry.py +47 -0
- mindspore/profiler/common/util.py +28 -0
- mindspore/profiler/dynamic_profiler.py +694 -0
- mindspore/profiler/envprofiling.py +17 -19
- mindspore/profiler/parser/ascend_analysis/constant.py +18 -0
- mindspore/profiler/parser/ascend_analysis/file_manager.py +25 -4
- mindspore/profiler/parser/ascend_analysis/function_event.py +43 -19
- mindspore/profiler/parser/ascend_analysis/fwk_cann_parser.py +31 -26
- mindspore/profiler/parser/ascend_analysis/fwk_file_parser.py +56 -10
- mindspore/profiler/parser/ascend_analysis/msprof_timeline_parser.py +55 -8
- mindspore/profiler/parser/ascend_analysis/path_manager.py +313 -0
- mindspore/profiler/parser/ascend_analysis/profiler_info_parser.py +27 -20
- mindspore/profiler/parser/ascend_analysis/trace_event_manager.py +9 -2
- mindspore/profiler/parser/ascend_msprof_exporter.py +5 -4
- mindspore/profiler/parser/ascend_timeline_generator.py +27 -25
- mindspore/profiler/parser/base_timeline_generator.py +19 -25
- mindspore/profiler/parser/cpu_gpu_timeline_generator.py +25 -12
- mindspore/profiler/parser/framework_parser.py +1 -391
- mindspore/profiler/parser/gpu_analysis/__init__.py +14 -0
- mindspore/profiler/parser/gpu_analysis/function_event.py +44 -0
- mindspore/profiler/parser/gpu_analysis/fwk_file_parser.py +89 -0
- mindspore/profiler/parser/gpu_analysis/profiler_info_parser.py +72 -0
- mindspore/profiler/parser/memory_usage_parser.py +0 -154
- mindspore/profiler/parser/profiler_info.py +78 -6
- mindspore/profiler/profiler.py +153 -0
- mindspore/profiler/profiling.py +285 -413
- mindspore/rewrite/__init__.py +1 -2
- mindspore/rewrite/common/namespace.py +4 -4
- mindspore/rewrite/symbol_tree/symbol_tree.py +3 -3
- mindspore/run_check/_check_version.py +39 -104
- mindspore/safeguard/rewrite_obfuscation.py +591 -247
- mindspore/swresample-4.dll +0 -0
- mindspore/swscale-6.dll +0 -0
- mindspore/tinyxml2.dll +0 -0
- mindspore/train/__init__.py +4 -3
- mindspore/train/_utils.py +105 -19
- mindspore/train/amp.py +171 -53
- mindspore/train/callback/__init__.py +2 -2
- mindspore/train/callback/_callback.py +4 -4
- mindspore/train/callback/_checkpoint.py +97 -31
- mindspore/train/callback/_cluster_monitor.py +1 -1
- mindspore/train/callback/_flops_collector.py +1 -0
- mindspore/train/callback/_loss_monitor.py +3 -3
- mindspore/train/callback/_on_request_exit.py +145 -31
- mindspore/train/callback/_summary_collector.py +5 -5
- mindspore/train/callback/_tft_register.py +375 -0
- mindspore/train/dataset_helper.py +15 -3
- mindspore/train/metrics/metric.py +3 -3
- mindspore/train/metrics/roc.py +4 -4
- mindspore/train/mind_ir_pb2.py +44 -39
- mindspore/train/model.py +154 -58
- mindspore/train/serialization.py +342 -128
- mindspore/turbojpeg.dll +0 -0
- mindspore/utils/__init__.py +21 -0
- mindspore/utils/utils.py +60 -0
- mindspore/version.py +1 -1
- {mindspore-2.3.0.dist-info → mindspore-2.4.1.dist-info}/METADATA +13 -7
- {mindspore-2.3.0.dist-info → mindspore-2.4.1.dist-info}/RECORD +260 -254
- {mindspore-2.3.0.dist-info → mindspore-2.4.1.dist-info}/WHEEL +1 -1
- mindspore/include/c_api/ms/abstract.h +0 -67
- mindspore/include/c_api/ms/attribute.h +0 -197
- mindspore/include/c_api/ms/base/handle_types.h +0 -43
- mindspore/include/c_api/ms/base/macros.h +0 -32
- mindspore/include/c_api/ms/base/status.h +0 -33
- mindspore/include/c_api/ms/base/types.h +0 -283
- mindspore/include/c_api/ms/context.h +0 -102
- mindspore/include/c_api/ms/graph.h +0 -160
- mindspore/include/c_api/ms/node.h +0 -606
- mindspore/include/c_api/ms/tensor.h +0 -161
- mindspore/include/c_api/ms/value.h +0 -84
- mindspore/mindspore_shared_lib.dll +0 -0
- mindspore/nn/extend/basic.py +0 -140
- mindspore/nn/extend/embedding.py +0 -143
- mindspore/nn/extend/layer/normalization.py +0 -109
- mindspore/nn/extend/pooling.py +0 -117
- mindspore/nn/layer/embedding_service.py +0 -531
- mindspore/ops/_op_impl/aicpu/strided_slice_v2.py +0 -93
- mindspore/ops/_op_impl/aicpu/strided_slice_v2_grad.py +0 -66
- mindspore/ops/extend/__init__.py +0 -53
- mindspore/ops/extend/array_func.py +0 -218
- mindspore/ops/extend/math_func.py +0 -76
- mindspore/ops/extend/nn_func.py +0 -308
- mindspore/ops/silent_check.py +0 -162
- mindspore/profiler/parser/msadvisor_analyzer.py +0 -82
- mindspore/profiler/parser/msadvisor_parser.py +0 -240
- mindspore/train/callback/_mindio_ttp.py +0 -443
- {mindspore-2.3.0.dist-info → mindspore-2.4.1.dist-info}/entry_points.txt +0 -0
- {mindspore-2.3.0.dist-info → mindspore-2.4.1.dist-info}/top_level.txt +0 -0
mindspore/profiler/profiling.py
CHANGED
|
@@ -14,19 +14,16 @@
|
|
|
14
14
|
# ============================================================================
|
|
15
15
|
"""Profiling api file."""
|
|
16
16
|
import os
|
|
17
|
-
import re
|
|
18
|
-
import shutil
|
|
19
17
|
import stat
|
|
20
18
|
import time
|
|
21
19
|
import json
|
|
22
20
|
from json import JSONDecodeError
|
|
23
21
|
import glob
|
|
24
|
-
import subprocess
|
|
25
|
-
import csv
|
|
26
22
|
import socket
|
|
23
|
+
import multiprocessing
|
|
27
24
|
from enum import Enum
|
|
28
|
-
from multiprocessing import Process
|
|
29
25
|
from typing import List
|
|
26
|
+
from sys import getsizeof
|
|
30
27
|
import numpy as np
|
|
31
28
|
|
|
32
29
|
from mindspore import log as logger, context
|
|
@@ -34,7 +31,7 @@ from mindspore.context import get_auto_parallel_context
|
|
|
34
31
|
from mindspore.communication.management import GlobalComm, get_rank, get_group_size, get_local_rank
|
|
35
32
|
import mindspore._c_expression as c_expression
|
|
36
33
|
import mindspore._c_dataengine as cde
|
|
37
|
-
from mindspore._c_expression import _framework_profiler_enable_mi
|
|
34
|
+
from mindspore._c_expression import _framework_profiler_enable_mi, _framework_profiler_disable_mi
|
|
38
35
|
from mindspore.profiler.common.exceptions.exceptions import ProfilerFileNotFoundException, \
|
|
39
36
|
ProfilerIOException, ProfilerException, ProfilerRawFileException, ProfilerParamTypeErrorException
|
|
40
37
|
from mindspore.profiler.common.exceptions.exceptions import ProfilerPathErrorException
|
|
@@ -47,13 +44,11 @@ from mindspore.profiler.parser.integrator import Integrator, DeviceTarget
|
|
|
47
44
|
from mindspore.profiler.parser.ascend_analysis.function_event import CANNEvent
|
|
48
45
|
from mindspore.profiler.parser.cpu_gpu_timeline_generator import GpuTimelineGenerator, CpuTimelineGenerator
|
|
49
46
|
from mindspore.profiler.parser.ascend_timeline_generator import AscendTimelineGenerator
|
|
50
|
-
from mindspore.profiler.parser.memory_usage_parser import MemoryUsageParser
|
|
51
47
|
from mindspore.profiler.parser.minddata_parser import MinddataParser
|
|
52
48
|
from mindspore.profiler.parser.minddata_analyzer import MinddataProfilingAnalyzer
|
|
53
49
|
from mindspore.profiler.parser.minddata_pipeline_parser import \
|
|
54
50
|
MinddataPipelineParser
|
|
55
|
-
from mindspore.profiler.parser.step_trace_parser import GpuStepTraceParser
|
|
56
|
-
from mindspore.profiler.parser.msadvisor_analyzer import Msadvisor
|
|
51
|
+
from mindspore.profiler.parser.step_trace_parser import GpuStepTraceParser
|
|
57
52
|
from mindspore.profiler.parser.profiler_info import ProfilerInfo
|
|
58
53
|
from mindspore.common.api import _pynative_executor
|
|
59
54
|
from mindspore.profiler.parser.ascend_msprof_exporter import AscendMsprofExporter
|
|
@@ -67,6 +62,11 @@ from mindspore.profiler.parser.ascend_hccl_generator import AscendHCCLGenerator
|
|
|
67
62
|
from mindspore.profiler.parser.ascend_communicate_generator import AscendCommunicationGenerator
|
|
68
63
|
from mindspore.profiler.parser.ascend_memory_generator import AscendMemoryGenerator
|
|
69
64
|
from mindspore.profiler.parser.ascend_integrate_generator import AscendIntegrateGenerator
|
|
65
|
+
from mindspore.profiler.parser.ascend_analysis.file_manager import FileManager
|
|
66
|
+
from mindspore.profiler.parser.ascend_analysis.path_manager import PathManager
|
|
67
|
+
from mindspore.profiler.parser.ascend_analysis.constant import Constant
|
|
68
|
+
from mindspore.profiler.common.util import timeit
|
|
69
|
+
|
|
70
70
|
|
|
71
71
|
INIT_OP_NAME = 'Default/InitDataSetQueue'
|
|
72
72
|
|
|
@@ -105,7 +105,7 @@ class DeviceSupportParam(Enum):
|
|
|
105
105
|
ASCEND = [
|
|
106
106
|
'start', 'start_profile', 'output_path', 'data_process', 'timeline_limit', 'profile_memory',
|
|
107
107
|
'parallel_strategy', 'profile_communication', 'aicore_metrics', 'l2_cache', 'hbm_ddr', 'pcie', 'op_time',
|
|
108
|
-
'ascend_job_id', 'profile_framework', '
|
|
108
|
+
'ascend_job_id', 'profile_framework', 'with_stack', 'profiler_level', 'data_simplification'
|
|
109
109
|
]
|
|
110
110
|
|
|
111
111
|
|
|
@@ -114,7 +114,6 @@ ALWAYS_VALID_PARAM = [
|
|
|
114
114
|
'hbm_ddr', 'pcie', 'ascend_job_id', 'op_time', 'profile_framework', 'profiler_level'
|
|
115
115
|
]
|
|
116
116
|
|
|
117
|
-
|
|
118
117
|
ANALYSIS_ASYNC_MODE = 'async'
|
|
119
118
|
ANALYSIS_SYNC_MODE = 'sync'
|
|
120
119
|
DEFAULT_MODEL_ID = 4294967295
|
|
@@ -164,147 +163,6 @@ def _calculate_dataset_item(row, execution_time_map, ts_map):
|
|
|
164
163
|
logger.warning("Can not map the start time for item: %s.", row)
|
|
165
164
|
|
|
166
165
|
|
|
167
|
-
def _calculate_dataset_execution_time(input_file, output_file):
|
|
168
|
-
r"""
|
|
169
|
-
Parse the host info into timeline file, so as to show on UI.
|
|
170
|
-
|
|
171
|
-
Args:
|
|
172
|
-
input_file: the original host_info file, in csv format.
|
|
173
|
-
output_file: the output file, in csv format.
|
|
174
|
-
"""
|
|
175
|
-
input_file = validate_and_normalize_path(input_file)
|
|
176
|
-
# execution_time_map is used to store the ExecutionCalculator for each stage.
|
|
177
|
-
execution_time_map = {}
|
|
178
|
-
# ts_map is used to store the start time of each event_stage_tid_pid.
|
|
179
|
-
ts_map = {}
|
|
180
|
-
with open(input_file, 'r') as f:
|
|
181
|
-
for row in csv.DictReader(f):
|
|
182
|
-
try:
|
|
183
|
-
module_name = row['module_name']
|
|
184
|
-
if module_name != 'Dataset':
|
|
185
|
-
continue
|
|
186
|
-
_calculate_dataset_item(row, execution_time_map, ts_map)
|
|
187
|
-
except KeyError as e:
|
|
188
|
-
logger.error("Error occur when analyse line: %s, Details is: %s", row, e)
|
|
189
|
-
continue
|
|
190
|
-
if ts_map:
|
|
191
|
-
logger.warning("Only start time is record for these items:")
|
|
192
|
-
for k, v in ts_map.items():
|
|
193
|
-
logger.warning("event_stage_tid_pid: %s, time: %d us.", k, v)
|
|
194
|
-
output_file = validate_and_normalize_path(output_file)
|
|
195
|
-
flags = os.O_WRONLY | os.O_CREAT | os.O_TRUNC
|
|
196
|
-
modes = stat.S_IWUSR | stat.S_IRUSR
|
|
197
|
-
with os.fdopen(os.open(output_file, flags, modes), 'w') as f:
|
|
198
|
-
csv_writer = csv.writer(f)
|
|
199
|
-
csv_writer.writerow(['Operation', 'Stage', 'Occurrences', 'Avg. time (us)', 'Custom Info'])
|
|
200
|
-
for _, v in execution_time_map.items():
|
|
201
|
-
csv_writer.writerow([v.event, v.stage, v.count, v.average_execution, v.custom_info])
|
|
202
|
-
os.chmod(output_file, modes)
|
|
203
|
-
logger.info('Successfully calculate the execution time and write it to file: %s.', output_file)
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
def _extract_timeline_item(row, time_line, ts_map):
|
|
207
|
-
"""Process one row, try to extract a timeline item."""
|
|
208
|
-
start_end = row['start_end']
|
|
209
|
-
event_stage_tid_pid = row['event'] + '_' + row['stage'] + '_' + row['tid'] + '_' + row['pid']
|
|
210
|
-
# map start and end, put the mapped event into timeline.
|
|
211
|
-
if start_end == '1' and event_stage_tid_pid in ts_map:
|
|
212
|
-
title = row['event'] + '::' + row['stage']
|
|
213
|
-
event = {'name': title, 'cat': row['module_name']}
|
|
214
|
-
ts_end = int(row['time_stamp(us)'])
|
|
215
|
-
ts = ts_map[event_stage_tid_pid]
|
|
216
|
-
event['ts'] = ts
|
|
217
|
-
event['dur'] = ts_end - ts
|
|
218
|
-
event['ph'] = 'X'
|
|
219
|
-
event['pid'] = row['pid']
|
|
220
|
-
event['tid'] = row['tid']
|
|
221
|
-
event['args'] = {'parent_pid': row['parent_pid']}
|
|
222
|
-
time_line.append(event)
|
|
223
|
-
del ts_map[event_stage_tid_pid]
|
|
224
|
-
elif start_end == '0':
|
|
225
|
-
ts = int(row['time_stamp(us)'])
|
|
226
|
-
ts_map[event_stage_tid_pid] = ts
|
|
227
|
-
# Put the instance event into timeline.
|
|
228
|
-
elif start_end == '2':
|
|
229
|
-
title = row['event'] + '::' + row['stage']
|
|
230
|
-
event = {
|
|
231
|
-
'name': title, 'cat': row['module_name'], 'ts': int(row['time_stamp(us)']), 'ph': 'i',
|
|
232
|
-
'pid': row['pid'], 'tid': row['tid'], 'args': {'parent_pid': row['parent_pid']}
|
|
233
|
-
}
|
|
234
|
-
time_line.append(event)
|
|
235
|
-
else:
|
|
236
|
-
logger.warning("Can not map the start time for item: %s.", row)
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
def _parse_host_info(input_file, output_timeline_file, output_memory_file, is_develop_user=True):
|
|
240
|
-
r"""
|
|
241
|
-
Parse the host info into timeline file, so as to show on UI.
|
|
242
|
-
|
|
243
|
-
Args:
|
|
244
|
-
input_file: the original host_info file, in csv format.
|
|
245
|
-
output_timeline_file: the output timeline file, in json format.
|
|
246
|
-
output_memory_file: the output memory_usage file, in csv format.
|
|
247
|
-
is_develop_user: some data only shown to develop users, other users no need to analyse it.
|
|
248
|
-
"""
|
|
249
|
-
input_file = validate_and_normalize_path(input_file)
|
|
250
|
-
time_line = []
|
|
251
|
-
# ts_map is used to store the start time of each event_stage_tid_pid
|
|
252
|
-
ts_map = {}
|
|
253
|
-
memory_header = [
|
|
254
|
-
'tid', 'pid', 'parent_pid', 'module_name', 'event', 'stage', 'level', 'start_end', 'custom_info',
|
|
255
|
-
'memory_usage(kB)', 'time_stamp(us)'
|
|
256
|
-
]
|
|
257
|
-
memory_info = []
|
|
258
|
-
with open(input_file, 'r') as f:
|
|
259
|
-
for row in csv.DictReader(f):
|
|
260
|
-
try:
|
|
261
|
-
level = row['level']
|
|
262
|
-
if level == '0' and not is_develop_user:
|
|
263
|
-
continue
|
|
264
|
-
if int(row['time_stamp(us)']) > 0:
|
|
265
|
-
_extract_timeline_item(row, time_line, ts_map)
|
|
266
|
-
if int(row['memory_usage(kB)']) > 0:
|
|
267
|
-
memory_info.append(row)
|
|
268
|
-
except KeyError as e:
|
|
269
|
-
logger.error("Error occur when analyse line: %s, Details is: %s", row, e)
|
|
270
|
-
continue
|
|
271
|
-
if memory_info:
|
|
272
|
-
with os.fdopen(os.open(output_memory_file, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600), 'w') as csv_file:
|
|
273
|
-
csv_writer = csv.DictWriter(csv_file, fieldnames=memory_header)
|
|
274
|
-
csv_writer.writeheader()
|
|
275
|
-
for item in memory_info:
|
|
276
|
-
csv_writer.writerow(item)
|
|
277
|
-
os.chmod(output_memory_file, stat.S_IREAD | stat.S_IWRITE)
|
|
278
|
-
else:
|
|
279
|
-
logger.warning("No memory_usage is record in file: %s", input_file)
|
|
280
|
-
|
|
281
|
-
if ts_map:
|
|
282
|
-
logger.warning("Only start time is record for these items:")
|
|
283
|
-
for k, v in ts_map.items():
|
|
284
|
-
logger.warning("event_stage_tid_pid: %s, time: %d us.", k, v)
|
|
285
|
-
last_dash = k.rfind('_')
|
|
286
|
-
if last_dash == -1:
|
|
287
|
-
logger.error("Can't find pid in the event_stage_tid_pid string: %s", k)
|
|
288
|
-
continue
|
|
289
|
-
second_last_dash = k.rfind('_', 0, last_dash - 1)
|
|
290
|
-
if second_last_dash == -1:
|
|
291
|
-
logger.error("Can't find tid in the event_stage_tid_pid string: %s", k)
|
|
292
|
-
continue
|
|
293
|
-
pid = k[last_dash + 1:]
|
|
294
|
-
tid = k[second_last_dash + 1: last_dash]
|
|
295
|
-
title = k[:second_last_dash]
|
|
296
|
-
unfinished_timeline = {'name': title, 'pid': pid, 'tid': tid, 'ph': 'B', 'ts': int(v)}
|
|
297
|
-
time_line.append(unfinished_timeline)
|
|
298
|
-
|
|
299
|
-
if time_line:
|
|
300
|
-
timeline_file = validate_and_normalize_path(output_timeline_file)
|
|
301
|
-
with os.fdopen(os.open(timeline_file, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600), 'w') as json_file:
|
|
302
|
-
json.dump(time_line, json_file)
|
|
303
|
-
os.chmod(timeline_file, stat.S_IREAD | stat.S_IWRITE)
|
|
304
|
-
else:
|
|
305
|
-
logger.warning("No valid time_stamp is record in file: %s", input_file)
|
|
306
|
-
|
|
307
|
-
|
|
308
166
|
def _ascend_graph_msprof_generator(mindstudio_profiler_output, model_iteration_dict):
|
|
309
167
|
"""Executing the msprof export mode."""
|
|
310
168
|
try:
|
|
@@ -351,20 +209,21 @@ class Profiler:
|
|
|
351
209
|
output_path (str, optional): Output data path. Default: ``"./data"`` .
|
|
352
210
|
profiler_level (ProfilerLevel, optional): (Ascend only) The level of profiling. Default: ``None``.
|
|
353
211
|
|
|
354
|
-
-
|
|
212
|
+
- ProfilerLevel.Level0: Leanest level of profiling data collection, collects information about the elapsed
|
|
355
213
|
time of the computational operators on the NPU and communication large operator information.
|
|
356
|
-
-
|
|
357
|
-
mini operator information based on Level0.
|
|
358
|
-
-
|
|
214
|
+
- ProfilerLevel.Level1: Collect more CANN layer AscendCL data and AICore performance metrics and
|
|
215
|
+
communication mini operator information based on Level0.
|
|
216
|
+
- ProfilerLevel.Level2: Collect GE and Runtime information in CANN layer on top of Level1
|
|
359
217
|
|
|
360
218
|
op_time (bool, optional): (Ascend/GPU) Whether to collect operators performance data. Default value: ``True``.
|
|
361
219
|
profile_communication (bool, optional): (Ascend only) Whether to collect communication performance data in
|
|
362
220
|
a multi devices training,collect when True. Setting this parameter has no effect during single card
|
|
363
221
|
training. When using this parameter, `op_time` must be set to ``True`` . Default: ``False`` .
|
|
364
222
|
profile_memory (bool, optional): (Ascend only) Whether to collect tensor memory data, collect when ``True`` .
|
|
365
|
-
When using this parameter, `op_time` must be set to True.
|
|
223
|
+
When using this parameter, `op_time` must be set to True. Collecting operator memory data when the graph
|
|
224
|
+
compilation level is O2 requires collecting from the first step. Default: ``False`` .
|
|
366
225
|
parallel_strategy (bool, optional): (Ascend only) Whether to collect parallel policy performance data.
|
|
367
|
-
Default value: ``
|
|
226
|
+
Default value: ``False`` .
|
|
368
227
|
start_profile (bool, optional): The start_profile parameter controls whether to enable or disable performance
|
|
369
228
|
data collection based on conditions. Default: ``True`` .
|
|
370
229
|
aicore_metrics (int, optional): (Ascend only) Types of AICORE performance data collected, when using this
|
|
@@ -380,11 +239,12 @@ class Profiler:
|
|
|
380
239
|
- 4: ResourceConflictRatio contains vec_bankgroup/bank/resc_cflt_ratio etc.
|
|
381
240
|
- 5: MemoryUB contains ub_read/write_bw_mte, ub_read/write_bw_vector, ub\_/write_bw_scalar etc.
|
|
382
241
|
- 6: L2Cache contains write_cache_hit, write_cache_miss_allocate, r0_read_cache_hit, r1_read_cache_hit etc.
|
|
242
|
+
This function only support Atlas A2 training series products.
|
|
383
243
|
|
|
384
244
|
l2_cache (bool, optional): (Ascend only) Whether to collect l2 cache data, collect when True.
|
|
385
245
|
Default: ``False`` .
|
|
386
|
-
hbm_ddr (bool, optional): (Ascend only) Whether to collect
|
|
387
|
-
Default: ``False`` .
|
|
246
|
+
hbm_ddr (bool, optional): (Ascend only) Whether to collect On-Chip Memory/DDR read and write rate data,
|
|
247
|
+
collect when True. Default: ``False`` .
|
|
388
248
|
pcie (bool, optional): (Ascend only) Whether to collect PCIe bandwidth data, collect when True.
|
|
389
249
|
Default: ``False`` .
|
|
390
250
|
sync_enable (bool, optional): (GPU only) Whether the profiler collects operators in a synchronous way.
|
|
@@ -396,25 +256,32 @@ class Profiler:
|
|
|
396
256
|
- False: The asynchronous way. The duration of the operator is that of sending from the CPU to the GPU.
|
|
397
257
|
This method can reduce the impact of adding profiler on overall training time.
|
|
398
258
|
data_process (bool, optional): (Ascend/GPU) Whether to collect data to prepare performance data.
|
|
399
|
-
Default value: ``
|
|
259
|
+
Default value: ``False`` .
|
|
400
260
|
timeline_limit (int, optional): (Ascend/GPU) Set the maximum storage size of the timeline file (unit M).
|
|
401
261
|
When using this parameter, `op_time` must be set to True. Default value: ``500`` .
|
|
402
262
|
profile_framework (str, optional): (Ascend/GPU) The host information to collect, it must be one of
|
|
403
|
-
["all", "time",
|
|
404
|
-
|
|
405
|
-
Default:
|
|
263
|
+
["all", "time", None], When is not set to None, it would collect the host profiler data. When using this
|
|
264
|
+
parameter, the op_time parameter must be enabled.
|
|
265
|
+
Default: None.
|
|
406
266
|
|
|
407
|
-
- "all": Record
|
|
408
|
-
- "time":
|
|
409
|
-
- "memory": Only record host memory usage.
|
|
267
|
+
- "all": Record host timestamp.
|
|
268
|
+
- "time": The same as "all".
|
|
410
269
|
- None: Not record host information.
|
|
411
270
|
data_simplification (bool, optional): (Ascend only) Whether to remove FRAMEWORK data and other redundant data.
|
|
412
271
|
If set to True, only the delivery of profiler and the original performance data in the PROF_XXX
|
|
413
272
|
directory are retained to save disk space.
|
|
414
273
|
Default value: ``True`` .
|
|
415
|
-
|
|
416
|
-
|
|
417
|
-
|
|
274
|
+
with_stack (bool, optional): (Ascend) Whether to collect frame host call stack data on the Python side. This
|
|
275
|
+
data is presented in the form of a flame graph in the timeline. When using this parameter, the op_time and
|
|
276
|
+
profile_framework parameters must be enabled. Default value: ``False`` .
|
|
277
|
+
analyse_only (bool, optional): (Ascend/GPU) Whether to parse only performance data and not collect performance
|
|
278
|
+
data. This parameter is experimental parameter and does not need to be set by the user.
|
|
279
|
+
Default value: ``False`` .
|
|
280
|
+
rank_id (int, optional): (Ascend/GPU) Set the rank id during parsing. This parameter is
|
|
281
|
+
experimental parameter and does not need to be set by the user. Default value: ``0`` .
|
|
282
|
+
env_enable (bool, optional): (Ascend/GPU) Whether to enable the collection of environment variables.
|
|
283
|
+
This parameter is experimental parameter and does not need to be set by the user.
|
|
284
|
+
Default value: ``False`` .
|
|
418
285
|
Raises:
|
|
419
286
|
RuntimeError: When the version of CANN does not match the version of MindSpore,
|
|
420
287
|
MindSpore cannot parse the generated ascend_job_id directory structure.
|
|
@@ -428,6 +295,7 @@ class Profiler:
|
|
|
428
295
|
>>> from mindspore import nn
|
|
429
296
|
>>> import mindspore.dataset as ds
|
|
430
297
|
>>> from mindspore import Profiler
|
|
298
|
+
>>> from mindspore.profiler import ProfilerLevel
|
|
431
299
|
>>>
|
|
432
300
|
>>> class Net(nn.Cell):
|
|
433
301
|
... def __init__(self):
|
|
@@ -453,7 +321,7 @@ class Profiler:
|
|
|
453
321
|
...
|
|
454
322
|
... # Init Profiler
|
|
455
323
|
... # Note that the Profiler should be initialized before model.train
|
|
456
|
-
... profiler = Profiler()
|
|
324
|
+
... profiler = Profiler(profiler_level=ProfilerLevel.Level0)
|
|
457
325
|
...
|
|
458
326
|
... # Train Model
|
|
459
327
|
... net = Net()
|
|
@@ -462,11 +330,6 @@ class Profiler:
|
|
|
462
330
|
... # Profiler end
|
|
463
331
|
... profiler.analyse()
|
|
464
332
|
"""
|
|
465
|
-
|
|
466
|
-
_hwts_output_filename_target = "output_format_data_hwts_"
|
|
467
|
-
_opcompute_output_filename_target = "output_op_compute_time_"
|
|
468
|
-
_aicpu_op_output_filename_target = "output_data_preprocess_aicpu_"
|
|
469
|
-
_has_analysed = False
|
|
470
333
|
_has_initialized = False
|
|
471
334
|
_ascend_profiling_options = ""
|
|
472
335
|
_ascend_job_id = ""
|
|
@@ -492,6 +355,9 @@ class Profiler:
|
|
|
492
355
|
self._rank_size = 1
|
|
493
356
|
self._rank_id = 0
|
|
494
357
|
self._ascend_profiler = None
|
|
358
|
+
self.metadata = {}
|
|
359
|
+
self.max_str_len = 4096
|
|
360
|
+
self.max_meta_size = 50 * 1024
|
|
495
361
|
self._timeline_size_limit_byte = 500 * 1024 * 1024 # 500MB
|
|
496
362
|
self._parallel_strategy = True
|
|
497
363
|
self._model_iteration_dict = None
|
|
@@ -512,13 +378,13 @@ class Profiler:
|
|
|
512
378
|
self._sync_enable = True
|
|
513
379
|
self._stop_time = 0
|
|
514
380
|
self._dynamic_status = False
|
|
515
|
-
self._profile_framework =
|
|
381
|
+
self._profile_framework = None
|
|
516
382
|
self._msprof_enable = os.getenv("PROFILER_SAMPLECONFIG")
|
|
517
383
|
self.profiler_level = None
|
|
518
384
|
self._pretty_json = False
|
|
519
385
|
self._analyse_only = kwargs.get("analyse_only", False)
|
|
520
386
|
self._data_simplification = kwargs.get("data_simplification", True)
|
|
521
|
-
self.
|
|
387
|
+
self._with_stack = False
|
|
522
388
|
if self._msprof_enable:
|
|
523
389
|
return
|
|
524
390
|
self._start_time = int(time.time() * 1e6) # us
|
|
@@ -540,20 +406,6 @@ class Profiler:
|
|
|
540
406
|
if self.start_profile:
|
|
541
407
|
self.start()
|
|
542
408
|
|
|
543
|
-
@staticmethod
|
|
544
|
-
def _get_prof_rank(prof_path: str):
|
|
545
|
-
"""get rank id."""
|
|
546
|
-
sub_dirs = os.listdir(os.path.realpath(prof_path))
|
|
547
|
-
info_json_path = ""
|
|
548
|
-
for sub_dir in sub_dirs:
|
|
549
|
-
if sub_dir.startswith("device_"):
|
|
550
|
-
device_id = sub_dir.split("_")[-1]
|
|
551
|
-
info_json_path = os.path.join(prof_path, sub_dir, f"info.json.{device_id}")
|
|
552
|
-
if not os.path.exists(info_json_path):
|
|
553
|
-
return -1
|
|
554
|
-
rank_id, _ = Profiler._parse_info_json(info_json_path)
|
|
555
|
-
return rank_id
|
|
556
|
-
|
|
557
409
|
@staticmethod
|
|
558
410
|
def _check_output_path(output_path):
|
|
559
411
|
"""Checking path validity."""
|
|
@@ -602,30 +454,8 @@ class Profiler:
|
|
|
602
454
|
logger.warning('Get the drvVersion error, use single-export mode instead. detail : %s', err)
|
|
603
455
|
return None
|
|
604
456
|
|
|
605
|
-
@staticmethod
|
|
606
|
-
def _parse_info_json(info_file):
|
|
607
|
-
"""
|
|
608
|
-
Parse info log file, get the rank id and device id of the job.
|
|
609
|
-
Args:
|
|
610
|
-
input_file (str): The file path of the parse info log file.
|
|
611
|
-
|
|
612
|
-
Returns:
|
|
613
|
-
rank id, device id
|
|
614
|
-
"""
|
|
615
|
-
with open(info_file, "r") as f:
|
|
616
|
-
info_dict = json.load(f)
|
|
617
|
-
|
|
618
|
-
rank_id = info_dict.get("rank_id", 0)
|
|
619
|
-
dev_info = info_dict.get("DeviceInfo", [])
|
|
620
|
-
dev_id = dev_info[0].get("id", -1)
|
|
621
|
-
|
|
622
|
-
if int(rank_id) < 0:
|
|
623
|
-
rank_id = 0
|
|
624
|
-
|
|
625
|
-
return str(rank_id), str(dev_id)
|
|
626
|
-
|
|
627
457
|
@classmethod
|
|
628
|
-
def offline_analyse(cls, path: str, pretty=False, step_list=None):
|
|
458
|
+
def offline_analyse(cls, path: str, pretty=False, step_list=None, data_simplification=True):
|
|
629
459
|
"""
|
|
630
460
|
Analyze training performance data offline, which is invoked after performance data collection is completed.
|
|
631
461
|
|
|
@@ -633,37 +463,50 @@ class Profiler:
|
|
|
633
463
|
path (str): The profiling data path which need to be analyzed offline.
|
|
634
464
|
There needs to be a profiler directory in this path.
|
|
635
465
|
pretty (bool, optional): Whether to pretty json files. Default: ``False``.
|
|
636
|
-
step_list (list, optional): A list of steps that need to be analyzed
|
|
637
|
-
By default, all steps will be analyzed.
|
|
466
|
+
step_list (list, optional): A list of steps that need to be analyzed, the steps must be
|
|
467
|
+
consecutive integers. Default: ``None``. By default, all steps will be analyzed.
|
|
468
|
+
data_simplification (bool, optional): Whether to enable data simplification. Default: ``True``.
|
|
638
469
|
|
|
639
470
|
Examples:
|
|
640
471
|
>>> from mindspore import Profiler
|
|
641
472
|
>>> Profiler.offline_analyse("./profiling_path")
|
|
642
473
|
"""
|
|
643
|
-
|
|
644
|
-
|
|
645
|
-
|
|
646
|
-
|
|
647
|
-
|
|
648
|
-
|
|
649
|
-
|
|
650
|
-
|
|
651
|
-
|
|
652
|
-
|
|
653
|
-
|
|
654
|
-
|
|
655
|
-
|
|
656
|
-
|
|
657
|
-
|
|
658
|
-
|
|
659
|
-
|
|
660
|
-
|
|
661
|
-
|
|
662
|
-
|
|
663
|
-
|
|
664
|
-
|
|
665
|
-
|
|
666
|
-
|
|
474
|
+
real_path = os.path.realpath(path)
|
|
475
|
+
PathManager.check_input_directory_path(real_path)
|
|
476
|
+
profiler_parent_path_list = PathManager.get_profiler_parent_path_list(real_path)
|
|
477
|
+
if not isinstance(data_simplification, bool):
|
|
478
|
+
logger.warning(f"For offline_analyse, the parameter data_simplification must be bool, "
|
|
479
|
+
f"but got type {type(data_simplification)}, it will be set to True.")
|
|
480
|
+
data_simplification = True
|
|
481
|
+
if not profiler_parent_path_list:
|
|
482
|
+
raise ProfilerPathErrorException(f'The provided path "{path}" must have a "profiler" directory for '
|
|
483
|
+
f'single-device profiler data, or multiple subdirectories each containing '
|
|
484
|
+
f'a "profiler" directory for multi-device profiler data. ')
|
|
485
|
+
# get rank id
|
|
486
|
+
rank_list = []
|
|
487
|
+
for parent_path in profiler_parent_path_list:
|
|
488
|
+
profiler_path = os.path.join(parent_path, Constant.PROFILER_DIR)
|
|
489
|
+
rank_id = ProfilerInfo.get_rank_id(profiler_path)
|
|
490
|
+
if int(rank_id) < 0:
|
|
491
|
+
logger.error(f"Unable to get a valid rank ID in the profiler directory: {profiler_path}")
|
|
492
|
+
rank_list.append(rank_id)
|
|
493
|
+
# start offline analyse
|
|
494
|
+
if len(profiler_parent_path_list) == 1:
|
|
495
|
+
PathManager.check_directory_path_writeable(profiler_parent_path_list[0])
|
|
496
|
+
profiler = cls(analyse_only=True, rank_id=rank_list[0], data_simplification=data_simplification)
|
|
497
|
+
profiler.analyse(profiler_parent_path_list[0], pretty, step_list)
|
|
498
|
+
else:
|
|
499
|
+
# Multiprocess Parsing
|
|
500
|
+
multiprocessing.set_start_method("fork", force=True)
|
|
501
|
+
process_number = min(Constant.DEFAULT_PROCESS_NUMBER, len(profiler_parent_path_list))
|
|
502
|
+
pool = multiprocessing.Pool(processes=process_number)
|
|
503
|
+
for idx, profiler_parent_path in enumerate(profiler_parent_path_list):
|
|
504
|
+
PathManager.check_directory_path_writeable(profiler_parent_path)
|
|
505
|
+
profiling_parser = cls(analyse_only=True, rank_id=rank_list[idx],
|
|
506
|
+
data_simplification=data_simplification)
|
|
507
|
+
pool.apply_async(profiling_parser.analyse, args=(profiler_parent_path, pretty, step_list))
|
|
508
|
+
pool.close()
|
|
509
|
+
pool.join()
|
|
667
510
|
|
|
668
511
|
def op_analyse(self, op_name, device_id=None):
|
|
669
512
|
"""
|
|
@@ -739,14 +582,38 @@ class Profiler:
|
|
|
739
582
|
Offline mode isused in abnormal exit scenario. This parameter should be set to ``None``
|
|
740
583
|
for online mode. Default: ``None``.
|
|
741
584
|
pretty (bool, optional): Whether to pretty json files. Default: ``False``.
|
|
742
|
-
step_list (list, optional): A list of steps that need to be analyzed
|
|
743
|
-
By default, all steps will be analyzed.
|
|
585
|
+
step_list (list, optional): A list of steps that need to be analyzed, the steps must be
|
|
586
|
+
consecutive integers. Default: ``None``. By default, all steps will be analyzed.
|
|
744
587
|
mode (str, optional): Analysis mode, it must be one of ["sync", "async"]. Default: ``sync``.
|
|
745
588
|
|
|
746
589
|
- sync: analyse data in current process, it will block the current process.
|
|
747
|
-
- async: analyse data in subprocess, it will not the current process.Since the parsing process
|
|
590
|
+
- async: analyse data in subprocess, it will not block the current process. Since the parsing process
|
|
748
591
|
will take up extra CPU resources, please enable this mode according to the actual resource situation.
|
|
749
592
|
|
|
593
|
+
Examples:
|
|
594
|
+
>>> from mindspore.train import Callback
|
|
595
|
+
>>> from mindspore import Profiler
|
|
596
|
+
>>> class StopAtStep(Callback):
|
|
597
|
+
... def __init__(self, start_step=1, stop_step=5):
|
|
598
|
+
... super(StopAtStep, self).__init__()
|
|
599
|
+
... self.start_step = start_step
|
|
600
|
+
... self.stop_step = stop_step
|
|
601
|
+
... self.profiler = Profiler(start_profile=False)
|
|
602
|
+
...
|
|
603
|
+
... def step_begin(self, run_context):
|
|
604
|
+
... cb_params = run_context.original_args()
|
|
605
|
+
... step_num = cb_params.cur_step_num
|
|
606
|
+
... if step_num == self.start_step:
|
|
607
|
+
... self.profiler.start()
|
|
608
|
+
...
|
|
609
|
+
... def step_end(self, run_context):
|
|
610
|
+
... cb_params = run_context.original_args()
|
|
611
|
+
... step_num = cb_params.cur_step_num
|
|
612
|
+
... if step_num == self.stop_step:
|
|
613
|
+
... self.profiler.stop()
|
|
614
|
+
...
|
|
615
|
+
... def end(self, run_context):
|
|
616
|
+
... self.profiler.analyse(step_list=[2,3,4], mode="sync")
|
|
750
617
|
"""
|
|
751
618
|
try:
|
|
752
619
|
if isinstance(pretty, bool):
|
|
@@ -793,11 +660,12 @@ class Profiler:
|
|
|
793
660
|
|
|
794
661
|
ProfilerInfo.set_parallel_info(parallel_mode, stage_num)
|
|
795
662
|
if offline_path:
|
|
663
|
+
# Loads the ProfilerInfo data, avoid overwriting the data collection prof_info_x.json.
|
|
664
|
+
ProfilerInfo.load_profiler_info_dict(os.path.join(offline_path, "profiler"))
|
|
796
665
|
ProfilerInfo.set_analyse_start_time(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))
|
|
797
666
|
self._ascend_graph_analyse(offline_path=offline_path)
|
|
798
667
|
ProfilerInfo.set_analyse_end_time(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))
|
|
799
668
|
ProfilerInfo.save(self._output_path)
|
|
800
|
-
_offline_parse(offline_path)
|
|
801
669
|
return
|
|
802
670
|
if self._msprof_enable:
|
|
803
671
|
return
|
|
@@ -817,18 +685,16 @@ class Profiler:
|
|
|
817
685
|
ProfilerInfo.set_analyse_start_time(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))
|
|
818
686
|
if self._device_target and self._device_target == DeviceTarget.CPU.value:
|
|
819
687
|
self._cpu_analyse()
|
|
688
|
+
if self._profile_framework:
|
|
689
|
+
logger.warning("The parameter 'profile_framework' is not support for CPU, so there no host profiler "
|
|
690
|
+
"data.")
|
|
820
691
|
|
|
821
692
|
if self._device_target and self._device_target == DeviceTarget.GPU.value:
|
|
822
693
|
self._gpu_analyse()
|
|
823
694
|
|
|
824
695
|
elif self._device_target and self._device_target == DeviceTarget.ASCEND.value:
|
|
825
696
|
self._ascend_analyse()
|
|
826
|
-
|
|
827
|
-
if self._device_target != DeviceTarget.CPU.value:
|
|
828
|
-
self._host_info_analyse()
|
|
829
|
-
else:
|
|
830
|
-
logger.warning("The parameter 'profile_framework' is not support for CPU, so there no host_info"
|
|
831
|
-
" directory in the output path.")
|
|
697
|
+
|
|
832
698
|
logger.info("Profiling: all the data have been analyzed.")
|
|
833
699
|
ProfilerInfo.set_analyse_end_time(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))
|
|
834
700
|
ProfilerInfo.save(self._output_path)
|
|
@@ -895,8 +761,13 @@ class Profiler:
|
|
|
895
761
|
self._ascend_graph_start()
|
|
896
762
|
ProfilerInfo.set_profiling_start_time(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))
|
|
897
763
|
ProfilerInfo.set_system_cnt(c_expression.get_clock_syscnt())
|
|
898
|
-
ProfilerInfo.set_system_time(int(c_expression.get_clock_time()
|
|
899
|
-
|
|
764
|
+
ProfilerInfo.set_system_time(int(c_expression.get_clock_time())) # ns
|
|
765
|
+
if context.get_context("mode") == context.GRAPH_MODE:
|
|
766
|
+
jit_config = context.get_jit_config()
|
|
767
|
+
jit_level = jit_config.get("jit_level", "")
|
|
768
|
+
ProfilerInfo.set_jit_level(jit_level)
|
|
769
|
+
if self._profile_framework:
|
|
770
|
+
_framework_profiler_enable_mi()
|
|
900
771
|
|
|
901
772
|
def stop(self):
|
|
902
773
|
"""
|
|
@@ -953,12 +824,96 @@ class Profiler:
|
|
|
953
824
|
self._ascend_profiler.stop()
|
|
954
825
|
|
|
955
826
|
self._stop_time = int(time.time() * 10000000)
|
|
827
|
+
|
|
828
|
+
if self._profile_framework:
|
|
829
|
+
_framework_profiler_disable_mi()
|
|
830
|
+
|
|
956
831
|
ProfilerInfo.set_profiling_stop_time(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))
|
|
957
832
|
self._init_profiler_info()
|
|
958
833
|
ProfilerInfo.set_diff_time(self._start_time - self._monotonic_time)
|
|
959
834
|
ProfilerInfo.save(self._output_path)
|
|
835
|
+
self._dump_metadata()
|
|
960
836
|
logger.info("Profiling: stop time: %d", self._stop_time)
|
|
961
837
|
|
|
838
|
+
def add_metadata(self, key: str, value: str):
|
|
839
|
+
"""
|
|
840
|
+
Report custom metadata key-value pair data.
|
|
841
|
+
|
|
842
|
+
Args:
|
|
843
|
+
key (str): The key to the metadata.
|
|
844
|
+
value (str): The value to the metadata.
|
|
845
|
+
|
|
846
|
+
Examples:
|
|
847
|
+
>>> from mindspore import Profiler
|
|
848
|
+
>>> # Profiler init.
|
|
849
|
+
>>> profiler = Profiler()
|
|
850
|
+
>>> # Call Profiler add_metadata
|
|
851
|
+
>>> profiler.add_metadata("test_key", "test_value")
|
|
852
|
+
>>> # Profiler end
|
|
853
|
+
>>> profiler.analyse()
|
|
854
|
+
"""
|
|
855
|
+
if not isinstance(key, str) or not isinstance(value, str):
|
|
856
|
+
logger.warning("The key and value of metadata must be string. Skip this metadata.")
|
|
857
|
+
return
|
|
858
|
+
if not self._check_str_valid(key) or not self._check_str_valid(value):
|
|
859
|
+
logger.warning("Invalid input key or value. Skip this metadata.")
|
|
860
|
+
return
|
|
861
|
+
add_size = getsizeof(key) + getsizeof(value)
|
|
862
|
+
if getsizeof(self.metadata) + add_size < self.max_meta_size:
|
|
863
|
+
if key in self.metadata:
|
|
864
|
+
logger.warning(f"{key} is already saved as metadata, override it.")
|
|
865
|
+
self.metadata[key] = value
|
|
866
|
+
else:
|
|
867
|
+
logger.warning("Too many metadata added. Skip this metadata")
|
|
868
|
+
|
|
869
|
+
def add_metadata_json(self, key: str, value: str):
|
|
870
|
+
"""
|
|
871
|
+
Report custom metadata key-value pair data with the value as a JSON string data.
|
|
872
|
+
|
|
873
|
+
Args:
|
|
874
|
+
key (str): The key to the metadata.
|
|
875
|
+
value (str): The json str format value to the metadata.
|
|
876
|
+
|
|
877
|
+
Examples:
|
|
878
|
+
>>> import json
|
|
879
|
+
>>> from mindspore import Profiler
|
|
880
|
+
>>> # Profiler init.
|
|
881
|
+
>>> profiler = Profiler()
|
|
882
|
+
>>> # Call Profiler add_metadata_json
|
|
883
|
+
>>> profiler.add_metadata_json("test_key", json.dumps({"key1": 1, "key2": 2}))
|
|
884
|
+
>>> # Profiler end, metadata will be saved in profiler_metadata.json
|
|
885
|
+
>>> profiler.analyse()
|
|
886
|
+
"""
|
|
887
|
+
if not isinstance(key, str) or not isinstance(value, str):
|
|
888
|
+
logger.warning("The key and value of metadata must be string. Skip this metadata.")
|
|
889
|
+
return
|
|
890
|
+
if not self._check_str_valid(key) or not self._check_str_valid(value):
|
|
891
|
+
logger.warning("Invalid input key or value. Skip this metadata.")
|
|
892
|
+
return
|
|
893
|
+
add_size = getsizeof(key) + getsizeof(value)
|
|
894
|
+
if getsizeof(self.metadata) + add_size < self.max_meta_size:
|
|
895
|
+
try:
|
|
896
|
+
if key in self.metadata:
|
|
897
|
+
logger.warning(f"{key} is already saved as metadata, override it.")
|
|
898
|
+
self.metadata[key] = json.loads(value)
|
|
899
|
+
except ValueError:
|
|
900
|
+
logger.warning("The metadata value must be json format string. Skip this metadata")
|
|
901
|
+
else:
|
|
902
|
+
logger.warning("Too many metadata added. Skip this metadata")
|
|
903
|
+
|
|
904
|
+
def _dump_metadata(self):
|
|
905
|
+
"""Dump metadata to file."""
|
|
906
|
+
if not self.metadata:
|
|
907
|
+
return
|
|
908
|
+
FileManager.create_json_file(self._output_path, self.metadata, "profiler_metadata.json", indent=4)
|
|
909
|
+
self.metadata.clear()
|
|
910
|
+
|
|
911
|
+
def _check_str_valid(self, input_str: str):
|
|
912
|
+
"""Check str length"""
|
|
913
|
+
if len(input_str) > self.max_str_len:
|
|
914
|
+
return False
|
|
915
|
+
return True
|
|
916
|
+
|
|
962
917
|
def _set_ascend_job_id(self, ascend_job_id):
|
|
963
918
|
"""Set output_path for offline parsing performance data."""
|
|
964
919
|
if not ascend_job_id:
|
|
@@ -983,7 +938,7 @@ class Profiler:
|
|
|
983
938
|
self._profile_communication = options.get('profile_communication')
|
|
984
939
|
self._op_time = options.get('op_time')
|
|
985
940
|
self._device_target = context.get_context("device_target").lower()
|
|
986
|
-
self._profile_framework = options.get('profile_framework',
|
|
941
|
+
self._profile_framework = options.get('profile_framework', None)
|
|
987
942
|
self._profiler_manager = c_expression.ProfilerManager.get_instance()
|
|
988
943
|
self._cpu_profiler = c_expression.Profiler.get_instance("CPU")
|
|
989
944
|
if self._data_process:
|
|
@@ -1034,32 +989,32 @@ class Profiler:
|
|
|
1034
989
|
|
|
1035
990
|
def _gpu_profiler_init(self, kwargs):
|
|
1036
991
|
"""Gpu profiler init."""
|
|
992
|
+
self._parse_parameter_for_gpu(kwargs)
|
|
1037
993
|
# Setup and start MindData Profiling
|
|
1038
994
|
if self._data_process:
|
|
1039
995
|
self._md_profiler = cde.GlobalContext.profiling_manager()
|
|
1040
996
|
self._md_profiler.init()
|
|
1041
|
-
self._parse_parameter_for_gpu(kwargs)
|
|
1042
997
|
|
|
1043
998
|
gpu_profiler = c_expression.Profiler
|
|
1044
999
|
self._gpu_profiler = gpu_profiler.get_instance("GPU")
|
|
1045
|
-
self._gpu_profiler.init(self._output_path)
|
|
1046
|
-
self._gpu_profiler.sync_enable(self._sync_enable)
|
|
1047
1000
|
if GlobalComm.WORLD_COMM_GROUP == "nccl_world_group":
|
|
1048
1001
|
self._dev_id = str(get_rank())
|
|
1049
1002
|
os.environ['DEVICE_ID'] = self._dev_id
|
|
1050
1003
|
self._rank_id = self._dev_id
|
|
1004
|
+
self._gpu_profiler.init(self._output_path, int(self._rank_id))
|
|
1005
|
+
self._gpu_profiler.sync_enable(self._sync_enable)
|
|
1051
1006
|
|
|
1052
1007
|
def _ascend_profiler_init(self, kwargs):
|
|
1053
1008
|
"""Ascend profiler init."""
|
|
1009
|
+
self._parse_parameter_for_ascend(kwargs)
|
|
1054
1010
|
# Setup and start MindData Profiling
|
|
1055
1011
|
if self._data_process:
|
|
1056
1012
|
self._md_profiler = cde.GlobalContext.profiling_manager()
|
|
1057
1013
|
self._md_profiler.init()
|
|
1058
1014
|
self._init_time = int(time.time() * 10000000)
|
|
1059
1015
|
logger.info("Profiling: profiling init time: %d", self._init_time)
|
|
1060
|
-
self._parse_parameter_for_ascend(kwargs)
|
|
1061
|
-
os.environ['DEVICE_ID'] = self._dev_id
|
|
1062
1016
|
|
|
1017
|
+
os.environ['DEVICE_ID'] = self._dev_id
|
|
1063
1018
|
self._ascend_profiling_options = json.dumps(self._construct_profiling_options())
|
|
1064
1019
|
# Characters longer than 2048 are ignored, resulting in profiling option resolution errors
|
|
1065
1020
|
if len(self._ascend_profiling_options) > 2048:
|
|
@@ -1075,7 +1030,7 @@ class Profiler:
|
|
|
1075
1030
|
data_path = os.path.join(container_path, "data")
|
|
1076
1031
|
data_path = validate_and_normalize_path(data_path)
|
|
1077
1032
|
if not os.path.exists(data_path):
|
|
1078
|
-
os.makedirs(data_path, exist_ok=True)
|
|
1033
|
+
os.makedirs(data_path, exist_ok=True, mode=stat.S_IRUSR | stat.S_IWUSR | stat.S_IXUSR)
|
|
1079
1034
|
|
|
1080
1035
|
def _construct_profiling_options(self):
|
|
1081
1036
|
"""
|
|
@@ -1101,9 +1056,9 @@ class Profiler:
|
|
|
1101
1056
|
"op_time": self.ENABLE_STATUS if self._op_time else self.DISABLE_STATUS,
|
|
1102
1057
|
"profile_framework": self._profile_framework,
|
|
1103
1058
|
"profiler_level": self.profiler_level.value if self.profiler_level else self.DISABLE_STATUS,
|
|
1104
|
-
"
|
|
1059
|
+
"with_stack": "on" if self._with_stack else "off"
|
|
1105
1060
|
}
|
|
1106
|
-
|
|
1061
|
+
ProfilerInfo.set_profiling_options(profiling_options)
|
|
1107
1062
|
return profiling_options
|
|
1108
1063
|
|
|
1109
1064
|
def _parse_parameter_for_gpu(self, kwargs):
|
|
@@ -1175,11 +1130,11 @@ class Profiler:
|
|
|
1175
1130
|
pcie_enable = False
|
|
1176
1131
|
self._pcie = self.ENABLE_STATUS if pcie_enable else self.DISABLE_STATUS
|
|
1177
1132
|
|
|
1178
|
-
self._parallel_strategy = kwargs.pop("parallel_strategy",
|
|
1133
|
+
self._parallel_strategy = kwargs.pop("parallel_strategy", False)
|
|
1179
1134
|
if not isinstance(self._parallel_strategy, bool):
|
|
1180
1135
|
logger.warning(f"For '{self.__class__.__name__}', the parameter parallel_strategy must be bool, "
|
|
1181
|
-
f"but got type {type(self._parallel_strategy)}, it will be set to
|
|
1182
|
-
self._parallel_strategy =
|
|
1136
|
+
f"but got type {type(self._parallel_strategy)}, it will be set to False.")
|
|
1137
|
+
self._parallel_strategy = False
|
|
1183
1138
|
|
|
1184
1139
|
self.profiler_level = kwargs.pop("profiler_level", None)
|
|
1185
1140
|
if self.profiler_level and not isinstance(self.profiler_level, ProfilerLevel):
|
|
@@ -1381,7 +1336,7 @@ class Profiler:
|
|
|
1381
1336
|
finally:
|
|
1382
1337
|
pass
|
|
1383
1338
|
|
|
1384
|
-
def _ascend_graph_memory_analyse(self
|
|
1339
|
+
def _ascend_graph_memory_analyse(self):
|
|
1385
1340
|
"""Analyse memory usage info."""
|
|
1386
1341
|
if not self._profile_memory:
|
|
1387
1342
|
return
|
|
@@ -1390,7 +1345,7 @@ class Profiler:
|
|
|
1390
1345
|
"PyNative mode currently.")
|
|
1391
1346
|
try:
|
|
1392
1347
|
logger.info("Profiling: analyzing the memory usage info.")
|
|
1393
|
-
self._analyse_memory_usage(
|
|
1348
|
+
self._analyse_memory_usage()
|
|
1394
1349
|
except (ProfilerIOException, ProfilerFileNotFoundException, ProfilerRawFileException) as err:
|
|
1395
1350
|
logger.warning(err.message)
|
|
1396
1351
|
finally:
|
|
@@ -1408,28 +1363,37 @@ class Profiler:
1408 | 1363 |
1409 | 1364 |   dev_id = self._rank_id if self._device_target == DeviceTarget.ASCEND.value else self._dev_id
1410 | 1365 |   ascend_profiler_output_path = os.path.join(ascend_ms_path, 'ASCEND_PROFILER_OUTPUT')
1411 |      | -
     | 1366 | + PathManager.make_dir_safety(ascend_profiler_output_path)
1412 | 1367 |
1413 | 1368 |   source_profiler_info_path = os.path.join(self._output_path, f"profiler_info_{dev_id}.json")
1414 | 1369 |   target_profiler_info_path = os.path.join(ascend_ms_path, f"profiler_info_{dev_id}.json")
1415 |      | -
     | 1370 | + PathManager.copy_file(source_profiler_info_path, target_profiler_info_path)
     | 1371 | +
     | 1372 | + source_profiler_metadata_path = os.path.join(self._output_path, f"profiler_metadata.json")
     | 1373 | + target_profiler_metadata_path = os.path.join(ascend_ms_path, f"profiler_metadata.json")
     | 1374 | + PathManager.copy_file(source_profiler_metadata_path, target_profiler_metadata_path)
1416 | 1375 |
1417 | 1376 |   source_timeline_path = os.path.join(self._output_path, f"ascend_timeline_display_{dev_id}.json")
1418 | 1377 |   target_timeline_path = os.path.join(ascend_profiler_output_path, f"trace_view.json")
1419 |      | -
     | 1378 | + PathManager.copy_file(source_timeline_path, target_timeline_path)
1420 | 1379 |
1421 | 1380 |   src_op_mem_file = os.path.join(self._output_path, f"operator_memory_{dev_id}.csv")
1422 |      | -
1423 |      | -
1424 |      | - shutil.copy(src_op_mem_file, dst_op_mem_file)
     | 1381 | + dst_op_mem_file = os.path.join(ascend_profiler_output_path, f"operator_memory.csv")
     | 1382 | + PathManager.copy_file(src_op_mem_file, dst_op_mem_file)
1425 | 1383 |
1426 |      | - ms_output_path = os.path.
     | 1384 | + ms_output_path = os.path.realpath(
1427 | 1385 |   os.path.join(source_path, os.path.pardir, 'mindstudio_profiler_output'))
1428 | 1386 |   static_op_mem_path = os.path.join(ms_output_path, f"static_op_mem_*.csv")
1429 | 1387 |   src_static_op_mem_path = glob.glob(static_op_mem_path)
1430 | 1388 |   if src_static_op_mem_path:
1431 | 1389 |   dst_static_op_mem_file = os.path.join(ascend_profiler_output_path, f"static_op_mem.csv")
1432 |      | -
     | 1390 | + PathManager.copy_file(src_static_op_mem_path[0], dst_static_op_mem_file)
     | 1391 | +
     | 1392 | + src_op_statistics_path = os.path.join(ms_output_path, "op_statistic_*.csv")
     | 1393 | + src_op_statistics_path = glob.glob(src_op_statistics_path)
     | 1394 | + if src_op_statistics_path:
     | 1395 | + dst_op_statistics_path = os.path.join(ascend_profiler_output_path, f"op_statistic.csv")
     | 1396 | + PathManager.copy_file(src_op_statistics_path[0], dst_op_statistics_path)
1433 | 1397 |
1434 | 1398 |   self._ascend_graph_cluster_analyse(source_path, ascend_profiler_output_path)
1435 | 1399 |   self._ascend_graph_communicate_analyse(source_path, ascend_profiler_output_path)
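The hunk above swaps direct `shutil.copy` usage for `PathManager.make_dir_safety`/`PathManager.copy_file` and adds an `op_statistic_*.csv` export under a fixed name. A rough stand-in for the glob-then-copy step using only the standard library (not MindSpore's `PathManager`):

    import glob
    import os
    import shutil

    def copy_first_match(src_dir, pattern, dst_dir, dst_name):
        """Copy the first file matching `pattern` in `src_dir` to `dst_dir/dst_name`, if any."""
        matches = glob.glob(os.path.join(src_dir, pattern))
        if not matches:
            return None
        os.makedirs(dst_dir, exist_ok=True)
        dst_path = os.path.join(dst_dir, dst_name)
        shutil.copyfile(matches[0], dst_path)
        return dst_path

    # Mirrors the new hunk:
    # copy_first_match(ms_output_path, "op_statistic_*.csv",
    #                  ascend_profiler_output_path, "op_statistic.csv")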
@@ -1468,7 +1432,7 @@ class Profiler:
1468 | 1432 |   f"communication_matrix.json")
1469 | 1433 |   communication_matrix_file_path = validate_and_normalize_path(communication_matrix_file_path)
1470 | 1434 |
1471 |      | - analyze_path = os.path.
     | 1435 | + analyze_path = os.path.realpath(os.path.join(source_path, os.path.pardir, 'analyze'))
1472 | 1436 |   communicate_analyser = AscendCommunicationGenerator(analyze_path)
1473 | 1437 |   communicate_analyser.parse()
1474 | 1438 |   communicate_analyser.write(communication_file_path, communication_matrix_file_path)
@@ -1500,26 +1464,6 @@ class Profiler:
1500 | 1464 |   finally:
1501 | 1465 |   pass
1502 | 1466 |
1503 |      | - def _ascend_graph_msadvisor_analyse(self, job_id):
1504 |      | - """Call MSAdvisor function."""
1505 |      | - logger.info("MSAdvisor starts running.")
1506 |      | - msadvisor = Msadvisor(job_id, self._rank_id, self._output_path, pretty=self._pretty_json)
1507 |      | - try:
1508 |      | - msadvisor.analyse()
1509 |      | - except FileNotFoundError as err:
1510 |      | - logger.warning("MSAdvisor: command not found,"
1511 |      | - "please check if installed ascend-toolkit and set environment path correctly. %s", err)
1512 |      | - except OSError as err:
1513 |      | - logger.warning("Cannot execute binary file: Exec format error. %s", err)
1514 |      | - except subprocess.CalledProcessError:
1515 |      | - logger.warning("MSAdvisor running failed, please check MSAdvisor running log.")
1516 |      | - except (ValueError, ProfilerFileNotFoundException) as err:
1517 |      | - logger.warning("MSAdvisor running failed. %s", err)
1518 |      | - finally:
1519 |      | - pass
1520 |      | - if context.get_context("mode") == context.PYNATIVE_MODE:
1521 |      | - logger.warning("Pynative mode does not support MSAdvisor analyzer currently.")
1522 |      | -
1523 | 1467 |   def _get_kernel_op_map(self, op_summary, kernels: List[CANNEvent]) -> List:
1524 | 1468 |   """Get the mapping between framework operator and device kernel."""
1525 | 1469 |   if not kernels:
@@ -1535,8 +1479,6 @@ class Profiler:
1535 | 1479 |   key = name if name.startswith("hcom_") else (name, ts)
1536 | 1480 |   launch_op = kernel_map.get(key)
1537 | 1481 |   if not launch_op:
1538 |      | - if context.get_context("mode") == context.GRAPH_MODE or not name.startswith("aclnn"):
1539 |      | - logger.warning(f"Failed to get launch operator for {name}!")
1540 | 1482 |   continue
1541 | 1483 |   launch_ops[index] = launch_op.name
1542 | 1484 |   return launch_ops
@@ -1547,6 +1489,7 @@ class Profiler:
1547 | 1489 |   else:
1548 | 1490 |   MultiProcessPool().add_async_job(self._ascend_graph_analyse_inner)
1549 | 1491 |
     | 1492 | + @timeit("Profiler analyse done")
1550 | 1493 |   def _ascend_graph_analyse_inner(self, offline_path=None):
1551 | 1494 |   """Ascend graph mode analyse."""
1552 | 1495 |   job_id = self._get_profiling_job_id(offline_path)
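The new `@timeit("Profiler analyse done")` decorator wraps the whole graph-analysis pass. A generic illustration of such a timing decorator, assuming a plain `logging` logger; MindSpore's actual helper may differ in detail:

    import functools
    import logging
    import time

    logger = logging.getLogger("profiler_sketch")

    def timeit(message):
        """Log `message` together with the wall-clock time the wrapped call took."""
        def decorator(func):
            @functools.wraps(func)
            def wrapper(*args, **kwargs):
                start = time.perf_counter()
                try:
                    return func(*args, **kwargs)
                finally:
                    logger.info("%s, cost %.3f s", message, time.perf_counter() - start)
            return wrapper
        return decorator

    @timeit("Profiler analyse done")
    def analyse():
        time.sleep(0.01)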
@@ -1558,7 +1501,7 @@ class Profiler:
1558 | 1501 |   source_path = os.path.join(self._output_path, job_id)
1559 | 1502 |   self._minddata_analyse()
1560 | 1503 |   if self._op_time:
1561 |      | - mindstudio_profiler_output = os.path.
     | 1504 | + mindstudio_profiler_output = os.path.realpath(
1562 | 1505 |   os.path.join(source_path, os.path.pardir, 'mindstudio_profiler_output'))
1563 | 1506 |   flag = _ascend_graph_msprof_generator(mindstudio_profiler_output, self._model_iteration_dict)
1564 | 1507 |   if not flag:
@@ -1567,14 +1510,17 @@ class Profiler:
1567 | 1510 |   ProfilerInfo.set_export_flag(flag)
1568 | 1511 |   op_summary, op_statistic, steptrace, steptrace_model \
1569 | 1512 |   = _ascend_graph_msprof_analyse(mindstudio_profiler_output)
     | 1513 | + kernels = self._ascend_timeline_analyse(op_summary, steptrace, source_path, mindstudio_profiler_output)
     | 1514 | +
1570 | 1515 |   if isinstance(op_statistic, np.ndarray) and op_statistic.shape[0] == 0 or \
1571 | 1516 |   not isinstance(op_statistic, np.ndarray) and not op_statistic:
     | 1517 | + logger.warning('Op statistic data is empty!')
1572 | 1518 |   return
1573 |      | -
     | 1519 | +
1574 | 1520 |   launch_ops = self._get_kernel_op_map(op_summary, kernels)
1575 | 1521 |   self._ascend_op_analyse(op_summary, op_statistic, self._dynamic_status, launch_ops)
1576 | 1522 |   graph_ids = np.unique(op_summary['Model ID']).tolist()
1577 |      | -
     | 1523 | + self._ascend_fpbp_analyse(op_summary, steptrace)
1578 | 1524 |   if len(graph_ids) == 1:
1579 | 1525 |   self._ascend_step_trace_analyse(steptrace)
1580 | 1526 |   else:
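The emptiness check above branches on the payload type because plain truthiness is ambiguous for multi-element NumPy arrays (`not arr` raises a ValueError), so arrays are tested via `shape[0] == 0` and everything else via ordinary truthiness. A small sketch of the same check in isolation:

    import numpy as np

    def is_empty_statistic(op_statistic):
        """Return True when the op statistic payload carries no rows."""
        if isinstance(op_statistic, np.ndarray):
            return op_statistic.shape[0] == 0
        return not op_statistic

    print(is_empty_statistic(np.empty((0, 3))))  # True
    print(is_empty_statistic([]))                # True
    print(is_empty_statistic(np.ones((2, 3))))   # False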
@@ -1582,13 +1528,13 @@ class Profiler:
1582 | 1528 |   if self._dynamic_status:
1583 | 1529 |   self._ascend_dynamic_net_analyse(op_summary)
1584 | 1530 |   self._ascend_flops_analyse(op_summary, launch_ops)
1585 |      | - self._ascend_graph_memory_analyse(
     | 1531 | + self._ascend_graph_memory_analyse()
1586 | 1532 |   self._ascend_ms_analyze(mindstudio_profiler_output)
1587 | 1533 |   self._ascend_graph_hccl_analyse(mindstudio_profiler_output, steptrace)
1588 |      | - self._ascend_graph_msadvisor_analyse(job_id)
1589 | 1534 |   self._minddata_aicpu_analyse(self._output_path, job_id)
1590 | 1535 |   ProfilerInfo.set_graph_ids(graph_ids)
1591 | 1536 |   try:
     | 1537 | + ProfilerInfo.set_data_simplification(self._data_simplification)
1592 | 1538 |   ProfilerPathManager.simplify_data(self._output_path, self._data_simplification)
1593 | 1539 |   except RuntimeError as err:
1594 | 1540 |   logger.error('Profilier simplify data failed, %s', str(err))
@@ -1690,7 +1636,7 @@ class Profiler:
1690 | 1636 |   try:
1691 | 1637 |   timeline_generator = CpuTimelineGenerator(self._output_path, self._rank_id, context.get_context("mode"))
1692 | 1638 |   timeline_generator.init_timeline(pretty=self._pretty_json)
1693 |      | - timeline_generator.write_timeline(
     | 1639 | + timeline_generator.write_timeline()
1694 | 1640 |   timeline_generator.write_timeline_summary()
1695 | 1641 |   except (ProfilerIOException, ProfilerFileNotFoundException, RuntimeError) as err:
1696 | 1642 |   logger.warning('Fail to write timeline data: %s', err)
@@ -1699,15 +1645,13 @@ class Profiler:
1699 | 1645 |   raise RuntimeError("Currently, the CPU platform does not support Pynative mode to collect performance "
1700 | 1646 |   "data.")
1701 | 1647 |
1702 |      | - def _analyse_step_trace(self,
1703 |      | - is_gpu_kernel_async_launch_flag=False):
     | 1648 | + def _analyse_step_trace(self, is_training_mode_flag=True, is_gpu_kernel_async_launch_flag=False):
1704 | 1649 |   """
1705 | 1650 |   Analyse step trace data and save the result.
1706 | 1651 |
1707 | 1652 |   Args:
1708 |      | - source_path (str): The directory that contains the step trace original data.
1709 |      | - framework_parser (FrameworkParser): The framework parse instance.
1710 | 1653 |   is_training_mode_flag (bool): Whether in training mode or not.
     | 1654 | + is_gpu_kernel_async_launch_flag (bool): Whether gpu kernel launches are asynchronous
1711 | 1655 |   """
1712 | 1656 |   logger.info("Begin to parse step trace.")
1713 | 1657 |   # construct output path
@@ -1738,56 +1682,31 @@ class Profiler:
1738 | 1682 |   logger.info("Finish saving the intermediate result: %s", step_trace_intermediate_file_path)
1739 | 1683 |   logger.info("The point info is: %s", point_info)
1740 | 1684 |
1741 |      | - return point_info, is_training_mode_flag
1742 |      | - return {}, is_training_mode_flag
1743 |      | -
1744 |      | - # whether keep the first step
1745 |      | - skip_first_step_flag = framework_parser.check_op_name(INIT_OP_NAME)
1746 |      | - # recognize inference or training mode
1747 |      | - is_training_mode_flag = framework_parser.check_op_name("Gradients")
1748 |      | - # parser the step trace files and save the result to disk
1749 |      | - source_path = validate_and_normalize_path(source_path)
1750 |      | - parser = AscendStepTraceParser(input_dir=source_path,
1751 |      | - output_file_path=step_trace_intermediate_file_path,
1752 |      | - skip_first_step=skip_first_step_flag,
1753 |      | - is_training_mode=is_training_mode_flag)
1754 |      | - parser.set_task_id_op_name_dict(framework_parser.to_task_id_full_op_name_dict())
1755 |      | - parser.parse_and_save()
1756 |      | - point_info = parser.record_point_info(point_info_file_path)
1757 |      | -
1758 |      | - # print parser result
1759 |      | - parser.show()
1760 |      | - logger.info("Finish saving the intermediate result: %s", step_trace_intermediate_file_path)
1761 |      | - logger.info("The point info is: %s", point_info)
1762 |      | -
1763 |      | - return point_info, is_training_mode_flag
1764 |      | -
1765 | 1685 |   def _generate_timeline(self, reduce_op_type):
1766 | 1686 |   """Used for gpu, generate timeline info, write to json format file."""
1767 | 1687 |   try:
1768 | 1688 |   timeline_generator = GpuTimelineGenerator(self._output_path, self._dev_id, self._rank_size,
1769 | 1689 |   context.get_context("mode"))
1770 | 1690 |   timeline_generator.init_timeline(reduce_op_type)
1771 |      | - self._timeline_meta = timeline_generator.write_timeline(
     | 1691 | + self._timeline_meta = timeline_generator.write_timeline()
1772 | 1692 |   timeline_generator.write_timeline_summary()
     | 1693 | + timeline_generator.parse_fwk_data()
     | 1694 | + timeline_generator.write_fwk_timeline()
1773 | 1695 |   return timeline_generator
1774 | 1696 |   except (ProfilerIOException, ProfilerFileNotFoundException, RuntimeError) as err:
1775 | 1697 |   logger.warning('Fail to write timeline data: %s', err)
1776 | 1698 |   raise RuntimeError('Fail to write timeline data.') from err
1777 | 1699 |
1778 |      | - def _analyse_memory_usage(self
     | 1700 | + def _analyse_memory_usage(self):
1779 | 1701 |   """Analyse memory usage data."""
1780 | 1702 |   integrator = Integrator(self._output_path, self._rank_id)
1781 |      | -
1782 |      | - memory_parser = MemoryUsageParser(self._output_path, self._rank_id, pretty=self._pretty_json)
1783 |      | - memory_parser.init_memory_usage_info(aicore_detail_data, points)
1784 |      | - memory_parser.write_memory_files()
     | 1703 | + integrator.get_aicore_detail_data()
1785 | 1704 |
1786 | 1705 |   def _get_profiling_job_id(self, offline_path):
1787 | 1706 |   """Get profiling job id, which was generated by ada service.
1788 | 1707 |
1789 | 1708 |   Returns:
1790 |      | - str, profiling job id
     | 1709 | + str, profiling job id, eg: PROF_XXX/device_*.
1791 | 1710 |   """
1792 | 1711 |
1793 | 1712 |   if offline_path:
@@ -1816,18 +1735,17 @@ class Profiler:
1816 | 1735 |   "profiler will ignore this job dir.", job_dir)
1817 | 1736 |   continue
1818 | 1737 |
1819 |      | - prof_rank_id
     | 1738 | + prof_rank_id = ProfilerInfo.get_rank_id(self._output_path)
     | 1739 | + prof_device_id = ProfilerInfo.get_device_id(prof_dir)
1820 | 1740 |   job_start_time = self._parse_job_start_time(prof_dir)
1821 | 1741 |
1822 | 1742 |   if offline_path:
1823 |      | - if self._rank_id != prof_rank_id:
1824 |      | - continue
1825 | 1743 |   self._start_time = int(job_start_time)
1826 | 1744 |   else:
1827 | 1745 |   if self._dev_id != prof_device_id and self._rank_id != prof_rank_id:
1828 |      | - logger.
1829 |      | -
1830 |      | -
     | 1746 | + logger.warning("Find profiling find job path %s, but not current training device id. "
     | 1747 | + "Current training rank id %s, but job path rank id: %s, "
     | 1748 | + "profiler will ignore this job dir.", job_dir, self._rank_id, prof_rank_id)
1831 | 1749 |   continue
1832 | 1750 |
1833 | 1751 |   if job_start_time < self._start_time:
@@ -1936,19 +1854,21 @@ class Profiler:
1936 | 1854 |   self._output_path = validate_and_normalize_path(output_path)
1937 | 1855 |   else:
1938 | 1856 |   output_path = kwargs.pop("output_path")
     | 1857 | + if not isinstance(output_path, str):
     | 1858 | + logger.warning(
     | 1859 | + f"The output_path must be a string, but got type {type(output_path)}, it will be set to 'data'.")
     | 1860 | + output_path = "data"
1939 | 1861 |   self._output_path = validate_and_normalize_path(output_path)
1940 | 1862 |
1941 | 1863 |   self._output_path = os.path.join(self._output_path, "profiler")
1942 | 1864 |   if not os.path.exists(self._output_path):
1943 |      | - os.makedirs(self._output_path, exist_ok=True)
1944 |      | - os.chmod(self._output_path, stat.S_IRUSR | stat.S_IWUSR | stat.S_IXUSR)
     | 1865 | + os.makedirs(self._output_path, exist_ok=True, mode=stat.S_IRUSR | stat.S_IWUSR | stat.S_IXUSR)
1945 | 1866 |   else:
1946 | 1867 |   logger.warning("The target dir already exists. "
1947 | 1868 |   "There may be some old profiling data, and they will be rewritten in the end.")
1948 | 1869 |   self._framework_path = os.path.join(self._output_path, "FRAMEWORK")
1949 | 1870 |   if not os.path.exists(self._framework_path):
1950 |      | - os.makedirs(self._framework_path, exist_ok=True)
1951 |      | - os.chmod(self._framework_path, stat.S_IRUSR | stat.S_IWUSR | stat.S_IXUSR)
     | 1871 | + os.makedirs(self._framework_path, exist_ok=True, mode=stat.S_IRUSR | stat.S_IWUSR | stat.S_IXUSR)
1952 | 1872 |
1953 | 1873 |   def _parser_kwargs(self, kwargs):
1954 | 1874 |   """Parse kwargs vale."""
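The directory-creation change above folds the permission bits into `os.makedirs(mode=...)` instead of a separate `os.chmod`. Note that the two forms are close but not identical: `mode` is masked by the process umask on POSIX and ignored on Windows, while an explicit `os.chmod` sets the bits after creation (on Windows it only toggles read-only). A sketch of both forms for comparison:

    import os
    import stat
    import tempfile

    owner_only = stat.S_IRUSR | stat.S_IWUSR | stat.S_IXUSR  # 0o700

    base = tempfile.mkdtemp()

    # New style: mode applied at creation time, subject to the umask.
    path_a = os.path.join(base, "profiler_a")
    os.makedirs(path_a, exist_ok=True, mode=owner_only)

    # Old style: create first, then force the bits regardless of the umask.
    path_b = os.path.join(base, "profiler_b")
    os.makedirs(path_b, exist_ok=True)
    os.chmod(path_b, owner_only)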
@@ -1969,11 +1889,11 @@ class Profiler:
1969 | 1889 |   f"but got type {type(self._op_time)}, it will be set to True.")
1970 | 1890 |   self._op_time = True
1971 | 1891 |
1972 |      | - self._data_process = kwargs.pop("data_process",
     | 1892 | + self._data_process = kwargs.pop("data_process", False)
1973 | 1893 |   if not isinstance(self._data_process, bool):
1974 | 1894 |   logger.warning(f"For '{self.__class__.__name__}', the parameter data_process must be bool, "
1975 |      | - f"but got type {type(self._data_process)}, it will be set to
1976 |      | - self._data_process =
     | 1895 | + f"but got type {type(self._data_process)}, it will be set to False.")
     | 1896 | + self._data_process = False
1977 | 1897 |
1978 | 1898 |   timeline_limit = kwargs.pop("timeline_limit", 500)
1979 | 1899 |   if isinstance(timeline_limit, bool) or not isinstance(timeline_limit, int):
@@ -1985,70 +1905,22 @@ class Profiler:
1985 | 1905 |   "[Profiler]The 'timeline_limit' parameter must be greater than 0, it will be set to 500.")
1986 | 1906 |   timeline_limit = 500
1987 | 1907 |   self._timeline_size_limit_byte = timeline_limit * 1024 * 1024
1988 |      | - self._profile_framework = kwargs.pop("profile_framework",
1989 |      | - if self._profile_framework not in ["
1990 |      | - logger.warning(f"For '{self.__class__.__name__}', the parameter profile_framework must be one of [
1991 |      | - f" 'time', 'all', None], but got {self._profile_framework}, it will be set to
1992 |      | - self._profile_framework =
1993 |      | - if not isinstance(self._data_simplification, bool):
1994 |      | - logger.warning(f"For '{self.__class__.__name__}', the parameter data_simplification must be bool, "
1995 |      | - f"but got type {type(self._data_simplification)}, it will be set to True.")
1996 |      | - self._data_simplification = True
     | 1908 | + self._profile_framework = kwargs.pop("profile_framework", None)
     | 1909 | + if self._profile_framework not in ["time", "all", None]:
     | 1910 | + logger.warning(f"For '{self.__class__.__name__}', the parameter profile_framework must be one of ["
     | 1911 | + f" 'time', 'all', None], but got {self._profile_framework}, it will be set to None.")
     | 1912 | + self._profile_framework = None
1997 | 1913 |
1998 | 1914 |   if not isinstance(self._data_simplification, bool):
1999 | 1915 |   logger.warning(f"For '{self.__class__.__name__}', the parameter data_simplification must be bool, "
2000 | 1916 |   f"but got type {type(self._data_simplification)}, it will be set to True.")
2001 | 1917 |   self._data_simplification = True
2002 | 1918 |
2003 |      | - self.
2004 |      | - if not isinstance(self.
2005 |      | - logger.warning(f"For '{self.__class__.__name__}', the parameter
2006 |      | - f"type {type(self.
2007 |      | - self.
2008 |      | -
2009 |      | -
2010 |      | -
2011 |      | - Read data from the csv file, and write it into timeline file, so the timeline can be show on tracing tool.
2012 |      | - """
2013 |      | - logger.info("Profiling HostInfo start.")
2014 |      | - host_dir = os.path.join(self._output_path, 'host_info')
2015 |      | - host_dir = validate_and_normalize_path(host_dir)
2016 |      | - if not os.path.exists(host_dir):
2017 |      | - logger.warning("Host info directory: %s not exist.", host_dir)
2018 |      | - return
2019 |      | - csv_file_name = 'host_info_' + str(self._rank_id) + '.csv'
2020 |      | - json_file_name = 'timeline_' + str(self._rank_id) + '.json'
2021 |      | - memory_file_name = 'host_memory_' + str(self._rank_id) + '.csv'
2022 |      | - dataset_file_name = 'dataset_' + str(self._rank_id) + '.csv'
2023 |      | - host_info_file = os.path.join(self._output_path, 'host_info', csv_file_name)
2024 |      | - timeline_file = os.path.join(self._output_path, 'host_info', json_file_name)
2025 |      | - memory_file = os.path.join(self._output_path, 'host_info', memory_file_name)
2026 |      | - dataset_execution_file = os.path.join(self._output_path, 'host_info', dataset_file_name)
2027 |      | - _parse_host_info(host_info_file, timeline_file, memory_file)
2028 |      | - _calculate_dataset_execution_time(host_info_file, dataset_execution_file)
2029 |      | - logger.info("Profile HostInfo finished.")
2030 |      | -
2031 |      | -
2032 |      | - def _offline_parse(offline_path):
2033 |      | - """Parse data in abnormal scenario, only support for host_info at present."""
2034 |      | - logger.info("Profiling HostInfo offline start.")
2035 |      | - host_dir = os.path.join(offline_path, 'profiler', 'host_info')
2036 |      | - host_dir = validate_and_normalize_path(host_dir)
2037 |      | - if not os.path.exists(host_dir):
2038 |      | - logger.warning("Host info directory: %s not exist.", host_dir)
2039 |      | - return
2040 |      | - files = os.listdir(host_dir)
2041 |      | - for file in files:
2042 |      | - if not file.startswith("host_info_") or not file.endswith(".csv"):
2043 |      | - continue
2044 |      | - rank_id = file.split('_')[-1].split('.')[0]
2045 |      | - if not rank_id.isdigit():
2046 |      | - logger.info("Cannot get rank_id from file: %s, skip it", file)
2047 |      | - return
2048 |      | - host_info_file = os.path.join(host_dir, file)
2049 |      | - timeline_file = os.path.join(host_dir, f'timeline_{rank_id}.json')
2050 |      | - memory_file = os.path.join(host_dir, f'host_memory_{rank_id}.csv')
2051 |      | - dataset_execution_file = os.path.join(host_dir, f'dataset_{rank_id}.csv')
2052 |      | - _parse_host_info(host_info_file, timeline_file, memory_file)
2053 |      | - _calculate_dataset_execution_time(host_info_file, dataset_execution_file)
2054 |      | - logger.info("Profile HostInfo offline finished.")
     | 1919 | + self._with_stack = kwargs.pop("with_stack", False)
     | 1920 | + if not isinstance(self._with_stack, bool):
     | 1921 | + logger.warning(f"For '{self.__class__.__name__}', the parameter with_stack must be bool, but got "
     | 1922 | + f"type {type(self._with_stack)}, it will be set to False.")
     | 1923 | + self._with_stack = False
     | 1924 | + if self._with_stack and self._profile_framework not in ["time", "all"]:
     | 1925 | + logger.warning("When using the with_stack parameter, the profile_framework parameter must be enabled.")
     | 1926 | + self._with_stack = False
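According to the hunk above, the new `with_stack` option is only honoured when `profile_framework` is "time" or "all"; otherwise the profiler resets it to `False` and logs a warning. A hedged usage sketch based only on this hunk (the argument names come from the diff; the surrounding call is illustrative, not an official recipe):

    import mindspore as ms

    profiler = ms.Profiler(
        output_path="./profiler_data",
        profile_framework="all",   # required for with_stack to take effect
        with_stack=True,           # would be reset to False if profile_framework were None
    )
    # ... run the training or inference workload here ...
    profiler.analyse()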