mindspore 2.3.0-cp39-cp39-win_amd64.whl → 2.4.0-cp39-cp39-win_amd64.whl
This diff compares publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the packages as they appear in their respective public registries.
- mindspore/.commit_id +1 -1
- mindspore/__init__.py +3 -1
- mindspore/_c_dataengine.cp39-win_amd64.pyd +0 -0
- mindspore/_c_expression.cp39-win_amd64.pyd +0 -0
- mindspore/_c_mindrecord.cp39-win_amd64.pyd +0 -0
- mindspore/_checkparam.py +50 -9
- mindspore/_extends/parse/compile_config.py +41 -0
- mindspore/_extends/parse/parser.py +9 -7
- mindspore/_extends/parse/standard_method.py +52 -14
- mindspore/_extends/pijit/pijit_func_white_list.py +350 -24
- mindspore/amp.py +24 -10
- mindspore/avcodec-59.dll +0 -0
- mindspore/avdevice-59.dll +0 -0
- mindspore/avfilter-8.dll +0 -0
- mindspore/avformat-59.dll +0 -0
- mindspore/avutil-57.dll +0 -0
- mindspore/common/__init__.py +6 -4
- mindspore/common/_pijit_context.py +190 -0
- mindspore/common/_register_for_tensor.py +2 -1
- mindspore/common/_tensor_overload.py +139 -0
- mindspore/common/api.py +102 -87
- mindspore/common/dump.py +5 -6
- mindspore/common/generator.py +1 -7
- mindspore/common/hook_handle.py +14 -26
- mindspore/common/mindir_util.py +2 -2
- mindspore/common/parameter.py +46 -13
- mindspore/common/recompute.py +39 -9
- mindspore/common/sparse_tensor.py +7 -3
- mindspore/common/tensor.py +209 -29
- mindspore/communication/__init__.py +1 -1
- mindspore/communication/_comm_helper.py +38 -3
- mindspore/communication/comm_func.py +310 -55
- mindspore/communication/management.py +14 -14
- mindspore/context.py +123 -22
- mindspore/dataset/__init__.py +1 -1
- mindspore/dataset/audio/__init__.py +1 -1
- mindspore/dataset/core/config.py +7 -0
- mindspore/dataset/core/validator_helpers.py +7 -0
- mindspore/dataset/engine/cache_client.py +1 -1
- mindspore/dataset/engine/datasets.py +72 -44
- mindspore/dataset/engine/datasets_audio.py +7 -7
- mindspore/dataset/engine/datasets_standard_format.py +53 -3
- mindspore/dataset/engine/datasets_text.py +20 -20
- mindspore/dataset/engine/datasets_user_defined.py +174 -104
- mindspore/dataset/engine/datasets_vision.py +33 -33
- mindspore/dataset/engine/iterators.py +29 -0
- mindspore/dataset/engine/obs/util.py +7 -0
- mindspore/dataset/engine/queue.py +114 -60
- mindspore/dataset/engine/serializer_deserializer.py +2 -2
- mindspore/dataset/engine/validators.py +34 -14
- mindspore/dataset/text/__init__.py +1 -4
- mindspore/dataset/transforms/__init__.py +0 -3
- mindspore/dataset/utils/line_reader.py +2 -0
- mindspore/dataset/vision/__init__.py +1 -4
- mindspore/dataset/vision/utils.py +1 -1
- mindspore/dataset/vision/validators.py +2 -1
- mindspore/dnnl.dll +0 -0
- mindspore/{nn/extend → experimental/es}/__init__.py +4 -11
- mindspore/experimental/es/embedding_service.py +883 -0
- mindspore/{nn/layer → experimental/es}/embedding_service_layer.py +218 -30
- mindspore/experimental/llm_boost/__init__.py +21 -0
- mindspore/{nn/extend/layer → experimental/llm_boost/atb}/__init__.py +4 -8
- mindspore/experimental/llm_boost/atb/boost_base.py +211 -0
- mindspore/experimental/llm_boost/atb/llama_boost.py +115 -0
- mindspore/experimental/llm_boost/atb/qwen_boost.py +101 -0
- mindspore/experimental/llm_boost/register.py +129 -0
- mindspore/experimental/llm_boost/utils.py +31 -0
- mindspore/experimental/optim/adamw.py +85 -0
- mindspore/experimental/optim/optimizer.py +3 -0
- mindspore/hal/__init__.py +3 -3
- mindspore/hal/contiguous_tensors_handle.py +175 -0
- mindspore/hal/stream.py +18 -0
- mindspore/include/api/model_group.h +13 -1
- mindspore/include/api/types.h +10 -10
- mindspore/include/dataset/config.h +2 -2
- mindspore/include/dataset/constants.h +2 -2
- mindspore/include/dataset/execute.h +2 -2
- mindspore/include/dataset/vision.h +4 -0
- mindspore/jpeg62.dll +0 -0
- mindspore/log.py +1 -1
- mindspore/mindrecord/filewriter.py +68 -51
- mindspore/mindspore_backend.dll +0 -0
- mindspore/mindspore_common.dll +0 -0
- mindspore/mindspore_core.dll +0 -0
- mindspore/mindspore_glog.dll +0 -0
- mindspore/mindspore_np_dtype.dll +0 -0
- mindspore/mindspore_ops.dll +0 -0
- mindspore/mint/__init__.py +495 -46
- mindspore/mint/distributed/__init__.py +31 -0
- mindspore/mint/distributed/distributed.py +254 -0
- mindspore/mint/nn/__init__.py +266 -21
- mindspore/mint/nn/functional.py +125 -19
- mindspore/mint/nn/layer/__init__.py +39 -0
- mindspore/mint/nn/layer/activation.py +133 -0
- mindspore/mint/nn/layer/normalization.py +477 -0
- mindspore/mint/nn/layer/pooling.py +110 -0
- mindspore/mint/optim/adamw.py +28 -7
- mindspore/mint/special/__init__.py +63 -0
- mindspore/multiprocessing/__init__.py +2 -1
- mindspore/nn/__init__.py +0 -1
- mindspore/nn/cell.py +275 -93
- mindspore/nn/layer/activation.py +211 -44
- mindspore/nn/layer/basic.py +113 -3
- mindspore/nn/layer/embedding.py +120 -2
- mindspore/nn/layer/normalization.py +101 -5
- mindspore/nn/layer/padding.py +34 -48
- mindspore/nn/layer/pooling.py +161 -7
- mindspore/nn/layer/transformer.py +3 -3
- mindspore/nn/loss/__init__.py +2 -2
- mindspore/nn/loss/loss.py +84 -6
- mindspore/nn/optim/__init__.py +2 -1
- mindspore/nn/optim/adadelta.py +1 -1
- mindspore/nn/optim/adam.py +1 -1
- mindspore/nn/optim/lamb.py +1 -1
- mindspore/nn/optim/tft_wrapper.py +127 -0
- mindspore/nn/wrap/cell_wrapper.py +12 -23
- mindspore/nn/wrap/grad_reducer.py +5 -5
- mindspore/nn/wrap/loss_scale.py +17 -3
- mindspore/numpy/__init__.py +1 -1
- mindspore/numpy/array_creations.py +65 -68
- mindspore/numpy/array_ops.py +64 -60
- mindspore/numpy/fft.py +610 -75
- mindspore/numpy/logic_ops.py +11 -10
- mindspore/numpy/math_ops.py +85 -84
- mindspore/numpy/utils_const.py +4 -4
- mindspore/opencv_core452.dll +0 -0
- mindspore/opencv_imgcodecs452.dll +0 -0
- mindspore/opencv_imgproc452.dll +0 -0
- mindspore/ops/__init__.py +6 -4
- mindspore/ops/_grad_experimental/grad_comm_ops.py +47 -3
- mindspore/ops/_grad_experimental/grad_math_ops.py +0 -22
- mindspore/ops/_vmap/vmap_array_ops.py +2 -4
- mindspore/ops/_vmap/vmap_math_ops.py +17 -1
- mindspore/ops/_vmap/vmap_nn_ops.py +43 -2
- mindspore/ops/auto_generate/cpp_create_prim_instance_helper.py +85 -7
- mindspore/ops/auto_generate/gen_arg_dtype_cast.py +2 -0
- mindspore/ops/auto_generate/gen_extend_func.py +734 -13
- mindspore/ops/auto_generate/gen_ops_def.py +2420 -381
- mindspore/ops/auto_generate/gen_ops_prim.py +5196 -1659
- mindspore/ops/auto_generate/pyboost_inner_prim.py +176 -56
- mindspore/ops/composite/base.py +85 -48
- mindspore/ops/composite/multitype_ops/_compile_utils.py +1 -0
- mindspore/ops/composite/multitype_ops/not_in_impl.py +2 -2
- mindspore/ops/function/__init__.py +22 -0
- mindspore/ops/function/array_func.py +490 -153
- mindspore/ops/function/debug_func.py +113 -1
- mindspore/ops/function/fft_func.py +15 -2
- mindspore/ops/function/grad/grad_func.py +3 -2
- mindspore/ops/function/math_func.py +558 -207
- mindspore/ops/function/nn_func.py +817 -383
- mindspore/ops/function/other_func.py +3 -2
- mindspore/ops/function/random_func.py +184 -8
- mindspore/ops/function/reshard_func.py +13 -11
- mindspore/ops/function/sparse_unary_func.py +1 -1
- mindspore/ops/function/vmap_func.py +3 -2
- mindspore/ops/functional.py +24 -14
- mindspore/ops/op_info_register.py +3 -3
- mindspore/ops/operations/__init__.py +6 -1
- mindspore/ops/operations/_grad_ops.py +2 -76
- mindspore/ops/operations/_infer_ops.py +1 -1
- mindspore/ops/operations/_inner_ops.py +71 -94
- mindspore/ops/operations/array_ops.py +12 -146
- mindspore/ops/operations/comm_ops.py +42 -53
- mindspore/ops/operations/custom_ops.py +83 -19
- mindspore/ops/operations/debug_ops.py +42 -10
- mindspore/ops/operations/manually_defined/_inner.py +12 -0
- mindspore/ops/operations/manually_defined/ops_def.py +265 -10
- mindspore/ops/operations/math_ops.py +12 -223
- mindspore/ops/operations/nn_ops.py +20 -114
- mindspore/ops/operations/other_ops.py +7 -4
- mindspore/ops/operations/random_ops.py +46 -1
- mindspore/ops/primitive.py +18 -6
- mindspore/ops_generate/arg_dtype_cast.py +2 -0
- mindspore/ops_generate/gen_aclnn_implement.py +11 -11
- mindspore/ops_generate/gen_constants.py +36 -0
- mindspore/ops_generate/gen_ops.py +67 -52
- mindspore/ops_generate/gen_ops_inner_prim.py +1 -1
- mindspore/ops_generate/gen_pyboost_func.py +131 -47
- mindspore/ops_generate/op_proto.py +10 -3
- mindspore/ops_generate/pyboost_utils.py +14 -1
- mindspore/ops_generate/template.py +43 -21
- mindspore/parallel/__init__.py +3 -1
- mindspore/parallel/_auto_parallel_context.py +28 -8
- mindspore/parallel/_cell_wrapper.py +83 -0
- mindspore/parallel/_parallel_serialization.py +47 -19
- mindspore/parallel/_tensor.py +81 -11
- mindspore/parallel/_utils.py +13 -1
- mindspore/parallel/algo_parameter_config.py +5 -5
- mindspore/parallel/checkpoint_transform.py +46 -39
- mindspore/parallel/cluster/process_entity/__init__.py +1 -1
- mindspore/parallel/cluster/process_entity/_api.py +31 -23
- mindspore/parallel/cluster/process_entity/_utils.py +2 -27
- mindspore/parallel/parameter_broadcast.py +3 -4
- mindspore/parallel/shard.py +162 -31
- mindspore/parallel/transform_safetensors.py +993 -0
- mindspore/profiler/__init__.py +2 -1
- mindspore/profiler/common/constant.py +29 -0
- mindspore/profiler/common/registry.py +47 -0
- mindspore/profiler/common/util.py +28 -0
- mindspore/profiler/dynamic_profiler.py +694 -0
- mindspore/profiler/envprofiling.py +17 -19
- mindspore/profiler/parser/ascend_analysis/constant.py +18 -0
- mindspore/profiler/parser/ascend_analysis/file_manager.py +25 -4
- mindspore/profiler/parser/ascend_analysis/function_event.py +43 -19
- mindspore/profiler/parser/ascend_analysis/fwk_cann_parser.py +31 -26
- mindspore/profiler/parser/ascend_analysis/fwk_file_parser.py +56 -10
- mindspore/profiler/parser/ascend_analysis/msprof_timeline_parser.py +55 -8
- mindspore/profiler/parser/ascend_analysis/path_manager.py +313 -0
- mindspore/profiler/parser/ascend_analysis/profiler_info_parser.py +27 -20
- mindspore/profiler/parser/ascend_analysis/trace_event_manager.py +9 -2
- mindspore/profiler/parser/ascend_msprof_exporter.py +5 -4
- mindspore/profiler/parser/ascend_timeline_generator.py +27 -25
- mindspore/profiler/parser/base_timeline_generator.py +19 -25
- mindspore/profiler/parser/cpu_gpu_timeline_generator.py +25 -12
- mindspore/profiler/parser/framework_parser.py +1 -391
- mindspore/profiler/parser/gpu_analysis/__init__.py +14 -0
- mindspore/profiler/parser/gpu_analysis/function_event.py +44 -0
- mindspore/profiler/parser/gpu_analysis/fwk_file_parser.py +89 -0
- mindspore/profiler/parser/gpu_analysis/profiler_info_parser.py +72 -0
- mindspore/profiler/parser/memory_usage_parser.py +0 -154
- mindspore/profiler/parser/profiler_info.py +78 -6
- mindspore/profiler/profiler.py +153 -0
- mindspore/profiler/profiling.py +280 -412
- mindspore/rewrite/__init__.py +1 -2
- mindspore/rewrite/common/namespace.py +4 -4
- mindspore/rewrite/symbol_tree/symbol_tree.py +3 -3
- mindspore/run_check/_check_version.py +36 -103
- mindspore/safeguard/rewrite_obfuscation.py +591 -247
- mindspore/swresample-4.dll +0 -0
- mindspore/swscale-6.dll +0 -0
- mindspore/tinyxml2.dll +0 -0
- mindspore/train/__init__.py +4 -3
- mindspore/train/_utils.py +28 -2
- mindspore/train/amp.py +171 -53
- mindspore/train/callback/__init__.py +2 -2
- mindspore/train/callback/_callback.py +4 -4
- mindspore/train/callback/_checkpoint.py +85 -22
- mindspore/train/callback/_cluster_monitor.py +1 -1
- mindspore/train/callback/_flops_collector.py +1 -0
- mindspore/train/callback/_loss_monitor.py +3 -3
- mindspore/train/callback/_on_request_exit.py +134 -31
- mindspore/train/callback/_summary_collector.py +5 -5
- mindspore/train/callback/_tft_register.py +352 -0
- mindspore/train/dataset_helper.py +7 -3
- mindspore/train/metrics/metric.py +3 -3
- mindspore/train/metrics/roc.py +4 -4
- mindspore/train/mind_ir_pb2.py +44 -39
- mindspore/train/model.py +134 -58
- mindspore/train/serialization.py +336 -112
- mindspore/turbojpeg.dll +0 -0
- mindspore/utils/__init__.py +21 -0
- mindspore/utils/utils.py +60 -0
- mindspore/version.py +1 -1
- {mindspore-2.3.0.dist-info → mindspore-2.4.0.dist-info}/METADATA +6 -2
- {mindspore-2.3.0.dist-info → mindspore-2.4.0.dist-info}/RECORD +258 -252
- mindspore/include/c_api/ms/abstract.h +0 -67
- mindspore/include/c_api/ms/attribute.h +0 -197
- mindspore/include/c_api/ms/base/handle_types.h +0 -43
- mindspore/include/c_api/ms/base/macros.h +0 -32
- mindspore/include/c_api/ms/base/status.h +0 -33
- mindspore/include/c_api/ms/base/types.h +0 -283
- mindspore/include/c_api/ms/context.h +0 -102
- mindspore/include/c_api/ms/graph.h +0 -160
- mindspore/include/c_api/ms/node.h +0 -606
- mindspore/include/c_api/ms/tensor.h +0 -161
- mindspore/include/c_api/ms/value.h +0 -84
- mindspore/mindspore_shared_lib.dll +0 -0
- mindspore/nn/extend/basic.py +0 -140
- mindspore/nn/extend/embedding.py +0 -143
- mindspore/nn/extend/layer/normalization.py +0 -109
- mindspore/nn/extend/pooling.py +0 -117
- mindspore/nn/layer/embedding_service.py +0 -531
- mindspore/ops/_op_impl/aicpu/strided_slice_v2.py +0 -93
- mindspore/ops/_op_impl/aicpu/strided_slice_v2_grad.py +0 -66
- mindspore/ops/extend/__init__.py +0 -53
- mindspore/ops/extend/array_func.py +0 -218
- mindspore/ops/extend/math_func.py +0 -76
- mindspore/ops/extend/nn_func.py +0 -308
- mindspore/ops/silent_check.py +0 -162
- mindspore/profiler/parser/msadvisor_analyzer.py +0 -82
- mindspore/profiler/parser/msadvisor_parser.py +0 -240
- mindspore/train/callback/_mindio_ttp.py +0 -443
- {mindspore-2.3.0.dist-info → mindspore-2.4.0.dist-info}/WHEEL +0 -0
- {mindspore-2.3.0.dist-info → mindspore-2.4.0.dist-info}/entry_points.txt +0 -0
- {mindspore-2.3.0.dist-info → mindspore-2.4.0.dist-info}/top_level.txt +0 -0
mindspore/profiler/profiling.py
CHANGED
@@ -14,19 +14,16 @@
 # ============================================================================
 """Profiling api file."""
 import os
-import re
-import shutil
 import stat
 import time
 import json
 from json import JSONDecodeError
 import glob
-import subprocess
-import csv
 import socket
+import multiprocessing
 from enum import Enum
-from multiprocessing import Process
 from typing import List
+from sys import getsizeof
 import numpy as np
 
 from mindspore import log as logger, context
@@ -47,13 +44,11 @@ from mindspore.profiler.parser.integrator import Integrator, DeviceTarget
 from mindspore.profiler.parser.ascend_analysis.function_event import CANNEvent
 from mindspore.profiler.parser.cpu_gpu_timeline_generator import GpuTimelineGenerator, CpuTimelineGenerator
 from mindspore.profiler.parser.ascend_timeline_generator import AscendTimelineGenerator
-from mindspore.profiler.parser.memory_usage_parser import MemoryUsageParser
 from mindspore.profiler.parser.minddata_parser import MinddataParser
 from mindspore.profiler.parser.minddata_analyzer import MinddataProfilingAnalyzer
 from mindspore.profiler.parser.minddata_pipeline_parser import \
     MinddataPipelineParser
-from mindspore.profiler.parser.step_trace_parser import GpuStepTraceParser
-from mindspore.profiler.parser.msadvisor_analyzer import Msadvisor
+from mindspore.profiler.parser.step_trace_parser import GpuStepTraceParser
 from mindspore.profiler.parser.profiler_info import ProfilerInfo
 from mindspore.common.api import _pynative_executor
 from mindspore.profiler.parser.ascend_msprof_exporter import AscendMsprofExporter
@@ -67,6 +62,11 @@ from mindspore.profiler.parser.ascend_hccl_generator import AscendHCCLGenerator
 from mindspore.profiler.parser.ascend_communicate_generator import AscendCommunicationGenerator
 from mindspore.profiler.parser.ascend_memory_generator import AscendMemoryGenerator
 from mindspore.profiler.parser.ascend_integrate_generator import AscendIntegrateGenerator
+from mindspore.profiler.parser.ascend_analysis.file_manager import FileManager
+from mindspore.profiler.parser.ascend_analysis.path_manager import PathManager
+from mindspore.profiler.parser.ascend_analysis.constant import Constant
+from mindspore.profiler.common.util import timeit
+
 
 INIT_OP_NAME = 'Default/InitDataSetQueue'
 
@@ -105,7 +105,7 @@ class DeviceSupportParam(Enum):
     ASCEND = [
         'start', 'start_profile', 'output_path', 'data_process', 'timeline_limit', 'profile_memory',
        'parallel_strategy', 'profile_communication', 'aicore_metrics', 'l2_cache', 'hbm_ddr', 'pcie', 'op_time',
-        'ascend_job_id', 'profile_framework', '
+        'ascend_job_id', 'profile_framework', 'with_stack', 'profiler_level', 'data_simplification'
     ]
 
 
@@ -114,7 +114,6 @@ ALWAYS_VALID_PARAM = [
     'hbm_ddr', 'pcie', 'ascend_job_id', 'op_time', 'profile_framework', 'profiler_level'
 ]
 
-
 ANALYSIS_ASYNC_MODE = 'async'
 ANALYSIS_SYNC_MODE = 'sync'
 DEFAULT_MODEL_ID = 4294967295
@@ -164,147 +163,6 @@ def _calculate_dataset_item(row, execution_time_map, ts_map):
         logger.warning("Can not map the start time for item: %s.", row)
 
 
-def _calculate_dataset_execution_time(input_file, output_file):
-    r"""
-    Parse the host info into timeline file, so as to show on UI.
-
-    Args:
-        input_file: the original host_info file, in csv format.
-        output_file: the output file, in csv format.
-    """
-    input_file = validate_and_normalize_path(input_file)
-    # execution_time_map is used to store the ExecutionCalculator for each stage.
-    execution_time_map = {}
-    # ts_map is used to store the start time of each event_stage_tid_pid.
-    ts_map = {}
-    with open(input_file, 'r') as f:
-        for row in csv.DictReader(f):
-            try:
-                module_name = row['module_name']
-                if module_name != 'Dataset':
-                    continue
-                _calculate_dataset_item(row, execution_time_map, ts_map)
-            except KeyError as e:
-                logger.error("Error occur when analyse line: %s, Details is: %s", row, e)
-                continue
-    if ts_map:
-        logger.warning("Only start time is record for these items:")
-        for k, v in ts_map.items():
-            logger.warning("event_stage_tid_pid: %s, time: %d us.", k, v)
-    output_file = validate_and_normalize_path(output_file)
-    flags = os.O_WRONLY | os.O_CREAT | os.O_TRUNC
-    modes = stat.S_IWUSR | stat.S_IRUSR
-    with os.fdopen(os.open(output_file, flags, modes), 'w') as f:
-        csv_writer = csv.writer(f)
-        csv_writer.writerow(['Operation', 'Stage', 'Occurrences', 'Avg. time (us)', 'Custom Info'])
-        for _, v in execution_time_map.items():
-            csv_writer.writerow([v.event, v.stage, v.count, v.average_execution, v.custom_info])
-    os.chmod(output_file, modes)
-    logger.info('Successfully calculate the execution time and write it to file: %s.', output_file)
-
-
-def _extract_timeline_item(row, time_line, ts_map):
-    """Process one row, try to extract a timeline item."""
-    start_end = row['start_end']
-    event_stage_tid_pid = row['event'] + '_' + row['stage'] + '_' + row['tid'] + '_' + row['pid']
-    # map start and end, put the mapped event into timeline.
-    if start_end == '1' and event_stage_tid_pid in ts_map:
-        title = row['event'] + '::' + row['stage']
-        event = {'name': title, 'cat': row['module_name']}
-        ts_end = int(row['time_stamp(us)'])
-        ts = ts_map[event_stage_tid_pid]
-        event['ts'] = ts
-        event['dur'] = ts_end - ts
-        event['ph'] = 'X'
-        event['pid'] = row['pid']
-        event['tid'] = row['tid']
-        event['args'] = {'parent_pid': row['parent_pid']}
-        time_line.append(event)
-        del ts_map[event_stage_tid_pid]
-    elif start_end == '0':
-        ts = int(row['time_stamp(us)'])
-        ts_map[event_stage_tid_pid] = ts
-    # Put the instance event into timeline.
-    elif start_end == '2':
-        title = row['event'] + '::' + row['stage']
-        event = {
-            'name': title, 'cat': row['module_name'], 'ts': int(row['time_stamp(us)']), 'ph': 'i',
-            'pid': row['pid'], 'tid': row['tid'], 'args': {'parent_pid': row['parent_pid']}
-        }
-        time_line.append(event)
-    else:
-        logger.warning("Can not map the start time for item: %s.", row)
-
-
-def _parse_host_info(input_file, output_timeline_file, output_memory_file, is_develop_user=True):
-    r"""
-    Parse the host info into timeline file, so as to show on UI.
-
-    Args:
-        input_file: the original host_info file, in csv format.
-        output_timeline_file: the output timeline file, in json format.
-        output_memory_file: the output memory_usage file, in csv format.
-        is_develop_user: some data only shown to develop users, other users no need to analyse it.
-    """
-    input_file = validate_and_normalize_path(input_file)
-    time_line = []
-    # ts_map is used to store the start time of each event_stage_tid_pid
-    ts_map = {}
-    memory_header = [
-        'tid', 'pid', 'parent_pid', 'module_name', 'event', 'stage', 'level', 'start_end', 'custom_info',
-        'memory_usage(kB)', 'time_stamp(us)'
-    ]
-    memory_info = []
-    with open(input_file, 'r') as f:
-        for row in csv.DictReader(f):
-            try:
-                level = row['level']
-                if level == '0' and not is_develop_user:
-                    continue
-                if int(row['time_stamp(us)']) > 0:
-                    _extract_timeline_item(row, time_line, ts_map)
-                if int(row['memory_usage(kB)']) > 0:
-                    memory_info.append(row)
-            except KeyError as e:
-                logger.error("Error occur when analyse line: %s, Details is: %s", row, e)
-                continue
-    if memory_info:
-        with os.fdopen(os.open(output_memory_file, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600), 'w') as csv_file:
-            csv_writer = csv.DictWriter(csv_file, fieldnames=memory_header)
-            csv_writer.writeheader()
-            for item in memory_info:
-                csv_writer.writerow(item)
-        os.chmod(output_memory_file, stat.S_IREAD | stat.S_IWRITE)
-    else:
-        logger.warning("No memory_usage is record in file: %s", input_file)
-
-    if ts_map:
-        logger.warning("Only start time is record for these items:")
-        for k, v in ts_map.items():
-            logger.warning("event_stage_tid_pid: %s, time: %d us.", k, v)
-            last_dash = k.rfind('_')
-            if last_dash == -1:
-                logger.error("Can't find pid in the event_stage_tid_pid string: %s", k)
-                continue
-            second_last_dash = k.rfind('_', 0, last_dash - 1)
-            if second_last_dash == -1:
-                logger.error("Can't find tid in the event_stage_tid_pid string: %s", k)
-                continue
-            pid = k[last_dash + 1:]
-            tid = k[second_last_dash + 1: last_dash]
-            title = k[:second_last_dash]
-            unfinished_timeline = {'name': title, 'pid': pid, 'tid': tid, 'ph': 'B', 'ts': int(v)}
-            time_line.append(unfinished_timeline)
-
-    if time_line:
-        timeline_file = validate_and_normalize_path(output_timeline_file)
-        with os.fdopen(os.open(timeline_file, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600), 'w') as json_file:
-            json.dump(time_line, json_file)
-        os.chmod(timeline_file, stat.S_IREAD | stat.S_IWRITE)
-    else:
-        logger.warning("No valid time_stamp is record in file: %s", input_file)
-
-
 def _ascend_graph_msprof_generator(mindstudio_profiler_output, model_iteration_dict):
     """Executing the msprof export mode."""
     try:
@@ -351,20 +209,21 @@ class Profiler:
         output_path (str, optional): Output data path. Default: ``"./data"`` .
         profiler_level (ProfilerLevel, optional): (Ascend only) The level of profiling. Default: ``None``.
 
-            -
+            - ProfilerLevel.Level0: Leanest level of profiling data collection, collects information about the elapsed
              time of the computational operators on the NPU and communication large operator information.
-            -
-              mini operator information based on Level0.
-            -
+            - ProfilerLevel.Level1: Collect more CANN layer AscendCL data and AICore performance metrics and
+              communication mini operator information based on Level0.
+            - ProfilerLevel.Level2: Collect GE and Runtime information in CANN layer on top of Level1
 
         op_time (bool, optional): (Ascend/GPU) Whether to collect operators performance data. Default value: ``True``.
         profile_communication (bool, optional): (Ascend only) Whether to collect communication performance data in
            a multi devices training,collect when True. Setting this parameter has no effect during single card
            training. When using this parameter, `op_time` must be set to ``True`` . Default: ``False`` .
         profile_memory (bool, optional): (Ascend only) Whether to collect tensor memory data, collect when ``True`` .
-            When using this parameter, `op_time` must be set to True.
+            When using this parameter, `op_time` must be set to True. Collecting operator memory data when the graph
+            compilation level is O2 requires collecting from the first step. Default: ``False`` .
         parallel_strategy (bool, optional): (Ascend only) Whether to collect parallel policy performance data.
-            Default value: ``
+            Default value: ``False`` .
         start_profile (bool, optional): The start_profile parameter controls whether to enable or disable performance
             data collection based on conditions. Default: ``True`` .
         aicore_metrics (int, optional): (Ascend only) Types of AICORE performance data collected, when using this
@@ -380,11 +239,12 @@ class Profiler:
             - 4: ResourceConflictRatio contains vec_bankgroup/bank/resc_cflt_ratio etc.
            - 5: MemoryUB contains ub_read/write_bw_mte, ub_read/write_bw_vector, ub\_/write_bw_scalar etc.
            - 6: L2Cache contains write_cache_hit, write_cache_miss_allocate, r0_read_cache_hit, r1_read_cache_hit etc.
+              This function only support Atlas A2 training series products.
 
         l2_cache (bool, optional): (Ascend only) Whether to collect l2 cache data, collect when True.
             Default: ``False`` .
-        hbm_ddr (bool, optional): (Ascend only) Whether to collect
-            Default: ``False`` .
+        hbm_ddr (bool, optional): (Ascend only) Whether to collect On-Chip Memory/DDR read and write rate data,
+            collect when True. Default: ``False`` .
         pcie (bool, optional): (Ascend only) Whether to collect PCIe bandwidth data, collect when True.
             Default: ``False`` .
         sync_enable (bool, optional): (GPU only) Whether the profiler collects operators in a synchronous way.
@@ -396,25 +256,32 @@ class Profiler:
             - False: The asynchronous way. The duration of the operator is that of sending from the CPU to the GPU.
              This method can reduce the impact of adding profiler on overall training time.
         data_process (bool, optional): (Ascend/GPU) Whether to collect data to prepare performance data.
-            Default value: ``
+            Default value: ``False`` .
         timeline_limit (int, optional): (Ascend/GPU) Set the maximum storage size of the timeline file (unit M).
             When using this parameter, `op_time` must be set to True. Default value: ``500`` .
         profile_framework (str, optional): (Ascend/GPU) The host information to collect, it must be one of
-            ["all", "time",
-
-            Default:
+            ["all", "time", None], When is not set to None, it would collect the host profiler data. When using this
+            parameter, the op_time parameter must be enabled.
+            Default: None.
 
-            - "all": Record
-            - "time":
-            - "memory": Only record host memory usage.
+            - "all": Record host timestamp.
+            - "time": The same as "all".
            - None: Not record host information.
         data_simplification (bool, optional): (Ascend only) Whether to remove FRAMEWORK data and other redundant data.
            If set to True, only the delivery of profiler and the original performance data in the PROF_XXX
            directory are retained to save disk space.
            Default value: ``True`` .
-
-
-
+        with_stack (bool, optional): (Ascend) Whether to collect frame host call stack data on the Python side. This
+            data is presented in the form of a flame graph in the timeline. When using this parameter, the op_time and
+            profile_framework parameters must be enabled. Default value: ``False`` .
+        analyse_only (bool, optional): (Ascend/GPU) Whether to parse only performance data and not collect performance
+            data. This parameter is experimental parameter and does not need to be set by the user.
+            Default value: ``False`` .
+        rank_id (int, optional): (Ascend/GPU) Set the rank id during parsing. This parameter is
+            experimental parameter and does not need to be set by the user. Default value: ``0`` .
+        env_enable (bool, optional): (Ascend/GPU) Whether to enable the collection of environment variables.
+            This parameter is experimental parameter and does not need to be set by the user.
+            Default value: ``False`` .
     Raises:
        RuntimeError: When the version of CANN does not match the version of MindSpore,
            MindSpore cannot parse the generated ascend_job_id directory structure.
@@ -428,6 +295,7 @@ class Profiler:
         >>> from mindspore import nn
         >>> import mindspore.dataset as ds
         >>> from mindspore import Profiler
+        >>> from mindspore.profiler import ProfilerLevel
         >>>
         >>> class Net(nn.Cell):
         ...     def __init__(self):
@@ -453,7 +321,7 @@ class Profiler:
         ...
         ...     # Init Profiler
         ...     # Note that the Profiler should be initialized before model.train
-        ...     profiler = Profiler()
+        ...     profiler = Profiler(profiler_level=ProfilerLevel.Level0)
         ...
         ...     # Train Model
         ...     net = Net()
@@ -462,11 +330,6 @@ class Profiler:
         ...     # Profiler end
         ...     profiler.analyse()
     """
-
-    _hwts_output_filename_target = "output_format_data_hwts_"
-    _opcompute_output_filename_target = "output_op_compute_time_"
-    _aicpu_op_output_filename_target = "output_data_preprocess_aicpu_"
-    _has_analysed = False
     _has_initialized = False
    _ascend_profiling_options = ""
    _ascend_job_id = ""
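
The docstring hunks above spell out the dependency rules among the 2.4.0 parameters. A minimal configuration sketch derived only from those rules (`with_stack` requires `op_time` and `profile_framework` to be enabled; the level name and values are as documented above), assuming an Ascend target:

```python
from mindspore import Profiler
from mindspore.profiler import ProfilerLevel

# Sketch of a 2.4.0 configuration; dependency rules come from the docstring above.
profiler = Profiler(
    profiler_level=ProfilerLevel.Level1,  # Level0 + CANN AscendCL data and AICore metrics
    op_time=True,                         # prerequisite for with_stack and profile_framework
    profile_framework="time",             # host timestamps; "time" behaves the same as "all"
    with_stack=True,                      # Python-side call stacks, rendered as a flame graph
)
# ... training happens here ...
profiler.analyse()
```
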
@@ -492,6 +355,9 @@ class Profiler:
         self._rank_size = 1
         self._rank_id = 0
         self._ascend_profiler = None
+        self.metadata = {}
+        self.max_str_len = 4096
+        self.max_meta_size = 50 * 1024
         self._timeline_size_limit_byte = 500 * 1024 * 1024  # 500MB
         self._parallel_strategy = True
         self._model_iteration_dict = None
@@ -512,13 +378,13 @@ class Profiler:
         self._sync_enable = True
         self._stop_time = 0
         self._dynamic_status = False
-        self._profile_framework =
+        self._profile_framework = None
         self._msprof_enable = os.getenv("PROFILER_SAMPLECONFIG")
         self.profiler_level = None
         self._pretty_json = False
         self._analyse_only = kwargs.get("analyse_only", False)
         self._data_simplification = kwargs.get("data_simplification", True)
-        self.
+        self._with_stack = False
         if self._msprof_enable:
             return
         self._start_time = int(time.time() * 1e6)  # us
@@ -540,20 +406,6 @@ class Profiler:
         if self.start_profile:
             self.start()
 
-    @staticmethod
-    def _get_prof_rank(prof_path: str):
-        """get rank id."""
-        sub_dirs = os.listdir(os.path.realpath(prof_path))
-        info_json_path = ""
-        for sub_dir in sub_dirs:
-            if sub_dir.startswith("device_"):
-                device_id = sub_dir.split("_")[-1]
-                info_json_path = os.path.join(prof_path, sub_dir, f"info.json.{device_id}")
-        if not os.path.exists(info_json_path):
-            return -1
-        rank_id, _ = Profiler._parse_info_json(info_json_path)
-        return rank_id
-
     @staticmethod
     def _check_output_path(output_path):
         """Checking path validity."""
@@ -602,30 +454,8 @@ class Profiler:
             logger.warning('Get the drvVersion error, use single-export mode instead. detail : %s', err)
             return None
 
-    @staticmethod
-    def _parse_info_json(info_file):
-        """
-        Parse info log file, get the rank id and device id of the job.
-        Args:
-            input_file (str): The file path of the parse info log file.
-
-        Returns:
-            rank id, device id
-        """
-        with open(info_file, "r") as f:
-            info_dict = json.load(f)
-
-        rank_id = info_dict.get("rank_id", 0)
-        dev_info = info_dict.get("DeviceInfo", [])
-        dev_id = dev_info[0].get("id", -1)
-
-        if int(rank_id) < 0:
-            rank_id = 0
-
-        return str(rank_id), str(dev_id)
-
     @classmethod
-    def offline_analyse(cls, path: str, pretty=False, step_list=None):
+    def offline_analyse(cls, path: str, pretty=False, step_list=None, data_simplification=True):
         """
         Analyze training performance data offline, which is invoked after performance data collection is completed.
 
@@ -633,37 +463,50 @@ class Profiler:
             path (str): The profiling data path which need to be analyzed offline.
                 There needs to be a profiler directory in this path.
             pretty (bool, optional): Whether to pretty json files. Default: ``False``.
-            step_list (list, optional): A list of steps that need to be analyzed
-                By default, all steps will be analyzed.
+            step_list (list, optional): A list of steps that need to be analyzed, the steps must be
+                consecutive integers. Default: ``None``. By default, all steps will be analyzed.
+            data_simplification (bool, optional): Whether to enable data simplification. Default: ``True``.
 
         Examples:
            >>> from mindspore import Profiler
            >>> Profiler.offline_analyse("./profiling_path")
        """
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        real_path = os.path.realpath(path)
+        PathManager.check_input_directory_path(real_path)
+        profiler_parent_path_list = PathManager.get_profiler_parent_path_list(real_path)
+        if not isinstance(data_simplification, bool):
+            logger.warning(f"For offline_analyse, the parameter data_simplification must be bool, "
+                           f"but got type {type(data_simplification)}, it will be set to True.")
+            data_simplification = True
+        if not profiler_parent_path_list:
+            raise ProfilerPathErrorException(f'The provided path "{path}" must have a "profiler" directory for '
+                                             f'single-device profiler data, or multiple subdirectories each containing '
+                                             f'a "profiler" directory for multi-device profiler data. ')
+        # get rank id
+        rank_list = []
+        for parent_path in profiler_parent_path_list:
+            profiler_path = os.path.join(parent_path, Constant.PROFILER_DIR)
+            rank_id = ProfilerInfo.get_rank_id(profiler_path)
+            if int(rank_id) < 0:
+                logger.error(f"Unable to get a valid rank ID in the profiler directory: {profiler_path}")
+            rank_list.append(rank_id)
+        # start offline analyse
+        if len(profiler_parent_path_list) == 1:
+            PathManager.check_directory_path_writeable(profiler_parent_path_list[0])
+            profiler = cls(analyse_only=True, rank_id=rank_list[0], data_simplification=data_simplification)
+            profiler.analyse(profiler_parent_path_list[0], pretty, step_list)
+        else:
+            # Multiprocess Parsing
+            multiprocessing.set_start_method("fork", force=True)
+            process_number = min(Constant.DEFAULT_PROCESS_NUMBER, len(profiler_parent_path_list))
+            pool = multiprocessing.Pool(processes=process_number)
+            for idx, profiler_parent_path in enumerate(profiler_parent_path_list):
+                PathManager.check_directory_path_writeable(profiler_parent_path)
+                profiling_parser = cls(analyse_only=True, rank_id=rank_list[idx],
+                                       data_simplification=data_simplification)
+                pool.apply_async(profiling_parser.analyse, args=(profiler_parent_path, pretty, step_list))
+            pool.close()
+            pool.join()
 
     def op_analyse(self, op_name, device_id=None):
         """
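
The rewritten `offline_analyse` above now validates the input path through `PathManager`, resolves one rank id per `profiler` directory, and fans multi-device parsing out to a `multiprocessing.Pool`. A usage sketch based on its new signature (the directory name is illustrative):

```python
from mindspore import Profiler

# Offline parsing after a run has already produced profiling data.
# The path must contain a "profiler" directory (single device), or one
# subdirectory per device, each with its own "profiler" directory.
Profiler.offline_analyse(
    "./profiling_path",        # illustrative path
    pretty=False,
    step_list=[2, 3, 4],       # steps must be consecutive integers
    data_simplification=True,  # new parameter in 2.4.0
)
```
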
@@ -739,14 +582,38 @@ class Profiler:
                 Offline mode isused in abnormal exit scenario. This parameter should be set to ``None``
                for online mode. Default: ``None``.
            pretty (bool, optional): Whether to pretty json files. Default: ``False``.
-            step_list (list, optional): A list of steps that need to be analyzed
-                By default, all steps will be analyzed.
+            step_list (list, optional): A list of steps that need to be analyzed, the steps must be
+                consecutive integers. Default: ``None``. By default, all steps will be analyzed.
            mode (str, optional): Analysis mode, it must be one of ["sync", "async"]. Default: ``sync``.
 
                - sync: analyse data in current process, it will block the current process.
-                - async: analyse data in subprocess, it will not the current process.Since the parsing process
+                - async: analyse data in subprocess, it will not block the current process. Since the parsing process
                  will take up extra CPU resources, please enable this mode according to the actual resource situation.
 
+        Examples:
+            >>> from mindspore.train import Callback
+            >>> from mindspore import Profiler
+            >>> class StopAtStep(Callback):
+            ...     def __init__(self, start_step=1, stop_step=5):
+            ...         super(StopAtStep, self).__init__()
+            ...         self.start_step = start_step
+            ...         self.stop_step = stop_step
+            ...         self.profiler = Profiler(start_profile=False)
+            ...
+            ...     def step_begin(self, run_context):
+            ...         cb_params = run_context.original_args()
+            ...         step_num = cb_params.cur_step_num
+            ...         if step_num == self.start_step:
+            ...             self.profiler.start()
+            ...
+            ...     def step_end(self, run_context):
+            ...         cb_params = run_context.original_args()
+            ...         step_num = cb_params.cur_step_num
+            ...         if step_num == self.stop_step:
+            ...             self.profiler.stop()
+            ...
+            ...     def end(self, run_context):
+            ...         self.profiler.analyse(step_list=[2,3,4], mode="sync")
         """
         try:
             if isinstance(pretty, bool):
@@ -793,11 +660,12 @@ class Profiler:
 
         ProfilerInfo.set_parallel_info(parallel_mode, stage_num)
         if offline_path:
+            # Loads the ProfilerInfo data, avoid overwriting the data collection prof_info_x.json.
+            ProfilerInfo.load_profiler_info_dict(os.path.join(offline_path, "profiler"))
             ProfilerInfo.set_analyse_start_time(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))
             self._ascend_graph_analyse(offline_path=offline_path)
             ProfilerInfo.set_analyse_end_time(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))
             ProfilerInfo.save(self._output_path)
-            _offline_parse(offline_path)
             return
         if self._msprof_enable:
             return
@@ -817,18 +685,16 @@ class Profiler:
         ProfilerInfo.set_analyse_start_time(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))
         if self._device_target and self._device_target == DeviceTarget.CPU.value:
             self._cpu_analyse()
+            if self._profile_framework:
+                logger.warning("The parameter 'profile_framework' is not support for CPU, so there no host profiler "
+                               "data.")
 
         if self._device_target and self._device_target == DeviceTarget.GPU.value:
             self._gpu_analyse()
 
         elif self._device_target and self._device_target == DeviceTarget.ASCEND.value:
             self._ascend_analyse()
-
-        if self._device_target != DeviceTarget.CPU.value:
-            self._host_info_analyse()
-        else:
-            logger.warning("The parameter 'profile_framework' is not support for CPU, so there no host_info"
-                           " directory in the output path.")
+
         logger.info("Profiling: all the data have been analyzed.")
         ProfilerInfo.set_analyse_end_time(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))
         ProfilerInfo.save(self._output_path)
@@ -895,8 +761,13 @@ class Profiler:
             self._ascend_graph_start()
         ProfilerInfo.set_profiling_start_time(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))
         ProfilerInfo.set_system_cnt(c_expression.get_clock_syscnt())
-        ProfilerInfo.set_system_time(int(c_expression.get_clock_time()
-
+        ProfilerInfo.set_system_time(int(c_expression.get_clock_time()))  # ns
+        if context.get_context("mode") == context.GRAPH_MODE:
+            jit_config = context.get_jit_config()
+            jit_level = jit_config.get("jit_level", "")
+            ProfilerInfo.set_jit_level(jit_level)
+        if self._profile_framework:
+            _framework_profiler_enable_mi()
 
     def stop(self):
         """
@@ -957,8 +828,88 @@ class Profiler:
         self._init_profiler_info()
         ProfilerInfo.set_diff_time(self._start_time - self._monotonic_time)
         ProfilerInfo.save(self._output_path)
+        self._dump_metadata()
         logger.info("Profiling: stop time: %d", self._stop_time)
 
+    def add_metadata(self, key: str, value: str):
+        """
+        Report custom metadata key-value pair data.
+
+        Args:
+            key (str): The key to the metadata.
+            value (str): The value to the metadata.
+
+        Examples:
+            >>> from mindspore import Profiler
+            >>> # Profiler init.
+            >>> profiler = Profiler()
+            >>> # Call Profiler add_metadata
+            >>> profiler.add_metadata("test_key", "test_value")
+            >>> # Profiler end
+            >>> profiler.analyse()
+        """
+        if not isinstance(key, str) or not isinstance(value, str):
+            logger.warning("The key and value of metadata must be string. Skip this metadata.")
+            return
+        if not self._check_str_valid(key) or not self._check_str_valid(value):
+            logger.warning("Invalid input key or value. Skip this metadata.")
+            return
+        add_size = getsizeof(key) + getsizeof(value)
+        if getsizeof(self.metadata) + add_size < self.max_meta_size:
+            if key in self.metadata:
+                logger.warning(f"{key} is already saved as metadata, override it.")
+            self.metadata[key] = value
+        else:
+            logger.warning("Too many metadata added. Skip this metadata")
+
+    def add_metadata_json(self, key: str, value: str):
+        """
+        Report custom metadata key-value pair data with the value as a JSON string data.
+
+        Args:
+            key (str): The key to the metadata.
+            value (str): The json str format value to the metadata.
+
+        Examples:
+            >>> import json
+            >>> from mindspore import Profiler
+            >>> # Profiler init.
+            >>> profiler = Profiler()
+            >>> # Call Profiler add_metadata_json
+            >>> profiler.add_metadata_json("test_key", json.dumps({"key1": 1, "key2": 2}))
+            >>> # Profiler end, metadata will be saved in profiler_metadata.json
+            >>> profiler.analyse()
+        """
+        if not isinstance(key, str) or not isinstance(value, str):
+            logger.warning("The key and value of metadata must be string. Skip this metadata.")
+            return
+        if not self._check_str_valid(key) or not self._check_str_valid(value):
+            logger.warning("Invalid input key or value. Skip this metadata.")
+            return
+        add_size = getsizeof(key) + getsizeof(value)
+        if getsizeof(self.metadata) + add_size < self.max_meta_size:
+            try:
+                if key in self.metadata:
+                    logger.warning(f"{key} is already saved as metadata, override it.")
+                self.metadata[key] = json.loads(value)
+            except ValueError:
+                logger.warning("The metadata value must be json format string. Skip this metadata")
+        else:
+            logger.warning("Too many metadata added. Skip this metadata")
+
+    def _dump_metadata(self):
+        """Dump metadata to file."""
+        if not self.metadata:
+            return
+        FileManager.create_json_file(self._output_path, self.metadata, "profiler_metadata.json", indent=4)
+        self.metadata.clear()
+
+    def _check_str_valid(self, input_str: str):
+        """Check str length"""
+        if len(input_str) > self.max_str_len:
+            return False
+        return True
+
     def _set_ascend_job_id(self, ascend_job_id):
         """Set output_path for offline parsing performance data."""
         if not ascend_job_id:
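
The new metadata API above bounds each string at `max_str_len` (4096) characters and the whole store at `max_meta_size` (50 KB, measured via `sys.getsizeof`); anything outside those bounds is skipped with a warning rather than raised. A short sketch of the accepted and rejected cases, following the checks in the code above:

```python
import json
from mindspore import Profiler

profiler = Profiler()
profiler.add_metadata("run_tag", "baseline-lr0.01")    # stored as a plain string
profiler.add_metadata("too_long", "x" * 5000)          # skipped: longer than max_str_len
profiler.add_metadata_json("hparams", json.dumps({"lr": 0.01, "bs": 32}))  # stored parsed
profiler.add_metadata_json("broken", "{not json")      # skipped: json.loads() fails
profiler.analyse()  # metadata is flushed to profiler_metadata.json when profiling stops
```
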
@@ -983,7 +934,7 @@ class Profiler:
|
|
|
983
934
|
self._profile_communication = options.get('profile_communication')
|
|
984
935
|
self._op_time = options.get('op_time')
|
|
985
936
|
self._device_target = context.get_context("device_target").lower()
|
|
986
|
-
self._profile_framework = options.get('profile_framework',
|
|
937
|
+
self._profile_framework = options.get('profile_framework', None)
|
|
987
938
|
self._profiler_manager = c_expression.ProfilerManager.get_instance()
|
|
988
939
|
self._cpu_profiler = c_expression.Profiler.get_instance("CPU")
|
|
989
940
|
if self._data_process:
|
|
@@ -1034,32 +985,32 @@ class Profiler:
|
|
|
1034
985
|
|
|
1035
986
|
def _gpu_profiler_init(self, kwargs):
|
|
1036
987
|
"""Gpu profiler init."""
|
|
988
|
+
self._parse_parameter_for_gpu(kwargs)
|
|
1037
989
|
# Setup and start MindData Profiling
|
|
1038
990
|
if self._data_process:
|
|
1039
991
|
self._md_profiler = cde.GlobalContext.profiling_manager()
|
|
1040
992
|
self._md_profiler.init()
|
|
1041
|
-
self._parse_parameter_for_gpu(kwargs)
|
|
1042
993
|
|
|
1043
994
|
gpu_profiler = c_expression.Profiler
|
|
1044
995
|
self._gpu_profiler = gpu_profiler.get_instance("GPU")
|
|
1045
|
-
self._gpu_profiler.init(self._output_path)
|
|
1046
|
-
self._gpu_profiler.sync_enable(self._sync_enable)
|
|
1047
996
|
if GlobalComm.WORLD_COMM_GROUP == "nccl_world_group":
|
|
1048
997
|
self._dev_id = str(get_rank())
|
|
1049
998
|
os.environ['DEVICE_ID'] = self._dev_id
|
|
1050
999
|
self._rank_id = self._dev_id
|
|
1000
|
+
self._gpu_profiler.init(self._output_path, int(self._rank_id))
|
|
1001
|
+
self._gpu_profiler.sync_enable(self._sync_enable)
|
|
1051
1002
|
|
|
1052
1003
|
def _ascend_profiler_init(self, kwargs):
|
|
1053
1004
|
"""Ascend profiler init."""
|
|
1005
|
+
self._parse_parameter_for_ascend(kwargs)
|
|
1054
1006
|
# Setup and start MindData Profiling
|
|
1055
1007
|
if self._data_process:
|
|
1056
1008
|
self._md_profiler = cde.GlobalContext.profiling_manager()
|
|
1057
1009
|
self._md_profiler.init()
|
|
1058
1010
|
self._init_time = int(time.time() * 10000000)
|
|
1059
1011
|
logger.info("Profiling: profiling init time: %d", self._init_time)
|
|
1060
|
-
self._parse_parameter_for_ascend(kwargs)
|
|
1061
|
-
os.environ['DEVICE_ID'] = self._dev_id
|
|
1062
1012
|
|
|
1013
|
+
os.environ['DEVICE_ID'] = self._dev_id
|
|
1063
1014
|
self._ascend_profiling_options = json.dumps(self._construct_profiling_options())
|
|
1064
1015
|
# Characters longer than 2048 are ignored, resulting in profiling option resolution errors
|
|
1065
1016
|
if len(self._ascend_profiling_options) > 2048:
|
|
@@ -1075,7 +1026,7 @@ class Profiler:
|
|
|
1075
1026
|
data_path = os.path.join(container_path, "data")
|
|
1076
1027
|
data_path = validate_and_normalize_path(data_path)
|
|
1077
1028
|
if not os.path.exists(data_path):
|
|
1078
|
-
os.makedirs(data_path, exist_ok=True)
|
|
1029
|
+
os.makedirs(data_path, exist_ok=True, mode=stat.S_IRUSR | stat.S_IWUSR | stat.S_IXUSR)
|
|
1079
1030
|
|
|
1080
1031
|
def _construct_profiling_options(self):
|
|
1081
1032
|
"""
|
|
@@ -1101,9 +1052,9 @@ class Profiler:
|
|
|
1101
1052
|
"op_time": self.ENABLE_STATUS if self._op_time else self.DISABLE_STATUS,
|
|
1102
1053
|
"profile_framework": self._profile_framework,
|
|
1103
1054
|
"profiler_level": self.profiler_level.value if self.profiler_level else self.DISABLE_STATUS,
|
|
1104
|
-
"
|
|
1055
|
+
"with_stack": "on" if self._with_stack else "off"
|
|
1105
1056
|
}
|
|
1106
|
-
|
|
1057
|
+
ProfilerInfo.set_profiling_options(profiling_options)
|
|
1107
1058
|
return profiling_options
|
|
1108
1059
|
|
|
1109
1060
|
def _parse_parameter_for_gpu(self, kwargs):
|
|
@@ -1175,11 +1126,11 @@ class Profiler:
|
|
|
1175
1126
|
pcie_enable = False
|
|
1176
1127
|
self._pcie = self.ENABLE_STATUS if pcie_enable else self.DISABLE_STATUS
|
|
1177
1128
|
|
|
1178
|
-
self._parallel_strategy = kwargs.pop("parallel_strategy",
|
|
1129
|
+
self._parallel_strategy = kwargs.pop("parallel_strategy", False)
|
|
1179
1130
|
if not isinstance(self._parallel_strategy, bool):
|
|
1180
1131
|
logger.warning(f"For '{self.__class__.__name__}', the parameter parallel_strategy must be bool, "
|
|
1181
|
-
f"but got type {type(self._parallel_strategy)}, it will be set to
|
|
1182
|
-
self._parallel_strategy =
|
|
1132
|
+
f"but got type {type(self._parallel_strategy)}, it will be set to False.")
|
|
1133
|
+
self._parallel_strategy = False
|
|
1183
1134
|
|
|
1184
1135
|
self.profiler_level = kwargs.pop("profiler_level", None)
|
|
1185
1136
|
if self.profiler_level and not isinstance(self.profiler_level, ProfilerLevel):
|
|
@@ -1381,7 +1332,7 @@ class Profiler:
|
|
|
1381
1332
|
finally:
|
|
1382
1333
|
pass
|
|
1383
1334
|
|
|
1384
|
-
def _ascend_graph_memory_analyse(self
|
|
1335
|
+
def _ascend_graph_memory_analyse(self):
|
|
1385
1336
|
"""Analyse memory usage info."""
|
|
1386
1337
|
if not self._profile_memory:
|
|
1387
1338
|
return
|
|
@@ -1390,7 +1341,7 @@ class Profiler:
|
|
|
1390
1341
|
"PyNative mode currently.")
|
|
1391
1342
|
try:
|
|
1392
1343
|
logger.info("Profiling: analyzing the memory usage info.")
|
|
1393
|
-
self._analyse_memory_usage(
|
|
1344
|
+
self._analyse_memory_usage()
|
|
1394
1345
|
except (ProfilerIOException, ProfilerFileNotFoundException, ProfilerRawFileException) as err:
|
|
1395
1346
|
logger.warning(err.message)
|
|
1396
1347
|
finally:
|
|
@@ -1408,28 +1359,37 @@ class Profiler:
 
         dev_id = self._rank_id if self._device_target == DeviceTarget.ASCEND.value else self._dev_id
         ascend_profiler_output_path = os.path.join(ascend_ms_path, 'ASCEND_PROFILER_OUTPUT')
-
+        PathManager.make_dir_safety(ascend_profiler_output_path)
 
         source_profiler_info_path = os.path.join(self._output_path, f"profiler_info_{dev_id}.json")
         target_profiler_info_path = os.path.join(ascend_ms_path, f"profiler_info_{dev_id}.json")
-
+        PathManager.copy_file(source_profiler_info_path, target_profiler_info_path)
+
+        source_profiler_metadata_path = os.path.join(self._output_path, f"profiler_metadata.json")
+        target_profiler_metadata_path = os.path.join(ascend_ms_path, f"profiler_metadata.json")
+        PathManager.copy_file(source_profiler_metadata_path, target_profiler_metadata_path)
 
         source_timeline_path = os.path.join(self._output_path, f"ascend_timeline_display_{dev_id}.json")
         target_timeline_path = os.path.join(ascend_profiler_output_path, f"trace_view.json")
-
+        PathManager.copy_file(source_timeline_path, target_timeline_path)
 
         src_op_mem_file = os.path.join(self._output_path, f"operator_memory_{dev_id}.csv")
-
-
-        shutil.copy(src_op_mem_file, dst_op_mem_file)
+        dst_op_mem_file = os.path.join(ascend_profiler_output_path, f"operator_memory.csv")
+        PathManager.copy_file(src_op_mem_file, dst_op_mem_file)
 
-        ms_output_path = os.path.
+        ms_output_path = os.path.realpath(
             os.path.join(source_path, os.path.pardir, 'mindstudio_profiler_output'))
         static_op_mem_path = os.path.join(ms_output_path, f"static_op_mem_*.csv")
         src_static_op_mem_path = glob.glob(static_op_mem_path)
         if src_static_op_mem_path:
             dst_static_op_mem_file = os.path.join(ascend_profiler_output_path, f"static_op_mem.csv")
-
+            PathManager.copy_file(src_static_op_mem_path[0], dst_static_op_mem_file)
+
+        src_op_statistics_path = os.path.join(ms_output_path, "op_statistic_*.csv")
+        src_op_statistics_path = glob.glob(src_op_statistics_path)
+        if src_op_statistics_path:
+            dst_op_statistics_path = os.path.join(ascend_profiler_output_path, f"op_statistic.csv")
+            PathManager.copy_file(src_op_statistics_path[0], dst_op_statistics_path)
 
         self._ascend_graph_cluster_analyse(source_path, ascend_profiler_output_path)
         self._ascend_graph_communicate_analyse(source_path, ascend_profiler_output_path)
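The hunk above swaps ad-hoc `shutil.copy` calls and unmanaged output directories for `PathManager.make_dir_safety` and `PathManager.copy_file`. The helper bodies are not part of this diff; the following is a minimal sketch of what such permission-conscious helpers plausibly do (only the two names come from the hunk, the implementations are assumptions):

```python
# Hypothetical sketch of PathManager-style helpers; the real MindSpore
# implementation may validate paths and raise profiler-specific errors.
import os
import shutil
import stat


def make_dir_safety(path: str) -> None:
    # Create the directory with owner-only permissions in a single call,
    # instead of creating it world-readable and tightening it afterwards.
    os.makedirs(path, mode=stat.S_IRWXU, exist_ok=True)


def copy_file(src: str, dst: str) -> None:
    # Tolerate a missing source (profiling artifacts are optional) and
    # restrict the copied file to owner read/write. Assumed behavior.
    if not os.path.isfile(src):
        return
    shutil.copy(src, dst)
    os.chmod(dst, stat.S_IRUSR | stat.S_IWUSR)
```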
@@ -1468,7 +1428,7 @@ class Profiler:
                                                        f"communication_matrix.json")
         communication_matrix_file_path = validate_and_normalize_path(communication_matrix_file_path)
 
-        analyze_path = os.path.
+        analyze_path = os.path.realpath(os.path.join(source_path, os.path.pardir, 'analyze'))
         communicate_analyser = AscendCommunicationGenerator(analyze_path)
         communicate_analyser.parse()
         communicate_analyser.write(communication_file_path, communication_matrix_file_path)
@@ -1500,26 +1460,6 @@ class Profiler:
         finally:
             pass
 
-    def _ascend_graph_msadvisor_analyse(self, job_id):
-        """Call MSAdvisor function."""
-        logger.info("MSAdvisor starts running.")
-        msadvisor = Msadvisor(job_id, self._rank_id, self._output_path, pretty=self._pretty_json)
-        try:
-            msadvisor.analyse()
-        except FileNotFoundError as err:
-            logger.warning("MSAdvisor: command not found,"
-                           "please check if installed ascend-toolkit and set environment path correctly. %s", err)
-        except OSError as err:
-            logger.warning("Cannot execute binary file: Exec format error. %s", err)
-        except subprocess.CalledProcessError:
-            logger.warning("MSAdvisor running failed, please check MSAdvisor running log.")
-        except (ValueError, ProfilerFileNotFoundException) as err:
-            logger.warning("MSAdvisor running failed. %s", err)
-        finally:
-            pass
-        if context.get_context("mode") == context.PYNATIVE_MODE:
-            logger.warning("Pynative mode does not support MSAdvisor analyzer currently.")
-
     def _get_kernel_op_map(self, op_summary, kernels: List[CANNEvent]) -> List:
         """Get the mapping between framework operator and device kernel."""
         if not kernels:
@@ -1535,8 +1475,6 @@ class Profiler:
             key = name if name.startswith("hcom_") else (name, ts)
             launch_op = kernel_map.get(key)
             if not launch_op:
-                if context.get_context("mode") == context.GRAPH_MODE or not name.startswith("aclnn"):
-                    logger.warning(f"Failed to get launch operator for {name}!")
                 continue
             launch_ops[index] = launch_op.name
         return launch_ops
@@ -1547,6 +1485,7 @@ class Profiler:
         else:
             MultiProcessPool().add_async_job(self._ascend_graph_analyse_inner)
 
+    @timeit("Profiler analyse done")
     def _ascend_graph_analyse_inner(self, offline_path=None):
         """Ascend graph mode analyse."""
         job_id = self._get_profiling_job_id(offline_path)
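The newly added `@timeit("Profiler analyse done")` suggests a decorator that logs how long the analysis step takes. Its definition is outside this hunk; a minimal sketch under that assumption:

```python
# Hypothetical timeit decorator matching the usage above; the actual
# MindSpore helper may format its log message differently.
import functools
import logging
import time

logger = logging.getLogger(__name__)


def timeit(message):
    def decorator(func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            start = time.perf_counter()
            try:
                return func(*args, **kwargs)
            finally:
                logger.info("%s, cost: %.3f s", message, time.perf_counter() - start)
        return wrapper
    return decorator
```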
@@ -1558,7 +1497,7 @@
         source_path = os.path.join(self._output_path, job_id)
         self._minddata_analyse()
         if self._op_time:
-            mindstudio_profiler_output = os.path.
+            mindstudio_profiler_output = os.path.realpath(
                 os.path.join(source_path, os.path.pardir, 'mindstudio_profiler_output'))
             flag = _ascend_graph_msprof_generator(mindstudio_profiler_output, self._model_iteration_dict)
             if not flag:
@@ -1567,14 +1506,17 @@
             ProfilerInfo.set_export_flag(flag)
             op_summary, op_statistic, steptrace, steptrace_model \
                 = _ascend_graph_msprof_analyse(mindstudio_profiler_output)
+            kernels = self._ascend_timeline_analyse(op_summary, steptrace, source_path, mindstudio_profiler_output)
+
             if isinstance(op_statistic, np.ndarray) and op_statistic.shape[0] == 0 or \
                     not isinstance(op_statistic, np.ndarray) and not op_statistic:
+                logger.warning('Op statistic data is empty!')
                 return
-
+
             launch_ops = self._get_kernel_op_map(op_summary, kernels)
             self._ascend_op_analyse(op_summary, op_statistic, self._dynamic_status, launch_ops)
             graph_ids = np.unique(op_summary['Model ID']).tolist()
-
+            self._ascend_fpbp_analyse(op_summary, steptrace)
             if len(graph_ids) == 1:
                 self._ascend_step_trace_analyse(steptrace)
             else:
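The emptiness check kept as context in this hunk leans on Python precedence: `and` binds tighter than `or`, so the condition parses as "(ndarray with zero rows) or (falsy non-ndarray)". A short self-contained check of that reading:

```python
import numpy as np


def op_statistic_is_empty(op_statistic) -> bool:
    # Same predicate as in the hunk, with the implicit grouping made explicit.
    return ((isinstance(op_statistic, np.ndarray) and op_statistic.shape[0] == 0)
            or (not isinstance(op_statistic, np.ndarray) and not op_statistic))


assert op_statistic_is_empty(np.empty((0, 3)))     # ndarray with no rows
assert op_statistic_is_empty([])                   # falsy non-ndarray
assert not op_statistic_is_empty(np.ones((2, 3)))  # populated ndarray
```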
@@ -1582,13 +1524,13 @@
             if self._dynamic_status:
                 self._ascend_dynamic_net_analyse(op_summary)
             self._ascend_flops_analyse(op_summary, launch_ops)
-            self._ascend_graph_memory_analyse(
+            self._ascend_graph_memory_analyse()
             self._ascend_ms_analyze(mindstudio_profiler_output)
             self._ascend_graph_hccl_analyse(mindstudio_profiler_output, steptrace)
-            self._ascend_graph_msadvisor_analyse(job_id)
             self._minddata_aicpu_analyse(self._output_path, job_id)
             ProfilerInfo.set_graph_ids(graph_ids)
             try:
+                ProfilerInfo.set_data_simplification(self._data_simplification)
                 ProfilerPathManager.simplify_data(self._output_path, self._data_simplification)
             except RuntimeError as err:
                 logger.error('Profilier simplify data failed, %s', str(err))
@@ -1690,7 +1632,7 @@
         try:
             timeline_generator = CpuTimelineGenerator(self._output_path, self._rank_id, context.get_context("mode"))
             timeline_generator.init_timeline(pretty=self._pretty_json)
-            timeline_generator.write_timeline(
+            timeline_generator.write_timeline()
             timeline_generator.write_timeline_summary()
         except (ProfilerIOException, ProfilerFileNotFoundException, RuntimeError) as err:
             logger.warning('Fail to write timeline data: %s', err)
@@ -1699,15 +1641,13 @@
             raise RuntimeError("Currently, the CPU platform does not support Pynative mode to collect performance "
                                "data.")
 
-    def _analyse_step_trace(self,
-                            is_gpu_kernel_async_launch_flag=False):
+    def _analyse_step_trace(self, is_training_mode_flag=True, is_gpu_kernel_async_launch_flag=False):
         """
         Analyse step trace data and save the result.
 
         Args:
-            source_path (str): The directory that contains the step trace original data.
-            framework_parser (FrameworkParser): The framework parse instance.
             is_training_mode_flag (bool): Whether in training mode or not.
+            is_gpu_kernel_async_launch_flag (bool): Whether gpu kernel launches are asynchronous
         """
         logger.info("Begin to parse step trace.")
         # construct output path
@@ -1738,56 +1678,31 @@
             logger.info("Finish saving the intermediate result: %s", step_trace_intermediate_file_path)
             logger.info("The point info is: %s", point_info)
 
-            return point_info, is_training_mode_flag
-        return {}, is_training_mode_flag
-
-        # whether keep the first step
-        skip_first_step_flag = framework_parser.check_op_name(INIT_OP_NAME)
-        # recognize inference or training mode
-        is_training_mode_flag = framework_parser.check_op_name("Gradients")
-        # parser the step trace files and save the result to disk
-        source_path = validate_and_normalize_path(source_path)
-        parser = AscendStepTraceParser(input_dir=source_path,
-                                       output_file_path=step_trace_intermediate_file_path,
-                                       skip_first_step=skip_first_step_flag,
-                                       is_training_mode=is_training_mode_flag)
-        parser.set_task_id_op_name_dict(framework_parser.to_task_id_full_op_name_dict())
-        parser.parse_and_save()
-        point_info = parser.record_point_info(point_info_file_path)
-
-        # print parser result
-        parser.show()
-        logger.info("Finish saving the intermediate result: %s", step_trace_intermediate_file_path)
-        logger.info("The point info is: %s", point_info)
-
-        return point_info, is_training_mode_flag
-
     def _generate_timeline(self, reduce_op_type):
         """Used for gpu, generate timeline info, write to json format file."""
         try:
             timeline_generator = GpuTimelineGenerator(self._output_path, self._dev_id, self._rank_size,
                                                       context.get_context("mode"))
             timeline_generator.init_timeline(reduce_op_type)
-            self._timeline_meta = timeline_generator.write_timeline(
+            self._timeline_meta = timeline_generator.write_timeline()
             timeline_generator.write_timeline_summary()
+            timeline_generator.parse_fwk_data()
+            timeline_generator.write_fwk_timeline()
             return timeline_generator
         except (ProfilerIOException, ProfilerFileNotFoundException, RuntimeError) as err:
             logger.warning('Fail to write timeline data: %s', err)
             raise RuntimeError('Fail to write timeline data.') from err
 
-    def _analyse_memory_usage(self
+    def _analyse_memory_usage(self):
         """Analyse memory usage data."""
         integrator = Integrator(self._output_path, self._rank_id)
-
-        memory_parser = MemoryUsageParser(self._output_path, self._rank_id, pretty=self._pretty_json)
-        memory_parser.init_memory_usage_info(aicore_detail_data, points)
-        memory_parser.write_memory_files()
+        integrator.get_aicore_detail_data()
 
     def _get_profiling_job_id(self, offline_path):
         """Get profiling job id, which was generated by ada service.
 
         Returns:
-            str, profiling job id
+            str, profiling job id, eg: PROF_XXX/device_*.
         """
 
         if offline_path:
@@ -1816,18 +1731,17 @@
                                    "profiler will ignore this job dir.", job_dir)
                 continue
 
-            prof_rank_id
+            prof_rank_id = ProfilerInfo.get_rank_id(self._output_path)
+            prof_device_id = ProfilerInfo.get_device_id(prof_dir)
             job_start_time = self._parse_job_start_time(prof_dir)
 
             if offline_path:
-                if self._rank_id != prof_rank_id:
-                    continue
                 self._start_time = int(job_start_time)
             else:
                 if self._dev_id != prof_device_id and self._rank_id != prof_rank_id:
-                    logger.
-
-
+                    logger.warning("Find profiling find job path %s, but not current training device id. "
+                                   "Current training rank id %s, but job path rank id: %s, "
+                                   "profiler will ignore this job dir.", job_dir, self._rank_id, prof_rank_id)
                     continue
 
             if job_start_time < self._start_time:
@@ -1936,19 +1850,21 @@
             self._output_path = validate_and_normalize_path(output_path)
         else:
             output_path = kwargs.pop("output_path")
+            if not isinstance(output_path, str):
+                logger.warning(
+                    f"The output_path must be a string, but got type {type(output_path)}, it will be set to 'data'.")
+                output_path = "data"
             self._output_path = validate_and_normalize_path(output_path)
 
         self._output_path = os.path.join(self._output_path, "profiler")
         if not os.path.exists(self._output_path):
-            os.makedirs(self._output_path, exist_ok=True)
-            os.chmod(self._output_path, stat.S_IRUSR | stat.S_IWUSR | stat.S_IXUSR)
+            os.makedirs(self._output_path, exist_ok=True, mode=stat.S_IRUSR | stat.S_IWUSR | stat.S_IXUSR)
         else:
             logger.warning("The target dir already exists. "
                            "There may be some old profiling data, and they will be rewritten in the end.")
         self._framework_path = os.path.join(self._output_path, "FRAMEWORK")
         if not os.path.exists(self._framework_path):
-            os.makedirs(self._framework_path, exist_ok=True)
-            os.chmod(self._framework_path, stat.S_IRUSR | stat.S_IWUSR | stat.S_IXUSR)
+            os.makedirs(self._framework_path, exist_ok=True, mode=stat.S_IRUSR | stat.S_IWUSR | stat.S_IXUSR)
 
     def _parser_kwargs(self, kwargs):
         """Parse kwargs vale."""
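Both directory hunks fold the follow-up `os.chmod` into `os.makedirs(..., mode=...)`, so the owner-only bits apply at creation time instead of leaving a brief window where the directory exists with default permissions. One caveat: on POSIX systems `mode` is filtered by the process umask, so the two variants are not always byte-for-byte equivalent:

```python
# Demonstration of the difference between the old two-step pattern and
# the new one-step pattern; the paths under tempfile are illustrative only.
import os
import stat
import tempfile

base = tempfile.mkdtemp()
owner_only = stat.S_IRUSR | stat.S_IWUSR | stat.S_IXUSR

# New pattern: mode is applied at creation, subject to the umask.
one_step = os.path.join(base, "one_step")
os.makedirs(one_step, exist_ok=True, mode=owner_only)

# Old pattern: the directory briefly carries default permissions
# before chmod tightens them, but the final bits ignore the umask.
two_step = os.path.join(base, "two_step")
os.makedirs(two_step, exist_ok=True)
os.chmod(two_step, owner_only)

print(oct(os.stat(one_step).st_mode & 0o777), oct(os.stat(two_step).st_mode & 0o777))
```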
@@ -1969,11 +1885,11 @@
                            f"but got type {type(self._op_time)}, it will be set to True.")
             self._op_time = True
 
-        self._data_process = kwargs.pop("data_process",
+        self._data_process = kwargs.pop("data_process", False)
         if not isinstance(self._data_process, bool):
             logger.warning(f"For '{self.__class__.__name__}', the parameter data_process must be bool, "
-                           f"but got type {type(self._data_process)}, it will be set to
-            self._data_process =
+                           f"but got type {type(self._data_process)}, it will be set to False.")
+            self._data_process = False
 
         timeline_limit = kwargs.pop("timeline_limit", 500)
         if isinstance(timeline_limit, bool) or not isinstance(timeline_limit, int):
@@ -1985,70 +1901,22 @@
                 "[Profiler]The 'timeline_limit' parameter must be greater than 0, it will be set to 500.")
             timeline_limit = 500
         self._timeline_size_limit_byte = timeline_limit * 1024 * 1024
-        self._profile_framework = kwargs.pop("profile_framework",
-        if self._profile_framework not in ["
-            logger.warning(f"For '{self.__class__.__name__}', the parameter profile_framework must be one of [
-                           f" 'time', 'all', None], but got {self._profile_framework}, it will be set to
-            self._profile_framework =
-        if not isinstance(self._data_simplification, bool):
-            logger.warning(f"For '{self.__class__.__name__}', the parameter data_simplification must be bool, "
-                           f"but got type {type(self._data_simplification)}, it will be set to True.")
-            self._data_simplification = True
+        self._profile_framework = kwargs.pop("profile_framework", None)
+        if self._profile_framework not in ["time", "all", None]:
+            logger.warning(f"For '{self.__class__.__name__}', the parameter profile_framework must be one of ["
+                           f" 'time', 'all', None], but got {self._profile_framework}, it will be set to None.")
+            self._profile_framework = None
 
         if not isinstance(self._data_simplification, bool):
             logger.warning(f"For '{self.__class__.__name__}', the parameter data_simplification must be bool, "
                            f"but got type {type(self._data_simplification)}, it will be set to True.")
             self._data_simplification = True
 
-        self.
-        if not isinstance(self.
-            logger.warning(f"For '{self.__class__.__name__}', the parameter
-                           f"type {type(self.
-            self.
-
-
-
-        Read data from the csv file, and write it into timeline file, so the timeline can be show on tracing tool.
-        """
-        logger.info("Profiling HostInfo start.")
-        host_dir = os.path.join(self._output_path, 'host_info')
-        host_dir = validate_and_normalize_path(host_dir)
-        if not os.path.exists(host_dir):
-            logger.warning("Host info directory: %s not exist.", host_dir)
-            return
-        csv_file_name = 'host_info_' + str(self._rank_id) + '.csv'
-        json_file_name = 'timeline_' + str(self._rank_id) + '.json'
-        memory_file_name = 'host_memory_' + str(self._rank_id) + '.csv'
-        dataset_file_name = 'dataset_' + str(self._rank_id) + '.csv'
-        host_info_file = os.path.join(self._output_path, 'host_info', csv_file_name)
-        timeline_file = os.path.join(self._output_path, 'host_info', json_file_name)
-        memory_file = os.path.join(self._output_path, 'host_info', memory_file_name)
-        dataset_execution_file = os.path.join(self._output_path, 'host_info', dataset_file_name)
-        _parse_host_info(host_info_file, timeline_file, memory_file)
-        _calculate_dataset_execution_time(host_info_file, dataset_execution_file)
-        logger.info("Profile HostInfo finished.")
-
-
-def _offline_parse(offline_path):
-    """Parse data in abnormal scenario, only support for host_info at present."""
-    logger.info("Profiling HostInfo offline start.")
-    host_dir = os.path.join(offline_path, 'profiler', 'host_info')
-    host_dir = validate_and_normalize_path(host_dir)
-    if not os.path.exists(host_dir):
-        logger.warning("Host info directory: %s not exist.", host_dir)
-        return
-    files = os.listdir(host_dir)
-    for file in files:
-        if not file.startswith("host_info_") or not file.endswith(".csv"):
-            continue
-        rank_id = file.split('_')[-1].split('.')[0]
-        if not rank_id.isdigit():
-            logger.info("Cannot get rank_id from file: %s, skip it", file)
-            return
-        host_info_file = os.path.join(host_dir, file)
-        timeline_file = os.path.join(host_dir, f'timeline_{rank_id}.json')
-        memory_file = os.path.join(host_dir, f'host_memory_{rank_id}.csv')
-        dataset_execution_file = os.path.join(host_dir, f'dataset_{rank_id}.csv')
-        _parse_host_info(host_info_file, timeline_file, memory_file)
-        _calculate_dataset_execution_time(host_info_file, dataset_execution_file)
-    logger.info("Profile HostInfo offline finished.")
+        self._with_stack = kwargs.pop("with_stack", False)
+        if not isinstance(self._with_stack, bool):
+            logger.warning(f"For '{self.__class__.__name__}', the parameter with_stack must be bool, but got "
+                           f"type {type(self._with_stack)}, it will be set to False.")
+            self._with_stack = False
+        if self._with_stack and self._profile_framework not in ["time", "all"]:
+            logger.warning("When using the with_stack parameter, the profile_framework parameter must be enabled.")
+            self._with_stack = False
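Per the new validation, `with_stack` is silently reset to `False` unless `profile_framework` is `"time"` or `"all"`. A hedged usage sketch inferred from that logic (consult the mindspore.Profiler documentation for the authoritative parameter set):

```python
# Usage inferred from the kwargs validation above; treat the parameter
# combination as an assumption, not official guidance.
import mindspore as ms

profiler = ms.Profiler(output_path="data", profile_framework="all", with_stack=True)
# ... run the network to be profiled ...
profiler.analyse()
```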