mindspore 2.3.0__cp39-cp39-win_amd64.whl → 2.4.1__cp39-cp39-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of mindspore might be problematic.

Files changed (287)
  1. mindspore/.commit_id +1 -1
  2. mindspore/__init__.py +3 -1
  3. mindspore/_c_dataengine.cp39-win_amd64.pyd +0 -0
  4. mindspore/_c_expression.cp39-win_amd64.pyd +0 -0
  5. mindspore/_c_mindrecord.cp39-win_amd64.pyd +0 -0
  6. mindspore/_checkparam.py +50 -9
  7. mindspore/_extends/parse/compile_config.py +41 -0
  8. mindspore/_extends/parse/parser.py +9 -7
  9. mindspore/_extends/parse/standard_method.py +52 -14
  10. mindspore/_extends/pijit/pijit_func_white_list.py +350 -24
  11. mindspore/amp.py +24 -10
  12. mindspore/avcodec-59.dll +0 -0
  13. mindspore/avdevice-59.dll +0 -0
  14. mindspore/avfilter-8.dll +0 -0
  15. mindspore/avformat-59.dll +0 -0
  16. mindspore/avutil-57.dll +0 -0
  17. mindspore/common/__init__.py +6 -4
  18. mindspore/common/_pijit_context.py +190 -0
  19. mindspore/common/_register_for_tensor.py +2 -1
  20. mindspore/common/_tensor_overload.py +139 -0
  21. mindspore/common/api.py +102 -87
  22. mindspore/common/dump.py +5 -6
  23. mindspore/common/generator.py +1 -7
  24. mindspore/common/hook_handle.py +14 -26
  25. mindspore/common/initializer.py +51 -15
  26. mindspore/common/mindir_util.py +2 -2
  27. mindspore/common/parameter.py +62 -15
  28. mindspore/common/recompute.py +39 -9
  29. mindspore/common/sparse_tensor.py +7 -3
  30. mindspore/common/tensor.py +183 -37
  31. mindspore/communication/__init__.py +1 -1
  32. mindspore/communication/_comm_helper.py +38 -3
  33. mindspore/communication/comm_func.py +315 -60
  34. mindspore/communication/management.py +14 -14
  35. mindspore/context.py +132 -22
  36. mindspore/dataset/__init__.py +1 -1
  37. mindspore/dataset/audio/__init__.py +1 -1
  38. mindspore/dataset/core/config.py +7 -0
  39. mindspore/dataset/core/validator_helpers.py +7 -0
  40. mindspore/dataset/engine/cache_client.py +1 -1
  41. mindspore/dataset/engine/datasets.py +72 -44
  42. mindspore/dataset/engine/datasets_audio.py +7 -7
  43. mindspore/dataset/engine/datasets_standard_format.py +53 -3
  44. mindspore/dataset/engine/datasets_text.py +20 -20
  45. mindspore/dataset/engine/datasets_user_defined.py +174 -104
  46. mindspore/dataset/engine/datasets_vision.py +33 -33
  47. mindspore/dataset/engine/iterators.py +29 -0
  48. mindspore/dataset/engine/obs/util.py +7 -0
  49. mindspore/dataset/engine/queue.py +114 -60
  50. mindspore/dataset/engine/serializer_deserializer.py +2 -2
  51. mindspore/dataset/engine/validators.py +34 -14
  52. mindspore/dataset/text/__init__.py +1 -4
  53. mindspore/dataset/transforms/__init__.py +0 -3
  54. mindspore/dataset/utils/line_reader.py +2 -0
  55. mindspore/dataset/vision/__init__.py +1 -4
  56. mindspore/dataset/vision/utils.py +1 -1
  57. mindspore/dataset/vision/validators.py +2 -1
  58. mindspore/dnnl.dll +0 -0
  59. mindspore/{nn/extend → experimental/es}/__init__.py +4 -11
  60. mindspore/experimental/es/embedding_service.py +883 -0
  61. mindspore/{nn/layer → experimental/es}/embedding_service_layer.py +218 -30
  62. mindspore/experimental/llm_boost/__init__.py +21 -0
  63. mindspore/{nn/extend/layer → experimental/llm_boost/atb}/__init__.py +4 -8
  64. mindspore/experimental/llm_boost/atb/boost_base.py +211 -0
  65. mindspore/experimental/llm_boost/atb/llama_boost.py +115 -0
  66. mindspore/experimental/llm_boost/atb/qwen_boost.py +101 -0
  67. mindspore/experimental/llm_boost/register.py +129 -0
  68. mindspore/experimental/llm_boost/utils.py +31 -0
  69. mindspore/experimental/optim/adamw.py +85 -0
  70. mindspore/experimental/optim/optimizer.py +3 -0
  71. mindspore/hal/__init__.py +3 -3
  72. mindspore/hal/contiguous_tensors_handle.py +175 -0
  73. mindspore/hal/stream.py +18 -0
  74. mindspore/include/api/model_group.h +13 -1
  75. mindspore/include/api/types.h +10 -10
  76. mindspore/include/dataset/config.h +2 -2
  77. mindspore/include/dataset/constants.h +2 -2
  78. mindspore/include/dataset/execute.h +2 -2
  79. mindspore/include/dataset/vision.h +4 -0
  80. mindspore/jpeg62.dll +0 -0
  81. mindspore/log.py +1 -1
  82. mindspore/mindrecord/filewriter.py +68 -51
  83. mindspore/mindspore_backend.dll +0 -0
  84. mindspore/mindspore_common.dll +0 -0
  85. mindspore/mindspore_core.dll +0 -0
  86. mindspore/mindspore_glog.dll +0 -0
  87. mindspore/mindspore_np_dtype.dll +0 -0
  88. mindspore/mindspore_ops.dll +0 -0
  89. mindspore/mint/__init__.py +983 -46
  90. mindspore/mint/distributed/__init__.py +31 -0
  91. mindspore/mint/distributed/distributed.py +254 -0
  92. mindspore/mint/nn/__init__.py +268 -23
  93. mindspore/mint/nn/functional.py +125 -19
  94. mindspore/mint/nn/layer/__init__.py +39 -0
  95. mindspore/mint/nn/layer/activation.py +133 -0
  96. mindspore/mint/nn/layer/normalization.py +477 -0
  97. mindspore/mint/nn/layer/pooling.py +110 -0
  98. mindspore/mint/optim/adamw.py +26 -13
  99. mindspore/mint/special/__init__.py +63 -0
  100. mindspore/multiprocessing/__init__.py +2 -1
  101. mindspore/nn/__init__.py +0 -1
  102. mindspore/nn/cell.py +276 -96
  103. mindspore/nn/layer/activation.py +211 -44
  104. mindspore/nn/layer/basic.py +137 -10
  105. mindspore/nn/layer/embedding.py +137 -2
  106. mindspore/nn/layer/normalization.py +101 -5
  107. mindspore/nn/layer/padding.py +34 -48
  108. mindspore/nn/layer/pooling.py +161 -7
  109. mindspore/nn/layer/transformer.py +3 -3
  110. mindspore/nn/loss/__init__.py +2 -2
  111. mindspore/nn/loss/loss.py +84 -6
  112. mindspore/nn/optim/__init__.py +2 -1
  113. mindspore/nn/optim/adadelta.py +1 -1
  114. mindspore/nn/optim/adam.py +1 -1
  115. mindspore/nn/optim/lamb.py +1 -1
  116. mindspore/nn/optim/tft_wrapper.py +124 -0
  117. mindspore/nn/wrap/cell_wrapper.py +12 -23
  118. mindspore/nn/wrap/grad_reducer.py +5 -5
  119. mindspore/nn/wrap/loss_scale.py +17 -3
  120. mindspore/numpy/__init__.py +1 -1
  121. mindspore/numpy/array_creations.py +65 -68
  122. mindspore/numpy/array_ops.py +64 -60
  123. mindspore/numpy/fft.py +610 -75
  124. mindspore/numpy/logic_ops.py +11 -10
  125. mindspore/numpy/math_ops.py +85 -84
  126. mindspore/numpy/utils_const.py +4 -4
  127. mindspore/opencv_core452.dll +0 -0
  128. mindspore/opencv_imgcodecs452.dll +0 -0
  129. mindspore/opencv_imgproc452.dll +0 -0
  130. mindspore/ops/__init__.py +6 -4
  131. mindspore/ops/_grad_experimental/grad_array_ops.py +0 -11
  132. mindspore/ops/_grad_experimental/grad_comm_ops.py +67 -4
  133. mindspore/ops/_grad_experimental/grad_math_ops.py +0 -22
  134. mindspore/ops/_vmap/vmap_array_ops.py +2 -4
  135. mindspore/ops/_vmap/vmap_math_ops.py +17 -1
  136. mindspore/ops/_vmap/vmap_nn_ops.py +43 -2
  137. mindspore/ops/auto_generate/cpp_create_prim_instance_helper.py +91 -7
  138. mindspore/ops/auto_generate/gen_arg_dtype_cast.py +2 -0
  139. mindspore/ops/auto_generate/gen_extend_func.py +767 -13
  140. mindspore/ops/auto_generate/gen_ops_def.py +2452 -364
  141. mindspore/ops/auto_generate/gen_ops_prim.py +5442 -1756
  142. mindspore/ops/auto_generate/pyboost_inner_prim.py +176 -56
  143. mindspore/ops/composite/base.py +85 -48
  144. mindspore/ops/composite/multitype_ops/_compile_utils.py +1 -0
  145. mindspore/ops/composite/multitype_ops/not_in_impl.py +2 -2
  146. mindspore/ops/function/__init__.py +22 -0
  147. mindspore/ops/function/array_func.py +492 -153
  148. mindspore/ops/function/debug_func.py +113 -1
  149. mindspore/ops/function/fft_func.py +15 -2
  150. mindspore/ops/function/grad/grad_func.py +3 -2
  151. mindspore/ops/function/math_func.py +564 -207
  152. mindspore/ops/function/nn_func.py +817 -383
  153. mindspore/ops/function/other_func.py +3 -2
  154. mindspore/ops/function/random_func.py +402 -12
  155. mindspore/ops/function/reshard_func.py +13 -11
  156. mindspore/ops/function/sparse_unary_func.py +1 -1
  157. mindspore/ops/function/vmap_func.py +3 -2
  158. mindspore/ops/functional.py +24 -14
  159. mindspore/ops/op_info_register.py +3 -3
  160. mindspore/ops/operations/__init__.py +7 -2
  161. mindspore/ops/operations/_grad_ops.py +2 -76
  162. mindspore/ops/operations/_infer_ops.py +1 -1
  163. mindspore/ops/operations/_inner_ops.py +71 -94
  164. mindspore/ops/operations/array_ops.py +14 -146
  165. mindspore/ops/operations/comm_ops.py +63 -53
  166. mindspore/ops/operations/custom_ops.py +83 -19
  167. mindspore/ops/operations/debug_ops.py +42 -10
  168. mindspore/ops/operations/manually_defined/_inner.py +12 -0
  169. mindspore/ops/operations/manually_defined/ops_def.py +273 -20
  170. mindspore/ops/operations/math_ops.py +12 -223
  171. mindspore/ops/operations/nn_ops.py +20 -114
  172. mindspore/ops/operations/other_ops.py +7 -4
  173. mindspore/ops/operations/random_ops.py +46 -1
  174. mindspore/ops/primitive.py +18 -6
  175. mindspore/ops_generate/arg_dtype_cast.py +2 -0
  176. mindspore/ops_generate/gen_aclnn_implement.py +11 -11
  177. mindspore/ops_generate/gen_constants.py +36 -0
  178. mindspore/ops_generate/gen_ops.py +67 -52
  179. mindspore/ops_generate/gen_ops_inner_prim.py +1 -1
  180. mindspore/ops_generate/gen_pyboost_func.py +131 -47
  181. mindspore/ops_generate/op_proto.py +10 -3
  182. mindspore/ops_generate/pyboost_utils.py +14 -1
  183. mindspore/ops_generate/template.py +43 -21
  184. mindspore/parallel/__init__.py +3 -1
  185. mindspore/parallel/_auto_parallel_context.py +31 -9
  186. mindspore/parallel/_cell_wrapper.py +85 -0
  187. mindspore/parallel/_parallel_serialization.py +47 -19
  188. mindspore/parallel/_tensor.py +127 -13
  189. mindspore/parallel/_utils.py +53 -22
  190. mindspore/parallel/algo_parameter_config.py +5 -5
  191. mindspore/parallel/checkpoint_transform.py +46 -39
  192. mindspore/parallel/cluster/process_entity/__init__.py +1 -1
  193. mindspore/parallel/cluster/process_entity/_api.py +31 -23
  194. mindspore/parallel/cluster/process_entity/_utils.py +2 -27
  195. mindspore/parallel/parameter_broadcast.py +3 -4
  196. mindspore/parallel/shard.py +162 -31
  197. mindspore/parallel/transform_safetensors.py +1146 -0
  198. mindspore/profiler/__init__.py +2 -1
  199. mindspore/profiler/common/constant.py +29 -0
  200. mindspore/profiler/common/registry.py +47 -0
  201. mindspore/profiler/common/util.py +28 -0
  202. mindspore/profiler/dynamic_profiler.py +694 -0
  203. mindspore/profiler/envprofiling.py +17 -19
  204. mindspore/profiler/parser/ascend_analysis/constant.py +18 -0
  205. mindspore/profiler/parser/ascend_analysis/file_manager.py +25 -4
  206. mindspore/profiler/parser/ascend_analysis/function_event.py +43 -19
  207. mindspore/profiler/parser/ascend_analysis/fwk_cann_parser.py +31 -26
  208. mindspore/profiler/parser/ascend_analysis/fwk_file_parser.py +56 -10
  209. mindspore/profiler/parser/ascend_analysis/msprof_timeline_parser.py +55 -8
  210. mindspore/profiler/parser/ascend_analysis/path_manager.py +313 -0
  211. mindspore/profiler/parser/ascend_analysis/profiler_info_parser.py +27 -20
  212. mindspore/profiler/parser/ascend_analysis/trace_event_manager.py +9 -2
  213. mindspore/profiler/parser/ascend_msprof_exporter.py +5 -4
  214. mindspore/profiler/parser/ascend_timeline_generator.py +27 -25
  215. mindspore/profiler/parser/base_timeline_generator.py +19 -25
  216. mindspore/profiler/parser/cpu_gpu_timeline_generator.py +25 -12
  217. mindspore/profiler/parser/framework_parser.py +1 -391
  218. mindspore/profiler/parser/gpu_analysis/__init__.py +14 -0
  219. mindspore/profiler/parser/gpu_analysis/function_event.py +44 -0
  220. mindspore/profiler/parser/gpu_analysis/fwk_file_parser.py +89 -0
  221. mindspore/profiler/parser/gpu_analysis/profiler_info_parser.py +72 -0
  222. mindspore/profiler/parser/memory_usage_parser.py +0 -154
  223. mindspore/profiler/parser/profiler_info.py +78 -6
  224. mindspore/profiler/profiler.py +153 -0
  225. mindspore/profiler/profiling.py +285 -413
  226. mindspore/rewrite/__init__.py +1 -2
  227. mindspore/rewrite/common/namespace.py +4 -4
  228. mindspore/rewrite/symbol_tree/symbol_tree.py +3 -3
  229. mindspore/run_check/_check_version.py +39 -104
  230. mindspore/safeguard/rewrite_obfuscation.py +591 -247
  231. mindspore/swresample-4.dll +0 -0
  232. mindspore/swscale-6.dll +0 -0
  233. mindspore/tinyxml2.dll +0 -0
  234. mindspore/train/__init__.py +4 -3
  235. mindspore/train/_utils.py +105 -19
  236. mindspore/train/amp.py +171 -53
  237. mindspore/train/callback/__init__.py +2 -2
  238. mindspore/train/callback/_callback.py +4 -4
  239. mindspore/train/callback/_checkpoint.py +97 -31
  240. mindspore/train/callback/_cluster_monitor.py +1 -1
  241. mindspore/train/callback/_flops_collector.py +1 -0
  242. mindspore/train/callback/_loss_monitor.py +3 -3
  243. mindspore/train/callback/_on_request_exit.py +145 -31
  244. mindspore/train/callback/_summary_collector.py +5 -5
  245. mindspore/train/callback/_tft_register.py +375 -0
  246. mindspore/train/dataset_helper.py +15 -3
  247. mindspore/train/metrics/metric.py +3 -3
  248. mindspore/train/metrics/roc.py +4 -4
  249. mindspore/train/mind_ir_pb2.py +44 -39
  250. mindspore/train/model.py +154 -58
  251. mindspore/train/serialization.py +342 -128
  252. mindspore/turbojpeg.dll +0 -0
  253. mindspore/utils/__init__.py +21 -0
  254. mindspore/utils/utils.py +60 -0
  255. mindspore/version.py +1 -1
  256. {mindspore-2.3.0.dist-info → mindspore-2.4.1.dist-info}/METADATA +13 -7
  257. {mindspore-2.3.0.dist-info → mindspore-2.4.1.dist-info}/RECORD +260 -254
  258. {mindspore-2.3.0.dist-info → mindspore-2.4.1.dist-info}/WHEEL +1 -1
  259. mindspore/include/c_api/ms/abstract.h +0 -67
  260. mindspore/include/c_api/ms/attribute.h +0 -197
  261. mindspore/include/c_api/ms/base/handle_types.h +0 -43
  262. mindspore/include/c_api/ms/base/macros.h +0 -32
  263. mindspore/include/c_api/ms/base/status.h +0 -33
  264. mindspore/include/c_api/ms/base/types.h +0 -283
  265. mindspore/include/c_api/ms/context.h +0 -102
  266. mindspore/include/c_api/ms/graph.h +0 -160
  267. mindspore/include/c_api/ms/node.h +0 -606
  268. mindspore/include/c_api/ms/tensor.h +0 -161
  269. mindspore/include/c_api/ms/value.h +0 -84
  270. mindspore/mindspore_shared_lib.dll +0 -0
  271. mindspore/nn/extend/basic.py +0 -140
  272. mindspore/nn/extend/embedding.py +0 -143
  273. mindspore/nn/extend/layer/normalization.py +0 -109
  274. mindspore/nn/extend/pooling.py +0 -117
  275. mindspore/nn/layer/embedding_service.py +0 -531
  276. mindspore/ops/_op_impl/aicpu/strided_slice_v2.py +0 -93
  277. mindspore/ops/_op_impl/aicpu/strided_slice_v2_grad.py +0 -66
  278. mindspore/ops/extend/__init__.py +0 -53
  279. mindspore/ops/extend/array_func.py +0 -218
  280. mindspore/ops/extend/math_func.py +0 -76
  281. mindspore/ops/extend/nn_func.py +0 -308
  282. mindspore/ops/silent_check.py +0 -162
  283. mindspore/profiler/parser/msadvisor_analyzer.py +0 -82
  284. mindspore/profiler/parser/msadvisor_parser.py +0 -240
  285. mindspore/train/callback/_mindio_ttp.py +0 -443
  286. {mindspore-2.3.0.dist-info → mindspore-2.4.1.dist-info}/entry_points.txt +0 -0
  287. {mindspore-2.3.0.dist-info → mindspore-2.4.1.dist-info}/top_level.txt +0 -0
@@ -14,19 +14,16 @@
 # ============================================================================
 """Profiling api file."""
 import os
-import re
-import shutil
 import stat
 import time
 import json
 from json import JSONDecodeError
 import glob
-import subprocess
-import csv
 import socket
+import multiprocessing
 from enum import Enum
-from multiprocessing import Process
 from typing import List
+from sys import getsizeof
 import numpy as np
 
 from mindspore import log as logger, context
@@ -34,7 +31,7 @@ from mindspore.context import get_auto_parallel_context
 from mindspore.communication.management import GlobalComm, get_rank, get_group_size, get_local_rank
 import mindspore._c_expression as c_expression
 import mindspore._c_dataengine as cde
-from mindspore._c_expression import _framework_profiler_enable_mi
+from mindspore._c_expression import _framework_profiler_enable_mi, _framework_profiler_disable_mi
 from mindspore.profiler.common.exceptions.exceptions import ProfilerFileNotFoundException, \
     ProfilerIOException, ProfilerException, ProfilerRawFileException, ProfilerParamTypeErrorException
 from mindspore.profiler.common.exceptions.exceptions import ProfilerPathErrorException
@@ -47,13 +44,11 @@ from mindspore.profiler.parser.integrator import Integrator, DeviceTarget
 from mindspore.profiler.parser.ascend_analysis.function_event import CANNEvent
 from mindspore.profiler.parser.cpu_gpu_timeline_generator import GpuTimelineGenerator, CpuTimelineGenerator
 from mindspore.profiler.parser.ascend_timeline_generator import AscendTimelineGenerator
-from mindspore.profiler.parser.memory_usage_parser import MemoryUsageParser
 from mindspore.profiler.parser.minddata_parser import MinddataParser
 from mindspore.profiler.parser.minddata_analyzer import MinddataProfilingAnalyzer
 from mindspore.profiler.parser.minddata_pipeline_parser import \
     MinddataPipelineParser
-from mindspore.profiler.parser.step_trace_parser import GpuStepTraceParser, AscendStepTraceParser
-from mindspore.profiler.parser.msadvisor_analyzer import Msadvisor
+from mindspore.profiler.parser.step_trace_parser import GpuStepTraceParser
 from mindspore.profiler.parser.profiler_info import ProfilerInfo
 from mindspore.common.api import _pynative_executor
 from mindspore.profiler.parser.ascend_msprof_exporter import AscendMsprofExporter
@@ -67,6 +62,11 @@ from mindspore.profiler.parser.ascend_hccl_generator import AscendHCCLGenerator
 from mindspore.profiler.parser.ascend_communicate_generator import AscendCommunicationGenerator
 from mindspore.profiler.parser.ascend_memory_generator import AscendMemoryGenerator
 from mindspore.profiler.parser.ascend_integrate_generator import AscendIntegrateGenerator
+from mindspore.profiler.parser.ascend_analysis.file_manager import FileManager
+from mindspore.profiler.parser.ascend_analysis.path_manager import PathManager
+from mindspore.profiler.parser.ascend_analysis.constant import Constant
+from mindspore.profiler.common.util import timeit
+
 
 INIT_OP_NAME = 'Default/InitDataSetQueue'
 
@@ -105,7 +105,7 @@ class DeviceSupportParam(Enum):
     ASCEND = [
         'start', 'start_profile', 'output_path', 'data_process', 'timeline_limit', 'profile_memory',
         'parallel_strategy', 'profile_communication', 'aicore_metrics', 'l2_cache', 'hbm_ddr', 'pcie', 'op_time',
-        'ascend_job_id', 'profile_framework', 'host_stack', 'profiler_level', 'data_simplification'
+        'ascend_job_id', 'profile_framework', 'with_stack', 'profiler_level', 'data_simplification'
     ]
 
 
@@ -114,7 +114,6 @@ ALWAYS_VALID_PARAM = [
     'hbm_ddr', 'pcie', 'ascend_job_id', 'op_time', 'profile_framework', 'profiler_level'
 ]
 
-
 ANALYSIS_ASYNC_MODE = 'async'
 ANALYSIS_SYNC_MODE = 'sync'
 DEFAULT_MODEL_ID = 4294967295
@@ -164,147 +163,6 @@ def _calculate_dataset_item(row, execution_time_map, ts_map):
         logger.warning("Can not map the start time for item: %s.", row)
 
 
-def _calculate_dataset_execution_time(input_file, output_file):
-    r"""
-    Parse the host info into timeline file, so as to show on UI.
-
-    Args:
-        input_file: the original host_info file, in csv format.
-        output_file: the output file, in csv format.
-    """
-    input_file = validate_and_normalize_path(input_file)
-    # execution_time_map is used to store the ExecutionCalculator for each stage.
-    execution_time_map = {}
-    # ts_map is used to store the start time of each event_stage_tid_pid.
-    ts_map = {}
-    with open(input_file, 'r') as f:
-        for row in csv.DictReader(f):
-            try:
-                module_name = row['module_name']
-                if module_name != 'Dataset':
-                    continue
-                _calculate_dataset_item(row, execution_time_map, ts_map)
-            except KeyError as e:
-                logger.error("Error occur when analyse line: %s, Details is: %s", row, e)
-                continue
-    if ts_map:
-        logger.warning("Only start time is record for these items:")
-        for k, v in ts_map.items():
-            logger.warning("event_stage_tid_pid: %s, time: %d us.", k, v)
-    output_file = validate_and_normalize_path(output_file)
-    flags = os.O_WRONLY | os.O_CREAT | os.O_TRUNC
-    modes = stat.S_IWUSR | stat.S_IRUSR
-    with os.fdopen(os.open(output_file, flags, modes), 'w') as f:
-        csv_writer = csv.writer(f)
-        csv_writer.writerow(['Operation', 'Stage', 'Occurrences', 'Avg. time (us)', 'Custom Info'])
-        for _, v in execution_time_map.items():
-            csv_writer.writerow([v.event, v.stage, v.count, v.average_execution, v.custom_info])
-    os.chmod(output_file, modes)
-    logger.info('Successfully calculate the execution time and write it to file: %s.', output_file)
-
-
-def _extract_timeline_item(row, time_line, ts_map):
-    """Process one row, try to extract a timeline item."""
-    start_end = row['start_end']
-    event_stage_tid_pid = row['event'] + '_' + row['stage'] + '_' + row['tid'] + '_' + row['pid']
-    # map start and end, put the mapped event into timeline.
-    if start_end == '1' and event_stage_tid_pid in ts_map:
-        title = row['event'] + '::' + row['stage']
-        event = {'name': title, 'cat': row['module_name']}
-        ts_end = int(row['time_stamp(us)'])
-        ts = ts_map[event_stage_tid_pid]
-        event['ts'] = ts
-        event['dur'] = ts_end - ts
-        event['ph'] = 'X'
-        event['pid'] = row['pid']
-        event['tid'] = row['tid']
-        event['args'] = {'parent_pid': row['parent_pid']}
-        time_line.append(event)
-        del ts_map[event_stage_tid_pid]
-    elif start_end == '0':
-        ts = int(row['time_stamp(us)'])
-        ts_map[event_stage_tid_pid] = ts
-    # Put the instance event into timeline.
-    elif start_end == '2':
-        title = row['event'] + '::' + row['stage']
-        event = {
-            'name': title, 'cat': row['module_name'], 'ts': int(row['time_stamp(us)']), 'ph': 'i',
-            'pid': row['pid'], 'tid': row['tid'], 'args': {'parent_pid': row['parent_pid']}
-        }
-        time_line.append(event)
-    else:
-        logger.warning("Can not map the start time for item: %s.", row)
-
-
-def _parse_host_info(input_file, output_timeline_file, output_memory_file, is_develop_user=True):
-    r"""
-    Parse the host info into timeline file, so as to show on UI.
-
-    Args:
-        input_file: the original host_info file, in csv format.
-        output_timeline_file: the output timeline file, in json format.
-        output_memory_file: the output memory_usage file, in csv format.
-        is_develop_user: some data only shown to develop users, other users no need to analyse it.
-    """
-    input_file = validate_and_normalize_path(input_file)
-    time_line = []
-    # ts_map is used to store the start time of each event_stage_tid_pid
-    ts_map = {}
-    memory_header = [
-        'tid', 'pid', 'parent_pid', 'module_name', 'event', 'stage', 'level', 'start_end', 'custom_info',
-        'memory_usage(kB)', 'time_stamp(us)'
-    ]
-    memory_info = []
-    with open(input_file, 'r') as f:
-        for row in csv.DictReader(f):
-            try:
-                level = row['level']
-                if level == '0' and not is_develop_user:
-                    continue
-                if int(row['time_stamp(us)']) > 0:
-                    _extract_timeline_item(row, time_line, ts_map)
-                if int(row['memory_usage(kB)']) > 0:
-                    memory_info.append(row)
-            except KeyError as e:
-                logger.error("Error occur when analyse line: %s, Details is: %s", row, e)
-                continue
-    if memory_info:
-        with os.fdopen(os.open(output_memory_file, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600), 'w') as csv_file:
-            csv_writer = csv.DictWriter(csv_file, fieldnames=memory_header)
-            csv_writer.writeheader()
-            for item in memory_info:
-                csv_writer.writerow(item)
-        os.chmod(output_memory_file, stat.S_IREAD | stat.S_IWRITE)
-    else:
-        logger.warning("No memory_usage is record in file: %s", input_file)
-
-    if ts_map:
-        logger.warning("Only start time is record for these items:")
-        for k, v in ts_map.items():
-            logger.warning("event_stage_tid_pid: %s, time: %d us.", k, v)
-            last_dash = k.rfind('_')
-            if last_dash == -1:
-                logger.error("Can't find pid in the event_stage_tid_pid string: %s", k)
-                continue
-            second_last_dash = k.rfind('_', 0, last_dash - 1)
-            if second_last_dash == -1:
-                logger.error("Can't find tid in the event_stage_tid_pid string: %s", k)
-                continue
-            pid = k[last_dash + 1:]
-            tid = k[second_last_dash + 1: last_dash]
-            title = k[:second_last_dash]
-            unfinished_timeline = {'name': title, 'pid': pid, 'tid': tid, 'ph': 'B', 'ts': int(v)}
-            time_line.append(unfinished_timeline)
-
-    if time_line:
-        timeline_file = validate_and_normalize_path(output_timeline_file)
-        with os.fdopen(os.open(timeline_file, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600), 'w') as json_file:
-            json.dump(time_line, json_file)
-        os.chmod(timeline_file, stat.S_IREAD | stat.S_IWRITE)
-    else:
-        logger.warning("No valid time_stamp is record in file: %s", input_file)
-
-
 def _ascend_graph_msprof_generator(mindstudio_profiler_output, model_iteration_dict):
     """Executing the msprof export mode."""
     try:
@@ -351,20 +209,21 @@ class Profiler:
         output_path (str, optional): Output data path. Default: ``"./data"`` .
         profiler_level (ProfilerLevel, optional): (Ascend only) The level of profiling. Default: ``None``.
 
-            - Profiler.Level0: Leanest level of profiling data collection, collects information about the elapsed
+            - ProfilerLevel.Level0: Leanest level of profiling data collection, collects information about the elapsed
               time of the computational operators on the NPU and communication large operator information.
-            - Profiler.Level1: Collect more CANN layer AscendCL data and AICore performance metrics and communication
-              mini operator information based on Level0.
-            - Profiler.Level2: Collect GE and Runtime information in CANN layer on top of Level1
+            - ProfilerLevel.Level1: Collect more CANN layer AscendCL data and AICore performance metrics and
+              communication mini operator information based on Level0.
+            - ProfilerLevel.Level2: Collect GE and Runtime information in CANN layer on top of Level1
 
         op_time (bool, optional): (Ascend/GPU) Whether to collect operators performance data. Default value: ``True``.
         profile_communication (bool, optional): (Ascend only) Whether to collect communication performance data in
             a multi devices training,collect when True. Setting this parameter has no effect during single card
             training. When using this parameter, `op_time` must be set to ``True`` . Default: ``False`` .
         profile_memory (bool, optional): (Ascend only) Whether to collect tensor memory data, collect when ``True`` .
-            When using this parameter, `op_time` must be set to True. Default: ``False`` .
+            When using this parameter, `op_time` must be set to True. Collecting operator memory data when the graph
+            compilation level is O2 requires collecting from the first step. Default: ``False`` .
         parallel_strategy (bool, optional): (Ascend only) Whether to collect parallel policy performance data.
-            Default value: ``True`` .
+            Default value: ``False`` .
         start_profile (bool, optional): The start_profile parameter controls whether to enable or disable performance
             data collection based on conditions. Default: ``True`` .
         aicore_metrics (int, optional): (Ascend only) Types of AICORE performance data collected, when using this
@@ -380,11 +239,12 @@ class Profiler:
             - 4: ResourceConflictRatio contains vec_bankgroup/bank/resc_cflt_ratio etc.
             - 5: MemoryUB contains ub_read/write_bw_mte, ub_read/write_bw_vector, ub\_/write_bw_scalar etc.
             - 6: L2Cache contains write_cache_hit, write_cache_miss_allocate, r0_read_cache_hit, r1_read_cache_hit etc.
+            This function only support Atlas A2 training series products.
 
         l2_cache (bool, optional): (Ascend only) Whether to collect l2 cache data, collect when True.
             Default: ``False`` .
-        hbm_ddr (bool, optional): (Ascend only) Whether to collect HBM/DDR read and write rate data, collect when True.
-            Default: ``False`` .
+        hbm_ddr (bool, optional): (Ascend only) Whether to collect On-Chip Memory/DDR read and write rate data,
+            collect when True. Default: ``False`` .
         pcie (bool, optional): (Ascend only) Whether to collect PCIe bandwidth data, collect when True.
             Default: ``False`` .
         sync_enable (bool, optional): (GPU only) Whether the profiler collects operators in a synchronous way.
@@ -396,25 +256,32 @@ class Profiler:
             - False: The asynchronous way. The duration of the operator is that of sending from the CPU to the GPU.
               This method can reduce the impact of adding profiler on overall training time.
         data_process (bool, optional): (Ascend/GPU) Whether to collect data to prepare performance data.
-            Default value: ``True`` .
+            Default value: ``False`` .
         timeline_limit (int, optional): (Ascend/GPU) Set the maximum storage size of the timeline file (unit M).
             When using this parameter, `op_time` must be set to True. Default value: ``500`` .
         profile_framework (str, optional): (Ascend/GPU) The host information to collect, it must be one of
-            ["all", "time", "memory", None], When is not set to None, a subdirectory host_info will be generated in the
-            specified profiler directory, which stores the collected memory and time files on the Host side.
-            Default: "all".
+            ["all", "time", None], When is not set to None, it would collect the host profiler data. When using this
+            parameter, the op_time parameter must be enabled.
+            Default: None.
 
-            - "all": Record both host timestamp and host memory usage.
-            - "time": Only record host timestamp.
-            - "memory": Only record host memory usage.
+            - "all": Record host timestamp.
+            - "time": The same as "all".
             - None: Not record host information.
         data_simplification (bool, optional): (Ascend only) Whether to remove FRAMEWORK data and other redundant data.
             If set to True, only the delivery of profiler and the original performance data in the PROF_XXX
             directory are retained to save disk space.
             Default value: ``True`` .
-        host_stack (bool, optional): (Ascend) Whether to collect frame host call stack data.
-            Default value: ``True`` .
-
+        with_stack (bool, optional): (Ascend) Whether to collect frame host call stack data on the Python side. This
+            data is presented in the form of a flame graph in the timeline. When using this parameter, the op_time and
+            profile_framework parameters must be enabled. Default value: ``False`` .
+        analyse_only (bool, optional): (Ascend/GPU) Whether to parse only performance data and not collect performance
+            data. This parameter is experimental parameter and does not need to be set by the user.
+            Default value: ``False`` .
+        rank_id (int, optional): (Ascend/GPU) Set the rank id during parsing. This parameter is
+            experimental parameter and does not need to be set by the user. Default value: ``0`` .
+        env_enable (bool, optional): (Ascend/GPU) Whether to enable the collection of environment variables.
+            This parameter is experimental parameter and does not need to be set by the user.
+            Default value: ``False`` .
     Raises:
         RuntimeError: When the version of CANN does not match the version of MindSpore,
             MindSpore cannot parse the generated ascend_job_id directory structure.
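
As a usage sketch (not part of the diff): under the 2.4.1 defaults documented above, host-side collection and Python call stacks must now be requested explicitly, and host_stack is renamed to with_stack. Assuming an Ascend target, a construction exercising the new options might look like:

    from mindspore import Profiler
    from mindspore.profiler import ProfilerLevel

    profiler = Profiler(
        profiler_level=ProfilerLevel.Level0,  # leanest Ascend collection level
        op_time=True,                         # prerequisite for profile_framework and with_stack
        profile_framework="time",             # host timestamps; the default is now None (off)
        with_stack=True,                      # renamed from host_stack; the default is now False
    )
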
@@ -428,6 +295,7 @@ class Profiler:
         >>> from mindspore import nn
         >>> import mindspore.dataset as ds
         >>> from mindspore import Profiler
+        >>> from mindspore.profiler import ProfilerLevel
         >>>
         >>> class Net(nn.Cell):
         ...     def __init__(self):
@@ -453,7 +321,7 @@ class Profiler:
         ...
         ...     # Init Profiler
         ...     # Note that the Profiler should be initialized before model.train
-        ...     profiler = Profiler()
+        ...     profiler = Profiler(profiler_level=ProfilerLevel.Level0)
         ...
         ...     # Train Model
         ...     net = Net()
@@ -462,11 +330,6 @@ class Profiler:
         ...     # Profiler end
         ...     profiler.analyse()
     """
-
-    _hwts_output_filename_target = "output_format_data_hwts_"
-    _opcompute_output_filename_target = "output_op_compute_time_"
-    _aicpu_op_output_filename_target = "output_data_preprocess_aicpu_"
-    _has_analysed = False
     _has_initialized = False
     _ascend_profiling_options = ""
     _ascend_job_id = ""
@@ -492,6 +355,9 @@ class Profiler:
         self._rank_size = 1
         self._rank_id = 0
         self._ascend_profiler = None
+        self.metadata = {}
+        self.max_str_len = 4096
+        self.max_meta_size = 50 * 1024
         self._timeline_size_limit_byte = 500 * 1024 * 1024  # 500MB
         self._parallel_strategy = True
         self._model_iteration_dict = None
@@ -512,13 +378,13 @@ class Profiler:
         self._sync_enable = True
         self._stop_time = 0
         self._dynamic_status = False
-        self._profile_framework = "all"
+        self._profile_framework = None
         self._msprof_enable = os.getenv("PROFILER_SAMPLECONFIG")
         self.profiler_level = None
         self._pretty_json = False
         self._analyse_only = kwargs.get("analyse_only", False)
         self._data_simplification = kwargs.get("data_simplification", True)
-        self._host_stack = True
+        self._with_stack = False
         if self._msprof_enable:
             return
         self._start_time = int(time.time() * 1e6)  # us
@@ -540,20 +406,6 @@ class Profiler:
         if self.start_profile:
             self.start()
 
-    @staticmethod
-    def _get_prof_rank(prof_path: str):
-        """get rank id."""
-        sub_dirs = os.listdir(os.path.realpath(prof_path))
-        info_json_path = ""
-        for sub_dir in sub_dirs:
-            if sub_dir.startswith("device_"):
-                device_id = sub_dir.split("_")[-1]
-                info_json_path = os.path.join(prof_path, sub_dir, f"info.json.{device_id}")
-        if not os.path.exists(info_json_path):
-            return -1
-        rank_id, _ = Profiler._parse_info_json(info_json_path)
-        return rank_id
-
     @staticmethod
     def _check_output_path(output_path):
         """Checking path validity."""
@@ -602,30 +454,8 @@ class Profiler:
             logger.warning('Get the drvVersion error, use single-export mode instead. detail : %s', err)
             return None
 
-    @staticmethod
-    def _parse_info_json(info_file):
-        """
-        Parse info log file, get the rank id and device id of the job.
-        Args:
-            input_file (str): The file path of the parse info log file.
-
-        Returns:
-            rank id, device id
-        """
-        with open(info_file, "r") as f:
-            info_dict = json.load(f)
-
-        rank_id = info_dict.get("rank_id", 0)
-        dev_info = info_dict.get("DeviceInfo", [])
-        dev_id = dev_info[0].get("id", -1)
-
-        if int(rank_id) < 0:
-            rank_id = 0
-
-        return str(rank_id), str(dev_id)
-
     @classmethod
-    def offline_analyse(cls, path: str, pretty=False, step_list=None):
+    def offline_analyse(cls, path: str, pretty=False, step_list=None, data_simplification=True):
         """
         Analyze training performance data offline, which is invoked after performance data collection is completed.
 
@@ -633,37 +463,50 @@ class Profiler:
             path (str): The profiling data path which need to be analyzed offline.
                 There needs to be a profiler directory in this path.
             pretty (bool, optional): Whether to pretty json files. Default: ``False``.
-            step_list (list, optional): A list of steps that need to be analyzed. Default: ``None``.
-                By default, all steps will be analyzed.
+            step_list (list, optional): A list of steps that need to be analyzed, the steps must be
+                consecutive integers. Default: ``None``. By default, all steps will be analyzed.
+            data_simplification (bool, optional): Whether to enable data simplification. Default: ``True``.
 
         Examples:
             >>> from mindspore import Profiler
             >>> Profiler.offline_analyse("./profiling_path")
         """
-        profiler_path = os.path.join(path, "profiler")
-        if not os.path.exists(profiler_path):
-            raise ProfilerPathErrorException(f'There must be a profiler folder in the data path: {path}.')
-
-        rank_set = set()
-        sub_dirs = os.listdir(os.path.realpath(profiler_path))
-        for sub_dir in sub_dirs:
-            sub_path = os.path.join(profiler_path, sub_dir)
-            if os.path.isdir(sub_path) and re.match(r"^PROF_\d+_\d+_[a-zA-Z0-9]+", sub_dir):
-                rank = cls._get_prof_rank(sub_path)
-                rank_set.add(rank)
-        if not rank_set:
-            return
-
-        process_list = []
-        for rank_id in rank_set:
-            profiler = cls(analyse_only=True, rank_id=rank_id)
-            process = Process(target=profiler.analyse,
-                              args=(path, pretty, step_list))
-            process.start()
-            process_list.append(process)
-
-        for process in process_list:
-            process.join()
+        real_path = os.path.realpath(path)
+        PathManager.check_input_directory_path(real_path)
+        profiler_parent_path_list = PathManager.get_profiler_parent_path_list(real_path)
+        if not isinstance(data_simplification, bool):
+            logger.warning(f"For offline_analyse, the parameter data_simplification must be bool, "
+                           f"but got type {type(data_simplification)}, it will be set to True.")
+            data_simplification = True
+        if not profiler_parent_path_list:
+            raise ProfilerPathErrorException(f'The provided path "{path}" must have a "profiler" directory for '
+                                             f'single-device profiler data, or multiple subdirectories each containing '
+                                             f'a "profiler" directory for multi-device profiler data. ')
+        # get rank id
+        rank_list = []
+        for parent_path in profiler_parent_path_list:
+            profiler_path = os.path.join(parent_path, Constant.PROFILER_DIR)
+            rank_id = ProfilerInfo.get_rank_id(profiler_path)
+            if int(rank_id) < 0:
+                logger.error(f"Unable to get a valid rank ID in the profiler directory: {profiler_path}")
+            rank_list.append(rank_id)
+        # start offline analyse
+        if len(profiler_parent_path_list) == 1:
+            PathManager.check_directory_path_writeable(profiler_parent_path_list[0])
+            profiler = cls(analyse_only=True, rank_id=rank_list[0], data_simplification=data_simplification)
+            profiler.analyse(profiler_parent_path_list[0], pretty, step_list)
+        else:
+            # Multiprocess Parsing
+            multiprocessing.set_start_method("fork", force=True)
+            process_number = min(Constant.DEFAULT_PROCESS_NUMBER, len(profiler_parent_path_list))
+            pool = multiprocessing.Pool(processes=process_number)
+            for idx, profiler_parent_path in enumerate(profiler_parent_path_list):
+                PathManager.check_directory_path_writeable(profiler_parent_path)
+                profiling_parser = cls(analyse_only=True, rank_id=rank_list[idx],
+                                       data_simplification=data_simplification)
+                pool.apply_async(profiling_parser.analyse, args=(profiler_parent_path, pretty, step_list))
+            pool.close()
+            pool.join()
 
     def op_analyse(self, op_name, device_id=None):
         """
@@ -739,14 +582,38 @@ class Profiler:
                 Offline mode isused in abnormal exit scenario. This parameter should be set to ``None``
                 for online mode. Default: ``None``.
             pretty (bool, optional): Whether to pretty json files. Default: ``False``.
-            step_list (list, optional): A list of steps that need to be analyzed. Default: ``None``.
-                By default, all steps will be analyzed.
+            step_list (list, optional): A list of steps that need to be analyzed, the steps must be
+                consecutive integers. Default: ``None``. By default, all steps will be analyzed.
             mode (str, optional): Analysis mode, it must be one of ["sync", "async"]. Default: ``sync``.
 
                 - sync: analyse data in current process, it will block the current process.
-                - async: analyse data in subprocess, it will not the current process.Since the parsing process
+                - async: analyse data in subprocess, it will not block the current process. Since the parsing process
                   will take up extra CPU resources, please enable this mode according to the actual resource situation.
 
+        Examples:
+            >>> from mindspore.train import Callback
+            >>> from mindspore import Profiler
+            >>> class StopAtStep(Callback):
+            ...     def __init__(self, start_step=1, stop_step=5):
+            ...         super(StopAtStep, self).__init__()
+            ...         self.start_step = start_step
+            ...         self.stop_step = stop_step
+            ...         self.profiler = Profiler(start_profile=False)
+            ...
+            ...     def step_begin(self, run_context):
+            ...         cb_params = run_context.original_args()
+            ...         step_num = cb_params.cur_step_num
+            ...         if step_num == self.start_step:
+            ...             self.profiler.start()
+            ...
+            ...     def step_end(self, run_context):
+            ...         cb_params = run_context.original_args()
+            ...         step_num = cb_params.cur_step_num
+            ...         if step_num == self.stop_step:
+            ...             self.profiler.stop()
+            ...
+            ...     def end(self, run_context):
+            ...         self.profiler.analyse(step_list=[2,3,4], mode="sync")
         """
         try:
             if isinstance(pretty, bool):
@@ -793,11 +660,12 @@ class Profiler:
 
         ProfilerInfo.set_parallel_info(parallel_mode, stage_num)
         if offline_path:
+            # Loads the ProfilerInfo data, avoid overwriting the data collection prof_info_x.json.
+            ProfilerInfo.load_profiler_info_dict(os.path.join(offline_path, "profiler"))
             ProfilerInfo.set_analyse_start_time(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))
             self._ascend_graph_analyse(offline_path=offline_path)
             ProfilerInfo.set_analyse_end_time(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))
             ProfilerInfo.save(self._output_path)
-            _offline_parse(offline_path)
             return
         if self._msprof_enable:
             return
@@ -817,18 +685,16 @@ class Profiler:
         ProfilerInfo.set_analyse_start_time(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))
         if self._device_target and self._device_target == DeviceTarget.CPU.value:
             self._cpu_analyse()
+            if self._profile_framework:
+                logger.warning("The parameter 'profile_framework' is not support for CPU, so there no host profiler "
+                               "data.")
 
         if self._device_target and self._device_target == DeviceTarget.GPU.value:
             self._gpu_analyse()
 
         elif self._device_target and self._device_target == DeviceTarget.ASCEND.value:
             self._ascend_analyse()
-        if self._profile_framework:
-            if self._device_target != DeviceTarget.CPU.value:
-                self._host_info_analyse()
-            else:
-                logger.warning("The parameter 'profile_framework' is not support for CPU, so there no host_info"
-                               " directory in the output path.")
+
         logger.info("Profiling: all the data have been analyzed.")
         ProfilerInfo.set_analyse_end_time(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))
         ProfilerInfo.save(self._output_path)
@@ -895,8 +761,13 @@ class Profiler:
             self._ascend_graph_start()
         ProfilerInfo.set_profiling_start_time(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))
         ProfilerInfo.set_system_cnt(c_expression.get_clock_syscnt())
-        ProfilerInfo.set_system_time(int(c_expression.get_clock_time() * 1e3))  # cast us to ns
-        _framework_profiler_enable_mi()
+        ProfilerInfo.set_system_time(int(c_expression.get_clock_time()))  # ns
+        if context.get_context("mode") == context.GRAPH_MODE:
+            jit_config = context.get_jit_config()
+            jit_level = jit_config.get("jit_level", "")
+            ProfilerInfo.set_jit_level(jit_level)
+        if self._profile_framework:
+            _framework_profiler_enable_mi()
 
     def stop(self):
         """
@@ -953,12 +824,96 @@ class Profiler:
             self._ascend_profiler.stop()
 
         self._stop_time = int(time.time() * 10000000)
+
+        if self._profile_framework:
+            _framework_profiler_disable_mi()
+
         ProfilerInfo.set_profiling_stop_time(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))
         self._init_profiler_info()
         ProfilerInfo.set_diff_time(self._start_time - self._monotonic_time)
         ProfilerInfo.save(self._output_path)
+        self._dump_metadata()
         logger.info("Profiling: stop time: %d", self._stop_time)
 
+    def add_metadata(self, key: str, value: str):
+        """
+        Report custom metadata key-value pair data.
+
+        Args:
+            key (str): The key to the metadata.
+            value (str): The value to the metadata.
+
+        Examples:
+            >>> from mindspore import Profiler
+            >>> # Profiler init.
+            >>> profiler = Profiler()
+            >>> # Call Profiler add_metadata
+            >>> profiler.add_metadata("test_key", "test_value")
+            >>> # Profiler end
+            >>> profiler.analyse()
+        """
+        if not isinstance(key, str) or not isinstance(value, str):
+            logger.warning("The key and value of metadata must be string. Skip this metadata.")
+            return
+        if not self._check_str_valid(key) or not self._check_str_valid(value):
+            logger.warning("Invalid input key or value. Skip this metadata.")
+            return
+        add_size = getsizeof(key) + getsizeof(value)
+        if getsizeof(self.metadata) + add_size < self.max_meta_size:
+            if key in self.metadata:
+                logger.warning(f"{key} is already saved as metadata, override it.")
+            self.metadata[key] = value
+        else:
+            logger.warning("Too many metadata added. Skip this metadata")
+
+    def add_metadata_json(self, key: str, value: str):
+        """
+        Report custom metadata key-value pair data with the value as a JSON string data.
+
+        Args:
+            key (str): The key to the metadata.
+            value (str): The json str format value to the metadata.
+
+        Examples:
+            >>> import json
+            >>> from mindspore import Profiler
+            >>> # Profiler init.
+            >>> profiler = Profiler()
+            >>> # Call Profiler add_metadata_json
+            >>> profiler.add_metadata_json("test_key", json.dumps({"key1": 1, "key2": 2}))
+            >>> # Profiler end, metadata will be saved in profiler_metadata.json
+            >>> profiler.analyse()
+        """
+        if not isinstance(key, str) or not isinstance(value, str):
+            logger.warning("The key and value of metadata must be string. Skip this metadata.")
+            return
+        if not self._check_str_valid(key) or not self._check_str_valid(value):
+            logger.warning("Invalid input key or value. Skip this metadata.")
+            return
+        add_size = getsizeof(key) + getsizeof(value)
+        if getsizeof(self.metadata) + add_size < self.max_meta_size:
+            try:
+                if key in self.metadata:
+                    logger.warning(f"{key} is already saved as metadata, override it.")
+                self.metadata[key] = json.loads(value)
+            except ValueError:
+                logger.warning("The metadata value must be json format string. Skip this metadata")
+        else:
+            logger.warning("Too many metadata added. Skip this metadata")
+
+    def _dump_metadata(self):
+        """Dump metadata to file."""
+        if not self.metadata:
+            return
+        FileManager.create_json_file(self._output_path, self.metadata, "profiler_metadata.json", indent=4)
+        self.metadata.clear()
+
+    def _check_str_valid(self, input_str: str):
+        """Check str length"""
+        if len(input_str) > self.max_str_len:
+            return False
+        return True
+
     def _set_ascend_job_id(self, ascend_job_id):
         """Set output_path for offline parsing performance data."""
         if not ascend_job_id:
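
As a usage sketch (not part of the diff): the metadata API above caps each key and value at max_str_len (4096) characters and the whole store at max_meta_size (50 KB, measured with sys.getsizeof); oversized or non-string entries are skipped with a warning rather than raising:

    import json
    from mindspore import Profiler

    profiler = Profiler()
    profiler.add_metadata("run_tag", "baseline")                     # stored as a plain string
    profiler.add_metadata_json("hparams", json.dumps({"lr": 1e-3}))  # decoded via json.loads
    profiler.analyse()  # per the docstrings above, metadata lands in profiler_metadata.json
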
@@ -983,7 +938,7 @@ class Profiler:
         self._profile_communication = options.get('profile_communication')
         self._op_time = options.get('op_time')
         self._device_target = context.get_context("device_target").lower()
-        self._profile_framework = options.get('profile_framework', 'all')
+        self._profile_framework = options.get('profile_framework', None)
         self._profiler_manager = c_expression.ProfilerManager.get_instance()
         self._cpu_profiler = c_expression.Profiler.get_instance("CPU")
         if self._data_process:
@@ -1034,32 +989,32 @@ class Profiler:
 
     def _gpu_profiler_init(self, kwargs):
         """Gpu profiler init."""
+        self._parse_parameter_for_gpu(kwargs)
         # Setup and start MindData Profiling
         if self._data_process:
             self._md_profiler = cde.GlobalContext.profiling_manager()
             self._md_profiler.init()
-        self._parse_parameter_for_gpu(kwargs)
 
         gpu_profiler = c_expression.Profiler
         self._gpu_profiler = gpu_profiler.get_instance("GPU")
-        self._gpu_profiler.init(self._output_path)
-        self._gpu_profiler.sync_enable(self._sync_enable)
         if GlobalComm.WORLD_COMM_GROUP == "nccl_world_group":
             self._dev_id = str(get_rank())
         os.environ['DEVICE_ID'] = self._dev_id
         self._rank_id = self._dev_id
+        self._gpu_profiler.init(self._output_path, int(self._rank_id))
+        self._gpu_profiler.sync_enable(self._sync_enable)
 
     def _ascend_profiler_init(self, kwargs):
         """Ascend profiler init."""
+        self._parse_parameter_for_ascend(kwargs)
         # Setup and start MindData Profiling
         if self._data_process:
             self._md_profiler = cde.GlobalContext.profiling_manager()
             self._md_profiler.init()
         self._init_time = int(time.time() * 10000000)
         logger.info("Profiling: profiling init time: %d", self._init_time)
-        self._parse_parameter_for_ascend(kwargs)
-        os.environ['DEVICE_ID'] = self._dev_id
 
+        os.environ['DEVICE_ID'] = self._dev_id
         self._ascend_profiling_options = json.dumps(self._construct_profiling_options())
         # Characters longer than 2048 are ignored, resulting in profiling option resolution errors
         if len(self._ascend_profiling_options) > 2048:
@@ -1075,7 +1030,7 @@ class Profiler:
         data_path = os.path.join(container_path, "data")
         data_path = validate_and_normalize_path(data_path)
         if not os.path.exists(data_path):
-            os.makedirs(data_path, exist_ok=True)
+            os.makedirs(data_path, exist_ok=True, mode=stat.S_IRUSR | stat.S_IWUSR | stat.S_IXUSR)
 
     def _construct_profiling_options(self):
         """
@@ -1101,9 +1056,9 @@ class Profiler:
             "op_time": self.ENABLE_STATUS if self._op_time else self.DISABLE_STATUS,
             "profile_framework": self._profile_framework,
             "profiler_level": self.profiler_level.value if self.profiler_level else self.DISABLE_STATUS,
-            "host_stack": "on" if self._host_stack else "off"
+            "with_stack": "on" if self._with_stack else "off"
         }
-
+        ProfilerInfo.set_profiling_options(profiling_options)
         return profiling_options
 
     def _parse_parameter_for_gpu(self, kwargs):
@@ -1175,11 +1130,11 @@ class Profiler:
             pcie_enable = False
         self._pcie = self.ENABLE_STATUS if pcie_enable else self.DISABLE_STATUS
 
-        self._parallel_strategy = kwargs.pop("parallel_strategy", True)
+        self._parallel_strategy = kwargs.pop("parallel_strategy", False)
         if not isinstance(self._parallel_strategy, bool):
             logger.warning(f"For '{self.__class__.__name__}', the parameter parallel_strategy must be bool, "
-                           f"but got type {type(self._parallel_strategy)}, it will be set to True.")
-            self._parallel_strategy = True
+                           f"but got type {type(self._parallel_strategy)}, it will be set to False.")
+            self._parallel_strategy = False
 
         self.profiler_level = kwargs.pop("profiler_level", None)
         if self.profiler_level and not isinstance(self.profiler_level, ProfilerLevel):
@@ -1381,7 +1336,7 @@ class Profiler:
         finally:
             pass
 
-    def _ascend_graph_memory_analyse(self, points):
+    def _ascend_graph_memory_analyse(self):
         """Analyse memory usage info."""
         if not self._profile_memory:
             return
@@ -1390,7 +1345,7 @@ class Profiler:
                            "PyNative mode currently.")
         try:
             logger.info("Profiling: analyzing the memory usage info.")
-            self._analyse_memory_usage(points)
+            self._analyse_memory_usage()
         except (ProfilerIOException, ProfilerFileNotFoundException, ProfilerRawFileException) as err:
             logger.warning(err.message)
         finally:
@@ -1408,28 +1363,37 @@ class Profiler:

      dev_id = self._rank_id if self._device_target == DeviceTarget.ASCEND.value else self._dev_id
      ascend_profiler_output_path = os.path.join(ascend_ms_path, 'ASCEND_PROFILER_OUTPUT')
-     os.makedirs(ascend_profiler_output_path, exist_ok=True)
+     PathManager.make_dir_safety(ascend_profiler_output_path)

      source_profiler_info_path = os.path.join(self._output_path, f"profiler_info_{dev_id}.json")
      target_profiler_info_path = os.path.join(ascend_ms_path, f"profiler_info_{dev_id}.json")
-     shutil.copy(source_profiler_info_path, target_profiler_info_path)
+     PathManager.copy_file(source_profiler_info_path, target_profiler_info_path)
+
+     source_profiler_metadata_path = os.path.join(self._output_path, f"profiler_metadata.json")
+     target_profiler_metadata_path = os.path.join(ascend_ms_path, f"profiler_metadata.json")
+     PathManager.copy_file(source_profiler_metadata_path, target_profiler_metadata_path)

      source_timeline_path = os.path.join(self._output_path, f"ascend_timeline_display_{dev_id}.json")
      target_timeline_path = os.path.join(ascend_profiler_output_path, f"trace_view.json")
-     shutil.copy(source_timeline_path, target_timeline_path)
+     PathManager.copy_file(source_timeline_path, target_timeline_path)

      src_op_mem_file = os.path.join(self._output_path, f"operator_memory_{dev_id}.csv")
-     if os.path.exists(src_op_mem_file):
-         dst_op_mem_file = os.path.join(ascend_profiler_output_path, f"operator_memory.csv")
-         shutil.copy(src_op_mem_file, dst_op_mem_file)
+     dst_op_mem_file = os.path.join(ascend_profiler_output_path, f"operator_memory.csv")
+     PathManager.copy_file(src_op_mem_file, dst_op_mem_file)

-     ms_output_path = os.path.abspath(
+     ms_output_path = os.path.realpath(
          os.path.join(source_path, os.path.pardir, 'mindstudio_profiler_output'))
      static_op_mem_path = os.path.join(ms_output_path, f"static_op_mem_*.csv")
      src_static_op_mem_path = glob.glob(static_op_mem_path)
      if src_static_op_mem_path:
          dst_static_op_mem_file = os.path.join(ascend_profiler_output_path, f"static_op_mem.csv")
-         shutil.copy(src_static_op_mem_path[0], dst_static_op_mem_file)
+         PathManager.copy_file(src_static_op_mem_path[0], dst_static_op_mem_file)
+
+     src_op_statistics_path = os.path.join(ms_output_path, "op_statistic_*.csv")
+     src_op_statistics_path = glob.glob(src_op_statistics_path)
+     if src_op_statistics_path:
+         dst_op_statistics_path = os.path.join(ascend_profiler_output_path, f"op_statistic.csv")
+         PathManager.copy_file(src_op_statistics_path[0], dst_op_statistics_path)

      self._ascend_graph_cluster_analyse(source_path, ascend_profiler_output_path)
      self._ascend_graph_communicate_analyse(source_path, ascend_profiler_output_path)
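The shutil.copy and os.makedirs calls above are swapped for PathManager helpers whose bodies are not part of this diff. A minimal sketch of what such safety wrappers typically do, assuming (not verified against the MindSpore source) that they check existence and clamp permissions:

    import os
    import shutil
    import stat

    def make_dir_safety(path):
        """Create the directory tree with owner-only permissions (sketch)."""
        if not os.path.exists(path):
            os.makedirs(path, exist_ok=True)
            os.chmod(path, stat.S_IRUSR | stat.S_IWUSR | stat.S_IXUSR)

    def copy_file(src, dst):
        """Copy src to dst, tolerating a missing source file (sketch)."""
        if not os.path.exists(src):
            return  # silently skip optional artifacts
        shutil.copy(src, dst)
        os.chmod(dst, stat.S_IRUSR | stat.S_IWUSR)

If PathManager.copy_file does tolerate missing sources like this, that would explain why the explicit os.path.exists guard around operator_memory.csv could be dropped at the call site.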
@@ -1468,7 +1432,7 @@ class Profiler:
                                                     f"communication_matrix.json")
      communication_matrix_file_path = validate_and_normalize_path(communication_matrix_file_path)

-     analyze_path = os.path.abspath(os.path.join(source_path, os.path.pardir, 'analyze'))
+     analyze_path = os.path.realpath(os.path.join(source_path, os.path.pardir, 'analyze'))
      communicate_analyser = AscendCommunicationGenerator(analyze_path)
      communicate_analyser.parse()
      communicate_analyser.write(communication_file_path, communication_matrix_file_path)
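Several call sites in this diff move from os.path.abspath to os.path.realpath. The practical difference: abspath only normalizes the path string, while realpath also resolves symbolic links, so the profiler now follows symlinked output directories to their physical location. A quick illustration (temporary paths are arbitrary; symlink creation may require elevated rights on Windows):

    import os
    import tempfile

    tmp = tempfile.mkdtemp()
    target = os.path.join(tmp, "target")
    link = os.path.join(tmp, "link")
    os.makedirs(target)
    os.symlink(target, link)

    print(os.path.abspath(link))   # .../link   (lexical normalization only)
    print(os.path.realpath(link))  # .../target (symlink resolved)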
@@ -1500,26 +1464,6 @@ class Profiler:
      finally:
          pass

- def _ascend_graph_msadvisor_analyse(self, job_id):
-     """Call MSAdvisor function."""
-     logger.info("MSAdvisor starts running.")
-     msadvisor = Msadvisor(job_id, self._rank_id, self._output_path, pretty=self._pretty_json)
-     try:
-         msadvisor.analyse()
-     except FileNotFoundError as err:
-         logger.warning("MSAdvisor: command not found,"
-                        "please check if installed ascend-toolkit and set environment path correctly. %s", err)
-     except OSError as err:
-         logger.warning("Cannot execute binary file: Exec format error. %s", err)
-     except subprocess.CalledProcessError:
-         logger.warning("MSAdvisor running failed, please check MSAdvisor running log.")
-     except (ValueError, ProfilerFileNotFoundException) as err:
-         logger.warning("MSAdvisor running failed. %s", err)
-     finally:
-         pass
-     if context.get_context("mode") == context.PYNATIVE_MODE:
-         logger.warning("Pynative mode does not support MSAdvisor analyzer currently.")
-
  def _get_kernel_op_map(self, op_summary, kernels: List[CANNEvent]) -> List:
      """Get the mapping between framework operator and device kernel."""
      if not kernels:
@@ -1535,8 +1479,6 @@ class Profiler:
          key = name if name.startswith("hcom_") else (name, ts)
          launch_op = kernel_map.get(key)
          if not launch_op:
-             if context.get_context("mode") == context.GRAPH_MODE or not name.startswith("aclnn"):
-                 logger.warning(f"Failed to get launch operator for {name}!")
              continue
          launch_ops[index] = launch_op.name
      return launch_ops
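The keying scheme above matches communication kernels (names starting with "hcom_") by name alone, and all other kernels by the (name, timestamp) pair. A self-contained sketch of that lookup; LaunchEvent is an illustrative stand-in for MindSpore's CANNEvent, not the real class:

    from dataclasses import dataclass

    @dataclass
    class LaunchEvent:
        name: str
        ts: float

    def build_kernel_map(events):
        kernel_map = {}
        for e in events:
            # hcom_* kernels keyed by name only; others by (name, timestamp)
            key = e.name if e.name.startswith("hcom_") else (e.name, e.ts)
            kernel_map[key] = e
        return kernel_map

    events = [LaunchEvent("aclnnAdd", 10.5), LaunchEvent("hcom_allReduce_1", 12.0)]
    kmap = build_kernel_map(events)
    assert kmap[("aclnnAdd", 10.5)].name == "aclnnAdd"
    assert kmap["hcom_allReduce_1"].ts == 12.0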
@@ -1547,6 +1489,7 @@ class Profiler:
      else:
          MultiProcessPool().add_async_job(self._ascend_graph_analyse_inner)

+ @timeit("Profiler analyse done")
  def _ascend_graph_analyse_inner(self, offline_path=None):
      """Ascend graph mode analyse."""
      job_id = self._get_profiling_job_id(offline_path)
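The timeit decorator added above is internal to MindSpore and its body is not shown in this diff. A minimal equivalent, assuming it simply logs wall-clock duration together with the configured message:

    import functools
    import logging
    import time

    logger = logging.getLogger(__name__)

    def timeit(message):
        def decorator(func):
            @functools.wraps(func)
            def wrapper(*args, **kwargs):
                start = time.perf_counter()
                try:
                    return func(*args, **kwargs)
                finally:
                    logger.info("%s, cost: %.3f s", message, time.perf_counter() - start)
            return wrapper
        return decorator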
@@ -1558,7 +1501,7 @@ class Profiler:
      source_path = os.path.join(self._output_path, job_id)
      self._minddata_analyse()
      if self._op_time:
-         mindstudio_profiler_output = os.path.abspath(
+         mindstudio_profiler_output = os.path.realpath(
              os.path.join(source_path, os.path.pardir, 'mindstudio_profiler_output'))
          flag = _ascend_graph_msprof_generator(mindstudio_profiler_output, self._model_iteration_dict)
          if not flag:
@@ -1567,14 +1510,17 @@ class Profiler:
      ProfilerInfo.set_export_flag(flag)
      op_summary, op_statistic, steptrace, steptrace_model \
          = _ascend_graph_msprof_analyse(mindstudio_profiler_output)
+     kernels = self._ascend_timeline_analyse(op_summary, steptrace, source_path, mindstudio_profiler_output)
+
      if isinstance(op_statistic, np.ndarray) and op_statistic.shape[0] == 0 or \
              not isinstance(op_statistic, np.ndarray) and not op_statistic:
+         logger.warning('Op statistic data is empty!')
          return
-     kernels = self._ascend_timeline_analyse(op_summary, steptrace, source_path, mindstudio_profiler_output)
+
      launch_ops = self._get_kernel_op_map(op_summary, kernels)
      self._ascend_op_analyse(op_summary, op_statistic, self._dynamic_status, launch_ops)
      graph_ids = np.unique(op_summary['Model ID']).tolist()
-     points = self._ascend_fpbp_analyse(op_summary, steptrace)
+     self._ascend_fpbp_analyse(op_summary, steptrace)
      if len(graph_ids) == 1:
          self._ascend_step_trace_analyse(steptrace)
      else:
@@ -1582,13 +1528,13 @@ class Profiler:
      if self._dynamic_status:
          self._ascend_dynamic_net_analyse(op_summary)
      self._ascend_flops_analyse(op_summary, launch_ops)
-     self._ascend_graph_memory_analyse(points)
+     self._ascend_graph_memory_analyse()
      self._ascend_ms_analyze(mindstudio_profiler_output)
      self._ascend_graph_hccl_analyse(mindstudio_profiler_output, steptrace)
-     self._ascend_graph_msadvisor_analyse(job_id)
      self._minddata_aicpu_analyse(self._output_path, job_id)
      ProfilerInfo.set_graph_ids(graph_ids)
      try:
+         ProfilerInfo.set_data_simplification(self._data_simplification)
          ProfilerPathManager.simplify_data(self._output_path, self._data_simplification)
      except RuntimeError as err:
          logger.error('Profiler simplify data failed, %s', str(err))
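The emptiness guard in this hunk leans on Python operator precedence: and binds tighter than or, so the condition reads as "(ndarray with zero rows) or (non-ndarray that is falsy)". Spelled out with explicit parentheses:

    import numpy as np

    def op_statistic_is_empty(op_statistic):
        # equivalent to the guard above, with the implicit grouping made visible
        return (isinstance(op_statistic, np.ndarray) and op_statistic.shape[0] == 0) or \
               (not isinstance(op_statistic, np.ndarray) and not op_statistic)

    assert op_statistic_is_empty(np.empty((0, 3)))     # ndarray with no rows
    assert op_statistic_is_empty([])                   # falsy non-ndarray
    assert not op_statistic_is_empty(np.ones((2, 3)))  # populated ndarray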
@@ -1690,7 +1636,7 @@ class Profiler:
      try:
          timeline_generator = CpuTimelineGenerator(self._output_path, self._rank_id, context.get_context("mode"))
          timeline_generator.init_timeline(pretty=self._pretty_json)
-         timeline_generator.write_timeline(self._timeline_size_limit_byte)
+         timeline_generator.write_timeline()
          timeline_generator.write_timeline_summary()
      except (ProfilerIOException, ProfilerFileNotFoundException, RuntimeError) as err:
          logger.warning('Fail to write timeline data: %s', err)
@@ -1699,15 +1645,13 @@ class Profiler:
      raise RuntimeError("Currently, the CPU platform does not support Pynative mode to collect performance "
                         "data.")

- def _analyse_step_trace(self, source_path=None, framework_parser=None, is_training_mode_flag=True,
-                         is_gpu_kernel_async_launch_flag=False):
+ def _analyse_step_trace(self, is_training_mode_flag=True, is_gpu_kernel_async_launch_flag=False):
      """
      Analyse step trace data and save the result.

      Args:
-         source_path (str): The directory that contains the step trace original data.
-         framework_parser (FrameworkParser): The framework parse instance.
          is_training_mode_flag (bool): Whether in training mode or not.
+         is_gpu_kernel_async_launch_flag (bool): Whether GPU kernels are launched asynchronously.
      """
      logger.info("Begin to parse step trace.")
      # construct output path
@@ -1738,56 +1682,31 @@ class Profiler:
      logger.info("Finish saving the intermediate result: %s", step_trace_intermediate_file_path)
      logger.info("The point info is: %s", point_info)

-     return point_info, is_training_mode_flag
-     return {}, is_training_mode_flag
-
-     # whether keep the first step
-     skip_first_step_flag = framework_parser.check_op_name(INIT_OP_NAME)
-     # recognize inference or training mode
-     is_training_mode_flag = framework_parser.check_op_name("Gradients")
-     # parser the step trace files and save the result to disk
-     source_path = validate_and_normalize_path(source_path)
-     parser = AscendStepTraceParser(input_dir=source_path,
-                                    output_file_path=step_trace_intermediate_file_path,
-                                    skip_first_step=skip_first_step_flag,
-                                    is_training_mode=is_training_mode_flag)
-     parser.set_task_id_op_name_dict(framework_parser.to_task_id_full_op_name_dict())
-     parser.parse_and_save()
-     point_info = parser.record_point_info(point_info_file_path)
-
-     # print parser result
-     parser.show()
-     logger.info("Finish saving the intermediate result: %s", step_trace_intermediate_file_path)
-     logger.info("The point info is: %s", point_info)
-
-     return point_info, is_training_mode_flag
-
  def _generate_timeline(self, reduce_op_type):
      """Used for gpu, generate timeline info, write to json format file."""
      try:
          timeline_generator = GpuTimelineGenerator(self._output_path, self._dev_id, self._rank_size,
                                                    context.get_context("mode"))
          timeline_generator.init_timeline(reduce_op_type)
-         self._timeline_meta = timeline_generator.write_timeline(self._timeline_size_limit_byte)
+         self._timeline_meta = timeline_generator.write_timeline()
          timeline_generator.write_timeline_summary()
+         timeline_generator.parse_fwk_data()
+         timeline_generator.write_fwk_timeline()
          return timeline_generator
      except (ProfilerIOException, ProfilerFileNotFoundException, RuntimeError) as err:
          logger.warning('Fail to write timeline data: %s', err)
          raise RuntimeError('Fail to write timeline data.') from err

- def _analyse_memory_usage(self, points):
+ def _analyse_memory_usage(self):
      """Analyse memory usage data."""
      integrator = Integrator(self._output_path, self._rank_id)
-     aicore_detail_data = integrator.get_aicore_detail_data()
-     memory_parser = MemoryUsageParser(self._output_path, self._rank_id, pretty=self._pretty_json)
-     memory_parser.init_memory_usage_info(aicore_detail_data, points)
-     memory_parser.write_memory_files()
+     integrator.get_aicore_detail_data()

  def _get_profiling_job_id(self, offline_path):
      """Get profiling job id, which was generated by ada service.

      Returns:
-         str, profiling job id.
+         str, profiling job id, e.g. PROF_XXX/device_*.
      """

      if offline_path:
@@ -1816,18 +1735,17 @@ class Profiler:
                         "profiler will ignore this job dir.", job_dir)
          continue

-     prof_rank_id, prof_device_id = self._parse_info_json(info_file_path)
+     prof_rank_id = ProfilerInfo.get_rank_id(self._output_path)
+     prof_device_id = ProfilerInfo.get_device_id(prof_dir)
      job_start_time = self._parse_job_start_time(prof_dir)

      if offline_path:
-         if self._rank_id != prof_rank_id:
-             continue
          self._start_time = int(job_start_time)
      else:
          if self._dev_id != prof_device_id and self._rank_id != prof_rank_id:
-             logger.debug("Find profiling find job path %s, but not current training device id. "
-                          "Current training rank id %s, but job path rank id: %s, "
-                          "profiler will ignore this job dir.", job_dir, self._rank_id, prof_rank_id)
+             logger.warning("Found profiling job path %s, but it does not belong to the current training "
+                            "device. Current training rank id is %s, but job path rank id is %s; "
+                            "profiler will ignore this job dir.", job_dir, self._rank_id, prof_rank_id)
              continue

      if job_start_time < self._start_time:
@@ -1936,19 +1854,21 @@ class Profiler:
          self._output_path = validate_and_normalize_path(output_path)
      else:
          output_path = kwargs.pop("output_path")
+         if not isinstance(output_path, str):
+             logger.warning(
+                 f"The output_path must be a string, but got type {type(output_path)}, it will be set to 'data'.")
+             output_path = "data"
          self._output_path = validate_and_normalize_path(output_path)

      self._output_path = os.path.join(self._output_path, "profiler")
      if not os.path.exists(self._output_path):
-         os.makedirs(self._output_path, exist_ok=True)
-         os.chmod(self._output_path, stat.S_IRUSR | stat.S_IWUSR | stat.S_IXUSR)
+         os.makedirs(self._output_path, exist_ok=True, mode=stat.S_IRUSR | stat.S_IWUSR | stat.S_IXUSR)
      else:
          logger.warning("The target dir already exists. "
                         "There may be some old profiling data, and they will be rewritten in the end.")
      self._framework_path = os.path.join(self._output_path, "FRAMEWORK")
      if not os.path.exists(self._framework_path):
-         os.makedirs(self._framework_path, exist_ok=True)
-         os.chmod(self._framework_path, stat.S_IRUSR | stat.S_IWUSR | stat.S_IXUSR)
+         os.makedirs(self._framework_path, exist_ok=True, mode=stat.S_IRUSR | stat.S_IWUSR | stat.S_IXUSR)

  def _parser_kwargs(self, kwargs):
      """Parse kwargs value."""
@@ -1969,11 +1889,11 @@ class Profiler:
                         f"but got type {type(self._op_time)}, it will be set to True.")
          self._op_time = True

-     self._data_process = kwargs.pop("data_process", True)
+     self._data_process = kwargs.pop("data_process", False)
      if not isinstance(self._data_process, bool):
          logger.warning(f"For '{self.__class__.__name__}', the parameter data_process must be bool, "
-                        f"but got type {type(self._data_process)}, it will be set to True.")
-         self._data_process = True
+                        f"but got type {type(self._data_process)}, it will be set to False.")
+         self._data_process = False

      timeline_limit = kwargs.pop("timeline_limit", 500)
      if isinstance(timeline_limit, bool) or not isinstance(timeline_limit, int):
@@ -1985,70 +1905,22 @@ class Profiler:
              "[Profiler]The 'timeline_limit' parameter must be greater than 0, it will be set to 500.")
          timeline_limit = 500
      self._timeline_size_limit_byte = timeline_limit * 1024 * 1024
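The isinstance(timeline_limit, bool) test ahead of the int test is deliberate: bool is a subclass of int in Python, so isinstance(True, int) is True and a bare integer check would silently accept timeline_limit=True. The validation pattern, extracted for illustration:

    def validate_timeline_limit(value, default=500):
        if isinstance(value, bool) or not isinstance(value, int):
            return default  # reject bools and non-ints
        if value <= 0:
            return default  # reject non-positive sizes
        return value

    assert validate_timeline_limit(True) == 500   # bool rejected despite being an int
    assert validate_timeline_limit(-3) == 500
    assert validate_timeline_limit(200) == 200
    assert validate_timeline_limit(500) * 1024 * 1024 == 524_288_000  # 500 MB in bytes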
-     self._profile_framework = kwargs.pop("profile_framework", "all")
-     if self._profile_framework not in ["memory", "time", "all", None]:
-         logger.warning(f"For '{self.__class__.__name__}', the parameter profile_framework must be one of ['memory',"
-                        f" 'time', 'all', None], but got {self._profile_framework}, it will be set to 'all'.")
-         self._profile_framework = "all"
-     if not isinstance(self._data_simplification, bool):
-         logger.warning(f"For '{self.__class__.__name__}', the parameter data_simplification must be bool, "
-                        f"but got type {type(self._data_simplification)}, it will be set to True.")
-         self._data_simplification = True
+     self._profile_framework = kwargs.pop("profile_framework", None)
+     if self._profile_framework not in ["time", "all", None]:
+         logger.warning(f"For '{self.__class__.__name__}', the parameter profile_framework must be one of ["
+                        f" 'time', 'all', None], but got {self._profile_framework}, it will be set to None.")
+         self._profile_framework = None

      if not isinstance(self._data_simplification, bool):
          logger.warning(f"For '{self.__class__.__name__}', the parameter data_simplification must be bool, "
                         f"but got type {type(self._data_simplification)}, it will be set to True.")
          self._data_simplification = True

-     self._host_stack = kwargs.pop("host_stack", True)
-     if not isinstance(self._host_stack, bool):
-         logger.warning(f"For '{self.__class__.__name__}', the parameter host_stack must be bool, but got "
-                        f"type {type(self._host_stack)}, it will be set to True.")
-         self._host_stack = True
-
- def _host_info_analyse(self):
-     """
-     Read data from the csv file, and write it into timeline file, so the timeline can be show on tracing tool.
-     """
-     logger.info("Profiling HostInfo start.")
-     host_dir = os.path.join(self._output_path, 'host_info')
-     host_dir = validate_and_normalize_path(host_dir)
-     if not os.path.exists(host_dir):
-         logger.warning("Host info directory: %s not exist.", host_dir)
-         return
-     csv_file_name = 'host_info_' + str(self._rank_id) + '.csv'
-     json_file_name = 'timeline_' + str(self._rank_id) + '.json'
-     memory_file_name = 'host_memory_' + str(self._rank_id) + '.csv'
-     dataset_file_name = 'dataset_' + str(self._rank_id) + '.csv'
-     host_info_file = os.path.join(self._output_path, 'host_info', csv_file_name)
-     timeline_file = os.path.join(self._output_path, 'host_info', json_file_name)
-     memory_file = os.path.join(self._output_path, 'host_info', memory_file_name)
-     dataset_execution_file = os.path.join(self._output_path, 'host_info', dataset_file_name)
-     _parse_host_info(host_info_file, timeline_file, memory_file)
-     _calculate_dataset_execution_time(host_info_file, dataset_execution_file)
-     logger.info("Profile HostInfo finished.")
-
-
- def _offline_parse(offline_path):
-     """Parse data in abnormal scenario, only support for host_info at present."""
-     logger.info("Profiling HostInfo offline start.")
-     host_dir = os.path.join(offline_path, 'profiler', 'host_info')
-     host_dir = validate_and_normalize_path(host_dir)
-     if not os.path.exists(host_dir):
-         logger.warning("Host info directory: %s not exist.", host_dir)
-         return
-     files = os.listdir(host_dir)
-     for file in files:
-         if not file.startswith("host_info_") or not file.endswith(".csv"):
-             continue
-         rank_id = file.split('_')[-1].split('.')[0]
-         if not rank_id.isdigit():
-             logger.info("Cannot get rank_id from file: %s, skip it", file)
-             return
-         host_info_file = os.path.join(host_dir, file)
-         timeline_file = os.path.join(host_dir, f'timeline_{rank_id}.json')
-         memory_file = os.path.join(host_dir, f'host_memory_{rank_id}.csv')
-         dataset_execution_file = os.path.join(host_dir, f'dataset_{rank_id}.csv')
-         _parse_host_info(host_info_file, timeline_file, memory_file)
-         _calculate_dataset_execution_time(host_info_file, dataset_execution_file)
-     logger.info("Profile HostInfo offline finished.")
+     self._with_stack = kwargs.pop("with_stack", False)
+     if not isinstance(self._with_stack, bool):
+         logger.warning(f"For '{self.__class__.__name__}', the parameter with_stack must be bool, but got "
+                        f"type {type(self._with_stack)}, it will be set to False.")
+         self._with_stack = False
+     if self._with_stack and self._profile_framework not in ["time", "all"]:
+         logger.warning("When using the with_stack parameter, the profile_framework parameter must be enabled.")
+         self._with_stack = False
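Taken together, the reworked kwargs suggest a call pattern roughly like the following; the parameter names and defaults come from this diff, but the surrounding script is only a sketch, not documented usage:

    from mindspore import Profiler

    # data_process now defaults to False, profile_framework to None, and
    # with_stack is only honored when profile_framework is "time" or "all".
    profiler = Profiler(output_path="./data",
                        data_process=False,
                        timeline_limit=500,        # cap in MB for timeline files
                        profile_framework="time",
                        with_stack=True)

    # ... run training or inference steps here ...

    profiler.analyse()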