mindspore 2.3.0__cp39-cp39-win_amd64.whl → 2.4.0__cp39-cp39-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mindspore might be problematic. Click here for more details.

Files changed (285)
  1. mindspore/.commit_id +1 -1
  2. mindspore/__init__.py +3 -1
  3. mindspore/_c_dataengine.cp39-win_amd64.pyd +0 -0
  4. mindspore/_c_expression.cp39-win_amd64.pyd +0 -0
  5. mindspore/_c_mindrecord.cp39-win_amd64.pyd +0 -0
  6. mindspore/_checkparam.py +50 -9
  7. mindspore/_extends/parse/compile_config.py +41 -0
  8. mindspore/_extends/parse/parser.py +9 -7
  9. mindspore/_extends/parse/standard_method.py +52 -14
  10. mindspore/_extends/pijit/pijit_func_white_list.py +350 -24
  11. mindspore/amp.py +24 -10
  12. mindspore/avcodec-59.dll +0 -0
  13. mindspore/avdevice-59.dll +0 -0
  14. mindspore/avfilter-8.dll +0 -0
  15. mindspore/avformat-59.dll +0 -0
  16. mindspore/avutil-57.dll +0 -0
  17. mindspore/common/__init__.py +6 -4
  18. mindspore/common/_pijit_context.py +190 -0
  19. mindspore/common/_register_for_tensor.py +2 -1
  20. mindspore/common/_tensor_overload.py +139 -0
  21. mindspore/common/api.py +102 -87
  22. mindspore/common/dump.py +5 -6
  23. mindspore/common/generator.py +1 -7
  24. mindspore/common/hook_handle.py +14 -26
  25. mindspore/common/mindir_util.py +2 -2
  26. mindspore/common/parameter.py +46 -13
  27. mindspore/common/recompute.py +39 -9
  28. mindspore/common/sparse_tensor.py +7 -3
  29. mindspore/common/tensor.py +209 -29
  30. mindspore/communication/__init__.py +1 -1
  31. mindspore/communication/_comm_helper.py +38 -3
  32. mindspore/communication/comm_func.py +310 -55
  33. mindspore/communication/management.py +14 -14
  34. mindspore/context.py +123 -22
  35. mindspore/dataset/__init__.py +1 -1
  36. mindspore/dataset/audio/__init__.py +1 -1
  37. mindspore/dataset/core/config.py +7 -0
  38. mindspore/dataset/core/validator_helpers.py +7 -0
  39. mindspore/dataset/engine/cache_client.py +1 -1
  40. mindspore/dataset/engine/datasets.py +72 -44
  41. mindspore/dataset/engine/datasets_audio.py +7 -7
  42. mindspore/dataset/engine/datasets_standard_format.py +53 -3
  43. mindspore/dataset/engine/datasets_text.py +20 -20
  44. mindspore/dataset/engine/datasets_user_defined.py +174 -104
  45. mindspore/dataset/engine/datasets_vision.py +33 -33
  46. mindspore/dataset/engine/iterators.py +29 -0
  47. mindspore/dataset/engine/obs/util.py +7 -0
  48. mindspore/dataset/engine/queue.py +114 -60
  49. mindspore/dataset/engine/serializer_deserializer.py +2 -2
  50. mindspore/dataset/engine/validators.py +34 -14
  51. mindspore/dataset/text/__init__.py +1 -4
  52. mindspore/dataset/transforms/__init__.py +0 -3
  53. mindspore/dataset/utils/line_reader.py +2 -0
  54. mindspore/dataset/vision/__init__.py +1 -4
  55. mindspore/dataset/vision/utils.py +1 -1
  56. mindspore/dataset/vision/validators.py +2 -1
  57. mindspore/dnnl.dll +0 -0
  58. mindspore/{nn/extend → experimental/es}/__init__.py +4 -11
  59. mindspore/experimental/es/embedding_service.py +883 -0
  60. mindspore/{nn/layer → experimental/es}/embedding_service_layer.py +218 -30
  61. mindspore/experimental/llm_boost/__init__.py +21 -0
  62. mindspore/{nn/extend/layer → experimental/llm_boost/atb}/__init__.py +4 -8
  63. mindspore/experimental/llm_boost/atb/boost_base.py +211 -0
  64. mindspore/experimental/llm_boost/atb/llama_boost.py +115 -0
  65. mindspore/experimental/llm_boost/atb/qwen_boost.py +101 -0
  66. mindspore/experimental/llm_boost/register.py +129 -0
  67. mindspore/experimental/llm_boost/utils.py +31 -0
  68. mindspore/experimental/optim/adamw.py +85 -0
  69. mindspore/experimental/optim/optimizer.py +3 -0
  70. mindspore/hal/__init__.py +3 -3
  71. mindspore/hal/contiguous_tensors_handle.py +175 -0
  72. mindspore/hal/stream.py +18 -0
  73. mindspore/include/api/model_group.h +13 -1
  74. mindspore/include/api/types.h +10 -10
  75. mindspore/include/dataset/config.h +2 -2
  76. mindspore/include/dataset/constants.h +2 -2
  77. mindspore/include/dataset/execute.h +2 -2
  78. mindspore/include/dataset/vision.h +4 -0
  79. mindspore/jpeg62.dll +0 -0
  80. mindspore/log.py +1 -1
  81. mindspore/mindrecord/filewriter.py +68 -51
  82. mindspore/mindspore_backend.dll +0 -0
  83. mindspore/mindspore_common.dll +0 -0
  84. mindspore/mindspore_core.dll +0 -0
  85. mindspore/mindspore_glog.dll +0 -0
  86. mindspore/mindspore_np_dtype.dll +0 -0
  87. mindspore/mindspore_ops.dll +0 -0
  88. mindspore/mint/__init__.py +495 -46
  89. mindspore/mint/distributed/__init__.py +31 -0
  90. mindspore/mint/distributed/distributed.py +254 -0
  91. mindspore/mint/nn/__init__.py +266 -21
  92. mindspore/mint/nn/functional.py +125 -19
  93. mindspore/mint/nn/layer/__init__.py +39 -0
  94. mindspore/mint/nn/layer/activation.py +133 -0
  95. mindspore/mint/nn/layer/normalization.py +477 -0
  96. mindspore/mint/nn/layer/pooling.py +110 -0
  97. mindspore/mint/optim/adamw.py +28 -7
  98. mindspore/mint/special/__init__.py +63 -0
  99. mindspore/multiprocessing/__init__.py +2 -1
  100. mindspore/nn/__init__.py +0 -1
  101. mindspore/nn/cell.py +275 -93
  102. mindspore/nn/layer/activation.py +211 -44
  103. mindspore/nn/layer/basic.py +113 -3
  104. mindspore/nn/layer/embedding.py +120 -2
  105. mindspore/nn/layer/normalization.py +101 -5
  106. mindspore/nn/layer/padding.py +34 -48
  107. mindspore/nn/layer/pooling.py +161 -7
  108. mindspore/nn/layer/transformer.py +3 -3
  109. mindspore/nn/loss/__init__.py +2 -2
  110. mindspore/nn/loss/loss.py +84 -6
  111. mindspore/nn/optim/__init__.py +2 -1
  112. mindspore/nn/optim/adadelta.py +1 -1
  113. mindspore/nn/optim/adam.py +1 -1
  114. mindspore/nn/optim/lamb.py +1 -1
  115. mindspore/nn/optim/tft_wrapper.py +127 -0
  116. mindspore/nn/wrap/cell_wrapper.py +12 -23
  117. mindspore/nn/wrap/grad_reducer.py +5 -5
  118. mindspore/nn/wrap/loss_scale.py +17 -3
  119. mindspore/numpy/__init__.py +1 -1
  120. mindspore/numpy/array_creations.py +65 -68
  121. mindspore/numpy/array_ops.py +64 -60
  122. mindspore/numpy/fft.py +610 -75
  123. mindspore/numpy/logic_ops.py +11 -10
  124. mindspore/numpy/math_ops.py +85 -84
  125. mindspore/numpy/utils_const.py +4 -4
  126. mindspore/opencv_core452.dll +0 -0
  127. mindspore/opencv_imgcodecs452.dll +0 -0
  128. mindspore/opencv_imgproc452.dll +0 -0
  129. mindspore/ops/__init__.py +6 -4
  130. mindspore/ops/_grad_experimental/grad_comm_ops.py +47 -3
  131. mindspore/ops/_grad_experimental/grad_math_ops.py +0 -22
  132. mindspore/ops/_vmap/vmap_array_ops.py +2 -4
  133. mindspore/ops/_vmap/vmap_math_ops.py +17 -1
  134. mindspore/ops/_vmap/vmap_nn_ops.py +43 -2
  135. mindspore/ops/auto_generate/cpp_create_prim_instance_helper.py +85 -7
  136. mindspore/ops/auto_generate/gen_arg_dtype_cast.py +2 -0
  137. mindspore/ops/auto_generate/gen_extend_func.py +734 -13
  138. mindspore/ops/auto_generate/gen_ops_def.py +2420 -381
  139. mindspore/ops/auto_generate/gen_ops_prim.py +5196 -1659
  140. mindspore/ops/auto_generate/pyboost_inner_prim.py +176 -56
  141. mindspore/ops/composite/base.py +85 -48
  142. mindspore/ops/composite/multitype_ops/_compile_utils.py +1 -0
  143. mindspore/ops/composite/multitype_ops/not_in_impl.py +2 -2
  144. mindspore/ops/function/__init__.py +22 -0
  145. mindspore/ops/function/array_func.py +490 -153
  146. mindspore/ops/function/debug_func.py +113 -1
  147. mindspore/ops/function/fft_func.py +15 -2
  148. mindspore/ops/function/grad/grad_func.py +3 -2
  149. mindspore/ops/function/math_func.py +558 -207
  150. mindspore/ops/function/nn_func.py +817 -383
  151. mindspore/ops/function/other_func.py +3 -2
  152. mindspore/ops/function/random_func.py +184 -8
  153. mindspore/ops/function/reshard_func.py +13 -11
  154. mindspore/ops/function/sparse_unary_func.py +1 -1
  155. mindspore/ops/function/vmap_func.py +3 -2
  156. mindspore/ops/functional.py +24 -14
  157. mindspore/ops/op_info_register.py +3 -3
  158. mindspore/ops/operations/__init__.py +6 -1
  159. mindspore/ops/operations/_grad_ops.py +2 -76
  160. mindspore/ops/operations/_infer_ops.py +1 -1
  161. mindspore/ops/operations/_inner_ops.py +71 -94
  162. mindspore/ops/operations/array_ops.py +12 -146
  163. mindspore/ops/operations/comm_ops.py +42 -53
  164. mindspore/ops/operations/custom_ops.py +83 -19
  165. mindspore/ops/operations/debug_ops.py +42 -10
  166. mindspore/ops/operations/manually_defined/_inner.py +12 -0
  167. mindspore/ops/operations/manually_defined/ops_def.py +265 -10
  168. mindspore/ops/operations/math_ops.py +12 -223
  169. mindspore/ops/operations/nn_ops.py +20 -114
  170. mindspore/ops/operations/other_ops.py +7 -4
  171. mindspore/ops/operations/random_ops.py +46 -1
  172. mindspore/ops/primitive.py +18 -6
  173. mindspore/ops_generate/arg_dtype_cast.py +2 -0
  174. mindspore/ops_generate/gen_aclnn_implement.py +11 -11
  175. mindspore/ops_generate/gen_constants.py +36 -0
  176. mindspore/ops_generate/gen_ops.py +67 -52
  177. mindspore/ops_generate/gen_ops_inner_prim.py +1 -1
  178. mindspore/ops_generate/gen_pyboost_func.py +131 -47
  179. mindspore/ops_generate/op_proto.py +10 -3
  180. mindspore/ops_generate/pyboost_utils.py +14 -1
  181. mindspore/ops_generate/template.py +43 -21
  182. mindspore/parallel/__init__.py +3 -1
  183. mindspore/parallel/_auto_parallel_context.py +28 -8
  184. mindspore/parallel/_cell_wrapper.py +83 -0
  185. mindspore/parallel/_parallel_serialization.py +47 -19
  186. mindspore/parallel/_tensor.py +81 -11
  187. mindspore/parallel/_utils.py +13 -1
  188. mindspore/parallel/algo_parameter_config.py +5 -5
  189. mindspore/parallel/checkpoint_transform.py +46 -39
  190. mindspore/parallel/cluster/process_entity/__init__.py +1 -1
  191. mindspore/parallel/cluster/process_entity/_api.py +31 -23
  192. mindspore/parallel/cluster/process_entity/_utils.py +2 -27
  193. mindspore/parallel/parameter_broadcast.py +3 -4
  194. mindspore/parallel/shard.py +162 -31
  195. mindspore/parallel/transform_safetensors.py +993 -0
  196. mindspore/profiler/__init__.py +2 -1
  197. mindspore/profiler/common/constant.py +29 -0
  198. mindspore/profiler/common/registry.py +47 -0
  199. mindspore/profiler/common/util.py +28 -0
  200. mindspore/profiler/dynamic_profiler.py +694 -0
  201. mindspore/profiler/envprofiling.py +17 -19
  202. mindspore/profiler/parser/ascend_analysis/constant.py +18 -0
  203. mindspore/profiler/parser/ascend_analysis/file_manager.py +25 -4
  204. mindspore/profiler/parser/ascend_analysis/function_event.py +43 -19
  205. mindspore/profiler/parser/ascend_analysis/fwk_cann_parser.py +31 -26
  206. mindspore/profiler/parser/ascend_analysis/fwk_file_parser.py +56 -10
  207. mindspore/profiler/parser/ascend_analysis/msprof_timeline_parser.py +55 -8
  208. mindspore/profiler/parser/ascend_analysis/path_manager.py +313 -0
  209. mindspore/profiler/parser/ascend_analysis/profiler_info_parser.py +27 -20
  210. mindspore/profiler/parser/ascend_analysis/trace_event_manager.py +9 -2
  211. mindspore/profiler/parser/ascend_msprof_exporter.py +5 -4
  212. mindspore/profiler/parser/ascend_timeline_generator.py +27 -25
  213. mindspore/profiler/parser/base_timeline_generator.py +19 -25
  214. mindspore/profiler/parser/cpu_gpu_timeline_generator.py +25 -12
  215. mindspore/profiler/parser/framework_parser.py +1 -391
  216. mindspore/profiler/parser/gpu_analysis/__init__.py +14 -0
  217. mindspore/profiler/parser/gpu_analysis/function_event.py +44 -0
  218. mindspore/profiler/parser/gpu_analysis/fwk_file_parser.py +89 -0
  219. mindspore/profiler/parser/gpu_analysis/profiler_info_parser.py +72 -0
  220. mindspore/profiler/parser/memory_usage_parser.py +0 -154
  221. mindspore/profiler/parser/profiler_info.py +78 -6
  222. mindspore/profiler/profiler.py +153 -0
  223. mindspore/profiler/profiling.py +280 -412
  224. mindspore/rewrite/__init__.py +1 -2
  225. mindspore/rewrite/common/namespace.py +4 -4
  226. mindspore/rewrite/symbol_tree/symbol_tree.py +3 -3
  227. mindspore/run_check/_check_version.py +36 -103
  228. mindspore/safeguard/rewrite_obfuscation.py +591 -247
  229. mindspore/swresample-4.dll +0 -0
  230. mindspore/swscale-6.dll +0 -0
  231. mindspore/tinyxml2.dll +0 -0
  232. mindspore/train/__init__.py +4 -3
  233. mindspore/train/_utils.py +28 -2
  234. mindspore/train/amp.py +171 -53
  235. mindspore/train/callback/__init__.py +2 -2
  236. mindspore/train/callback/_callback.py +4 -4
  237. mindspore/train/callback/_checkpoint.py +85 -22
  238. mindspore/train/callback/_cluster_monitor.py +1 -1
  239. mindspore/train/callback/_flops_collector.py +1 -0
  240. mindspore/train/callback/_loss_monitor.py +3 -3
  241. mindspore/train/callback/_on_request_exit.py +134 -31
  242. mindspore/train/callback/_summary_collector.py +5 -5
  243. mindspore/train/callback/_tft_register.py +352 -0
  244. mindspore/train/dataset_helper.py +7 -3
  245. mindspore/train/metrics/metric.py +3 -3
  246. mindspore/train/metrics/roc.py +4 -4
  247. mindspore/train/mind_ir_pb2.py +44 -39
  248. mindspore/train/model.py +134 -58
  249. mindspore/train/serialization.py +336 -112
  250. mindspore/turbojpeg.dll +0 -0
  251. mindspore/utils/__init__.py +21 -0
  252. mindspore/utils/utils.py +60 -0
  253. mindspore/version.py +1 -1
  254. {mindspore-2.3.0.dist-info → mindspore-2.4.0.dist-info}/METADATA +6 -2
  255. {mindspore-2.3.0.dist-info → mindspore-2.4.0.dist-info}/RECORD +258 -252
  256. mindspore/include/c_api/ms/abstract.h +0 -67
  257. mindspore/include/c_api/ms/attribute.h +0 -197
  258. mindspore/include/c_api/ms/base/handle_types.h +0 -43
  259. mindspore/include/c_api/ms/base/macros.h +0 -32
  260. mindspore/include/c_api/ms/base/status.h +0 -33
  261. mindspore/include/c_api/ms/base/types.h +0 -283
  262. mindspore/include/c_api/ms/context.h +0 -102
  263. mindspore/include/c_api/ms/graph.h +0 -160
  264. mindspore/include/c_api/ms/node.h +0 -606
  265. mindspore/include/c_api/ms/tensor.h +0 -161
  266. mindspore/include/c_api/ms/value.h +0 -84
  267. mindspore/mindspore_shared_lib.dll +0 -0
  268. mindspore/nn/extend/basic.py +0 -140
  269. mindspore/nn/extend/embedding.py +0 -143
  270. mindspore/nn/extend/layer/normalization.py +0 -109
  271. mindspore/nn/extend/pooling.py +0 -117
  272. mindspore/nn/layer/embedding_service.py +0 -531
  273. mindspore/ops/_op_impl/aicpu/strided_slice_v2.py +0 -93
  274. mindspore/ops/_op_impl/aicpu/strided_slice_v2_grad.py +0 -66
  275. mindspore/ops/extend/__init__.py +0 -53
  276. mindspore/ops/extend/array_func.py +0 -218
  277. mindspore/ops/extend/math_func.py +0 -76
  278. mindspore/ops/extend/nn_func.py +0 -308
  279. mindspore/ops/silent_check.py +0 -162
  280. mindspore/profiler/parser/msadvisor_analyzer.py +0 -82
  281. mindspore/profiler/parser/msadvisor_parser.py +0 -240
  282. mindspore/train/callback/_mindio_ttp.py +0 -443
  283. {mindspore-2.3.0.dist-info → mindspore-2.4.0.dist-info}/WHEEL +0 -0
  284. {mindspore-2.3.0.dist-info → mindspore-2.4.0.dist-info}/entry_points.txt +0 -0
  285. {mindspore-2.3.0.dist-info → mindspore-2.4.0.dist-info}/top_level.txt +0 -0
@@ -14,19 +14,16 @@
14
14
  # ============================================================================
15
15
  """Profiling api file."""
16
16
  import os
17
- import re
18
- import shutil
19
17
  import stat
20
18
  import time
21
19
  import json
22
20
  from json import JSONDecodeError
23
21
  import glob
24
- import subprocess
25
- import csv
26
22
  import socket
23
+ import multiprocessing
27
24
  from enum import Enum
28
- from multiprocessing import Process
29
25
  from typing import List
26
+ from sys import getsizeof
30
27
  import numpy as np
31
28
 
32
29
  from mindspore import log as logger, context
@@ -47,13 +44,11 @@ from mindspore.profiler.parser.integrator import Integrator, DeviceTarget
47
44
  from mindspore.profiler.parser.ascend_analysis.function_event import CANNEvent
48
45
  from mindspore.profiler.parser.cpu_gpu_timeline_generator import GpuTimelineGenerator, CpuTimelineGenerator
49
46
  from mindspore.profiler.parser.ascend_timeline_generator import AscendTimelineGenerator
50
- from mindspore.profiler.parser.memory_usage_parser import MemoryUsageParser
51
47
  from mindspore.profiler.parser.minddata_parser import MinddataParser
52
48
  from mindspore.profiler.parser.minddata_analyzer import MinddataProfilingAnalyzer
53
49
  from mindspore.profiler.parser.minddata_pipeline_parser import \
54
50
  MinddataPipelineParser
55
- from mindspore.profiler.parser.step_trace_parser import GpuStepTraceParser, AscendStepTraceParser
56
- from mindspore.profiler.parser.msadvisor_analyzer import Msadvisor
51
+ from mindspore.profiler.parser.step_trace_parser import GpuStepTraceParser
57
52
  from mindspore.profiler.parser.profiler_info import ProfilerInfo
58
53
  from mindspore.common.api import _pynative_executor
59
54
  from mindspore.profiler.parser.ascend_msprof_exporter import AscendMsprofExporter
@@ -67,6 +62,11 @@ from mindspore.profiler.parser.ascend_hccl_generator import AscendHCCLGenerator
67
62
  from mindspore.profiler.parser.ascend_communicate_generator import AscendCommunicationGenerator
68
63
  from mindspore.profiler.parser.ascend_memory_generator import AscendMemoryGenerator
69
64
  from mindspore.profiler.parser.ascend_integrate_generator import AscendIntegrateGenerator
65
+ from mindspore.profiler.parser.ascend_analysis.file_manager import FileManager
66
+ from mindspore.profiler.parser.ascend_analysis.path_manager import PathManager
67
+ from mindspore.profiler.parser.ascend_analysis.constant import Constant
68
+ from mindspore.profiler.common.util import timeit
69
+
70
70
 
71
71
  INIT_OP_NAME = 'Default/InitDataSetQueue'
72
72
 
@@ -105,7 +105,7 @@ class DeviceSupportParam(Enum):
105
105
  ASCEND = [
106
106
  'start', 'start_profile', 'output_path', 'data_process', 'timeline_limit', 'profile_memory',
107
107
  'parallel_strategy', 'profile_communication', 'aicore_metrics', 'l2_cache', 'hbm_ddr', 'pcie', 'op_time',
108
- 'ascend_job_id', 'profile_framework', 'host_stack', 'profiler_level', 'data_simplification'
108
+ 'ascend_job_id', 'profile_framework', 'with_stack', 'profiler_level', 'data_simplification'
109
109
  ]
110
110
 
111
111
 
@@ -114,7 +114,6 @@ ALWAYS_VALID_PARAM = [
114
114
  'hbm_ddr', 'pcie', 'ascend_job_id', 'op_time', 'profile_framework', 'profiler_level'
115
115
  ]
116
116
 
117
-
118
117
  ANALYSIS_ASYNC_MODE = 'async'
119
118
  ANALYSIS_SYNC_MODE = 'sync'
120
119
  DEFAULT_MODEL_ID = 4294967295
@@ -164,147 +163,6 @@ def _calculate_dataset_item(row, execution_time_map, ts_map):
164
163
  logger.warning("Can not map the start time for item: %s.", row)
165
164
 
166
165
 
167
- def _calculate_dataset_execution_time(input_file, output_file):
168
- r"""
169
- Parse the host info into timeline file, so as to show on UI.
170
-
171
- Args:
172
- input_file: the original host_info file, in csv format.
173
- output_file: the output file, in csv format.
174
- """
175
- input_file = validate_and_normalize_path(input_file)
176
- # execution_time_map is used to store the ExecutionCalculator for each stage.
177
- execution_time_map = {}
178
- # ts_map is used to store the start time of each event_stage_tid_pid.
179
- ts_map = {}
180
- with open(input_file, 'r') as f:
181
- for row in csv.DictReader(f):
182
- try:
183
- module_name = row['module_name']
184
- if module_name != 'Dataset':
185
- continue
186
- _calculate_dataset_item(row, execution_time_map, ts_map)
187
- except KeyError as e:
188
- logger.error("Error occur when analyse line: %s, Details is: %s", row, e)
189
- continue
190
- if ts_map:
191
- logger.warning("Only start time is record for these items:")
192
- for k, v in ts_map.items():
193
- logger.warning("event_stage_tid_pid: %s, time: %d us.", k, v)
194
- output_file = validate_and_normalize_path(output_file)
195
- flags = os.O_WRONLY | os.O_CREAT | os.O_TRUNC
196
- modes = stat.S_IWUSR | stat.S_IRUSR
197
- with os.fdopen(os.open(output_file, flags, modes), 'w') as f:
198
- csv_writer = csv.writer(f)
199
- csv_writer.writerow(['Operation', 'Stage', 'Occurrences', 'Avg. time (us)', 'Custom Info'])
200
- for _, v in execution_time_map.items():
201
- csv_writer.writerow([v.event, v.stage, v.count, v.average_execution, v.custom_info])
202
- os.chmod(output_file, modes)
203
- logger.info('Successfully calculate the execution time and write it to file: %s.', output_file)
204
-
205
-
206
- def _extract_timeline_item(row, time_line, ts_map):
207
- """Process one row, try to extract a timeline item."""
208
- start_end = row['start_end']
209
- event_stage_tid_pid = row['event'] + '_' + row['stage'] + '_' + row['tid'] + '_' + row['pid']
210
- # map start and end, put the mapped event into timeline.
211
- if start_end == '1' and event_stage_tid_pid in ts_map:
212
- title = row['event'] + '::' + row['stage']
213
- event = {'name': title, 'cat': row['module_name']}
214
- ts_end = int(row['time_stamp(us)'])
215
- ts = ts_map[event_stage_tid_pid]
216
- event['ts'] = ts
217
- event['dur'] = ts_end - ts
218
- event['ph'] = 'X'
219
- event['pid'] = row['pid']
220
- event['tid'] = row['tid']
221
- event['args'] = {'parent_pid': row['parent_pid']}
222
- time_line.append(event)
223
- del ts_map[event_stage_tid_pid]
224
- elif start_end == '0':
225
- ts = int(row['time_stamp(us)'])
226
- ts_map[event_stage_tid_pid] = ts
227
- # Put the instance event into timeline.
228
- elif start_end == '2':
229
- title = row['event'] + '::' + row['stage']
230
- event = {
231
- 'name': title, 'cat': row['module_name'], 'ts': int(row['time_stamp(us)']), 'ph': 'i',
232
- 'pid': row['pid'], 'tid': row['tid'], 'args': {'parent_pid': row['parent_pid']}
233
- }
234
- time_line.append(event)
235
- else:
236
- logger.warning("Can not map the start time for item: %s.", row)
237
-
238
-
239
- def _parse_host_info(input_file, output_timeline_file, output_memory_file, is_develop_user=True):
240
- r"""
241
- Parse the host info into timeline file, so as to show on UI.
242
-
243
- Args:
244
- input_file: the original host_info file, in csv format.
245
- output_timeline_file: the output timeline file, in json format.
246
- output_memory_file: the output memory_usage file, in csv format.
247
- is_develop_user: some data only shown to develop users, other users no need to analyse it.
248
- """
249
- input_file = validate_and_normalize_path(input_file)
250
- time_line = []
251
- # ts_map is used to store the start time of each event_stage_tid_pid
252
- ts_map = {}
253
- memory_header = [
254
- 'tid', 'pid', 'parent_pid', 'module_name', 'event', 'stage', 'level', 'start_end', 'custom_info',
255
- 'memory_usage(kB)', 'time_stamp(us)'
256
- ]
257
- memory_info = []
258
- with open(input_file, 'r') as f:
259
- for row in csv.DictReader(f):
260
- try:
261
- level = row['level']
262
- if level == '0' and not is_develop_user:
263
- continue
264
- if int(row['time_stamp(us)']) > 0:
265
- _extract_timeline_item(row, time_line, ts_map)
266
- if int(row['memory_usage(kB)']) > 0:
267
- memory_info.append(row)
268
- except KeyError as e:
269
- logger.error("Error occur when analyse line: %s, Details is: %s", row, e)
270
- continue
271
- if memory_info:
272
- with os.fdopen(os.open(output_memory_file, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600), 'w') as csv_file:
273
- csv_writer = csv.DictWriter(csv_file, fieldnames=memory_header)
274
- csv_writer.writeheader()
275
- for item in memory_info:
276
- csv_writer.writerow(item)
277
- os.chmod(output_memory_file, stat.S_IREAD | stat.S_IWRITE)
278
- else:
279
- logger.warning("No memory_usage is record in file: %s", input_file)
280
-
281
- if ts_map:
282
- logger.warning("Only start time is record for these items:")
283
- for k, v in ts_map.items():
284
- logger.warning("event_stage_tid_pid: %s, time: %d us.", k, v)
285
- last_dash = k.rfind('_')
286
- if last_dash == -1:
287
- logger.error("Can't find pid in the event_stage_tid_pid string: %s", k)
288
- continue
289
- second_last_dash = k.rfind('_', 0, last_dash - 1)
290
- if second_last_dash == -1:
291
- logger.error("Can't find tid in the event_stage_tid_pid string: %s", k)
292
- continue
293
- pid = k[last_dash + 1:]
294
- tid = k[second_last_dash + 1: last_dash]
295
- title = k[:second_last_dash]
296
- unfinished_timeline = {'name': title, 'pid': pid, 'tid': tid, 'ph': 'B', 'ts': int(v)}
297
- time_line.append(unfinished_timeline)
298
-
299
- if time_line:
300
- timeline_file = validate_and_normalize_path(output_timeline_file)
301
- with os.fdopen(os.open(timeline_file, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600), 'w') as json_file:
302
- json.dump(time_line, json_file)
303
- os.chmod(timeline_file, stat.S_IREAD | stat.S_IWRITE)
304
- else:
305
- logger.warning("No valid time_stamp is record in file: %s", input_file)
306
-
307
-
308
166
  def _ascend_graph_msprof_generator(mindstudio_profiler_output, model_iteration_dict):
309
167
  """Executing the msprof export mode."""
310
168
  try:
@@ -351,20 +209,21 @@ class Profiler:
351
209
  output_path (str, optional): Output data path. Default: ``"./data"`` .
352
210
  profiler_level (ProfilerLevel, optional): (Ascend only) The level of profiling. Default: ``None``.
353
211
 
354
- - Profiler.Level0: Leanest level of profiling data collection, collects information about the elapsed
212
+ - ProfilerLevel.Level0: Leanest level of profiling data collection, collects information about the elapsed
355
213
  time of the computational operators on the NPU and communication large operator information.
356
- - Profiler.Level1: Collect more CANN layer AscendCL data and AICore performance metrics and communication
357
- mini operator information based on Level0.
358
- - Profiler.Level2: Collect GE and Runtime information in CANN layer on top of Level1
214
+ - ProfilerLevel.Level1: Collect more CANN layer AscendCL data and AICore performance metrics and
215
+ communication mini operator information based on Level0.
216
+ - ProfilerLevel.Level2: Collect GE and Runtime information in CANN layer on top of Level1
359
217
 
360
218
  op_time (bool, optional): (Ascend/GPU) Whether to collect operators performance data. Default value: ``True``.
361
219
  profile_communication (bool, optional): (Ascend only) Whether to collect communication performance data in
362
220
  a multi devices training,collect when True. Setting this parameter has no effect during single card
363
221
  training. When using this parameter, `op_time` must be set to ``True`` . Default: ``False`` .
364
222
  profile_memory (bool, optional): (Ascend only) Whether to collect tensor memory data, collect when ``True`` .
365
- When using this parameter, `op_time` must be set to True. Default: ``False`` .
223
+ When using this parameter, `op_time` must be set to True. Collecting operator memory data when the graph
224
+ compilation level is O2 requires collecting from the first step. Default: ``False`` .
366
225
  parallel_strategy (bool, optional): (Ascend only) Whether to collect parallel policy performance data.
367
- Default value: ``True`` .
226
+ Default value: ``False`` .
368
227
  start_profile (bool, optional): The start_profile parameter controls whether to enable or disable performance
369
228
  data collection based on conditions. Default: ``True`` .
370
229
  aicore_metrics (int, optional): (Ascend only) Types of AICORE performance data collected, when using this
@@ -380,11 +239,12 @@ class Profiler:
380
239
  - 4: ResourceConflictRatio contains vec_bankgroup/bank/resc_cflt_ratio etc.
381
240
  - 5: MemoryUB contains ub_read/write_bw_mte, ub_read/write_bw_vector, ub\_/write_bw_scalar etc.
382
241
  - 6: L2Cache contains write_cache_hit, write_cache_miss_allocate, r0_read_cache_hit, r1_read_cache_hit etc.
242
+ This function only support Atlas A2 training series products.
383
243
 
384
244
  l2_cache (bool, optional): (Ascend only) Whether to collect l2 cache data, collect when True.
385
245
  Default: ``False`` .
386
- hbm_ddr (bool, optional): (Ascend only) Whether to collect HBM/DDR read and write rate data, collect when True.
387
- Default: ``False`` .
246
+ hbm_ddr (bool, optional): (Ascend only) Whether to collect On-Chip Memory/DDR read and write rate data,
247
+ collect when True. Default: ``False`` .
388
248
  pcie (bool, optional): (Ascend only) Whether to collect PCIe bandwidth data, collect when True.
389
249
  Default: ``False`` .
390
250
  sync_enable (bool, optional): (GPU only) Whether the profiler collects operators in a synchronous way.
@@ -396,25 +256,32 @@ class Profiler:
396
256
  - False: The asynchronous way. The duration of the operator is that of sending from the CPU to the GPU.
397
257
  This method can reduce the impact of adding profiler on overall training time.
398
258
  data_process (bool, optional): (Ascend/GPU) Whether to collect data to prepare performance data.
399
- Default value: ``True`` .
259
+ Default value: ``False`` .
400
260
  timeline_limit (int, optional): (Ascend/GPU) Set the maximum storage size of the timeline file (unit M).
401
261
  When using this parameter, `op_time` must be set to True. Default value: ``500`` .
402
262
  profile_framework (str, optional): (Ascend/GPU) The host information to collect, it must be one of
403
- ["all", "time", "memory", None], When is not set to None, a subdirectory host_info will be generated in the
404
- specified profiler directory, which stores the collected memory and time files on the Host side.
405
- Default: "all".
263
+ ["all", "time", None], When is not set to None, it would collect the host profiler data. When using this
264
+ parameter, the op_time parameter must be enabled.
265
+ Default: None.
406
266
 
407
- - "all": Record both host timestamp and host memory usage.
408
- - "time": Only record host timestamp.
409
- - "memory": Only record host memory usage.
267
+ - "all": Record host timestamp.
268
+ - "time": The same as "all".
410
269
  - None: Not record host information.
411
270
  data_simplification (bool, optional): (Ascend only) Whether to remove FRAMEWORK data and other redundant data.
412
271
  If set to True, only the delivery of profiler and the original performance data in the PROF_XXX
413
272
  directory are retained to save disk space.
414
273
  Default value: ``True`` .
415
- host_stack (bool, optional): (Ascend) Whether to collect frame host call stack data.
416
- Default value: ``True`` .
417
-
274
+ with_stack (bool, optional): (Ascend) Whether to collect frame host call stack data on the Python side. This
275
+ data is presented in the form of a flame graph in the timeline. When using this parameter, the op_time and
276
+ profile_framework parameters must be enabled. Default value: ``False`` .
277
+ analyse_only (bool, optional): (Ascend/GPU) Whether to parse only performance data and not collect performance
278
+ data. This parameter is experimental parameter and does not need to be set by the user.
279
+ Default value: ``False`` .
280
+ rank_id (int, optional): (Ascend/GPU) Set the rank id during parsing. This parameter is
281
+ experimental parameter and does not need to be set by the user. Default value: ``0`` .
282
+ env_enable (bool, optional): (Ascend/GPU) Whether to enable the collection of environment variables.
283
+ This parameter is experimental parameter and does not need to be set by the user.
284
+ Default value: ``False`` .
418
285
  Raises:
419
286
  RuntimeError: When the version of CANN does not match the version of MindSpore,
420
287
  MindSpore cannot parse the generated ascend_job_id directory structure.
@@ -428,6 +295,7 @@ class Profiler:
428
295
  >>> from mindspore import nn
429
296
  >>> import mindspore.dataset as ds
430
297
  >>> from mindspore import Profiler
298
+ >>> from mindspore.profiler import ProfilerLevel
431
299
  >>>
432
300
  >>> class Net(nn.Cell):
433
301
  ... def __init__(self):
@@ -453,7 +321,7 @@ class Profiler:
453
321
  ...
454
322
  ... # Init Profiler
455
323
  ... # Note that the Profiler should be initialized before model.train
456
- ... profiler = Profiler()
324
+ ... profiler = Profiler(profiler_level=ProfilerLevel.Level0)
457
325
  ...
458
326
  ... # Train Model
459
327
  ... net = Net()
@@ -462,11 +330,6 @@ class Profiler:
462
330
  ... # Profiler end
463
331
  ... profiler.analyse()
464
332
  """
465
-
466
- _hwts_output_filename_target = "output_format_data_hwts_"
467
- _opcompute_output_filename_target = "output_op_compute_time_"
468
- _aicpu_op_output_filename_target = "output_data_preprocess_aicpu_"
469
- _has_analysed = False
470
333
  _has_initialized = False
471
334
  _ascend_profiling_options = ""
472
335
  _ascend_job_id = ""
@@ -492,6 +355,9 @@ class Profiler:
492
355
  self._rank_size = 1
493
356
  self._rank_id = 0
494
357
  self._ascend_profiler = None
358
+ self.metadata = {}
359
+ self.max_str_len = 4096
360
+ self.max_meta_size = 50 * 1024
495
361
  self._timeline_size_limit_byte = 500 * 1024 * 1024 # 500MB
496
362
  self._parallel_strategy = True
497
363
  self._model_iteration_dict = None
@@ -512,13 +378,13 @@ class Profiler:
512
378
  self._sync_enable = True
513
379
  self._stop_time = 0
514
380
  self._dynamic_status = False
515
- self._profile_framework = "all"
381
+ self._profile_framework = None
516
382
  self._msprof_enable = os.getenv("PROFILER_SAMPLECONFIG")
517
383
  self.profiler_level = None
518
384
  self._pretty_json = False
519
385
  self._analyse_only = kwargs.get("analyse_only", False)
520
386
  self._data_simplification = kwargs.get("data_simplification", True)
521
- self._host_stack = True
387
+ self._with_stack = False
522
388
  if self._msprof_enable:
523
389
  return
524
390
  self._start_time = int(time.time() * 1e6) # us
@@ -540,20 +406,6 @@ class Profiler:
540
406
  if self.start_profile:
541
407
  self.start()
542
408
 
543
- @staticmethod
544
- def _get_prof_rank(prof_path: str):
545
- """get rank id."""
546
- sub_dirs = os.listdir(os.path.realpath(prof_path))
547
- info_json_path = ""
548
- for sub_dir in sub_dirs:
549
- if sub_dir.startswith("device_"):
550
- device_id = sub_dir.split("_")[-1]
551
- info_json_path = os.path.join(prof_path, sub_dir, f"info.json.{device_id}")
552
- if not os.path.exists(info_json_path):
553
- return -1
554
- rank_id, _ = Profiler._parse_info_json(info_json_path)
555
- return rank_id
556
-
557
409
  @staticmethod
558
410
  def _check_output_path(output_path):
559
411
  """Checking path validity."""
@@ -602,30 +454,8 @@ class Profiler:
602
454
  logger.warning('Get the drvVersion error, use single-export mode instead. detail : %s', err)
603
455
  return None
604
456
 
605
- @staticmethod
606
- def _parse_info_json(info_file):
607
- """
608
- Parse info log file, get the rank id and device id of the job.
609
- Args:
610
- input_file (str): The file path of the parse info log file.
611
-
612
- Returns:
613
- rank id, device id
614
- """
615
- with open(info_file, "r") as f:
616
- info_dict = json.load(f)
617
-
618
- rank_id = info_dict.get("rank_id", 0)
619
- dev_info = info_dict.get("DeviceInfo", [])
620
- dev_id = dev_info[0].get("id", -1)
621
-
622
- if int(rank_id) < 0:
623
- rank_id = 0
624
-
625
- return str(rank_id), str(dev_id)
626
-
627
457
  @classmethod
628
- def offline_analyse(cls, path: str, pretty=False, step_list=None):
458
+ def offline_analyse(cls, path: str, pretty=False, step_list=None, data_simplification=True):
629
459
  """
630
460
  Analyze training performance data offline, which is invoked after performance data collection is completed.
631
461
 
@@ -633,37 +463,50 @@ class Profiler:
633
463
  path (str): The profiling data path which need to be analyzed offline.
634
464
  There needs to be a profiler directory in this path.
635
465
  pretty (bool, optional): Whether to pretty json files. Default: ``False``.
636
- step_list (list, optional): A list of steps that need to be analyzed. Default: ``None``.
637
- By default, all steps will be analyzed.
466
+ step_list (list, optional): A list of steps that need to be analyzed, the steps must be
467
+ consecutive integers. Default: ``None``. By default, all steps will be analyzed.
468
+ data_simplification (bool, optional): Whether to enable data simplification. Default: ``True``.
638
469
 
639
470
  Examples:
640
471
  >>> from mindspore import Profiler
641
472
  >>> Profiler.offline_analyse("./profiling_path")
642
473
  """
643
- profiler_path = os.path.join(path, "profiler")
644
- if not os.path.exists(profiler_path):
645
- raise ProfilerPathErrorException(f'There must be a profiler folder in the data path: {path}.')
646
-
647
- rank_set = set()
648
- sub_dirs = os.listdir(os.path.realpath(profiler_path))
649
- for sub_dir in sub_dirs:
650
- sub_path = os.path.join(profiler_path, sub_dir)
651
- if os.path.isdir(sub_path) and re.match(r"^PROF_\d+_\d+_[a-zA-Z0-9]+", sub_dir):
652
- rank = cls._get_prof_rank(sub_path)
653
- rank_set.add(rank)
654
- if not rank_set:
655
- return
656
-
657
- process_list = []
658
- for rank_id in rank_set:
659
- profiler = cls(analyse_only=True, rank_id=rank_id)
660
- process = Process(target=profiler.analyse,
661
- args=(path, pretty, step_list))
662
- process.start()
663
- process_list.append(process)
664
-
665
- for process in process_list:
666
- process.join()
474
+ real_path = os.path.realpath(path)
475
+ PathManager.check_input_directory_path(real_path)
476
+ profiler_parent_path_list = PathManager.get_profiler_parent_path_list(real_path)
477
+ if not isinstance(data_simplification, bool):
478
+ logger.warning(f"For offline_analyse, the parameter data_simplification must be bool, "
479
+ f"but got type {type(data_simplification)}, it will be set to True.")
480
+ data_simplification = True
481
+ if not profiler_parent_path_list:
482
+ raise ProfilerPathErrorException(f'The provided path "{path}" must have a "profiler" directory for '
483
+ f'single-device profiler data, or multiple subdirectories each containing '
484
+ f'a "profiler" directory for multi-device profiler data. ')
485
+ # get rank id
486
+ rank_list = []
487
+ for parent_path in profiler_parent_path_list:
488
+ profiler_path = os.path.join(parent_path, Constant.PROFILER_DIR)
489
+ rank_id = ProfilerInfo.get_rank_id(profiler_path)
490
+ if int(rank_id) < 0:
491
+ logger.error(f"Unable to get a valid rank ID in the profiler directory: {profiler_path}")
492
+ rank_list.append(rank_id)
493
+ # start offline analyse
494
+ if len(profiler_parent_path_list) == 1:
495
+ PathManager.check_directory_path_writeable(profiler_parent_path_list[0])
496
+ profiler = cls(analyse_only=True, rank_id=rank_list[0], data_simplification=data_simplification)
497
+ profiler.analyse(profiler_parent_path_list[0], pretty, step_list)
498
+ else:
499
+ # Multiprocess Parsing
500
+ multiprocessing.set_start_method("fork", force=True)
501
+ process_number = min(Constant.DEFAULT_PROCESS_NUMBER, len(profiler_parent_path_list))
502
+ pool = multiprocessing.Pool(processes=process_number)
503
+ for idx, profiler_parent_path in enumerate(profiler_parent_path_list):
504
+ PathManager.check_directory_path_writeable(profiler_parent_path)
505
+ profiling_parser = cls(analyse_only=True, rank_id=rank_list[idx],
506
+ data_simplification=data_simplification)
507
+ pool.apply_async(profiling_parser.analyse, args=(profiler_parent_path, pretty, step_list))
508
+ pool.close()
509
+ pool.join()
667
510
 
668
511
  def op_analyse(self, op_name, device_id=None):
669
512
  """
@@ -739,14 +582,38 @@ class Profiler:
739
582
  Offline mode is used in abnormal exit scenarios. This parameter should be set to ``None``
740
583
  for online mode. Default: ``None``.
741
584
  pretty (bool, optional): Whether to pretty json files. Default: ``False``.
742
- step_list (list, optional): A list of steps that need to be analyzed. Default: ``None``.
743
- By default, all steps will be analyzed.
585
+ step_list (list, optional): A list of steps that need to be analyzed, the steps must be
586
+ consecutive integers. Default: ``None``. By default, all steps will be analyzed.
744
587
  mode (str, optional): Analysis mode, it must be one of ["sync", "async"]. Default: ``sync``.
745
588
 
746
589
  - sync: analyse data in current process, it will block the current process.
747
- - async: analyse data in subprocess, it will not the current process.Since the parsing process
590
+ - async: analyse data in subprocess, it will not block the current process. Since the parsing process
748
591
  will take up extra CPU resources, please enable this mode according to the actual resource situation.
749
592
 
593
+ Examples:
594
+ >>> from mindspore.train import Callback
595
+ >>> from mindspore import Profiler
596
+ >>> class StopAtStep(Callback):
597
+ ... def __init__(self, start_step=1, stop_step=5):
598
+ ... super(StopAtStep, self).__init__()
599
+ ... self.start_step = start_step
600
+ ... self.stop_step = stop_step
601
+ ... self.profiler = Profiler(start_profile=False)
602
+ ...
603
+ ... def step_begin(self, run_context):
604
+ ... cb_params = run_context.original_args()
605
+ ... step_num = cb_params.cur_step_num
606
+ ... if step_num == self.start_step:
607
+ ... self.profiler.start()
608
+ ...
609
+ ... def step_end(self, run_context):
610
+ ... cb_params = run_context.original_args()
611
+ ... step_num = cb_params.cur_step_num
612
+ ... if step_num == self.stop_step:
613
+ ... self.profiler.stop()
614
+ ...
615
+ ... def end(self, run_context):
616
+ ... self.profiler.analyse(step_list=[2,3,4], mode="sync")
750
617
  """
751
618
  try:
752
619
  if isinstance(pretty, bool):
@@ -793,11 +660,12 @@ class Profiler:
793
660
 
794
661
  ProfilerInfo.set_parallel_info(parallel_mode, stage_num)
795
662
  if offline_path:
663
+ # Load the existing ProfilerInfo data first, to avoid overwriting the prof_info_x.json written during data collection.
664
+ ProfilerInfo.load_profiler_info_dict(os.path.join(offline_path, "profiler"))
796
665
  ProfilerInfo.set_analyse_start_time(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))
797
666
  self._ascend_graph_analyse(offline_path=offline_path)
798
667
  ProfilerInfo.set_analyse_end_time(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))
799
668
  ProfilerInfo.save(self._output_path)
800
- _offline_parse(offline_path)
801
669
  return
802
670
  if self._msprof_enable:
803
671
  return
@@ -817,18 +685,16 @@ class Profiler:
817
685
  ProfilerInfo.set_analyse_start_time(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))
818
686
  if self._device_target and self._device_target == DeviceTarget.CPU.value:
819
687
  self._cpu_analyse()
688
+ if self._profile_framework:
689
+ logger.warning("The parameter 'profile_framework' is not support for CPU, so there no host profiler "
690
+ "data.")
820
691
 
821
692
  if self._device_target and self._device_target == DeviceTarget.GPU.value:
822
693
  self._gpu_analyse()
823
694
 
824
695
  elif self._device_target and self._device_target == DeviceTarget.ASCEND.value:
825
696
  self._ascend_analyse()
826
- if self._profile_framework:
827
- if self._device_target != DeviceTarget.CPU.value:
828
- self._host_info_analyse()
829
- else:
830
- logger.warning("The parameter 'profile_framework' is not support for CPU, so there no host_info"
831
- " directory in the output path.")
697
+
832
698
  logger.info("Profiling: all the data have been analyzed.")
833
699
  ProfilerInfo.set_analyse_end_time(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))
834
700
  ProfilerInfo.save(self._output_path)
@@ -895,8 +761,13 @@ class Profiler:
895
761
  self._ascend_graph_start()
896
762
  ProfilerInfo.set_profiling_start_time(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))
897
763
  ProfilerInfo.set_system_cnt(c_expression.get_clock_syscnt())
898
- ProfilerInfo.set_system_time(int(c_expression.get_clock_time() * 1e3)) # cast us to ns
899
- _framework_profiler_enable_mi()
764
+ ProfilerInfo.set_system_time(int(c_expression.get_clock_time())) # ns
765
+ if context.get_context("mode") == context.GRAPH_MODE:
766
+ jit_config = context.get_jit_config()
767
+ jit_level = jit_config.get("jit_level", "")
768
+ ProfilerInfo.set_jit_level(jit_level)
769
+ if self._profile_framework:
770
+ _framework_profiler_enable_mi()
900
771
 
901
772
  def stop(self):
902
773
  """
@@ -957,8 +828,88 @@ class Profiler:
957
828
  self._init_profiler_info()
958
829
  ProfilerInfo.set_diff_time(self._start_time - self._monotonic_time)
959
830
  ProfilerInfo.save(self._output_path)
831
+ self._dump_metadata()
960
832
  logger.info("Profiling: stop time: %d", self._stop_time)
961
833
 
834
+ def add_metadata(self, key: str, value: str):
835
+ """
836
+ Report custom metadata key-value pair data.
837
+
838
+ Args:
839
+ key (str): The key to the metadata.
840
+ value (str): The value to the metadata.
841
+
842
+ Examples:
843
+ >>> from mindspore import Profiler
844
+ >>> # Profiler init.
845
+ >>> profiler = Profiler()
846
+ >>> # Call Profiler add_metadata
847
+ >>> profiler.add_metadata("test_key", "test_value")
848
+ >>> # Profiler end
849
+ >>> profiler.analyse()
850
+ """
851
+ if not isinstance(key, str) or not isinstance(value, str):
852
+ logger.warning("The key and value of metadata must be string. Skip this metadata.")
853
+ return
854
+ if not self._check_str_valid(key) or not self._check_str_valid(value):
855
+ logger.warning("Invalid input key or value. Skip this metadata.")
856
+ return
857
+ add_size = getsizeof(key) + getsizeof(value)
858
+ if getsizeof(self.metadata) + add_size < self.max_meta_size:
859
+ if key in self.metadata:
860
+ logger.warning(f"{key} is already saved as metadata, override it.")
861
+ self.metadata[key] = value
862
+ else:
863
+ logger.warning("Too many metadata added. Skip this metadata")
864
+
865
+ def add_metadata_json(self, key: str, value: str):
866
+ """
867
+ Report custom metadata key-value pair data with the value as a JSON string data.
868
+
869
+ Args:
870
+ key (str): The key to the metadata.
871
+ value (str): The json str format value to the metadata.
872
+
873
+ Examples:
874
+ >>> import json
875
+ >>> from mindspore import Profiler
876
+ >>> # Profiler init.
877
+ >>> profiler = Profiler()
878
+ >>> # Call Profiler add_metadata_json
879
+ >>> profiler.add_metadata_json("test_key", json.dumps({"key1": 1, "key2": 2}))
880
+ >>> # Profiler end, metadata will be saved in profiler_metadata.json
881
+ >>> profiler.analyse()
882
+ """
883
+ if not isinstance(key, str) or not isinstance(value, str):
884
+ logger.warning("The key and value of metadata must be string. Skip this metadata.")
885
+ return
886
+ if not self._check_str_valid(key) or not self._check_str_valid(value):
887
+ logger.warning("Invalid input key or value. Skip this metadata.")
888
+ return
889
+ add_size = getsizeof(key) + getsizeof(value)
890
+ if getsizeof(self.metadata) + add_size < self.max_meta_size:
891
+ try:
892
+ if key in self.metadata:
893
+ logger.warning(f"{key} is already saved as metadata, override it.")
894
+ self.metadata[key] = json.loads(value)
895
+ except ValueError:
896
+ logger.warning("The metadata value must be json format string. Skip this metadata")
897
+ else:
898
+ logger.warning("Too many metadata added. Skip this metadata")
899
+
900
+ def _dump_metadata(self):
901
+ """Dump metadata to file."""
902
+ if not self.metadata:
903
+ return
904
+ FileManager.create_json_file(self._output_path, self.metadata, "profiler_metadata.json", indent=4)
905
+ self.metadata.clear()
906
+
907
+ def _check_str_valid(self, input_str: str):
908
+ """Check str length"""
909
+ if len(input_str) > self.max_str_len:
910
+ return False
911
+ return True
912
+
962
913
  def _set_ascend_job_id(self, ascend_job_id):
963
914
  """Set output_path for offline parsing performance data."""
964
915
  if not ascend_job_id:
@@ -983,7 +934,7 @@ class Profiler:
983
934
  self._profile_communication = options.get('profile_communication')
984
935
  self._op_time = options.get('op_time')
985
936
  self._device_target = context.get_context("device_target").lower()
986
- self._profile_framework = options.get('profile_framework', 'all')
937
+ self._profile_framework = options.get('profile_framework', None)
987
938
  self._profiler_manager = c_expression.ProfilerManager.get_instance()
988
939
  self._cpu_profiler = c_expression.Profiler.get_instance("CPU")
989
940
  if self._data_process:
@@ -1034,32 +985,32 @@ class Profiler:
1034
985
 
1035
986
  def _gpu_profiler_init(self, kwargs):
1036
987
  """Gpu profiler init."""
988
+ self._parse_parameter_for_gpu(kwargs)
1037
989
  # Setup and start MindData Profiling
1038
990
  if self._data_process:
1039
991
  self._md_profiler = cde.GlobalContext.profiling_manager()
1040
992
  self._md_profiler.init()
1041
- self._parse_parameter_for_gpu(kwargs)
1042
993
 
1043
994
  gpu_profiler = c_expression.Profiler
1044
995
  self._gpu_profiler = gpu_profiler.get_instance("GPU")
1045
- self._gpu_profiler.init(self._output_path)
1046
- self._gpu_profiler.sync_enable(self._sync_enable)
1047
996
  if GlobalComm.WORLD_COMM_GROUP == "nccl_world_group":
1048
997
  self._dev_id = str(get_rank())
1049
998
  os.environ['DEVICE_ID'] = self._dev_id
1050
999
  self._rank_id = self._dev_id
1000
+ self._gpu_profiler.init(self._output_path, int(self._rank_id))
1001
+ self._gpu_profiler.sync_enable(self._sync_enable)
1051
1002
 
1052
1003
  def _ascend_profiler_init(self, kwargs):
1053
1004
  """Ascend profiler init."""
1005
+ self._parse_parameter_for_ascend(kwargs)
1054
1006
  # Setup and start MindData Profiling
1055
1007
  if self._data_process:
1056
1008
  self._md_profiler = cde.GlobalContext.profiling_manager()
1057
1009
  self._md_profiler.init()
1058
1010
  self._init_time = int(time.time() * 10000000)
1059
1011
  logger.info("Profiling: profiling init time: %d", self._init_time)
1060
- self._parse_parameter_for_ascend(kwargs)
1061
- os.environ['DEVICE_ID'] = self._dev_id
1062
1012
 
1013
+ os.environ['DEVICE_ID'] = self._dev_id
1063
1014
  self._ascend_profiling_options = json.dumps(self._construct_profiling_options())
1064
1015
  # Characters longer than 2048 are ignored, resulting in profiling option resolution errors
1065
1016
  if len(self._ascend_profiling_options) > 2048:
@@ -1075,7 +1026,7 @@ class Profiler:
1075
1026
  data_path = os.path.join(container_path, "data")
1076
1027
  data_path = validate_and_normalize_path(data_path)
1077
1028
  if not os.path.exists(data_path):
1078
- os.makedirs(data_path, exist_ok=True)
1029
+ os.makedirs(data_path, exist_ok=True, mode=stat.S_IRUSR | stat.S_IWUSR | stat.S_IXUSR)
1079
1030
 
1080
1031
  def _construct_profiling_options(self):
1081
1032
  """
@@ -1101,9 +1052,9 @@ class Profiler:
1101
1052
  "op_time": self.ENABLE_STATUS if self._op_time else self.DISABLE_STATUS,
1102
1053
  "profile_framework": self._profile_framework,
1103
1054
  "profiler_level": self.profiler_level.value if self.profiler_level else self.DISABLE_STATUS,
1104
- "host_stack": "on" if self._host_stack else "off"
1055
+ "with_stack": "on" if self._with_stack else "off"
1105
1056
  }
1106
-
1057
+ ProfilerInfo.set_profiling_options(profiling_options)
1107
1058
  return profiling_options
1108
1059
 
1109
1060
  def _parse_parameter_for_gpu(self, kwargs):
@@ -1175,11 +1126,11 @@ class Profiler:
1175
1126
  pcie_enable = False
1176
1127
  self._pcie = self.ENABLE_STATUS if pcie_enable else self.DISABLE_STATUS
1177
1128
 
1178
- self._parallel_strategy = kwargs.pop("parallel_strategy", True)
1129
+ self._parallel_strategy = kwargs.pop("parallel_strategy", False)
1179
1130
  if not isinstance(self._parallel_strategy, bool):
1180
1131
  logger.warning(f"For '{self.__class__.__name__}', the parameter parallel_strategy must be bool, "
1181
- f"but got type {type(self._parallel_strategy)}, it will be set to True.")
1182
- self._parallel_strategy = True
1132
+ f"but got type {type(self._parallel_strategy)}, it will be set to False.")
1133
+ self._parallel_strategy = False
1183
1134
 
1184
1135
  self.profiler_level = kwargs.pop("profiler_level", None)
1185
1136
  if self.profiler_level and not isinstance(self.profiler_level, ProfilerLevel):
@@ -1381,7 +1332,7 @@ class Profiler:
1381
1332
  finally:
1382
1333
  pass
1383
1334
 
1384
- def _ascend_graph_memory_analyse(self, points):
1335
+ def _ascend_graph_memory_analyse(self):
1385
1336
  """Analyse memory usage info."""
1386
1337
  if not self._profile_memory:
1387
1338
  return
@@ -1390,7 +1341,7 @@ class Profiler:
1390
1341
  "PyNative mode currently.")
1391
1342
  try:
1392
1343
  logger.info("Profiling: analyzing the memory usage info.")
1393
- self._analyse_memory_usage(points)
1344
+ self._analyse_memory_usage()
1394
1345
  except (ProfilerIOException, ProfilerFileNotFoundException, ProfilerRawFileException) as err:
1395
1346
  logger.warning(err.message)
1396
1347
  finally:
@@ -1408,28 +1359,37 @@ class Profiler:
1408
1359
 
1409
1360
  dev_id = self._rank_id if self._device_target == DeviceTarget.ASCEND.value else self._dev_id
1410
1361
  ascend_profiler_output_path = os.path.join(ascend_ms_path, 'ASCEND_PROFILER_OUTPUT')
1411
- os.makedirs(ascend_profiler_output_path, exist_ok=True)
1362
+ PathManager.make_dir_safety(ascend_profiler_output_path)
1412
1363
 
1413
1364
  source_profiler_info_path = os.path.join(self._output_path, f"profiler_info_{dev_id}.json")
1414
1365
  target_profiler_info_path = os.path.join(ascend_ms_path, f"profiler_info_{dev_id}.json")
1415
- shutil.copy(source_profiler_info_path, target_profiler_info_path)
1366
+ PathManager.copy_file(source_profiler_info_path, target_profiler_info_path)
1367
+
1368
+ source_profiler_metadata_path = os.path.join(self._output_path, f"profiler_metadata.json")
1369
+ target_profiler_metadata_path = os.path.join(ascend_ms_path, f"profiler_metadata.json")
1370
+ PathManager.copy_file(source_profiler_metadata_path, target_profiler_metadata_path)
1416
1371
 
1417
1372
  source_timeline_path = os.path.join(self._output_path, f"ascend_timeline_display_{dev_id}.json")
1418
1373
  target_timeline_path = os.path.join(ascend_profiler_output_path, f"trace_view.json")
1419
- shutil.copy(source_timeline_path, target_timeline_path)
1374
+ PathManager.copy_file(source_timeline_path, target_timeline_path)
1420
1375
 
1421
1376
  src_op_mem_file = os.path.join(self._output_path, f"operator_memory_{dev_id}.csv")
1422
- if os.path.exists(src_op_mem_file):
1423
- dst_op_mem_file = os.path.join(ascend_profiler_output_path, f"operator_memory.csv")
1424
- shutil.copy(src_op_mem_file, dst_op_mem_file)
1377
+ dst_op_mem_file = os.path.join(ascend_profiler_output_path, f"operator_memory.csv")
1378
+ PathManager.copy_file(src_op_mem_file, dst_op_mem_file)
1425
1379
 
1426
- ms_output_path = os.path.abspath(
1380
+ ms_output_path = os.path.realpath(
1427
1381
  os.path.join(source_path, os.path.pardir, 'mindstudio_profiler_output'))
1428
1382
  static_op_mem_path = os.path.join(ms_output_path, f"static_op_mem_*.csv")
1429
1383
  src_static_op_mem_path = glob.glob(static_op_mem_path)
1430
1384
  if src_static_op_mem_path:
1431
1385
  dst_static_op_mem_file = os.path.join(ascend_profiler_output_path, f"static_op_mem.csv")
1432
- shutil.copy(src_static_op_mem_path[0], dst_static_op_mem_file)
1386
+ PathManager.copy_file(src_static_op_mem_path[0], dst_static_op_mem_file)
1387
+
1388
+ src_op_statistics_path = os.path.join(ms_output_path, "op_statistic_*.csv")
1389
+ src_op_statistics_path = glob.glob(src_op_statistics_path)
1390
+ if src_op_statistics_path:
1391
+ dst_op_statistics_path = os.path.join(ascend_profiler_output_path, f"op_statistic.csv")
1392
+ PathManager.copy_file(src_op_statistics_path[0], dst_op_statistics_path)
1433
1393
 
1434
1394
  self._ascend_graph_cluster_analyse(source_path, ascend_profiler_output_path)
1435
1395
  self._ascend_graph_communicate_analyse(source_path, ascend_profiler_output_path)
@@ -1468,7 +1428,7 @@ class Profiler:
1468
1428
  f"communication_matrix.json")
1469
1429
  communication_matrix_file_path = validate_and_normalize_path(communication_matrix_file_path)
1470
1430
 
1471
- analyze_path = os.path.abspath(os.path.join(source_path, os.path.pardir, 'analyze'))
1431
+ analyze_path = os.path.realpath(os.path.join(source_path, os.path.pardir, 'analyze'))
1472
1432
  communicate_analyser = AscendCommunicationGenerator(analyze_path)
1473
1433
  communicate_analyser.parse()
1474
1434
  communicate_analyser.write(communication_file_path, communication_matrix_file_path)
@@ -1500,26 +1460,6 @@ class Profiler:
1500
1460
  finally:
1501
1461
  pass
1502
1462
 
1503
- def _ascend_graph_msadvisor_analyse(self, job_id):
1504
- """Call MSAdvisor function."""
1505
- logger.info("MSAdvisor starts running.")
1506
- msadvisor = Msadvisor(job_id, self._rank_id, self._output_path, pretty=self._pretty_json)
1507
- try:
1508
- msadvisor.analyse()
1509
- except FileNotFoundError as err:
1510
- logger.warning("MSAdvisor: command not found,"
1511
- "please check if installed ascend-toolkit and set environment path correctly. %s", err)
1512
- except OSError as err:
1513
- logger.warning("Cannot execute binary file: Exec format error. %s", err)
1514
- except subprocess.CalledProcessError:
1515
- logger.warning("MSAdvisor running failed, please check MSAdvisor running log.")
1516
- except (ValueError, ProfilerFileNotFoundException) as err:
1517
- logger.warning("MSAdvisor running failed. %s", err)
1518
- finally:
1519
- pass
1520
- if context.get_context("mode") == context.PYNATIVE_MODE:
1521
- logger.warning("Pynative mode does not support MSAdvisor analyzer currently.")
1522
-
1523
1463
  def _get_kernel_op_map(self, op_summary, kernels: List[CANNEvent]) -> List:
1524
1464
  """Get the mapping between framework operator and device kernel."""
1525
1465
  if not kernels:
@@ -1535,8 +1475,6 @@ class Profiler:
1535
1475
  key = name if name.startswith("hcom_") else (name, ts)
1536
1476
  launch_op = kernel_map.get(key)
1537
1477
  if not launch_op:
1538
- if context.get_context("mode") == context.GRAPH_MODE or not name.startswith("aclnn"):
1539
- logger.warning(f"Failed to get launch operator for {name}!")
1540
1478
  continue
1541
1479
  launch_ops[index] = launch_op.name
1542
1480
  return launch_ops
@@ -1547,6 +1485,7 @@ class Profiler:
1547
1485
  else:
1548
1486
  MultiProcessPool().add_async_job(self._ascend_graph_analyse_inner)
1549
1487
 
1488
+ @timeit("Profiler analyse done")
1550
1489
  def _ascend_graph_analyse_inner(self, offline_path=None):
1551
1490
  """Ascend graph mode analyse."""
1552
1491
  job_id = self._get_profiling_job_id(offline_path)
@@ -1558,7 +1497,7 @@ class Profiler:
1558
1497
  source_path = os.path.join(self._output_path, job_id)
1559
1498
  self._minddata_analyse()
1560
1499
  if self._op_time:
1561
- mindstudio_profiler_output = os.path.abspath(
1500
+ mindstudio_profiler_output = os.path.realpath(
1562
1501
  os.path.join(source_path, os.path.pardir, 'mindstudio_profiler_output'))
1563
1502
  flag = _ascend_graph_msprof_generator(mindstudio_profiler_output, self._model_iteration_dict)
1564
1503
  if not flag:
@@ -1567,14 +1506,17 @@ class Profiler:
1567
1506
  ProfilerInfo.set_export_flag(flag)
1568
1507
  op_summary, op_statistic, steptrace, steptrace_model \
1569
1508
  = _ascend_graph_msprof_analyse(mindstudio_profiler_output)
1509
+ kernels = self._ascend_timeline_analyse(op_summary, steptrace, source_path, mindstudio_profiler_output)
1510
+
1570
1511
  if isinstance(op_statistic, np.ndarray) and op_statistic.shape[0] == 0 or \
1571
1512
  not isinstance(op_statistic, np.ndarray) and not op_statistic:
1513
+ logger.warning('Op statistic data is empty!')
1572
1514
  return
1573
- kernels = self._ascend_timeline_analyse(op_summary, steptrace, source_path, mindstudio_profiler_output)
1515
+
1574
1516
  launch_ops = self._get_kernel_op_map(op_summary, kernels)
1575
1517
  self._ascend_op_analyse(op_summary, op_statistic, self._dynamic_status, launch_ops)
1576
1518
  graph_ids = np.unique(op_summary['Model ID']).tolist()
1577
- points = self._ascend_fpbp_analyse(op_summary, steptrace)
1519
+ self._ascend_fpbp_analyse(op_summary, steptrace)
1578
1520
  if len(graph_ids) == 1:
1579
1521
  self._ascend_step_trace_analyse(steptrace)
1580
1522
  else:
@@ -1582,13 +1524,13 @@ class Profiler:
1582
1524
  if self._dynamic_status:
1583
1525
  self._ascend_dynamic_net_analyse(op_summary)
1584
1526
  self._ascend_flops_analyse(op_summary, launch_ops)
1585
- self._ascend_graph_memory_analyse(points)
1527
+ self._ascend_graph_memory_analyse()
1586
1528
  self._ascend_ms_analyze(mindstudio_profiler_output)
1587
1529
  self._ascend_graph_hccl_analyse(mindstudio_profiler_output, steptrace)
1588
- self._ascend_graph_msadvisor_analyse(job_id)
1589
1530
  self._minddata_aicpu_analyse(self._output_path, job_id)
1590
1531
  ProfilerInfo.set_graph_ids(graph_ids)
1591
1532
  try:
1533
+ ProfilerInfo.set_data_simplification(self._data_simplification)
1592
1534
  ProfilerPathManager.simplify_data(self._output_path, self._data_simplification)
1593
1535
  except RuntimeError as err:
1594
1536
  logger.error('Profilier simplify data failed, %s', str(err))
@@ -1690,7 +1632,7 @@ class Profiler:
1690
1632
  try:
1691
1633
  timeline_generator = CpuTimelineGenerator(self._output_path, self._rank_id, context.get_context("mode"))
1692
1634
  timeline_generator.init_timeline(pretty=self._pretty_json)
1693
- timeline_generator.write_timeline(self._timeline_size_limit_byte)
1635
+ timeline_generator.write_timeline()
1694
1636
  timeline_generator.write_timeline_summary()
1695
1637
  except (ProfilerIOException, ProfilerFileNotFoundException, RuntimeError) as err:
1696
1638
  logger.warning('Fail to write timeline data: %s', err)
@@ -1699,15 +1641,13 @@ class Profiler:
1699
1641
  raise RuntimeError("Currently, the CPU platform does not support Pynative mode to collect performance "
1700
1642
  "data.")
1701
1643
 
1702
- def _analyse_step_trace(self, source_path=None, framework_parser=None, is_training_mode_flag=True,
1703
- is_gpu_kernel_async_launch_flag=False):
1644
+ def _analyse_step_trace(self, is_training_mode_flag=True, is_gpu_kernel_async_launch_flag=False):
1704
1645
  """
1705
1646
  Analyse step trace data and save the result.
1706
1647
 
1707
1648
  Args:
1708
- source_path (str): The directory that contains the step trace original data.
1709
- framework_parser (FrameworkParser): The framework parse instance.
1710
1649
  is_training_mode_flag (bool): Whether in training mode or not.
1650
+ is_gpu_kernel_async_launch_flag (bool): Whether gpu kernel launches are asynchronous
1711
1651
  """
1712
1652
  logger.info("Begin to parse step trace.")
1713
1653
  # construct output path
@@ -1738,56 +1678,31 @@ class Profiler:
1738
1678
  logger.info("Finish saving the intermediate result: %s", step_trace_intermediate_file_path)
1739
1679
  logger.info("The point info is: %s", point_info)
1740
1680
 
1741
- return point_info, is_training_mode_flag
1742
- return {}, is_training_mode_flag
1743
-
1744
- # whether keep the first step
1745
- skip_first_step_flag = framework_parser.check_op_name(INIT_OP_NAME)
1746
- # recognize inference or training mode
1747
- is_training_mode_flag = framework_parser.check_op_name("Gradients")
1748
- # parser the step trace files and save the result to disk
1749
- source_path = validate_and_normalize_path(source_path)
1750
- parser = AscendStepTraceParser(input_dir=source_path,
1751
- output_file_path=step_trace_intermediate_file_path,
1752
- skip_first_step=skip_first_step_flag,
1753
- is_training_mode=is_training_mode_flag)
1754
- parser.set_task_id_op_name_dict(framework_parser.to_task_id_full_op_name_dict())
1755
- parser.parse_and_save()
1756
- point_info = parser.record_point_info(point_info_file_path)
1757
-
1758
- # print parser result
1759
- parser.show()
1760
- logger.info("Finish saving the intermediate result: %s", step_trace_intermediate_file_path)
1761
- logger.info("The point info is: %s", point_info)
1762
-
1763
- return point_info, is_training_mode_flag
1764
-
1765
1681
  def _generate_timeline(self, reduce_op_type):
1766
1682
  """Used for gpu, generate timeline info, write to json format file."""
1767
1683
  try:
1768
1684
  timeline_generator = GpuTimelineGenerator(self._output_path, self._dev_id, self._rank_size,
1769
1685
  context.get_context("mode"))
1770
1686
  timeline_generator.init_timeline(reduce_op_type)
1771
- self._timeline_meta = timeline_generator.write_timeline(self._timeline_size_limit_byte)
1687
+ self._timeline_meta = timeline_generator.write_timeline()
1772
1688
  timeline_generator.write_timeline_summary()
1689
+ timeline_generator.parse_fwk_data()
1690
+ timeline_generator.write_fwk_timeline()
1773
1691
  return timeline_generator
1774
1692
  except (ProfilerIOException, ProfilerFileNotFoundException, RuntimeError) as err:
1775
1693
  logger.warning('Fail to write timeline data: %s', err)
1776
1694
  raise RuntimeError('Fail to write timeline data.') from err
1777
1695
 
1778
- def _analyse_memory_usage(self, points):
1696
+ def _analyse_memory_usage(self):
1779
1697
  """Analyse memory usage data."""
1780
1698
  integrator = Integrator(self._output_path, self._rank_id)
1781
- aicore_detail_data = integrator.get_aicore_detail_data()
1782
- memory_parser = MemoryUsageParser(self._output_path, self._rank_id, pretty=self._pretty_json)
1783
- memory_parser.init_memory_usage_info(aicore_detail_data, points)
1784
- memory_parser.write_memory_files()
1699
+ integrator.get_aicore_detail_data()
1785
1700
 
1786
1701
  def _get_profiling_job_id(self, offline_path):
1787
1702
  """Get profiling job id, which was generated by ada service.
1788
1703
 
1789
1704
  Returns:
1790
- str, profiling job id.
1705
+ str, profiling job id, eg: PROF_XXX/device_*.
1791
1706
  """
1792
1707
 
1793
1708
  if offline_path:
@@ -1816,18 +1731,17 @@ class Profiler:
1816
1731
  "profiler will ignore this job dir.", job_dir)
1817
1732
  continue
1818
1733
 
1819
- prof_rank_id, prof_device_id = self._parse_info_json(info_file_path)
1734
+ prof_rank_id = ProfilerInfo.get_rank_id(self._output_path)
1735
+ prof_device_id = ProfilerInfo.get_device_id(prof_dir)
1820
1736
  job_start_time = self._parse_job_start_time(prof_dir)
1821
1737
 
1822
1738
  if offline_path:
1823
- if self._rank_id != prof_rank_id:
1824
- continue
1825
1739
  self._start_time = int(job_start_time)
1826
1740
  else:
1827
1741
  if self._dev_id != prof_device_id and self._rank_id != prof_rank_id:
1828
- logger.debug("Find profiling find job path %s, but not current training device id. "
1829
- "Current training rank id %s, but job path rank id: %s, "
1830
- "profiler will ignore this job dir.", job_dir, self._rank_id, prof_rank_id)
1742
+ logger.warning("Find profiling find job path %s, but not current training device id. "
1743
+ "Current training rank id %s, but job path rank id: %s, "
1744
+ "profiler will ignore this job dir.", job_dir, self._rank_id, prof_rank_id)
1831
1745
  continue
1832
1746
 
1833
1747
  if job_start_time < self._start_time:
@@ -1936,19 +1850,21 @@ class Profiler:
1936
1850
  self._output_path = validate_and_normalize_path(output_path)
1937
1851
  else:
1938
1852
  output_path = kwargs.pop("output_path")
1853
+ if not isinstance(output_path, str):
1854
+ logger.warning(
1855
+ f"The output_path must be a string, but got type {type(output_path)}, it will be set to 'data'.")
1856
+ output_path = "data"
1939
1857
  self._output_path = validate_and_normalize_path(output_path)
1940
1858
 
1941
1859
  self._output_path = os.path.join(self._output_path, "profiler")
1942
1860
  if not os.path.exists(self._output_path):
1943
- os.makedirs(self._output_path, exist_ok=True)
1944
- os.chmod(self._output_path, stat.S_IRUSR | stat.S_IWUSR | stat.S_IXUSR)
1861
+ os.makedirs(self._output_path, exist_ok=True, mode=stat.S_IRUSR | stat.S_IWUSR | stat.S_IXUSR)
1945
1862
  else:
1946
1863
  logger.warning("The target dir already exists. "
1947
1864
  "There may be some old profiling data, and they will be rewritten in the end.")
1948
1865
  self._framework_path = os.path.join(self._output_path, "FRAMEWORK")
1949
1866
  if not os.path.exists(self._framework_path):
1950
- os.makedirs(self._framework_path, exist_ok=True)
1951
- os.chmod(self._framework_path, stat.S_IRUSR | stat.S_IWUSR | stat.S_IXUSR)
1867
+ os.makedirs(self._framework_path, exist_ok=True, mode=stat.S_IRUSR | stat.S_IWUSR | stat.S_IXUSR)
1952
1868
 
1953
1869
  def _parser_kwargs(self, kwargs):
1954
1870
  """Parse kwargs vale."""
@@ -1969,11 +1885,11 @@ class Profiler:
1969
1885
  f"but got type {type(self._op_time)}, it will be set to True.")
1970
1886
  self._op_time = True
1971
1887
 
1972
- self._data_process = kwargs.pop("data_process", True)
1888
+ self._data_process = kwargs.pop("data_process", False)
1973
1889
  if not isinstance(self._data_process, bool):
1974
1890
  logger.warning(f"For '{self.__class__.__name__}', the parameter data_process must be bool, "
1975
- f"but got type {type(self._data_process)}, it will be set to True.")
1976
- self._data_process = True
1891
+ f"but got type {type(self._data_process)}, it will be set to False.")
1892
+ self._data_process = False
1977
1893
 
1978
1894
  timeline_limit = kwargs.pop("timeline_limit", 500)
1979
1895
  if isinstance(timeline_limit, bool) or not isinstance(timeline_limit, int):
@@ -1985,70 +1901,22 @@ class Profiler:
1985
1901
  "[Profiler]The 'timeline_limit' parameter must be greater than 0, it will be set to 500.")
1986
1902
  timeline_limit = 500
1987
1903
  self._timeline_size_limit_byte = timeline_limit * 1024 * 1024
1988
- self._profile_framework = kwargs.pop("profile_framework", "all")
1989
- if self._profile_framework not in ["memory", "time", "all", None]:
1990
- logger.warning(f"For '{self.__class__.__name__}', the parameter profile_framework must be one of ['memory',"
1991
- f" 'time', 'all', None], but got {self._profile_framework}, it will be set to 'all'.")
1992
- self._profile_framework = "all"
1993
- if not isinstance(self._data_simplification, bool):
1994
- logger.warning(f"For '{self.__class__.__name__}', the parameter data_simplification must be bool, "
1995
- f"but got type {type(self._data_simplification)}, it will be set to True.")
1996
- self._data_simplification = True
1904
+ self._profile_framework = kwargs.pop("profile_framework", None)
1905
+ if self._profile_framework not in ["time", "all", None]:
1906
+ logger.warning(f"For '{self.__class__.__name__}', the parameter profile_framework must be one of ["
1907
+ f" 'time', 'all', None], but got {self._profile_framework}, it will be set to None.")
1908
+ self._profile_framework = None
1997
1909
 
1998
1910
  if not isinstance(self._data_simplification, bool):
1999
1911
  logger.warning(f"For '{self.__class__.__name__}', the parameter data_simplification must be bool, "
2000
1912
  f"but got type {type(self._data_simplification)}, it will be set to True.")
2001
1913
  self._data_simplification = True
2002
1914
 
2003
- self._host_stack = kwargs.pop("host_stack", True)
2004
- if not isinstance(self._host_stack, bool):
2005
- logger.warning(f"For '{self.__class__.__name__}', the parameter host_stack must be bool, but got "
2006
- f"type {type(self._host_stack)}, it will be set to True.")
2007
- self._host_stack = True
2008
-
2009
- def _host_info_analyse(self):
2010
- """
2011
- Read data from the csv file, and write it into timeline file, so the timeline can be show on tracing tool.
2012
- """
2013
- logger.info("Profiling HostInfo start.")
2014
- host_dir = os.path.join(self._output_path, 'host_info')
2015
- host_dir = validate_and_normalize_path(host_dir)
2016
- if not os.path.exists(host_dir):
2017
- logger.warning("Host info directory: %s not exist.", host_dir)
2018
- return
2019
- csv_file_name = 'host_info_' + str(self._rank_id) + '.csv'
2020
- json_file_name = 'timeline_' + str(self._rank_id) + '.json'
2021
- memory_file_name = 'host_memory_' + str(self._rank_id) + '.csv'
2022
- dataset_file_name = 'dataset_' + str(self._rank_id) + '.csv'
2023
- host_info_file = os.path.join(self._output_path, 'host_info', csv_file_name)
2024
- timeline_file = os.path.join(self._output_path, 'host_info', json_file_name)
2025
- memory_file = os.path.join(self._output_path, 'host_info', memory_file_name)
2026
- dataset_execution_file = os.path.join(self._output_path, 'host_info', dataset_file_name)
2027
- _parse_host_info(host_info_file, timeline_file, memory_file)
2028
- _calculate_dataset_execution_time(host_info_file, dataset_execution_file)
2029
- logger.info("Profile HostInfo finished.")
2030
-
2031
-
2032
- def _offline_parse(offline_path):
2033
- """Parse data in abnormal scenario, only support for host_info at present."""
2034
- logger.info("Profiling HostInfo offline start.")
2035
- host_dir = os.path.join(offline_path, 'profiler', 'host_info')
2036
- host_dir = validate_and_normalize_path(host_dir)
2037
- if not os.path.exists(host_dir):
2038
- logger.warning("Host info directory: %s not exist.", host_dir)
2039
- return
2040
- files = os.listdir(host_dir)
2041
- for file in files:
2042
- if not file.startswith("host_info_") or not file.endswith(".csv"):
2043
- continue
2044
- rank_id = file.split('_')[-1].split('.')[0]
2045
- if not rank_id.isdigit():
2046
- logger.info("Cannot get rank_id from file: %s, skip it", file)
2047
- return
2048
- host_info_file = os.path.join(host_dir, file)
2049
- timeline_file = os.path.join(host_dir, f'timeline_{rank_id}.json')
2050
- memory_file = os.path.join(host_dir, f'host_memory_{rank_id}.csv')
2051
- dataset_execution_file = os.path.join(host_dir, f'dataset_{rank_id}.csv')
2052
- _parse_host_info(host_info_file, timeline_file, memory_file)
2053
- _calculate_dataset_execution_time(host_info_file, dataset_execution_file)
2054
- logger.info("Profile HostInfo offline finished.")
1915
+ self._with_stack = kwargs.pop("with_stack", False)
1916
+ if not isinstance(self._with_stack, bool):
1917
+ logger.warning(f"For '{self.__class__.__name__}', the parameter with_stack must be bool, but got "
1918
+ f"type {type(self._with_stack)}, it will be set to False.")
1919
+ self._with_stack = False
1920
+ if self._with_stack and self._profile_framework not in ["time", "all"]:
1921
+ logger.warning("When using the with_stack parameter, the profile_framework parameter must be enabled.")
1922
+ self._with_stack = False