mindspore 2.4.10-cp39-cp39-win_amd64.whl → 2.6.0-cp39-cp39-win_amd64.whl

This diff represents the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release. This version of mindspore might be problematic.
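
A file-level comparison like the "Files changed" listing below can be reproduced locally from the two wheels themselves. The following is a minimal sketch, not the tool that generated this page; it assumes both wheels have already been downloaded (for example with pip download mindspore==2.4.10 and pip download mindspore==2.6.0) and that the standard wheel filenames implied by the title apply.

    import zipfile

    def wheel_members(path):
        # A wheel is a zip archive; map each member name to its uncompressed size.
        with zipfile.ZipFile(path) as wheel:
            return {info.filename: info.file_size for info in wheel.infolist()}

    old = wheel_members("mindspore-2.4.10-cp39-cp39-win_amd64.whl")
    new = wheel_members("mindspore-2.6.0-cp39-cp39-win_amd64.whl")

    added = sorted(set(new) - set(old))
    removed = sorted(set(old) - set(new))
    changed = sorted(name for name in set(old) & set(new) if old[name] != new[name])
    print(f"{len(added)} added, {len(removed)} removed, {len(changed)} changed")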

Files changed (579)
  1. mindspore/.commit_id +1 -1
  2. mindspore/__init__.py +13 -6
  3. mindspore/_c_dataengine.cp39-win_amd64.pyd +0 -0
  4. mindspore/_c_expression.cp39-win_amd64.pyd +0 -0
  5. mindspore/_c_mindrecord.cp39-win_amd64.pyd +0 -0
  6. mindspore/_check_jit_forbidden_api.py +3 -0
  7. mindspore/_checkparam.py +3 -38
  8. mindspore/_deprecated/__init__.py +17 -0
  9. mindspore/_deprecated/jit.py +198 -0
  10. mindspore/_extends/builtin_operations.py +1 -1
  11. mindspore/_extends/parallel_compile/akg_compiler/gen_custom_op_files.py +1 -1
  12. mindspore/_extends/parse/__init__.py +6 -7
  13. mindspore/_extends/parse/compile_config.py +83 -0
  14. mindspore/_extends/parse/deprecated/__init__.py +0 -0
  15. mindspore/_extends/parse/deprecated/deprecated_tensor_method.py +394 -0
  16. mindspore/_extends/parse/jit_fallback_modules/__init__.py +0 -0
  17. mindspore/_extends/parse/jit_fallback_modules/check_utils.py +123 -0
  18. mindspore/_extends/parse/jit_fallback_modules/third_party_modules.py +50 -0
  19. mindspore/_extends/parse/parser.py +47 -198
  20. mindspore/_extends/parse/resources.py +1 -5
  21. mindspore/_extends/parse/standard_method.py +229 -99
  22. mindspore/_extends/pijit/__init__.py +2 -2
  23. mindspore/_extends/pijit/pijit_func_white_list.py +17 -12
  24. mindspore/_extends/pijit/tensor_func_list.py +27 -0
  25. mindspore/_extends/utils.py +1 -1
  26. mindspore/amp.py +11 -5
  27. mindspore/avcodec-59.dll +0 -0
  28. mindspore/avdevice-59.dll +0 -0
  29. mindspore/avfilter-8.dll +0 -0
  30. mindspore/avformat-59.dll +0 -0
  31. mindspore/avutil-57.dll +0 -0
  32. mindspore/boost/__init__.py +2 -2
  33. mindspore/boost/base.py +3 -7
  34. mindspore/boost/boost_cell_wrapper.py +138 -43
  35. mindspore/common/__init__.py +6 -3
  36. mindspore/common/_grad_function.py +56 -0
  37. mindspore/common/_pijit_context.py +14 -5
  38. mindspore/common/_register_for_tensor.py +1 -2
  39. mindspore/common/_stub_tensor.py +30 -14
  40. mindspore/common/_tensor_cpp_method.py +17 -0
  41. mindspore/common/_tensor_docs.py +4760 -0
  42. mindspore/common/api.py +480 -372
  43. mindspore/common/auto_dynamic_shape.py +41 -44
  44. mindspore/common/dtype.py +39 -36
  45. mindspore/common/dump.py +9 -6
  46. mindspore/common/file_system.py +9 -1
  47. mindspore/common/generator.py +5 -0
  48. mindspore/common/hook_handle.py +6 -2
  49. mindspore/common/initializer.py +13 -10
  50. mindspore/common/jit_begin_end.py +94 -0
  51. mindspore/common/jit_config.py +6 -1
  52. mindspore/common/jit_context.py +76 -0
  53. mindspore/common/jit_trace.py +378 -0
  54. mindspore/common/lazy_inline.py +9 -3
  55. mindspore/common/mindir_util.py +10 -2
  56. mindspore/common/mutable.py +5 -4
  57. mindspore/common/parameter.py +135 -52
  58. mindspore/common/seed.py +2 -2
  59. mindspore/common/sparse_tensor.py +23 -17
  60. mindspore/common/tensor.py +975 -1981
  61. mindspore/communication/__init__.py +7 -5
  62. mindspore/communication/_comm_helper.py +52 -2
  63. mindspore/communication/comm_func.py +240 -181
  64. mindspore/communication/management.py +95 -26
  65. mindspore/context.py +324 -573
  66. mindspore/dataset/__init__.py +65 -37
  67. mindspore/dataset/audio/__init__.py +2 -8
  68. mindspore/dataset/audio/transforms.py +3 -17
  69. mindspore/dataset/callback/ds_callback.py +2 -1
  70. mindspore/dataset/core/config.py +87 -6
  71. mindspore/dataset/engine/cache_admin.py +3 -3
  72. mindspore/dataset/engine/cache_client.py +6 -5
  73. mindspore/dataset/engine/datasets.py +292 -267
  74. mindspore/dataset/engine/datasets_audio.py +22 -8
  75. mindspore/dataset/engine/datasets_standard_format.py +46 -27
  76. mindspore/dataset/engine/datasets_text.py +78 -48
  77. mindspore/dataset/engine/datasets_user_defined.py +183 -117
  78. mindspore/dataset/engine/datasets_vision.py +120 -44
  79. mindspore/dataset/engine/iterators.py +283 -63
  80. mindspore/dataset/engine/obs/obs_mindrecord_dataset.py +1 -1
  81. mindspore/dataset/engine/obs/util.py +8 -0
  82. mindspore/dataset/engine/queue.py +40 -0
  83. mindspore/dataset/engine/samplers.py +289 -43
  84. mindspore/dataset/engine/serializer_deserializer.py +3 -2
  85. mindspore/dataset/engine/validators.py +53 -11
  86. mindspore/dataset/text/__init__.py +7 -6
  87. mindspore/dataset/text/transforms.py +6 -5
  88. mindspore/dataset/text/utils.py +3 -3
  89. mindspore/dataset/transforms/__init__.py +0 -9
  90. mindspore/dataset/transforms/py_transforms_util.py +17 -0
  91. mindspore/dataset/transforms/transforms.py +31 -14
  92. mindspore/dataset/utils/browse_dataset.py +1 -1
  93. mindspore/dataset/vision/__init__.py +2 -9
  94. mindspore/dataset/vision/transforms.py +202 -158
  95. mindspore/dataset/vision/utils.py +7 -5
  96. mindspore/dataset/vision/validators.py +1 -2
  97. mindspore/device_context/__init__.py +21 -0
  98. mindspore/device_context/ascend/__init__.py +25 -0
  99. mindspore/device_context/ascend/device.py +72 -0
  100. mindspore/device_context/ascend/op_debug.py +153 -0
  101. mindspore/device_context/ascend/op_precision.py +193 -0
  102. mindspore/device_context/ascend/op_tuning.py +123 -0
  103. mindspore/{ops_generate/gen_constants.py → device_context/cpu/__init__.py} +6 -17
  104. mindspore/device_context/cpu/device.py +62 -0
  105. mindspore/device_context/cpu/op_tuning.py +43 -0
  106. mindspore/device_context/gpu/__init__.py +21 -0
  107. mindspore/device_context/gpu/device.py +70 -0
  108. mindspore/device_context/gpu/op_precision.py +67 -0
  109. mindspore/device_context/gpu/op_tuning.py +175 -0
  110. mindspore/device_manager.py +170 -0
  111. mindspore/dnnl.dll +0 -0
  112. mindspore/experimental/es/embedding_service.py +35 -27
  113. mindspore/experimental/llm_boost/__init__.py +1 -0
  114. mindspore/experimental/llm_boost/ascend_native/__init__.py +22 -0
  115. mindspore/experimental/llm_boost/ascend_native/llama_boost_ascend_native.py +209 -0
  116. mindspore/experimental/llm_boost/ascend_native/llm_boost.py +52 -0
  117. mindspore/experimental/llm_boost/atb/boost_base.py +2 -3
  118. mindspore/experimental/llm_boost/atb/llama_boost.py +6 -1
  119. mindspore/experimental/llm_boost/register.py +1 -0
  120. mindspore/experimental/map_parameter.py +4 -4
  121. mindspore/experimental/optim/adadelta.py +6 -6
  122. mindspore/experimental/optim/adagrad.py +4 -4
  123. mindspore/experimental/optim/adam.py +7 -0
  124. mindspore/experimental/optim/adamax.py +4 -4
  125. mindspore/experimental/optim/adamw.py +4 -0
  126. mindspore/experimental/optim/asgd.py +1 -1
  127. mindspore/experimental/optim/lr_scheduler.py +73 -46
  128. mindspore/experimental/optim/radam.py +34 -31
  129. mindspore/experimental/optim/rprop.py +1 -1
  130. mindspore/experimental/optim/sgd.py +1 -1
  131. mindspore/hal/contiguous_tensors_handle.py +6 -10
  132. mindspore/hal/device.py +55 -53
  133. mindspore/hal/event.py +52 -52
  134. mindspore/hal/memory.py +179 -120
  135. mindspore/hal/stream.py +150 -109
  136. mindspore/include/api/context.h +0 -1
  137. mindspore/include/dataset/constants.h +7 -4
  138. mindspore/include/dataset/execute.h +2 -2
  139. mindspore/jpeg62.dll +0 -0
  140. mindspore/log.py +50 -0
  141. mindspore/mindrecord/__init__.py +21 -8
  142. mindspore/mindrecord/config.py +17 -316
  143. mindspore/mindrecord/filereader.py +1 -9
  144. mindspore/mindrecord/filewriter.py +5 -15
  145. mindspore/mindrecord/mindpage.py +1 -9
  146. mindspore/mindspore_backend_common.dll +0 -0
  147. mindspore/mindspore_backend_manager.dll +0 -0
  148. mindspore/mindspore_common.dll +0 -0
  149. mindspore/mindspore_core.dll +0 -0
  150. mindspore/mindspore_dump.dll +0 -0
  151. mindspore/mindspore_frontend.dll +0 -0
  152. mindspore/mindspore_glog.dll +0 -0
  153. mindspore/mindspore_memory_pool.dll +0 -0
  154. mindspore/mindspore_ms_backend.dll +0 -0
  155. mindspore/mindspore_ops.dll +0 -0
  156. mindspore/{mindspore_backend.dll → mindspore_ops_host.dll} +0 -0
  157. mindspore/mindspore_ops_kernel_common.dll +0 -0
  158. mindspore/mindspore_profiler.dll +0 -0
  159. mindspore/mindspore_pyboost.dll +0 -0
  160. mindspore/mindspore_pynative.dll +0 -0
  161. mindspore/mindspore_res_manager.dll +0 -0
  162. mindspore/mindspore_runtime_pipeline.dll +0 -0
  163. mindspore/mint/__init__.py +798 -761
  164. mindspore/mint/distributed/__init__.py +70 -4
  165. mindspore/mint/distributed/distributed.py +2679 -44
  166. mindspore/mint/linalg/__init__.py +8 -0
  167. mindspore/mint/nn/__init__.py +743 -22
  168. mindspore/mint/nn/functional.py +716 -23
  169. mindspore/mint/nn/layer/__init__.py +21 -4
  170. mindspore/mint/nn/layer/_functions.py +334 -0
  171. mindspore/mint/nn/layer/activation.py +276 -1
  172. mindspore/mint/nn/layer/basic.py +123 -0
  173. mindspore/mint/nn/layer/conv.py +933 -0
  174. mindspore/mint/nn/layer/normalization.py +223 -28
  175. mindspore/mint/nn/layer/padding.py +797 -0
  176. mindspore/mint/nn/layer/pooling.py +235 -0
  177. mindspore/mint/optim/__init__.py +3 -1
  178. mindspore/mint/optim/adam.py +223 -0
  179. mindspore/mint/optim/adamw.py +26 -19
  180. mindspore/mint/optim/sgd.py +171 -0
  181. mindspore/mint/special/__init__.py +2 -1
  182. mindspore/multiprocessing/__init__.py +5 -0
  183. mindspore/nn/__init__.py +4 -1
  184. mindspore/nn/cell.py +1373 -192
  185. mindspore/nn/dynamic_lr.py +2 -1
  186. mindspore/nn/layer/activation.py +29 -27
  187. mindspore/nn/layer/basic.py +51 -35
  188. mindspore/nn/layer/channel_shuffle.py +3 -3
  189. mindspore/nn/layer/container.py +1 -1
  190. mindspore/nn/layer/conv.py +53 -42
  191. mindspore/nn/layer/embedding.py +12 -11
  192. mindspore/nn/layer/normalization.py +56 -49
  193. mindspore/nn/layer/padding.py +4 -3
  194. mindspore/nn/layer/pooling.py +120 -42
  195. mindspore/nn/layer/rnn_cells.py +1 -1
  196. mindspore/nn/layer/rnns.py +2 -1
  197. mindspore/nn/layer/timedistributed.py +5 -5
  198. mindspore/nn/layer/transformer.py +59 -36
  199. mindspore/nn/learning_rate_schedule.py +8 -4
  200. mindspore/nn/loss/loss.py +58 -55
  201. mindspore/nn/optim/ada_grad.py +7 -5
  202. mindspore/nn/optim/adadelta.py +11 -9
  203. mindspore/nn/optim/adafactor.py +1 -1
  204. mindspore/nn/optim/adam.py +19 -15
  205. mindspore/nn/optim/adamax.py +8 -7
  206. mindspore/nn/optim/adasum.py +5 -5
  207. mindspore/nn/optim/asgd.py +3 -1
  208. mindspore/nn/optim/ftrl.py +11 -9
  209. mindspore/nn/optim/lamb.py +1 -1
  210. mindspore/nn/optim/lars.py +1 -4
  211. mindspore/nn/optim/lazyadam.py +12 -10
  212. mindspore/nn/optim/momentum.py +7 -6
  213. mindspore/nn/optim/optimizer.py +3 -3
  214. mindspore/nn/optim/proximal_ada_grad.py +12 -10
  215. mindspore/nn/optim/rmsprop.py +13 -12
  216. mindspore/nn/optim/rprop.py +11 -9
  217. mindspore/nn/optim/sgd.py +9 -6
  218. mindspore/nn/optim/tft_wrapper.py +5 -2
  219. mindspore/nn/optim/thor.py +2 -1
  220. mindspore/nn/probability/bijector/bijector.py +17 -11
  221. mindspore/nn/probability/bijector/gumbel_cdf.py +5 -5
  222. mindspore/nn/probability/bijector/invert.py +2 -2
  223. mindspore/nn/probability/bijector/scalar_affine.py +3 -3
  224. mindspore/nn/probability/bijector/softplus.py +3 -2
  225. mindspore/nn/probability/distribution/beta.py +3 -3
  226. mindspore/nn/probability/distribution/categorical.py +1 -1
  227. mindspore/nn/probability/distribution/cauchy.py +4 -2
  228. mindspore/nn/probability/distribution/exponential.py +6 -7
  229. mindspore/nn/probability/distribution/gamma.py +2 -2
  230. mindspore/nn/probability/distribution/gumbel.py +2 -2
  231. mindspore/nn/probability/distribution/half_normal.py +5 -3
  232. mindspore/nn/probability/distribution/logistic.py +5 -3
  233. mindspore/nn/probability/distribution/poisson.py +1 -1
  234. mindspore/nn/probability/distribution/uniform.py +5 -3
  235. mindspore/nn/reinforcement/_tensors_queue.py +1 -1
  236. mindspore/nn/reinforcement/tensor_array.py +1 -1
  237. mindspore/nn/utils/init.py +13 -11
  238. mindspore/nn/wrap/__init__.py +6 -6
  239. mindspore/nn/wrap/cell_wrapper.py +181 -122
  240. mindspore/nn/wrap/grad_reducer.py +45 -36
  241. mindspore/nn/wrap/loss_scale.py +6 -7
  242. mindspore/numpy/array_creations.py +63 -65
  243. mindspore/numpy/array_ops.py +149 -144
  244. mindspore/numpy/logic_ops.py +41 -42
  245. mindspore/numpy/math_ops.py +361 -359
  246. mindspore/numpy/utils.py +17 -18
  247. mindspore/numpy/utils_const.py +5 -6
  248. mindspore/opencv_core452.dll +0 -0
  249. mindspore/opencv_imgcodecs452.dll +0 -0
  250. mindspore/opencv_imgproc452.dll +0 -0
  251. mindspore/ops/__init__.py +5 -3
  252. mindspore/ops/_grad_experimental/grad_comm_ops.py +112 -16
  253. mindspore/ops/_grad_experimental/grad_debug_ops.py +14 -2
  254. mindspore/ops/_grad_experimental/grad_inner_ops.py +9 -0
  255. mindspore/ops/_grad_experimental/grad_math_ops.py +2 -1
  256. mindspore/ops/_grad_experimental/taylor_rule.py +29 -0
  257. mindspore/ops/_op_impl/cpu/__init__.py +1 -0
  258. mindspore/ops/_op_impl/cpu/raise_op.py +28 -0
  259. mindspore/ops/_register_for_op.py +0 -11
  260. mindspore/{ops_generate → ops/_utils}/arg_dtype_cast.py +123 -4
  261. mindspore/{ops_generate → ops/_utils}/arg_handler.py +3 -65
  262. mindspore/ops/_vmap/vmap_array_ops.py +52 -25
  263. mindspore/ops/_vmap/vmap_base.py +0 -2
  264. mindspore/ops/_vmap/vmap_grad_nn_ops.py +21 -14
  265. mindspore/ops/_vmap/vmap_math_ops.py +15 -16
  266. mindspore/ops/_vmap/vmap_nn_ops.py +29 -42
  267. mindspore/ops/auto_generate/__init__.py +4 -3
  268. mindspore/ops/auto_generate/cpp_create_prim_instance_helper.py +258 -46
  269. mindspore/ops/auto_generate/gen_extend_func.py +757 -185
  270. mindspore/ops/auto_generate/gen_ops_def.py +4197 -2243
  271. mindspore/ops/auto_generate/gen_ops_prim.py +16976 -6055
  272. mindspore/ops/auto_generate/pyboost_inner_prim.py +221 -87
  273. mindspore/ops/composite/__init__.py +2 -1
  274. mindspore/ops/composite/base.py +20 -25
  275. mindspore/ops/composite/math_ops.py +6 -16
  276. mindspore/ops/composite/multitype_ops/__init__.py +5 -2
  277. mindspore/ops/composite/multitype_ops/_compile_utils.py +228 -30
  278. mindspore/ops/composite/multitype_ops/_constexpr_utils.py +1 -2
  279. mindspore/ops/composite/multitype_ops/add_impl.py +2 -1
  280. mindspore/ops/composite/multitype_ops/bitwise_and_impl.py +2 -1
  281. mindspore/ops/composite/multitype_ops/bitwise_or_impl.py +2 -1
  282. mindspore/ops/composite/multitype_ops/bitwise_xor_impl.py +2 -1
  283. mindspore/ops/composite/multitype_ops/div_impl.py +6 -4
  284. mindspore/ops/composite/multitype_ops/equal_impl.py +4 -3
  285. mindspore/ops/composite/multitype_ops/floordiv_impl.py +2 -1
  286. mindspore/ops/composite/multitype_ops/getitem_impl.py +3 -2
  287. mindspore/ops/composite/multitype_ops/greater_equal_impl.py +4 -3
  288. mindspore/ops/composite/multitype_ops/greater_impl.py +4 -3
  289. mindspore/ops/composite/multitype_ops/in_impl.py +2 -1
  290. mindspore/ops/composite/multitype_ops/invert_impl.py +50 -0
  291. mindspore/ops/composite/multitype_ops/left_shift_impl.py +2 -1
  292. mindspore/ops/composite/multitype_ops/less_equal_impl.py +4 -3
  293. mindspore/ops/composite/multitype_ops/less_impl.py +4 -3
  294. mindspore/ops/composite/multitype_ops/logic_not_impl.py +3 -2
  295. mindspore/ops/composite/multitype_ops/logical_and_impl.py +2 -1
  296. mindspore/ops/composite/multitype_ops/logical_or_impl.py +2 -1
  297. mindspore/ops/composite/multitype_ops/mod_impl.py +2 -1
  298. mindspore/ops/composite/multitype_ops/mul_impl.py +3 -2
  299. mindspore/ops/composite/multitype_ops/negative_impl.py +2 -1
  300. mindspore/ops/composite/multitype_ops/not_equal_impl.py +2 -1
  301. mindspore/ops/composite/multitype_ops/not_in_impl.py +2 -1
  302. mindspore/ops/composite/multitype_ops/ones_like_impl.py +18 -0
  303. mindspore/ops/composite/multitype_ops/pow_impl.py +2 -30
  304. mindspore/ops/composite/multitype_ops/right_shift_impl.py +2 -1
  305. mindspore/ops/composite/multitype_ops/setitem_impl.py +2 -1
  306. mindspore/ops/composite/multitype_ops/sub_impl.py +2 -1
  307. mindspore/ops/function/__init__.py +40 -2
  308. mindspore/ops/function/_add_attr_func.py +58 -0
  309. mindspore/ops/function/array_func.py +2089 -2403
  310. mindspore/ops/function/clip_func.py +80 -23
  311. mindspore/ops/function/debug_func.py +57 -57
  312. mindspore/ops/function/grad/__init__.py +1 -0
  313. mindspore/ops/function/grad/grad_func.py +104 -71
  314. mindspore/ops/function/image_func.py +2 -2
  315. mindspore/ops/function/linalg_func.py +47 -78
  316. mindspore/ops/function/math_func.py +4351 -3813
  317. mindspore/ops/function/nn_func.py +1712 -637
  318. mindspore/ops/function/other_func.py +159 -1
  319. mindspore/ops/function/parameter_func.py +18 -84
  320. mindspore/ops/function/random_func.py +452 -387
  321. mindspore/ops/function/reshard_func.py +4 -70
  322. mindspore/ops/function/sparse_func.py +3 -3
  323. mindspore/ops/function/sparse_unary_func.py +6 -6
  324. mindspore/ops/function/spectral_func.py +25 -58
  325. mindspore/ops/function/vmap_func.py +26 -18
  326. mindspore/ops/functional.py +23 -7
  327. mindspore/ops/functional_overload.py +1548 -0
  328. mindspore/ops/op_info_register.py +32 -244
  329. mindspore/ops/operations/__init__.py +23 -15
  330. mindspore/ops/operations/_custom_ops_utils.py +235 -0
  331. mindspore/ops/operations/_embedding_cache_ops.py +4 -4
  332. mindspore/ops/operations/_grad_ops.py +2 -43
  333. mindspore/ops/operations/_infer_ops.py +2 -1
  334. mindspore/ops/operations/_inner_ops.py +43 -84
  335. mindspore/ops/operations/_ms_kernel.py +4 -10
  336. mindspore/ops/operations/_rl_inner_ops.py +1 -1
  337. mindspore/ops/operations/_scalar_ops.py +3 -2
  338. mindspore/ops/operations/_sequence_ops.py +1 -1
  339. mindspore/ops/operations/_tensor_array.py +1 -1
  340. mindspore/ops/operations/array_ops.py +81 -324
  341. mindspore/ops/operations/comm_ops.py +154 -108
  342. mindspore/ops/operations/custom_ops.py +298 -87
  343. mindspore/ops/operations/debug_ops.py +157 -59
  344. mindspore/ops/operations/inner_ops.py +7 -5
  345. mindspore/ops/operations/linalg_ops.py +1 -57
  346. mindspore/ops/operations/manually_defined/_inner.py +1 -1
  347. mindspore/ops/operations/manually_defined/ops_def.py +928 -180
  348. mindspore/ops/operations/math_ops.py +32 -234
  349. mindspore/ops/operations/nn_ops.py +212 -531
  350. mindspore/ops/operations/other_ops.py +62 -9
  351. mindspore/ops/operations/random_ops.py +13 -7
  352. mindspore/ops/operations/reshard_ops.py +1 -1
  353. mindspore/ops/operations/sparse_ops.py +2 -2
  354. mindspore/ops/primitive.py +66 -53
  355. mindspore/ops/tensor_method.py +1895 -0
  356. mindspore/ops_generate/__init__.py +0 -5
  357. mindspore/ops_generate/aclnn/__init__.py +0 -0
  358. mindspore/ops_generate/aclnn/aclnn_kernel_register_auto_cc_generator.py +135 -0
  359. mindspore/ops_generate/aclnn/gen_aclnn_implement.py +257 -0
  360. mindspore/ops_generate/api/__init__.py +0 -0
  361. mindspore/ops_generate/api/add_tensor_docs_generator.py +56 -0
  362. mindspore/ops_generate/api/cpp_create_prim_instance_helper_generator.py +105 -0
  363. mindspore/ops_generate/api/functional_map_cpp_generator.py +504 -0
  364. mindspore/ops_generate/api/functional_overload_py_generator.py +112 -0
  365. mindspore/ops_generate/api/functions_cc_generator.py +237 -0
  366. mindspore/ops_generate/api/gen_api.py +103 -0
  367. mindspore/ops_generate/api/op_api_proto.py +235 -0
  368. mindspore/ops_generate/api/tensor_func_reg_cpp_generator.py +461 -0
  369. mindspore/ops_generate/common/__init__.py +0 -0
  370. mindspore/ops_generate/common/base_generator.py +11 -0
  371. mindspore/ops_generate/common/gen_constants.py +91 -0
  372. mindspore/ops_generate/common/gen_utils.py +348 -0
  373. mindspore/ops_generate/common/op_proto.py +473 -0
  374. mindspore/ops_generate/common/template.py +523 -0
  375. mindspore/ops_generate/gen_ops.py +22 -1069
  376. mindspore/ops_generate/op_def/__init__.py +0 -0
  377. mindspore/ops_generate/op_def/gen_op_def.py +90 -0
  378. mindspore/ops_generate/op_def/lite_ops_cpp_generator.py +191 -0
  379. mindspore/ops_generate/op_def/ops_def_cc_generator.py +296 -0
  380. mindspore/ops_generate/op_def/ops_def_h_generator.py +74 -0
  381. mindspore/ops_generate/op_def/ops_name_h_generator.py +83 -0
  382. mindspore/ops_generate/op_def/ops_primitive_h_generator.py +125 -0
  383. mindspore/ops_generate/op_def_py/__init__.py +0 -0
  384. mindspore/ops_generate/op_def_py/gen_op_def_py.py +47 -0
  385. mindspore/ops_generate/op_def_py/op_def_py_generator.py +132 -0
  386. mindspore/ops_generate/op_def_py/op_prim_py_generator.py +489 -0
  387. mindspore/ops_generate/pyboost/__init__.py +0 -0
  388. mindspore/ops_generate/pyboost/auto_grad_impl_cc_generator.py +139 -0
  389. mindspore/ops_generate/pyboost/auto_grad_reg_cc_generator.py +93 -0
  390. mindspore/ops_generate/pyboost/gen_pyboost_func.py +175 -0
  391. mindspore/ops_generate/pyboost/op_template_parser.py +517 -0
  392. mindspore/ops_generate/pyboost/pyboost_functions_cpp_generator.py +407 -0
  393. mindspore/ops_generate/pyboost/pyboost_functions_h_generator.py +100 -0
  394. mindspore/ops_generate/pyboost/pyboost_functions_py_generator.py +148 -0
  395. mindspore/ops_generate/pyboost/pyboost_grad_function_cpp_generator.py +155 -0
  396. mindspore/ops_generate/pyboost/pyboost_inner_prim_generator.py +132 -0
  397. mindspore/ops_generate/pyboost/pyboost_native_grad_functions_generator.py +272 -0
  398. mindspore/ops_generate/pyboost/pyboost_op_cpp_code_generator.py +938 -0
  399. mindspore/ops_generate/pyboost/pyboost_overload_functions_cpp_generator.py +357 -0
  400. mindspore/ops_generate/{pyboost_utils.py → pyboost/pyboost_utils.py} +179 -36
  401. mindspore/ops_generate/resources/__init__.py +0 -0
  402. mindspore/ops_generate/resources/resource_list.py +30 -0
  403. mindspore/ops_generate/resources/resource_loader.py +36 -0
  404. mindspore/ops_generate/resources/resource_manager.py +64 -0
  405. mindspore/ops_generate/resources/yaml_loader.py +88 -0
  406. mindspore/ops_generate/tensor_py_cc_generator.py +122 -0
  407. mindspore/parallel/__init__.py +7 -3
  408. mindspore/parallel/_auto_parallel_context.py +159 -40
  409. mindspore/parallel/_cell_wrapper.py +132 -15
  410. mindspore/parallel/_parallel_serialization.py +107 -5
  411. mindspore/parallel/_ps_context.py +1 -1
  412. mindspore/parallel/_recovery_context.py +7 -2
  413. mindspore/parallel/_tensor.py +142 -18
  414. mindspore/parallel/_utils.py +199 -23
  415. mindspore/parallel/algo_parameter_config.py +4 -4
  416. mindspore/parallel/auto_parallel.py +732 -0
  417. mindspore/parallel/checkpoint_convert.py +159 -0
  418. mindspore/parallel/checkpoint_transform.py +700 -35
  419. mindspore/parallel/cluster/process_entity/_api.py +276 -50
  420. mindspore/parallel/cluster/process_entity/_utils.py +41 -6
  421. mindspore/parallel/cluster/run.py +21 -4
  422. mindspore/parallel/function/__init__.py +24 -0
  423. mindspore/parallel/function/reshard_func.py +258 -0
  424. mindspore/parallel/nn/__init__.py +25 -0
  425. mindspore/parallel/nn/parallel_cell_wrapper.py +263 -0
  426. mindspore/parallel/nn/parallel_grad_reducer.py +169 -0
  427. mindspore/parallel/parameter_broadcast.py +25 -14
  428. mindspore/parallel/shard.py +137 -59
  429. mindspore/parallel/transform_safetensors.py +364 -305
  430. mindspore/profiler/__init__.py +22 -5
  431. mindspore/profiler/analysis/__init__.py +0 -0
  432. mindspore/profiler/analysis/parser/__init__.py +0 -0
  433. mindspore/profiler/analysis/parser/ascend_cann_parser.py +170 -0
  434. mindspore/profiler/analysis/parser/base_parser.py +158 -0
  435. mindspore/profiler/analysis/parser/framework_cann_relation_parser.py +45 -0
  436. mindspore/profiler/analysis/parser/ms_framework_parser.py +142 -0
  437. mindspore/profiler/analysis/parser/ms_minddata_parser.py +145 -0
  438. mindspore/profiler/analysis/parser/timeline_assembly_factory/__init__.py +0 -0
  439. mindspore/profiler/analysis/parser/timeline_assembly_factory/ascend_timeline_assembler.py +264 -0
  440. mindspore/profiler/analysis/parser/timeline_assembly_factory/base_timeline_assembler.py +40 -0
  441. mindspore/profiler/analysis/parser/timeline_assembly_factory/trace_view_container.py +109 -0
  442. mindspore/profiler/analysis/parser/timeline_creator/__init__.py +0 -0
  443. mindspore/profiler/analysis/parser/timeline_creator/base_timeline_creator.py +44 -0
  444. mindspore/profiler/analysis/parser/timeline_creator/cpu_op_timeline_creator.py +90 -0
  445. mindspore/profiler/analysis/parser/timeline_creator/fwk_timeline_creator.py +76 -0
  446. mindspore/profiler/analysis/parser/timeline_creator/msprof_timeline_creator.py +103 -0
  447. mindspore/profiler/analysis/parser/timeline_creator/scope_layer_timeline_creator.py +134 -0
  448. mindspore/profiler/analysis/parser/timeline_event/__init__.py +0 -0
  449. mindspore/profiler/analysis/parser/timeline_event/base_event.py +233 -0
  450. mindspore/profiler/analysis/parser/timeline_event/cpu_op_event.py +47 -0
  451. mindspore/profiler/analysis/parser/timeline_event/flow_event.py +36 -0
  452. mindspore/profiler/analysis/parser/timeline_event/fwk_event.py +415 -0
  453. mindspore/profiler/analysis/parser/timeline_event/msprof_event.py +73 -0
  454. mindspore/profiler/analysis/parser/timeline_event/scope_layer_event.py +53 -0
  455. mindspore/profiler/analysis/parser/timeline_event/timeline_event_pool.py +146 -0
  456. mindspore/profiler/analysis/task_manager.py +131 -0
  457. mindspore/profiler/analysis/time_converter.py +84 -0
  458. mindspore/profiler/analysis/viewer/__init__.py +0 -0
  459. mindspore/profiler/analysis/viewer/ascend_communication_viewer.py +372 -0
  460. mindspore/profiler/analysis/viewer/ascend_integrate_viewer.py +87 -0
  461. mindspore/profiler/analysis/viewer/ascend_kernel_details_viewer.py +250 -0
  462. mindspore/profiler/analysis/viewer/ascend_memory_viewer.py +320 -0
  463. mindspore/profiler/analysis/viewer/ascend_op_memory_viewer.py +327 -0
  464. mindspore/profiler/analysis/viewer/ascend_step_trace_time_viewer.py +376 -0
  465. mindspore/profiler/analysis/viewer/ascend_timeline_viewer.py +58 -0
  466. mindspore/profiler/analysis/viewer/base_viewer.py +26 -0
  467. mindspore/profiler/analysis/viewer/ms_dataset_viewer.py +96 -0
  468. mindspore/profiler/analysis/viewer/ms_minddata_viewer.py +581 -0
  469. mindspore/profiler/analysis/work_flow.py +73 -0
  470. mindspore/profiler/common/ascend_msprof_exporter.py +139 -0
  471. mindspore/profiler/common/command_executor.py +90 -0
  472. mindspore/profiler/common/constant.py +186 -3
  473. mindspore/profiler/common/file_manager.py +208 -0
  474. mindspore/profiler/common/log.py +130 -0
  475. mindspore/profiler/common/msprof_cmd_tool.py +221 -0
  476. mindspore/profiler/common/path_manager.py +395 -0
  477. mindspore/profiler/common/process_bar.py +168 -0
  478. mindspore/profiler/common/process_pool.py +9 -3
  479. mindspore/profiler/common/profiler_context.py +500 -0
  480. mindspore/profiler/common/profiler_info.py +304 -0
  481. mindspore/profiler/common/profiler_meta_data.py +74 -0
  482. mindspore/profiler/common/profiler_output_path.py +284 -0
  483. mindspore/profiler/common/profiler_parameters.py +251 -0
  484. mindspore/profiler/common/profiler_path_manager.py +179 -0
  485. mindspore/profiler/common/record_function.py +76 -0
  486. mindspore/profiler/common/tlv_decoder.py +76 -0
  487. mindspore/profiler/common/util.py +75 -2
  488. mindspore/profiler/dynamic_profiler.py +341 -75
  489. mindspore/profiler/envprofiler.py +163 -0
  490. mindspore/profiler/experimental_config.py +197 -0
  491. mindspore/profiler/mstx.py +242 -0
  492. mindspore/profiler/platform/__init__.py +21 -0
  493. mindspore/profiler/platform/base_profiler.py +40 -0
  494. mindspore/profiler/platform/cpu_profiler.py +124 -0
  495. mindspore/profiler/platform/gpu_profiler.py +74 -0
  496. mindspore/profiler/platform/npu_profiler.py +335 -0
  497. mindspore/profiler/profiler.py +1073 -90
  498. mindspore/profiler/profiler_action_controller.py +187 -0
  499. mindspore/profiler/profiler_interface.py +118 -0
  500. mindspore/profiler/schedule.py +243 -0
  501. mindspore/rewrite/api/node.py +15 -13
  502. mindspore/rewrite/api/symbol_tree.py +2 -3
  503. mindspore/run_check/_check_version.py +27 -20
  504. mindspore/run_check/run_check.py +1 -1
  505. mindspore/runtime/__init__.py +37 -0
  506. mindspore/runtime/device.py +27 -0
  507. mindspore/runtime/event.py +209 -0
  508. mindspore/runtime/executor.py +177 -0
  509. mindspore/runtime/memory.py +416 -0
  510. mindspore/runtime/stream.py +460 -0
  511. mindspore/runtime/thread_bind_core.py +401 -0
  512. mindspore/safeguard/rewrite_obfuscation.py +12 -9
  513. mindspore/swresample-4.dll +0 -0
  514. mindspore/swscale-6.dll +0 -0
  515. mindspore/tinyxml2.dll +0 -0
  516. mindspore/train/__init__.py +8 -8
  517. mindspore/train/_utils.py +96 -27
  518. mindspore/train/amp.py +9 -5
  519. mindspore/train/callback/__init__.py +2 -2
  520. mindspore/train/callback/_callback.py +2 -16
  521. mindspore/train/callback/_checkpoint.py +53 -55
  522. mindspore/train/callback/_cluster_monitor.py +14 -18
  523. mindspore/train/callback/_early_stop.py +1 -1
  524. mindspore/train/callback/_flops_collector.py +103 -68
  525. mindspore/train/callback/_history.py +8 -5
  526. mindspore/train/callback/_lambda_callback.py +2 -2
  527. mindspore/train/callback/_landscape.py +0 -3
  528. mindspore/train/callback/_loss_monitor.py +2 -1
  529. mindspore/train/callback/_on_request_exit.py +6 -5
  530. mindspore/train/callback/_reduce_lr_on_plateau.py +11 -6
  531. mindspore/train/callback/_summary_collector.py +52 -19
  532. mindspore/train/callback/_time_monitor.py +2 -1
  533. mindspore/train/callback/{_tft_register.py → _train_fault_tolerance.py} +228 -108
  534. mindspore/train/data_sink.py +25 -2
  535. mindspore/train/dataset_helper.py +15 -16
  536. mindspore/train/loss_scale_manager.py +8 -7
  537. mindspore/train/metrics/accuracy.py +3 -3
  538. mindspore/train/metrics/confusion_matrix.py +9 -9
  539. mindspore/train/metrics/error.py +3 -3
  540. mindspore/train/metrics/hausdorff_distance.py +4 -4
  541. mindspore/train/metrics/mean_surface_distance.py +3 -3
  542. mindspore/train/metrics/metric.py +0 -12
  543. mindspore/train/metrics/occlusion_sensitivity.py +4 -2
  544. mindspore/train/metrics/precision.py +11 -10
  545. mindspore/train/metrics/recall.py +9 -9
  546. mindspore/train/metrics/root_mean_square_surface_distance.py +2 -2
  547. mindspore/train/mind_ir_pb2.py +174 -46
  548. mindspore/train/model.py +269 -136
  549. mindspore/train/serialization.py +622 -978
  550. mindspore/train/summary/_summary_adapter.py +2 -2
  551. mindspore/train/summary/summary_record.py +2 -3
  552. mindspore/train/train_thor/model_thor.py +1 -1
  553. mindspore/turbojpeg.dll +0 -0
  554. mindspore/utils/__init__.py +6 -3
  555. mindspore/utils/dryrun.py +140 -0
  556. mindspore/utils/hooks.py +81 -0
  557. mindspore/utils/runtime_execution_order_check.py +552 -0
  558. mindspore/utils/utils.py +138 -4
  559. mindspore/version.py +1 -1
  560. {mindspore-2.4.10.dist-info → mindspore-2.6.0.dist-info}/METADATA +3 -3
  561. {mindspore-2.4.10.dist-info → mindspore-2.6.0.dist-info}/RECORD +564 -395
  562. {mindspore-2.4.10.dist-info → mindspore-2.6.0.dist-info}/entry_points.txt +1 -1
  563. mindspore/_install_custom.py +0 -43
  564. mindspore/common/_register_for_adapter.py +0 -74
  565. mindspore/common/_tensor_overload.py +0 -139
  566. mindspore/mindspore_np_dtype.dll +0 -0
  567. mindspore/ops/auto_generate/gen_arg_dtype_cast.py +0 -252
  568. mindspore/ops/auto_generate/gen_arg_handler.py +0 -197
  569. mindspore/ops/operations/_opaque_predicate_registry.py +0 -41
  570. mindspore/ops_generate/gen_aclnn_implement.py +0 -263
  571. mindspore/ops_generate/gen_ops_inner_prim.py +0 -131
  572. mindspore/ops_generate/gen_pyboost_func.py +0 -1052
  573. mindspore/ops_generate/gen_utils.py +0 -209
  574. mindspore/ops_generate/op_proto.py +0 -145
  575. mindspore/ops_generate/template.py +0 -261
  576. mindspore/profiler/envprofiling.py +0 -254
  577. mindspore/profiler/profiling.py +0 -1926
  578. {mindspore-2.4.10.dist-info → mindspore-2.6.0.dist-info}/WHEEL +0 -0
  579. {mindspore-2.4.10.dist-info → mindspore-2.6.0.dist-info}/top_level.txt +0 -0
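
The hunk below corresponds to entry 577 in the listing above: mindspore/profiler/profiling.py is removed outright in 2.6.0, with all 1,926 lines deleted (hence the @@ -1,1926 +0,0 @@ header). A per-file unified diff of this shape can be regenerated from the two wheels with the standard library; the sketch below is illustrative only and makes the same filename assumptions as the earlier one.

    import difflib
    import zipfile

    MEMBER = "mindspore/profiler/profiling.py"

    def read_member(wheel_path, member):
        # Return the member's lines, or [] if the wheel does not contain it.
        with zipfile.ZipFile(wheel_path) as wheel:
            if member not in wheel.namelist():
                return []
            return wheel.read(member).decode("utf-8").splitlines(keepends=True)

    old_lines = read_member("mindspore-2.4.10-cp39-cp39-win_amd64.whl", MEMBER)
    new_lines = read_member("mindspore-2.6.0-cp39-cp39-win_amd64.whl", MEMBER)

    # The member exists only on the 2.4.10 side, so every line shows up as a deletion.
    for line in difflib.unified_diff(old_lines, new_lines,
                                     fromfile=f"a/{MEMBER}", tofile=f"b/{MEMBER}"):
        print(line, end="")
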
@@ -1,1926 +0,0 @@
1
- # Copyright 2020-2023 Huawei Technologies Co., Ltd
2
- #
3
- # Licensed under the Apache License, Version 2.0 (the "License");
4
- # you may not use this file except in compliance with the License.
5
- # You may obtain a copy of the License at
6
- #
7
- # http://www.apache.org/licenses/LICENSE-2.0
8
- #
9
- # Unless required by applicable law or agreed to in writing, software
10
- # distributed under the License is distributed on an "AS IS" BASIS,
11
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
- # See the License for the specific language governing permissions and
13
- # limitations under the License.
14
- # ============================================================================
15
- """Profiling api file."""
16
- import os
17
- import stat
18
- import time
19
- import json
20
- from json import JSONDecodeError
21
- import glob
22
- import socket
23
- import multiprocessing
24
- from enum import Enum
25
- from typing import List
26
- from sys import getsizeof
27
- import numpy as np
28
-
29
- from mindspore import log as logger, context
30
- from mindspore.context import get_auto_parallel_context
31
- from mindspore.communication.management import GlobalComm, get_rank, get_group_size, get_local_rank
32
- import mindspore._c_expression as c_expression
33
- import mindspore._c_dataengine as cde
34
- from mindspore._c_expression import _framework_profiler_enable_mi, _framework_profiler_disable_mi
35
- from mindspore.profiler.common.exceptions.exceptions import ProfilerFileNotFoundException, \
36
- ProfilerIOException, ProfilerException, ProfilerRawFileException, ProfilerParamTypeErrorException
37
- from mindspore.profiler.common.exceptions.exceptions import ProfilerPathErrorException
38
- from mindspore.profiler.common.exceptions.exceptions import ProfilerDirNotFoundException
39
- from mindspore.profiler.common.util import get_file_path, ProfilerPathManager
40
- from mindspore.profiler.common.process_pool import MultiProcessPool
41
- from mindspore.profiler.common.validator.validate_path import validate_and_normalize_path
42
- from mindspore.profiler.parser.framework_parser import GpuFrameWorkParser, DynamicFrameWorkParser
43
- from mindspore.profiler.parser.integrator import Integrator, DeviceTarget
44
- from mindspore.profiler.parser.ascend_analysis.function_event import CANNEvent
45
- from mindspore.profiler.parser.cpu_gpu_timeline_generator import GpuTimelineGenerator, CpuTimelineGenerator
46
- from mindspore.profiler.parser.ascend_timeline_generator import AscendTimelineGenerator
47
- from mindspore.profiler.parser.minddata_parser import MinddataParser
48
- from mindspore.profiler.parser.minddata_analyzer import MinddataProfilingAnalyzer
49
- from mindspore.profiler.parser.minddata_pipeline_parser import \
50
- MinddataPipelineParser
51
- from mindspore.profiler.parser.step_trace_parser import GpuStepTraceParser
52
- from mindspore.profiler.parser.profiler_info import ProfilerInfo
53
- from mindspore.common.api import _pynative_executor
54
- from mindspore.profiler.parser.ascend_msprof_exporter import AscendMsprofExporter
55
- from mindspore.profiler.parser.ascend_msprof_generator import AscendMsprofDataGenerator
56
- from mindspore.profiler.parser.ascend_fpbp_generator import AscendFPBPGenerator
57
- from mindspore.profiler.parser.ascend_op_generator import AscendOPGenerator
58
- from mindspore.profiler.parser.ascend_steptrace_generator import AscendStepTraceGenerator
59
- from mindspore.profiler.parser.ascend_flops_generator import AscendFlopsGenerator
60
- from mindspore.profiler.parser.ascend_cluster_generator import AscendClusterGenerator
61
- from mindspore.profiler.parser.ascend_hccl_generator import AscendHCCLGenerator
62
- from mindspore.profiler.parser.ascend_communicate_generator import AscendCommunicationGenerator
63
- from mindspore.profiler.parser.ascend_memory_generator import AscendMemoryGenerator
64
- from mindspore.profiler.parser.ascend_integrate_generator import AscendIntegrateGenerator
65
- from mindspore.profiler.parser.ascend_analysis.file_manager import FileManager
66
- from mindspore.profiler.parser.ascend_analysis.path_manager import PathManager
67
- from mindspore.profiler.parser.ascend_analysis.constant import Constant
68
- from mindspore.profiler.common.util import timeit
69
-
70
-
71
- INIT_OP_NAME = 'Default/InitDataSetQueue'
72
-
73
- AICORE_METRICS_DICT = {
74
- 0: "ArithmeticUtilization",
75
- 1: "PipeUtilization",
76
- 2: "Memory",
77
- 3: "MemoryL0",
78
- 4: "ResourceConflictRatio",
79
- 5: "MemoryUB",
80
- 6: "L2Cache",
81
- -1: "None"
82
- }
83
-
84
-
85
- class ModelTraingMode(Enum):
86
- PYNATIVE = 0
87
- GRAPH = 1
88
- KERNEL_BY_KERNEL = 2
89
- UNKNOWN = 3
90
-
91
-
92
- class ProfilerLevel(Enum):
93
- Level0 = "Level0"
94
- Level1 = "Level1"
95
- Level2 = "Level2"
96
-
97
-
98
- class DeviceSupportParam(Enum):
99
- """The device target enum."""
100
- CPU = ['start', 'start_profile', 'output_path', 'timeline_limit', 'profile_framework', 'op_time']
101
- GPU = [
102
- 'start', 'start_profile', 'output_path', 'data_process', 'timeline_limit', 'sync_enable', 'op_time',
103
- 'profile_framework'
104
- ]
105
- ASCEND = [
106
- 'start', 'start_profile', 'output_path', 'data_process', 'timeline_limit', 'profile_memory',
107
- 'parallel_strategy', 'profile_communication', 'aicore_metrics', 'l2_cache', 'hbm_ddr', 'pcie', 'op_time',
108
- 'ascend_job_id', 'profile_framework', 'with_stack', 'profiler_level', 'data_simplification'
109
- ]
110
-
111
-
112
- ALWAYS_VALID_PARAM = [
113
- 'start', 'start_profile', 'output_path', 'data_process', 'parallel_strategy', 'l2_cache',
114
- 'hbm_ddr', 'pcie', 'ascend_job_id', 'op_time', 'profile_framework', 'profiler_level'
115
- ]
116
-
117
- ANALYSIS_ASYNC_MODE = 'async'
118
- ANALYSIS_SYNC_MODE = 'sync'
119
- DEFAULT_MODEL_ID = 4294967295
120
-
121
-
122
- def _environment_check():
123
- if c_expression.security.enable_security():
124
- raise RuntimeError("Profiler is not supported when MindSpore is compiled with \'-s on\'.")
125
-
126
-
127
- class ExecutionCalculator:
128
- """Calculate the average execution time and counts for each stage."""
129
-
130
- def __init__(self, event, stage, custom_info):
131
- self.event = event
132
- self.stage = stage
133
- self.custom_info = custom_info
134
- self.count = 0
135
- self.average_execution = 0
136
-
137
-
138
- def _calculate_dataset_item(row, execution_time_map, ts_map):
139
- """Calculate dataset execution time for one row."""
140
- start_end = row['start_end']
141
- event = row['event']
142
- stage = row['stage']
143
- custom_info = row['custom_info']
144
- event_stage_tid_pid = event + '_' + stage + '_' + row['tid'] + '_' + row['pid']
145
- if start_end == '1' and event_stage_tid_pid in ts_map:
146
- title = event + '::' + stage + '::' + custom_info
147
- ts_end = int(row['time_stamp(us)'])
148
- ts = ts_map[event_stage_tid_pid]
149
- dur = ts_end - ts
150
- if title not in execution_time_map:
151
- execution_time_map[title] = ExecutionCalculator(event=event, stage=stage, custom_info=custom_info)
152
- execution_time_map[title].count += 1
153
- if execution_time_map[title].count != 0:
154
- execution_time_map[title].average_execution += \
155
- (dur - execution_time_map[title].average_execution) / execution_time_map[title].count
156
- del ts_map[event_stage_tid_pid]
157
- elif start_end == '0':
158
- ts = int(row['time_stamp(us)'])
159
- ts_map[event_stage_tid_pid] = ts
160
- elif start_end == '2':
161
- logger.info("It is a instant event, skip to calculate execution time. item: %s.", row)
162
- else:
163
- logger.warning("Can not map the start time for item: %s.", row)
164
-
165
-
166
- def _ascend_graph_msprof_generator(mindstudio_profiler_output, model_iteration_dict):
167
- """Executing the msprof export mode."""
168
- try:
169
- ProfilerInfo.set_export_start_time(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))
170
- msprof_exporter = AscendMsprofExporter(mindstudio_profiler_output)
171
- flag = msprof_exporter.export(model_iteration_dict)
172
- ProfilerInfo.set_export_end_time(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))
173
- return flag
174
- except (ProfilerException, TimeoutError, FileNotFoundError, RuntimeError) as err:
175
- logger.warning(str(err))
176
- return False
177
-
178
-
179
- def _ascend_graph_msprof_analyse(mindstudio_profiler_output):
180
- """
181
- Ascend graph model msprof data analyse.
182
-
183
- Returns:
184
- list[obj]: The list is : df_op_summary, df_op_statistic, df_step_trace, df_step_trace_model
185
- """
186
- res = ([], [], [], [])
187
- try:
188
- msprof_analyser = AscendMsprofDataGenerator(mindstudio_profiler_output)
189
- res = msprof_analyser.parse()
190
- return res
191
- except ProfilerException as err:
192
- logger.warning(err.message)
193
- finally:
194
- pass
195
- return res
196
-
197
-
198
- class Profiler:
199
- r"""
200
- This class to enable the profiling of MindSpore neural networks.
201
- MindSpore users can import the mindspore.Profiler, initialize the Profiler object to start profiling,
202
- and use Profiler.analyse() to stop profiling and analyse the results.
203
- Users can visualize the results using the `MindSpore Insight
204
- <https://www.mindspore.cn/mindinsight/docs/en/master/index.html>`_ tool.
205
- Now, Profiler supports AICORE operator, AICPU operator, HostCPU operator, memory,
206
- correspondence, cluster, etc data analysis.
207
-
208
- Args:
209
- output_path (str, optional): Output data path. Default: ``"./data"`` .
210
- profiler_level (ProfilerLevel, optional): (Ascend only) The level of profiling. Default: ``None``.
211
-
212
- - ProfilerLevel.Level0: Leanest level of profiling data collection, collects information about the elapsed
213
- time of the computational operators on the NPU and communication large operator information.
214
- - ProfilerLevel.Level1: Collect more CANN layer AscendCL data and AICore performance metrics and
215
- communication mini operator information based on Level0.
216
- - ProfilerLevel.Level2: Collect GE and Runtime information in CANN layer on top of Level1
217
-
218
- op_time (bool, optional): (Ascend/GPU) Whether to collect operators performance data. Default value: ``True``.
219
- profile_communication (bool, optional): (Ascend only) Whether to collect communication performance data in
220
- a multi devices training,collect when True. Setting this parameter has no effect during single card
221
- training. When using this parameter, `op_time` must be set to ``True`` . Default: ``False`` .
222
- profile_memory (bool, optional): (Ascend only) Whether to collect tensor memory data, collect when ``True`` .
223
- When using this parameter, `op_time` must be set to True. Collecting operator memory data when the graph
224
- compilation level is O2 requires collecting from the first step. Default: ``False`` .
225
- parallel_strategy (bool, optional): (Ascend only) Whether to collect parallel policy performance data.
226
- Default value: ``False`` .
227
- start_profile (bool, optional): The start_profile parameter controls whether to enable or disable performance
228
- data collection based on conditions. Default: ``True`` .
229
- aicore_metrics (int, optional): (Ascend only) Types of AICORE performance data collected, when using this
230
- parameter, `op_time` must be set to ``True`` , and the value must be in [-1, 0, 1, 2, 3, 4, 5, 6],
231
- Default: ``0`` , the data items contained in each metric are as follows:
232
-
233
- - -1: Does not collect AICORE data.
234
- - 0: ArithmeticUtilization contains mac_fp16/int8_ratio, vec_fp32/fp16/int32_ratio, vec_misc_ratio etc.
235
- - 1: PipeUtilization contains vec_ratio, mac_ratio, scalar_ratio, mte1/mte2/mte3_ratio, icache_miss_rate
236
- etc.
237
- - 2: Memory contains ub_read/write_bw, l1_read/write_bw, l2_read/write_bw, main_mem_read/write_bw etc.
238
- - 3: MemoryL0 contains l0a_read/write_bw, l0b_read/write_bw, l0c_read/write_bw etc.
239
- - 4: ResourceConflictRatio contains vec_bankgroup/bank/resc_cflt_ratio etc.
240
- - 5: MemoryUB contains ub_read/write_bw_mte, ub_read/write_bw_vector, ub\_/write_bw_scalar etc.
241
- - 6: L2Cache contains write_cache_hit, write_cache_miss_allocate, r0_read_cache_hit, r1_read_cache_hit etc.
242
- This function only support Atlas A2 training series products.
243
-
244
- l2_cache (bool, optional): (Ascend only) Whether to collect l2 cache data, collect when True.
245
- Default: ``False`` .
246
- hbm_ddr (bool, optional): (Ascend only) Whether to collect On-Chip Memory/DDR read and write rate data,
247
- collect when True. Default: ``False`` .
248
- pcie (bool, optional): (Ascend only) Whether to collect PCIe bandwidth data, collect when True.
249
- Default: ``False`` .
250
- sync_enable (bool, optional): (GPU only) Whether the profiler collects operators in a synchronous way.
251
- Default: ``True`` .
252
-
253
- - True: The synchronous way. Before sending the operator to the GPU, the CPU records the start timestamp.
254
- Then the operator is returned to the CPU after execution, and the end timestamp is recorded,
255
- The duration of the operator is the difference between the two timestamps.
256
- - False: The asynchronous way. The duration of the operator is that of sending from the CPU to the GPU.
257
- This method can reduce the impact of adding profiler on overall training time.
258
- data_process (bool, optional): (Ascend/GPU) Whether to collect data to prepare performance data.
259
- Default value: ``False`` .
260
- timeline_limit (int, optional): (Ascend/GPU) Set the maximum storage size of the timeline file (unit M).
261
- When using this parameter, `op_time` must be set to True. Default value: ``500`` .
262
- profile_framework (str, optional): (Ascend/GPU) The host information to collect, it must be one of
263
- ["all", "time", None], When is not set to None, it would collect the host profiler data. When using this
264
- parameter, the op_time parameter must be enabled.
265
- Default: None.
266
-
267
- - "all": Record host timestamp.
268
- - "time": The same as "all".
269
- - None: Not record host information.
270
- data_simplification (bool, optional): (Ascend only) Whether to remove FRAMEWORK data and other redundant data.
271
- If set to True, only the delivery of profiler and the original performance data in the PROF_XXX
272
- directory are retained to save disk space.
273
- Default value: ``True`` .
274
- with_stack (bool, optional): (Ascend) Whether to collect frame host call stack data on the Python side. This
275
- data is presented in the form of a flame graph in the timeline. When using this parameter, the op_time and
276
- profile_framework parameters must be enabled. Default value: ``False`` .
277
- analyse_only (bool, optional): (Ascend/GPU) Whether to parse only performance data and not collect performance
278
- data. This parameter is experimental parameter and does not need to be set by the user.
279
- Default value: ``False`` .
280
- rank_id (int, optional): (Ascend/GPU) Set the rank id during parsing. This parameter is
281
- experimental parameter and does not need to be set by the user. Default value: ``0`` .
282
- env_enable (bool, optional): (Ascend/GPU) Whether to enable the collection of environment variables.
283
- This parameter is experimental parameter and does not need to be set by the user.
284
- Default value: ``False`` .
285
- Raises:
286
- RuntimeError: When the version of CANN does not match the version of MindSpore,
287
- MindSpore cannot parse the generated ascend_job_id directory structure.
288
-
289
- Supported Platforms:
290
- ``Ascend`` ``GPU``
291
-
292
- Examples:
293
- >>> import numpy as np
294
- >>> import mindspore as ms
295
- >>> from mindspore import nn
296
- >>> import mindspore.dataset as ds
297
- >>> from mindspore import Profiler
298
- >>> from mindspore.profiler import ProfilerLevel
299
- >>>
300
- >>> class Net(nn.Cell):
301
- ... def __init__(self):
302
- ... super(Net, self).__init__()
303
- ... self.fc = nn.Dense(2,2)
304
- ... def construct(self, x):
305
- ... return self.fc(x)
306
- >>>
307
- >>> def generator():
308
- ... for i in range(2):
309
- ... yield (np.ones([2, 2]).astype(np.float32), np.ones([2]).astype(np.int32))
310
- >>>
311
- >>> def train(net):
312
- ... optimizer = nn.Momentum(net.trainable_params(), 1, 0.9)
313
- ... loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True)
314
- ... data = ds.GeneratorDataset(generator, ["data", "label"])
315
- ... model = ms.train.Model(net, loss, optimizer)
316
- ... model.train(1, data)
317
- >>>
318
- >>> if __name__ == '__main__':
319
- ... # If the device_target is GPU, set the device_target to "GPU"
320
- ... ms.set_context(mode=ms.GRAPH_MODE, device_target="Ascend")
321
- ...
322
- ... # Init Profiler
323
- ... # Note that the Profiler should be initialized before model.train
324
- ... profiler = Profiler(profiler_level=ProfilerLevel.Level0)
325
- ...
326
- ... # Train Model
327
- ... net = Net()
328
- ... train(net)
329
- ...
330
- ... # Profiler end
331
- ... profiler.analyse()
332
- """
333
- _has_initialized = False
334
- _ascend_profiling_options = ""
335
- _ascend_job_id = ""
336
- ENABLE_STATUS = "on"
337
- DISABLE_STATUS = "off"
338
-
339
- def __init__(self, **kwargs):
340
- if os.getenv("PROFILING_MODE"):
341
- raise RuntimeError("Profiling is already enabled by PROFILING_MODE env.")
342
-
343
- self._dev_id = None
344
- self._cpu_profiler = None
345
- self._gpu_profiler = None
346
- self._md_profiler = None
347
- self._is_heterogeneous = False
348
- self._profiler_manager = None
349
- self._timeline_meta = []
350
- self._init_time = None
351
- self._ascend_job_id = ''
352
- self._job_id_env = None
353
- self._filt_optype_names = ''
354
- self._output_path = ''
355
- self._rank_size = 1
356
- self._rank_id = 0
357
- self._ascend_profiler = None
358
- self.metadata = {}
359
- self.max_str_len = 4096
360
- self.max_meta_size = 50 * 1024
361
- self._timeline_size_limit_byte = 500 * 1024 * 1024 # 500MB
362
- self._parallel_strategy = True
363
- self._model_iteration_dict = None
364
- self._analyse_mode = ANALYSIS_SYNC_MODE
365
- _environment_check()
366
- # default aicore_metrics type is ArithmeticUtilization
367
- self._aicore_metrics_id = 0
368
- self._l2_cache = self.DISABLE_STATUS
369
- self._hbm_ddr = self.DISABLE_STATUS
370
- self._pcie = self.DISABLE_STATUS
371
- self._data_process = True
372
- self._op_time = True
373
- self._profile_communication = False
374
- self._has_started = False
375
- self._has_started_twice = False
376
- self.start_profile = True
377
- self._profile_memory = False
378
- self._sync_enable = True
379
- self._stop_time = 0
380
- self._dynamic_status = False
381
- self._profile_framework = None
382
- self._msprof_enable = os.getenv("PROFILER_SAMPLECONFIG")
383
- self.profiler_level = None
384
- self._pretty_json = False
385
- self._analyse_only = kwargs.get("analyse_only", False)
386
- self._data_simplification = kwargs.get("data_simplification", True)
387
- self._with_stack = False
388
- if self._msprof_enable:
389
- return
390
- self._start_time = int(time.time() * 1e6) # us
391
- self._monotonic_time = int(time.monotonic() * 1e6) # us
392
- logger.info("Profiling: start time: %d", self._start_time)
393
- if kwargs.get("env_enable"):
394
- self._profiler_init(kwargs)
395
- return
396
- Profiler._has_initialized = True
397
- # get device_id and device_target
398
- if self._analyse_only:
399
- self._device_target = DeviceTarget.ASCEND.value
400
- self._rank_id = kwargs.get("rank_id", 0)
401
- else:
402
- self._get_devid_rankid_and_devtarget()
403
- self._parser_kwargs(kwargs)
404
- self._get_output_path(kwargs)
405
- self._decide_device_target(kwargs)
406
- if self.start_profile:
407
- self.start()
408
-
409
- @staticmethod
410
- def _check_output_path(output_path):
411
- """Checking path validity."""
412
- try:
413
- output_path = validate_and_normalize_path(output_path)
414
- except RuntimeError as err:
415
- raise ProfilerPathErrorException(f'profiling data output path {output_path} is invalid.') from err
416
- finally:
417
- pass
418
- if not os.path.isdir(output_path):
419
- raise ProfilerDirNotFoundException(output_path)
420
- return output_path
421
-
422
- @staticmethod
423
- def _parse_job_start_time(prof_dir):
424
- """
425
- Get the start time of the job.
426
-
427
- Args:
428
- input_file (str): The file path of the host start log file.
429
-
430
- Returns:
431
- str, job start time.
432
- """
433
- try:
434
- AscendMsprofExporter.check_msprof_env()
435
- script_path = AscendMsprofExporter.get_msprof_info_path()
436
- if not script_path:
437
- logger.warning("Can`t find get_msprof_info.py path, use single-export mode instead.")
438
- return None
439
- logger.info("get_msprof_info.py path is : %s", script_path)
440
- host_dir = os.path.join(prof_dir, 'host')
441
- cmd = ['python', script_path, '-dir', host_dir]
442
- outs, _ = AscendMsprofExporter.run_cmd(cmd)
443
- if not outs:
444
- logger.warning('Can`t find the msprof info result')
445
- return None
446
- result = json.loads(outs)
447
- if result.get('status', 1) == 1:
448
- return None
449
- jor_start_time = result.get('data', {}).get('collection_info', {}).get('Collection start time', None)
450
- if jor_start_time is not None:
451
- return float(jor_start_time.strip())
452
- return None
453
- except (RuntimeError, JSONDecodeError, AttributeError, TimeoutError, FileNotFoundError) as err:
454
- logger.warning('Get the drvVersion error, use single-export mode instead. detail : %s', err)
455
- return None
456
-
457
- @classmethod
458
- def offline_analyse(cls, path: str, pretty=False, step_list=None, data_simplification=True):
459
- """
460
- Analyze training performance data offline, which is invoked after performance data collection is completed.
461
-
462
- Args:
463
- path (str): The profiling data path which need to be analyzed offline.
464
- There needs to be a profiler directory in this path.
465
- pretty (bool, optional): Whether to pretty json files. Default: ``False``.
466
- step_list (list, optional): A list of steps that need to be analyzed, the steps must be
467
- consecutive integers. Default: ``None``. By default, all steps will be analyzed.
468
- data_simplification (bool, optional): Whether to enable data simplification. Default: ``True``.
469
-
470
- Examples:
471
- >>> from mindspore import Profiler
472
- >>> Profiler.offline_analyse("./profiling_path")
473
- """
474
- real_path = os.path.realpath(path)
475
- PathManager.check_input_directory_path(real_path)
476
- profiler_parent_path_list = PathManager.get_profiler_parent_path_list(real_path)
477
- if not isinstance(data_simplification, bool):
478
- logger.warning(f"For offline_analyse, the parameter data_simplification must be bool, "
479
- f"but got type {type(data_simplification)}, it will be set to True.")
480
- data_simplification = True
481
- if not profiler_parent_path_list:
482
- raise ProfilerPathErrorException(f'The provided path "{path}" must have a "profiler" directory for '
483
- f'single-device profiler data, or multiple subdirectories each containing '
484
- f'a "profiler" directory for multi-device profiler data. ')
485
- # get rank id
486
- rank_list = []
487
- for parent_path in profiler_parent_path_list:
488
- profiler_path = os.path.join(parent_path, Constant.PROFILER_DIR)
489
- rank_id = ProfilerInfo.get_rank_id(profiler_path)
490
- if int(rank_id) < 0:
491
- logger.error(f"Unable to get a valid rank ID in the profiler directory: {profiler_path}")
492
- rank_list.append(rank_id)
493
- # start offline analyse
494
- if len(profiler_parent_path_list) == 1:
495
- PathManager.check_directory_path_writeable(profiler_parent_path_list[0])
496
- profiler = cls(analyse_only=True, rank_id=rank_list[0], data_simplification=data_simplification)
497
- profiler.analyse(profiler_parent_path_list[0], pretty, step_list)
498
- else:
499
- # Multiprocess Parsing
500
- multiprocessing.set_start_method("fork", force=True)
501
- process_number = min(Constant.DEFAULT_PROCESS_NUMBER, len(profiler_parent_path_list))
502
- pool = multiprocessing.Pool(processes=process_number)
503
- for idx, profiler_parent_path in enumerate(profiler_parent_path_list):
504
- PathManager.check_directory_path_writeable(profiler_parent_path)
505
- profiling_parser = cls(analyse_only=True, rank_id=rank_list[idx],
506
- data_simplification=data_simplification)
507
- pool.apply_async(profiling_parser.analyse, args=(profiler_parent_path, pretty, step_list))
508
- pool.close()
509
- pool.join()
510
-
511
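A hedged usage sketch for the multi-device branch above; the rank directory names are hypothetical, and the only requirement the path check enforces is that each subdirectory contains a "profiler" directory:

    >>> from mindspore import Profiler
    >>> # Layout: ./profiling_path/rank_0/profiler, ./profiling_path/rank_1/profiler, ...
    >>> Profiler.offline_analyse("./profiling_path", pretty=True, step_list=[1, 2, 3],
    ...                          data_simplification=False)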
- def op_analyse(self, op_name, device_id=None):
512
- """
513
- Profiler users can use this interface to obtain operator performance data.
514
-
515
- Args:
516
- op_name (str or list): The primitive operator name to query.
517
- device_id (int, optional): ID of the target device. This parameter is optional during network training or
518
- inference, and users can use it to specify which device's operator performance data to
519
- parse. If this interface is used for offline data parsing, Default: ``0``.
520
-
521
- Raises:
522
- TypeError: If the `op_name` parameter type is incorrect.
523
- TypeError: If the `device_id` parameter type is incorrect.
524
- RuntimeError: If MindSpore runs on Ascend, this interface cannot be used.
525
-
526
- Supported Platforms:
527
- ``GPU`` ``CPU``
528
-
529
- Examples:
530
- >>> from mindspore import Profiler
531
- >>> from mindspore import nn
532
- >>> from mindspore import Model
533
- >>> # Profiler init.
534
- >>> profiler = Profiler()
535
- >>> # Train Model or eval Model, taking LeNet5 as an example.
536
- >>> # Refer to https://gitee.com/mindspore/docs/blob/master/docs/mindspore/code/lenet.py
537
- >>> net = LeNet5()
538
- >>> optimizer = nn.Momentum(net.trainable_params(), learning_rate=0.1, momentum=0.9)
539
- >>> loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True)
540
- >>> # Create the dataset taking MNIST as an example.
541
- >>> # Refer to https://gitee.com/mindspore/docs/blob/master/docs/mindspore/code/mnist.py
542
- >>> dataloader = create_dataset()
543
- >>> model = Model(net, loss, optimizer)
544
- >>> model.train(5, dataloader, dataset_sink_mode=False)
545
- >>>
546
- >>> # Profiler end
547
- >>> profiler.analyse()
548
- >>>
549
- >>> profiler.op_analyse(op_name=["BiasAdd", "Conv2D"])
550
- """
551
- if self._device_target == 'ascend':
552
- raise RuntimeError("The Interface 'Profiler.op_analyse()' is not supported on Ascend currently.")
553
- if device_id and not isinstance(device_id, int):
554
- raise TypeError(f"For 'Profiler.op_analyse()', the parameter device_id must be int, "
555
- f"but got type {type(device_id)}")
556
- online_device_id = int(self._dev_id)
557
- self._dev_id = self._dev_id if device_id is None else device_id
558
- if self._dev_id is None:
559
- self._dev_id = 0
560
- if not isinstance(op_name, str) and not isinstance(op_name, list):
561
- raise TypeError(f"For 'Profiler.op_analyse()', the parameter op_name must be str or list, "
562
- f"but got type {type(op_name)}")
563
- if not op_name:
564
- raise TypeError(f"For 'Profiler.op_analyse()', the parameter op_name cannot be "", '' or [].")
565
- parser = GpuFrameWorkParser(self._output_path, self._dev_id, op_name)
566
- op_info = parser.parse()
567
- if self._rank_size > 1:
568
- if online_device_id == int(self._dev_id):
569
- return op_info
570
- if online_device_id != int(self._dev_id):
571
- message = f"For 'Profiler.op_analyse()', the parameter device_id is equal to {self._dev_id}, but the " \
572
- f"current device id is {online_device_id}, so no operator performance information is queried."
573
- return message
574
- return op_info
575
-
576
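A sketch of querying operator data on GPU; the output path is hypothetical, and per the type checks above op_name accepts either a single name or a list:

    >>> from mindspore import Profiler
    >>> profiler = Profiler(output_path="./gpu_prof", start_profile=False)  # path is an example
    >>> profiler.op_analyse(op_name="Conv2D")                               # one operator
    >>> profiler.op_analyse(op_name=["BiasAdd", "Conv2D"], device_id=0)     # several operators on card 0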
- def analyse(self, offline_path=None, pretty=False, step_list=None, mode="sync"):
577
- """
578
- Collect and analyze training performance data; calls are supported both during and after training. See the example above.
579
-
580
- Args:
581
- offline_path (Union[str, None], optional): The data path which needs to be analyzed in offline mode.
582
- Offline mode is used in abnormal exit scenarios. This parameter should be set to ``None``
583
- for online mode. Default: ``None``.
584
- pretty (bool, optional): Whether to pretty json files. Default: ``False``.
585
- step_list (list, optional): A list of steps that need to be analyzed, the steps must be
586
- consecutive integers. Default: ``None``. By default, all steps will be analyzed.
587
- mode (str, optional): Analysis mode, it must be one of ["sync", "async"]. Default: ``sync``.
588
-
589
- - sync: analyse data in current process, it will block the current process.
590
- - async: analyse data in subprocess, it will not block the current process. Since the parsing process
591
- will take up extra CPU resources, please enable this mode according to the actual resource situation.
592
-
593
- Examples:
594
- >>> from mindspore.train import Callback
595
- >>> from mindspore import Profiler
596
- >>> class StopAtStep(Callback):
597
- ... def __init__(self, start_step=1, stop_step=5):
598
- ... super(StopAtStep, self).__init__()
599
- ... self.start_step = start_step
600
- ... self.stop_step = stop_step
601
- ... self.profiler = Profiler(start_profile=False)
602
- ...
603
- ... def step_begin(self, run_context):
604
- ... cb_params = run_context.original_args()
605
- ... step_num = cb_params.cur_step_num
606
- ... if step_num == self.start_step:
607
- ... self.profiler.start()
608
- ...
609
- ... def step_end(self, run_context):
610
- ... cb_params = run_context.original_args()
611
- ... step_num = cb_params.cur_step_num
612
- ... if step_num == self.stop_step:
613
- ... self.profiler.stop()
614
- ...
615
- ... def end(self, run_context):
616
- ... self.profiler.analyse(step_list=[2,3,4], mode="sync")
617
- """
618
- try:
619
- if isinstance(pretty, bool):
620
- self._pretty_json = pretty
621
- if mode not in [ANALYSIS_SYNC_MODE, ANALYSIS_ASYNC_MODE]:
622
- logger.warning("For analyse, the parameter mode must be one of ['sync', 'async'], "
623
- "it will be set to 'sync'.")
624
- mode = ANALYSIS_SYNC_MODE
625
- model_iteration_dict = {}
626
- if step_list is not None and not isinstance(step_list, list):
627
- raise ProfilerParamTypeErrorException("Parameter step_list must be a list.")
628
- if step_list:
629
- if not all(isinstance(step_id, int) for step_id in step_list):
630
- raise ProfilerParamTypeErrorException("The elements of the parameter step_list must be integers.")
631
- step_list.sort()
632
- if step_list[-1] - step_list[0] != len(step_list) - 1:
633
- err_msg = "The elements of the parameter step_list must be continuous integers."
634
- raise ProfilerParamTypeErrorException(err_msg)
635
- model_iteration_dict[DEFAULT_MODEL_ID] = step_list
636
- if offline_path is not None and not isinstance(offline_path, str):
637
- raise ProfilerParamTypeErrorException("For analyse, the type of parameter offline_path must be str.")
638
- self._analyse(offline_path=offline_path, model_iteration_dict=model_iteration_dict, mode=mode)
639
- except (ProfilerException, RuntimeError, OSError, TypeError, NameError) as err:
640
- logger.error("Profiler analyse failed: %s", str(err))
641
-
642
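The step_list validation above requires consecutive integers after sorting; a short sketch of what passes and what is rejected (step values are examples):

    >>> profiler.analyse(step_list=[2, 3, 4])   # consecutive: accepted
    >>> profiler.analyse(step_list=[4, 2, 3])   # sorted internally, still consecutive: accepted
    >>> profiler.analyse(step_list=[1, 3])      # gap: rejected and logged as an error
    >>> profiler.analyse(mode="async")          # parse in a subprocess without blocking training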
- def _analyse(self, offline_path=None, model_iteration_dict=None, mode=ANALYSIS_SYNC_MODE):
643
- """
644
- Collect and analyze training performance data; calls are supported both during and after training. See the example above.
645
-
646
- Args:
647
- offline_path (Union[str, None], optional): The data path which needs to be analysed in offline mode.
648
- Offline mode is used in abnormal exit scenarios. This parameter should be set to ``None``
649
- for online mode. Default: ``None``.
650
- model_iteration_dict (dict, optional): Dictionary with the model id as the key and the iteration ids as the value. Default: ``None``.
651
- mode (str, optional): Analysis mode, ``sync`` or ``async``. Default: ``sync``,
652
- which analyses data in the current process.
653
- """
654
- self._model_iteration_dict = model_iteration_dict
655
- self._init_profiler_info()
656
- self._is_support_step_info_collect()
657
- self._analyse_mode = mode
658
- parallel_mode = get_auto_parallel_context("parallel_mode")
659
- stage_num = get_auto_parallel_context("pipeline_stages")
660
-
661
- ProfilerInfo.set_parallel_info(parallel_mode, stage_num)
662
- if offline_path:
663
- # Loads the ProfilerInfo data, avoid overwriting the data collection prof_info_x.json.
664
- ProfilerInfo.load_profiler_info_dict(os.path.join(offline_path, "profiler"))
665
- ProfilerInfo.set_analyse_start_time(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))
666
- self._ascend_graph_analyse(offline_path=offline_path)
667
- ProfilerInfo.set_analyse_end_time(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))
668
- ProfilerInfo.save(self._output_path)
669
- return
670
- if self._msprof_enable:
671
- return
672
-
673
- # Stop data collection after all operators are executed.
674
- _pynative_executor.sync()
675
-
676
- Profiler._has_initialized = False
677
- self._dynamic_status = self._profiler_manager.dynamic_status()
678
- _environment_check()
679
-
680
- cpu_op_file = glob.glob(os.path.join(self._output_path, 'cpu_op_type_info_*'))
681
- if self._device_target and self._device_target != DeviceTarget.CPU.value and cpu_op_file:
682
- self._is_heterogeneous = True
683
-
684
- ProfilerInfo.set_heterogeneous(self._is_heterogeneous)
685
- ProfilerInfo.set_analyse_start_time(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))
686
- if self._device_target and self._device_target == DeviceTarget.CPU.value:
687
- self._cpu_analyse()
688
- if self._profile_framework:
689
- logger.warning("The parameter 'profile_framework' is not support for CPU, so there no host profiler "
690
- "data.")
691
-
692
- if self._device_target and self._device_target == DeviceTarget.GPU.value:
693
- self._gpu_analyse()
694
-
695
- elif self._device_target and self._device_target == DeviceTarget.ASCEND.value:
696
- self._ascend_analyse()
697
-
698
- logger.info("Profiling: all the data have been analyzed.")
699
- ProfilerInfo.set_analyse_end_time(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))
700
- ProfilerInfo.save(self._output_path)
701
-
702
- def start(self):
703
- """
704
- Used on Ascend and GPU to start profiling. Profiling can be turned on at a given step or epoch.
705
-
706
- Raises:
707
- RuntimeError: If the profiler has already started.
708
- RuntimeError: If the `start_profile` parameter is not set or is set to ``True``.
709
-
710
- Examples:
711
- >>> from mindspore.train import Callback
712
- >>> from mindspore import Profiler
713
- >>> class StopAtStep(Callback):
714
- ... def __init__(self, start_step, stop_step):
715
- ... super(StopAtStep, self).__init__()
716
- ... self.start_step = start_step
717
- ... self.stop_step = stop_step
718
- ... self.profiler = Profiler(start_profile=False)
719
- ...
720
- ... def step_begin(self, run_context):
721
- ... cb_params = run_context.original_args()
722
- ... step_num = cb_params.cur_step_num
723
- ... if step_num == self.start_step:
724
- ... self.profiler.start()
725
- ...
726
- ... def step_end(self, run_context):
727
- ... cb_params = run_context.original_args()
728
- ... step_num = cb_params.cur_step_num
729
- ... if step_num == self.stop_step:
730
- ... self.profiler.stop()
731
- ...
732
- ... def end(self, run_context):
733
- ... self.profiler.analyse()
734
- """
735
- if self._msprof_enable:
736
- return
737
-
738
- if not self._has_started:
739
- if not self._has_started_twice:
740
- self._has_started = True
741
- else:
742
- raise RuntimeError("The profiler has already started. Do not turn on again in the open state.")
743
-
744
- self._cpu_profiler.step_profiling_enable(True)
745
- if self._op_time:
746
- self._cpu_profiler.enable_op_time()
747
- if self._profile_memory:
748
- self._cpu_profiler.enable_profile_memory()
749
-
750
- if self._device_target and self._device_target == DeviceTarget.GPU.value:
751
- if self._data_process:
752
- self._md_profiler.start()
753
- self._gpu_profiler.data_process_enable(True)
754
- if self._profile_framework or self._op_time:
755
- self._gpu_profiler.step_profiling_enable(True)
756
- if self._op_time:
757
- self._gpu_profiler.enable_op_time()
758
- elif self._device_target and self._device_target == DeviceTarget.ASCEND.value:
759
- if self._data_process:
760
- self._md_profiler.start()
761
- self._ascend_graph_start()
762
- ProfilerInfo.set_profiling_start_time(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))
763
- ProfilerInfo.set_system_cnt(c_expression.get_clock_syscnt())
764
- ProfilerInfo.set_system_time(int(c_expression.get_clock_time())) # ns
765
- if context.get_context("mode") == context.GRAPH_MODE:
766
- jit_config = context.get_jit_config()
767
- jit_level = jit_config.get("jit_level", "")
768
- ProfilerInfo.set_jit_level(jit_level)
769
- if self._profile_framework:
770
- _framework_profiler_enable_mi()
771
-
772
- def stop(self):
773
- """
774
- Used on Ascend and GPU to stop profiling. Profiling can be turned off at a given step or epoch.
775
-
776
- Raises:
777
- RuntimeError: If the profiler has not started, this function is disabled.
778
-
779
- Examples:
780
- >>> from mindspore.train import Callback
781
- >>> from mindspore import Profiler
782
- >>> class StopAtEpoch(Callback):
783
- ... def __init__(self, start_epoch, stop_epoch):
784
- ... super(StopAtEpoch, self).__init__()
785
- ... self.start_epoch = start_epoch
786
- ... self.stop_epoch = stop_epoch
787
- ... self.profiler = Profiler(start_profile=False)
788
- ...
789
- ... def epoch_begin(self, run_context):
790
- ... cb_params = run_context.original_args()
791
- ... epoch_num = cb_params.cur_epoch_num
792
- ... if epoch_num == self.start_epoch:
793
- ... self.profiler.start()
794
- ...
795
- ... def epoch_end(self, run_context):
796
- ... cb_params = run_context.original_args()
797
- ... epoch_num = cb_params.cur_epoch_num
798
- ... if epoch_num == self.stop_epoch:
799
- ... self.profiler.stop()
800
- ...
801
- ... def end(self, run_context):
802
- ... self.profiler.analyse()
803
- """
804
- if self._msprof_enable:
805
- return
806
-
807
- if self._has_started:
808
- self._has_started = False
809
- else:
810
- raise RuntimeError("The profiler has not started, so can not stop. Please call the start() method "
811
- "before calling the stop() method.")
812
-
813
- # Stop data collection after all operators are executed.
814
- _pynative_executor.sync()
815
-
816
- self._cpu_profiler.stop()
817
- if self._data_process and self._md_profiler is not None:
818
- self._md_profiler.stop()
819
- self._md_profiler.save(self._output_path)
820
-
821
- if self._device_target and self._device_target == DeviceTarget.GPU.value:
822
- self._gpu_profiler.stop()
823
- elif self._device_target and self._device_target == DeviceTarget.ASCEND.value:
824
- self._ascend_profiler.stop()
825
-
826
- self._stop_time = int(time.time() * 10000000)
827
-
828
- if self._profile_framework:
829
- _framework_profiler_disable_mi()
830
-
831
- ProfilerInfo.set_profiling_stop_time(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))
832
- self._init_profiler_info()
833
- ProfilerInfo.set_diff_time(self._start_time - self._monotonic_time)
834
- ProfilerInfo.save(self._output_path)
835
- self._dump_metadata()
836
- logger.info("Profiling: stop time: %d", self._stop_time)
837
-
838
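Besides the callback pattern shown in the docstrings, start/stop can be driven manually; a minimal sketch with the training loop elided:

    >>> from mindspore import Profiler
    >>> profiler = Profiler(start_profile=False)
    >>> profiler.start()
    >>> # ... run some training steps ...
    >>> profiler.stop()
    >>> profiler.analyse()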
- def add_metadata(self, key: str, value: str):
839
- """
840
- Report custom metadata key-value pair data.
841
-
842
- Args:
843
- key (str): The key to the metadata.
844
- value (str): The value to the metadata.
845
-
846
- Examples:
847
- >>> from mindspore import Profiler
848
- >>> # Profiler init.
849
- >>> profiler = Profiler()
850
- >>> # Call Profiler add_metadata
851
- >>> profiler.add_metadata("test_key", "test_value")
852
- >>> # Profiler end
853
- >>> profiler.analyse()
854
- """
855
- if not isinstance(key, str) or not isinstance(value, str):
856
- logger.warning("The key and value of metadata must be string. Skip this metadata.")
857
- return
858
- if not self._check_str_valid(key) or not self._check_str_valid(value):
859
- logger.warning("Invalid input key or value. Skip this metadata.")
860
- return
861
- add_size = getsizeof(key) + getsizeof(value)
862
- if getsizeof(self.metadata) + add_size < self.max_meta_size:
863
- if key in self.metadata:
864
- logger.warning(f"{key} is already saved as metadata, override it.")
865
- self.metadata[key] = value
866
- else:
867
- logger.warning("Too many metadata added. Skip this metadata")
868
-
869
- def add_metadata_json(self, key: str, value: str):
870
- """
871
- Report custom metadata key-value pair data with the value as a JSON string data.
872
-
873
- Args:
874
- key (str): The key to the metadata.
875
- value (str): The json str format value to the metadata.
876
-
877
- Examples:
878
- >>> import json
879
- >>> from mindspore import Profiler
880
- >>> # Profiler init.
881
- >>> profiler = Profiler()
882
- >>> # Call Profiler add_metadata_json
883
- >>> profiler.add_metadata_json("test_key", json.dumps({"key1": 1, "key2": 2}))
884
- >>> # Profiler end, metadata will be saved in profiler_metadata.json
885
- >>> profiler.analyse()
886
- """
887
- if not isinstance(key, str) or not isinstance(value, str):
888
- logger.warning("The key and value of metadata must be string. Skip this metadata.")
889
- return
890
- if not self._check_str_valid(key) or not self._check_str_valid(value):
891
- logger.warning("Invalid input key or value. Skip this metadata.")
892
- return
893
- add_size = getsizeof(key) + getsizeof(value)
894
- if getsizeof(self.metadata) + add_size < self.max_meta_size:
895
- try:
896
- if key in self.metadata:
897
- logger.warning(f"{key} is already saved as metadata, override it.")
898
- self.metadata[key] = json.loads(value)
899
- except ValueError:
900
- logger.warning("The metadata value must be json format string. Skip this metadata")
901
- else:
902
- logger.warning("Too many metadata added. Skip this metadata")
903
-
904
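Both metadata interfaces above share the same getsizeof-based budget and string checks; a sketch of typical calls (keys and values are made up):

    >>> import json
    >>> profiler.add_metadata("dataset", "imagenet-1k")
    >>> profiler.add_metadata_json("hparams", json.dumps({"lr": 0.1, "batch_size": 256}))
    >>> profiler.add_metadata("bad_key", 123)  # non-str value: a warning is logged, the pair is skipped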
- def _dump_metadata(self):
905
- """Dump metadata to file."""
906
- if not self.metadata:
907
- return
908
- FileManager.create_json_file(self._output_path, self.metadata, "profiler_metadata.json", indent=4)
909
- self.metadata.clear()
910
-
911
- def _check_str_valid(self, input_str: str):
912
- """Check str length"""
913
- if len(input_str) > self.max_str_len:
914
- return False
915
- return True
916
-
917
- def _set_ascend_job_id(self, ascend_job_id):
918
- """Set output_path for offline parsing performance data."""
919
- if not ascend_job_id:
920
- return
921
- self._ascend_job_id = validate_and_normalize_path(ascend_job_id)
922
- if not os.path.exists(self._ascend_job_id):
923
- msg = f"Invalid ascend_job_id: {self._ascend_job_id}, Please pass the absolute path of the JOB dir"
924
- logger.critical(msg)
925
- raise ValueError(msg)
926
- self._output_path, _ = os.path.split(self._ascend_job_id)
927
-
928
- def _profiler_init(self, kwargs):
929
- """Initialize variables when profiler is enabled by environment variables."""
930
- options = kwargs.get("env_enable")
931
- self._has_started = True
932
- self._start_time = options.get("start_time")
933
- self._output_path = options.get('file_output_path')
934
- self._profile_memory = options.get('profile_memory')
935
- self._parallel_strategy = options.get('parallel_strategy')
936
- self._timeline_size_limit_byte = options.get('timeline_limit') * 1024 * 1024
937
- self._data_process = options.get('data_process')
938
- self._profile_communication = options.get('profile_communication')
939
- self._op_time = options.get('op_time')
940
- self._device_target = context.get_context("device_target").lower()
941
- self._profile_framework = options.get('profile_framework', None)
942
- self._profiler_manager = c_expression.ProfilerManager.get_instance()
943
- self._cpu_profiler = c_expression.Profiler.get_instance("CPU")
944
- if self._data_process:
945
- self._md_profiler = cde.GlobalContext.profiling_manager()
946
- if self._device_target == DeviceTarget.GPU.value:
947
- self._gpu_profiler = c_expression.Profiler.get_instance("GPU")
948
-
949
- if self._device_target == DeviceTarget.ASCEND.value:
950
- self._ascend_profiler = c_expression.Profiler.get_instance("Ascend")
951
- self._get_devid_rankid_and_devtarget()
952
-
953
- def _init_profiler_info(self):
954
- """Init profiler info filer."""
955
- mode = "graph"
956
- if context.get_context("mode") == context.PYNATIVE_MODE:
957
- mode = "pynative"
958
- store_id = self._dev_id if self._device_target == DeviceTarget.GPU.value else self._rank_id
959
- ProfilerInfo.init_info(mode, store_id)
960
-
961
- def _decide_device_target(self, kwargs):
962
- """Complete Profiler initialization according to device_target"""
963
- profiler_manager = c_expression.ProfilerManager
964
- self._profiler_manager = profiler_manager.get_instance()
965
- if self._profile_framework is None:
966
- self._profiler_manager.set_profile_framework("NULL")
967
- else:
968
- self._profiler_manager.set_profile_framework(self._profile_framework)
969
- if self._device_target:
970
- cpu_profiler = c_expression.Profiler
971
- self._cpu_profiler = cpu_profiler.get_instance("CPU")
972
- self._cpu_profiler.init(self._output_path)
973
-
974
- if self._device_target and self._device_target == DeviceTarget.CPU.value:
975
- self._cpu_profiler_init(kwargs)
976
-
977
- if self._device_target and self._device_target == DeviceTarget.GPU.value:
978
- self._gpu_profiler_init(kwargs)
979
-
980
- elif self._device_target and self._device_target == DeviceTarget.ASCEND.value:
981
- self._ascend_profiler_init(kwargs)
982
-
983
- def _cpu_profiler_init(self, kwargs):
984
- """Cpu profiler init."""
985
- self.start_profile = kwargs.pop("start_profile", True)
986
- if not isinstance(self.start_profile, bool):
987
- raise TypeError(f"For '{self.__class__.__name__}', the parameter start_profile must be bool, "
988
- f"but got type {type(self.start_profile)}")
989
-
990
- def _gpu_profiler_init(self, kwargs):
991
- """Gpu profiler init."""
992
- self._parse_parameter_for_gpu(kwargs)
993
- # Setup and start MindData Profiling
994
- if self._data_process:
995
- self._md_profiler = cde.GlobalContext.profiling_manager()
996
- self._md_profiler.init()
997
-
998
- gpu_profiler = c_expression.Profiler
999
- self._gpu_profiler = gpu_profiler.get_instance("GPU")
1000
- if GlobalComm.WORLD_COMM_GROUP == "nccl_world_group":
1001
- self._dev_id = str(get_rank())
1002
- os.environ['DEVICE_ID'] = self._dev_id
1003
- self._rank_id = self._dev_id
1004
- self._gpu_profiler.init(self._output_path, int(self._rank_id))
1005
- self._gpu_profiler.sync_enable(self._sync_enable)
1006
-
1007
- def _ascend_profiler_init(self, kwargs):
1008
- """Ascend profiler init."""
1009
- self._parse_parameter_for_ascend(kwargs)
1010
- # Setup and start MindData Profiling
1011
- if self._data_process:
1012
- self._md_profiler = cde.GlobalContext.profiling_manager()
1013
- self._md_profiler.init()
1014
- self._init_time = int(time.time() * 10000000)
1015
- logger.info("Profiling: profiling init time: %d", self._init_time)
1016
-
1017
- os.environ['DEVICE_ID'] = self._dev_id
1018
- self._ascend_profiling_options = json.dumps(self._construct_profiling_options())
1019
- # Option strings longer than 2048 characters are truncated, resulting in profiling option resolution errors
1020
- if len(self._ascend_profiling_options) > 2048:
1021
- msg = f"For '{self.__class__.__name__}', the environment parameter length exceeds " \
1022
- f"the limit (2048), please input valid parameters."
1023
- logger.critical(msg)
1024
- raise ValueError(msg)
1025
- # use context interface to open profiling, for the new mindspore version(after 2020.5.21)
1026
- self._ascend_profiler = c_expression.Profiler.get_instance("Ascend")
1027
- self._ascend_profiler.init(self._output_path, int(self._dev_id), self._ascend_profiling_options)
1028
- base_profiling_container_path = os.path.join(self._output_path, "container")
1029
- container_path = os.path.join(base_profiling_container_path, self._dev_id)
1030
- data_path = os.path.join(container_path, "data")
1031
- data_path = validate_and_normalize_path(data_path)
1032
- if not os.path.exists(data_path):
1033
- os.makedirs(data_path, exist_ok=True, mode=stat.S_IRUSR | stat.S_IWUSR | stat.S_IXUSR)
1034
-
1035
- def _construct_profiling_options(self):
1036
- """
1037
- Construct profiling options to determine which profiling data should be collected.
1038
- """
1039
- fp_point = os.environ.get("PROFILING_FP_START", "")
1040
- bp_point = os.environ.get("PROFILING_BP_END", "")
1041
-
1042
- profiling_options = {
1043
- "output": self._output_path,
1044
- "fp_point": fp_point,
1045
- "bp_point": bp_point,
1046
- "training_trace": self.ENABLE_STATUS if self._op_time else self.DISABLE_STATUS,
1047
- "task_trace": self.ENABLE_STATUS if self._op_time else self.DISABLE_STATUS,
1048
- "aic_metrics": AICORE_METRICS_DICT.get(self._aicore_metrics_id, "ArithmeticUtilization"),
1049
- "aicpu": self.ENABLE_STATUS if self._data_process or self._op_time else self.DISABLE_STATUS,
1050
- "profile_memory": self.ENABLE_STATUS if self._op_time and self._profile_memory else self.DISABLE_STATUS,
1051
- "hccl": self.ENABLE_STATUS if self._op_time and self._profile_communication else self.DISABLE_STATUS,
1052
- "l2_cache": self._l2_cache,
1053
- "hbm_ddr": self._hbm_ddr,
1054
- "pcie": self._pcie,
1055
- "parallel_strategy": self.ENABLE_STATUS if self._parallel_strategy else self.DISABLE_STATUS,
1056
- "op_time": self.ENABLE_STATUS if self._op_time else self.DISABLE_STATUS,
1057
- "profile_framework": self._profile_framework,
1058
- "profiler_level": self.profiler_level.value if self.profiler_level else self.DISABLE_STATUS,
1059
- "with_stack": "on" if self._with_stack else "off"
1060
- }
1061
- ProfilerInfo.set_profiling_options(profiling_options)
1062
- return profiling_options
1063
-
1064
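For orientation, a hypothetical rendering of the constructed options with op_time enabled and every other switch off, assuming ENABLE_STATUS/DISABLE_STATUS map to "on"/"off" as the with_stack entry suggests (the output path is made up):

    {
        "output": "./prof_data", "fp_point": "", "bp_point": "",
        "training_trace": "on", "task_trace": "on",
        "aic_metrics": "ArithmeticUtilization", "aicpu": "on",
        "profile_memory": "off", "hccl": "off",
        "l2_cache": "off", "hbm_ddr": "off", "pcie": "off",
        "parallel_strategy": "off", "op_time": "on",
        "profile_framework": None, "profiler_level": "off", "with_stack": "off"
    }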
- def _parse_parameter_for_gpu(self, kwargs):
1065
- """Parse parameter in Profiler when the device target is GPU."""
1066
- self.start_profile = kwargs.pop("start_profile", True)
1067
- if not isinstance(self.start_profile, bool):
1068
- raise TypeError(f"For '{self.__class__.__name__}', the parameter start_profile must be bool, "
1069
- f"but got type {type(self.start_profile)}")
1070
-
1071
- self._sync_enable = kwargs.pop("sync_enable", True)
1072
- if not isinstance(self._sync_enable, bool):
1073
- logger.warning("The parameter sync_enable is an invalid value, it will be set to True.")
1074
- self._sync_enable = True
1075
-
1076
- def _parse_parameter_for_ascend(self, kwargs):
1077
- """Parse parameter in Profiler when the device target is Ascend."""
1078
- ascend_job_id = kwargs.pop("ascend_job_id", "")
1079
- self._set_ascend_job_id(ascend_job_id)
1080
- self.start_profile = kwargs.pop("start_profile", True)
1081
- if not isinstance(self.start_profile, bool):
1082
- raise TypeError(f"For '{self.__class__.__name__}', the parameter start_profile must be bool, "
1083
- f"but got type {type(self.start_profile)}")
1084
-
1085
- self._profile_communication = kwargs.pop("profile_communication", False)
1086
- if not isinstance(self._profile_communication, bool):
1087
- logger.warning(f"For '{self.__class__.__name__}', the parameter profile_communication must be bool, "
1088
- f"but got type {type(self._profile_communication)}, it will be set to False.")
1089
- self._profile_communication = False
1090
-
1091
- if self._profile_communication:
1092
- hccl_option = {"output": self._output_path, "task_trace": self.ENABLE_STATUS}
1093
- os.environ['PROFILING_OPTIONS'] = json.dumps(hccl_option)
1094
-
1095
- self._profile_memory = kwargs.pop("profile_memory", False)
1096
- if not isinstance(self._profile_memory, bool):
1097
- logger.warning(f"For '{self.__class__.__name__}', the parameter profile_memory must be bool, "
1098
- f"but got type {type(self._profile_memory)}, it will be set to False.")
1099
- self._profile_memory = False
1100
-
1101
- self._aicore_metrics_id = kwargs.pop("aicore_metrics", 0)
1102
- if not isinstance(self._aicore_metrics_id, int):
1103
- logger.warning(f"For '{self.__class__.__name__}', the parameter aicore_metrics must be int, "
1104
- f"but got type {type(self._aicore_metrics_id)}, it will be set to 0.")
1105
- self._aicore_metrics_id = 0
1106
-
1107
- if self._aicore_metrics_id not in AICORE_METRICS_DICT:
1108
- logger.warning(f"For '{self.__class__.__name__}', the parameter aicore_metrics must be in "
1109
- f"[-1, 0, 1, 2, 3, 4, 5, 6], but got {self._aicore_metrics_id}, it will be set to 0.")
1110
- self._aicore_metrics_id = 0
1111
-
1112
- l2_cache_enable = kwargs.pop("l2_cache", False)
1113
- if not isinstance(l2_cache_enable, bool):
1114
- logger.warning(f"For '{self.__class__.__name__}', the parameter l2_cache must be bool, "
1115
- f"but got type {type(l2_cache_enable)}, it will be set to False.")
1116
- l2_cache_enable = False
1117
- self._l2_cache = self.ENABLE_STATUS if l2_cache_enable else self.DISABLE_STATUS
1118
-
1119
- hbm_ddr_enable = kwargs.pop("hbm_ddr", False)
1120
- if not isinstance(hbm_ddr_enable, bool):
1121
- logger.warning(f"For '{self.__class__.__name__}', the parameter hbm_ddr must be bool, "
1122
- f"but got type {type(hbm_ddr_enable)}, it will be set to False.")
1123
- hbm_ddr_enable = False
1124
- self._hbm_ddr = self.ENABLE_STATUS if hbm_ddr_enable else self.DISABLE_STATUS
1125
-
1126
- pcie_enable = kwargs.pop("pcie", False)
1127
- if not isinstance(pcie_enable, bool):
1128
- logger.warning(f"For '{self.__class__.__name__}', the parameter pcie must be bool, "
1129
- f"but got type {type(pcie_enable)}, it will be set to False.")
1130
- pcie_enable = False
1131
- self._pcie = self.ENABLE_STATUS if pcie_enable else self.DISABLE_STATUS
1132
-
1133
- self._parallel_strategy = kwargs.pop("parallel_strategy", False)
1134
- if not isinstance(self._parallel_strategy, bool):
1135
- logger.warning(f"For '{self.__class__.__name__}', the parameter parallel_strategy must be bool, "
1136
- f"but got type {type(self._parallel_strategy)}, it will be set to False.")
1137
- self._parallel_strategy = False
1138
-
1139
- self.profiler_level = kwargs.pop("profiler_level", None)
1140
- if self.profiler_level and not isinstance(self.profiler_level, ProfilerLevel):
1141
- logger.warning(f"For '{self.__class__.__name__}', the parameter profiler_level must be one of "
1142
- f"[ProfilerLevel.Level0, ProfilerLevel.Level1, ProfilerLevel.Level2], but got type "
1143
- f"{type(self.profiler_level)}, it will be set to ProfilerLevel.Level0.")
1144
- self.profiler_level = ProfilerLevel.Level0
1145
- elif self.profiler_level == ProfilerLevel.Level0:
1146
- self._data_process = False
1147
- self._aicore_metrics_id = -1
1148
- logger.warning(f"For '{self.__class__.__name__}', when profiler_level set Level0, data_process will be set "
1149
- f"to False and aicore_metrics set to -1.")
1150
- elif self.profiler_level == ProfilerLevel.Level1:
1151
- self._data_process = False
1152
- logger.warning(f"For '{self.__class__.__name__}', when profiler_level set Level1, data_process will be set "
1153
- f"to False.")
1154
-
1155
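A sketch of the Ascend-specific keyword arguments parsed above; values are examples, and any invalid type falls back to its default with a warning:

    >>> from mindspore import Profiler
    >>> from mindspore.profiler import ProfilerLevel  # import path assumed from the usage above
    >>> profiler = Profiler(aicore_metrics=1, l2_cache=True, hbm_ddr=True,
    ...                     profiler_level=ProfilerLevel.Level1)  # Level1 also forces data_process=False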
- def _ascend_analyse(self):
1156
- """Collect and analyse ascend performance data."""
1157
- self._rank_size = 1
1158
- if self._profile_communication and not GlobalComm.INITED:
1159
- self._profile_communication = False
1160
-
1161
- if GlobalComm.INITED:
1162
- self._rank_size = get_group_size()
1163
- else:
1164
- self._rank_size = int(os.getenv('RANK_SIZE', '1'))
1165
- ProfilerInfo.set_rank_size(self._rank_size)
1166
-
1167
- if self._has_started:
1168
- self.stop()
1169
- else:
1170
- logger.info("No need to stop profiler because profiler has been stopped.")
1171
- self._ascend_profiler.finalize()
1172
- # export op data before analyse
1173
- self._ascend_graph_analyse()
1174
-
1175
- def _minddata_analyse(self):
1176
- """Analyse mindadata for ascend graph model."""
1177
- if not self._data_process:
1178
- return
1179
- store_id = self._rank_id if self._device_target == DeviceTarget.ASCEND.value else self._dev_id
1180
-
1181
- # parse minddata pipeline operator and queue
1182
- try:
1183
- MinddataPipelineParser(self._output_path, store_id, self._output_path).parse()
1184
- except ProfilerException as err:
1185
- logger.warning(err.message)
1186
- finally:
1187
- pass
1188
-
1189
- # Analyze minddata information
1190
- logger.info("Profiling: analyzing the minddata information.")
1191
- try:
1192
- MinddataProfilingAnalyzer(self._output_path, store_id,
1193
- self._output_path, pretty=self._pretty_json).analyze()
1194
- except ProfilerException as err:
1195
- logger.warning(err.message)
1196
- finally:
1197
- pass
1198
-
1199
- def _minddata_aicpu_analyse(self, source_path, job_id):
1200
- """Analyse minddata aicpu after ascend."""
1201
- if not self._data_process:
1202
- return
1203
- store_id = self._rank_id if self._device_target == DeviceTarget.ASCEND.value else self._dev_id
1204
- # Parsing minddata AICPU profiling
1205
- if self._device_target == DeviceTarget.ASCEND.value:
1206
- logger.info("Profiling: analyzing the minddata AICPU data.")
1207
- MinddataParser.execute(source_path, self._output_path, job_id, store_id)
1208
-
1209
- def _ascend_fpbp_analyse(self, op_summary, steptrace):
1210
- """
1211
- Ascend graph model fp/bp analyse.
1212
-
1213
- Returns:
1214
- dict, points: the fp and bp point information.
1215
- """
1216
- points = None
1217
- try:
1218
- dev_id = self._rank_id if self._device_target == DeviceTarget.ASCEND.value else self._dev_id
1219
- step_trace_point_info_path = os.path.join(self._output_path, f'step_trace_point_info_{dev_id}.json')
1220
-
1221
- step_trace_point_info_path = validate_and_normalize_path(step_trace_point_info_path)
1222
-
1223
- fpbp_analyse = AscendFPBPGenerator(op_summary, steptrace, pretty=self._pretty_json)
1224
- points, _ = fpbp_analyse.parse()
1225
- fpbp_analyse.write(step_trace_point_info_path)
1226
- except ProfilerException as err:
1227
- logger.warning(err.message)
1228
- finally:
1229
- pass
1230
- return points
1231
-
1232
- def _ascend_op_analyse(self, op_summary, op_statistic, dynamic_status, launch_ops: List):
1233
- """
1234
- Ascend graph model op analyse.
1235
-
1236
- Parses op_summary and op_statistic and writes the intermediate aicore/aicpu csv files;
1237
- it returns nothing.
1238
- """
1239
- try:
1240
- dev_id = self._rank_id if self._device_target == DeviceTarget.ASCEND.value else self._dev_id
1241
-
1242
- op_intermediate_detail_path = os.path.join(self._output_path,
1243
- f'aicore_intermediate_{dev_id}_detail.csv')
1244
- op_intermediate_type_path = os.path.join(self._output_path, f'aicore_intermediate_{dev_id}_type.csv')
1245
- aicpu_intermediate_detail_path = os.path.join(self._output_path, f'aicpu_intermediate_{dev_id}.csv')
1246
- framework_raw_path = os.path.join(self._output_path, f'framework_raw_{dev_id}.csv')
1247
-
1248
- op_intermediate_detail_path = validate_and_normalize_path(op_intermediate_detail_path)
1249
- op_intermediate_type_path = validate_and_normalize_path(op_intermediate_type_path)
1250
- aicpu_intermediate_detail_path = validate_and_normalize_path(aicpu_intermediate_detail_path)
1251
- framework_raw_path = validate_and_normalize_path(framework_raw_path)
1252
-
1253
- if context.get_context("mode") == context.GRAPH_MODE:
1254
- output_timeline_data_path = os.path.join(self._output_path, f'output_timeline_data_{dev_id}.txt')
1255
- output_timeline_data_path = validate_and_normalize_path(output_timeline_data_path)
1256
- else:
1257
- output_timeline_data_path = None
1258
-
1259
- op_analyser = AscendOPGenerator(op_summary, op_statistic, dynamic_status, launch_ops)
1260
- op_analyser.parse()
1261
- op_analyser.write(op_intermediate_detail_path, op_intermediate_type_path,
1262
- aicpu_intermediate_detail_path, framework_raw_path, output_timeline_data_path)
1263
- except (ProfilerException, RuntimeError) as err:
1264
- logger.warning(str(err))
1265
- finally:
1266
- pass
1267
-
1268
- def _ascend_step_trace_analyse(self, steptrace):
1269
- """Analyse step trace info."""
1270
- try:
1271
- if not self._dynamic_status:
1272
- dev_id = self._rank_id if self._device_target == DeviceTarget.ASCEND.value else self._dev_id
1273
- step_trace_intermediate_path = os.path.join(self._output_path,
1274
- f'step_trace_raw_{dev_id}_detail_time.csv')
1275
-
1276
- step_trace_intermediate_path = validate_and_normalize_path(step_trace_intermediate_path)
1277
-
1278
- steptrace_analyser = AscendStepTraceGenerator(steptrace)
1279
- steptrace_analyser.parse()
1280
- steptrace_analyser.write(step_trace_intermediate_path)
1281
- except ProfilerException as err:
1282
- logger.warning(err.message)
1283
- finally:
1284
- pass
1285
-
1286
- def _ascend_timeline_analyse(self, op_summary, steptrace, source_path, mindstudio_profiler_output) -> List:
1287
- """Analyse timeline info."""
1288
- try:
1289
- logger.info("Profiling: analyzing the timeline data")
1290
- timeline_analyser = AscendTimelineGenerator(self._output_path, source_path, mindstudio_profiler_output,
1291
- self._rank_id, self._rank_size, context.get_context('mode'),
1292
- self._model_iteration_dict.get(DEFAULT_MODEL_ID))
1293
- timeline_analyser.parse_cluster_data(op_summary, steptrace)
1294
- timeline_analyser.parse_timeline_data(pretty=self._pretty_json)
1295
- timeline_analyser.write_timeline_display()
1296
- timeline_analyser.write_timeline_summary()
1297
- except (ProfilerIOException, ProfilerFileNotFoundException, RuntimeError) as err:
1298
- logger.warning('Fail to write timeline data: %s', err)
1299
- return []
1300
- return timeline_analyser.get_kernel_event_list()
1302
-
1303
- def _ascend_dynamic_net_analyse(self, op_summary):
1304
- """Analyse dynamic shape network info."""
1305
- if self._profile_communication:
1306
- logger.warning(
1307
- "The profile_communication parameter cannot be set on the dynamic shape network.")
1308
- if self._profile_memory:
1309
- logger.warning("The profile_memory parameter cannot be set on the dynamic shape network.")
1310
- logger.warning(
1311
- "[Profiler]Dynamic Shape network does not support collecting step trace performance data currently.")
1312
- dynamic_parser = DynamicFrameWorkParser(self._output_path, self._rank_id, pretty=self._pretty_json)
1313
- dynamic_parser.write_dynamic_shape_data(op_summary)
1314
-
1315
- def _ascend_flops_analyse(self, op_summary, launch_ops):
1316
- """Get op FLOPs from op_summary, write output_op_flops_x.csv."""
1317
- if 'vector_fops' not in op_summary.dtype.names and 'cube_fops' not in op_summary.dtype.names:
1318
- logger.warning("[Profiler] Can not found cube fops and vector fops data in the op summary.")
1319
- return
1320
-
1321
- try:
1322
- dev_id = self._rank_id if self._device_target == DeviceTarget.ASCEND.value else self._dev_id
1323
-
1324
- flops_path = os.path.join(self._output_path, f'flops_{dev_id}.txt')
1325
- flops_summary_path = os.path.join(self._output_path, f'flops_summary_{dev_id}.json')
1326
-
1327
- flops_path = validate_and_normalize_path(flops_path)
1328
- flops_summary_path = validate_and_normalize_path(flops_summary_path)
1329
-
1330
- flops_analyser = AscendFlopsGenerator(op_summary, launch_ops, pretty=self._pretty_json)
1331
- flops_analyser.parse()
1332
- flops_analyser.write(flops_path, flops_summary_path)
1333
-
1334
- except (ProfilerException, RuntimeError) as err:
1335
- logger.warning(str(err))
1336
- finally:
1337
- pass
1338
-
1339
- def _ascend_graph_memory_analyse(self):
1340
- """Analyse memory usage info."""
1341
- if not self._profile_memory:
1342
- return
1343
- if self._profile_memory and context.get_context("mode") == context.PYNATIVE_MODE:
1344
- logger.warning("[Profiler]The parameter profile_memory is not supported on Ascend "
1345
- "PyNative mode currently.")
1346
- try:
1347
- logger.info("Profiling: analyzing the memory usage info.")
1348
- self._analyse_memory_usage()
1349
- except (ProfilerIOException, ProfilerFileNotFoundException, ProfilerRawFileException) as err:
1350
- logger.warning(err.message)
1351
- finally:
1352
- pass
1353
-
1354
- def _ascend_ms_analyze(self, source_path):
1355
- """Ascend ms generate"""
1356
-
1357
- timestamp = time.strftime("%Y%m%d%H%M%S", time.localtime(time.time()))
1358
- if self._rank_id:
1359
- ascend_ms_path = f"rank-{self._rank_id}_{timestamp}_ascend_ms"
1360
- else:
1361
- ascend_ms_path = f"{socket.gethostname()}--{os.getpid()}_{timestamp}_ascend_ms"
1362
- ascend_ms_path = os.path.join(self._output_path, ascend_ms_path)
1363
-
1364
- dev_id = self._rank_id if self._device_target == DeviceTarget.ASCEND.value else self._dev_id
1365
- ascend_profiler_output_path = os.path.join(ascend_ms_path, 'ASCEND_PROFILER_OUTPUT')
1366
- PathManager.make_dir_safety(ascend_profiler_output_path)
1367
-
1368
- source_profiler_info_path = os.path.join(self._output_path, f"profiler_info_{dev_id}.json")
1369
- target_profiler_info_path = os.path.join(ascend_ms_path, f"profiler_info_{dev_id}.json")
1370
- PathManager.copy_file(source_profiler_info_path, target_profiler_info_path)
1371
-
1372
- source_profiler_metadata_path = os.path.join(self._output_path, f"profiler_metadata.json")
1373
- target_profiler_metadata_path = os.path.join(ascend_ms_path, f"profiler_metadata.json")
1374
- PathManager.copy_file(source_profiler_metadata_path, target_profiler_metadata_path)
1375
-
1376
- source_timeline_path = os.path.join(self._output_path, f"ascend_timeline_display_{dev_id}.json")
1377
- target_timeline_path = os.path.join(ascend_profiler_output_path, f"trace_view.json")
1378
- PathManager.copy_file(source_timeline_path, target_timeline_path)
1379
-
1380
- src_op_mem_file = os.path.join(self._output_path, f"operator_memory_{dev_id}.csv")
1381
- dst_op_mem_file = os.path.join(ascend_profiler_output_path, f"operator_memory.csv")
1382
- PathManager.copy_file(src_op_mem_file, dst_op_mem_file)
1383
-
1384
- ms_output_path = os.path.realpath(
1385
- os.path.join(source_path, os.path.pardir, 'mindstudio_profiler_output'))
1386
- static_op_mem_path = os.path.join(ms_output_path, f"static_op_mem_*.csv")
1387
- src_static_op_mem_path = glob.glob(static_op_mem_path)
1388
- if src_static_op_mem_path:
1389
- dst_static_op_mem_file = os.path.join(ascend_profiler_output_path, f"static_op_mem.csv")
1390
- PathManager.copy_file(src_static_op_mem_path[0], dst_static_op_mem_file)
1391
-
1392
- src_op_statistics_path = os.path.join(ms_output_path, "op_statistic_*.csv")
1393
- src_op_statistics_path = glob.glob(src_op_statistics_path)
1394
- if src_op_statistics_path:
1395
- dst_op_statistics_path = os.path.join(ascend_profiler_output_path, f"op_statistic.csv")
1396
- PathManager.copy_file(src_op_statistics_path[0], dst_op_statistics_path)
1397
-
1398
- self._ascend_graph_cluster_analyse(source_path, ascend_profiler_output_path)
1399
- self._ascend_graph_communicate_analyse(source_path, ascend_profiler_output_path)
1400
- AscendIntegrateGenerator(source_path, ascend_profiler_output_path).parse()
1401
- AscendMemoryGenerator(self._output_path, self._rank_id, source_path, ascend_profiler_output_path).parse()
1402
-
1403
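The resulting directory layout, reconstructed from the copy operations above (rank id and timestamp are placeholders, and the optional files appear only when their sources exist):

    rank-0_20250101093000_ascend_ms/
        profiler_info_0.json
        profiler_metadata.json
        ASCEND_PROFILER_OUTPUT/
            trace_view.json
            operator_memory.csv
            static_op_mem.csv            (optional)
            op_statistic.csv             (optional)
            step_trace_time.csv
            communication.json           (with profile_communication)
            communication_matrix.json    (with profile_communication)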
- def _ascend_graph_cluster_analyse(self, source_path, ascend_profiler_output_path):
1404
- """Analyse step trace time info"""
1405
-
1406
- try:
1407
- logger.info("Profiling: analyzing the step trace time profiler info.")
1408
-
1409
- step_trace_time_path = os.path.join(ascend_profiler_output_path, f'step_trace_time.csv')
1410
- step_trace_time_path = validate_and_normalize_path(step_trace_time_path)
1411
-
1412
- cluster_analyse = AscendClusterGenerator(source_path)
1413
- cluster_analyse.parse()
1414
- cluster_analyse.write(step_trace_time_path)
1415
- except (ProfilerIOException, ProfilerFileNotFoundException, ProfilerRawFileException) as err:
1416
- logger.warning(err.message)
1417
- finally:
1418
- pass
1419
-
1420
- def _ascend_graph_communicate_analyse(self, source_path, ascend_profiler_output_path):
1421
- """Analyse communicate info"""
1422
- if not self._profile_communication:
1423
- return
1424
-
1425
- try:
1426
- logger.info("Profiling: analyzing the communicate and communicate_matrix profiler info.")
1427
-
1428
- communication_file_path = os.path.join(ascend_profiler_output_path, f'communication.json')
1429
- communication_file_path = validate_and_normalize_path(communication_file_path)
1430
-
1431
- communication_matrix_file_path = os.path.join(ascend_profiler_output_path,
1432
- f"communication_matrix.json")
1433
- communication_matrix_file_path = validate_and_normalize_path(communication_matrix_file_path)
1434
-
1435
- analyze_path = os.path.realpath(os.path.join(source_path, os.path.pardir, 'analyze'))
1436
- communicate_analyser = AscendCommunicationGenerator(analyze_path)
1437
- communicate_analyser.parse()
1438
- communicate_analyser.write(communication_file_path, communication_matrix_file_path)
1439
- except (ProfilerIOException, ProfilerFileNotFoundException, ProfilerRawFileException) as err:
1440
- logger.warning(err.message)
1441
- finally:
1442
- pass
1443
-
1444
- def _ascend_graph_hccl_analyse(self, mindstudio_profiler_output, steptrace):
1445
- """Analyse hccl profiler info."""
1446
- if not self._profile_communication:
1447
- return
1448
- if self._profile_communication and context.get_context("mode") == context.PYNATIVE_MODE:
1449
- logger.warning("[Profiler]The parameter profile_communication is not supported on Ascend "
1450
- "PyNative mode currently.")
1451
- return
1452
- try:
1453
- logger.info("Profiling: analyzing the hccl profiler info.")
1454
- dev_id = self._rank_id if self._device_target == DeviceTarget.ASCEND.value else self._dev_id
1455
-
1456
- hccl_raw_path = os.path.join(self._output_path, f'hccl_raw_{dev_id}.csv')
1457
- hccl_raw_path = validate_and_normalize_path(hccl_raw_path)
1458
- hccl_analyse = AscendHCCLGenerator(mindstudio_profiler_output, steptrace)
1459
- hccl_analyse.parse()
1460
- hccl_analyse.write(hccl_raw_path)
1461
-
1462
- except (ProfilerIOException, ProfilerFileNotFoundException, ProfilerRawFileException) as err:
1463
- logger.warning(err.message)
1464
- finally:
1465
- pass
1466
-
1467
- def _get_kernel_op_map(self, op_summary, kernels: List[CANNEvent]) -> List:
1468
- """Get the mapping between framework operator and device kernel."""
1469
- if not kernels:
1470
- return []
1471
- kernel_map = {}
1472
- for kernel in kernels:
1473
- key = kernel.name if kernel.name.startswith('hcom_') else (kernel.name, str(kernel.ts))
1474
- kernel_map[key] = kernel.parent
1475
- launch_ops = [None] * len(op_summary)
1476
- for index, summary in enumerate(op_summary):
1477
- ts = str(summary['Task Start Time(us)']).strip("\t")
1478
- name = summary['Op Name']
1479
- key = name if name.startswith("hcom_") else (name, ts)
1480
- launch_op = kernel_map.get(key)
1481
- if not launch_op:
1482
- continue
1483
- launch_ops[index] = launch_op.name
1484
- return launch_ops
1485
-
1486
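A reduced sketch of the matching scheme above: hcom_ kernels are keyed by name alone, all other kernels by a (name, start-time) pair; the records and parent-op strings are hypothetical:

    kernel_map = {("MatMul-op1", "1000.5"): "Default/network/MatMul-op1",
                  "hcom_allReduce_1": "Default/AllReduce-op3"}
    summary = {"Op Name": "MatMul-op1", "Task Start Time(us)": "\t1000.5\t"}
    name = summary["Op Name"]
    ts = str(summary["Task Start Time(us)"]).strip("\t")
    key = name if name.startswith("hcom_") else (name, ts)
    launch_op = kernel_map.get(key)  # -> "Default/network/MatMul-op1"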
- def _ascend_graph_analyse(self, offline_path=None):
1487
- if offline_path or self._analyse_mode == ANALYSIS_SYNC_MODE:
1488
- self._ascend_graph_analyse_inner(offline_path)
1489
- else:
1490
- MultiProcessPool().add_async_job(self._ascend_graph_analyse_inner)
1491
-
1492
- @timeit("Profiler analyse done")
1493
- def _ascend_graph_analyse_inner(self, offline_path=None):
1494
- """Ascend graph mode analyse."""
1495
- job_id = self._get_profiling_job_id(offline_path)
1496
- if not job_id:
1497
- return
1498
- logger.info("Profiling: job id is %s ", job_id)
1499
-
1500
- self._check_output_path(output_path=self._output_path)
1501
- source_path = os.path.join(self._output_path, job_id)
1502
- self._minddata_analyse()
1503
- if self._op_time:
1504
- mindstudio_profiler_output = os.path.realpath(
1505
- os.path.join(source_path, os.path.pardir, 'mindstudio_profiler_output'))
1506
- flag = _ascend_graph_msprof_generator(mindstudio_profiler_output, self._model_iteration_dict)
1507
- if not flag:
1508
- logger.warning('The current driver package does not support all-export mode; single-export mode will be '
1509
- 'used, which may degrade performance. Suggest upgrading the driver package.')
1510
- ProfilerInfo.set_export_flag(flag)
1511
- op_summary, op_statistic, steptrace, steptrace_model \
1512
- = _ascend_graph_msprof_analyse(mindstudio_profiler_output)
1513
- kernels = self._ascend_timeline_analyse(op_summary, steptrace, source_path, mindstudio_profiler_output)
1514
-
1515
- if isinstance(op_statistic, np.ndarray) and op_statistic.shape[0] == 0 or \
1516
- not isinstance(op_statistic, np.ndarray) and not op_statistic:
1517
- logger.warning('Op statistic data is empty!')
1518
- return
1519
-
1520
- launch_ops = self._get_kernel_op_map(op_summary, kernels)
1521
- self._ascend_op_analyse(op_summary, op_statistic, self._dynamic_status, launch_ops)
1522
- graph_ids = np.unique(op_summary['Model ID']).tolist()
1523
- self._ascend_fpbp_analyse(op_summary, steptrace)
1524
- if len(graph_ids) == 1:
1525
- self._ascend_step_trace_analyse(steptrace)
1526
- else:
1527
- self._ascend_step_trace_analyse(steptrace_model)
1528
- if self._dynamic_status:
1529
- self._ascend_dynamic_net_analyse(op_summary)
1530
- self._ascend_flops_analyse(op_summary, launch_ops)
1531
- self._ascend_graph_memory_analyse()
1532
- self._ascend_ms_analyze(mindstudio_profiler_output)
1533
- self._ascend_graph_hccl_analyse(mindstudio_profiler_output, steptrace)
1534
- self._minddata_aicpu_analyse(self._output_path, job_id)
1535
- ProfilerInfo.set_graph_ids(graph_ids)
1536
- try:
1537
- ProfilerInfo.set_data_simplification(self._data_simplification)
1538
- ProfilerPathManager.simplify_data(self._output_path, self._data_simplification)
1539
- except RuntimeError as err:
1540
- logger.error('Profiler simplify data failed, %s', str(err))
1541
-
1542
- def _ascend_graph_start(self):
1543
- """Ascend graph mode start profiling."""
1544
- op_range_file = os.path.join(self._framework_path, "op_range_" + str(self._rank_id))
1545
- if os.path.exists(op_range_file):
1546
- os.remove(op_range_file)
1547
- logger.info("Clear old op range filer.")
1548
- self._ascend_profiler.start()
1549
-
1550
- def _gpu_analyse(self):
1551
- """Collect and analyse gpu performance data."""
1552
- self._dev_id = context.get_context("device_id")
1553
- self._rank_size = 1
1554
- if GlobalComm.WORLD_COMM_GROUP == "nccl_world_group":
1555
- self._dev_id = str(get_rank())
1556
-
1557
- if GlobalComm.INITED:
1558
- self._rank_size = get_group_size()
1559
- else:
1560
- self._rank_size = int(os.getenv('RANK_SIZE', '1'))
1561
-
1562
- ProfilerInfo.set_rank_size(self._rank_size)
1563
-
1564
- if self._has_started:
1565
- self.stop()
1566
- else:
1567
- logger.info("No need to stop profiler because profiler has been stopped.")
1568
-
1569
- self._minddata_analyse()
1570
-
1571
- try:
1572
- self._analyse_step_relation_info()
1573
- except ProfilerException as err:
1574
- logger.warning(err.message)
1575
- finally:
1576
- pass
1577
-
1578
- def _is_support_step_info_collect(self, analyse_step_trace=True):
1579
- """Whether iteration related information needs to be parsed."""
1580
- profiler_info = ProfilerInfo.get_profiler_info()
1581
- graph_ids = profiler_info.get("graph_ids")
1582
- if graph_ids and len(graph_ids) > 1:
1583
- analyse_step_trace = False
1584
- logger.warning(
1585
- "[Profiler]Current model has multiple sub graphs, the segmentation of steps may be inaccurate.")
1586
- if context.get_context("mode") == context.PYNATIVE_MODE:
1587
- analyse_step_trace = False
1588
- logger.warning(
1589
- "[Profiler]Pynative mode does not support collecting step trace performance data currently.")
1590
- if self._is_heterogeneous:
1591
- analyse_step_trace = False
1592
- logger.warning(
1593
- "[Profiler]Profiler does not support collecting step trace performance data for heterogeneous "
1594
- "scenarios currently.")
1595
- return analyse_step_trace
1596
-
1597
- def _analyse_step_relation_info(self):
1598
- """Parse iteration related information."""
1599
- if not self._op_time:
1600
- return
1601
- reduce_op_type = self._get_step_reduce_op_type()
1602
- timeline_generator = self._generate_timeline(reduce_op_type)
1603
- parser = GpuFrameWorkParser(self._output_path, self._dev_id)
1604
- graph_ids = parser.get_graph_ids()
1605
- ProfilerInfo.set_graph_ids(graph_ids)
1606
- self._analyse_step_trace(
1607
- is_training_mode_flag=timeline_generator.check_op_name('Gradients'),
1608
- is_gpu_kernel_async_launch_flag=timeline_generator.is_gpu_kernel_async_launch()
1609
- )
1610
- if self._dynamic_status:
1611
- parser.analyse_dynamic_shape_data(self._timeline_meta)
1612
-
1613
- def _get_step_reduce_op_type(self):
1614
- """Gets all communication operator names."""
1615
-
1616
- step_trace_original_filename = f'step_trace_profiling_{self._dev_id}.txt'
1617
- step_trace_file_path = os.path.join(self._output_path, step_trace_original_filename)
1618
- step_trace_file_path = validate_and_normalize_path(step_trace_file_path)
1619
- reduce_op_type = []
1620
- with open(step_trace_file_path, 'r') as f_obj:
1621
- one_step_info = f_obj.readline().strip().split()
1622
- # The communication operator starts at index 4.
1623
- for reduce_item in one_step_info[4:]:
1624
- reduce_op_type.append(reduce_item.split(',')[0].split('/')[-1])
1625
- return reduce_op_type
1626
-
1627
- def _cpu_analyse(self):
1628
- """Collect and analyse cpu performance data."""
1629
- if self._has_started:
1630
- self.stop()
1631
- else:
1632
- logger.info("No need to stop profiler because profiler has been stopped.")
1633
-
1634
- if not self._op_time:
1635
- return
1636
- try:
1637
- timeline_generator = CpuTimelineGenerator(self._output_path, self._rank_id, context.get_context("mode"))
1638
- timeline_generator.init_timeline(pretty=self._pretty_json)
1639
- timeline_generator.write_timeline()
1640
- timeline_generator.write_timeline_summary()
1641
- except (ProfilerIOException, ProfilerFileNotFoundException, RuntimeError) as err:
1642
- logger.warning('Fail to write timeline data: %s', err)
1643
- raise RuntimeError('Fail to write timeline data.') from err
1644
- if context.get_context("mode") == context.PYNATIVE_MODE:
1645
- raise RuntimeError("Currently, the CPU platform does not support Pynative mode to collect performance "
1646
- "data.")
1647
-
1648
- def _analyse_step_trace(self, is_training_mode_flag=True, is_gpu_kernel_async_launch_flag=False):
1649
- """
1650
- Analyse step trace data and save the result.
1651
-
1652
- Args:
1653
- is_training_mode_flag (bool): Whether in training mode or not.
1654
- is_gpu_kernel_async_launch_flag (bool): Whether gpu kernel launches are asynchronous.
1655
- """
1656
- logger.info("Begin to parse step trace.")
1657
- # construct output path
1658
- dev_id = self._rank_id if self._device_target == DeviceTarget.ASCEND.value else self._dev_id
1659
- step_trace_intermediate_file_path = os.path.join(
1660
- self._output_path,
1661
- f'step_trace_raw_{dev_id}_detail_time.csv'
1662
- )
1663
- point_info_file_path = os.path.join(
1664
- self._output_path,
1665
- f'step_trace_point_info_{dev_id}.json'
1666
- )
1667
- step_trace_intermediate_file_path = validate_and_normalize_path(step_trace_intermediate_file_path)
1668
- point_info_file_path = validate_and_normalize_path(point_info_file_path)
1669
-
1670
- if self._device_target and self._device_target == DeviceTarget.GPU.value:
1671
- if context.get_context("mode") != context.PYNATIVE_MODE:
1672
- input_file_path = os.path.join(self._output_path, f'step_trace_profiling_{self._dev_id}.txt')
1673
- input_file_path = validate_and_normalize_path(input_file_path)
1674
- parser = GpuStepTraceParser(input_dir=input_file_path,
1675
- output_file_path=step_trace_intermediate_file_path,
1676
- is_training_mode=is_training_mode_flag,
1677
- is_gpu_kernel_async_launch=is_gpu_kernel_async_launch_flag)
1678
- parser.parse_and_save()
1679
- point_info = parser.record_point_info(point_info_file_path)
1680
- # print parser result
1681
- parser.show()
1682
- logger.info("Finish saving the intermediate result: %s", step_trace_intermediate_file_path)
1683
- logger.info("The point info is: %s", point_info)
1684
-
1685
-    def _generate_timeline(self, reduce_op_type):
-        """Used for GPU: generate timeline info and write it to a JSON file."""
-        try:
-            timeline_generator = GpuTimelineGenerator(self._output_path, self._dev_id, self._rank_size,
-                                                      context.get_context("mode"))
-            timeline_generator.init_timeline(reduce_op_type)
-            self._timeline_meta = timeline_generator.write_timeline()
-            timeline_generator.write_timeline_summary()
-            timeline_generator.parse_fwk_data()
-            timeline_generator.write_fwk_timeline()
-            return timeline_generator
-        except (ProfilerIOException, ProfilerFileNotFoundException, RuntimeError) as err:
-            logger.warning('Failed to write timeline data: %s', err)
-            raise RuntimeError('Failed to write timeline data.') from err
-
-    def _analyse_memory_usage(self):
-        """Analyse memory usage data."""
-        integrator = Integrator(self._output_path, self._rank_id)
-        integrator.get_aicore_detail_data()
-
-    def _get_profiling_job_id(self, offline_path):
-        """Get profiling job id, which was generated by the ada service.
-
-        Returns:
-            str, profiling job id, e.g. PROF_XXX/device_*.
-        """
-
-        if offline_path:
-            self._output_path = os.path.join(offline_path, 'profiler')
-
-        job_id = ""
-        job_dirs = filter(lambda item: (item.startswith('JOB') or item.startswith('PROF')) and os.path.isdir(
-            os.path.join(self._output_path, item)), os.listdir(self._output_path))
-        sorted_job_dirs = sorted(
-            job_dirs, key=lambda x: os.path.getmtime(os.path.join(self._output_path, x)), reverse=True)
-
-        for dir_name in sorted_job_dirs:
-            prof_dir = os.path.join(self._output_path, dir_name)
-            device_dir = [d for d in os.listdir(prof_dir)
-                          if d.startswith('device') and os.path.isdir(os.path.join(prof_dir, d))]
-            if not device_dir:
-                logger.warning("Found profiling job path %s, but it contains no device directory, "
-                               "profiler will ignore this job dir.", prof_dir)
-                continue
-            job_dir = os.path.join(self._output_path, dir_name, device_dir[0])
-
-            if get_file_path(job_dir, "start_info") is None:
-                logger.warning("Found profiling job path %s, but the start_info file does not exist, "
-                               "profiler will ignore this job dir.", job_dir)
-                continue
-
-            info_file_path = get_file_path(job_dir, "info.json")
-            if info_file_path is None:
-                logger.warning("Found profiling job path %s, but info.json does not exist, "
-                               "profiler will ignore this job dir.", job_dir)
-                continue
-
-            prof_rank_id = ProfilerInfo.get_rank_id(self._output_path)
-            prof_device_id = ProfilerInfo.get_device_id(prof_dir)
-            job_start_time = self._parse_job_start_time(prof_dir)
-
-            if offline_path:
-                self._start_time = int(job_start_time)
-            else:
-                if self._dev_id != prof_device_id and self._rank_id != prof_rank_id:
-                    logger.warning("Found profiling job path %s, but it does not belong to the current "
-                                   "training device. Current training rank id is %s, but the job path rank id "
-                                   "is %s, profiler will ignore this job dir.", job_dir, self._rank_id, prof_rank_id)
-                    continue
-
-                if job_start_time < self._start_time:
-                    logger.warning("Found profiling job path %s, but its start_time(%d) is earlier than this "
-                                   "training start_time(%d), profiler will ignore this job dir.",
-                                   job_dir, job_start_time, self._start_time)
-                    continue
-
-            job_id = os.path.join(dir_name, device_dir[0])
-            break
-
-        if not job_id:
-            msg = "Failed to get a profiling job, output path is {}, " \
-                  "please check whether a job dir or prof dir (name starting with JOB or PROF) was " \
-                  "generated in the output path, or the device id from the job dir may not match the " \
-                  "device_id of the current process.".format(self._output_path)
-            logger.warning(msg)
-
-        return job_id
-
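Two details in the scan above are easy to miss: the candidates are sorted newest-first by directory mtime, and the startswith test needs explicit parentheses because `and` binds tighter than `or` in Python (without them, a `JOB`-prefixed plain file would skip the `isdir` check). A stdlib-only sketch of both points (the directory names are illustrative):

    import os
    import tempfile

    # Precedence: A or B and C  ==  A or (B and C), not (A or B) and C.
    A, B, C = True, False, False
    assert (A or B and C) != ((A or B) and C)

    output_path = tempfile.mkdtemp()
    for d in ("JOB0", "PROF_000001", "not_a_job"):
        os.makedirs(os.path.join(output_path, d))

    # Keep only JOB*/PROF* directories, newest first by modification time.
    job_dirs = [item for item in os.listdir(output_path)
                if (item.startswith('JOB') or item.startswith('PROF'))
                and os.path.isdir(os.path.join(output_path, item))]
    job_dirs.sort(key=lambda x: os.path.getmtime(os.path.join(output_path, x)), reverse=True)
    print(job_dirs)  # newest first, e.g. ['PROF_000001', 'JOB0']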
-    def _query_op_type_info(self):
-        """
-        Query AICORE operator type information.
-
-        Returns:
-            list[list], the AICORE operator type and execution time information.
-        """
-        integrator = Integrator(self._output_path, self._rank_id)
-        return integrator.get_aicore_data()
-
-    def _query_op_detail_info(self, op_type_order):
-        """
-        Query AICORE operator detail information.
-
-        Args:
-            op_type_order (list): The names of the op types, in order.
-
-        Returns:
-            dict, the AICORE operator detail information.
-        """
-
-        op_type_condition = {}
-        if self._filt_optype_names:
-            op_type_condition['not_in'] = self._filt_optype_names
-
-        filter_condition = {
-            'op_type': op_type_condition,
-            'is_display_detail': False,
-        }
-        integrator = Integrator(self._output_path, self._rank_id)
-        return integrator.query_and_sort_by_op_type(filter_condition, op_type_order)
-
-    def _get_devid_rankid_and_devtarget(self):
-        """Get the device id, rank id and device target of this training."""
-
-        device_target = ""
-        dev_id = ""
-        rank_id = ""
-        try:
-            dev_id = str(context.get_context("device_id"))
-            device_target = context.get_context("device_target").lower()
-        except ValueError as err:
-            logger.error("Profiling: failed to get context, %s", err)
-
-        if not dev_id or not dev_id.isdigit():
-            dev_id = str(get_local_rank()) if GlobalComm.INITED and device_target == DeviceTarget.ASCEND.value \
-                else os.getenv('DEVICE_ID')
-        if not dev_id or not dev_id.isdigit():
-            dev_id = "0"
-            logger.warning("Failed to get DEVICE_ID, using 0 instead.")
-
-        if device_target and device_target not in [DeviceTarget.ASCEND.value, DeviceTarget.GPU.value,
-                                                   DeviceTarget.CPU.value]:
-            msg = "Profiling: unsupported backend: %s" % device_target
-            raise RuntimeError(msg)
-
-        rank_id = str(get_rank()) if GlobalComm.INITED and device_target == DeviceTarget.ASCEND.value \
-            else os.getenv("RANK_ID")
-        if not rank_id or not rank_id.isdigit():
-            rank_id = "0"
-            logger.warning(f"For '{self.__class__.__name__}', failed to get RANK_ID from the environment, "
-                           f"using 0 instead.")
-
-        self._dev_id = dev_id
-        self._device_target = device_target  # already lowercased above
-        if device_target == DeviceTarget.GPU.value:
-            self._rank_id = dev_id
-        else:
-            self._rank_id = rank_id
-
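The device id above is resolved through a fallback chain: the `device_id` from the MindSpore context, then the collective library's local rank (Ascend with communication initialized), then the `DEVICE_ID` environment variable, then the literal "0". A stdlib-only sketch of that chain (the helper name `resolve_dev_id` and its inputs are illustrative, not MindSpore API):

    import os

    def resolve_dev_id(ctx_dev_id=None, comm_inited=False, local_rank=None):
        """Pick the first usable device id, mirroring the fallback order above."""
        dev_id = "" if ctx_dev_id is None else str(ctx_dev_id)
        if not dev_id or not dev_id.isdigit():
            dev_id = str(local_rank) if comm_inited else os.getenv('DEVICE_ID')
        if not dev_id or not dev_id.isdigit():
            dev_id = "0"  # last-resort default; the real code also logs a warning
        return dev_id

    assert resolve_dev_id(ctx_dev_id=3) == "3"
    assert resolve_dev_id(comm_inited=True, local_rank=1) == "1"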
-    def _get_output_path(self, kwargs):
-        """Get the output path of profiling data."""
-        if os.getenv("MS_DIAGNOSTIC_DATA_PATH") and kwargs.get("output_path") is not None:
-            logger.warning("Both the output_path parameter and the MS_DIAGNOSTIC_DATA_PATH environment "
-                           "variable are set; the profiling data will be saved to the path given by "
-                           "the output_path parameter.")
-        if kwargs.get("output_path") is None:
-            if "output_path" in kwargs:
-                kwargs.pop("output_path")
-            # Environment variables are mainly set for the convenience of the cloud profiler.
-            output_path = os.getenv("MS_DIAGNOSTIC_DATA_PATH")
-            if output_path:
-                self._output_path = validate_and_normalize_path(output_path)
-            else:
-                output_path = "data"
-                self._output_path = validate_and_normalize_path(output_path)
-        else:
-            output_path = kwargs.pop("output_path")
-            if not isinstance(output_path, str):
-                logger.warning(
-                    f"The output_path must be a string, but got type {type(output_path)}, it will be set to 'data'.")
-                output_path = "data"
-            self._output_path = validate_and_normalize_path(output_path)
-
-        self._output_path = os.path.join(self._output_path, "profiler")
-        if not os.path.exists(self._output_path):
-            os.makedirs(self._output_path, exist_ok=True, mode=stat.S_IRUSR | stat.S_IWUSR | stat.S_IXUSR)
-        else:
-            logger.warning("The target dir already exists. "
-                           "There may be some old profiling data there, and it will be overwritten.")
-        self._framework_path = os.path.join(self._output_path, "FRAMEWORK")
-        if not os.path.exists(self._framework_path):
-            os.makedirs(self._framework_path, exist_ok=True, mode=stat.S_IRUSR | stat.S_IWUSR | stat.S_IXUSR)
-
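In short, the path is resolved as: `output_path` kwarg (if a string) > `MS_DIAGNOSTIC_DATA_PATH` > the literal "data", with "profiler" appended at the end. A stdlib-only sketch of that precedence (the helper `resolve_output_path` is illustrative, and path validation is elided):

    import os

    def resolve_output_path(kwargs):
        """Resolve the profiler output dir: kwarg > env var > 'data', then append 'profiler'."""
        output_path = kwargs.pop("output_path", None)
        if not isinstance(output_path, str):
            # Fall back to the env var, then to the default directory.
            output_path = os.getenv("MS_DIAGNOSTIC_DATA_PATH") or "data"
        return os.path.join(output_path, "profiler")

    assert resolve_output_path({"output_path": "/tmp/run1"}) == os.path.join("/tmp/run1", "profiler")
    os.environ["MS_DIAGNOSTIC_DATA_PATH"] = "/tmp/diag"
    assert resolve_output_path({}) == os.path.join("/tmp/diag", "profiler")
    del os.environ["MS_DIAGNOSTIC_DATA_PATH"]
    assert resolve_output_path({}) == os.path.join("data", "profiler")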
-    def _parser_kwargs(self, kwargs):
-        """Parse kwargs values."""
-        self._op_time = kwargs.get("op_time", True)
-
-        env_run_config = json.loads(os.getenv("MS_PROFILER_RUN_CONFIG", "{}"))
-        params = list(kwargs.keys())
-        if not env_run_config.get("start"):
-            for param in params:
-                if param not in getattr(DeviceSupportParam, self._device_target.upper()).value:
-                    logger.warning("'%s' is not a valid parameter for the current device target and "
-                                   "will be ignored.", param)
-                    kwargs.pop(param)
-                elif not self._op_time and param not in ALWAYS_VALID_PARAM:
-                    logger.warning(f"When op_time is set to False, the parameter '{param}' setting is invalid.")
-
-        if not isinstance(self._op_time, bool):
-            logger.warning(f"For '{self.__class__.__name__}', the parameter op_time must be bool, "
-                           f"but got type {type(self._op_time)}, it will be set to True.")
-            self._op_time = True
-
-        self._data_process = kwargs.pop("data_process", False)
-        if not isinstance(self._data_process, bool):
-            logger.warning(f"For '{self.__class__.__name__}', the parameter data_process must be bool, "
-                           f"but got type {type(self._data_process)}, it will be set to False.")
-            self._data_process = False
-
-        timeline_limit = kwargs.pop("timeline_limit", 500)
-        # An explicit bool check is needed because bool is a subclass of int.
-        if isinstance(timeline_limit, bool) or not isinstance(timeline_limit, int):
-            logger.warning(f"For '{self.__class__.__name__}', the parameter timeline_limit must be int, "
-                           f"but got type {type(timeline_limit)}, it will be set to 500.")
-            timeline_limit = 500
-        if timeline_limit <= 0:
-            logger.warning(
-                "[Profiler]The 'timeline_limit' parameter must be greater than 0, it will be set to 500.")
-            timeline_limit = 500
-        self._timeline_size_limit_byte = timeline_limit * 1024 * 1024
-        self._profile_framework = kwargs.pop("profile_framework", None)
-        if self._profile_framework not in ["time", "all", None]:
-            logger.warning(f"For '{self.__class__.__name__}', the parameter profile_framework must be one of "
-                           f"['time', 'all', None], but got {self._profile_framework}, it will be set to None.")
-            self._profile_framework = None
-
-        if not isinstance(self._data_simplification, bool):
-            logger.warning(f"For '{self.__class__.__name__}', the parameter data_simplification must be bool, "
-                           f"but got type {type(self._data_simplification)}, it will be set to True.")
-            self._data_simplification = True
-
-        self._with_stack = kwargs.pop("with_stack", False)
-        if not isinstance(self._with_stack, bool):
-            logger.warning(f"For '{self.__class__.__name__}', the parameter with_stack must be bool, but got "
-                           f"type {type(self._with_stack)}, it will be set to False.")
-            self._with_stack = False
-        if self._with_stack and self._profile_framework not in ["time", "all"]:
-            logger.warning("When using the with_stack parameter, the profile_framework parameter must be enabled.")
-            self._with_stack = False
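The `timeline_limit` validation above checks `isinstance(timeline_limit, bool)` before the int check because `bool` is a subclass of `int` in Python, so `True` would otherwise pass as a valid limit. A small standalone illustration (the helper `validate_limit` is illustrative, not MindSpore API):

    # bool is a subclass of int, so isinstance(True, int) is True.
    assert isinstance(True, int)
    assert True + True == 2

    def validate_limit(value, default=500):
        """Accept only a positive non-bool int, mirroring the check above."""
        if isinstance(value, bool) or not isinstance(value, int) or value <= 0:
            return default
        return value

    assert validate_limit(100) == 100
    assert validate_limit(True) == 500   # rejected despite being an int subclass
    assert validate_limit(-3) == 500
    assert validate_limit("100") == 500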