mindspore 2.4.10__cp39-cp39-win_amd64.whl → 2.6.0rc1__cp39-cp39-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of mindspore might be problematic.

Files changed (577)
  1. mindspore/.commit_id +1 -1
  2. mindspore/__init__.py +13 -6
  3. mindspore/_c_dataengine.cp39-win_amd64.pyd +0 -0
  4. mindspore/_c_expression.cp39-win_amd64.pyd +0 -0
  5. mindspore/_c_mindrecord.cp39-win_amd64.pyd +0 -0
  6. mindspore/_check_jit_forbidden_api.py +3 -0
  7. mindspore/_checkparam.py +3 -38
  8. mindspore/_deprecated/__init__.py +17 -0
  9. mindspore/_deprecated/jit.py +198 -0
  10. mindspore/_extends/builtin_operations.py +1 -1
  11. mindspore/_extends/parallel_compile/akg_compiler/gen_custom_op_files.py +1 -1
  12. mindspore/_extends/parse/__init__.py +6 -7
  13. mindspore/_extends/parse/compile_config.py +83 -0
  14. mindspore/_extends/parse/deprecated/__init__.py +0 -0
  15. mindspore/_extends/parse/deprecated/deprecated_tensor_method.py +394 -0
  16. mindspore/_extends/parse/jit_fallback_modules/__init__.py +0 -0
  17. mindspore/_extends/parse/jit_fallback_modules/check_utils.py +123 -0
  18. mindspore/_extends/parse/jit_fallback_modules/third_party_modules.py +50 -0
  19. mindspore/_extends/parse/parser.py +46 -197
  20. mindspore/_extends/parse/resources.py +1 -5
  21. mindspore/_extends/parse/standard_method.py +217 -98
  22. mindspore/_extends/pijit/__init__.py +2 -2
  23. mindspore/_extends/pijit/pijit_func_white_list.py +17 -12
  24. mindspore/_extends/pijit/tensor_func_list.py +27 -0
  25. mindspore/_extends/utils.py +1 -1
  26. mindspore/amp.py +11 -5
  27. mindspore/avcodec-59.dll +0 -0
  28. mindspore/avdevice-59.dll +0 -0
  29. mindspore/avfilter-8.dll +0 -0
  30. mindspore/avformat-59.dll +0 -0
  31. mindspore/avutil-57.dll +0 -0
  32. mindspore/boost/__init__.py +2 -2
  33. mindspore/boost/base.py +3 -7
  34. mindspore/boost/boost_cell_wrapper.py +138 -43
  35. mindspore/common/__init__.py +6 -3
  36. mindspore/common/_grad_function.py +56 -0
  37. mindspore/common/_pijit_context.py +14 -5
  38. mindspore/common/_register_for_tensor.py +1 -2
  39. mindspore/common/_stub_tensor.py +30 -14
  40. mindspore/common/_tensor_cpp_method.py +17 -0
  41. mindspore/common/_tensor_docs.py +4760 -0
  42. mindspore/common/api.py +435 -371
  43. mindspore/common/auto_dynamic_shape.py +41 -44
  44. mindspore/common/dtype.py +39 -36
  45. mindspore/common/dump.py +9 -6
  46. mindspore/common/file_system.py +9 -1
  47. mindspore/common/generator.py +2 -0
  48. mindspore/common/hook_handle.py +6 -2
  49. mindspore/common/initializer.py +13 -10
  50. mindspore/common/jit_begin_end.py +94 -0
  51. mindspore/common/jit_config.py +6 -1
  52. mindspore/common/jit_context.py +76 -0
  53. mindspore/common/jit_trace.py +378 -0
  54. mindspore/common/lazy_inline.py +9 -3
  55. mindspore/common/mindir_util.py +10 -2
  56. mindspore/common/mutable.py +5 -4
  57. mindspore/common/parameter.py +135 -52
  58. mindspore/common/seed.py +2 -2
  59. mindspore/common/sparse_tensor.py +23 -17
  60. mindspore/common/tensor.py +951 -1992
  61. mindspore/communication/__init__.py +7 -5
  62. mindspore/communication/_comm_helper.py +52 -2
  63. mindspore/communication/comm_func.py +240 -181
  64. mindspore/communication/management.py +95 -26
  65. mindspore/context.py +314 -566
  66. mindspore/dataset/__init__.py +65 -37
  67. mindspore/dataset/audio/__init__.py +2 -8
  68. mindspore/dataset/audio/transforms.py +3 -17
  69. mindspore/dataset/callback/ds_callback.py +2 -1
  70. mindspore/dataset/core/config.py +87 -6
  71. mindspore/dataset/engine/cache_admin.py +3 -3
  72. mindspore/dataset/engine/cache_client.py +6 -5
  73. mindspore/dataset/engine/datasets.py +292 -267
  74. mindspore/dataset/engine/datasets_audio.py +22 -8
  75. mindspore/dataset/engine/datasets_standard_format.py +46 -27
  76. mindspore/dataset/engine/datasets_text.py +78 -48
  77. mindspore/dataset/engine/datasets_user_defined.py +182 -116
  78. mindspore/dataset/engine/datasets_vision.py +120 -44
  79. mindspore/dataset/engine/iterators.py +283 -63
  80. mindspore/dataset/engine/obs/obs_mindrecord_dataset.py +1 -1
  81. mindspore/dataset/engine/obs/util.py +8 -0
  82. mindspore/dataset/engine/queue.py +40 -0
  83. mindspore/dataset/engine/samplers.py +289 -43
  84. mindspore/dataset/engine/serializer_deserializer.py +3 -2
  85. mindspore/dataset/engine/validators.py +53 -11
  86. mindspore/dataset/text/__init__.py +7 -6
  87. mindspore/dataset/text/transforms.py +6 -5
  88. mindspore/dataset/text/utils.py +3 -3
  89. mindspore/dataset/transforms/__init__.py +0 -9
  90. mindspore/dataset/transforms/py_transforms_util.py +17 -0
  91. mindspore/dataset/transforms/transforms.py +31 -14
  92. mindspore/dataset/utils/browse_dataset.py +1 -1
  93. mindspore/dataset/vision/__init__.py +2 -9
  94. mindspore/dataset/vision/transforms.py +202 -158
  95. mindspore/dataset/vision/utils.py +7 -5
  96. mindspore/dataset/vision/validators.py +1 -2
  97. mindspore/device_context/__init__.py +21 -0
  98. mindspore/device_context/ascend/__init__.py +25 -0
  99. mindspore/device_context/ascend/device.py +72 -0
  100. mindspore/device_context/ascend/op_debug.py +153 -0
  101. mindspore/device_context/ascend/op_precision.py +193 -0
  102. mindspore/device_context/ascend/op_tuning.py +123 -0
  103. mindspore/{ops_generate/gen_constants.py → device_context/cpu/__init__.py} +6 -17
  104. mindspore/device_context/cpu/device.py +62 -0
  105. mindspore/device_context/cpu/op_tuning.py +43 -0
  106. mindspore/device_context/gpu/__init__.py +21 -0
  107. mindspore/device_context/gpu/device.py +70 -0
  108. mindspore/device_context/gpu/op_precision.py +67 -0
  109. mindspore/device_context/gpu/op_tuning.py +175 -0
  110. mindspore/device_manager.py +170 -0
  111. mindspore/experimental/es/embedding_service.py +35 -27
  112. mindspore/experimental/llm_boost/__init__.py +1 -0
  113. mindspore/experimental/llm_boost/ascend_native/__init__.py +22 -0
  114. mindspore/experimental/llm_boost/ascend_native/llama_boost_ascend_native.py +211 -0
  115. mindspore/experimental/llm_boost/ascend_native/llm_boost.py +52 -0
  116. mindspore/experimental/llm_boost/atb/boost_base.py +2 -3
  117. mindspore/experimental/llm_boost/atb/llama_boost.py +6 -1
  118. mindspore/experimental/llm_boost/register.py +1 -0
  119. mindspore/experimental/map_parameter.py +4 -4
  120. mindspore/experimental/optim/adadelta.py +6 -6
  121. mindspore/experimental/optim/adagrad.py +4 -4
  122. mindspore/experimental/optim/adam.py +7 -0
  123. mindspore/experimental/optim/adamax.py +4 -4
  124. mindspore/experimental/optim/adamw.py +4 -0
  125. mindspore/experimental/optim/asgd.py +1 -1
  126. mindspore/experimental/optim/lr_scheduler.py +73 -46
  127. mindspore/experimental/optim/radam.py +34 -31
  128. mindspore/experimental/optim/rprop.py +1 -1
  129. mindspore/experimental/optim/sgd.py +1 -1
  130. mindspore/hal/contiguous_tensors_handle.py +6 -10
  131. mindspore/hal/device.py +55 -53
  132. mindspore/hal/event.py +52 -52
  133. mindspore/hal/memory.py +157 -117
  134. mindspore/hal/stream.py +150 -109
  135. mindspore/include/api/context.h +0 -1
  136. mindspore/include/dataset/constants.h +7 -4
  137. mindspore/include/dataset/execute.h +2 -2
  138. mindspore/jpeg62.dll +0 -0
  139. mindspore/log.py +50 -0
  140. mindspore/mindrecord/__init__.py +21 -8
  141. mindspore/mindrecord/config.py +17 -316
  142. mindspore/mindrecord/filereader.py +1 -9
  143. mindspore/mindrecord/filewriter.py +5 -15
  144. mindspore/mindrecord/mindpage.py +1 -9
  145. mindspore/mindspore_backend_common.dll +0 -0
  146. mindspore/mindspore_backend_manager.dll +0 -0
  147. mindspore/mindspore_common.dll +0 -0
  148. mindspore/mindspore_core.dll +0 -0
  149. mindspore/mindspore_dump.dll +0 -0
  150. mindspore/mindspore_frontend.dll +0 -0
  151. mindspore/mindspore_memory_pool.dll +0 -0
  152. mindspore/mindspore_ms_backend.dll +0 -0
  153. mindspore/mindspore_ops.dll +0 -0
  154. mindspore/{mindspore_backend.dll → mindspore_ops_host.dll} +0 -0
  155. mindspore/mindspore_ops_kernel_common.dll +0 -0
  156. mindspore/mindspore_profiler.dll +0 -0
  157. mindspore/mindspore_pyboost.dll +0 -0
  158. mindspore/mindspore_pynative.dll +0 -0
  159. mindspore/mindspore_res_manager.dll +0 -0
  160. mindspore/mindspore_runtime_pipeline.dll +0 -0
  161. mindspore/mint/__init__.py +796 -759
  162. mindspore/mint/distributed/__init__.py +70 -4
  163. mindspore/mint/distributed/distributed.py +2679 -44
  164. mindspore/mint/linalg/__init__.py +8 -0
  165. mindspore/mint/nn/__init__.py +743 -22
  166. mindspore/mint/nn/functional.py +716 -23
  167. mindspore/mint/nn/layer/__init__.py +21 -4
  168. mindspore/mint/nn/layer/_functions.py +334 -0
  169. mindspore/mint/nn/layer/activation.py +276 -1
  170. mindspore/mint/nn/layer/basic.py +123 -0
  171. mindspore/mint/nn/layer/conv.py +921 -0
  172. mindspore/mint/nn/layer/normalization.py +223 -28
  173. mindspore/mint/nn/layer/padding.py +797 -0
  174. mindspore/mint/nn/layer/pooling.py +235 -0
  175. mindspore/mint/optim/__init__.py +3 -1
  176. mindspore/mint/optim/adam.py +223 -0
  177. mindspore/mint/optim/adamw.py +26 -19
  178. mindspore/mint/optim/sgd.py +171 -0
  179. mindspore/mint/special/__init__.py +2 -1
  180. mindspore/multiprocessing/__init__.py +5 -0
  181. mindspore/nn/__init__.py +4 -1
  182. mindspore/nn/cell.py +1370 -189
  183. mindspore/nn/dynamic_lr.py +2 -1
  184. mindspore/nn/layer/activation.py +29 -27
  185. mindspore/nn/layer/basic.py +51 -35
  186. mindspore/nn/layer/channel_shuffle.py +3 -3
  187. mindspore/nn/layer/container.py +1 -1
  188. mindspore/nn/layer/conv.py +22 -17
  189. mindspore/nn/layer/embedding.py +12 -11
  190. mindspore/nn/layer/normalization.py +56 -49
  191. mindspore/nn/layer/padding.py +4 -3
  192. mindspore/nn/layer/pooling.py +120 -42
  193. mindspore/nn/layer/rnn_cells.py +1 -1
  194. mindspore/nn/layer/rnns.py +2 -1
  195. mindspore/nn/layer/timedistributed.py +5 -5
  196. mindspore/nn/layer/transformer.py +59 -36
  197. mindspore/nn/learning_rate_schedule.py +8 -4
  198. mindspore/nn/loss/loss.py +58 -55
  199. mindspore/nn/optim/ada_grad.py +7 -5
  200. mindspore/nn/optim/adadelta.py +11 -9
  201. mindspore/nn/optim/adafactor.py +1 -1
  202. mindspore/nn/optim/adam.py +17 -13
  203. mindspore/nn/optim/adamax.py +8 -7
  204. mindspore/nn/optim/adasum.py +5 -5
  205. mindspore/nn/optim/asgd.py +1 -1
  206. mindspore/nn/optim/ftrl.py +11 -9
  207. mindspore/nn/optim/lamb.py +1 -1
  208. mindspore/nn/optim/lars.py +1 -4
  209. mindspore/nn/optim/lazyadam.py +12 -10
  210. mindspore/nn/optim/momentum.py +7 -6
  211. mindspore/nn/optim/optimizer.py +3 -3
  212. mindspore/nn/optim/proximal_ada_grad.py +12 -10
  213. mindspore/nn/optim/rmsprop.py +13 -12
  214. mindspore/nn/optim/rprop.py +11 -9
  215. mindspore/nn/optim/sgd.py +9 -6
  216. mindspore/nn/optim/tft_wrapper.py +5 -2
  217. mindspore/nn/optim/thor.py +2 -1
  218. mindspore/nn/probability/bijector/bijector.py +17 -11
  219. mindspore/nn/probability/bijector/gumbel_cdf.py +5 -5
  220. mindspore/nn/probability/bijector/invert.py +2 -2
  221. mindspore/nn/probability/bijector/scalar_affine.py +3 -3
  222. mindspore/nn/probability/bijector/softplus.py +3 -2
  223. mindspore/nn/probability/distribution/beta.py +3 -3
  224. mindspore/nn/probability/distribution/categorical.py +1 -1
  225. mindspore/nn/probability/distribution/cauchy.py +4 -2
  226. mindspore/nn/probability/distribution/exponential.py +6 -7
  227. mindspore/nn/probability/distribution/gamma.py +2 -2
  228. mindspore/nn/probability/distribution/gumbel.py +2 -2
  229. mindspore/nn/probability/distribution/half_normal.py +5 -3
  230. mindspore/nn/probability/distribution/logistic.py +5 -3
  231. mindspore/nn/probability/distribution/poisson.py +1 -1
  232. mindspore/nn/probability/distribution/uniform.py +5 -3
  233. mindspore/nn/reinforcement/_tensors_queue.py +1 -1
  234. mindspore/nn/reinforcement/tensor_array.py +1 -1
  235. mindspore/nn/utils/init.py +13 -11
  236. mindspore/nn/wrap/__init__.py +6 -6
  237. mindspore/nn/wrap/cell_wrapper.py +181 -122
  238. mindspore/nn/wrap/grad_reducer.py +45 -36
  239. mindspore/nn/wrap/loss_scale.py +6 -7
  240. mindspore/numpy/array_creations.py +63 -65
  241. mindspore/numpy/array_ops.py +149 -144
  242. mindspore/numpy/logic_ops.py +41 -42
  243. mindspore/numpy/math_ops.py +365 -363
  244. mindspore/numpy/utils.py +17 -18
  245. mindspore/numpy/utils_const.py +5 -6
  246. mindspore/opencv_core452.dll +0 -0
  247. mindspore/opencv_imgcodecs452.dll +0 -0
  248. mindspore/opencv_imgproc452.dll +0 -0
  249. mindspore/ops/__init__.py +5 -3
  250. mindspore/ops/_grad_experimental/grad_comm_ops.py +112 -16
  251. mindspore/ops/_grad_experimental/grad_debug_ops.py +14 -2
  252. mindspore/ops/_grad_experimental/grad_inner_ops.py +9 -0
  253. mindspore/ops/_grad_experimental/grad_math_ops.py +2 -1
  254. mindspore/ops/_grad_experimental/taylor_rule.py +29 -0
  255. mindspore/ops/_op_impl/cpu/__init__.py +1 -0
  256. mindspore/ops/_op_impl/cpu/raise_op.py +28 -0
  257. mindspore/ops/_register_for_op.py +0 -11
  258. mindspore/{ops_generate → ops/_utils}/arg_dtype_cast.py +123 -4
  259. mindspore/{ops_generate → ops/_utils}/arg_handler.py +3 -65
  260. mindspore/ops/_vmap/vmap_array_ops.py +27 -25
  261. mindspore/ops/_vmap/vmap_base.py +0 -2
  262. mindspore/ops/_vmap/vmap_grad_nn_ops.py +21 -14
  263. mindspore/ops/_vmap/vmap_math_ops.py +15 -16
  264. mindspore/ops/_vmap/vmap_nn_ops.py +29 -42
  265. mindspore/ops/auto_generate/__init__.py +4 -3
  266. mindspore/ops/auto_generate/cpp_create_prim_instance_helper.py +236 -46
  267. mindspore/ops/auto_generate/gen_extend_func.py +764 -124
  268. mindspore/ops/auto_generate/gen_ops_def.py +4018 -2264
  269. mindspore/ops/auto_generate/gen_ops_prim.py +15463 -5037
  270. mindspore/ops/auto_generate/pyboost_inner_prim.py +221 -87
  271. mindspore/ops/composite/__init__.py +2 -1
  272. mindspore/ops/composite/base.py +20 -25
  273. mindspore/ops/composite/math_ops.py +6 -16
  274. mindspore/ops/composite/multitype_ops/__init__.py +5 -2
  275. mindspore/ops/composite/multitype_ops/_compile_utils.py +228 -30
  276. mindspore/ops/composite/multitype_ops/_constexpr_utils.py +1 -2
  277. mindspore/ops/composite/multitype_ops/add_impl.py +2 -1
  278. mindspore/ops/composite/multitype_ops/bitwise_and_impl.py +2 -1
  279. mindspore/ops/composite/multitype_ops/bitwise_or_impl.py +2 -1
  280. mindspore/ops/composite/multitype_ops/bitwise_xor_impl.py +2 -1
  281. mindspore/ops/composite/multitype_ops/div_impl.py +6 -4
  282. mindspore/ops/composite/multitype_ops/equal_impl.py +4 -3
  283. mindspore/ops/composite/multitype_ops/floordiv_impl.py +2 -1
  284. mindspore/ops/composite/multitype_ops/getitem_impl.py +3 -2
  285. mindspore/ops/composite/multitype_ops/greater_equal_impl.py +4 -3
  286. mindspore/ops/composite/multitype_ops/greater_impl.py +4 -3
  287. mindspore/ops/composite/multitype_ops/in_impl.py +2 -1
  288. mindspore/ops/composite/multitype_ops/invert_impl.py +50 -0
  289. mindspore/ops/composite/multitype_ops/left_shift_impl.py +2 -1
  290. mindspore/ops/composite/multitype_ops/less_equal_impl.py +4 -3
  291. mindspore/ops/composite/multitype_ops/less_impl.py +4 -3
  292. mindspore/ops/composite/multitype_ops/logic_not_impl.py +3 -2
  293. mindspore/ops/composite/multitype_ops/logical_and_impl.py +2 -1
  294. mindspore/ops/composite/multitype_ops/logical_or_impl.py +2 -1
  295. mindspore/ops/composite/multitype_ops/mod_impl.py +2 -1
  296. mindspore/ops/composite/multitype_ops/mul_impl.py +3 -2
  297. mindspore/ops/composite/multitype_ops/negative_impl.py +2 -1
  298. mindspore/ops/composite/multitype_ops/not_equal_impl.py +2 -1
  299. mindspore/ops/composite/multitype_ops/not_in_impl.py +2 -1
  300. mindspore/ops/composite/multitype_ops/ones_like_impl.py +18 -0
  301. mindspore/ops/composite/multitype_ops/pow_impl.py +2 -30
  302. mindspore/ops/composite/multitype_ops/right_shift_impl.py +2 -1
  303. mindspore/ops/composite/multitype_ops/setitem_impl.py +2 -1
  304. mindspore/ops/composite/multitype_ops/sub_impl.py +2 -1
  305. mindspore/ops/function/__init__.py +40 -2
  306. mindspore/ops/function/_add_attr_func.py +58 -0
  307. mindspore/ops/function/array_func.py +2089 -2403
  308. mindspore/ops/function/clip_func.py +80 -23
  309. mindspore/ops/function/debug_func.py +57 -57
  310. mindspore/ops/function/grad/__init__.py +1 -0
  311. mindspore/ops/function/grad/grad_func.py +104 -71
  312. mindspore/ops/function/image_func.py +2 -2
  313. mindspore/ops/function/linalg_func.py +47 -78
  314. mindspore/ops/function/math_func.py +4501 -3802
  315. mindspore/ops/function/nn_func.py +1726 -620
  316. mindspore/ops/function/other_func.py +159 -1
  317. mindspore/ops/function/parameter_func.py +18 -84
  318. mindspore/ops/function/random_func.py +440 -387
  319. mindspore/ops/function/reshard_func.py +4 -70
  320. mindspore/ops/function/sparse_func.py +3 -3
  321. mindspore/ops/function/sparse_unary_func.py +6 -6
  322. mindspore/ops/function/spectral_func.py +25 -58
  323. mindspore/ops/function/vmap_func.py +24 -17
  324. mindspore/ops/functional.py +22 -7
  325. mindspore/ops/functional_overload.py +1440 -0
  326. mindspore/ops/op_info_register.py +32 -244
  327. mindspore/ops/operations/__init__.py +13 -7
  328. mindspore/ops/operations/_custom_ops_utils.py +247 -0
  329. mindspore/ops/operations/_embedding_cache_ops.py +4 -4
  330. mindspore/ops/operations/_grad_ops.py +2 -43
  331. mindspore/ops/operations/_infer_ops.py +2 -1
  332. mindspore/ops/operations/_inner_ops.py +43 -84
  333. mindspore/ops/operations/_ms_kernel.py +4 -10
  334. mindspore/ops/operations/_rl_inner_ops.py +1 -1
  335. mindspore/ops/operations/_scalar_ops.py +3 -2
  336. mindspore/ops/operations/_sequence_ops.py +1 -1
  337. mindspore/ops/operations/_tensor_array.py +1 -1
  338. mindspore/ops/operations/array_ops.py +81 -324
  339. mindspore/ops/operations/comm_ops.py +154 -108
  340. mindspore/ops/operations/custom_ops.py +232 -78
  341. mindspore/ops/operations/debug_ops.py +153 -59
  342. mindspore/ops/operations/inner_ops.py +7 -5
  343. mindspore/ops/operations/linalg_ops.py +1 -57
  344. mindspore/ops/operations/manually_defined/_inner.py +1 -1
  345. mindspore/ops/operations/manually_defined/ops_def.py +928 -180
  346. mindspore/ops/operations/math_ops.py +32 -234
  347. mindspore/ops/operations/nn_ops.py +210 -498
  348. mindspore/ops/operations/other_ops.py +62 -9
  349. mindspore/ops/operations/random_ops.py +13 -7
  350. mindspore/ops/operations/reshard_ops.py +1 -1
  351. mindspore/ops/operations/sparse_ops.py +2 -2
  352. mindspore/ops/primitive.py +66 -53
  353. mindspore/ops/tensor_method.py +1888 -0
  354. mindspore/ops_generate/__init__.py +0 -5
  355. mindspore/ops_generate/aclnn/__init__.py +0 -0
  356. mindspore/ops_generate/aclnn/aclnn_kernel_register_auto_cc_generator.py +135 -0
  357. mindspore/ops_generate/aclnn/gen_aclnn_implement.py +257 -0
  358. mindspore/ops_generate/api/__init__.py +0 -0
  359. mindspore/ops_generate/api/add_tensor_docs_generator.py +56 -0
  360. mindspore/ops_generate/api/cpp_create_prim_instance_helper_generator.py +105 -0
  361. mindspore/ops_generate/api/functional_map_cpp_generator.py +504 -0
  362. mindspore/ops_generate/api/functional_overload_py_generator.py +112 -0
  363. mindspore/ops_generate/api/functions_cc_generator.py +237 -0
  364. mindspore/ops_generate/api/gen_api.py +103 -0
  365. mindspore/ops_generate/api/op_api_proto.py +235 -0
  366. mindspore/ops_generate/api/tensor_func_reg_cpp_generator.py +461 -0
  367. mindspore/ops_generate/common/__init__.py +0 -0
  368. mindspore/ops_generate/common/base_generator.py +11 -0
  369. mindspore/ops_generate/common/gen_constants.py +91 -0
  370. mindspore/ops_generate/common/gen_utils.py +348 -0
  371. mindspore/ops_generate/common/op_proto.py +473 -0
  372. mindspore/ops_generate/common/template.py +523 -0
  373. mindspore/ops_generate/gen_ops.py +22 -1069
  374. mindspore/ops_generate/op_def/__init__.py +0 -0
  375. mindspore/ops_generate/op_def/gen_op_def.py +90 -0
  376. mindspore/ops_generate/op_def/lite_ops_cpp_generator.py +191 -0
  377. mindspore/ops_generate/op_def/ops_def_cc_generator.py +299 -0
  378. mindspore/ops_generate/op_def/ops_def_h_generator.py +74 -0
  379. mindspore/ops_generate/op_def/ops_name_h_generator.py +83 -0
  380. mindspore/ops_generate/op_def/ops_primitive_h_generator.py +125 -0
  381. mindspore/ops_generate/op_def_py/__init__.py +0 -0
  382. mindspore/ops_generate/op_def_py/gen_op_def_py.py +47 -0
  383. mindspore/ops_generate/op_def_py/op_def_py_generator.py +132 -0
  384. mindspore/ops_generate/op_def_py/op_prim_py_generator.py +489 -0
  385. mindspore/ops_generate/pyboost/__init__.py +0 -0
  386. mindspore/ops_generate/pyboost/auto_grad_impl_cc_generator.py +139 -0
  387. mindspore/ops_generate/pyboost/auto_grad_reg_cc_generator.py +93 -0
  388. mindspore/ops_generate/pyboost/gen_pyboost_func.py +175 -0
  389. mindspore/ops_generate/pyboost/op_template_parser.py +517 -0
  390. mindspore/ops_generate/pyboost/pyboost_functions_cpp_generator.py +407 -0
  391. mindspore/ops_generate/pyboost/pyboost_functions_h_generator.py +100 -0
  392. mindspore/ops_generate/pyboost/pyboost_functions_py_generator.py +148 -0
  393. mindspore/ops_generate/pyboost/pyboost_grad_function_cpp_generator.py +155 -0
  394. mindspore/ops_generate/pyboost/pyboost_inner_prim_generator.py +132 -0
  395. mindspore/ops_generate/pyboost/pyboost_native_grad_functions_generator.py +272 -0
  396. mindspore/ops_generate/pyboost/pyboost_op_cpp_code_generator.py +938 -0
  397. mindspore/ops_generate/pyboost/pyboost_overload_functions_cpp_generator.py +357 -0
  398. mindspore/ops_generate/{pyboost_utils.py → pyboost/pyboost_utils.py} +179 -36
  399. mindspore/ops_generate/resources/__init__.py +0 -0
  400. mindspore/ops_generate/resources/resource_list.py +30 -0
  401. mindspore/ops_generate/resources/resource_loader.py +36 -0
  402. mindspore/ops_generate/resources/resource_manager.py +64 -0
  403. mindspore/ops_generate/resources/yaml_loader.py +88 -0
  404. mindspore/ops_generate/tensor_py_cc_generator.py +122 -0
  405. mindspore/parallel/__init__.py +7 -3
  406. mindspore/parallel/_auto_parallel_context.py +152 -34
  407. mindspore/parallel/_cell_wrapper.py +130 -15
  408. mindspore/parallel/_parallel_serialization.py +107 -5
  409. mindspore/parallel/_ps_context.py +1 -1
  410. mindspore/parallel/_recovery_context.py +7 -2
  411. mindspore/parallel/_tensor.py +142 -18
  412. mindspore/parallel/_utils.py +199 -23
  413. mindspore/parallel/algo_parameter_config.py +4 -4
  414. mindspore/parallel/auto_parallel.py +732 -0
  415. mindspore/parallel/checkpoint_convert.py +159 -0
  416. mindspore/parallel/checkpoint_transform.py +698 -35
  417. mindspore/parallel/cluster/process_entity/_api.py +276 -50
  418. mindspore/parallel/cluster/process_entity/_utils.py +41 -6
  419. mindspore/parallel/cluster/run.py +21 -4
  420. mindspore/parallel/function/__init__.py +24 -0
  421. mindspore/parallel/function/reshard_func.py +259 -0
  422. mindspore/parallel/nn/__init__.py +25 -0
  423. mindspore/parallel/nn/parallel_cell_wrapper.py +263 -0
  424. mindspore/parallel/nn/parallel_grad_reducer.py +169 -0
  425. mindspore/parallel/parameter_broadcast.py +25 -14
  426. mindspore/parallel/shard.py +137 -58
  427. mindspore/parallel/transform_safetensors.py +363 -305
  428. mindspore/profiler/__init__.py +22 -5
  429. mindspore/profiler/analysis/__init__.py +0 -0
  430. mindspore/profiler/analysis/parser/__init__.py +0 -0
  431. mindspore/profiler/analysis/parser/ascend_cann_parser.py +170 -0
  432. mindspore/profiler/analysis/parser/base_parser.py +158 -0
  433. mindspore/profiler/analysis/parser/framework_cann_relation_parser.py +45 -0
  434. mindspore/profiler/analysis/parser/ms_framework_parser.py +142 -0
  435. mindspore/profiler/analysis/parser/ms_minddata_parser.py +145 -0
  436. mindspore/profiler/analysis/parser/timeline_assembly_factory/__init__.py +0 -0
  437. mindspore/profiler/analysis/parser/timeline_assembly_factory/ascend_timeline_assembler.py +264 -0
  438. mindspore/profiler/analysis/parser/timeline_assembly_factory/base_timeline_assembler.py +40 -0
  439. mindspore/profiler/analysis/parser/timeline_assembly_factory/trace_view_container.py +106 -0
  440. mindspore/profiler/analysis/parser/timeline_creator/__init__.py +0 -0
  441. mindspore/profiler/analysis/parser/timeline_creator/base_timeline_creator.py +44 -0
  442. mindspore/profiler/analysis/parser/timeline_creator/cpu_op_timeline_creator.py +90 -0
  443. mindspore/profiler/analysis/parser/timeline_creator/fwk_timeline_creator.py +76 -0
  444. mindspore/profiler/analysis/parser/timeline_creator/msprof_timeline_creator.py +103 -0
  445. mindspore/profiler/analysis/parser/timeline_creator/scope_layer_timeline_creator.py +134 -0
  446. mindspore/profiler/analysis/parser/timeline_event/__init__.py +0 -0
  447. mindspore/profiler/analysis/parser/timeline_event/base_event.py +233 -0
  448. mindspore/profiler/analysis/parser/timeline_event/cpu_op_event.py +47 -0
  449. mindspore/profiler/analysis/parser/timeline_event/flow_event.py +36 -0
  450. mindspore/profiler/analysis/parser/timeline_event/fwk_event.py +415 -0
  451. mindspore/profiler/analysis/parser/timeline_event/msprof_event.py +73 -0
  452. mindspore/profiler/analysis/parser/timeline_event/scope_layer_event.py +53 -0
  453. mindspore/profiler/analysis/parser/timeline_event/timeline_event_pool.py +146 -0
  454. mindspore/profiler/analysis/task_manager.py +131 -0
  455. mindspore/profiler/analysis/time_converter.py +84 -0
  456. mindspore/profiler/analysis/viewer/__init__.py +0 -0
  457. mindspore/profiler/analysis/viewer/ascend_communication_viewer.py +372 -0
  458. mindspore/profiler/analysis/viewer/ascend_integrate_viewer.py +87 -0
  459. mindspore/profiler/analysis/viewer/ascend_kernel_details_viewer.py +250 -0
  460. mindspore/profiler/analysis/viewer/ascend_memory_viewer.py +320 -0
  461. mindspore/profiler/analysis/viewer/ascend_op_memory_viewer.py +327 -0
  462. mindspore/profiler/analysis/viewer/ascend_step_trace_time_viewer.py +376 -0
  463. mindspore/profiler/analysis/viewer/ascend_timeline_viewer.py +58 -0
  464. mindspore/profiler/analysis/viewer/base_viewer.py +26 -0
  465. mindspore/profiler/analysis/viewer/ms_dataset_viewer.py +96 -0
  466. mindspore/profiler/analysis/viewer/ms_minddata_viewer.py +581 -0
  467. mindspore/profiler/analysis/work_flow.py +73 -0
  468. mindspore/profiler/common/ascend_msprof_exporter.py +139 -0
  469. mindspore/profiler/common/command_executor.py +90 -0
  470. mindspore/profiler/common/constant.py +186 -3
  471. mindspore/profiler/common/file_manager.py +208 -0
  472. mindspore/profiler/common/log.py +130 -0
  473. mindspore/profiler/common/msprof_cmd_tool.py +221 -0
  474. mindspore/profiler/common/path_manager.py +395 -0
  475. mindspore/profiler/common/process_bar.py +168 -0
  476. mindspore/profiler/common/process_pool.py +9 -3
  477. mindspore/profiler/common/profiler_context.py +500 -0
  478. mindspore/profiler/common/profiler_info.py +304 -0
  479. mindspore/profiler/common/profiler_meta_data.py +74 -0
  480. mindspore/profiler/common/profiler_output_path.py +284 -0
  481. mindspore/profiler/common/profiler_parameters.py +251 -0
  482. mindspore/profiler/common/profiler_path_manager.py +179 -0
  483. mindspore/profiler/common/record_function.py +76 -0
  484. mindspore/profiler/common/tlv_decoder.py +76 -0
  485. mindspore/profiler/common/util.py +75 -2
  486. mindspore/profiler/dynamic_profiler.py +341 -75
  487. mindspore/profiler/envprofiler.py +163 -0
  488. mindspore/profiler/experimental_config.py +197 -0
  489. mindspore/profiler/mstx.py +242 -0
  490. mindspore/profiler/platform/__init__.py +21 -0
  491. mindspore/profiler/platform/base_profiler.py +40 -0
  492. mindspore/profiler/platform/cpu_profiler.py +124 -0
  493. mindspore/profiler/platform/gpu_profiler.py +74 -0
  494. mindspore/profiler/platform/npu_profiler.py +335 -0
  495. mindspore/profiler/profiler.py +1073 -90
  496. mindspore/profiler/profiler_action_controller.py +187 -0
  497. mindspore/profiler/profiler_interface.py +118 -0
  498. mindspore/profiler/schedule.py +243 -0
  499. mindspore/rewrite/api/node.py +15 -13
  500. mindspore/rewrite/api/symbol_tree.py +2 -3
  501. mindspore/run_check/_check_version.py +27 -20
  502. mindspore/run_check/run_check.py +1 -1
  503. mindspore/runtime/__init__.py +37 -0
  504. mindspore/runtime/device.py +27 -0
  505. mindspore/runtime/event.py +209 -0
  506. mindspore/runtime/executor.py +177 -0
  507. mindspore/runtime/memory.py +409 -0
  508. mindspore/runtime/stream.py +460 -0
  509. mindspore/runtime/thread_bind_core.py +401 -0
  510. mindspore/safeguard/rewrite_obfuscation.py +12 -9
  511. mindspore/swresample-4.dll +0 -0
  512. mindspore/swscale-6.dll +0 -0
  513. mindspore/tinyxml2.dll +0 -0
  514. mindspore/train/__init__.py +8 -8
  515. mindspore/train/_utils.py +88 -25
  516. mindspore/train/amp.py +9 -5
  517. mindspore/train/callback/__init__.py +2 -2
  518. mindspore/train/callback/_callback.py +2 -16
  519. mindspore/train/callback/_checkpoint.py +53 -55
  520. mindspore/train/callback/_cluster_monitor.py +14 -18
  521. mindspore/train/callback/_early_stop.py +1 -1
  522. mindspore/train/callback/_flops_collector.py +103 -68
  523. mindspore/train/callback/_history.py +8 -5
  524. mindspore/train/callback/_lambda_callback.py +2 -2
  525. mindspore/train/callback/_landscape.py +0 -3
  526. mindspore/train/callback/_loss_monitor.py +2 -1
  527. mindspore/train/callback/_on_request_exit.py +6 -5
  528. mindspore/train/callback/_reduce_lr_on_plateau.py +11 -6
  529. mindspore/train/callback/_summary_collector.py +52 -19
  530. mindspore/train/callback/_time_monitor.py +2 -1
  531. mindspore/train/callback/{_tft_register.py → _train_fault_tolerance.py} +204 -107
  532. mindspore/train/data_sink.py +25 -2
  533. mindspore/train/dataset_helper.py +15 -16
  534. mindspore/train/loss_scale_manager.py +8 -7
  535. mindspore/train/metrics/accuracy.py +3 -3
  536. mindspore/train/metrics/confusion_matrix.py +9 -9
  537. mindspore/train/metrics/error.py +3 -3
  538. mindspore/train/metrics/hausdorff_distance.py +4 -4
  539. mindspore/train/metrics/mean_surface_distance.py +3 -3
  540. mindspore/train/metrics/metric.py +0 -12
  541. mindspore/train/metrics/occlusion_sensitivity.py +4 -2
  542. mindspore/train/metrics/precision.py +11 -10
  543. mindspore/train/metrics/recall.py +9 -9
  544. mindspore/train/metrics/root_mean_square_surface_distance.py +2 -2
  545. mindspore/train/mind_ir_pb2.py +174 -46
  546. mindspore/train/model.py +184 -113
  547. mindspore/train/serialization.py +622 -978
  548. mindspore/train/summary/_summary_adapter.py +2 -2
  549. mindspore/train/summary/summary_record.py +2 -3
  550. mindspore/train/train_thor/model_thor.py +1 -1
  551. mindspore/turbojpeg.dll +0 -0
  552. mindspore/utils/__init__.py +6 -3
  553. mindspore/utils/dryrun.py +140 -0
  554. mindspore/utils/hooks.py +81 -0
  555. mindspore/utils/runtime_execution_order_check.py +550 -0
  556. mindspore/utils/utils.py +138 -4
  557. mindspore/version.py +1 -1
  558. {mindspore-2.4.10.dist-info → mindspore-2.6.0rc1.dist-info}/METADATA +3 -3
  559. {mindspore-2.4.10.dist-info → mindspore-2.6.0rc1.dist-info}/RECORD +562 -393
  560. {mindspore-2.4.10.dist-info → mindspore-2.6.0rc1.dist-info}/entry_points.txt +1 -1
  561. mindspore/_install_custom.py +0 -43
  562. mindspore/common/_register_for_adapter.py +0 -74
  563. mindspore/common/_tensor_overload.py +0 -139
  564. mindspore/mindspore_np_dtype.dll +0 -0
  565. mindspore/ops/auto_generate/gen_arg_dtype_cast.py +0 -252
  566. mindspore/ops/auto_generate/gen_arg_handler.py +0 -197
  567. mindspore/ops/operations/_opaque_predicate_registry.py +0 -41
  568. mindspore/ops_generate/gen_aclnn_implement.py +0 -263
  569. mindspore/ops_generate/gen_ops_inner_prim.py +0 -131
  570. mindspore/ops_generate/gen_pyboost_func.py +0 -1052
  571. mindspore/ops_generate/gen_utils.py +0 -209
  572. mindspore/ops_generate/op_proto.py +0 -145
  573. mindspore/ops_generate/template.py +0 -261
  574. mindspore/profiler/envprofiling.py +0 -254
  575. mindspore/profiler/profiling.py +0 -1926
  576. {mindspore-2.4.10.dist-info → mindspore-2.6.0rc1.dist-info}/WHEEL +0 -0
  577. {mindspore-2.4.10.dist-info → mindspore-2.6.0rc1.dist-info}/top_level.txt +0 -0
@@ -13,98 +13,364 @@
13
13
  # limitations under the License.
14
14
  # ============================================================================
15
15
  """Profiling api file."""
16
- from mindspore.profiler.common.registry import PROFILERS
17
- from mindspore.profiler.common.constant import DeviceTarget
18
- from mindspore.profiler.common.constant import ProfilerLevel
19
- from mindspore.profiler.platform_profiler.prof_context import ProfContext
16
+ import os
17
+ import json
18
+ from typing import Optional, Dict, Callable, Any
19
+ from sys import getsizeof
20
+ from concurrent.futures import ProcessPoolExecutor, as_completed
20
21
 
22
+ from mindspore import log as logger
23
+ from mindspore.profiler.common.constant import ProfilerStepNameConstant, DeviceTarget
24
+ from mindspore.profiler.common.profiler_context import ProfilerContext
25
+ from mindspore.profiler.platform.npu_profiler import NPUProfilerAnalysis
26
+ from mindspore.profiler.profiler_action_controller import ProfilerActionController
27
+ from mindspore.profiler.experimental_config import _ExperimentalConfig
28
+ from mindspore.profiler.profiler_interface import ProfilerInterface
29
+ from mindspore.profiler.schedule import _default_schedule_fn, ProfilerAction, Schedule
30
+ from mindspore.profiler.common.record_function import RecordFunction
31
+ from mindspore.profiler.common.path_manager import PathManager
32
+ from mindspore.profiler.common.profiler_path_manager import ProfilerPathManager
33
+ from mindspore.profiler.common.profiler_meta_data import ProfilerMetaData
21
34
 
22
- class NewProfiler:
35
+
36
+ def tensorboard_trace_handler(dir_name: str = None, worker_name: str = None,
37
+ analyse_flag: bool = True, async_mode: bool = False):
23
38
  """
24
- Refactor profiler
39
+ For each step in dynamic graph mode, call this method for online analyse.
40
+
41
+ Args:
42
+ dir_name (str, optional): Specifies the directory path to save the analysis results. The default is ``None``.
43
+ The default save path is ``"./data"``.
44
+ worker_name (str, optional): Specifies the system version name. The default is ``None``. The default project
45
+ thread name is ``"Name of the current operating system + process ID"``.
46
+ analyse_flag (bool, optional): Whether to enable online analysis. The default value is ``True``.
47
+ Indicates online analysis.
48
+ async_mode (bool, optional): Whether to use asynchronous parsing mode. The default value is ``False``. Indicates
49
+ the use of synchronous parsing mode.
50
+
51
+ Examples:
52
+ >>> import numpy as np
53
+ >>> import mindspore
54
+ >>> import mindspore.dataset as ds
55
+ >>> from mindspore import context, nn
56
+ >>> from mindspore.profiler import ProfilerLevel, AicoreMetrics, ExportType, ProfilerActivity
57
+ >>>
58
+ >>> class Net(nn.Cell):
59
+ ... def __init__(self):
60
+ ... super(Net, self).__init__()
61
+ ... self.fc = nn.Dense(2, 2)
62
+ ...
63
+ ... def construct(self, x):
64
+ ... return self.fc(x)
65
+ >>>
66
+ >>> def generator_net():
67
+ ... for _ in range(2):
68
+ ... yield np.ones([2, 2]).astype(np.float32), np.ones([2]).astype(np.int32)
69
+ >>>
70
+ >>> def train(test_net):
71
+ ... optimizer = nn.Momentum(test_net.trainable_params(), 1, 0.9)
72
+ ... loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True)
73
+ ... data = ds.GeneratorDataset(generator_net(), ["data", "label"])
74
+ ... model = mindspore.train.Model(test_net, loss, optimizer)
75
+ ... model.train(1, data)
76
+ >>>
77
+ >>> if __name__ == '__main__':
78
+ ... # If the device_target is GPU, set the device_target to "GPU"
79
+ ... context.set_context(mode=mindspore.GRAPH_MODE)
80
+ ... mindspore.set_device("Ascend")
81
+ ...
82
+ ... # Init Profiler
83
+ ... experimental_config = mindspore.profiler._ExperimentalConfig(
84
+ ... profiler_level=ProfilerLevel.Level0,
85
+ ... aic_metrics=AicoreMetrics.AiCoreNone,
86
+ ... l2_cache=False,
87
+ ... mstx=False,
88
+ ... data_simplification=False,
89
+ ... export_type=[ExportType.Text])
90
+ ... steps = 10
91
+ ... net = Net()
92
+ ... # Note that the Profiler should be initialized before model.train
93
+ ... with mindspore.profiler.profile(activities=[ProfilerActivity.CPU, ProfilerActivity.NPU],
94
+ ... schedule=mindspore.profiler.schedule(wait=1, warmup=1, active=2,
95
+ ... repeat=1, skip_first=2),
96
+ ... on_trace_ready=mindspore.profiler.tensorboard_trace_handler("./data"),
97
+ ... profile_memory=False,
98
+ ... experimental_config=experimental_config) as prof:
99
+ ...
100
+ ... # Train Model
101
+ ... for step in range(steps):
102
+ ... train(net)
103
+ ... prof.step()
25
104
  """
105
+ ProfilerPathManager().init(worker_name, dir_name)
106
+ if not isinstance(analyse_flag, bool):
107
+ logger.warning("analyse_flag is not bool, set by default.")
108
+ analyse_flag = True
109
+ if not isinstance(async_mode, bool):
110
+ logger.warning("async_mode is not bool, set by default.")
111
+ async_mode = False
26
112
 
27
- def __init__(
28
- self,
29
- output_path: str = "./data",
30
- profiler_level: ProfilerLevel = None,
31
- op_time: bool = True,
32
- profile_communication: bool = False,
33
- profile_memory: bool = False,
34
- parallel_strategy: bool = False,
35
- start_profile: bool = True,
36
- aicore_metrics: int = 0,
37
- l2_cache: bool = False,
38
- hbm_ddr: bool = False,
39
- pcie: bool = False,
40
- sync_enable: bool = True,
41
- data_process: bool = False,
42
- timeline_limit: int = 500,
43
- profile_framework: str = None,
44
- with_stack: bool = False,
45
- data_simplification: bool = True,
46
- **kwargs) -> None:
47
-
48
- self._prof_context = ProfContext(
49
- output_path=output_path,
50
- profiler_level=profiler_level,
51
- op_time=op_time,
52
- profile_communication=profile_communication,
53
- profile_memory=profile_memory,
54
- parallel_strategy=parallel_strategy,
55
- start_profile=start_profile,
56
- aicore_metrics=aicore_metrics,
57
- l2_cache=l2_cache,
58
- hbm_ddr=hbm_ddr,
59
- pcie=pcie,
60
- sync_enable=sync_enable,
61
- data_process=data_process,
62
- timeline_limit=timeline_limit,
63
- profile_framework=profile_framework,
64
- with_stack=with_stack,
65
- data_simplification=data_simplification
66
- )
113
+ def handler_fn() -> None:
114
+ if analyse_flag:
115
+ NPUProfilerAnalysis.online_analyse(async_mode=async_mode)
67
116
 
68
- self._has_started = False
117
+ return handler_fn
69
118
 
70
- self._cpu_profiler = PROFILERS.get_modules().get(DeviceTarget.CPU.value)(
71
- op_time=self._prof_context.op_time,
72
- with_stack=self._prof_context.with_stack,
73
- data_process=self._prof_context.data_process,
74
- output_path=self._prof_context.output_path,
75
- profile_memory=self._prof_context.profile_memory,
76
- profile_framework=self._prof_context.profile_framework
77
- )
78
119
 
79
- self._device_target = self._prof_context.device_target
80
- self._device_profiler = PROFILERS.get_modules().get(self._device_target)(
81
- self._prof_context.get_args()
82
- )
120
+ class Profiler:
121
+ r"""
122
+ The current interface is deprecated, please use: :class:`mindspore.profiler.profile` instead.
123
+ This class to enable the profiling of MindSpore neural networks.
124
+ MindSpore users can import the mindspore.Profiler, initialize the Profiler object to start profiling,
125
+ and use Profiler.analyse() to stop profiling and analyse the results.
126
+ Users can visualize the results using the `MindStudio Insight
127
+ <https://www.hiascend.com/developer/download/community/result?module=pt+sto+cann>`_ tool.
128
+ Now, Profiler supports AICORE operator, AICPU operator, HostCPU operator, memory,
129
+ correspondence, cluster, etc data analysis.
130
+
131
+ Args:
132
+ start_profile (bool, optional): The start_profile parameter controls whether to enable or disable performance
133
+ data collection based on conditions. Default: ``True`` .
134
+ output_path (str, optional): Output data path. Default: ``"./data"`` .
135
+ profiler_level (ProfilerLevel, optional): (Ascend only) The level of profiling.
136
+ Default: ``ProfilerLevel.Level0``.
137
+
138
+ - ProfilerLevel.LevelNone: This setting takes effect only when mstx is enabled, indicating that no operator
139
+ data is collected on the device side.
140
+ - ProfilerLevel.Level0: Leanest level of profiling data collection, collects information about the elapsed
141
+ time of the computational operators on the NPU and communication large operator information.
142
+ - ProfilerLevel.Level1: Collect more CANN layer AscendCL data and AICore performance metrics and
143
+ communication mini operator information based on Level0.
144
+ - ProfilerLevel.Level2: Collect GE and Runtime information in CANN layer on top of Level1
145
+ activities (list, optional): The activities to collect.
146
+ Default: ``[ProfilerActivity.CPU, ProfilerActivity.NPU]``.
147
+
148
+ - ProfilerActivity.CPU: Collect MindSpore framework data.
149
+ - ProfilerActivity.NPU: Collect CANN software stack and NPU data.
150
+ - ProfilerActivity.GPU: Collect GPU data.
151
+ schedule (schedule, optional): Sets the action strategy for the capture, defined by the schedule class,
152
+ to be used with the step interface. Default: ``None``. Performance data of all steps is collected.
153
+ For details, see :class:`mindspore.profiler.schedule` .
154
+ on_trace_ready (Callable, optional): Sets the callback function to be executed when the performance data
155
+ is collected. Default: ``None``. It indicates that only performance data is collected, but not resolved.
156
+ For details, see :func:`mindspore.profiler.tensorboard_trace_handler` .
157
+ profile_memory (bool, optional): (Ascend only) Whether to collect tensor memory data, collect when ``True`` .
158
+ When using this parameter, `activities` must set to ``[ProfilerActivity.CPU, ProfilerActivity.NPU]``.
159
+ Collecting operator memory data when the graph compilation level is O2 requires collecting from the
160
+ first step. Default: ``False`` . The operator name currently collected by this parameter is incomplete.
161
+ This issue will be resolved in later versions. It is recommended to use the environment variable
162
+ ``MS_ALLOC_CONF`` instead.
163
+ aic_metrics (AicoreMetrics, optional): (Ascend only) Types of AICORE performance data collected,
164
+ when using this parameter, `activities` must include ``ProfilerActivity.NPU`` , and the value
165
+ must be a member of AicoreMetrics. When `profiler_level` is ``ProfilerLevel.Level0``, the default value is
166
+ ``AicoreMetrics.AiCoreNone``; when `profiler_level` is ``ProfilerLevel.Level1`` or ``ProfilerLevel.Level2``,
167
+ the default value is ``AicoreMetrics.PipeUtilization``.
168
+
169
+ The data items contained in each metric are as follows:
170
+
171
+ - AicoreMetrics.AiCoreNone: Does not collect AICORE data.
172
+ - AicoreMetrics.ArithmeticUtilization: ArithmeticUtilization contains mac_fp16/int8_ratio,
173
+ vec_fp32/fp16/int32_ratio, vec_misc_ratio etc.
174
+ - AicoreMetrics.PipeUtilization: PipeUtilization contains vec_ratio, mac_ratio, scalar_ratio,
175
+ mte1/mte2/mte3_ratio, icache_miss_rate etc.
176
+ - AicoreMetrics.Memory: Memory contains ub_read/write_bw, l1_read/write_bw, l2_read/write_bw,
177
+ main_mem_read/write_bw etc.
178
+ - AicoreMetrics.MemoryL0: MemoryL0 contains l0a_read/write_bw, l0b_read/write_bw, l0c_read/write_bw etc.
179
+ - AicoreMetrics.ResourceConflictRatio: ResourceConflictRatio contains vec_bankgroup/bank/resc_cflt_ratio
180
+ etc.
181
+ - AicoreMetrics.MemoryUB: MemoryUB contains ub_read/write_bw_mte, ub_read/write_bw_vector,
182
+ ub\_/write_bw_scalar etc.
183
+ - AicoreMetrics.L2Cache: L2Cache contains write_cache_hit, write_cache_miss_allocate, r0_read_cache_hit,
184
+ r1_read_cache_hit etc. This function only support Atlas A2 training series products.
185
+ - AicoreMetrics.MemoryAccess: Statistics on storage access bandwidth and storage capacity of main
186
+ storage and l2 cache etc.
187
+ with_stack (bool, optional): (Ascend only) Whether to collect frame host call stack data
188
+ on the Python side. This
189
+ data is presented in the form of a flame graph in the timeline. When using this parameter, `activities` must
190
+ include ``ProfilerActivity.CPU``. Default value: ``False`` .
191
+ data_simplification (bool, optional): (Ascend only) Whether to remove FRAMEWORK data and other redundant data.
192
+ If set to True, only the profiler deliverables and raw performance data under the PROF_XXX directory are
193
+ kept to save space. Default value: ``True`` .
194
+ l2_cache (bool, optional): (Ascend only) Whether to collect l2 cache data, collect when True.
195
+ Default: ``False`` . The l2_cache.csv file is generated in the ASCEND_PROFILER_OUTPUT folder.In O2 mode,
196
+ only wait and skip_first parameters in schedule configuration can be set to 0.
197
+ hbm_ddr (bool, optional): (Ascend only) Whether to collect On-Chip Memory/DDR read and write rate data,
198
+ collect when True. Default: ``False`` .
199
+ pcie (bool, optional): (Ascend only) Whether to collect PCIe bandwidth data, collect when True.
200
+ Default: ``False`` .
201
+ data_process (bool, optional): (Ascend/GPU) Whether to collect data to prepare performance data.
202
+ Default value: ``False`` .
203
+ parallel_strategy (bool, optional): (Ascend only) Whether to collect parallel policy performance data.
204
+ Default value: ``False`` .
205
+ sync_enable (bool, optional): (GPU only) Whether the profiler collects operators in a synchronous way.
206
+ Default: ``True`` .
207
+
208
+ - True: The synchronous way. Before sending the operator to the GPU, the CPU records the start timestamp.
209
+ Then the operator is returned to the CPU after execution, and the end timestamp is recorded,
210
+ The duration of the operator is the difference between the two timestamps.
211
+ - False: The asynchronous way. The duration of the operator is that of sending from the CPU to the GPU.
212
+ This method can reduce the impact of adding profiler on overall training time.
213
+ Raises:
214
+ RuntimeError: When the version of CANN does not match the version of MindSpore,
215
+ MindSpore cannot parse the generated ascend_job_id directory structure.
216
+
217
+ Supported Platforms:
218
+ ``Ascend`` ``GPU``
219
+
220
+ Examples:
221
+ >>> import numpy as np
222
+ >>> import mindspore as ms
223
+ >>> from mindspore import nn
224
+ >>> import mindspore.dataset as ds
225
+ >>> from mindspore import Profiler
226
+ >>> from mindspore.profiler import ProfilerLevel, ProfilerActivity, AicoreMetrics
227
+ >>>
228
+ >>> class Net(nn.Cell):
229
+ ... def __init__(self):
230
+ ... super(Net, self).__init__()
231
+ ... self.fc = nn.Dense(2,2)
232
+ ... def construct(self, x):
233
+ ... return self.fc(x)
234
+ >>>
235
+ >>> def generator():
236
+ ... for i in range(2):
237
+ ... yield (np.ones([2, 2]).astype(np.float32), np.ones([2]).astype(np.int32))
238
+ >>>
239
+ >>> def train(net):
240
+ ... optimizer = nn.Momentum(net.trainable_params(), 1, 0.9)
241
+ ... loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True)
242
+ ... data = ds.GeneratorDataset(generator, ["data", "label"])
243
+ ... model = ms.train.Model(net, loss, optimizer)
244
+ ... model.train(1, data)
245
+ >>>
246
+ >>> if __name__ == '__main__':
247
+ ... # If the device_target is GPU, set the device_target to "GPU"
248
+ ... ms.set_context(mode=ms.GRAPH_MODE, device_target="Ascend")
249
+ ...
250
+ ... # Init Profiler
251
+ ... # Note that the Profiler should be initialized before model.train
252
+ ... profiler = Profiler(profiler_level=ProfilerLevel.Level0,
253
+ ... activities=[ProfilerActivity.CPU, ProfilerActivity.NPU],
254
+ ... aic_metrics=AicoreMetrics.AiCoreNone)
255
+ ...
256
+ ... # Train Model
257
+ ... net = Net()
258
+ ... train(net)
259
+ ...
260
+ ... # Profiler end
261
+ ... profiler.analyse()
262
+ """
263
+ MAX_META_SIZE = 100 * 1024 * 1024 # 100MB
264
+
265
+ def __init__(self, **kwargs) -> None:
266
+ self._metadata: Dict[str, str] = {}
267
+ self._prof_context: ProfilerContext = ProfilerContext()
268
+ self._prof_context.set_params(**kwargs)
269
+ self._has_started: bool = False
270
+ self.schedule_arg = kwargs.get('schedule')
271
+ if self.schedule_arg is not None:
272
+ self.schedule = self._prof_context.schedule
273
+ self._record_steps: bool = True
274
+ self._schedule_no_use_step = True
275
+ else:
276
+ self.schedule = _default_schedule_fn
277
+ self._record_steps: bool = False
278
+ self._schedule_no_use_step = None
279
+ self._step_rec_fn: Optional[RecordFunction] = None
280
+ self.step_num = 0
281
+ self.current_action: ProfilerAction = self.schedule(self.step_num)
282
+ self.action_controller = ProfilerActionController(ProfilerInterface, self._prof_context.on_trace_ready)
283
+ if self._prof_context.start_profile:
284
+ self.start()
83
285
 
84
286
  def start(self) -> None:
85
287
  """
86
- Used for Ascend, GPU, start profiling. Profiling can be turned on based on step and epoch.
87
- """
88
- if not self._has_started:
89
- self._has_started = True
90
- else:
91
- raise RuntimeError("The profiler has already started. Do not turn on again in the open state.")
288
+ Turn on Profiler data collection. Profiler can be turned on by condition.
92
289
 
93
- self._cpu_profiler.start()
94
- self._device_profiler.start()
290
+ Raises:
291
+ RuntimeError: If the profiler has already started.
292
+ RuntimeError: If the `start_profile` parameter is not set or is set to ``True``.
293
+
294
+ Examples:
295
+ >>> from mindspore.train import Callback
296
+ >>> from mindspore import Profiler
297
+ >>> class StopAtStep(Callback):
298
+ ... def __init__(self, start_step, stop_step):
299
+ ... super(StopAtStep, self).__init__()
300
+ ... self.start_step = start_step
301
+ ... self.stop_step = stop_step
302
+ ... self.profiler = Profiler(start_profile=False)
303
+ ...
304
+ ... def step_begin(self, run_context):
305
+ ... cb_params = run_context.original_args()
306
+ ... step_num = cb_params.cur_step_num
307
+ ... if step_num == self.start_step:
308
+ ... self.profiler.start()
309
+ ...
310
+ ... def step_end(self, run_context):
311
+ ... cb_params = run_context.original_args()
312
+ ... step_num = cb_params.cur_step_num
313
+ ... if step_num == self.stop_step:
314
+ ... self.profiler.stop()
315
+ ...
316
+ ... def end(self, run_context):
317
+ ... self.profiler.analyse()
318
+ """
319
+ if self._has_started:
320
+ logger.warning("The profiler has already started. Do not turn on again in the open state.")
321
+ return
322
+ self._has_started = True
323
+ self.action_controller.transit_action(ProfilerAction.NONE, self.current_action)
324
+ if self._record_steps:
325
+ self._step_rec_fn = RecordFunction(ProfilerStepNameConstant.PROFILER_STEP + str(self.step_num))
326
+ self._step_rec_fn.start()
95
327
 
96
328
  def stop(self) -> None:
97
329
  """
98
- Used for Ascend, GPU, stop profiling. Profiling can be turned off based on step and epoch.
330
+ Turn off Profiler data collection. Profiler can be turned off by condition.
331
+
332
+ Raises:
333
+ RuntimeError: If the profiler has not started, this function is disabled.
334
+
335
+ Examples:
336
+ >>> from mindspore.train import Callback
337
+ >>> from mindspore import Profiler
338
+ >>> class StopAtEpoch(Callback):
339
+ ... def __init__(self, start_epoch, stop_epoch):
340
+ ... super(StopAtEpoch, self).__init__()
341
+ ... self.start_epoch = start_epoch
342
+ ... self.stop_epoch = stop_epoch
343
+ ... self.profiler = Profiler(start_profile=False)
344
+ ...
345
+ ... def epoch_begin(self, run_context):
346
+ ... cb_params = run_context.original_args()
347
+ ... epoch_num = cb_params.cur_epoch_num
348
+ ... if epoch_num == self.start_epoch:
349
+ ... self.profiler.start()
350
+ ...
351
+ ... def epoch_end(self, run_context):
352
+ ... cb_params = run_context.original_args()
353
+ ... epoch_num = cb_params.cur_epoch_num
354
+ ... if epoch_num == self.stop_epoch:
355
+ ... self.profiler.stop()
356
+ ...
357
+ ... def end(self, run_context):
358
+ ... self.profiler.analyse()
99
359
  """
100
- if self._has_started:
101
- self._has_started = False
360
+ if self._schedule_no_use_step:
361
+ logger.warning("The profiler has schedule. Please use step() to collect data.")
362
+ return
363
+ if not self._has_started:
364
+ logger.error("The profiler has not started. Do not turn off again in the closed state.")
365
+ return
366
+ self._has_started = False
367
+ if self._record_steps and self._step_rec_fn:
368
+ self._step_rec_fn.stop()
369
+ if self.schedule_arg:
370
+ self.action_controller.transit_action(self.current_action, None)
102
371
  else:
103
- raise RuntimeError("The profiler has not started, so can not stop. Please call the start() method "
104
- "before calling the stop() method.")
105
-
106
- self._cpu_profiler.stop()
107
- self._device_profiler.stop()
372
+ ProfilerInterface.stop()
373
+ ProfilerMetaData.dump_metadata()
108
374
 
109
375
  def analyse(self, offline_path=None, pretty=False, step_list=None, mode="sync") -> None:
110
376
  """
@@ -115,19 +381,232 @@ class NewProfiler:
115
381
  Offline mode isused in abnormal exit scenario. This parameter should be set to ``None``
116
382
  for online mode. Default: ``None``.
117
383
  pretty (bool, optional): Whether to pretty json files. Default: ``False``.
118
- step_list (list, optional): A list of steps that need to be analyzed. Default: ``None``.
119
- By default, all steps will be analyzed.
384
+ step_list (list, optional): A list of steps that need to be analyzed, the steps must be
385
+ consecutive integers. Default: ``None``. By default, all steps will be analyzed.
120
386
  mode (str, optional): Analysis mode, it must be one of ["sync", "async"]. Default: ``sync``.
121
387
 
122
388
  - sync: analyse data in current process, it will block the current process.
123
- - async: analyse data in subprocess, it will not the current process.Since the parsing process
389
+ - async: analyse data in subprocess, it will not block the current process. Since the parsing process
124
390
  will take up extra CPU resources, please enable this mode according to the actual resource situation.
125
391
 
392
+ Examples:
393
+ >>> from mindspore.train import Callback
394
+ >>> from mindspore import Profiler
395
+ >>> class StopAtStep(Callback):
396
+ ... def __init__(self, start_step=1, stop_step=5):
397
+ ... super(StopAtStep, self).__init__()
398
+ ... self.start_step = start_step
399
+ ... self.stop_step = stop_step
400
+ ... self.profiler = Profiler(start_profile=False)
401
+ ...
402
+ ... def step_begin(self, run_context):
403
+ ... cb_params = run_context.original_args()
404
+ ... step_num = cb_params.cur_step_num
405
+ ... if step_num == self.start_step:
406
+ ... self.profiler.start()
407
+ ...
408
+ ... def step_end(self, run_context):
409
+ ... cb_params = run_context.original_args()
410
+ ... step_num = cb_params.cur_step_num
411
+ ... if step_num == self.stop_step:
412
+ ... self.profiler.stop()
413
+ ...
414
+ ... def end(self, run_context):
415
+ ... self.profiler.analyse(step_list=[2,3,4], mode="sync")
416
+ """
417
+ if self._has_started:
418
+ ProfilerInterface.stop()
419
+ self._has_started = False
420
+
421
+ if self.schedule_arg:
422
+ logger.warning("The profiler has schedule. Please use 'on_trace_ready' to analyse data.")
423
+ return
424
+
425
+ if offline_path:
426
+ logger.warning("The parameter 'offline_path' for Profiler.analyse() is deprecated, "
427
+ "please use Profiler.offline_analyse() instead.")
428
+
429
+ self._prof_context.pretty = pretty
430
+ self._prof_context.step_list = step_list
431
+ self._prof_context.mode = mode
432
+
433
+ ProfilerInterface.finalize()
434
+ ProfilerInterface.analyse()
435
+ ProfilerInterface.clear()
436
+
+    @classmethod
+    def offline_analyse(cls, path: str, pretty=False, step_list=None, data_simplification=True) -> None:
+        """
+        Analyze training performance data offline, which is invoked after performance data collection is completed.
+
+        Args:
+            path (str): The profiling data path which needs to be analyzed offline.
+                There needs to be a profiler directory in this path.
+            pretty (bool, optional): Whether to pretty-print JSON files. Default: ``False``.
+            step_list (list, optional): A list of steps that need to be analyzed, the steps must be
+                consecutive integers. Default: ``None``. By default, all steps will be analyzed.
+            data_simplification (bool, optional): Whether to enable data simplification. Default: ``True``.
+
+        Examples:
+            >>> from mindspore import Profiler
+            >>> Profiler.offline_analyse("./profiling_path")
+        """
+        real_path = PathManager.get_real_path(path)
+        PathManager.check_input_directory_path(real_path)
+        ascend_ms_path_list = PathManager.get_ascend_ms_path_list(real_path)
+
+        if not ascend_ms_path_list:
+            msg = (f"Invalid path: {real_path}. Expected a *_ascend_ms_* directory "
+                   "or a parent directory of multiple *_ascend_ms_*")
+            logger.error(msg)
+            return
+
+        worker_number = min(os.cpu_count() // 2, len(ascend_ms_path_list))
+        with ProcessPoolExecutor(max_workers=worker_number) as executor:
+            futures = [
+                executor.submit(
+                    NPUProfilerAnalysis.offline_analyse,
+                    ascend_ms_path,
+                    pretty,
+                    step_list,
+                    data_simplification
+                ) for ascend_ms_path in ascend_ms_path_list
+            ]
+            # Wait for all tasks to complete
+            for future in as_completed(futures):
+                try:
+                    future.result()
+                except Exception as e:  # pylint: disable=W0703
+                    logger.error("offline analysis failed: %s", str(e))
+
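A sketch of the directory layout offline_analyse expects, inferred from the path
checks above (the rank directory names here are hypothetical):

    profiling_path/                      # pass this parent directory, or one
        rank0_ascend_ms_20250101/        # *_ascend_ms_* directory directly
        rank1_ascend_ms_20250101/
    # Each *_ascend_ms_* directory is parsed in its own worker process,
    # capped at os.cpu_count() // 2 workers.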
+    def step(self) -> None:
+        """
+        Used on Ascend with schedule and on_trace_ready to mark step boundaries for
+        collecting and parsing performance data step by step.
+
+        Raises:
+            RuntimeError: If the `start_profile` parameter is not set or the Profiler is not started.
+            RuntimeError: If the `schedule` parameter is not set.
+
+        Examples:
+            >>> import numpy as np
+            >>> import mindspore as ms
+            >>> import mindspore.dataset as ds
+            >>> from mindspore import context, nn, Profiler
+            >>> from mindspore.profiler import (schedule, tensorboard_trace_handler, ProfilerLevel,
+            ...                                 AicoreMetrics, ExportType, ProfilerActivity)
+            >>>
+            >>> class Net(nn.Cell):
+            ...     def __init__(self):
+            ...         super(Net, self).__init__()
+            ...         self.fc = nn.Dense(2, 2)
+            ...
+            ...     def construct(self, x):
+            ...         return self.fc(x)
+            >>>
+            >>> def generator_net():
+            ...     for _ in range(2):
+            ...         yield np.ones([2, 2]).astype(np.float32), np.ones([2]).astype(np.int32)
+            >>>
+            >>> def train(test_net):
+            ...     optimizer = nn.Momentum(test_net.trainable_params(), 1, 0.9)
+            ...     loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True)
+            ...     data = ds.GeneratorDataset(generator_net(), ["data", "label"])
+            ...     model = ms.train.Model(test_net, loss, optimizer)
+            ...     model.train(1, data)
+            >>>
+            >>> if __name__ == '__main__':
+            ...     context.set_context(mode=ms.PYNATIVE_MODE, device_target="Ascend")
+            ...
+            ...     net = Net()
+            ...     STEP_NUM = 15
+            ...
+            ...     with Profiler(schedule=schedule(wait=1, warmup=1, active=2, repeat=1, skip_first=2),
+            ...                   on_trace_ready=tensorboard_trace_handler) as prof:
+            ...         for _ in range(STEP_NUM):
+            ...             train(net)
+            ...             prof.step()
+        """
+        if self.schedule_arg is None:
+            logger.error("With no schedule in the Profiler, step takes no effect!")
+            return
+        if not self._has_started:
+            logger.error("Profiler is stopped, step takes no effect!")
+            return
+        if self._step_rec_fn:
+            self._step_rec_fn.stop()
+        prev_action = self.current_action
+        self.step_num += 1
+        self.current_action = self.schedule(self.step_num)
+        self.action_controller.transit_action(prev_action, self.current_action)
+        self._step_rec_fn = RecordFunction(ProfilerStepNameConstant.PROFILER_STEP + str(self.step_num))
+        self._step_rec_fn.start()
+        self._schedule_no_use_step = False
+
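A small sketch of what each step() call drives, assuming the schedule from the
docstring; schedule objects are callable (see self.schedule(self.step_num) above),
and the printed phases are whatever action values the schedule returns:

    from mindspore.profiler import schedule

    sched = schedule(wait=1, warmup=1, active=2, repeat=1, skip_first=2)
    # step() increments step_num, asks the schedule for that step's phase, and
    # lets the action controller transition between the previous and new phase.
    for step_num in range(1, 8):
        print(step_num, sched(step_num))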
+    def add_metadata(self, key: str, value: str):
+        """
+        Report custom metadata key-value pair data.
+
+        Args:
+            key (str): The key to the metadata.
+            value (str): The value to the metadata.
+
+        Examples:
+            >>> from mindspore import Profiler
+            >>> # Profiler init.
+            >>> profiler = Profiler()
+            >>> # Call Profiler add_metadata
+            >>> profiler.add_metadata("test_key", "test_value")
+            >>> # Profiler end
+            >>> profiler.stop()
+        """
+        if not isinstance(key, str) or not isinstance(value, str):
+            logger.warning("The key and value of metadata must be strings. Skip this metadata.")
+            return
+
+        add_size = getsizeof(key) + getsizeof(value)
+        if getsizeof(self._metadata) + add_size < self.MAX_META_SIZE:
+            if key in self._metadata:
+                logger.warning(f"{key} is already saved as metadata, override it.")
+            self._metadata[key] = value
+            ProfilerMetaData.set_metadata(self._metadata)
+        else:
+            logger.warning("Too many metadata added. Skip this metadata.")
+
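The guard above caps the total metadata footprint at MAX_META_SIZE as measured by
sys.getsizeof. A minimal sketch of the observable behavior:

    from mindspore import Profiler

    profiler = Profiler()
    profiler.add_metadata("run_id", "exp-42")   # stored
    profiler.add_metadata("run_id", "exp-43")   # warns, then overrides
    profiler.add_metadata("epochs", 5)          # warns and skips: not a string
    profiler.stop()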
+    def add_metadata_json(self, key: str, value: str):
+        """
+        Report custom metadata key-value pair data with the value as a JSON string data.
+
+        Args:
+            key (str): The key to the metadata.
+            value (str): The JSON-string format value to the metadata.
+
+        Examples:
+            >>> import json
+            >>> from mindspore import Profiler
+            >>> # Profiler init.
+            >>> profiler = Profiler()
+            >>> # Call Profiler add_metadata_json
+            >>> profiler.add_metadata_json("test_key", json.dumps({"key1": 1, "key2": 2}))
+            >>> # Profiler end, metadata will be saved in profiler_metadata.json
+            >>> profiler.stop()
         """
-        self._cpu_profiler.stop(offline_path, pretty, step_list)
-        self._device_profiler.stop(offline_path, pretty, step_list, mode)
+        if not isinstance(key, str) or not isinstance(value, str):
+            logger.warning("The key and value of metadata must be strings. Skip this metadata.")
+            return
 
-    def op_analyse(self, op_name, device_id=None) -> None:
+        add_size = getsizeof(key) + getsizeof(value)
+        if getsizeof(self._metadata) + add_size < self.MAX_META_SIZE:
+            try:
+                if key in self._metadata:
+                    logger.warning(f"{key} is already saved as metadata, override it.")
+                self._metadata[key] = json.loads(value)
+                ProfilerMetaData.set_metadata(self._metadata)
+            except ValueError:
+                logger.warning("The metadata value must be a JSON format string. Skip this metadata.")
+        else:
+            logger.warning("Too many metadata added. Skip this metadata.")
+
+    def op_analyse(self, op_name, device_id=None):
         """
         Profiler users can use this interface to obtain operator performance data.
 
@@ -135,19 +614,523 @@ class NewProfiler:
             op_name (str or list): The primitive operator name to query.
             device_id (int, optional): ID of the target device. This parameter is optional during network training or
                 inference, and users can use device_id parameter to specify which card operator performance data to
-                parse. If this interface is used for offline data parsing, Default: ``0`` .
+                parse. If this interface is used for offline data parsing, the default value is ``None`` .
+
+        Raises:
+            TypeError: If the `op_name` parameter type is incorrect.
+            TypeError: If the `device_id` parameter type is incorrect.
+            RuntimeError: If MindSpore runs on Ascend, this interface cannot be used.
+
+        Supported Platforms:
+            ``GPU`` ``CPU``
+
+        Examples:
+            >>> from mindspore import Profiler
+            >>> from mindspore import nn
+            >>> from mindspore import Model
+            >>> # Profiler init.
+            >>> profiler = Profiler()
+            >>> # Train Model or eval Model, taking LeNet5 as an example.
+            >>> # Refer to https://gitee.com/mindspore/docs/blob/master/docs/mindspore/code/lenet.py
+            >>> net = LeNet5()
+            >>> optimizer = nn.Momentum(net.trainable_params(), learning_rate=0.1, momentum=0.9)
+            >>> loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True)
+            >>> # Create the dataset taking MNIST as an example.
+            >>> # Refer to https://gitee.com/mindspore/docs/blob/master/docs/mindspore/code/mnist.py
+            >>> dataloader = create_dataset()
+            >>> model = Model(net, loss, optimizer)
+            >>> model.train(5, dataloader, dataset_sink_mode=False)
+            >>>
+            >>> # Profiler end
+            >>> profiler.analyse()
+            >>>
+            >>> profiler.op_analyse(op_name=["BiasAdd", "Conv2D"])
         """
+        if self._prof_context.device_target == DeviceTarget.NPU.value:
+            raise RuntimeError("The interface 'Profiler.op_analyse()' is not supported on Ascend currently.")
 
-    @classmethod
-    def offline_analyse(cls, path: str, pretty=False, step_list=None) -> None:
+        if device_id and not isinstance(device_id, int):
+            raise TypeError(f"For 'Profiler.op_analyse()', the parameter device_id must be int, "
+                            f"but got type {type(device_id)}")
+
+        if not isinstance(op_name, str) and not isinstance(op_name, list):
+            raise TypeError(f"For 'Profiler.op_analyse()', the parameter op_name must be str or list, "
+                            f"but got type {type(op_name)}")
+        if not op_name:
+            raise TypeError("For 'Profiler.op_analyse()', the parameter op_name cannot be \"\", '' or [].")
+
+        from mindspore.profiler.parser.framework_parser import GpuFrameWorkParser
+        dev_id = self._prof_context.device_id if device_id is None else device_id
+        parser = GpuFrameWorkParser(self._prof_context.framework_path, dev_id, op_name)
+        op_info = parser.parse()
+        return op_info
+
+    def __enter__(self) -> 'Profiler':
+        if not self._has_started:
+            self.start()
+        return self
+
+    def __exit__(self, exc_type, exc_value, traceback) -> None:
+        if self._has_started:
+            self.stop()
+
+    def __del__(self):
+        if self._has_started:
+            self.stop()
+            logger.warning("Profiler is stopped at the end of the program.")
+
+
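Because __enter__ starts collection lazily and __exit__ stops it, the profiler
works as a context manager; a minimal sketch with default arguments:

    from mindspore import Profiler

    with Profiler() as prof:
        # ... run training steps; stop() is called automatically on exit ...
        pass
    prof.analyse()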
+class Profile:
+    r"""
+    This class enables profiling of MindSpore neural networks.
+    MindSpore users can import mindspore.profiler.profile and initialize the profile object to start profiling.
+    Use profile.start() to start the analysis, and use profile.stop() to stop collecting and analyzing the results.
+    Users can visualize the results using the `MindStudio Insight
+    <https://www.hiascend.com/developer/download/community/result?module=pt+sto+cann>`_ tool.
+    Now, profile supports AICORE operator, AICPU operator, HostCPU operator, memory,
+    correspondence, cluster, and other data analysis.
+
+    Args:
+        start_profile (bool, optional): The start_profile parameter controls whether to enable or disable performance
+            data collection based on conditions. Default: ``True`` .
+        activities (list, optional): The activities to collect.
+            Default: ``[ProfilerActivity.CPU, ProfilerActivity.NPU]``.
+
+            - ProfilerActivity.CPU: Collect MindSpore framework data.
+            - ProfilerActivity.NPU: Collect CANN software stack and NPU data.
+            - ProfilerActivity.GPU: Collect GPU data.
+        schedule (schedule, optional): Sets the action strategy for the capture, defined by the schedule class,
+            to be used with the step interface. Default: ``None``. Performance data of all steps is collected.
+            For details, see :class:`mindspore.profiler.schedule` .
+        on_trace_ready (Callable, optional): Sets the callback function to be executed when the performance data
+            is collected. Default: ``None``. It indicates that only performance data is collected, but not resolved.
+            For details, see :func:`mindspore.profiler.tensorboard_trace_handler` .
+        profile_memory (bool, optional): (Ascend only) Whether to collect tensor memory data, collect when ``True`` .
+            When using this parameter, `activities` must be set to ``[ProfilerActivity.CPU, ProfilerActivity.NPU]``.
+            Collecting operator memory data when the graph compilation level is O2 requires collecting from the
+            first step. Default: ``False`` . The operator name currently collected by this parameter is incomplete.
+            This issue will be resolved in later versions. It is recommended to use the environment variable
+            ``MS_ALLOC_CONF`` instead.
+        with_stack (bool, optional): (Ascend only) Whether to collect frame host call stack data on the Python side.
+            This data is presented in the form of a flame graph in the timeline. When using this parameter,
+            `activities` must include ``ProfilerActivity.CPU``. Default value: ``False`` .
+        hbm_ddr (bool, optional): (Ascend only) Whether to collect On-Chip Memory/DDR read and write rate data,
+            collect when ``True``. Default: ``False`` .
+        pcie (bool, optional): (Ascend only) Whether to collect PCIe bandwidth data, collect when ``True``.
+            Default: ``False`` .
+        data_process (bool, optional): (Ascend/GPU) Whether to collect data preparation performance data.
+            Default value: ``False`` .
+        parallel_strategy (bool, optional): (Ascend only) Whether to collect parallel strategy performance data.
+            Default value: ``False`` .
+        sync_enable (bool, optional): (GPU only) Whether the profiler collects operators in a synchronous way.
+            Default: ``True`` .
+
+            - True: The synchronous way. Before sending the operator to the GPU, the CPU records the start timestamp.
+              Then the operator is returned to the CPU after execution, and the end timestamp is recorded.
+              The duration of the operator is the difference between the two timestamps.
+            - False: The asynchronous way. The duration of the operator is that of sending from the CPU to the GPU.
+              This method can reduce the impact of adding the profiler on overall training time.
+        experimental_config (_ExperimentalConfig, optional): Expandable parameters can be configured in this
+            configuration item. For details, see :class:`mindspore.profiler._ExperimentalConfig` .
+
+    Raises:
+        RuntimeError: When the version of CANN does not match the version of MindSpore,
+            MindSpore cannot parse the generated ascend_job_id directory structure.
+
+    Supported Platforms:
+        ``Ascend`` ``GPU``
+
+    Examples:
+        >>> import numpy as np
+        >>> import mindspore
+        >>> from mindspore import nn, context
+        >>> import mindspore.dataset as ds
+        >>> from mindspore.profiler import ProfilerLevel, ProfilerActivity, AicoreMetrics, ExportType
+        >>>
+        >>> class Net(nn.Cell):
+        ...     def __init__(self):
+        ...         super(Net, self).__init__()
+        ...         self.fc = nn.Dense(2, 2)
+        ...     def construct(self, x):
+        ...         return self.fc(x)
+        >>>
+        >>> def generator():
+        ...     for i in range(2):
+        ...         yield np.ones([2, 2]).astype(np.float32), np.ones([2]).astype(np.int32)
+        >>>
+        >>> def train(net):
+        ...     optimizer = nn.Momentum(net.trainable_params(), 1, 0.9)
+        ...     loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True)
+        ...     data = ds.GeneratorDataset(generator, ["data", "label"])
+        ...     model = mindspore.train.Model(net, loss, optimizer)
+        ...     model.train(1, data)
+        >>>
+        >>> if __name__ == '__main__':
+        ...     # If the device_target is GPU, set the device_target to "GPU"
+        ...     context.set_context(mode=mindspore.GRAPH_MODE)
+        ...     mindspore.set_device("Ascend")
+        ...
+        ...     # Init Profiler
+        ...     experimental_config = mindspore.profiler._ExperimentalConfig(
+        ...         profiler_level=ProfilerLevel.Level0,
+        ...         aic_metrics=AicoreMetrics.AiCoreNone,
+        ...         l2_cache=False,
+        ...         mstx=False,
+        ...         data_simplification=False,
+        ...         export_type=[ExportType.Text])
+        ...     steps = 10
+        ...     net = Net()
+        ...     # Note that the Profiler should be initialized before model.train
+        ...     with mindspore.profiler.profile(activities=[ProfilerActivity.CPU, ProfilerActivity.NPU],
+        ...                                     schedule=mindspore.profiler.schedule(wait=1, warmup=1, active=2,
+        ...                                                                          repeat=1, skip_first=2),
+        ...                                     on_trace_ready=mindspore.profiler.
+        ...                                     tensorboard_trace_handler("./data"),
+        ...                                     profile_memory=False,
+        ...                                     experimental_config=experimental_config) as prof:
+        ...
+        ...         # Train Model
+        ...         for step in range(steps):
+        ...             train(net)
+        ...             prof.step()
+    """
+
+    def __init__(
+            self,
+            activities: list = None,
+            with_stack: bool = False,
+            profile_memory: bool = False,
+            data_process: bool = False,
+            parallel_strategy: bool = False,
+            start_profile: bool = True,
+            hbm_ddr: bool = False,
+            pcie: bool = False,
+            sync_enable: bool = True,
+            schedule: Schedule = None,
+            on_trace_ready: Optional[Callable[..., Any]] = None,
+            experimental_config: Optional[_ExperimentalConfig] = None,
+    ):
+        self._activities = activities
+        self._with_stack = with_stack
+        self._profile_memory = profile_memory
+        self._data_process = data_process
+        self._parallel_strategy = parallel_strategy
+        self._start_profile = start_profile
+        self._hbm_ddr = hbm_ddr
+        self._pcie = pcie
+        self._sync_enable = sync_enable
+        self._schedule = schedule
+        self._on_trace_ready = on_trace_ready
+        self._experimental_config = experimental_config or _ExperimentalConfig()
+        self._profiler = Profiler(
+            profiler_level=self._experimental_config.profiler_level,
+            activities=self._activities,
+            aic_metrics=self._experimental_config.aic_metrics,
+            with_stack=self._with_stack,
+            profile_memory=self._profile_memory,
+            data_process=self._data_process,
+            parallel_strategy=self._parallel_strategy,
+            start_profile=self._start_profile,
+            l2_cache=self._experimental_config.l2_cache,
+            hbm_ddr=self._hbm_ddr,
+            pcie=self._pcie,
+            sync_enable=self._sync_enable,
+            data_simplification=self._experimental_config.data_simplification,
+            mstx=self._experimental_config.mstx,
+            export_type=self._experimental_config.export_type,
+            schedule=self._schedule,
+            on_trace_ready=self._on_trace_ready,
+        )
+
+    def __enter__(self) -> 'Profile':
+        self._profiler.__enter__()
+        return self
+
+    def __exit__(self, exe_type, exe_val, exc_tb):
+        self._profiler.__exit__(exe_type, exe_val, exc_tb)
+
+    def __del__(self):
+        self._profiler.__del__()
+
+    def start(self) -> None:
         """
-        Analyze training performance data offline, which is invoked after performance data collection is completed.
+        Turn on profile data collection. The profile can be turned on conditionally.
+
+        Raises:
+            RuntimeError: If the profile has already started.
+            RuntimeError: If the `start_profile` parameter is not set or is set to ``True``.
+
+        Examples:
+            >>> import numpy as np
+            >>> import mindspore
+            >>> from mindspore import nn, context
+            >>> import mindspore.dataset as ds
+            >>> from mindspore.profiler import ProfilerLevel, ProfilerActivity, AicoreMetrics, ExportType
+            >>>
+            >>> class Net(nn.Cell):
+            ...     def __init__(self):
+            ...         super(Net, self).__init__()
+            ...         self.fc = nn.Dense(2, 2)
+            ...     def construct(self, x):
+            ...         return self.fc(x)
+            >>>
+            >>> def generator():
+            ...     for i in range(2):
+            ...         yield np.ones([2, 2]).astype(np.float32), np.ones([2]).astype(np.int32)
+            >>>
+            >>> def train(net):
+            ...     optimizer = nn.Momentum(net.trainable_params(), 1, 0.9)
+            ...     loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True)
+            ...     data = ds.GeneratorDataset(generator, ["data", "label"])
+            ...     model = mindspore.train.Model(net, loss, optimizer)
+            ...     model.train(1, data)
+            >>>
+            >>> if __name__ == '__main__':
+            ...     # If the device_target is GPU, set the device_target to "GPU"
+            ...     context.set_context(mode=mindspore.GRAPH_MODE)
+            ...     mindspore.set_device("Ascend")
+            ...
+            ...     # Init Profiler
+            ...     experimental_config = mindspore.profiler._ExperimentalConfig(
+            ...         profiler_level=ProfilerLevel.Level0,
+            ...         aic_metrics=AicoreMetrics.AiCoreNone,
+            ...         l2_cache=False,
+            ...         mstx=False,
+            ...         data_simplification=False,
+            ...         export_type=[ExportType.Text])
+            ...     steps = 10
+            ...     net = Net()
+            ...     # Note that the Profiler should be initialized before model.train
+            ...     prof = mindspore.profiler.profile(activities=[ProfilerActivity.CPU, ProfilerActivity.NPU],
+            ...                                       schedule=mindspore.profiler.schedule(wait=1, warmup=1, active=2,
+            ...                                                                            repeat=1, skip_first=2),
+            ...                                       on_trace_ready=mindspore.profiler.
+            ...                                       tensorboard_trace_handler("./data"),
+            ...                                       profile_memory=False,
+            ...                                       experimental_config=experimental_config)
+            ...     prof.start()
+            ...     # Train Model
+            ...     for step in range(steps):
+            ...         train(net)
+            ...         prof.step()
+            ...     prof.stop()
+        """
+        self._profiler.start()
+
+    def stop(self) -> None:
+        """
+        Turn off profile data collection. The profile can be turned off conditionally.
+
+        Raises:
+            RuntimeError: If the profile has not started, this function is disabled.
+
+        Examples:
+            >>> import numpy as np
+            >>> import mindspore
+            >>> from mindspore import nn, context
+            >>> import mindspore.dataset as ds
+            >>> from mindspore.profiler import ProfilerLevel, ProfilerActivity, AicoreMetrics, ExportType
+            >>>
+            >>> class Net(nn.Cell):
+            ...     def __init__(self):
+            ...         super(Net, self).__init__()
+            ...         self.fc = nn.Dense(2, 2)
+            ...     def construct(self, x):
+            ...         return self.fc(x)
+            >>>
+            >>> def generator():
+            ...     for i in range(2):
+            ...         yield np.ones([2, 2]).astype(np.float32), np.ones([2]).astype(np.int32)
+            >>>
+            >>> def train(net):
+            ...     optimizer = nn.Momentum(net.trainable_params(), 1, 0.9)
+            ...     loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True)
+            ...     data = ds.GeneratorDataset(generator, ["data", "label"])
+            ...     model = mindspore.train.Model(net, loss, optimizer)
+            ...     model.train(1, data)
+            >>>
+            >>> if __name__ == '__main__':
+            ...     # If the device_target is GPU, set the device_target to "GPU"
+            ...     context.set_context(mode=mindspore.GRAPH_MODE)
+            ...     mindspore.set_device("Ascend")
+            ...
+            ...     # Init Profiler
+            ...     experimental_config = mindspore.profiler._ExperimentalConfig(
+            ...         profiler_level=ProfilerLevel.Level0,
+            ...         aic_metrics=AicoreMetrics.AiCoreNone,
+            ...         l2_cache=False,
+            ...         mstx=False,
+            ...         data_simplification=False,
+            ...         export_type=[ExportType.Text])
+            ...     steps = 10
+            ...     net = Net()
+            ...     # Note that the Profiler should be initialized before model.train
+            ...     prof = mindspore.profiler.profile(activities=[ProfilerActivity.CPU, ProfilerActivity.NPU],
+            ...                                       schedule=mindspore.profiler.schedule(wait=1, warmup=1, active=2,
+            ...                                                                            repeat=1, skip_first=2),
+            ...                                       on_trace_ready=mindspore.profiler.
+            ...                                       tensorboard_trace_handler("./data"),
+            ...                                       profile_memory=False,
+            ...                                       experimental_config=experimental_config)
+            ...     prof.start()
+            ...     # Train Model
+            ...     for step in range(steps):
+            ...         train(net)
+            ...         prof.step()
+            ...     prof.stop()
+        """
+        self._profiler.stop()
+
+    def step(self) -> None:
+        """
+        Used on Ascend with schedule and on_trace_ready to mark step boundaries for
+        collecting and parsing performance data step by step.
+
+        Raises:
+            RuntimeError: If the `start_profile` parameter is not set or the Profiler is not started.
+            RuntimeError: If the `schedule` parameter is not set.
+
+        Examples:
+            >>> import numpy as np
+            >>> import mindspore
+            >>> from mindspore import nn, context
+            >>> import mindspore.dataset as ds
+            >>> from mindspore.profiler import ProfilerLevel, ProfilerActivity, AicoreMetrics, ExportType
+            >>>
+            >>> class Net(nn.Cell):
+            ...     def __init__(self):
+            ...         super(Net, self).__init__()
+            ...         self.fc = nn.Dense(2, 2)
+            ...     def construct(self, x):
+            ...         return self.fc(x)
+            >>>
+            >>> def generator():
+            ...     for i in range(2):
+            ...         yield np.ones([2, 2]).astype(np.float32), np.ones([2]).astype(np.int32)
+            >>>
+            >>> def train(net):
+            ...     optimizer = nn.Momentum(net.trainable_params(), 1, 0.9)
+            ...     loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True)
+            ...     data = ds.GeneratorDataset(generator, ["data", "label"])
+            ...     model = mindspore.train.Model(net, loss, optimizer)
+            ...     model.train(1, data)
+            >>>
+            >>> if __name__ == '__main__':
+            ...     # If the device_target is GPU, set the device_target to "GPU"
+            ...     context.set_context(mode=mindspore.GRAPH_MODE)
+            ...     mindspore.set_device("Ascend")
+            ...
+            ...     # Init Profiler
+            ...     experimental_config = mindspore.profiler._ExperimentalConfig(
+            ...         profiler_level=ProfilerLevel.Level0,
+            ...         aic_metrics=AicoreMetrics.AiCoreNone,
+            ...         l2_cache=False,
+            ...         mstx=False,
+            ...         data_simplification=False,
+            ...         export_type=[ExportType.Text])
+            ...     steps = 10
+            ...     net = Net()
+            ...     # Note that the Profiler should be initialized before model.train
+            ...     with mindspore.profiler.profile(activities=[ProfilerActivity.CPU, ProfilerActivity.NPU],
+            ...                                     schedule=mindspore.profiler.schedule(wait=1, warmup=1, active=2,
+            ...                                                                          repeat=1, skip_first=2),
+            ...                                     on_trace_ready=mindspore.profiler.tensorboard_trace_handler("./data"),
+            ...                                     profile_memory=False,
+            ...                                     experimental_config=experimental_config) as prof:
+            ...
+            ...         # Train Model
+            ...         for step in range(steps):
+            ...             train(net)
+            ...             prof.step()
+        """
+        self._profiler.step()
+
+    def add_metadata(self, key: str, value: str):
+        """
+        Report custom metadata key-value pair data.
 
         Args:
-            path (str): The profiling data path which need to be analyzed offline.
-                There needs to be a profiler directory in this path.
-            pretty (bool, optional): Whether to pretty json files. Default: ``False``.
-            step_list (list, optional): A list of steps that need to be analyzed. Default: ``None``.
-                By default, all steps will be analyzed.
+            key (str): The key to the metadata.
+            value (str): The value to the metadata.
+
+        Examples:
+            >>> import mindspore
+            >>> # Profiler init.
+            >>> with mindspore.profiler.profile() as prof:
+            ...     # Call Profiler add_metadata
+            ...     prof.add_metadata("test_key", "test_value")
+        """
+
+        self._profiler.add_metadata(key, value)
+
+    def add_metadata_json(self, key: str, value: str):
+        """
+        Report custom metadata key-value pair data with the value as a JSON string data.
+
+        Args:
+            key (str): The key to the metadata.
+            value (str): The JSON-string format value to the metadata.
+
+        Examples:
+            >>> import json
+            >>> import mindspore
+            >>> # Profiler init.
+            >>> with mindspore.profiler.profile() as prof:
+            ...     # Call Profiler add_metadata_json
+            ...     prof.add_metadata_json("test_key", json.dumps({"key1": 1, "key2": 2}))
         """
+        self._profiler.add_metadata_json(key, value)
+
+
+def analyse(profiler_path: str, max_process_number: int = os.cpu_count() // 2, pretty=False, step_list=None,
+            data_simplification=True):
+    """
+    Analyze training performance data offline, which is invoked after performance data collection is completed.
+
+    Args:
+        profiler_path (str): The path to profiling data that needs to be analyzed offline,
+            specified to the upper directory ``*_ascend_ms``.
+        max_process_number (int, optional): Maximum number of worker processes.
+            The default value is ``os.cpu_count() // 2``.
+        pretty (bool, optional): Whether to format the JSON files. Default: ``False``,
+            indicating that formatting is not performed.
+        step_list (list, optional): Only the performance data of the specified steps is parsed. The specified steps
+            must be consecutive integers. It supports CallBack collection only in GRAPH mode, and can only slice the
+            CANN layer and the following information. Default value: ``None``, that is, full parsing.
+        data_simplification (bool, optional): Whether to enable data simplification. Default: ``True``,
+            indicating that data simplification is enabled.
+
+    Examples:
+        >>> from mindspore.profiler.profiler import analyse
+        >>> analyse(profiler_path="./profiling_path")
+    """
+    if not isinstance(max_process_number, int) or isinstance(max_process_number, bool) or max_process_number <= 0:
+        logger.warning(f"Parameter 'max_process_number' should be a positive int, but got "
+                       f"{max_process_number!r}. Reset to {os.cpu_count() // 2}.")
+        max_process_number = os.cpu_count() // 2
+
+    real_path = PathManager.get_real_path(profiler_path)
+    PathManager.check_input_directory_path(real_path)
+    ascend_ms_path_list = PathManager.get_ascend_ms_path_list(real_path)
+
+    if not ascend_ms_path_list:
+        msg = (f"Invalid path: {real_path}. Expected a *_ascend_ms_* directory "
+               "or a parent directory of multiple *_ascend_ms_*")
+        logger.error(msg)
         return
+
+    with ProcessPoolExecutor(max_workers=max_process_number) as executor:
+        futures = [
+            executor.submit(
+                NPUProfilerAnalysis.offline_analyse,
+                ascend_ms_path,
+                pretty,
+                step_list,
+                data_simplification
+            ) for ascend_ms_path in ascend_ms_path_list
+        ]
+        # Wait for all tasks to complete
+        for future in as_completed(futures):
+            try:
+                future.result()
+            except Exception as e:  # pylint: disable=W0703
+                logger.error("offline analysis failed: %s", str(e))
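A small sketch of the fan-out above, assuming two collected rank directories;
analyse() caps the pool at max_process_number and parses each *_ascend_ms_*
directory in its own worker process:

    from mindspore.profiler.profiler import analyse

    # At most two workers parse the rank directories under ./profiling_path;
    # a failure in one worker is logged without stopping the others.
    analyse(profiler_path="./profiling_path", max_process_number=2)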