mindspore-2.4.1-cp39-cp39-win_amd64.whl → mindspore-2.5.0-cp39-cp39-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mindspore might be problematic.

Files changed (395)
  1. mindspore/.commit_id +1 -1
  2. mindspore/Microsoft.VisualStudio.Telemetry.dll +0 -0
  3. mindspore/Newtonsoft.Json.dll +0 -0
  4. mindspore/__init__.py +8 -3
  5. mindspore/_c_dataengine.cp39-win_amd64.pyd +0 -0
  6. mindspore/_c_expression.cp39-win_amd64.pyd +0 -0
  7. mindspore/_c_mindrecord.cp39-win_amd64.pyd +0 -0
  8. mindspore/_checkparam.py +0 -5
  9. mindspore/_extends/parallel_compile/akg_compiler/gen_custom_op_files.py +1 -1
  10. mindspore/_extends/parse/compile_config.py +64 -0
  11. mindspore/_extends/parse/deprecated/__init__.py +0 -0
  12. mindspore/_extends/parse/deprecated/deprecated_tensor_method.py +375 -0
  13. mindspore/_extends/parse/parser.py +23 -5
  14. mindspore/_extends/parse/standard_method.py +123 -27
  15. mindspore/_extends/pijit/pijit_func_white_list.py +1 -1
  16. mindspore/amp.py +7 -1
  17. mindspore/atlprov.dll +0 -0
  18. mindspore/avcodec-59.dll +0 -0
  19. mindspore/avdevice-59.dll +0 -0
  20. mindspore/avfilter-8.dll +0 -0
  21. mindspore/avformat-59.dll +0 -0
  22. mindspore/avutil-57.dll +0 -0
  23. mindspore/boost/boost_cell_wrapper.py +136 -41
  24. mindspore/c1.dll +0 -0
  25. mindspore/c1xx.dll +0 -0
  26. mindspore/c2.dll +0 -0
  27. mindspore/common/__init__.py +3 -1
  28. mindspore/common/_register_for_tensor.py +0 -1
  29. mindspore/common/_stub_tensor.py +25 -4
  30. mindspore/common/_tensor_cpp_method.py +17 -0
  31. mindspore/common/_tensor_docs.py +6132 -0
  32. mindspore/common/api.py +99 -25
  33. mindspore/common/dtype.py +34 -34
  34. mindspore/common/dump.py +2 -1
  35. mindspore/common/file_system.py +8 -1
  36. mindspore/common/generator.py +2 -0
  37. mindspore/common/hook_handle.py +3 -1
  38. mindspore/common/initializer.py +3 -4
  39. mindspore/common/lazy_inline.py +8 -2
  40. mindspore/common/mindir_util.py +10 -2
  41. mindspore/common/parameter.py +30 -27
  42. mindspore/common/tensor.py +713 -1337
  43. mindspore/communication/__init__.py +1 -1
  44. mindspore/communication/_comm_helper.py +10 -0
  45. mindspore/communication/comm_func.py +215 -173
  46. mindspore/communication/management.py +23 -20
  47. mindspore/context.py +292 -193
  48. mindspore/dataset/__init__.py +23 -19
  49. mindspore/dataset/callback/ds_callback.py +2 -1
  50. mindspore/dataset/core/config.py +84 -3
  51. mindspore/dataset/engine/cache_admin.py +3 -3
  52. mindspore/dataset/engine/cache_client.py +5 -4
  53. mindspore/dataset/engine/datasets.py +192 -149
  54. mindspore/dataset/engine/datasets_audio.py +14 -0
  55. mindspore/dataset/engine/datasets_standard_format.py +28 -11
  56. mindspore/dataset/engine/datasets_text.py +38 -1
  57. mindspore/dataset/engine/datasets_user_defined.py +125 -65
  58. mindspore/dataset/engine/datasets_vision.py +81 -8
  59. mindspore/dataset/engine/iterators.py +281 -63
  60. mindspore/dataset/engine/obs/util.py +8 -0
  61. mindspore/dataset/engine/queue.py +40 -0
  62. mindspore/dataset/engine/samplers.py +26 -2
  63. mindspore/dataset/engine/serializer_deserializer.py +1 -1
  64. mindspore/dataset/engine/validators.py +43 -11
  65. mindspore/dataset/transforms/py_transforms_util.py +17 -0
  66. mindspore/dataset/transforms/transforms.py +29 -12
  67. mindspore/dataset/vision/validators.py +1 -2
  68. mindspore/device_context/__init__.py +21 -0
  69. mindspore/device_context/ascend/__init__.py +25 -0
  70. mindspore/device_context/ascend/device.py +72 -0
  71. mindspore/device_context/ascend/op_debug.py +94 -0
  72. mindspore/device_context/ascend/op_precision.py +193 -0
  73. mindspore/device_context/ascend/op_tuning.py +127 -0
  74. mindspore/device_context/cpu/__init__.py +25 -0
  75. mindspore/device_context/cpu/device.py +62 -0
  76. mindspore/device_context/cpu/op_tuning.py +43 -0
  77. mindspore/device_context/gpu/__init__.py +21 -0
  78. mindspore/device_context/gpu/device.py +70 -0
  79. mindspore/device_context/gpu/op_precision.py +67 -0
  80. mindspore/device_context/gpu/op_tuning.py +175 -0
  81. mindspore/device_manager.py +134 -0
  82. mindspore/dnnl.dll +0 -0
  83. mindspore/dpcmi.dll +0 -0
  84. mindspore/experimental/llm_boost/__init__.py +3 -2
  85. mindspore/experimental/llm_boost/ascend_native/__init__.py +22 -0
  86. mindspore/experimental/llm_boost/ascend_native/llama_boost_ascend_native.py +211 -0
  87. mindspore/experimental/llm_boost/ascend_native/llm_boost.py +52 -0
  88. mindspore/experimental/llm_boost/atb/boost_base.py +239 -64
  89. mindspore/experimental/llm_boost/atb/llama_boost.py +52 -30
  90. mindspore/experimental/llm_boost/atb/qwen_boost.py +47 -24
  91. mindspore/experimental/llm_boost/register.py +1 -0
  92. mindspore/experimental/optim/adadelta.py +26 -22
  93. mindspore/experimental/optim/adam.py +3 -0
  94. mindspore/experimental/optim/lr_scheduler.py +33 -24
  95. mindspore/experimental/optim/radam.py +33 -30
  96. mindspore/hal/device.py +28 -0
  97. mindspore/hal/event.py +17 -0
  98. mindspore/hal/memory.py +94 -3
  99. mindspore/hal/stream.py +91 -6
  100. mindspore/include/api/context.h +1 -2
  101. mindspore/include/dataset/constants.h +2 -2
  102. mindspore/jpeg62.dll +0 -0
  103. mindspore/log.py +12 -0
  104. mindspore/mindrecord/__init__.py +1 -1
  105. mindspore/mindrecord/config.py +17 -316
  106. mindspore/mindrecord/filereader.py +1 -9
  107. mindspore/mindrecord/filewriter.py +5 -15
  108. mindspore/mindrecord/mindpage.py +1 -9
  109. mindspore/mindspore_backend.dll +0 -0
  110. mindspore/mindspore_common.dll +0 -0
  111. mindspore/mindspore_core.dll +0 -0
  112. mindspore/mindspore_glog.dll +0 -0
  113. mindspore/mindspore_ops.dll +0 -0
  114. mindspore/mint/__init__.py +824 -218
  115. mindspore/mint/distributed/__init__.py +66 -4
  116. mindspore/mint/distributed/distributed.py +2594 -44
  117. mindspore/mint/linalg/__init__.py +6 -0
  118. mindspore/mint/nn/__init__.py +473 -14
  119. mindspore/mint/nn/functional.py +486 -11
  120. mindspore/mint/nn/layer/__init__.py +17 -4
  121. mindspore/mint/nn/layer/_functions.py +330 -0
  122. mindspore/mint/nn/layer/activation.py +169 -1
  123. mindspore/mint/nn/layer/basic.py +123 -0
  124. mindspore/mint/nn/layer/conv.py +727 -0
  125. mindspore/mint/nn/layer/normalization.py +215 -19
  126. mindspore/mint/nn/layer/padding.py +797 -0
  127. mindspore/mint/nn/layer/pooling.py +170 -0
  128. mindspore/mint/optim/__init__.py +2 -1
  129. mindspore/mint/optim/adam.py +223 -0
  130. mindspore/mint/optim/adamw.py +26 -19
  131. mindspore/mint/special/__init__.py +2 -1
  132. mindspore/msobj140.dll +0 -0
  133. mindspore/mspdb140.dll +0 -0
  134. mindspore/mspdbcore.dll +0 -0
  135. mindspore/mspdbst.dll +0 -0
  136. mindspore/mspft140.dll +0 -0
  137. mindspore/msvcdis140.dll +0 -0
  138. mindspore/msvcp140_1.dll +0 -0
  139. mindspore/msvcp140_2.dll +0 -0
  140. mindspore/msvcp140_atomic_wait.dll +0 -0
  141. mindspore/msvcp140_codecvt_ids.dll +0 -0
  142. mindspore/multiprocessing/__init__.py +5 -0
  143. mindspore/nn/__init__.py +2 -0
  144. mindspore/nn/cell.py +142 -21
  145. mindspore/nn/dynamic_lr.py +2 -1
  146. mindspore/nn/layer/activation.py +6 -6
  147. mindspore/nn/layer/basic.py +35 -25
  148. mindspore/nn/layer/channel_shuffle.py +3 -3
  149. mindspore/nn/layer/conv.py +3 -0
  150. mindspore/nn/layer/embedding.py +3 -3
  151. mindspore/nn/layer/normalization.py +8 -7
  152. mindspore/nn/layer/padding.py +4 -3
  153. mindspore/nn/layer/pooling.py +55 -23
  154. mindspore/nn/layer/rnn_cells.py +1 -1
  155. mindspore/nn/layer/rnns.py +2 -1
  156. mindspore/nn/layer/timedistributed.py +5 -5
  157. mindspore/nn/layer/transformer.py +48 -26
  158. mindspore/nn/learning_rate_schedule.py +5 -3
  159. mindspore/nn/loss/loss.py +31 -36
  160. mindspore/nn/optim/ada_grad.py +1 -0
  161. mindspore/nn/optim/adadelta.py +2 -2
  162. mindspore/nn/optim/adam.py +1 -1
  163. mindspore/nn/optim/lars.py +1 -4
  164. mindspore/nn/optim/optimizer.py +1 -1
  165. mindspore/nn/optim/rprop.py +2 -2
  166. mindspore/nn/optim/thor.py +2 -1
  167. mindspore/nn/utils/__init__.py +22 -0
  168. mindspore/nn/utils/init.py +73 -0
  169. mindspore/nn/wrap/cell_wrapper.py +4 -6
  170. mindspore/nn/wrap/loss_scale.py +3 -4
  171. mindspore/numpy/array_creations.py +60 -62
  172. mindspore/numpy/array_ops.py +148 -143
  173. mindspore/numpy/logic_ops.py +41 -42
  174. mindspore/numpy/math_ops.py +361 -359
  175. mindspore/numpy/utils.py +16 -16
  176. mindspore/numpy/utils_const.py +4 -4
  177. mindspore/opencv_core452.dll +0 -0
  178. mindspore/opencv_imgcodecs452.dll +0 -0
  179. mindspore/opencv_imgproc452.dll +0 -0
  180. mindspore/ops/__init__.py +2 -1
  181. mindspore/ops/_grad_experimental/grad_comm_ops.py +107 -8
  182. mindspore/ops/_grad_experimental/grad_debug_ops.py +6 -1
  183. mindspore/ops/_grad_experimental/grad_inner_ops.py +9 -0
  184. mindspore/ops/_grad_experimental/grad_math_ops.py +2 -1
  185. mindspore/ops/_op_impl/cpu/__init__.py +1 -0
  186. mindspore/ops/_op_impl/cpu/raise_op.py +28 -0
  187. mindspore/ops/_vmap/vmap_array_ops.py +20 -19
  188. mindspore/ops/_vmap/vmap_base.py +0 -2
  189. mindspore/ops/_vmap/vmap_grad_nn_ops.py +19 -13
  190. mindspore/ops/_vmap/vmap_math_ops.py +11 -9
  191. mindspore/ops/_vmap/vmap_nn_ops.py +20 -34
  192. mindspore/ops/auto_generate/cpp_create_prim_instance_helper.py +149 -12
  193. mindspore/ops/auto_generate/gen_arg_handler.py +0 -61
  194. mindspore/ops/auto_generate/gen_extend_func.py +554 -60
  195. mindspore/ops/auto_generate/gen_ops_def.py +1621 -115
  196. mindspore/ops/auto_generate/gen_ops_prim.py +8027 -3411
  197. mindspore/ops/auto_generate/pyboost_inner_prim.py +183 -79
  198. mindspore/ops/composite/base.py +1 -1
  199. mindspore/ops/composite/multitype_ops/_compile_utils.py +229 -30
  200. mindspore/ops/composite/multitype_ops/pow_impl.py +0 -29
  201. mindspore/ops/function/__init__.py +12 -0
  202. mindspore/ops/function/array_func.py +561 -159
  203. mindspore/ops/function/clip_func.py +64 -0
  204. mindspore/ops/function/debug_func.py +28 -20
  205. mindspore/ops/function/image_func.py +1 -1
  206. mindspore/ops/function/linalg_func.py +5 -4
  207. mindspore/ops/function/math_func.py +1664 -294
  208. mindspore/ops/function/nn_func.py +988 -317
  209. mindspore/ops/function/parameter_func.py +3 -56
  210. mindspore/ops/function/random_func.py +243 -33
  211. mindspore/ops/function/sparse_unary_func.py +1 -1
  212. mindspore/ops/functional.py +18 -5
  213. mindspore/ops/functional_overload.py +897 -0
  214. mindspore/ops/operations/__init__.py +3 -2
  215. mindspore/ops/operations/_embedding_cache_ops.py +4 -4
  216. mindspore/ops/operations/_grad_ops.py +2 -34
  217. mindspore/ops/operations/_infer_ops.py +2 -1
  218. mindspore/ops/operations/_inner_ops.py +38 -8
  219. mindspore/ops/operations/array_ops.py +45 -303
  220. mindspore/ops/operations/comm_ops.py +23 -17
  221. mindspore/ops/operations/custom_ops.py +7 -49
  222. mindspore/ops/operations/debug_ops.py +42 -47
  223. mindspore/ops/operations/inner_ops.py +6 -4
  224. mindspore/ops/operations/linalg_ops.py +3 -2
  225. mindspore/ops/operations/manually_defined/ops_def.py +185 -104
  226. mindspore/ops/operations/math_ops.py +11 -216
  227. mindspore/ops/operations/nn_ops.py +153 -310
  228. mindspore/ops/primitive.py +23 -21
  229. mindspore/ops/tensor_method.py +1669 -0
  230. mindspore/ops_generate/aclnn_kernel_register_auto_cc_generator.py +110 -0
  231. mindspore/ops_generate/add_tensor_docs_generator.py +54 -0
  232. mindspore/ops_generate/arg_handler.py +0 -61
  233. mindspore/ops_generate/auto_grad_impl_cc_generator.py +135 -0
  234. mindspore/ops_generate/auto_grad_reg_cc_generator.py +93 -0
  235. mindspore/ops_generate/base_generator.py +11 -0
  236. mindspore/ops_generate/cpp_create_prim_instance_helper_generator.py +108 -0
  237. mindspore/ops_generate/functional_map_cpp_generator.py +491 -0
  238. mindspore/ops_generate/functional_overload_py_generator.py +110 -0
  239. mindspore/ops_generate/functions_cc_generator.py +233 -0
  240. mindspore/ops_generate/gen_aclnn_implement.py +110 -114
  241. mindspore/ops_generate/gen_constants.py +157 -3
  242. mindspore/ops_generate/gen_ops.py +245 -990
  243. mindspore/ops_generate/gen_pyboost_func.py +97 -998
  244. mindspore/ops_generate/gen_utils.py +119 -33
  245. mindspore/ops_generate/lite_ops_cpp_generator.py +155 -0
  246. mindspore/ops_generate/op_api_proto.py +206 -0
  247. mindspore/ops_generate/op_def_py_generator.py +131 -0
  248. mindspore/ops_generate/op_prim_py_generator.py +480 -0
  249. mindspore/ops_generate/op_proto.py +373 -108
  250. mindspore/ops_generate/op_template_parser.py +436 -0
  251. mindspore/ops_generate/ops_def_cc_generator.py +288 -0
  252. mindspore/ops_generate/ops_def_h_generator.py +74 -0
  253. mindspore/ops_generate/ops_name_h_generator.py +68 -0
  254. mindspore/ops_generate/ops_primitive_h_generator.py +81 -0
  255. mindspore/ops_generate/pyboost_functions_cpp_generator.py +370 -0
  256. mindspore/ops_generate/pyboost_functions_h_generator.py +68 -0
  257. mindspore/ops_generate/pyboost_functions_py_generator.py +148 -0
  258. mindspore/ops_generate/pyboost_grad_function_cpp_generator.py +154 -0
  259. mindspore/ops_generate/pyboost_inner_prim_generator.py +131 -0
  260. mindspore/ops_generate/pyboost_native_grad_functions_generator.py +268 -0
  261. mindspore/ops_generate/pyboost_op_cpp_code_generator.py +851 -0
  262. mindspore/ops_generate/pyboost_overload_functions_cpp_generator.py +344 -0
  263. mindspore/ops_generate/pyboost_utils.py +92 -33
  264. mindspore/ops_generate/template.py +294 -44
  265. mindspore/ops_generate/tensor_func_reg_cpp_generator.py +422 -0
  266. mindspore/parallel/__init__.py +3 -3
  267. mindspore/parallel/_auto_parallel_context.py +44 -34
  268. mindspore/parallel/_cell_wrapper.py +22 -3
  269. mindspore/parallel/_parallel_serialization.py +13 -2
  270. mindspore/parallel/_utils.py +4 -2
  271. mindspore/parallel/algo_parameter_config.py +1 -1
  272. mindspore/parallel/checkpoint_transform.py +44 -0
  273. mindspore/parallel/cluster/process_entity/_api.py +131 -37
  274. mindspore/parallel/cluster/process_entity/_utils.py +41 -6
  275. mindspore/parallel/cluster/run.py +20 -3
  276. mindspore/parallel/parameter_broadcast.py +1 -1
  277. mindspore/parallel/shard.py +3 -0
  278. mindspore/parallel/transform_safetensors.py +119 -253
  279. mindspore/pgodb140.dll +0 -0
  280. mindspore/pgort140.dll +0 -0
  281. mindspore/profiler/__init__.py +17 -4
  282. mindspore/profiler/analysis/__init__.py +0 -0
  283. mindspore/profiler/analysis/parser/__init__.py +0 -0
  284. mindspore/profiler/analysis/parser/ascend_cann_parser.py +166 -0
  285. mindspore/profiler/analysis/parser/base_parser.py +158 -0
  286. mindspore/profiler/analysis/parser/framework_cann_relation_parser.py +45 -0
  287. mindspore/profiler/analysis/parser/ms_framework_parser.py +142 -0
  288. mindspore/profiler/analysis/parser/ms_minddata_parser.py +145 -0
  289. mindspore/profiler/analysis/parser/timeline_assembly_factory/__init__.py +0 -0
  290. mindspore/profiler/analysis/parser/timeline_assembly_factory/ascend_timeline_assembler.py +261 -0
  291. mindspore/profiler/analysis/parser/timeline_assembly_factory/base_timeline_assembler.py +40 -0
  292. mindspore/profiler/analysis/parser/timeline_assembly_factory/trace_view_container.py +84 -0
  293. mindspore/profiler/analysis/parser/timeline_creator/__init__.py +0 -0
  294. mindspore/profiler/analysis/parser/timeline_creator/base_timeline_creator.py +44 -0
  295. mindspore/profiler/analysis/parser/timeline_creator/cpu_op_timeline_creator.py +90 -0
  296. mindspore/profiler/analysis/parser/timeline_creator/fwk_timeline_creator.py +76 -0
  297. mindspore/profiler/analysis/parser/timeline_creator/msprof_timeline_creator.py +103 -0
  298. mindspore/profiler/analysis/parser/timeline_creator/scope_layer_timeline_creator.py +134 -0
  299. mindspore/profiler/analysis/parser/timeline_event/__init__.py +0 -0
  300. mindspore/profiler/analysis/parser/timeline_event/base_event.py +233 -0
  301. mindspore/profiler/analysis/parser/timeline_event/cpu_op_event.py +47 -0
  302. mindspore/profiler/analysis/parser/timeline_event/flow_event.py +36 -0
  303. mindspore/profiler/analysis/parser/timeline_event/fwk_event.py +260 -0
  304. mindspore/profiler/analysis/parser/timeline_event/msprof_event.py +73 -0
  305. mindspore/profiler/analysis/parser/timeline_event/scope_layer_event.py +53 -0
  306. mindspore/profiler/analysis/parser/timeline_event/timeline_event_pool.py +146 -0
  307. mindspore/profiler/analysis/task_manager.py +131 -0
  308. mindspore/profiler/analysis/time_converter.py +84 -0
  309. mindspore/profiler/analysis/viewer/__init__.py +0 -0
  310. mindspore/profiler/analysis/viewer/ascend_communication_viewer.py +333 -0
  311. mindspore/profiler/analysis/viewer/ascend_integrate_viewer.py +87 -0
  312. mindspore/profiler/analysis/viewer/ascend_kernel_details_viewer.py +252 -0
  313. mindspore/profiler/analysis/viewer/ascend_memory_viewer.py +313 -0
  314. mindspore/profiler/analysis/viewer/ascend_op_memory_viewer.py +322 -0
  315. mindspore/profiler/analysis/viewer/ascend_step_trace_time_viewer.py +265 -0
  316. mindspore/profiler/analysis/viewer/ascend_timeline_viewer.py +58 -0
  317. mindspore/profiler/analysis/viewer/base_viewer.py +26 -0
  318. mindspore/profiler/analysis/viewer/ms_dataset_viewer.py +97 -0
  319. mindspore/profiler/analysis/viewer/ms_minddata_viewer.py +581 -0
  320. mindspore/profiler/analysis/work_flow.py +73 -0
  321. mindspore/profiler/common/ascend_msprof_exporter.py +138 -0
  322. mindspore/profiler/common/command_executor.py +90 -0
  323. mindspore/profiler/common/constant.py +174 -3
  324. mindspore/profiler/common/file_manager.py +208 -0
  325. mindspore/profiler/common/log.py +130 -0
  326. mindspore/profiler/common/msprof_cmd_tool.py +202 -0
  327. mindspore/profiler/common/path_manager.py +371 -0
  328. mindspore/profiler/common/process_bar.py +168 -0
  329. mindspore/profiler/common/process_pool.py +9 -3
  330. mindspore/profiler/common/profiler_context.py +476 -0
  331. mindspore/profiler/common/profiler_info.py +304 -0
  332. mindspore/profiler/common/profiler_output_path.py +284 -0
  333. mindspore/profiler/common/profiler_parameters.py +210 -0
  334. mindspore/profiler/common/profiler_path_manager.py +120 -0
  335. mindspore/profiler/common/record_function.py +76 -0
  336. mindspore/profiler/common/tlv_decoder.py +76 -0
  337. mindspore/profiler/common/util.py +75 -2
  338. mindspore/profiler/dynamic_profiler.py +270 -37
  339. mindspore/profiler/envprofiler.py +138 -0
  340. mindspore/profiler/mstx.py +199 -0
  341. mindspore/profiler/platform/__init__.py +21 -0
  342. mindspore/profiler/platform/base_profiler.py +40 -0
  343. mindspore/profiler/platform/cpu_profiler.py +124 -0
  344. mindspore/profiler/platform/gpu_profiler.py +74 -0
  345. mindspore/profiler/platform/npu_profiler.py +309 -0
  346. mindspore/profiler/profiler.py +580 -93
  347. mindspore/profiler/profiler_action_controller.py +187 -0
  348. mindspore/profiler/profiler_interface.py +114 -0
  349. mindspore/profiler/schedule.py +208 -0
  350. mindspore/rewrite/api/symbol_tree.py +1 -2
  351. mindspore/run_check/_check_version.py +18 -13
  352. mindspore/runtime/__init__.py +37 -0
  353. mindspore/runtime/device.py +27 -0
  354. mindspore/runtime/event.py +209 -0
  355. mindspore/runtime/executor.py +148 -0
  356. mindspore/runtime/memory.py +392 -0
  357. mindspore/runtime/stream.py +460 -0
  358. mindspore/runtime/thread_bind_core.py +401 -0
  359. mindspore/swresample-4.dll +0 -0
  360. mindspore/swscale-6.dll +0 -0
  361. mindspore/tbbmalloc.dll +0 -0
  362. mindspore/tinyxml2.dll +0 -0
  363. mindspore/train/__init__.py +2 -2
  364. mindspore/train/_utils.py +53 -18
  365. mindspore/train/amp.py +8 -4
  366. mindspore/train/callback/_checkpoint.py +32 -18
  367. mindspore/train/callback/_early_stop.py +1 -1
  368. mindspore/train/callback/_flops_collector.py +105 -69
  369. mindspore/train/callback/_history.py +1 -1
  370. mindspore/train/callback/_summary_collector.py +44 -6
  371. mindspore/train/callback/_tft_register.py +37 -15
  372. mindspore/train/dataset_helper.py +11 -11
  373. mindspore/train/metrics/precision.py +4 -5
  374. mindspore/train/mind_ir_pb2.py +167 -46
  375. mindspore/train/model.py +13 -14
  376. mindspore/train/serialization.py +461 -72
  377. mindspore/train/summary/summary_record.py +1 -2
  378. mindspore/train/train_thor/model_thor.py +1 -1
  379. mindspore/turbojpeg.dll +0 -0
  380. mindspore/utils/__init__.py +4 -2
  381. mindspore/utils/dryrun.py +138 -0
  382. mindspore/utils/runtime_execution_order_check.py +550 -0
  383. mindspore/vcmeta.dll +0 -0
  384. mindspore/vcruntime140.dll +0 -0
  385. mindspore/vcruntime140_1.dll +0 -0
  386. mindspore/version.py +1 -1
  387. {mindspore-2.4.1.dist-info → mindspore-2.5.0.dist-info}/METADATA +3 -4
  388. {mindspore-2.4.1.dist-info → mindspore-2.5.0.dist-info}/RECORD +391 -265
  389. {mindspore-2.4.1.dist-info → mindspore-2.5.0.dist-info}/entry_points.txt +1 -1
  390. mindspore/common/_tensor_overload.py +0 -139
  391. mindspore/mindspore_np_dtype.dll +0 -0
  392. mindspore/profiler/envprofiling.py +0 -254
  393. mindspore/profiler/profiling.py +0 -1926
  394. {mindspore-2.4.1.dist-info → mindspore-2.5.0.dist-info}/WHEEL +0 -0
  395. {mindspore-2.4.1.dist-info → mindspore-2.5.0.dist-info}/top_level.txt +0 -0
@@ -13,98 +13,319 @@
 # limitations under the License.
 # ============================================================================
 """Profiling api file."""
-from mindspore.profiler.common.registry import PROFILERS
-from mindspore.profiler.common.constant import DeviceTarget
-from mindspore.profiler.common.constant import ProfilerLevel
-from mindspore.profiler.platform_profiler.prof_context import ProfContext
+import os
+import json
+from typing import Optional, Dict
+from sys import getsizeof
+from concurrent.futures import ProcessPoolExecutor, as_completed

+from mindspore import log as logger
+from mindspore.profiler.common.constant import ProfilerStepNameConstant, DeviceTarget
+from mindspore.profiler.common.profiler_context import ProfilerContext
+from mindspore.profiler.platform.npu_profiler import NPUProfilerAnalysis
+from mindspore.profiler.profiler_action_controller import ProfilerActionController
+from mindspore.profiler.profiler_interface import ProfilerInterface
+from mindspore.profiler.schedule import _default_schedule_fn, ProfilerAction
+from mindspore.profiler.common.record_function import RecordFunction
+from mindspore.profiler.common.path_manager import PathManager
+from mindspore.profiler.common.file_manager import FileManager
+from mindspore.profiler.common.profiler_path_manager import ProfilerPathManager

-class NewProfiler:
+
+def tensor_board_trace_handler():
     """
-    Refactor profiler
+    Call this method at each step in dynamic graph mode to run online analysis.
+
+    Examples:
+        >>> import numpy as np
+        >>> import mindspore as ms
+        >>> import mindspore.dataset as ds
+        >>> from mindspore import context, nn, Profiler
+        >>> from mindspore.profiler import schedule, tensor_board_trace_handler
+        >>>
+        >>> class Net(nn.Cell):
+        ...     def __init__(self):
+        ...         super(Net, self).__init__()
+        ...         self.fc = nn.Dense(2, 2)
+        ...
+        ...     def construct(self, x):
+        ...         return self.fc(x)
+        >>>
+        >>> def generator_net():
+        ...     for _ in range(2):
+        ...         yield np.ones([2, 2]).astype(np.float32), np.ones([2]).astype(np.int32)
+        >>>
+        >>> def train(test_net):
+        ...     optimizer = nn.Momentum(test_net.trainable_params(), 1, 0.9)
+        ...     loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True)
+        ...     data = ds.GeneratorDataset(generator_net(), ["data", "label"])
+        ...     model = ms.train.Model(test_net, loss, optimizer)
+        ...     model.train(1, data)
+        >>>
+        >>> if __name__ == '__main__':
+        ...     context.set_context(mode=ms.PYNATIVE_MODE, device_target="Ascend")
+        ...
+        ...     net = Net()
+        ...     STEP_NUM = 15
+        ...
+        ...     with Profiler(schedule=schedule(wait=1, warmup=1, active=2, repeat=1, skip_first=2),
+        ...                   on_trace_ready=tensor_board_trace_handler) as prof:
+        ...         for i in range(STEP_NUM):
+        ...             train(net)
+        ...             prof.step()
     """

-    def __init__(
-            self,
-            output_path: str = "./data",
-            profiler_level: ProfilerLevel = None,
-            op_time: bool = True,
-            profile_communication: bool = False,
-            profile_memory: bool = False,
-            parallel_strategy: bool = False,
-            start_profile: bool = True,
-            aicore_metrics: int = 0,
-            l2_cache: bool = False,
-            hbm_ddr: bool = False,
-            pcie: bool = False,
-            sync_enable: bool = True,
-            data_process: bool = False,
-            timeline_limit: int = 500,
-            profile_framework: str = None,
-            with_stack: bool = False,
-            data_simplification: bool = True,
-            **kwargs) -> None:
-
-        self._prof_context = ProfContext(
-            output_path=output_path,
-            profiler_level=profiler_level,
-            op_time=op_time,
-            profile_communication=profile_communication,
-            profile_memory=profile_memory,
-            parallel_strategy=parallel_strategy,
-            start_profile=start_profile,
-            aicore_metrics=aicore_metrics,
-            l2_cache=l2_cache,
-            hbm_ddr=hbm_ddr,
-            pcie=pcie,
-            sync_enable=sync_enable,
-            data_process=data_process,
-            timeline_limit=timeline_limit,
-            profile_framework=profile_framework,
-            with_stack=with_stack,
-            data_simplification=data_simplification
-        )
+    try:
+        NPUProfilerAnalysis.online_analyse()
+        if ProfilerContext().data_simplification:
+            ProfilerPathManager().simplify_data()
+    except Exception as e:  # pylint: disable=W0703
+        logger.error("Call tensorboard_trace_handler failed. Exception: %s", str(e))

-        self._has_started = False

-        self._cpu_profiler = PROFILERS.get_modules().get(DeviceTarget.CPU.value)(
-            op_time=self._prof_context.op_time,
-            with_stack=self._prof_context.with_stack,
-            data_process=self._prof_context.data_process,
-            output_path=self._prof_context.output_path,
-            profile_memory=self._prof_context.profile_memory,
-            profile_framework=self._prof_context.profile_framework
-        )
+class Profiler:
+    r"""
+    This class enables profiling of MindSpore neural networks.
+    MindSpore users can import mindspore.Profiler, initialize the Profiler object to start profiling,
+    and use Profiler.analyse() to stop profiling and analyse the results.
+    Users can visualize the results using the `MindStudio Insight
+    <https://www.hiascend.com/developer/download/community/result?module=pt+sto+cann>`_ tool.
+    Profiler currently supports analysis of AICORE operator, AICPU operator, HostCPU operator,
+    memory, correspondence, cluster and other data.
+
+    Args:
+        start_profile (bool, optional): Controls whether to enable or disable performance data
+            collection conditionally. Default: ``True`` .
+        output_path (str, optional): Output data path. Default: ``"./data"`` .
+        profiler_level (ProfilerLevel, optional): (Ascend only) The level of profiling.
+            Default: ``ProfilerLevel.Level0``.
+
+            - ProfilerLevel.Level0: The leanest level of profiling data collection; collects the elapsed
+              time of the computational operators on the NPU and large communication operator information.
+            - ProfilerLevel.Level1: Collects more AscendCL data in the CANN layer, AICore performance
+              metrics and small communication operator information on top of Level0.
+            - ProfilerLevel.Level2: Collects GE and Runtime information in the CANN layer on top of Level1.
+        activities (list, optional): The activities to collect.
+            Default: ``[ProfilerActivity.CPU, ProfilerActivity.NPU]``.
+
+            - ProfilerActivity.CPU: Collect MindSpore framework data.
+            - ProfilerActivity.NPU: Collect CANN software stack and NPU data.
+            - ProfilerActivity.GPU: Collect GPU data.
+        schedule (schedule, optional): Sets the action strategy for the capture, defined by the schedule
+            class, to be used with the step interface. Default: ``None``.
+        on_trace_ready (Callable, optional): Sets the callback function to be executed when the performance
+            data is collected. Default: ``None``.
+        profile_memory (bool, optional): (Ascend only) Whether to collect tensor memory data; collected when
+            ``True`` . When using this parameter, `activities` must be set to
+            ``[ProfilerActivity.CPU, ProfilerActivity.NPU]``.
+            Collecting operator memory data when the graph compilation level is O2 requires collecting from
+            the first step. Default: ``False`` . The operator names currently collected by this parameter
+            are incomplete; this issue will be resolved in later versions. It is recommended to use the
+            environment variable ``MS_ALLOC_CONF`` instead.
+        aicore_metrics (AicoreMetrics, optional): (Ascend only) Types of AICORE performance data collected.
+            When using this parameter, `activities` must include ``ProfilerActivity.NPU`` , and the value
+            must be a member of AicoreMetrics. Default: ``AicoreMetrics.AiCoreNone`` .
+            The data items contained in each metric are as follows:

-        self._device_target = self._prof_context.device_target
-        self._device_profiler = PROFILERS.get_modules().get(self._device_target)(
-            self._prof_context.get_args()
-        )
+            - AicoreMetrics.AiCoreNone: Does not collect AICORE data.
+            - AicoreMetrics.ArithmeticUtilization: Contains mac_fp16/int8_ratio,
+              vec_fp32/fp16/int32_ratio, vec_misc_ratio, etc.
+            - AicoreMetrics.PipeUtilization: Contains vec_ratio, mac_ratio, scalar_ratio,
+              mte1/mte2/mte3_ratio, icache_miss_rate, etc.
+            - AicoreMetrics.Memory: Contains ub_read/write_bw, l1_read/write_bw, l2_read/write_bw,
+              main_mem_read/write_bw, etc.
+            - AicoreMetrics.MemoryL0: Contains l0a_read/write_bw, l0b_read/write_bw, l0c_read/write_bw, etc.
+            - AicoreMetrics.ResourceConflictRatio: Contains vec_bankgroup/bank/resc_cflt_ratio, etc.
+            - AicoreMetrics.MemoryUB: Contains ub_read/write_bw_mte, ub_read/write_bw_vector,
+              ub_read/write_bw_scalar, etc.
+            - AicoreMetrics.L2Cache: Contains write_cache_hit, write_cache_miss_allocate, r0_read_cache_hit,
+              r1_read_cache_hit, etc. This function only supports Atlas A2 training series products.
+        with_stack (bool, optional): (Ascend) Whether to collect frame host call stack data on the Python
+            side. This data is presented in the form of a flame graph in the timeline. When using this
+            parameter, `activities` must include ``ProfilerActivity.CPU``. Default value: ``False`` .
+        data_simplification (bool, optional): (Ascend only) Whether to remove FRAMEWORK data and other
+            redundant data. If set to ``True``, only the profiler deliverables and the original performance
+            data in the PROF_XXX directory are retained, to save disk space. Default value: ``True`` .
+        l2_cache (bool, optional): (Ascend only) Whether to collect l2 cache data; collected when ``True``.
+            Default: ``False`` .
+        hbm_ddr (bool, optional): (Ascend only) Whether to collect On-Chip Memory/DDR read and write rate
+            data; collected when ``True``. Default: ``False`` .
+        pcie (bool, optional): (Ascend only) Whether to collect PCIe bandwidth data; collected when ``True``.
+            Default: ``False`` .
+        data_process (bool, optional): (Ascend/GPU) Whether to collect data-preparation performance data.
+            Default value: ``False`` .
+        parallel_strategy (bool, optional): (Ascend only) Whether to collect parallel strategy performance
+            data. Default value: ``False`` .
+        sync_enable (bool, optional): (GPU only) Whether the profiler collects operators in a synchronous
+            way. Default: ``True`` .
+
+            - True: The synchronous way. Before sending the operator to the GPU, the CPU records the start
+              timestamp. Then the operator is returned to the CPU after execution, and the end timestamp is
+              recorded. The duration of the operator is the difference between the two timestamps.
+            - False: The asynchronous way. The duration of the operator is that of sending from the CPU to
+              the GPU. This method can reduce the impact of adding the profiler on overall training time.
+
+    Raises:
+        RuntimeError: When the version of CANN does not match the version of MindSpore,
+            MindSpore cannot parse the generated ascend_job_id directory structure.
+
+    Supported Platforms:
+        ``Ascend`` ``GPU``
+
+    Examples:
+        >>> import numpy as np
+        >>> import mindspore as ms
+        >>> from mindspore import nn
+        >>> import mindspore.dataset as ds
+        >>> from mindspore import Profiler
+        >>> from mindspore.profiler import ProfilerLevel, ProfilerActivity, AicoreMetrics
+        >>>
+        >>> class Net(nn.Cell):
+        ...     def __init__(self):
+        ...         super(Net, self).__init__()
+        ...         self.fc = nn.Dense(2, 2)
+        ...     def construct(self, x):
+        ...         return self.fc(x)
+        >>>
+        >>> def generator():
+        ...     for i in range(2):
+        ...         yield (np.ones([2, 2]).astype(np.float32), np.ones([2]).astype(np.int32))
+        >>>
+        >>> def train(net):
+        ...     optimizer = nn.Momentum(net.trainable_params(), 1, 0.9)
+        ...     loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True)
+        ...     data = ds.GeneratorDataset(generator, ["data", "label"])
+        ...     model = ms.train.Model(net, loss, optimizer)
+        ...     model.train(1, data)
+        >>>
+        >>> if __name__ == '__main__':
+        ...     # If the device_target is GPU, set the device_target to "GPU"
+        ...     ms.set_context(mode=ms.GRAPH_MODE, device_target="Ascend")
+        ...
+        ...     # Init Profiler
+        ...     # Note that the Profiler should be initialized before model.train
+        ...     profiler = Profiler(profiler_level=ProfilerLevel.Level0,
+        ...                         activities=[ProfilerActivity.CPU, ProfilerActivity.NPU],
+        ...                         aicore_metrics=AicoreMetrics.AiCoreNone)
+        ...
+        ...     # Train Model
+        ...     net = Net()
+        ...     train(net)
+        ...
+        ...     # Profiler end
+        ...     profiler.analyse()
+    """
+    MAX_META_SIZE = 100 * 1024 * 1024  # 100MB
+
+    def __init__(self, **kwargs) -> None:
+        self._metadata: Dict[str, str] = {}
+        self._prof_context: ProfilerContext = ProfilerContext()
+        self._prof_context.set_params(**kwargs)
+        self._has_started: bool = False
+        self.schedule_arg = kwargs.get('schedule')
+        if self.schedule_arg is not None:
+            self.schedule = self._prof_context.schedule
+            self._record_steps: bool = True
+            self._schedule_no_use_step = True
+        else:
+            self.schedule = _default_schedule_fn
+            self._record_steps: bool = False
+            self._schedule_no_use_step = None
+        self._step_rec_fn: Optional[RecordFunction] = None
+        self.step_num = 0
+        self.current_action: ProfilerAction = self.schedule(self.step_num)
+        self.action_controller = ProfilerActionController(ProfilerInterface, self._prof_context.on_trace_ready)
+        if self._prof_context.start_profile:
+            self.start()

     def start(self) -> None:
         """
-        Used for Ascend, GPU, start profiling. Profiling can be turned on based on step and epoch.
-        """
-        if not self._has_started:
-            self._has_started = True
-        else:
-            raise RuntimeError("The profiler has already started. Do not turn on again in the open state.")
+        Turn on Profiler data collection. The Profiler can be turned on conditionally.
+
+        Raises:
+            RuntimeError: If the profiler has already started.
+            RuntimeError: If the `start_profile` parameter is not set or is set to ``True``.

-        self._cpu_profiler.start()
-        self._device_profiler.start()
+        Examples:
+            >>> from mindspore.train import Callback
+            >>> from mindspore import Profiler
+            >>> class StopAtStep(Callback):
+            ...     def __init__(self, start_step, stop_step):
+            ...         super(StopAtStep, self).__init__()
+            ...         self.start_step = start_step
+            ...         self.stop_step = stop_step
+            ...         self.profiler = Profiler(start_profile=False)
+            ...
+            ...     def step_begin(self, run_context):
+            ...         cb_params = run_context.original_args()
+            ...         step_num = cb_params.cur_step_num
+            ...         if step_num == self.start_step:
+            ...             self.profiler.start()
+            ...
+            ...     def step_end(self, run_context):
+            ...         cb_params = run_context.original_args()
+            ...         step_num = cb_params.cur_step_num
+            ...         if step_num == self.stop_step:
+            ...             self.profiler.stop()
+            ...
+            ...     def end(self, run_context):
+            ...         self.profiler.analyse()
+        """
+        if self._has_started:
+            logger.warning("The profiler has already started. Do not turn on again in the open state.")
+            return
+        self._has_started = True
+        self.action_controller.transit_action(ProfilerAction.NONE, self.current_action)
+        if self._record_steps:
+            self._step_rec_fn = RecordFunction(ProfilerStepNameConstant.PROFILER_STEP + str(self.step_num))
+            self._step_rec_fn.start()

     def stop(self) -> None:
         """
-        Used for Ascend, GPU, stop profiling. Profiling can be turned off based on step and epoch.
+        Turn off Profiler data collection. The Profiler can be turned off conditionally.
+
+        Raises:
+            RuntimeError: If the profiler has not started, this function is disabled.
+
+        Examples:
+            >>> from mindspore.train import Callback
+            >>> from mindspore import Profiler
+            >>> class StopAtEpoch(Callback):
+            ...     def __init__(self, start_epoch, stop_epoch):
+            ...         super(StopAtEpoch, self).__init__()
+            ...         self.start_epoch = start_epoch
+            ...         self.stop_epoch = stop_epoch
+            ...         self.profiler = Profiler(start_profile=False)
+            ...
+            ...     def epoch_begin(self, run_context):
+            ...         cb_params = run_context.original_args()
+            ...         epoch_num = cb_params.cur_epoch_num
+            ...         if epoch_num == self.start_epoch:
+            ...             self.profiler.start()
+            ...
+            ...     def epoch_end(self, run_context):
+            ...         cb_params = run_context.original_args()
+            ...         epoch_num = cb_params.cur_epoch_num
+            ...         if epoch_num == self.stop_epoch:
+            ...             self.profiler.stop()
+            ...
+            ...     def end(self, run_context):
+            ...         self.profiler.analyse()
         """
-        if self._has_started:
-            self._has_started = False
+        if self._schedule_no_use_step:
+            logger.warning("The profiler has schedule. Please use step() to collect data.")
+            return
+        if not self._has_started:
+            logger.error("The profiler has not started. Do not turn off again in the closed state.")
+            return
+        self._has_started = False
+        if self._record_steps and self._step_rec_fn:
+            self._step_rec_fn.stop()
+        if self.schedule_arg:
+            self.action_controller.transit_action(self.current_action, None)
         else:
-            raise RuntimeError("The profiler has not started, so can not stop. Please call the start() method "
-                               "before calling the stop() method.")
-
-        self._cpu_profiler.stop()
-        self._device_profiler.stop()
+            ProfilerInterface.stop()
+            self._dump_metadata()

     def analyse(self, offline_path=None, pretty=False, step_list=None, mode="sync") -> None:
         """
@@ -115,31 +336,61 @@ class NewProfiler:
             Offline mode is used in abnormal exit scenarios. This parameter should be set to ``None``
                 for online mode. Default: ``None``.
             pretty (bool, optional): Whether to pretty-print json files. Default: ``False``.
-            step_list (list, optional): A list of steps that need to be analyzed. Default: ``None``.
-                By default, all steps will be analyzed.
+            step_list (list, optional): A list of steps that need to be analyzed; the steps must be
+                consecutive integers. Default: ``None``. By default, all steps will be analyzed.
             mode (str, optional): Analysis mode, it must be one of ["sync", "async"]. Default: ``sync``.

                 - sync: analyse data in current process, it will block the current process.
-                - async: analyse data in subprocess, it will not the current process.Since the parsing process
+                - async: analyse data in subprocess, it will not block the current process. Since the parsing process
                   will take up extra CPU resources, please enable this mode according to the actual resource situation.

+        Examples:
+            >>> from mindspore.train import Callback
+            >>> from mindspore import Profiler
+            >>> class StopAtStep(Callback):
+            ...     def __init__(self, start_step=1, stop_step=5):
+            ...         super(StopAtStep, self).__init__()
+            ...         self.start_step = start_step
+            ...         self.stop_step = stop_step
+            ...         self.profiler = Profiler(start_profile=False)
+            ...
+            ...     def step_begin(self, run_context):
+            ...         cb_params = run_context.original_args()
+            ...         step_num = cb_params.cur_step_num
+            ...         if step_num == self.start_step:
+            ...             self.profiler.start()
+            ...
+            ...     def step_end(self, run_context):
+            ...         cb_params = run_context.original_args()
+            ...         step_num = cb_params.cur_step_num
+            ...         if step_num == self.stop_step:
+            ...             self.profiler.stop()
+            ...
+            ...     def end(self, run_context):
+            ...         self.profiler.analyse(step_list=[2,3,4], mode="sync")
         """
-        self._cpu_profiler.stop(offline_path, pretty, step_list)
-        self._device_profiler.stop(offline_path, pretty, step_list, mode)
+        if self._has_started:
+            ProfilerInterface.stop()
+            self._has_started = False

-    def op_analyse(self, op_name, device_id=None) -> None:
-        """
-        Profiler users can use this interface to obtain operator performance data.
+        if self.schedule_arg:
+            logger.warning("The profiler has schedule. Please use 'on_trace_ready' to analyse data.")
+            return

-        Args:
-            op_name (str or list): The primitive operator name to query.
-            device_id (int, optional): ID of the target device. This parameter is optional during network training or
-                inference, and users can use device_id parameter to specify which card operator performance data to
-                parse. If this interface is used for offline data parsing, Default: ``0`` .
-        """
+        if offline_path:
+            logger.warning("The parameter 'offline_path' for Profiler.analyse() is deprecated, "
+                           "please use Profiler.offline_analyse() instead.")
+
+        self._prof_context.pretty = pretty
+        self._prof_context.step_list = step_list
+        self._prof_context.mode = mode
+
+        ProfilerInterface.finalize()
+        ProfilerInterface.analyse()
+        ProfilerInterface.clear()

     @classmethod
-    def offline_analyse(cls, path: str, pretty=False, step_list=None) -> None:
+    def offline_analyse(cls, path: str, pretty=False, step_list=None, data_simplification=True) -> None:
         """
         Analyze training performance data offline, which is invoked after performance data collection is completed.

@@ -147,7 +398,243 @@ class NewProfiler:
             path (str): The profiling data path which needs to be analyzed offline.
                 There needs to be a profiler directory in this path.
             pretty (bool, optional): Whether to pretty-print json files. Default: ``False``.
-            step_list (list, optional): A list of steps that need to be analyzed. Default: ``None``.
-                By default, all steps will be analyzed.
+            step_list (list, optional): A list of steps that need to be analyzed; the steps must be
+                consecutive integers. Default: ``None``. By default, all steps will be analyzed.
+            data_simplification (bool, optional): Whether to enable data simplification. Default: ``True``.
+
+        Examples:
+            >>> from mindspore import Profiler
+            >>> Profiler.offline_analyse("./profiling_path")
         """
-        return
+        real_path = PathManager.get_real_path(path)
+        PathManager.check_input_directory_path(real_path)
+        ascend_ms_path_list = PathManager.get_ascend_ms_path_list(real_path)
+
+        if not ascend_ms_path_list:
+            msg = (f"Invalid path: {real_path}. Expected a *_ascend_ms_* directory "
+                   "or a parent directory of multiple *_ascend_ms_*")
+            logger.error(msg)
+            return
+
+        worker_number = min(os.cpu_count() // 2, len(ascend_ms_path_list))
+        with ProcessPoolExecutor(max_workers=worker_number) as executor:
+            futures = [
+                executor.submit(
+                    NPUProfilerAnalysis.offline_analyse,
+                    ascend_ms_path,
+                    pretty,
+                    step_list,
+                    data_simplification
+                ) for ascend_ms_path in ascend_ms_path_list
+            ]
+            # Wait for all tasks to complete
+            for future in as_completed(futures):
+                try:
+                    future.result()
+                except Exception as e:  # pylint: disable=W0703
+                    logger.error("offline analysis failed: %s", str(e))
+
+    def step(self) -> None:
+        """
+        Used on Ascend; distinguishes step collection and parsing of performance data through
+        schedule and on_trace_ready.
+
+        Raises:
+            RuntimeError: If the `start_profile` parameter is not set or the Profiler is not started.
+            RuntimeError: If the `schedule` parameter is not set.
+
+        Examples:
+            >>> import numpy as np
+            >>> import mindspore as ms
+            >>> import mindspore.dataset as ds
+            >>> from mindspore import context, nn, Profiler
+            >>> from mindspore.profiler import schedule, tensor_board_trace_handler
+            >>>
+            >>> class Net(nn.Cell):
+            ...     def __init__(self):
+            ...         super(Net, self).__init__()
+            ...         self.fc = nn.Dense(2, 2)
+            ...
+            ...     def construct(self, x):
+            ...         return self.fc(x)
+            >>>
+            >>> def generator_net():
+            ...     for _ in range(2):
+            ...         yield np.ones([2, 2]).astype(np.float32), np.ones([2]).astype(np.int32)
+            >>>
+            >>> def train(test_net):
+            ...     optimizer = nn.Momentum(test_net.trainable_params(), 1, 0.9)
+            ...     loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True)
+            ...     data = ds.GeneratorDataset(generator_net(), ["data", "label"])
+            ...     model = ms.train.Model(test_net, loss, optimizer)
+            ...     model.train(1, data)
+            >>>
+            >>> if __name__ == '__main__':
+            ...     context.set_context(mode=ms.PYNATIVE_MODE, device_target="Ascend")
+            ...
+            ...     net = Net()
+            ...     STEP_NUM = 15
+            ...
+            ...     with Profiler(schedule=schedule(wait=1, warmup=1, active=2, repeat=1, skip_first=2),
+            ...                   on_trace_ready=tensor_board_trace_handler) as prof:
+            ...         for i in range(STEP_NUM):
+            ...             train(net)
+            ...             prof.step()
+        """
+        if self.schedule_arg is None:
+            logger.error("With no schedule in the Profiler, step takes no effect!")
+            return
+        if not self._has_started:
+            logger.error("Profiler is stopped, step takes no effect!")
+            return
+        if self._step_rec_fn:
+            self._step_rec_fn.stop()
+        prev_action = self.current_action
+        self.step_num += 1
+        self.current_action = self.schedule(self.step_num)
+        self.action_controller.transit_action(prev_action, self.current_action)
+        self._step_rec_fn = RecordFunction(ProfilerStepNameConstant.PROFILER_STEP + str(self.step_num))
+        self._step_rec_fn.start()
+        self._schedule_no_use_step = False
+
+    def add_metadata(self, key: str, value: str):
+        """
+        Report custom metadata key-value pair data.
+
+        Args:
+            key (str): The key to the metadata.
+            value (str): The value to the metadata.
+
+        Examples:
+            >>> from mindspore import Profiler
+            >>> # Profiler init.
+            >>> profiler = Profiler()
+            >>> # Call Profiler add_metadata
+            >>> profiler.add_metadata("test_key", "test_value")
+            >>> # Profiler end
+            >>> profiler.analyse()
+        """
+        if not isinstance(key, str) or not isinstance(value, str):
+            logger.warning("The key and value of metadata must be string. Skip this metadata.")
+            return
+
+        add_size = getsizeof(key) + getsizeof(value)
+        if getsizeof(self._metadata) + add_size < self.MAX_META_SIZE:
+            if key in self._metadata:
+                logger.warning(f"{key} is already saved as metadata, override it.")
+            self._metadata[key] = value
+        else:
+            logger.warning("Too many metadata added. Skip this metadata")
+
+    def add_metadata_json(self, key: str, value: str):
+        """
+        Report custom metadata key-value pair data, where the value is a JSON string.
+
+        Args:
+            key (str): The key to the metadata.
+            value (str): The json str format value to the metadata.
+
+        Examples:
+            >>> import json
+            >>> from mindspore import Profiler
+            >>> # Profiler init.
+            >>> profiler = Profiler()
+            >>> # Call Profiler add_metadata_json
+            >>> profiler.add_metadata_json("test_key", json.dumps({"key1": 1, "key2": 2}))
+            >>> # Profiler end, metadata will be saved in profiler_metadata.json
+            >>> profiler.analyse()
+        """
+        if not isinstance(key, str) or not isinstance(value, str):
+            logger.warning("The key and value of metadata must be string. Skip this metadata.")
+            return
+
+        add_size = getsizeof(key) + getsizeof(value)
+        if getsizeof(self._metadata) + add_size < self.MAX_META_SIZE:
+            try:
+                if key in self._metadata:
+                    logger.warning(f"{key} is already saved as metadata, override it.")
+                self._metadata[key] = json.loads(value)
+            except ValueError:
+                logger.warning("The metadata value must be json format string. Skip this metadata")
+        else:
+            logger.warning("Too many metadata added. Skip this metadata")
+
+    def op_analyse(self, op_name, device_id=None):
+        """
+        Profiler users can use this interface to obtain operator performance data.
+
+        Args:
+            op_name (str or list): The primitive operator name to query.
+            device_id (int, optional): ID of the target device. This parameter is optional during network training or
+                inference, and users can use device_id parameter to specify which card operator performance data to
+                parse. If this interface is used for offline data parsing, the default value is ``None`` .
+
+        Raises:
+            TypeError: If the `op_name` parameter type is incorrect.
+            TypeError: If the `device_id` parameter type is incorrect.
+            RuntimeError: If MindSpore runs on Ascend, this interface cannot be used.
+
+        Supported Platforms:
+            ``GPU`` ``CPU``
+
+        Examples:
+            >>> from mindspore import Profiler
+            >>> from mindspore import nn
+            >>> from mindspore import Model
+            >>> # Profiler init.
+            >>> profiler = Profiler()
+            >>> # Train Model or eval Model, taking LeNet5 as an example.
+            >>> # Refer to https://gitee.com/mindspore/docs/blob/master/docs/mindspore/code/lenet.py
+            >>> net = LeNet5()
+            >>> optimizer = nn.Momentum(net.trainable_params(), learning_rate=0.1, momentum=0.9)
+            >>> loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True)
+            >>> # Create the dataset taking MNIST as an example.
+            >>> # Refer to https://gitee.com/mindspore/docs/blob/master/docs/mindspore/code/mnist.py
+            >>> dataloader = create_dataset()
+            >>> model = Model(net, loss, optimizer)
+            >>> model.train(5, dataloader, dataset_sink_mode=False)
+            >>>
+            >>> # Profiler end
+            >>> profiler.analyse()
+            >>>
+            >>> profiler.op_analyse(op_name=["BiasAdd", "Conv2D"])
+        """
+        if self._prof_context.device_target == DeviceTarget.NPU.value:
+            raise RuntimeError("The Interface 'Profiler.op_analyse()' is not supported on Ascend currently.")
+
+        if device_id and not isinstance(device_id, int):
+            raise TypeError(f"For 'Profiler.op_analyse()', the parameter device_id must be int, "
+                            f"but got type {type(device_id)}")
+
+        if not isinstance(op_name, str) and not isinstance(op_name, list):
+            raise TypeError(f"For 'Profiler.op_analyse()', the parameter op_name must be str or list, "
+                            f"but got type {type(op_name)}")
+        if not op_name:
+            raise TypeError(f"For 'Profiler.op_analyse()', the parameter op_name cannot be \"\", '' or [].")
+
+        from mindspore.profiler.parser.framework_parser import GpuFrameWorkParser
+        dev_id = self._prof_context.device_id if device_id is None else device_id
+        parser = GpuFrameWorkParser(self._prof_context.framework_path, dev_id, op_name)
+        op_info = parser.parse()
+        return op_info
+
+    def _dump_metadata(self):
+        """Dump metadata to file."""
+        if not self._metadata:
+            return
+        save_path = os.path.join(self._prof_context.ascend_ms_dir, "profiler_metadata.json")
+        FileManager.create_json_file(save_path, self._metadata, indent=4)
+        self._metadata.clear()
+
+    def __enter__(self) -> 'Profiler':
+        if not self._has_started:
+            self.start()
+        return self
+
+    def __exit__(self, exc_type, exc_value, traceback) -> None:
+        if self._has_started:
+            self.stop()
+
+    def __del__(self):
+        if self._has_started:
+            self.stop()
+            logger.warning("Profiler is stopped at the end of the program.")