mindspore 2.6.0__cp310-cp310-win_amd64.whl → 2.7.0__cp310-cp310-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mindspore might be problematic. Click here for more details.

Files changed (455) hide show
  1. mindspore/.commit_id +1 -1
  2. mindspore/Microsoft.VisualStudio.Telemetry.dll +0 -0
  3. mindspore/Newtonsoft.Json.dll +0 -0
  4. mindspore/__init__.py +2 -2
  5. mindspore/_c_dataengine.cp310-win_amd64.pyd +0 -0
  6. mindspore/_c_expression.cp310-win_amd64.pyd +0 -0
  7. mindspore/_c_mindrecord.cp310-win_amd64.pyd +0 -0
  8. mindspore/_checkparam.py +42 -11
  9. mindspore/_extends/builtin_operations.py +3 -3
  10. mindspore/{_deprecated → _extends/optimize}/__init__.py +9 -3
  11. mindspore/_extends/optimize/cell_utils.py +96 -0
  12. mindspore/_extends/parallel_compile/akg_compiler/custom.py +1109 -0
  13. mindspore/_extends/parallel_compile/akg_compiler/gen_custom_op_files.py +1 -1
  14. mindspore/_extends/parse/__init__.py +3 -3
  15. mindspore/_extends/parse/compile_config.py +44 -22
  16. mindspore/_extends/parse/deprecated/deprecated_tensor_method.py +1 -2
  17. mindspore/_extends/parse/parser.py +64 -83
  18. mindspore/_extends/parse/resources.py +39 -0
  19. mindspore/_extends/parse/standard_method.py +47 -14
  20. mindspore/_extends/parse/trope.py +8 -1
  21. mindspore/_extends/pijit/__init__.py +1 -2
  22. mindspore/_extends/pijit/pijit_func_white_list.py +2 -5
  23. mindspore/amp.py +4 -22
  24. mindspore/atlprov.dll +0 -0
  25. mindspore/avcodec-59.dll +0 -0
  26. mindspore/avdevice-59.dll +0 -0
  27. mindspore/avfilter-8.dll +0 -0
  28. mindspore/avformat-59.dll +0 -0
  29. mindspore/avutil-57.dll +0 -0
  30. mindspore/boost/adasum.py +1 -1
  31. mindspore/boost/boost_cell_wrapper.py +4 -4
  32. mindspore/c1.dll +0 -0
  33. mindspore/c1xx.dll +0 -0
  34. mindspore/c2.dll +0 -0
  35. mindspore/common/__init__.py +43 -12
  36. mindspore/common/_grad_function.py +2 -1
  37. mindspore/common/_pijit_context.py +28 -7
  38. mindspore/common/_stub_tensor.py +1 -209
  39. mindspore/common/_tensor_cpp_method.py +1 -1
  40. mindspore/common/_tensor_docs.py +177 -52
  41. mindspore/common/_utils.py +9 -1
  42. mindspore/common/api.py +338 -208
  43. mindspore/common/dtype.py +108 -57
  44. mindspore/common/dump.py +11 -16
  45. mindspore/common/dynamic_shape/__init__.py +0 -0
  46. mindspore/common/{auto_dynamic_shape.py → dynamic_shape/auto_dynamic_shape.py} +17 -23
  47. mindspore/common/dynamic_shape/enable_dynamic.py +197 -0
  48. mindspore/common/file_system.py +59 -9
  49. mindspore/common/generator.py +2 -3
  50. mindspore/common/hook_handle.py +33 -5
  51. mindspore/common/jit_config.py +1 -1
  52. mindspore/common/jit_trace.py +84 -105
  53. mindspore/common/np_dtype.py +3 -3
  54. mindspore/common/parameter.py +27 -29
  55. mindspore/common/recompute.py +5 -7
  56. mindspore/common/sparse_tensor.py +0 -3
  57. mindspore/common/symbol.py +0 -1
  58. mindspore/common/tensor.py +84 -133
  59. mindspore/communication/_comm_helper.py +46 -4
  60. mindspore/communication/management.py +79 -7
  61. mindspore/context.py +47 -38
  62. mindspore/dataset/__init__.py +1 -1
  63. mindspore/dataset/audio/transforms.py +1 -1
  64. mindspore/dataset/core/config.py +38 -4
  65. mindspore/dataset/engine/datasets.py +350 -322
  66. mindspore/dataset/engine/datasets_user_defined.py +69 -23
  67. mindspore/dataset/engine/iterators.py +2 -2
  68. mindspore/dataset/engine/obs/config_loader.py +2 -2
  69. mindspore/dataset/engine/obs/obs_mindrecord_dataset.py +8 -0
  70. mindspore/dataset/transforms/c_transforms.py +2 -2
  71. mindspore/dataset/transforms/py_transforms.py +7 -3
  72. mindspore/dataset/transforms/transforms.py +10 -6
  73. mindspore/dataset/vision/__init__.py +1 -1
  74. mindspore/dataset/vision/py_transforms.py +8 -8
  75. mindspore/dataset/vision/transforms.py +17 -5
  76. mindspore/dataset/vision/utils.py +632 -21
  77. mindspore/dataset/vision/validators.py +1 -0
  78. mindspore/device_context/ascend/device.py +1 -1
  79. mindspore/device_context/ascend/op_tuning.py +35 -1
  80. mindspore/device_context/gpu/__init__.py +2 -2
  81. mindspore/device_context/gpu/device.py +1 -1
  82. mindspore/device_context/gpu/op_precision.py +4 -2
  83. mindspore/device_context/gpu/op_tuning.py +6 -3
  84. mindspore/device_manager.py +16 -9
  85. mindspore/dnnl.dll +0 -0
  86. mindspore/dpcmi.dll +0 -0
  87. mindspore/experimental/llm_boost/ascend_native/llama_boost_ascend_native.py +5 -4
  88. mindspore/experimental/llm_boost/atb/boost_base.py +2 -3
  89. mindspore/experimental/optim/adadelta.py +13 -20
  90. mindspore/experimental/optim/adagrad.py +15 -22
  91. mindspore/experimental/optim/adam.py +17 -24
  92. mindspore/experimental/optim/adamax.py +14 -22
  93. mindspore/experimental/optim/adamw.py +28 -34
  94. mindspore/experimental/optim/asgd.py +15 -25
  95. mindspore/experimental/optim/lr_scheduler.py +27 -45
  96. mindspore/experimental/optim/nadam.py +14 -24
  97. mindspore/experimental/optim/optimizer.py +13 -23
  98. mindspore/experimental/optim/radam.py +18 -24
  99. mindspore/experimental/optim/rmsprop.py +14 -25
  100. mindspore/experimental/optim/rprop.py +15 -26
  101. mindspore/experimental/optim/sgd.py +9 -19
  102. mindspore/hal/__init__.py +4 -4
  103. mindspore/hal/contiguous_tensors_handle.py +2 -2
  104. mindspore/hal/memory.py +1 -0
  105. mindspore/include/api/cell.h +65 -5
  106. mindspore/include/api/cfg.h +24 -7
  107. mindspore/include/api/context.h +1 -0
  108. mindspore/include/api/delegate.h +10 -2
  109. mindspore/include/api/dual_abi_helper.h +100 -19
  110. mindspore/include/api/graph.h +14 -1
  111. mindspore/include/api/kernel.h +16 -3
  112. mindspore/include/api/kernel_api.h +9 -1
  113. mindspore/include/api/metrics/accuracy.h +9 -0
  114. mindspore/include/api/model.h +8 -1
  115. mindspore/include/api/model_group.h +4 -0
  116. mindspore/include/api/model_parallel_runner.h +2 -0
  117. mindspore/include/api/status.h +48 -10
  118. mindspore/include/api/types.h +8 -3
  119. mindspore/include/c_api/model_c.h +0 -58
  120. mindspore/include/c_api/tensor_c.h +0 -26
  121. mindspore/include/dataset/constants.h +9 -0
  122. mindspore/include/dataset/vision_ascend.h +1 -1
  123. mindspore/jpeg62.dll +0 -0
  124. mindspore/mindrecord/tools/cifar10.py +61 -11
  125. mindspore/mindrecord/tools/cifar10_to_mr.py +5 -0
  126. mindspore/mindspore_backend_common.dll +0 -0
  127. mindspore/mindspore_backend_manager.dll +0 -0
  128. mindspore/mindspore_common.dll +0 -0
  129. mindspore/mindspore_core.dll +0 -0
  130. mindspore/mindspore_cpu_res_manager.dll +0 -0
  131. mindspore/mindspore_dump.dll +0 -0
  132. mindspore/mindspore_frontend.dll +0 -0
  133. mindspore/mindspore_glog.dll +0 -0
  134. mindspore/mindspore_memory_pool.dll +0 -0
  135. mindspore/mindspore_ms_backend.dll +0 -0
  136. mindspore/mindspore_ops.dll +0 -0
  137. mindspore/mindspore_ops_host.dll +0 -0
  138. mindspore/mindspore_ops_kernel_common.dll +0 -0
  139. mindspore/mindspore_profiler.dll +0 -0
  140. mindspore/mindspore_pyboost.dll +0 -0
  141. mindspore/mindspore_pynative.dll +0 -0
  142. mindspore/mindspore_res_manager.dll +0 -0
  143. mindspore/mindspore_runtime_pipeline.dll +0 -0
  144. mindspore/mint/__init__.py +4 -44
  145. mindspore/mint/distributed/__init__.py +5 -0
  146. mindspore/mint/distributed/distributed.py +425 -19
  147. mindspore/mint/nn/__init__.py +1 -1
  148. mindspore/mint/nn/functional.py +53 -6
  149. mindspore/mint/nn/layer/_functions.py +163 -294
  150. mindspore/mint/nn/layer/activation.py +8 -6
  151. mindspore/mint/nn/layer/conv.py +125 -101
  152. mindspore/mint/nn/layer/normalization.py +11 -25
  153. mindspore/mint/optim/adam.py +19 -18
  154. mindspore/mint/optim/adamw.py +14 -8
  155. mindspore/mint/optim/sgd.py +5 -5
  156. mindspore/msobj140.dll +0 -0
  157. mindspore/mspdb140.dll +0 -0
  158. mindspore/mspdbcore.dll +0 -0
  159. mindspore/mspdbst.dll +0 -0
  160. mindspore/mspft140.dll +0 -0
  161. mindspore/msvcdis140.dll +0 -0
  162. mindspore/msvcp140_1.dll +0 -0
  163. mindspore/msvcp140_2.dll +0 -0
  164. mindspore/msvcp140_atomic_wait.dll +0 -0
  165. mindspore/msvcp140_codecvt_ids.dll +0 -0
  166. mindspore/nn/cell.py +488 -620
  167. mindspore/nn/grad/cell_grad.py +11 -12
  168. mindspore/nn/layer/activation.py +36 -36
  169. mindspore/nn/layer/basic.py +74 -77
  170. mindspore/nn/layer/channel_shuffle.py +4 -4
  171. mindspore/nn/layer/combined.py +4 -2
  172. mindspore/nn/layer/conv.py +86 -85
  173. mindspore/nn/layer/dense.py +9 -7
  174. mindspore/nn/layer/embedding.py +50 -52
  175. mindspore/nn/layer/image.py +38 -40
  176. mindspore/nn/layer/math.py +111 -112
  177. mindspore/nn/layer/normalization.py +56 -44
  178. mindspore/nn/layer/pooling.py +58 -63
  179. mindspore/nn/layer/rnn_cells.py +33 -33
  180. mindspore/nn/layer/rnns.py +56 -56
  181. mindspore/nn/layer/thor_layer.py +74 -73
  182. mindspore/nn/layer/transformer.py +11 -1
  183. mindspore/nn/learning_rate_schedule.py +20 -20
  184. mindspore/nn/loss/loss.py +79 -81
  185. mindspore/nn/optim/adam.py +2 -4
  186. mindspore/nn/optim/adasum.py +2 -2
  187. mindspore/nn/optim/lamb.py +1 -3
  188. mindspore/nn/optim/optimizer.py +1 -1
  189. mindspore/nn/optim/tft_wrapper.py +2 -3
  190. mindspore/nn/optim/thor.py +2 -2
  191. mindspore/nn/probability/distribution/_utils/utils.py +2 -2
  192. mindspore/nn/probability/distribution/exponential.py +2 -1
  193. mindspore/nn/probability/distribution/poisson.py +2 -1
  194. mindspore/nn/sparse/sparse.py +3 -3
  195. mindspore/nn/wrap/cell_wrapper.py +73 -42
  196. mindspore/nn/wrap/grad_reducer.py +37 -52
  197. mindspore/nn/wrap/loss_scale.py +72 -74
  198. mindspore/numpy/array_creations.py +7 -7
  199. mindspore/numpy/fft.py +1 -1
  200. mindspore/numpy/math_ops.py +1 -1
  201. mindspore/numpy/utils_const.py +1 -1
  202. mindspore/opencv_core452.dll +0 -0
  203. mindspore/opencv_imgcodecs452.dll +0 -0
  204. mindspore/opencv_imgproc452.dll +0 -0
  205. mindspore/ops/_grad_experimental/grad_comm_ops.py +51 -13
  206. mindspore/ops/_grad_experimental/grad_debug_ops.py +14 -0
  207. mindspore/ops/_grad_experimental/grad_inner_ops.py +0 -9
  208. mindspore/ops/_op_impl/cpu/__init__.py +1 -0
  209. mindspore/{experimental/es/__init__.py → ops/_op_impl/cpu/joinedstr_op.py} +12 -6
  210. mindspore/ops/_vmap/vmap_array_ops.py +6 -13
  211. mindspore/ops/_vmap/vmap_nn_ops.py +8 -16
  212. mindspore/ops/auto_generate/cpp_create_prim_instance_helper.py +29 -10
  213. mindspore/ops/auto_generate/gen_extend_func.py +5 -55
  214. mindspore/ops/auto_generate/gen_ops_def.py +753 -273
  215. mindspore/ops/auto_generate/gen_ops_prim.py +1687 -958
  216. mindspore/ops/auto_generate/pyboost_inner_prim.py +31 -1
  217. mindspore/ops/composite/__init__.py +10 -0
  218. mindspore/ops/composite/base.py +9 -5
  219. mindspore/ops/composite/multitype_ops/__init__.py +12 -1
  220. mindspore/ops/composite/multitype_ops/_compile_utils.py +132 -108
  221. mindspore/ops/composite/multitype_ops/_constexpr_utils.py +1 -1
  222. mindspore/ops/composite/multitype_ops/add_impl.py +70 -2
  223. mindspore/ops/composite/multitype_ops/div_impl.py +49 -0
  224. mindspore/ops/composite/multitype_ops/floordiv_impl.py +29 -0
  225. mindspore/ops/composite/multitype_ops/getitem_impl.py +11 -0
  226. mindspore/ops/composite/multitype_ops/mod_impl.py +5 -3
  227. mindspore/ops/composite/multitype_ops/mul_impl.py +49 -0
  228. mindspore/ops/composite/multitype_ops/setitem_impl.py +57 -0
  229. mindspore/ops/composite/multitype_ops/sub_impl.py +34 -0
  230. mindspore/ops/composite/multitype_ops/zeros_like_impl.py +14 -0
  231. mindspore/ops/function/__init__.py +4 -1
  232. mindspore/ops/function/_add_attr_func.py +11 -6
  233. mindspore/ops/function/array_func.py +17 -100
  234. mindspore/ops/function/debug_func.py +8 -5
  235. mindspore/ops/function/grad/grad_func.py +5 -13
  236. mindspore/ops/function/math_func.py +65 -399
  237. mindspore/ops/function/nn_func.py +44 -61
  238. mindspore/ops/function/other_func.py +4 -1
  239. mindspore/ops/function/random_func.py +31 -4
  240. mindspore/ops/functional.py +2 -3
  241. mindspore/ops/functional_overload.py +486 -18
  242. mindspore/ops/op_info_register.py +21 -0
  243. mindspore/ops/operations/__init__.py +5 -2
  244. mindspore/ops/operations/_custom_ops_utils.py +675 -8
  245. mindspore/ops/operations/_inner_ops.py +14 -18
  246. mindspore/ops/operations/_sequence_ops.py +1 -1
  247. mindspore/ops/operations/array_ops.py +4 -50
  248. mindspore/ops/operations/comm_ops.py +186 -41
  249. mindspore/ops/operations/custom_ops.py +244 -175
  250. mindspore/ops/operations/debug_ops.py +55 -4
  251. mindspore/ops/operations/image_ops.py +13 -13
  252. mindspore/ops/operations/manually_defined/ops_def.py +27 -28
  253. mindspore/ops/operations/math_ops.py +8 -9
  254. mindspore/ops/operations/nn_ops.py +6 -7
  255. mindspore/ops/primitive.py +9 -20
  256. mindspore/ops/tensor_method.py +52 -11
  257. mindspore/ops_generate/api/cpp_create_prim_instance_helper_generator.py +1 -1
  258. mindspore/ops_generate/api/functional_map_cpp_generator.py +10 -9
  259. mindspore/ops_generate/api/functions_cc_generator.py +58 -10
  260. mindspore/ops_generate/api/tensor_func_reg_cpp_generator.py +1 -1
  261. mindspore/ops_generate/common/base_generator.py +14 -0
  262. mindspore/ops_generate/common/gen_constants.py +7 -2
  263. mindspore/ops_generate/common/gen_utils.py +0 -19
  264. mindspore/ops_generate/common/op_proto.py +11 -4
  265. mindspore/ops_generate/common/template.py +88 -11
  266. mindspore/ops_generate/gen_ops.py +1 -1
  267. mindspore/ops_generate/op_def/lite_ops_cpp_generator.py +4 -4
  268. mindspore/ops_generate/op_def/ops_name_h_generator.py +0 -3
  269. mindspore/ops_generate/op_def/ops_primitive_h_generator.py +0 -4
  270. mindspore/ops_generate/op_def_py/op_prim_py_generator.py +5 -2
  271. mindspore/ops_generate/pyboost/auto_grad_impl_cc_generator.py +49 -8
  272. mindspore/ops_generate/pyboost/auto_grad_reg_cc_generator.py +2 -2
  273. mindspore/ops_generate/pyboost/gen_pyboost_func.py +31 -16
  274. mindspore/ops_generate/pyboost/op_template_parser.py +98 -72
  275. mindspore/ops_generate/pyboost/pyboost_functions_cpp_generator.py +70 -273
  276. mindspore/ops_generate/pyboost/pyboost_functions_h_generator.py +14 -6
  277. mindspore/ops_generate/pyboost/pyboost_functions_impl_cpp_generator.py +316 -0
  278. mindspore/ops_generate/pyboost/pyboost_functions_py_generator.py +1 -1
  279. mindspore/ops_generate/pyboost/pyboost_grad_function_cpp_generator.py +5 -3
  280. mindspore/ops_generate/pyboost/pyboost_inner_prim_generator.py +1 -1
  281. mindspore/ops_generate/pyboost/pyboost_internal_functions_cpp_generator.py +76 -0
  282. mindspore/ops_generate/pyboost/pyboost_internal_functions_h_generator.py +76 -0
  283. mindspore/ops_generate/pyboost/pyboost_internal_kernel_info_adapter_generator.py +125 -0
  284. mindspore/ops_generate/pyboost/pyboost_native_grad_functions_generator.py +4 -3
  285. mindspore/ops_generate/pyboost/pyboost_op_cpp_code_generator.py +348 -61
  286. mindspore/ops_generate/pyboost/pyboost_overload_functions_cpp_generator.py +1 -1
  287. mindspore/ops_generate/pyboost/pyboost_utils.py +118 -9
  288. mindspore/ops_generate/tensor_py_cc_generator.py +1 -24
  289. mindspore/parallel/_auto_parallel_context.py +9 -17
  290. mindspore/parallel/_cell_wrapper.py +106 -40
  291. mindspore/parallel/_parallel_serialization.py +4 -3
  292. mindspore/parallel/_ps_context.py +4 -6
  293. mindspore/parallel/_tensor.py +167 -12
  294. mindspore/parallel/_transformer/moe.py +1 -1
  295. mindspore/parallel/_transformer/transformer.py +17 -12
  296. mindspore/parallel/_utils.py +5 -11
  297. mindspore/parallel/auto_parallel.py +33 -12
  298. mindspore/parallel/checkpoint_convert.py +3 -3
  299. mindspore/parallel/checkpoint_transform.py +5 -1
  300. mindspore/parallel/cluster/process_entity/_api.py +88 -49
  301. mindspore/parallel/cluster/process_entity/_utils.py +95 -7
  302. mindspore/parallel/cluster/run.py +48 -7
  303. mindspore/parallel/function/__init__.py +8 -1
  304. mindspore/parallel/function/reshard_func.py +7 -6
  305. mindspore/parallel/nn/__init__.py +15 -2
  306. mindspore/parallel/nn/parallel_cell_wrapper.py +50 -14
  307. mindspore/parallel/nn/parallel_grad_reducer.py +7 -14
  308. mindspore/parallel/shard.py +9 -23
  309. mindspore/parallel/transform_safetensors.py +468 -174
  310. mindspore/pgodb140.dll +0 -0
  311. mindspore/pgort140.dll +0 -0
  312. mindspore/profiler/__init__.py +2 -1
  313. mindspore/profiler/analysis/parser/timeline_assembly_factory/ascend_timeline_assembler.py +7 -7
  314. mindspore/profiler/analysis/parser/timeline_assembly_factory/base_timeline_assembler.py +3 -0
  315. mindspore/profiler/analysis/parser/timeline_assembly_factory/trace_view_container.py +3 -0
  316. mindspore/profiler/analysis/parser/timeline_creator/cpu_op_timeline_creator.py +3 -3
  317. mindspore/profiler/analysis/parser/timeline_creator/fwk_timeline_creator.py +3 -3
  318. mindspore/profiler/analysis/parser/timeline_creator/msprof_timeline_creator.py +4 -4
  319. mindspore/profiler/analysis/parser/timeline_creator/scope_layer_timeline_creator.py +3 -3
  320. mindspore/profiler/analysis/parser/timeline_event/fwk_event.py +4 -1
  321. mindspore/profiler/analysis/parser/timeline_event/timeline_event_pool.py +2 -1
  322. mindspore/profiler/analysis/task_manager.py +1 -1
  323. mindspore/profiler/analysis/viewer/ascend_communication_viewer.py +5 -1
  324. mindspore/profiler/analysis/viewer/ascend_integrate_viewer.py +2 -1
  325. mindspore/profiler/analysis/viewer/ascend_kernel_details_viewer.py +10 -9
  326. mindspore/profiler/analysis/viewer/ascend_op_memory_viewer.py +43 -23
  327. mindspore/profiler/analysis/viewer/ascend_step_trace_time_viewer.py +3 -2
  328. mindspore/profiler/analysis/viewer/ms_minddata_viewer.py +9 -5
  329. mindspore/profiler/analysis/viewer/ms_operator_details_viewer.py +132 -0
  330. mindspore/profiler/common/constant.py +16 -0
  331. mindspore/profiler/common/msprof_cmd_tool.py +2 -2
  332. mindspore/profiler/common/path_manager.py +9 -0
  333. mindspore/profiler/common/profiler_context.py +50 -29
  334. mindspore/profiler/common/profiler_info.py +0 -16
  335. mindspore/profiler/common/profiler_meta_data.py +1 -0
  336. mindspore/profiler/common/profiler_op_analyse.py +239 -0
  337. mindspore/profiler/common/profiler_output_path.py +23 -8
  338. mindspore/profiler/common/profiler_parameters.py +128 -35
  339. mindspore/profiler/dynamic_profile/__init__.py +0 -0
  340. mindspore/profiler/dynamic_profile/dynamic_monitor_proxy.py +39 -0
  341. mindspore/profiler/dynamic_profile/dynamic_profiler_config_context.py +666 -0
  342. mindspore/profiler/dynamic_profile/dynamic_profiler_utils.py +62 -0
  343. mindspore/profiler/dynamic_profiler.py +374 -338
  344. mindspore/profiler/envprofiler.py +42 -12
  345. mindspore/profiler/experimental_config.py +112 -7
  346. mindspore/profiler/mstx.py +33 -12
  347. mindspore/profiler/platform/__init__.py +2 -3
  348. mindspore/profiler/platform/cpu_profiler.py +10 -4
  349. mindspore/profiler/platform/npu_profiler.py +30 -20
  350. mindspore/profiler/profiler.py +218 -154
  351. mindspore/profiler/profiler_action_controller.py +65 -77
  352. mindspore/profiler/profiler_interface.py +2 -2
  353. mindspore/profiler/schedule.py +10 -4
  354. mindspore/rewrite/common/config.py +1 -0
  355. mindspore/rewrite/common/namer.py +1 -0
  356. mindspore/rewrite/common/namespace.py +1 -0
  357. mindspore/rewrite/node/node.py +31 -11
  358. mindspore/rewrite/parsers/assign_parser.py +1 -1
  359. mindspore/rewrite/symbol_tree/symbol_tree.py +2 -2
  360. mindspore/run_check/_check_version.py +7 -10
  361. mindspore/runtime/__init__.py +8 -6
  362. mindspore/runtime/event.py +10 -4
  363. mindspore/runtime/executor.py +87 -45
  364. mindspore/runtime/memory.py +22 -30
  365. mindspore/runtime/thread_bind_core.py +299 -165
  366. mindspore/safeguard/rewrite_obfuscation.py +12 -13
  367. mindspore/swresample-4.dll +0 -0
  368. mindspore/swscale-6.dll +0 -0
  369. mindspore/tbbmalloc.dll +0 -0
  370. mindspore/tinyxml2.dll +0 -0
  371. mindspore/train/_utils.py +9 -5
  372. mindspore/train/amp.py +43 -23
  373. mindspore/train/callback/__init__.py +5 -5
  374. mindspore/train/callback/_callback.py +2 -1
  375. mindspore/train/callback/_checkpoint.py +4 -14
  376. mindspore/train/callback/_flops_collector.py +11 -7
  377. mindspore/train/callback/_landscape.py +0 -1
  378. mindspore/train/callback/_train_fault_tolerance.py +72 -18
  379. mindspore/train/data_sink.py +15 -6
  380. mindspore/train/dataset_helper.py +14 -5
  381. mindspore/train/model.py +49 -47
  382. mindspore/train/serialization.py +168 -126
  383. mindspore/train/summary/summary_record.py +13 -2
  384. mindspore/train/train_thor/model_thor.py +2 -2
  385. mindspore/turbojpeg.dll +0 -0
  386. mindspore/utils/__init__.py +3 -2
  387. mindspore/utils/dryrun.py +0 -6
  388. mindspore/utils/runtime_execution_order_check.py +162 -78
  389. mindspore/utils/sdc_detect.py +68 -0
  390. mindspore/utils/utils.py +14 -17
  391. mindspore/vcmeta.dll +0 -0
  392. mindspore/vcruntime140.dll +0 -0
  393. mindspore/vcruntime140_1.dll +0 -0
  394. mindspore/version.py +1 -1
  395. {mindspore-2.6.0.dist-info → mindspore-2.7.0.dist-info}/METADATA +5 -4
  396. {mindspore-2.6.0.dist-info → mindspore-2.7.0.dist-info}/RECORD +400 -439
  397. mindspore/_deprecated/jit.py +0 -198
  398. mindspore/_extends/remote/kernel_build_server_ascend.py +0 -75
  399. mindspore/communication/_hccl_management.py +0 -297
  400. mindspore/experimental/es/embedding_service.py +0 -891
  401. mindspore/experimental/es/embedding_service_layer.py +0 -581
  402. mindspore/profiler/common/validator/__init__.py +0 -14
  403. mindspore/profiler/common/validator/validate_path.py +0 -84
  404. mindspore/profiler/parser/__init__.py +0 -14
  405. mindspore/profiler/parser/aicpu_data_parser.py +0 -272
  406. mindspore/profiler/parser/ascend_analysis/__init__.py +0 -14
  407. mindspore/profiler/parser/ascend_analysis/constant.py +0 -71
  408. mindspore/profiler/parser/ascend_analysis/file_manager.py +0 -180
  409. mindspore/profiler/parser/ascend_analysis/function_event.py +0 -185
  410. mindspore/profiler/parser/ascend_analysis/fwk_cann_parser.py +0 -136
  411. mindspore/profiler/parser/ascend_analysis/fwk_file_parser.py +0 -131
  412. mindspore/profiler/parser/ascend_analysis/msprof_timeline_parser.py +0 -104
  413. mindspore/profiler/parser/ascend_analysis/path_manager.py +0 -313
  414. mindspore/profiler/parser/ascend_analysis/profiler_info_parser.py +0 -123
  415. mindspore/profiler/parser/ascend_analysis/tlv_decoder.py +0 -86
  416. mindspore/profiler/parser/ascend_analysis/trace_event_manager.py +0 -75
  417. mindspore/profiler/parser/ascend_cluster_generator.py +0 -116
  418. mindspore/profiler/parser/ascend_communicate_generator.py +0 -314
  419. mindspore/profiler/parser/ascend_flops_generator.py +0 -116
  420. mindspore/profiler/parser/ascend_fpbp_generator.py +0 -82
  421. mindspore/profiler/parser/ascend_hccl_generator.py +0 -271
  422. mindspore/profiler/parser/ascend_integrate_generator.py +0 -42
  423. mindspore/profiler/parser/ascend_memory_generator.py +0 -185
  424. mindspore/profiler/parser/ascend_msprof_exporter.py +0 -282
  425. mindspore/profiler/parser/ascend_msprof_generator.py +0 -187
  426. mindspore/profiler/parser/ascend_op_generator.py +0 -334
  427. mindspore/profiler/parser/ascend_steptrace_generator.py +0 -94
  428. mindspore/profiler/parser/ascend_timeline_generator.py +0 -545
  429. mindspore/profiler/parser/base_timeline_generator.py +0 -483
  430. mindspore/profiler/parser/container.py +0 -229
  431. mindspore/profiler/parser/cpu_gpu_timeline_generator.py +0 -697
  432. mindspore/profiler/parser/flops_parser.py +0 -531
  433. mindspore/profiler/parser/framework_enum.py +0 -111
  434. mindspore/profiler/parser/framework_parser.py +0 -464
  435. mindspore/profiler/parser/framework_struct.py +0 -61
  436. mindspore/profiler/parser/gpu_analysis/__init__.py +0 -14
  437. mindspore/profiler/parser/gpu_analysis/function_event.py +0 -44
  438. mindspore/profiler/parser/gpu_analysis/fwk_file_parser.py +0 -89
  439. mindspore/profiler/parser/gpu_analysis/profiler_info_parser.py +0 -72
  440. mindspore/profiler/parser/hccl_parser.py +0 -573
  441. mindspore/profiler/parser/hwts_log_parser.py +0 -122
  442. mindspore/profiler/parser/integrator.py +0 -526
  443. mindspore/profiler/parser/memory_usage_parser.py +0 -277
  444. mindspore/profiler/parser/minddata_analyzer.py +0 -800
  445. mindspore/profiler/parser/minddata_parser.py +0 -186
  446. mindspore/profiler/parser/minddata_pipeline_parser.py +0 -299
  447. mindspore/profiler/parser/op_intermediate_parser.py +0 -149
  448. mindspore/profiler/parser/optime_parser.py +0 -250
  449. mindspore/profiler/parser/profiler_info.py +0 -213
  450. mindspore/profiler/parser/step_trace_parser.py +0 -666
  451. mindspore/utils/hooks.py +0 -81
  452. /mindspore/common/{_auto_dynamic.py → dynamic_shape/_auto_dynamic.py} +0 -0
  453. {mindspore-2.6.0.dist-info → mindspore-2.7.0.dist-info}/WHEEL +0 -0
  454. {mindspore-2.6.0.dist-info → mindspore-2.7.0.dist-info}/entry_points.txt +0 -0
  455. {mindspore-2.6.0.dist-info → mindspore-2.7.0.dist-info}/top_level.txt +0 -0
@@ -1,545 +0,0 @@
1
- # Copyright 2022 Huawei Technologies Co., Ltd
2
- #
3
- # Licensed under the Apache License, Version 2.0 (the "License");
4
- # you may not use this file except in compliance with the License.
5
- # You may obtain a copy of the License at
6
- #
7
- # http://www.apache.org/licenses/LICENSE-2.0
8
- #
9
- # Unless required by applicable law or agreed to in writing, software
10
- # distributed under the License is distributed on an "AS IS" BASIS,
11
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
- # See the License for the specific language governing permissions and
13
- # limitations under the License.
14
- # ============================================================================
15
- """The integrator for integrating parsed profiling files."""
16
-
17
- import os.path
18
- import glob
19
- import json
20
- import stat
21
- from decimal import Decimal
22
- import numpy as np
23
- from mindspore import log as logger
24
- from mindspore.profiler.common.exceptions.exceptions import ProfilerIOException
25
- from mindspore.profiler.parser.base_timeline_generator import BaseTimelineGenerator
26
- from mindspore.profiler.parser.integrator import DeviceTarget
27
- from mindspore.profiler.parser.ascend_analysis.fwk_cann_parser import FwkCANNParser
28
- from mindspore.profiler.common.util import get_newest_file
29
- from mindspore.profiler.parser.ascend_analysis.constant import Constant
30
-
31
-
32
- class AscendTimelineGenerator(BaseTimelineGenerator):
33
- """Generate ascend Timeline data from file."""
34
- _timeline_display_filename = 'ascend_timeline_display_{}.json'
35
- _timeline_summary_filename = 'ascend_timeline_summary_{}.json'
36
- _cluster_analyse_filename = 'ascend_cluster_analyse_{}_{}_{}_{}.csv'
37
-
38
- def __init__(self, profiling_dir, source_path, mindstudio_profiler_output, rank_id, rank_size, mode,
39
- step_list=None):
40
- super().__init__(DeviceTarget.ASCEND.value, mode)
41
- self._profiling_dir = profiling_dir
42
- self._source_path = source_path
43
- self._mindstudio_profiler_output = mindstudio_profiler_output
44
- self._rank_id = rank_id
45
- self._rank_size = rank_size
46
- self._timeline_display_filename = self._timeline_display_filename.format(rank_id)
47
- self._timeline_summary_filename = self._timeline_summary_filename.format(rank_id)
48
- self._timeline_data = []
49
- self._step_list = step_list
50
-
51
- self.step_time_list_df = np.dtype(
52
- [('Iteration ID', object), ('Steps', object), ('Iteration Start', float), ('Iteration Time', float)])
53
-
54
- self.aicpu_time_list_dt = np.dtype(
55
- [('Op Name', object), ('Stream ID', int), ('Task Start Time', float), ('Task Duration', float)])
56
-
57
- def parse_cluster_data(self, op_summary, steptrace):
58
- """
59
- Parse cluster data and timeline summary data.
60
-
61
- Args:
62
- op_summary: op data
63
- steptrace: step data
64
- """
65
-
66
- logger.info('parse cluster data...')
67
- if isinstance(op_summary, np.ndarray) and op_summary.shape[0] == 0 or \
68
- not isinstance(op_summary, np.ndarray) and not op_summary:
69
- return
70
- timeline_list = op_summary[~np.isin(op_summary['Task Type'], ['AI_CPU', 'HCCL'])][
71
- ['Op Name', 'Stream ID', 'Task Start Time', 'Task Duration']]
72
-
73
- timeline_list = timeline_list.tolist()
74
-
75
- if not isinstance(steptrace, np.ndarray) or steptrace.shape[0] == 0 or not steptrace.tolist():
76
- iteration_time = op_summary[-1]['Task Start Time'] - op_summary[0]['Task Start Time'] + op_summary[-1][
77
- 'Task Duration'] + op_summary[-1]['Task Wait Time']
78
- step_time_list = [['1', 'Steps', op_summary[0]['Task Start Time'], iteration_time]]
79
- else:
80
- step_time_list = np.empty((len(steptrace),), dtype=self.step_time_list_df)
81
- step_time_list['Iteration ID'] = \
82
- np.char.add("Model ID: ",
83
- np.char.add(steptrace['Model ID'].astype(str),
84
- np.char.add(" Iteration ID: ",
85
- steptrace['Iteration ID'].astype(str))))
86
- step_time_list['Steps'] = 'Steps'
87
- step_time_list['Iteration Start'] = steptrace['Iteration End'] - steptrace['Iteration Time']
88
- step_time_list['Iteration Time'] = steptrace['Iteration Time']
89
- step_time_list = step_time_list.tolist()
90
-
91
- # Add AI CPU data into timeline temp list and sort by start time.
92
- aicpu_op = op_summary[op_summary['Task Type'] == 'AI_CPU']
93
- if aicpu_op.size:
94
- aicpu_time_list = np.empty((len(aicpu_op),), dtype=self.aicpu_time_list_dt)
95
- aicpu_time_list['Op Name'] = aicpu_op['Op Name']
96
- aicpu_time_list['Stream ID'] = aicpu_op['Stream ID']
97
- aicpu_time_list['Task Start Time'] = aicpu_op['Task Start Time']
98
- aicpu_time_list['Task Duration'] = aicpu_op['Task Duration'] + aicpu_op['Task Wait Time']
99
- aicpu_time_list = aicpu_time_list.tolist()
100
- timeline_list.extend(aicpu_time_list)
101
- timeline_list.sort(key=lambda x: float(x[self._start_time_idx]))
102
-
103
- # Add AllReduce info to timeline temp list and sort by start time.
104
- communication_info = op_summary[op_summary['Task Type'] == 'HCCL'][
105
- ['Op Name', 'Stream ID', 'Task Start Time', 'Task Duration']]
106
- if communication_info.size:
107
- communication_info = communication_info.tolist()
108
- communication_info.sort(key=lambda x: float(x[self._start_time_idx]))
109
- logger.debug('AllReduce info found. Start adding info into timeline...')
110
- self._get_cluster_timeline(timeline_list, communication_info, step_time_list)
111
-
112
- # Update timeline summary info
113
- timeline_summary = op_summary[['Op Name', 'Stream ID', 'Task Duration']]
114
- self._timeline_summary['total_time'] = np.sum(timeline_summary['Task Duration'])
115
- self._timeline_summary['num_of_streams'] = int(
116
- len(np.unique(timeline_summary['Stream ID'], return_counts=True)[0]))
117
- self._timeline_summary['num_of_ops'] = int(len(np.unique(timeline_summary['Op Name'], return_counts=True)[0]))
118
- self._timeline_summary['op_exe_times'] = int(len(timeline_summary))
119
- if self._timeline_summary['op_exe_times'] != 0:
120
- self._timeline_summary['max_scope_name_num'] = int(np.max(
121
- [len(x) for x in np.char.split(timeline_summary['Op Name'].astype(str), sep='/')]))
122
- else:
123
- self._timeline_summary['max_scope_name_num'] = 0
124
- logger.info('Finished parse cluster data...')
125
-
126
def write_timeline_display(self):
    """
    Dump ``self._timeline_data`` as JSON into the timeline display file.

    The target is ``self._timeline_display_filename`` inside ``self._profiling_dir``. It is
    opened with create/truncate flags and mode 0o600, written with ``self.indent`` as the JSON
    indent, and then chmod-ed to owner read/write.

    Raises:
        ProfilerIOException: If opening, writing or chmod-ing the file fails.
    """
    logger.info('Writing timeline file...')
    target_path = os.path.join(self._profiling_dir, self._timeline_display_filename)
    open_flags = os.O_WRONLY | os.O_CREAT | os.O_TRUNC
    try:
        with os.fdopen(os.open(target_path, open_flags, 0o600), 'w') as out_stream:
            json.dump(self._timeline_data, out_stream, indent=self.indent)
        os.chmod(target_path, stat.S_IREAD | stat.S_IWRITE)
        logger.info('Finished file writing!')
    except (IOError, OSError) as err:
        logger.critical('Error occurred when write timeline display file: %s', err)
        raise ProfilerIOException() from err
141
-
142
def parse_timeline_data(self, pretty=False):
    """
    Collect all timeline events into one list.

    Events are appended in this order: CPU op events, then either the framework trace data
    (when the op-range file exists) or the raw msprof events, then the scope layer events
    derived from the scope data gathered along the way.

    Args:
        pretty (bool): Stored on ``self._pretty``. Default: ``False``.

    Returns:
        list, the timeline events. The same list is also stored in ``self._timeline_data``.
    """
    logger.info("Start parse timeline data...")
    self._pretty = pretty
    events = []
    scopes = []

    # get msprof data
    msprof_candidates = glob.glob(fr'{self._mindstudio_profiler_output}/msprof_*.json')
    if msprof_candidates:
        msprof_timeline = self._parse_msprof_data(get_newest_file(msprof_candidates))
    else:
        msprof_timeline = []
        logger.error('Could not find msprof_*.json file in %s', self._mindstudio_profiler_output)

    # get cpu op
    cpu_candidates = glob.glob(fr'{self._profiling_dir}/cpu_op_execute_timestamp_{self._rank_id}.txt')
    if cpu_candidates:
        cpu_timeline, cpu_scopes = self.parse_cpu_timeline(cpu_candidates)
        events.extend(cpu_timeline)
        scopes.extend(cpu_scopes)
    else:
        logger.warning('Could not find cpu op file in %s', self._profiling_dir)

    oprange_name = self._op_range_name.format(self._rank_id)
    fwk_file_path = fr'{self._profiling_dir}/{self._framework_dir}/{oprange_name}'
    if not os.path.exists(fwk_file_path):
        # get Ascend Hardware for scope
        scopes.extend(self._parse_ascend_hardware_scope(msprof_timeline))
        events.extend(msprof_timeline)
    else:
        # It is faster not to submit to the pool
        fwk_result = self._parse_fwk_device_data(msprof_timeline)
        scopes.extend(fwk_result.get('scope_data', []))
        events.extend(fwk_result.get("trace_data", []))
        self._kernel_events = fwk_result.get("kernels", [])

    # parse scope info
    events.extend(self._parse_scope_info(scopes))

    logger.info("All timeline data parse complete.")
    self._timeline_data = events
    return events
193
-
194
def parse_cpu_timeline(self, file_list):
    """
    Load CPU operator events from the given timestamp files.

    Each non-blank line is split on ';': the first field is the operator full name, the second
    field is emitted as the event ``type`` and the last field holds space separated
    ``ts,dur,tid`` triples. Only the first triple of a line is turned into an event.

    Args:
        file_list (list[str]): Paths of the CPU op timestamp files.

    Returns:
        tuple(list, list), the trace events (process metadata, thread metadata, then op events)
        and the scope tuples ``(scope_names, start, end)``. Both lists are empty when a file
        cannot be read, so callers can always unpack two values.
    """
    ms_to_us = 1e3
    ns_to_us = 1e-3
    precision = Decimal('0.000')
    # Both conversion factors are loop invariant, so build them once.
    ns_factor = Decimal(ns_to_us).quantize(precision)
    ms_factor = Decimal(ms_to_us).quantize(precision)
    new_pid = Constant.CPU_OP
    process_list = [
        {"name": "process_name", "pid": new_pid, "args": {"name": "CPU OP"}, "ph": "M"},
        {"name": "process_sort_index", "pid": new_pid, "args": {"sort_index": new_pid}, "ph": "M"},
    ]
    tid_set = set()
    thread_list = []
    new_timeline = []
    scope_data = []
    try:
        for file_path in file_list:
            with os.fdopen(os.open(file_path, os.O_RDONLY, 0o400), 'r') as fr:
                for line in fr:
                    record = line.strip()
                    if not record:
                        # A blank line carries no "ts,dur,tid" triple to unpack.
                        continue
                    op_list = record.split(';')
                    op_full_name = op_list[0]
                    # Only the first time triple of a line is used.
                    first_time = op_list[-1].split(" ")[0]
                    ts, dur, tid = first_time.split(",")  # origin unit of ts is ns and dur is ms.
                    ts = Decimal(ts).quantize(precision) * ns_factor  # cast to us
                    dur = Decimal(dur).quantize(precision) * ms_factor  # cast to us

                    if op_full_name and op_full_name.startswith(Constant.TOP_SCOPE_NAMES):
                        scope_data.append((op_full_name.split('/')[:-1], ts, ts + dur))

                    thread_id = int(tid)
                    if thread_id not in tid_set:
                        tid_set.add(thread_id)
                        thread_list.append({"name": "thread_name",
                                            "pid": new_pid,
                                            "tid": thread_id,
                                            "ph": "M",
                                            'args': {'name': f'thread {tid}'}})

                    new_timeline.append({'name': op_full_name,
                                         'pid': new_pid,
                                         'tid': thread_id,
                                         'ph': 'X',
                                         'ts': str(ts),
                                         # NOTE(review): dur was already converted to us above, so
                                         # this second scaling looks unintended. Kept unchanged
                                         # until the unit of the input file is confirmed.
                                         'dur': float(dur) * ms_to_us,
                                         'args': {'type': op_list[1]}})

        return process_list + thread_list + new_timeline, scope_data

    except (IOError, OSError, json.JSONDecodeError) as err:
        logger.error('parse_cpu_timeline failed! please check. detail: %s', err)
        # Keep the two-value shape of the success path.
        return [], []
257
-
258
def _parse_fwk_device_data(self, cann_kernel_data):
    """
    Build the framework op-range trace data through ``FwkCANNParser``.

    Args:
        cann_kernel_data (list): Events handed to ``FwkCANNParser`` together with the source
            path, rank id and step list of this generator.

    Returns:
        dict, with ``trace_data`` (result of ``generate_trace_data()``), ``kernels`` (the
        parser's ``kernels``) and ``scope_data`` (scope data with flow followed by scope data
        without flow).
    """
    parser = FwkCANNParser(self._source_path, cann_kernel_data, self._rank_id, self._step_list)
    trace_data = parser.generate_trace_data()
    return {
        "trace_data": trace_data,
        "kernels": parser.kernels,
        "scope_data": parser.scope_data_with_flow + parser.scope_data_without_flow,
    }
268
-
269
- def _parse_msprof_data(self, file_list):
270
- """
271
- parse msprof.json file
272
- :param file_list:
273
- :return:
274
- """
275
- flags = os.O_RDONLY
276
- raw_data = []
277
- try:
278
- for file_path in file_list:
279
- with os.fdopen(os.open(file_path, flags, 0o400), 'r') as fr:
280
- raw_data.extend(json.load(fr))
281
-
282
- if not raw_data:
283
- logger.error('Could not found msprof data in file list: %s .', file_list)
284
-
285
- return raw_data
286
-
287
- except (IOError, OSError, json.JSONDecodeError) as err:
288
- logger.error('_parse_msprof_data failed! please theck. detail: %s', err)
289
- return []
290
-
291
- def _parse_ascend_hardware_scope(self, msprof_timeline):
292
- """
293
- parse ascend hardware scope
294
- """
295
- scope_data = []
296
- try:
297
-
298
- for event in msprof_timeline:
299
- if event.get("ph") == "M":
300
- continue
301
-
302
- op_full_name = event.get('name')
303
- if op_full_name and op_full_name.startswith(Constant.TOP_SCOPE_NAMES):
304
- ts = Decimal(event.get('ts')).quantize(Decimal('0.000'))
305
- te = ts + Decimal(event.get('dur')).quantize(Decimal('0.000'))
306
- scope_data.append((op_full_name.split('/')[:-1], ts, te))
307
-
308
- return scope_data
309
-
310
- except (IOError, OSError, json.JSONDecodeError) as err:
311
- logger.error('_parse_ascend_hardware_scope failed! please theck. detail: %s', err)
312
- return []
313
-
314
- def _parse_scope_info(self, scope_data):
315
- """Parse scope info of op"""
316
- if not scope_data:
317
- return []
318
- new_pid = Constant.SCOPE_LAYLER
319
- scope_data.sort(key=lambda x: x[1])
320
- process_list = [
321
- {"name": "process_name",
322
- "pid": new_pid,
323
- "args": {
324
- "name": f"Scope Layer"
325
- },
326
- "ph": "M"},
327
- {"name": "process_sort_index",
328
- "pid": new_pid,
329
- "args": {"sort_index": new_pid},
330
- "ph": "M"}
331
- ]
332
-
333
- new_events = []
334
- layer_stack = []
335
- for layer_name in scope_data[0][0]:
336
- layer_stack.append([layer_name, scope_data[0][1], scope_data[0][2]])
337
-
338
- for op in scope_data[1:]:
339
- if op[1] < layer_stack[0][2]:
340
- # 并行算子只保留前面的
341
- continue
342
- flag = True # 判断上层是否合并, 上层不合并下层也不合并
343
- for layer_depth, layer_name in enumerate(op[0]):
344
- if layer_depth >= len(layer_stack):
345
- layer_stack.append([layer_name, op[1], op[2]])
346
- else:
347
- if layer_stack[layer_depth][0] == layer_name and flag:
348
- layer_stack[layer_depth][2] = op[2] # 合并
349
- else:
350
- ts = layer_stack[layer_depth][1]
351
- new_events.append({
352
- "name": layer_stack[layer_depth][0],
353
- "pid": new_pid,
354
- "tid": layer_depth,
355
- "ph": "X",
356
- "ts": str(ts),
357
- "dur": float(layer_stack[layer_depth][2] - layer_stack[layer_depth][1])
358
- })
359
- layer_stack[layer_depth] = [layer_name, op[1], op[2]]
360
- flag = False
361
-
362
- thread_list = []
363
- for index, layer in enumerate(layer_stack):
364
- thread_list.extend([{
365
- "name": "thread_name",
366
- "pid": new_pid,
367
- "tid": index,
368
- "args": {
369
- "name": f"layer{index}"
370
- },
371
- "ph": "M"
372
- }, {
373
- "name": "thread_sort_index",
374
- "pid": new_pid,
375
- "tid": index,
376
- "args": {"sort_index": index},
377
- "ph": "M"
378
- }])
379
- if layer:
380
- ts = layer[1]
381
- new_events.append({
382
- "name": layer[0],
383
- "pid": new_pid,
384
- "tid": index,
385
- "ph": "X",
386
- "ts": str(ts),
387
- "dur": float(layer[2] - layer[1])
388
- })
389
-
390
- return process_list + thread_list + new_events
391
-
392
- def _produce_two_separated_timeline(self, timeline, op_name):
393
- """Produce two separated timeline based on op_name."""
394
- timeline_include_op_name = []
395
- timeline_exclude_op_name = []
396
- for time_item in timeline:
397
- if op_name in time_item[self._op_name_idx]:
398
- timeline_include_op_name.append(time_item)
399
- else:
400
- timeline_exclude_op_name.append(time_item)
401
- return timeline_include_op_name, timeline_exclude_op_name
402
-
403
def _get_cluster_timeline(self, aicore_info, comm_info, step_info):
    """
    Analyse the cluster communication and computation data, and write result to file.

    With "t_total" as the time of one training step, five metrics are derived:
        1) The time that "receive" operators are not overlapped by others (t1).
        2) The time consumed inside the stage (t_total - t1).
        3) The time that "communication" operators are not overlapped by others (t2).
        4) The time consumed by computation (t_total - t2).
        5) The time that "collective communication" operators are not overlapped by others (t3).
    In pipeline parallel mode the slow stage can be located from t_total - t1, and inside a
    stage the slow card from t_total - t2. t1 indicates how much communication between stages
    slows down the training, t3 how much communication inside a stage does.

    Args:
        aicore_info (list): Computation timeline items.
        comm_info (list): Communication timeline items.
        step_info (list): Step timeline items.
    """
    comm_merged = self._get_merged_time_list(comm_info, display_name="communication")[0]
    aicore_intervals = self._get_merged_time_list(aicore_info, get_interval_time=True)[0]
    # Consider if the overlap will be 0 or not.
    comm_not_overlapped = self._get_intersection_time(aicore_intervals, comm_merged)

    # Process receive part.
    all_timeline = sorted(aicore_info + comm_info, key=lambda x: float(x[self._start_time_idx]))
    receive_ops, other_ops = self._produce_two_separated_timeline(all_timeline, "Receive-op")
    # Any "Receive-op" item marks the run as pipeline parallel.
    is_pipeline_parallel = bool(receive_ops)
    receive_merged = self._get_merged_time_list(receive_ops)[0]
    other_intervals = self._get_merged_time_list(other_ops, get_interval_time=True)[0]
    receive_not_overlapped = self._get_intersection_time(other_intervals, receive_merged)

    # Process collective communication part.
    collective_comm = self._produce_two_separated_timeline(comm_info, "Receive-op")[-1]
    collective_not_overlapped = self._get_intersection_time(
        aicore_intervals, self._get_merged_time_list(collective_comm)[0]
    )

    self._parse_cluster_metrices(step_info, receive_not_overlapped, comm_not_overlapped,
                                 collective_not_overlapped, is_pipeline_parallel)
458
-
459
def _parse_cluster_metrices(self, step_info, receive_op_not_overlapped_timeline, comm_not_overlapped_timeline,
                            collective_comm_not_overlapped_timeline, is_pipeline_parallel):
    """
    Compute the per-step cluster metrics and write them out.

    Per step, the computation time is the step duration minus the non-overlapped communication
    time, and (only in pipeline parallel mode) the stage time is the step duration minus the
    non-overlapped receive time. When there is more than one step, the average over all steps
    except the first is appended to every metric list.

    Args:
        step_info (list): Step timeline items.
        receive_op_not_overlapped_timeline (list): Non-overlapped receive segments.
        comm_not_overlapped_timeline (list): Non-overlapped communication segments.
        collective_comm_not_overlapped_timeline (list): Non-overlapped collective segments.
        is_pipeline_parallel (bool): Whether stage time is computed.

    Raises:
        ProfilerIOException: If writing the metrics fails with an I/O error.
    """
    # Compute these five metrics mentioned above per step.
    receive_alone = self._compute_time_inside_step(receive_op_not_overlapped_timeline, step_info)
    comm_alone = self._compute_time_inside_step(comm_not_overlapped_timeline, step_info)
    collective_alone = self._compute_time_inside_step(collective_comm_not_overlapped_timeline, step_info)

    stage_time = []
    computation_time = []
    step_num = len(step_info)
    for step in range(step_num):
        if is_pipeline_parallel:
            try:
                stage_time.append(step_info[step][self._duration_idx] - receive_alone[step])
            except IndexError as err:
                logger.error(err)

        try:
            computation_time.append(step_info[step][self._duration_idx] - comm_alone[step])
        except IndexError as err:
            logger.error(err)

    metrices_per_step_list = [computation_time, comm_alone, stage_time, receive_alone, collective_alone]
    if step_num > 1:
        # The first step is left out of the average.
        for metric in metrices_per_step_list:
            metric.append(sum(metric[1:]) / (step_num - 1))

    try:
        self._write_cluster_metrices(metrices_per_step_list, is_pipeline_parallel, "Ascend", self._rank_id)
    except (IOError, OSError) as err:
        logger.warning(err)
        raise ProfilerIOException from err
495
-
496
- def _compute_time_inside_step(self, metric_timeline, step_time_list):
497
- """Compute per step time of metric_timeline."""
498
- per_step_time_list = [0 for _ in range(len(step_time_list))]
499
- step = 0
500
- step_end_time = step_time_list[step][self._start_time_idx] + step_time_list[step][self._duration_idx]
501
- for time_item in metric_timeline:
502
- start_time = time_item[self._start_time_idx]
503
- if start_time > step_end_time:
504
- step += 1
505
- if step >= len(step_time_list):
506
- logger.warning("Compute profiler compute_time_inside_step time, "
507
- "find the data length is more than step count, "
508
- "maybe current graph has multi sub graph, skip the last data.")
509
- break
510
- step_end_time = step_time_list[step][self._start_time_idx] + step_time_list[step][self._duration_idx]
511
- per_step_time_list[step] += time_item[self._duration_idx]
512
-
513
- return per_step_time_list
514
-
515
- def _get_intersection_time(self, first_time_list, second_time_list,
516
- display_name="communication_not_overlapped"):
517
- """Get intersection time of two time list."""
518
- first_list_idx, second_list_idx = 0, 0
519
- first_list_len = len(first_time_list)
520
- second_list_len = len(second_time_list)
521
- intersection_segment_display_list = []
522
-
523
- while first_list_idx < first_list_len and \
524
- second_list_idx < second_list_len:
525
- intersection_start = max(
526
- first_time_list[first_list_idx][self._start_time_idx],
527
- second_time_list[second_list_idx][self._start_time_idx]
528
- )
529
- intersection_end = min(
530
- first_time_list[first_list_idx][self._duration_idx],
531
- second_time_list[second_list_idx][self._duration_idx]
532
- )
533
- if intersection_start < intersection_end:
534
- tid = self._tid_dict.get(display_name, [0, 0])
535
- intersection_segment_display_list.append(
536
- [display_name, tid[0],
537
- intersection_start, intersection_end - intersection_start, tid[1]]
538
- )
539
- if first_time_list[first_list_idx][self._duration_idx] >= \
540
- second_time_list[second_list_idx][self._duration_idx]:
541
- second_list_idx += 1
542
- else:
543
- first_list_idx += 1
544
-
545
- return intersection_segment_display_list