mindspore 2.6.0rc1__cp39-cp39-win_amd64.whl → 2.7.0rc1__cp39-cp39-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mindspore might be problematic. Click here for more details.

Files changed (384)
  1. mindspore/.commit_id +1 -1
  2. mindspore/__init__.py +1 -1
  3. mindspore/_c_dataengine.cp39-win_amd64.pyd +0 -0
  4. mindspore/_c_expression.cp39-win_amd64.pyd +0 -0
  5. mindspore/_c_mindrecord.cp39-win_amd64.pyd +0 -0
  6. mindspore/_checkparam.py +40 -9
  7. mindspore/{_deprecated → _extends/optimize}/__init__.py +9 -3
  8. mindspore/_extends/optimize/cell_utils.py +96 -0
  9. mindspore/_extends/parse/__init__.py +2 -2
  10. mindspore/_extends/parse/compile_config.py +44 -22
  11. mindspore/_extends/parse/deprecated/deprecated_tensor_method.py +1 -1
  12. mindspore/_extends/parse/parser.py +37 -62
  13. mindspore/_extends/parse/resources.py +39 -0
  14. mindspore/_extends/parse/standard_method.py +43 -13
  15. mindspore/_extends/parse/trope.py +8 -1
  16. mindspore/_extends/pijit/__init__.py +1 -2
  17. mindspore/amp.py +4 -4
  18. mindspore/avcodec-59.dll +0 -0
  19. mindspore/avdevice-59.dll +0 -0
  20. mindspore/avfilter-8.dll +0 -0
  21. mindspore/avformat-59.dll +0 -0
  22. mindspore/avutil-57.dll +0 -0
  23. mindspore/boost/adasum.py +1 -1
  24. mindspore/boost/boost_cell_wrapper.py +4 -4
  25. mindspore/common/__init__.py +27 -2
  26. mindspore/common/_grad_function.py +2 -1
  27. mindspore/common/_pijit_context.py +28 -7
  28. mindspore/common/_stub_tensor.py +1 -209
  29. mindspore/common/_tensor_cpp_method.py +1 -1
  30. mindspore/common/_tensor_docs.py +77 -16
  31. mindspore/common/api.py +238 -113
  32. mindspore/common/dtype.py +21 -11
  33. mindspore/common/dump.py +10 -15
  34. mindspore/common/generator.py +5 -3
  35. mindspore/common/hook_handle.py +11 -2
  36. mindspore/common/jit_config.py +1 -1
  37. mindspore/common/jit_trace.py +84 -105
  38. mindspore/common/parameter.py +26 -12
  39. mindspore/common/recompute.py +3 -3
  40. mindspore/common/sparse_tensor.py +0 -3
  41. mindspore/common/symbol.py +0 -1
  42. mindspore/common/tensor.py +81 -81
  43. mindspore/communication/_comm_helper.py +46 -4
  44. mindspore/communication/management.py +79 -7
  45. mindspore/context.py +58 -40
  46. mindspore/dataset/core/config.py +3 -3
  47. mindspore/dataset/engine/datasets.py +20 -7
  48. mindspore/dataset/engine/datasets_user_defined.py +33 -3
  49. mindspore/dataset/engine/iterators.py +2 -2
  50. mindspore/dataset/engine/obs/config_loader.py +2 -2
  51. mindspore/dataset/engine/obs/obs_mindrecord_dataset.py +8 -0
  52. mindspore/dataset/transforms/py_transforms.py +7 -3
  53. mindspore/dataset/transforms/transforms.py +7 -3
  54. mindspore/dataset/vision/validators.py +1 -0
  55. mindspore/device_context/ascend/device.py +1 -1
  56. mindspore/device_context/gpu/__init__.py +2 -2
  57. mindspore/device_context/gpu/device.py +1 -1
  58. mindspore/device_context/gpu/op_precision.py +4 -2
  59. mindspore/device_context/gpu/op_tuning.py +6 -3
  60. mindspore/device_manager.py +16 -9
  61. mindspore/dnnl.dll +0 -0
  62. mindspore/experimental/llm_boost/ascend_native/llama_boost_ascend_native.py +3 -7
  63. mindspore/experimental/llm_boost/atb/boost_base.py +2 -3
  64. mindspore/experimental/optim/adadelta.py +13 -20
  65. mindspore/experimental/optim/adagrad.py +15 -22
  66. mindspore/experimental/optim/adam.py +17 -24
  67. mindspore/experimental/optim/adamax.py +14 -22
  68. mindspore/experimental/optim/adamw.py +28 -34
  69. mindspore/experimental/optim/asgd.py +15 -25
  70. mindspore/experimental/optim/lr_scheduler.py +27 -45
  71. mindspore/experimental/optim/nadam.py +14 -24
  72. mindspore/experimental/optim/optimizer.py +13 -23
  73. mindspore/experimental/optim/radam.py +18 -24
  74. mindspore/experimental/optim/rmsprop.py +14 -25
  75. mindspore/experimental/optim/rprop.py +15 -26
  76. mindspore/experimental/optim/sgd.py +9 -19
  77. mindspore/hal/__init__.py +4 -4
  78. mindspore/hal/contiguous_tensors_handle.py +2 -2
  79. mindspore/hal/memory.py +27 -7
  80. mindspore/include/api/cell.h +37 -1
  81. mindspore/include/api/delegate.h +10 -0
  82. mindspore/include/api/model.h +3 -0
  83. mindspore/include/api/types.h +2 -2
  84. mindspore/include/c_api/model_c.h +0 -58
  85. mindspore/include/c_api/tensor_c.h +0 -26
  86. mindspore/include/dataset/vision_ascend.h +1 -1
  87. mindspore/jpeg62.dll +0 -0
  88. mindspore/mindrecord/tools/cifar10.py +60 -11
  89. mindspore/mindrecord/tools/cifar10_to_mr.py +5 -0
  90. mindspore/mindspore_backend_common.dll +0 -0
  91. mindspore/mindspore_backend_manager.dll +0 -0
  92. mindspore/mindspore_common.dll +0 -0
  93. mindspore/mindspore_core.dll +0 -0
  94. mindspore/mindspore_cpu_res_manager.dll +0 -0
  95. mindspore/mindspore_dump.dll +0 -0
  96. mindspore/mindspore_frontend.dll +0 -0
  97. mindspore/mindspore_glog.dll +0 -0
  98. mindspore/mindspore_memory_pool.dll +0 -0
  99. mindspore/mindspore_ms_backend.dll +0 -0
  100. mindspore/mindspore_ops.dll +0 -0
  101. mindspore/mindspore_ops_host.dll +0 -0
  102. mindspore/mindspore_ops_kernel_common.dll +0 -0
  103. mindspore/mindspore_profiler.dll +0 -0
  104. mindspore/mindspore_pyboost.dll +0 -0
  105. mindspore/mindspore_pynative.dll +0 -0
  106. mindspore/mindspore_res_manager.dll +0 -0
  107. mindspore/mindspore_runtime_pipeline.dll +0 -0
  108. mindspore/mint/__init__.py +6 -46
  109. mindspore/mint/distributed/__init__.py +1 -0
  110. mindspore/mint/distributed/distributed.py +212 -9
  111. mindspore/mint/nn/__init__.py +1 -1
  112. mindspore/mint/nn/functional.py +53 -6
  113. mindspore/mint/nn/layer/_functions.py +164 -294
  114. mindspore/mint/nn/layer/activation.py +8 -6
  115. mindspore/mint/nn/layer/conv.py +137 -101
  116. mindspore/mint/nn/layer/normalization.py +8 -22
  117. mindspore/mint/optim/adam.py +19 -18
  118. mindspore/mint/optim/adamw.py +14 -8
  119. mindspore/mint/optim/sgd.py +5 -5
  120. mindspore/nn/cell.py +328 -502
  121. mindspore/nn/grad/cell_grad.py +11 -12
  122. mindspore/nn/layer/activation.py +32 -34
  123. mindspore/nn/layer/basic.py +67 -64
  124. mindspore/nn/layer/channel_shuffle.py +4 -4
  125. mindspore/nn/layer/combined.py +4 -2
  126. mindspore/nn/layer/conv.py +117 -110
  127. mindspore/nn/layer/dense.py +9 -7
  128. mindspore/nn/layer/embedding.py +50 -52
  129. mindspore/nn/layer/image.py +37 -39
  130. mindspore/nn/layer/math.py +111 -112
  131. mindspore/nn/layer/normalization.py +56 -44
  132. mindspore/nn/layer/pooling.py +58 -63
  133. mindspore/nn/layer/rnn_cells.py +33 -33
  134. mindspore/nn/layer/rnns.py +56 -56
  135. mindspore/nn/layer/thor_layer.py +74 -73
  136. mindspore/nn/layer/transformer.py +11 -1
  137. mindspore/nn/learning_rate_schedule.py +20 -20
  138. mindspore/nn/loss/loss.py +79 -81
  139. mindspore/nn/optim/adam.py +3 -3
  140. mindspore/nn/optim/adasum.py +2 -2
  141. mindspore/nn/optim/asgd.py +2 -0
  142. mindspore/nn/optim/optimizer.py +1 -1
  143. mindspore/nn/optim/thor.py +2 -2
  144. mindspore/nn/probability/distribution/exponential.py +2 -1
  145. mindspore/nn/probability/distribution/poisson.py +2 -1
  146. mindspore/nn/sparse/sparse.py +3 -3
  147. mindspore/nn/wrap/cell_wrapper.py +34 -37
  148. mindspore/nn/wrap/grad_reducer.py +37 -37
  149. mindspore/nn/wrap/loss_scale.py +72 -74
  150. mindspore/numpy/array_creations.py +5 -5
  151. mindspore/numpy/fft.py +1 -1
  152. mindspore/numpy/math_ops.py +5 -5
  153. mindspore/opencv_core452.dll +0 -0
  154. mindspore/opencv_imgcodecs452.dll +0 -0
  155. mindspore/opencv_imgproc452.dll +0 -0
  156. mindspore/ops/_grad_experimental/grad_comm_ops.py +51 -13
  157. mindspore/ops/_grad_experimental/grad_debug_ops.py +14 -0
  158. mindspore/ops/_vmap/vmap_array_ops.py +31 -13
  159. mindspore/ops/_vmap/vmap_nn_ops.py +8 -16
  160. mindspore/ops/auto_generate/cpp_create_prim_instance_helper.py +42 -11
  161. mindspore/ops/auto_generate/gen_extend_func.py +23 -141
  162. mindspore/ops/auto_generate/gen_ops_def.py +727 -321
  163. mindspore/ops/auto_generate/gen_ops_prim.py +1721 -984
  164. mindspore/ops/auto_generate/pyboost_inner_prim.py +31 -1
  165. mindspore/ops/composite/__init__.py +10 -0
  166. mindspore/ops/composite/base.py +8 -4
  167. mindspore/ops/composite/multitype_ops/__init__.py +12 -1
  168. mindspore/ops/composite/multitype_ops/_compile_utils.py +133 -109
  169. mindspore/ops/composite/multitype_ops/add_impl.py +70 -2
  170. mindspore/ops/composite/multitype_ops/div_impl.py +49 -0
  171. mindspore/ops/composite/multitype_ops/floordiv_impl.py +29 -0
  172. mindspore/ops/composite/multitype_ops/getitem_impl.py +11 -0
  173. mindspore/ops/composite/multitype_ops/mod_impl.py +5 -3
  174. mindspore/ops/composite/multitype_ops/mul_impl.py +49 -0
  175. mindspore/ops/composite/multitype_ops/setitem_impl.py +57 -0
  176. mindspore/ops/composite/multitype_ops/sub_impl.py +34 -0
  177. mindspore/ops/composite/multitype_ops/zeros_like_impl.py +14 -0
  178. mindspore/ops/function/__init__.py +3 -1
  179. mindspore/ops/function/_add_attr_func.py +11 -6
  180. mindspore/ops/function/array_func.py +9 -96
  181. mindspore/ops/function/debug_func.py +4 -3
  182. mindspore/ops/function/grad/grad_func.py +1 -1
  183. mindspore/ops/function/math_func.py +33 -540
  184. mindspore/ops/function/nn_func.py +28 -74
  185. mindspore/ops/function/other_func.py +4 -1
  186. mindspore/ops/function/random_func.py +44 -5
  187. mindspore/ops/function/vmap_func.py +2 -1
  188. mindspore/ops/functional.py +2 -3
  189. mindspore/ops/functional_overload.py +571 -6
  190. mindspore/ops/op_info_register.py +21 -0
  191. mindspore/ops/operations/__init__.py +16 -11
  192. mindspore/ops/operations/_custom_ops_utils.py +689 -34
  193. mindspore/ops/operations/_inner_ops.py +3 -6
  194. mindspore/ops/operations/_sequence_ops.py +1 -1
  195. mindspore/ops/operations/array_ops.py +2 -2
  196. mindspore/ops/operations/comm_ops.py +185 -26
  197. mindspore/ops/operations/custom_ops.py +294 -174
  198. mindspore/ops/operations/debug_ops.py +59 -4
  199. mindspore/ops/operations/image_ops.py +13 -13
  200. mindspore/ops/operations/manually_defined/ops_def.py +15 -16
  201. mindspore/ops/operations/math_ops.py +3 -4
  202. mindspore/ops/operations/nn_ops.py +7 -39
  203. mindspore/ops/primitive.py +6 -10
  204. mindspore/ops/tensor_method.py +47 -8
  205. mindspore/ops_generate/api/cpp_create_prim_instance_helper_generator.py +1 -1
  206. mindspore/ops_generate/api/functional_map_cpp_generator.py +10 -9
  207. mindspore/ops_generate/api/functions_cc_generator.py +58 -10
  208. mindspore/ops_generate/api/tensor_func_reg_cpp_generator.py +1 -1
  209. mindspore/ops_generate/common/base_generator.py +14 -0
  210. mindspore/ops_generate/common/gen_constants.py +8 -3
  211. mindspore/ops_generate/common/gen_utils.py +0 -19
  212. mindspore/ops_generate/common/op_proto.py +11 -4
  213. mindspore/ops_generate/common/template.py +88 -11
  214. mindspore/ops_generate/gen_ops.py +1 -1
  215. mindspore/ops_generate/op_def/lite_ops_cpp_generator.py +4 -4
  216. mindspore/ops_generate/op_def/ops_def_cc_generator.py +0 -3
  217. mindspore/ops_generate/op_def/ops_name_h_generator.py +0 -3
  218. mindspore/ops_generate/op_def/ops_primitive_h_generator.py +0 -4
  219. mindspore/ops_generate/op_def_py/op_prim_py_generator.py +5 -2
  220. mindspore/ops_generate/pyboost/auto_grad_impl_cc_generator.py +49 -8
  221. mindspore/ops_generate/pyboost/auto_grad_reg_cc_generator.py +2 -2
  222. mindspore/ops_generate/pyboost/gen_pyboost_func.py +31 -0
  223. mindspore/ops_generate/pyboost/op_template_parser.py +98 -72
  224. mindspore/ops_generate/pyboost/pyboost_functions_cpp_generator.py +70 -273
  225. mindspore/ops_generate/pyboost/pyboost_functions_h_generator.py +14 -6
  226. mindspore/ops_generate/pyboost/pyboost_functions_impl_cpp_generator.py +316 -0
  227. mindspore/ops_generate/pyboost/pyboost_functions_py_generator.py +1 -1
  228. mindspore/ops_generate/pyboost/pyboost_grad_function_cpp_generator.py +5 -3
  229. mindspore/ops_generate/pyboost/pyboost_inner_prim_generator.py +1 -1
  230. mindspore/ops_generate/pyboost/pyboost_internal_functions_cpp_generator.py +76 -0
  231. mindspore/ops_generate/pyboost/pyboost_internal_functions_h_generator.py +76 -0
  232. mindspore/ops_generate/pyboost/pyboost_internal_kernel_info_adapter_generator.py +125 -0
  233. mindspore/ops_generate/pyboost/pyboost_native_grad_functions_generator.py +4 -3
  234. mindspore/ops_generate/pyboost/pyboost_op_cpp_code_generator.py +348 -61
  235. mindspore/ops_generate/pyboost/pyboost_overload_functions_cpp_generator.py +1 -1
  236. mindspore/ops_generate/pyboost/pyboost_utils.py +118 -9
  237. mindspore/ops_generate/tensor_py_cc_generator.py +1 -24
  238. mindspore/parallel/_auto_parallel_context.py +11 -8
  239. mindspore/parallel/_cell_wrapper.py +113 -45
  240. mindspore/parallel/_parallel_serialization.py +1 -1
  241. mindspore/parallel/_ps_context.py +4 -6
  242. mindspore/parallel/_tensor.py +167 -12
  243. mindspore/parallel/_transformer/moe.py +1 -1
  244. mindspore/parallel/_transformer/transformer.py +13 -8
  245. mindspore/parallel/auto_parallel.py +14 -7
  246. mindspore/parallel/checkpoint_convert.py +3 -3
  247. mindspore/parallel/checkpoint_transform.py +11 -7
  248. mindspore/parallel/cluster/process_entity/_api.py +84 -48
  249. mindspore/parallel/cluster/process_entity/_utils.py +95 -7
  250. mindspore/parallel/cluster/run.py +43 -4
  251. mindspore/parallel/function/__init__.py +8 -1
  252. mindspore/parallel/function/reshard_func.py +6 -7
  253. mindspore/parallel/nn/__init__.py +15 -2
  254. mindspore/parallel/nn/parallel_cell_wrapper.py +9 -10
  255. mindspore/parallel/nn/parallel_grad_reducer.py +7 -6
  256. mindspore/parallel/shard.py +3 -4
  257. mindspore/parallel/transform_safetensors.py +463 -174
  258. mindspore/profiler/__init__.py +2 -1
  259. mindspore/profiler/analysis/parser/timeline_assembly_factory/ascend_timeline_assembler.py +7 -7
  260. mindspore/profiler/analysis/parser/timeline_assembly_factory/base_timeline_assembler.py +3 -0
  261. mindspore/profiler/analysis/parser/timeline_assembly_factory/trace_view_container.py +12 -6
  262. mindspore/profiler/analysis/parser/timeline_creator/cpu_op_timeline_creator.py +3 -3
  263. mindspore/profiler/analysis/parser/timeline_creator/fwk_timeline_creator.py +3 -3
  264. mindspore/profiler/analysis/parser/timeline_creator/msprof_timeline_creator.py +4 -4
  265. mindspore/profiler/analysis/parser/timeline_creator/scope_layer_timeline_creator.py +3 -3
  266. mindspore/profiler/analysis/parser/timeline_event/fwk_event.py +4 -1
  267. mindspore/profiler/analysis/parser/timeline_event/timeline_event_pool.py +2 -1
  268. mindspore/profiler/analysis/task_manager.py +1 -1
  269. mindspore/profiler/analysis/viewer/ascend_communication_viewer.py +5 -1
  270. mindspore/profiler/analysis/viewer/ascend_integrate_viewer.py +2 -1
  271. mindspore/profiler/analysis/viewer/ascend_op_memory_viewer.py +42 -22
  272. mindspore/profiler/analysis/viewer/ascend_step_trace_time_viewer.py +3 -2
  273. mindspore/profiler/analysis/viewer/ms_minddata_viewer.py +9 -5
  274. mindspore/profiler/analysis/viewer/ms_operator_details_viewer.py +132 -0
  275. mindspore/profiler/common/constant.py +16 -0
  276. mindspore/profiler/common/profiler_context.py +25 -27
  277. mindspore/profiler/common/profiler_info.py +0 -16
  278. mindspore/profiler/common/profiler_op_analyse.py +235 -0
  279. mindspore/profiler/common/profiler_output_path.py +23 -8
  280. mindspore/profiler/common/profiler_parameters.py +128 -35
  281. mindspore/profiler/dynamic_profile/__init__.py +0 -0
  282. mindspore/profiler/dynamic_profile/dynamic_monitor_proxy.py +39 -0
  283. mindspore/profiler/dynamic_profile/dynamic_profiler_config_context.py +666 -0
  284. mindspore/profiler/dynamic_profile/dynamic_profiler_utils.py +62 -0
  285. mindspore/profiler/dynamic_profiler.py +305 -314
  286. mindspore/profiler/envprofiler.py +12 -7
  287. mindspore/profiler/experimental_config.py +96 -6
  288. mindspore/profiler/mstx.py +33 -12
  289. mindspore/profiler/platform/__init__.py +2 -3
  290. mindspore/profiler/platform/npu_profiler.py +29 -19
  291. mindspore/profiler/profiler.py +35 -19
  292. mindspore/profiler/profiler_action_controller.py +64 -76
  293. mindspore/profiler/schedule.py +10 -4
  294. mindspore/rewrite/common/config.py +1 -0
  295. mindspore/rewrite/common/namer.py +1 -0
  296. mindspore/rewrite/common/namespace.py +1 -0
  297. mindspore/rewrite/node/node.py +31 -11
  298. mindspore/rewrite/parsers/assign_parser.py +1 -1
  299. mindspore/rewrite/symbol_tree/symbol_tree.py +1 -1
  300. mindspore/run_check/_check_version.py +7 -10
  301. mindspore/runtime/__init__.py +5 -5
  302. mindspore/runtime/event.py +10 -4
  303. mindspore/runtime/executor.py +60 -45
  304. mindspore/runtime/memory.py +30 -32
  305. mindspore/runtime/thread_bind_core.py +298 -164
  306. mindspore/safeguard/rewrite_obfuscation.py +12 -13
  307. mindspore/swresample-4.dll +0 -0
  308. mindspore/swscale-6.dll +0 -0
  309. mindspore/tinyxml2.dll +0 -0
  310. mindspore/train/_utils.py +14 -4
  311. mindspore/train/amp.py +43 -20
  312. mindspore/train/callback/__init__.py +5 -5
  313. mindspore/train/callback/_checkpoint.py +3 -6
  314. mindspore/train/callback/_flops_collector.py +1 -1
  315. mindspore/train/callback/_landscape.py +0 -1
  316. mindspore/train/callback/_train_fault_tolerance.py +97 -16
  317. mindspore/train/data_sink.py +11 -2
  318. mindspore/train/dataset_helper.py +9 -0
  319. mindspore/train/model.py +135 -55
  320. mindspore/train/serialization.py +133 -111
  321. mindspore/train/summary/summary_record.py +13 -2
  322. mindspore/turbojpeg.dll +0 -0
  323. mindspore/utils/__init__.py +3 -2
  324. mindspore/utils/dryrun.py +0 -6
  325. mindspore/utils/runtime_execution_order_check.py +163 -77
  326. mindspore/utils/sdc_detect.py +68 -0
  327. mindspore/utils/utils.py +6 -9
  328. mindspore/version.py +1 -1
  329. {mindspore-2.6.0rc1.dist-info → mindspore-2.7.0rc1.dist-info}/METADATA +5 -4
  330. {mindspore-2.6.0rc1.dist-info → mindspore-2.7.0rc1.dist-info}/RECORD +333 -371
  331. mindspore/_deprecated/jit.py +0 -198
  332. mindspore/experimental/es/__init__.py +0 -22
  333. mindspore/experimental/es/embedding_service.py +0 -891
  334. mindspore/experimental/es/embedding_service_layer.py +0 -581
  335. mindspore/profiler/parser/__init__.py +0 -14
  336. mindspore/profiler/parser/aicpu_data_parser.py +0 -272
  337. mindspore/profiler/parser/ascend_analysis/__init__.py +0 -14
  338. mindspore/profiler/parser/ascend_analysis/constant.py +0 -71
  339. mindspore/profiler/parser/ascend_analysis/file_manager.py +0 -180
  340. mindspore/profiler/parser/ascend_analysis/function_event.py +0 -185
  341. mindspore/profiler/parser/ascend_analysis/fwk_cann_parser.py +0 -136
  342. mindspore/profiler/parser/ascend_analysis/fwk_file_parser.py +0 -131
  343. mindspore/profiler/parser/ascend_analysis/msprof_timeline_parser.py +0 -104
  344. mindspore/profiler/parser/ascend_analysis/path_manager.py +0 -313
  345. mindspore/profiler/parser/ascend_analysis/profiler_info_parser.py +0 -123
  346. mindspore/profiler/parser/ascend_analysis/tlv_decoder.py +0 -86
  347. mindspore/profiler/parser/ascend_analysis/trace_event_manager.py +0 -75
  348. mindspore/profiler/parser/ascend_cluster_generator.py +0 -116
  349. mindspore/profiler/parser/ascend_communicate_generator.py +0 -314
  350. mindspore/profiler/parser/ascend_flops_generator.py +0 -116
  351. mindspore/profiler/parser/ascend_fpbp_generator.py +0 -82
  352. mindspore/profiler/parser/ascend_hccl_generator.py +0 -271
  353. mindspore/profiler/parser/ascend_integrate_generator.py +0 -42
  354. mindspore/profiler/parser/ascend_memory_generator.py +0 -185
  355. mindspore/profiler/parser/ascend_msprof_exporter.py +0 -282
  356. mindspore/profiler/parser/ascend_msprof_generator.py +0 -187
  357. mindspore/profiler/parser/ascend_op_generator.py +0 -334
  358. mindspore/profiler/parser/ascend_steptrace_generator.py +0 -94
  359. mindspore/profiler/parser/ascend_timeline_generator.py +0 -545
  360. mindspore/profiler/parser/base_timeline_generator.py +0 -483
  361. mindspore/profiler/parser/container.py +0 -229
  362. mindspore/profiler/parser/cpu_gpu_timeline_generator.py +0 -697
  363. mindspore/profiler/parser/flops_parser.py +0 -531
  364. mindspore/profiler/parser/framework_enum.py +0 -111
  365. mindspore/profiler/parser/framework_parser.py +0 -464
  366. mindspore/profiler/parser/framework_struct.py +0 -61
  367. mindspore/profiler/parser/gpu_analysis/__init__.py +0 -14
  368. mindspore/profiler/parser/gpu_analysis/function_event.py +0 -44
  369. mindspore/profiler/parser/gpu_analysis/fwk_file_parser.py +0 -89
  370. mindspore/profiler/parser/gpu_analysis/profiler_info_parser.py +0 -72
  371. mindspore/profiler/parser/hccl_parser.py +0 -573
  372. mindspore/profiler/parser/hwts_log_parser.py +0 -122
  373. mindspore/profiler/parser/integrator.py +0 -526
  374. mindspore/profiler/parser/memory_usage_parser.py +0 -277
  375. mindspore/profiler/parser/minddata_analyzer.py +0 -800
  376. mindspore/profiler/parser/minddata_parser.py +0 -186
  377. mindspore/profiler/parser/minddata_pipeline_parser.py +0 -299
  378. mindspore/profiler/parser/op_intermediate_parser.py +0 -149
  379. mindspore/profiler/parser/optime_parser.py +0 -250
  380. mindspore/profiler/parser/profiler_info.py +0 -213
  381. mindspore/profiler/parser/step_trace_parser.py +0 -666
  382. {mindspore-2.6.0rc1.dist-info → mindspore-2.7.0rc1.dist-info}/WHEEL +0 -0
  383. {mindspore-2.6.0rc1.dist-info → mindspore-2.7.0rc1.dist-info}/entry_points.txt +0 -0
  384. {mindspore-2.6.0rc1.dist-info → mindspore-2.7.0rc1.dist-info}/top_level.txt +0 -0
@@ -1,483 +0,0 @@
1
- # Copyright 2022 Huawei Technologies Co., Ltd
2
- #
3
- # Licensed under the Apache License, Version 2.0 (the "License");
4
- # you may not use this file except in compliance with the License.
5
- # You may obtain a copy of the License at
6
- #
7
- # http://www.apache.org/licenses/LICENSE-2.0
8
- #
9
- # Unless required by applicable law or agreed to in writing, software
10
- # distributed under the License is distributed on an "AS IS" BASIS,
11
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
- # See the License for the specific language governing permissions and
13
- # limitations under the License.
14
- # ============================================================================
15
- """The integrator for integrating parsed profiling files."""
16
- import os
17
- import stat
18
- import csv
19
- import json
20
-
21
- from mindspore import context
22
- from mindspore import log as logger
23
- from mindspore.context import get_auto_parallel_context
24
- from mindspore.profiler.common.exceptions.exceptions import ProfilerIOException
25
- from mindspore.profiler.parser.integrator import DeviceTarget
26
- from mindspore.profiler.common.validator.validate_path import validate_and_normalize_path
27
-
28
- SIZE_LIMIT_DEFAULT = 20 * 1024 * 1024 # 20MB
29
-
30
-
31
- class BaseTimelineGenerator:
32
- """
33
- Analyse timeline data from file.
34
- """
35
- # AI Core Op pid is device_id
36
- _AI_CPU_PID = 9000
37
- _COMMUNICATION_OP_PID = 10000
38
- _HOST_CPU_PID = 11000
39
- _OP_OVERLAP_PID = 12000
40
-
41
- _OP_GPU_ACTIVITY_PID = 13000
42
-
43
- _RECEIVE_ALONE = 7997
44
- _ALLREDUCE_ALONE = 7998
45
- _MERGED_COMPUTATION_TID = 7999
46
- _PURE_COMMUNICATION_TID = 8000
47
- _MERGED_COMMUNICATION_TID = 8001
48
- _FREE_TIME_TID = 8002
49
- _STEPS_TID = 100000
50
- _SCOPE_NAME_TID = 100001
51
- _GPU_OP_TID = 100002
52
- _HOST_CPU_OP_TID = 100003
53
- _SINGLE_TID = 0
54
-
55
- _STEPS_SORT_INDEX = -4
56
-
57
- _output_timeline_data_file_path = 'output_timeline_data_{}.txt'
58
- _timeline_meta = []
59
- _format_meta_data_list = []
60
- _thread_processed_list = []
61
-
62
- _map_tid_name_to_int = {
63
- "Steps": (-4, _STEPS_TID),
64
- "Scope Name": (-3, _SCOPE_NAME_TID),
65
- "GpuOps": (-2, _GPU_OP_TID),
66
- "HostCpuOps": (-1, _HOST_CPU_OP_TID)
67
- }
68
- _timeline_summary = {
69
- 'total_time': 0,
70
- 'num_of_streams': 0,
71
- 'num_of_ops': 0,
72
- 'op_exe_times': 0,
73
- 'max_scope_name_num': 0,
74
- }
75
- _op_name_idx, _tid_idx, _start_time_idx, _duration_idx = 0, 1, 2, 3
76
- _max_scope_name_num = 0
77
- _host_cpu_op_label = 'Host CPU OP'
78
- _gpu_op_label = "GPU Op"
79
- _ascend_op_label = "Ascend Op"
80
- _aicore_op_label = "AICORE OP"
81
- _aicpu_op_label = "AICPU OP"
82
-
83
- _device_id = 0
84
- _rank_size = 1
85
- _profiling_dir = ""
86
- _timeline_summary_filename = ""
87
- _display_filename = ""
88
- _op_name_list = []
89
- _device_target = DeviceTarget.ASCEND.value
90
- _model = context.GRAPH_MODE
91
- _framework_dir = "FRAMEWORK"
92
- _op_range_name = "op_range_{}"
93
-
94
- _col_names = ['op_name', 'stream_id', 'start_time', 'duration']
95
-
96
- def __init__(self, device_target, model):
97
- self._tid_dict = {
98
- "computation_op": (self._MERGED_COMPUTATION_TID, self._OP_OVERLAP_PID),
99
- "communication_not_overlapped": (self._PURE_COMMUNICATION_TID, self._OP_OVERLAP_PID),
100
- "communication": (self._MERGED_COMMUNICATION_TID, self._OP_OVERLAP_PID),
101
- "free_time": (self._FREE_TIME_TID, self._OP_OVERLAP_PID)
102
- }
103
- self._device_target = str(device_target).lower()
104
- self._model = model
105
- self._step_start_op_name = ""
106
- self._step_end_op_name = ""
107
- self._kernel_events = []
108
- self._pretty = False
109
-
110
- def get_kernel_event_list(self):
111
- return self._kernel_events
112
-
113
- @property
114
- def indent(self):
115
- indent = 1 if self._pretty else None
116
- return indent
117
-
118
- @staticmethod
119
- def get_parallel_context():
120
- """Get parallel context."""
121
- try:
122
- parallel_mode, stage_num = get_auto_parallel_context("parallel_mode"), get_auto_parallel_context(
123
- "pipeline_stages")
124
- except RuntimeError:
125
- logger.warning("[profiler] the feature of cluster bottleneck analyse "
126
- "is not supported in offline parse mode.")
127
- parallel_mode = "data_parallel"
128
- stage_num = 1
129
- if stage_num > 1:
130
- parallel_mode = "pipeline-parallel"
131
- elif parallel_mode != "data_parallel":
132
- parallel_mode = "model-parallel"
133
- else:
134
- parallel_mode = "data-parallel"
135
- return parallel_mode, stage_num
136
-
137
- @staticmethod
138
- def _update_num_of_streams(timeline, stream_count_dict):
139
- """Update number of streams."""
140
- stream_id = timeline[1]
141
- if stream_id in ["Steps", "Scope Name"]:
142
- return
143
- if stream_id not in stream_count_dict.keys():
144
- stream_count_dict[stream_id] = 1
145
- else:
146
- stream_count_dict[stream_id] += 1
147
-
148
- def get_thread_label_name(self):
149
- """Get process and thread config."""
150
- device_process_label = self._get_device_process_label()
151
- return [
152
- {"name": "process_labels", "ph": "M", "pid": f'2{self._device_id}',
153
- "args": {"labels": device_process_label}},
154
- {"name": "process_labels", "ph": "M", "pid": self._AI_CPU_PID, "args": {"labels": self._aicpu_op_label}},
155
- {"name": "process_labels", "ph": "M", "pid": self._COMMUNICATION_OP_PID,
156
- "args": {"labels": "Communication Op"}},
157
- {"name": "process_labels", "ph": "M", "pid": self._HOST_CPU_PID,
158
- "args": {"labels": self._host_cpu_op_label}},
159
- {"name": "process_labels", "ph": "M", "pid": self._OP_OVERLAP_PID,
160
- "args": {"labels": "Op Overlap Analyse"}},
161
- {"name": "process_labels", "ph": "M", "pid": self._OP_GPU_ACTIVITY_PID,
162
- "args": {"labels": "Activity Op"}},
163
-
164
- {"name": "process_sort_index", "ph": "M", "pid": f'2{self._device_id}', "args": {"sort_index": 2}},
165
- {"name": "process_sort_index", "ph": "M", "pid": self._AI_CPU_PID, "args": {"sort_index": 10}},
166
- {"name": "process_sort_index", "ph": "M", "pid": self._COMMUNICATION_OP_PID, "args": {"sort_index": 20}},
167
- {"name": "process_sort_index", "ph": "M", "pid": self._HOST_CPU_PID, "args": {"sort_index": 30}},
168
- {"name": "process_sort_index", "ph": "M", "pid": self._OP_OVERLAP_PID, "args": {"sort_index": 40}},
169
-
170
- {"name": "thread_name", "ph": "M", "pid": self._HOST_CPU_PID, "tid": self._HOST_CPU_OP_TID,
171
- "args": {"name": "Host CPU Op"}},
172
- {"name": "thread_name", "ph": "M", "pid": self._OP_OVERLAP_PID, "tid": self._MERGED_COMPUTATION_TID,
173
- "args": {"name": "Merged Computation Op"}},
174
- {"name": "thread_name", "ph": "M", "pid": self._OP_OVERLAP_PID, "tid": self._PURE_COMMUNICATION_TID,
175
- "args": {"name": "Pure Communication Op"}},
176
- {"name": "thread_name", "ph": "M", "pid": self._OP_OVERLAP_PID, "tid": self._MERGED_COMMUNICATION_TID,
177
- "args": {"name": "Merged Communication Op"}},
178
- {"name": "thread_name", "ph": "M", "pid": self._OP_OVERLAP_PID, "tid": self._FREE_TIME_TID,
179
- "args": {"name": "Free Time"}},
180
- {"name": "thread_name", "ph": "M", "pid": f'2{self._device_id}', "tid": self._STEPS_TID,
181
- "args": {"name": "Steps"}},
182
- {"name": "thread_name", "ph": "M", "pid": f'2{self._device_id}', "tid": self._SINGLE_TID,
183
- "args": {"name": "Ops"}},
184
-
185
- {"name": "thread_sort_index", "ph": "M", "pid": self._OP_OVERLAP_PID, "tid": self._MERGED_COMPUTATION_TID,
186
- "args": {"sort_index": self._MERGED_COMPUTATION_TID}},
187
- {"name": "thread_sort_index", "ph": "M", "pid": self._OP_OVERLAP_PID, "tid": self._PURE_COMMUNICATION_TID,
188
- "args": {"sort_index": self._PURE_COMMUNICATION_TID}},
189
- {"name": "thread_sort_index", "ph": "M", "pid": self._OP_OVERLAP_PID, "tid": self._MERGED_COMMUNICATION_TID,
190
- "args": {"sort_index": self._MERGED_COMMUNICATION_TID}},
191
- {"name": "thread_sort_index", "ph": "M", "pid": self._OP_OVERLAP_PID, "tid": self._FREE_TIME_TID,
192
- "args": {"sort_index": self._FREE_TIME_TID}},
193
- {"name": "thread_sort_index", "ph": "M", "pid": f'2{self._device_id}', "tid": self._STEPS_TID,
194
- "args": {"sort_index": self._STEPS_SORT_INDEX}},
195
- ]
196
-
197
- def write_timeline(self):
198
- """Load data according to the parsed profiling files."""
199
- # Write timeline to file.
200
- logger.info('Writing timeline file...')
201
- timeline_meta = self.write_timeline_to_json_by_limitation()
202
- logger.info('Finished file writing!')
203
- return timeline_meta
204
-
205
- def write_timeline_to_json_by_limitation(self):
206
- """Write timeline to json by limitation."""
207
- display_file_path = os.path.join(
208
- self._profiling_dir,
209
- self._display_filename
210
- )
211
- display_file_path = validate_and_normalize_path(display_file_path)
212
-
213
- try:
214
- timeline_data = self.get_thread_label_name()
215
- for data in self._timeline_meta:
216
- timeline_data.append(data)
217
- if "scope_level" in data.keys():
218
- self._max_scope_name_num = max(
219
- self._max_scope_name_num, data["scope_level"] + 1)
220
-
221
- with os.fdopen(os.open(display_file_path, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600), 'w') as json_file:
222
- json.dump(timeline_data, json_file, indent=self.indent)
223
- os.chmod(display_file_path, stat.S_IREAD | stat.S_IWRITE)
224
- return self._timeline_meta
225
- except (IOError, OSError) as err:
226
- logger.critical('Error occurred when write timeline display file: %s', err)
227
- raise ProfilerIOException() from err
228
-
229
def write_timeline_summary(self):
    """Serialize ``self._timeline_summary`` to the summary JSON file.

    Raises:
        ProfilerIOException: If opening or writing the file fails.
    """
    summary_path = validate_and_normalize_path(
        os.path.join(self._profiling_dir, self._timeline_summary_filename)
    )

    try:
        # The file is created with owner-only permissions (0o600).
        descriptor = os.open(summary_path, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
        with os.fdopen(descriptor, 'w') as json_file:
            json.dump(self._timeline_summary, json_file, indent=self.indent)
    except (IOError, OSError) as err:
        logger.critical('Error occurred when write timeline summary file: %s', err)
        raise ProfilerIOException() from err

    if not os.path.exists(summary_path):
        return
    os.chmod(summary_path, stat.S_IREAD | stat.S_IWRITE)
247
-
248
def _get_device_process_label(self):
    """Pick the process label that matches the device target and run mode.

    Returns:
        The Ascend op label for Ascend in PyNative mode, the GPU label for
        GPU, the host CPU label for CPU, and the AI Core label otherwise
        (including Ascend in graph mode and unknown targets).
    """
    target = self._device_target
    if target == DeviceTarget.ASCEND.value:
        runs_in_graph_mode = self._model == context.GRAPH_MODE
        if not runs_in_graph_mode and self._model == context.PYNATIVE_MODE:
            return self._ascend_op_label
        return self._aicore_op_label
    if target == DeviceTarget.GPU.value:
        return self._gpu_op_label
    if target == DeviceTarget.CPU.value:
        return self._host_cpu_op_label
    return self._aicore_op_label
261
-
262
def _get_merged_time_list(self, time_list, get_interval_time=False, display_name="computation_op", factor=1):
    """
    Merge overlapping time segments and build the derived row lists.

    Each item of ``time_list`` supplies a start time and a duration (read from
    the slice ``[_start_time_idx, _duration_idx]``); the end time is
    ``start + duration / factor``. For example the segments
    [[1,5], [2,6], [7,8]] are merged into [[1,6], [7,8]]. Items are processed
    in the given order and only compared against the latest merged end.

    Args:
        time_list: Sequence of timeline rows.
        get_interval_time: If True, the first and last boundaries are dropped
            so the remaining pairs describe the gaps between merged segments.
        display_name: Name put into every row and used to look up (tid, pid)
            in ``self._tid_dict`` ((0, 0) when absent).
        factor: Ratio between the start-time unit and the duration unit.

    Returns:
        Tuple ``(merged_res_list, interval_display_list, merged_display_list)``.
        Rows are ``[display_name, tid, start, x, pid]`` where ``x`` is the end
        time in ``merged_res_list`` and ``(end - start) * factor`` in the two
        display lists.
    """
    tid_and_pid = self._tid_dict.get(display_name, (0, 0))
    tid, pid = tid_and_pid[0], tid_and_pid[1]

    # Flat boundary list: [start0, end0, start1, end1, ...].
    bounds = []
    for time_item in time_list:
        window = [float(field) for field in time_item[self._start_time_idx:self._duration_idx + 1]]
        window[1] = window[0] + window[1] / factor
        if bounds and not window[0] > bounds[-1]:
            # Overlaps (or touches) the latest merged segment: stretch its end.
            bounds[-1] = max(bounds[-1], window[1])
        else:
            bounds.extend(window)

    def build_rows(flat, as_duration):
        """Pair up consecutive boundaries into display/result rows."""
        rows = []
        for begin, finish in zip(flat[::2], flat[1::2]):
            tail = (finish - begin) * factor if as_duration else finish
            rows.append([display_name, tid, begin, tail, pid])
        return rows

    # merged_display_list data used for ui page.
    merged_display_list = build_rows(bounds, True)

    if get_interval_time:
        bounds = bounds[1:-1]

    # merged_res_list is used to compute overlap with other time lists,
    # interval_display_list is the same span data formatted for the ui page.
    merged_res_list = build_rows(bounds, False)
    interval_display_list = build_rows(bounds, True)

    return merged_res_list, interval_display_list, merged_display_list
309
-
310
def _update_format_meta_data(self, timeline_dict):
    """Map the thread name in ``timeline_dict`` to an integer tid and record its metadata.

    The thread name is resolved through ``self._map_tid_name_to_int`` (which
    yields ``(sort_index, tid)``) or, for names starting with "Stream", from
    the number after the last '#'. Unknown names leave ``timeline_dict``
    untouched.

    For every thread seen for the first time, two independent metadata
    records are appended to ``self._format_meta_data_list``: a "thread_name"
    record and a "thread_sort_index" record.

    Fixes compared with the previous implementation:
      * the two records used to be the *same* dict object, so mutating it for
        the second record overwrote the "thread_name" record;
      * the "already processed" check ran after the appends, so the records
        were re-appended for every event of the same thread.

    Args:
        timeline_dict (dict): Timeline event whose 'tid' entry holds the
            thread name; it is replaced in place by the integer tid.
    """
    tid_name = timeline_dict['tid']

    if tid_name in self._map_tid_name_to_int:
        sort_index, tid = self._map_tid_name_to_int.get(tid_name)
    elif tid_name.startswith("Stream"):
        tid = int(tid_name.split("#")[-1])
        sort_index = tid
    else:
        return

    # The event itself must always be rewritten, even for known threads.
    timeline_dict["tid"] = tid

    if tid_name in self._thread_processed_list:
        return
    self._thread_processed_list.append(tid_name)

    pid = int(f'2{self._device_id}')
    if tid_name.startswith(self._host_cpu_op_label):
        pid = self._HOST_CPU_PID

    shared_fields = {"pid": pid, "tid": tid, "ts": 0, "ph": "M", "cat": "__metadata"}
    # Build two separate dicts so the records cannot alias each other.
    self._format_meta_data_list.append(
        {"name": "thread_name", **shared_fields, "args": {"name": tid_name}})
    self._format_meta_data_list.append(
        {"name": "thread_sort_index", **shared_fields, "args": {"sort_index": sort_index}})
349
-
350
def _get_max_scope_name_num(self, timeline_list):
    """Return the deepest scope nesting among all operators.

    The depth of an operator is the number of '/' separators in its name
    (the field at ``self._op_name_idx``); an empty list yields 0.
    """
    return max(
        (time_item[self._op_name_idx].count('/') for time_item in timeline_list),
        default=0,
    )
358
-
359
def _get_scope_name_time_list(self, timeline_list, subgraph, factor_start_time_to_duration=1):
    """Produce the timeline of hierarchical scope names.

    Every operator name (column 0 of a timeline row) is split on '/'; all
    parts but the last are scopes, prefixed with their level ("0-Default").
    Operators starting with "Default/InitDataSetQueue" and operators whose
    top scope is not ``subgraph`` contribute no scopes.

    A scope is opened at the first row that contains it and is closed (and
    emitted) when a row no longer contains it, at the last row, or at the row
    whose name equals ``self._step_end_op_name``.

    Args:
        timeline_list: Rows indexed by ``self._start_time_idx`` and
            ``self._duration_idx`` for start time and duration.
        subgraph: Top-level scope name to keep.
        factor_start_time_to_duration: Multiplier converting a start-time
            difference into the duration unit.

    Returns:
        List of ``[scope_name, "Scope Name", start_time, duration]`` sorted by
        start time, then by scope level.
    """
    full_name_col, level_col, closed = 0, 0, -1
    # scope name -> {'start_item_idx', 'end_item_idx'} into timeline_list;
    # a start index of ``closed`` marks a scope that is currently not open.
    open_spans = {}
    scope_rows = []

    for idx, time_item in enumerate(timeline_list):
        op_full_name = time_item[full_name_col]
        parts = op_full_name.split('/')[:-1]
        # skip Default/InitDataSetQueue operator.
        if op_full_name.startswith("Default/InitDataSetQueue"):
            parts = []
        # process scope name of subgraph(Default/Gradients/recompute_Default) only.
        if parts and parts[0] != subgraph:
            parts = []
        # the level prefix distinguishes equal names at different depths.
        active = [f"{level}-{name}" for level, name in enumerate(parts)]

        # open new scopes / extend the ones still running.
        for scope in active:
            span = open_spans.get(scope)
            if span is None or span['start_item_idx'] == closed:
                open_spans[scope] = {'start_item_idx': idx, 'end_item_idx': idx}
            else:
                span['end_item_idx'] = idx

        # emit every open scope that ends at this row.
        for scope, span in open_spans.items():
            if span['start_item_idx'] == closed:
                continue
            keeps_running = (
                scope in active
                and idx != (len(timeline_list) - 1)
                and op_full_name != self._step_end_op_name
            )
            if keeps_running:
                continue
            first_item = timeline_list[span['start_item_idx']]
            last_item = timeline_list[span['end_item_idx']]
            start_time = first_item[self._start_time_idx]
            duration = (float(last_item[self._start_time_idx]) - float(start_time)) * \
                factor_start_time_to_duration + \
                float(last_item[self._duration_idx])
            scope_rows.append([scope, "Scope Name", start_time, duration])
            span['start_item_idx'] = closed

    def display_order(row):
        """Sort by start time, then by scope level (e.g. the 0 of "0-Default")."""
        return float(row[self._start_time_idx]), int(row[level_col].split('-')[0])

    # for equal start times the earlier element is shown on the higher line in the UI.
    scope_rows.sort(key=display_order)
    return scope_rows
409
-
410
def _set_step_start_and_end_op_name(self, timeline_list):
    """Set the start and end operator full name of each step.

    The start operator is the first row, or the second one when the first row
    is a "Default/InitDataSetQueue" operator. The end operator is the last
    row seen before the start operator name appears again (or the start
    operator itself when nothing follows it).

    Nothing is changed when ``timeline_list`` is empty or contains only the
    InitDataSetQueue row; the latter case used to raise ``IndexError``.

    Args:
        timeline_list: Rows whose operator name is at ``self._op_name_idx``.
    """
    if not timeline_list:
        return

    start_op_idx = 0
    if timeline_list[0][self._op_name_idx].startswith("Default/InitDataSetQueue"):
        start_op_idx = 1
    if start_op_idx >= len(timeline_list):
        # Only the InitDataSetQueue operator is present: no step to describe.
        return

    self._step_start_op_name = timeline_list[start_op_idx][self._op_name_idx]
    self._step_end_op_name = self._step_start_op_name
    for time_item in timeline_list[start_op_idx + 1:]:
        op_name = time_item[self._op_name_idx]
        if op_name == self._step_start_op_name:
            # The start operator shows up again: the next step begins here.
            break
        self._step_end_op_name = op_name
425
-
426
def _get_step_time_list(self, timeline_list, factor_start_time_to_duration=1):
    """Produce the time of each step.

    A step starts at a row named ``self._step_start_op_name`` and is emitted
    at every row named ``self._step_end_op_name``.

    Args:
        timeline_list: Rows indexed by ``self._op_name_idx``,
            ``self._start_time_idx`` and ``self._duration_idx``.
        factor_start_time_to_duration: Multiplier converting a start-time
            difference into the duration unit.

    Returns:
        List of ``[step_number_as_str, "Steps", start_time, duration]``,
        numbered from "1".
    """
    steps = []
    step_begin = 0
    for time_item in timeline_list:
        op_name = time_item[self._op_name_idx]
        if op_name == self._step_start_op_name:
            step_begin = time_item[self._start_time_idx]
        if op_name != self._step_end_op_name:
            continue
        elapsed = (float(time_item[self._start_time_idx]) - float(step_begin)) * \
            float(factor_start_time_to_duration) + float(time_item[self._duration_idx])
        steps.append([str(len(steps) + 1), "Steps", float(step_begin), elapsed])

    return steps
444
-
445
def _write_cluster_metrices(self, metrices, is_pipeline_parallel, device_target, dev_id):
    """Write cluster metrics to a CSV file in the profiling directory.

    Nothing is written in PyNative mode. Note that cluster bottleneck
    analysis is not supported in offline parse mode because the parallel
    context is not set there.

    Args:
        metrices: Sequence of per-step metric columns; the first two are used,
            or the first five when ``is_pipeline_parallel`` is true.
        is_pipeline_parallel: Selects the five-column pipeline layout.
        device_target: Values are written unscaled for "Ascend" and divided
            by 1e3 for every other target.
        dev_id: Device id used in the output file name.
    """
    if context.get_context("mode") == context.PYNATIVE_MODE:
        return
    parallel_mode, stage_num = BaseTimelineGenerator.get_parallel_context()

    unit = 1 if device_target == "Ascend" else 1e3
    time_decimal_digits = 4
    cluster_analyse_file_path = os.path.join(
        self._profiling_dir,
        self._cluster_analyse_filename.format(parallel_mode, stage_num, self._rank_size, dev_id)
    )
    cluster_analyse_file_path = validate_and_normalize_path(cluster_analyse_file_path)

    descriptor = os.open(cluster_analyse_file_path, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
    # newline='' is required for csv writers: without it text mode turns the
    # writer's '\r\n' into '\r\r\n' on Windows, producing blank rows.
    with os.fdopen(descriptor, 'w', newline='') as file_handle:
        csv_writer = csv.writer(file_handle)
        if is_pipeline_parallel:
            header = [
                'computation_time', 'communication_alone_time', 'stage_time',
                'receive_alone_time', 'collective_communication_alone_time'
            ]
            zip_metrices = zip(metrices[0], metrices[1], metrices[2], metrices[3], metrices[4])
        else:
            header = ['computation_time', 'communication_alone_time']
            zip_metrices = zip(metrices[0], metrices[1])
        csv_writer.writerow(header)
        for row_data in zip_metrices:
            csv_writer.writerow([round(val / unit, time_decimal_digits) for val in row_data])
    os.chmod(cluster_analyse_file_path, stat.S_IREAD | stat.S_IWRITE)
478
-
479
def _register_op_name(self, timeline_list):
    """Add every not-yet-known operator name to ``self._op_name_list``.

    Empty (falsy) rows are ignored; names are appended in first-seen order.
    """
    for timeline in timeline_list:
        if not timeline:
            continue
        op_name = timeline[self._op_name_idx]
        if op_name in self._op_name_list:
            continue
        self._op_name_list.append(op_name)