mindspore 2.6.0rc1__cp311-cp311-win_amd64.whl → 2.7.0__cp311-cp311-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of mindspore might be problematic (more details are linked from the original page).

Files changed (458):
  1. mindspore/.commit_id +1 -1
  2. mindspore/Microsoft.VisualStudio.Telemetry.dll +0 -0
  3. mindspore/Newtonsoft.Json.dll +0 -0
  4. mindspore/__init__.py +2 -2
  5. mindspore/_c_dataengine.cp311-win_amd64.pyd +0 -0
  6. mindspore/_c_expression.cp311-win_amd64.pyd +0 -0
  7. mindspore/_c_mindrecord.cp311-win_amd64.pyd +0 -0
  8. mindspore/_checkparam.py +42 -11
  9. mindspore/_extends/builtin_operations.py +3 -3
  10. mindspore/{_deprecated → _extends/optimize}/__init__.py +9 -3
  11. mindspore/_extends/optimize/cell_utils.py +96 -0
  12. mindspore/_extends/parallel_compile/akg_compiler/custom.py +1109 -0
  13. mindspore/_extends/parallel_compile/akg_compiler/gen_custom_op_files.py +1 -1
  14. mindspore/_extends/parse/__init__.py +3 -3
  15. mindspore/_extends/parse/compile_config.py +44 -22
  16. mindspore/_extends/parse/deprecated/deprecated_tensor_method.py +1 -2
  17. mindspore/_extends/parse/parser.py +65 -84
  18. mindspore/_extends/parse/resources.py +39 -0
  19. mindspore/_extends/parse/standard_method.py +58 -14
  20. mindspore/_extends/parse/trope.py +8 -1
  21. mindspore/_extends/pijit/__init__.py +1 -2
  22. mindspore/_extends/pijit/pijit_func_white_list.py +2 -5
  23. mindspore/amp.py +4 -22
  24. mindspore/atlprov.dll +0 -0
  25. mindspore/avcodec-59.dll +0 -0
  26. mindspore/avdevice-59.dll +0 -0
  27. mindspore/avfilter-8.dll +0 -0
  28. mindspore/avformat-59.dll +0 -0
  29. mindspore/avutil-57.dll +0 -0
  30. mindspore/boost/adasum.py +1 -1
  31. mindspore/boost/boost_cell_wrapper.py +4 -4
  32. mindspore/c1.dll +0 -0
  33. mindspore/c1xx.dll +0 -0
  34. mindspore/c2.dll +0 -0
  35. mindspore/common/__init__.py +43 -12
  36. mindspore/common/_grad_function.py +2 -1
  37. mindspore/common/_pijit_context.py +28 -7
  38. mindspore/common/_stub_tensor.py +1 -209
  39. mindspore/common/_tensor_cpp_method.py +1 -1
  40. mindspore/common/_tensor_docs.py +178 -53
  41. mindspore/common/_utils.py +9 -1
  42. mindspore/common/api.py +377 -203
  43. mindspore/common/dtype.py +108 -57
  44. mindspore/common/dump.py +11 -16
  45. mindspore/common/dynamic_shape/__init__.py +0 -0
  46. mindspore/common/{auto_dynamic_shape.py → dynamic_shape/auto_dynamic_shape.py} +17 -23
  47. mindspore/common/dynamic_shape/enable_dynamic.py +197 -0
  48. mindspore/common/file_system.py +59 -9
  49. mindspore/common/generator.py +5 -3
  50. mindspore/common/hook_handle.py +33 -5
  51. mindspore/common/jit_config.py +1 -1
  52. mindspore/common/jit_trace.py +84 -105
  53. mindspore/common/np_dtype.py +3 -3
  54. mindspore/common/parameter.py +27 -29
  55. mindspore/common/recompute.py +5 -7
  56. mindspore/common/sparse_tensor.py +0 -3
  57. mindspore/common/symbol.py +0 -1
  58. mindspore/common/tensor.py +117 -131
  59. mindspore/communication/_comm_helper.py +46 -4
  60. mindspore/communication/management.py +79 -7
  61. mindspore/context.py +67 -55
  62. mindspore/dataset/__init__.py +1 -1
  63. mindspore/dataset/audio/transforms.py +1 -1
  64. mindspore/dataset/core/config.py +38 -4
  65. mindspore/dataset/engine/datasets.py +350 -322
  66. mindspore/dataset/engine/datasets_user_defined.py +70 -24
  67. mindspore/dataset/engine/iterators.py +2 -2
  68. mindspore/dataset/engine/obs/config_loader.py +2 -2
  69. mindspore/dataset/engine/obs/obs_mindrecord_dataset.py +8 -0
  70. mindspore/dataset/transforms/c_transforms.py +2 -2
  71. mindspore/dataset/transforms/py_transforms.py +7 -3
  72. mindspore/dataset/transforms/transforms.py +10 -6
  73. mindspore/dataset/vision/__init__.py +1 -1
  74. mindspore/dataset/vision/py_transforms.py +8 -8
  75. mindspore/dataset/vision/transforms.py +17 -5
  76. mindspore/dataset/vision/utils.py +632 -21
  77. mindspore/dataset/vision/validators.py +1 -0
  78. mindspore/device_context/ascend/device.py +1 -1
  79. mindspore/device_context/ascend/op_tuning.py +35 -1
  80. mindspore/device_context/gpu/__init__.py +2 -2
  81. mindspore/device_context/gpu/device.py +1 -1
  82. mindspore/device_context/gpu/op_precision.py +4 -2
  83. mindspore/device_context/gpu/op_tuning.py +6 -3
  84. mindspore/device_manager.py +16 -9
  85. mindspore/dnnl.dll +0 -0
  86. mindspore/dpcmi.dll +0 -0
  87. mindspore/experimental/llm_boost/ascend_native/llama_boost_ascend_native.py +3 -4
  88. mindspore/experimental/llm_boost/atb/boost_base.py +2 -3
  89. mindspore/experimental/optim/adadelta.py +13 -20
  90. mindspore/experimental/optim/adagrad.py +15 -22
  91. mindspore/experimental/optim/adam.py +17 -24
  92. mindspore/experimental/optim/adamax.py +14 -22
  93. mindspore/experimental/optim/adamw.py +28 -34
  94. mindspore/experimental/optim/asgd.py +15 -25
  95. mindspore/experimental/optim/lr_scheduler.py +27 -45
  96. mindspore/experimental/optim/nadam.py +14 -24
  97. mindspore/experimental/optim/optimizer.py +13 -23
  98. mindspore/experimental/optim/radam.py +18 -24
  99. mindspore/experimental/optim/rmsprop.py +14 -25
  100. mindspore/experimental/optim/rprop.py +15 -26
  101. mindspore/experimental/optim/sgd.py +9 -19
  102. mindspore/hal/__init__.py +4 -4
  103. mindspore/hal/contiguous_tensors_handle.py +2 -2
  104. mindspore/hal/memory.py +27 -7
  105. mindspore/include/api/cell.h +65 -5
  106. mindspore/include/api/cfg.h +24 -7
  107. mindspore/include/api/context.h +1 -0
  108. mindspore/include/api/delegate.h +10 -2
  109. mindspore/include/api/dual_abi_helper.h +100 -19
  110. mindspore/include/api/graph.h +14 -1
  111. mindspore/include/api/kernel.h +16 -3
  112. mindspore/include/api/kernel_api.h +9 -1
  113. mindspore/include/api/metrics/accuracy.h +9 -0
  114. mindspore/include/api/model.h +8 -1
  115. mindspore/include/api/model_group.h +4 -0
  116. mindspore/include/api/model_parallel_runner.h +2 -0
  117. mindspore/include/api/status.h +48 -10
  118. mindspore/include/api/types.h +8 -3
  119. mindspore/include/c_api/model_c.h +0 -58
  120. mindspore/include/c_api/tensor_c.h +0 -26
  121. mindspore/include/dataset/constants.h +9 -0
  122. mindspore/include/dataset/vision_ascend.h +1 -1
  123. mindspore/jpeg62.dll +0 -0
  124. mindspore/mindrecord/tools/cifar10.py +61 -11
  125. mindspore/mindrecord/tools/cifar10_to_mr.py +5 -0
  126. mindspore/mindspore_backend_common.dll +0 -0
  127. mindspore/mindspore_backend_manager.dll +0 -0
  128. mindspore/mindspore_common.dll +0 -0
  129. mindspore/mindspore_core.dll +0 -0
  130. mindspore/mindspore_cpu_res_manager.dll +0 -0
  131. mindspore/mindspore_dump.dll +0 -0
  132. mindspore/mindspore_frontend.dll +0 -0
  133. mindspore/mindspore_glog.dll +0 -0
  134. mindspore/mindspore_memory_pool.dll +0 -0
  135. mindspore/mindspore_ms_backend.dll +0 -0
  136. mindspore/mindspore_ops.dll +0 -0
  137. mindspore/mindspore_ops_host.dll +0 -0
  138. mindspore/mindspore_ops_kernel_common.dll +0 -0
  139. mindspore/mindspore_profiler.dll +0 -0
  140. mindspore/mindspore_pyboost.dll +0 -0
  141. mindspore/mindspore_pynative.dll +0 -0
  142. mindspore/mindspore_res_manager.dll +0 -0
  143. mindspore/mindspore_runtime_pipeline.dll +0 -0
  144. mindspore/mint/__init__.py +6 -46
  145. mindspore/mint/distributed/__init__.py +5 -0
  146. mindspore/mint/distributed/distributed.py +429 -23
  147. mindspore/mint/nn/__init__.py +1 -1
  148. mindspore/mint/nn/functional.py +53 -6
  149. mindspore/mint/nn/layer/_functions.py +163 -294
  150. mindspore/mint/nn/layer/activation.py +8 -6
  151. mindspore/mint/nn/layer/conv.py +140 -104
  152. mindspore/mint/nn/layer/normalization.py +11 -25
  153. mindspore/mint/optim/adam.py +19 -18
  154. mindspore/mint/optim/adamw.py +14 -8
  155. mindspore/mint/optim/sgd.py +5 -5
  156. mindspore/msobj140.dll +0 -0
  157. mindspore/mspdb140.dll +0 -0
  158. mindspore/mspdbcore.dll +0 -0
  159. mindspore/mspdbst.dll +0 -0
  160. mindspore/mspft140.dll +0 -0
  161. mindspore/msvcdis140.dll +0 -0
  162. mindspore/msvcp140_1.dll +0 -0
  163. mindspore/msvcp140_2.dll +0 -0
  164. mindspore/msvcp140_atomic_wait.dll +0 -0
  165. mindspore/msvcp140_codecvt_ids.dll +0 -0
  166. mindspore/nn/cell.py +491 -623
  167. mindspore/nn/grad/cell_grad.py +11 -12
  168. mindspore/nn/layer/activation.py +36 -36
  169. mindspore/nn/layer/basic.py +74 -77
  170. mindspore/nn/layer/channel_shuffle.py +4 -4
  171. mindspore/nn/layer/combined.py +4 -2
  172. mindspore/nn/layer/conv.py +117 -110
  173. mindspore/nn/layer/dense.py +9 -7
  174. mindspore/nn/layer/embedding.py +50 -52
  175. mindspore/nn/layer/image.py +38 -40
  176. mindspore/nn/layer/math.py +111 -112
  177. mindspore/nn/layer/normalization.py +56 -44
  178. mindspore/nn/layer/pooling.py +58 -63
  179. mindspore/nn/layer/rnn_cells.py +33 -33
  180. mindspore/nn/layer/rnns.py +56 -56
  181. mindspore/nn/layer/thor_layer.py +74 -73
  182. mindspore/nn/layer/transformer.py +11 -1
  183. mindspore/nn/learning_rate_schedule.py +20 -20
  184. mindspore/nn/loss/loss.py +79 -81
  185. mindspore/nn/optim/adam.py +4 -6
  186. mindspore/nn/optim/adasum.py +2 -2
  187. mindspore/nn/optim/asgd.py +2 -0
  188. mindspore/nn/optim/lamb.py +1 -3
  189. mindspore/nn/optim/optimizer.py +1 -1
  190. mindspore/nn/optim/tft_wrapper.py +2 -3
  191. mindspore/nn/optim/thor.py +2 -2
  192. mindspore/nn/probability/distribution/_utils/utils.py +2 -2
  193. mindspore/nn/probability/distribution/exponential.py +2 -1
  194. mindspore/nn/probability/distribution/poisson.py +2 -1
  195. mindspore/nn/sparse/sparse.py +3 -3
  196. mindspore/nn/wrap/cell_wrapper.py +73 -42
  197. mindspore/nn/wrap/grad_reducer.py +37 -52
  198. mindspore/nn/wrap/loss_scale.py +72 -74
  199. mindspore/numpy/array_creations.py +7 -7
  200. mindspore/numpy/fft.py +1 -1
  201. mindspore/numpy/math_ops.py +5 -5
  202. mindspore/numpy/utils_const.py +1 -1
  203. mindspore/opencv_core452.dll +0 -0
  204. mindspore/opencv_imgcodecs452.dll +0 -0
  205. mindspore/opencv_imgproc452.dll +0 -0
  206. mindspore/ops/_grad_experimental/grad_comm_ops.py +51 -13
  207. mindspore/ops/_grad_experimental/grad_debug_ops.py +14 -0
  208. mindspore/ops/_grad_experimental/grad_inner_ops.py +0 -9
  209. mindspore/ops/_op_impl/cpu/__init__.py +1 -0
  210. mindspore/{experimental/es/__init__.py → ops/_op_impl/cpu/joinedstr_op.py} +12 -6
  211. mindspore/ops/_vmap/vmap_array_ops.py +31 -13
  212. mindspore/ops/_vmap/vmap_nn_ops.py +8 -16
  213. mindspore/ops/auto_generate/cpp_create_prim_instance_helper.py +54 -13
  214. mindspore/ops/auto_generate/gen_extend_func.py +27 -145
  215. mindspore/ops/auto_generate/gen_ops_def.py +1027 -347
  216. mindspore/ops/auto_generate/gen_ops_prim.py +2341 -1117
  217. mindspore/ops/auto_generate/pyboost_inner_prim.py +31 -1
  218. mindspore/ops/composite/__init__.py +10 -0
  219. mindspore/ops/composite/base.py +9 -5
  220. mindspore/ops/composite/multitype_ops/__init__.py +12 -1
  221. mindspore/ops/composite/multitype_ops/_compile_utils.py +133 -109
  222. mindspore/ops/composite/multitype_ops/_constexpr_utils.py +1 -1
  223. mindspore/ops/composite/multitype_ops/add_impl.py +70 -2
  224. mindspore/ops/composite/multitype_ops/div_impl.py +49 -0
  225. mindspore/ops/composite/multitype_ops/floordiv_impl.py +29 -0
  226. mindspore/ops/composite/multitype_ops/getitem_impl.py +11 -0
  227. mindspore/ops/composite/multitype_ops/mod_impl.py +5 -3
  228. mindspore/ops/composite/multitype_ops/mul_impl.py +49 -0
  229. mindspore/ops/composite/multitype_ops/setitem_impl.py +57 -0
  230. mindspore/ops/composite/multitype_ops/sub_impl.py +34 -0
  231. mindspore/ops/composite/multitype_ops/zeros_like_impl.py +14 -0
  232. mindspore/ops/function/__init__.py +4 -1
  233. mindspore/ops/function/_add_attr_func.py +11 -6
  234. mindspore/ops/function/array_func.py +19 -102
  235. mindspore/ops/function/debug_func.py +8 -5
  236. mindspore/ops/function/grad/grad_func.py +5 -13
  237. mindspore/ops/function/math_func.py +77 -572
  238. mindspore/ops/function/nn_func.py +46 -94
  239. mindspore/ops/function/other_func.py +4 -1
  240. mindspore/ops/function/random_func.py +44 -5
  241. mindspore/ops/function/vmap_func.py +2 -1
  242. mindspore/ops/functional.py +4 -4
  243. mindspore/ops/functional_overload.py +594 -18
  244. mindspore/ops/op_info_register.py +21 -0
  245. mindspore/ops/operations/__init__.py +16 -11
  246. mindspore/ops/operations/_custom_ops_utils.py +689 -34
  247. mindspore/ops/operations/_inner_ops.py +14 -18
  248. mindspore/ops/operations/_sequence_ops.py +1 -1
  249. mindspore/ops/operations/array_ops.py +5 -51
  250. mindspore/ops/operations/comm_ops.py +186 -41
  251. mindspore/ops/operations/custom_ops.py +303 -177
  252. mindspore/ops/operations/debug_ops.py +59 -4
  253. mindspore/ops/operations/image_ops.py +13 -13
  254. mindspore/ops/operations/manually_defined/ops_def.py +27 -28
  255. mindspore/ops/operations/math_ops.py +8 -9
  256. mindspore/ops/operations/nn_ops.py +8 -40
  257. mindspore/ops/primitive.py +9 -20
  258. mindspore/ops/tensor_method.py +63 -15
  259. mindspore/ops_generate/api/cpp_create_prim_instance_helper_generator.py +1 -1
  260. mindspore/ops_generate/api/functional_map_cpp_generator.py +10 -9
  261. mindspore/ops_generate/api/functions_cc_generator.py +58 -10
  262. mindspore/ops_generate/api/tensor_func_reg_cpp_generator.py +1 -1
  263. mindspore/ops_generate/common/base_generator.py +14 -0
  264. mindspore/ops_generate/common/gen_constants.py +8 -3
  265. mindspore/ops_generate/common/gen_utils.py +0 -19
  266. mindspore/ops_generate/common/op_proto.py +11 -4
  267. mindspore/ops_generate/common/template.py +88 -11
  268. mindspore/ops_generate/gen_ops.py +1 -1
  269. mindspore/ops_generate/op_def/lite_ops_cpp_generator.py +4 -4
  270. mindspore/ops_generate/op_def/ops_def_cc_generator.py +0 -3
  271. mindspore/ops_generate/op_def/ops_name_h_generator.py +0 -3
  272. mindspore/ops_generate/op_def/ops_primitive_h_generator.py +0 -4
  273. mindspore/ops_generate/op_def_py/op_prim_py_generator.py +5 -2
  274. mindspore/ops_generate/pyboost/auto_grad_impl_cc_generator.py +49 -8
  275. mindspore/ops_generate/pyboost/auto_grad_reg_cc_generator.py +2 -2
  276. mindspore/ops_generate/pyboost/gen_pyboost_func.py +31 -16
  277. mindspore/ops_generate/pyboost/op_template_parser.py +98 -72
  278. mindspore/ops_generate/pyboost/pyboost_functions_cpp_generator.py +70 -273
  279. mindspore/ops_generate/pyboost/pyboost_functions_h_generator.py +14 -6
  280. mindspore/ops_generate/pyboost/pyboost_functions_impl_cpp_generator.py +316 -0
  281. mindspore/ops_generate/pyboost/pyboost_functions_py_generator.py +1 -1
  282. mindspore/ops_generate/pyboost/pyboost_grad_function_cpp_generator.py +5 -3
  283. mindspore/ops_generate/pyboost/pyboost_inner_prim_generator.py +1 -1
  284. mindspore/ops_generate/pyboost/pyboost_internal_functions_cpp_generator.py +76 -0
  285. mindspore/ops_generate/pyboost/pyboost_internal_functions_h_generator.py +76 -0
  286. mindspore/ops_generate/pyboost/pyboost_internal_kernel_info_adapter_generator.py +125 -0
  287. mindspore/ops_generate/pyboost/pyboost_native_grad_functions_generator.py +4 -3
  288. mindspore/ops_generate/pyboost/pyboost_op_cpp_code_generator.py +348 -61
  289. mindspore/ops_generate/pyboost/pyboost_overload_functions_cpp_generator.py +1 -1
  290. mindspore/ops_generate/pyboost/pyboost_utils.py +118 -9
  291. mindspore/ops_generate/tensor_py_cc_generator.py +1 -24
  292. mindspore/parallel/_auto_parallel_context.py +16 -23
  293. mindspore/parallel/_cell_wrapper.py +113 -45
  294. mindspore/parallel/_parallel_serialization.py +4 -3
  295. mindspore/parallel/_ps_context.py +4 -6
  296. mindspore/parallel/_tensor.py +167 -12
  297. mindspore/parallel/_transformer/moe.py +1 -1
  298. mindspore/parallel/_transformer/transformer.py +17 -12
  299. mindspore/parallel/_utils.py +5 -11
  300. mindspore/parallel/auto_parallel.py +35 -14
  301. mindspore/parallel/checkpoint_convert.py +3 -3
  302. mindspore/parallel/checkpoint_transform.py +13 -7
  303. mindspore/parallel/cluster/process_entity/_api.py +88 -49
  304. mindspore/parallel/cluster/process_entity/_utils.py +95 -7
  305. mindspore/parallel/cluster/run.py +48 -7
  306. mindspore/parallel/function/__init__.py +8 -1
  307. mindspore/parallel/function/reshard_func.py +12 -12
  308. mindspore/parallel/nn/__init__.py +15 -2
  309. mindspore/parallel/nn/parallel_cell_wrapper.py +50 -14
  310. mindspore/parallel/nn/parallel_grad_reducer.py +7 -14
  311. mindspore/parallel/shard.py +10 -25
  312. mindspore/parallel/transform_safetensors.py +469 -174
  313. mindspore/pgodb140.dll +0 -0
  314. mindspore/pgort140.dll +0 -0
  315. mindspore/profiler/__init__.py +2 -1
  316. mindspore/profiler/analysis/parser/timeline_assembly_factory/ascend_timeline_assembler.py +7 -7
  317. mindspore/profiler/analysis/parser/timeline_assembly_factory/base_timeline_assembler.py +3 -0
  318. mindspore/profiler/analysis/parser/timeline_assembly_factory/trace_view_container.py +12 -6
  319. mindspore/profiler/analysis/parser/timeline_creator/cpu_op_timeline_creator.py +3 -3
  320. mindspore/profiler/analysis/parser/timeline_creator/fwk_timeline_creator.py +3 -3
  321. mindspore/profiler/analysis/parser/timeline_creator/msprof_timeline_creator.py +4 -4
  322. mindspore/profiler/analysis/parser/timeline_creator/scope_layer_timeline_creator.py +3 -3
  323. mindspore/profiler/analysis/parser/timeline_event/fwk_event.py +4 -1
  324. mindspore/profiler/analysis/parser/timeline_event/timeline_event_pool.py +2 -1
  325. mindspore/profiler/analysis/task_manager.py +1 -1
  326. mindspore/profiler/analysis/viewer/ascend_communication_viewer.py +5 -1
  327. mindspore/profiler/analysis/viewer/ascend_integrate_viewer.py +2 -1
  328. mindspore/profiler/analysis/viewer/ascend_kernel_details_viewer.py +10 -9
  329. mindspore/profiler/analysis/viewer/ascend_op_memory_viewer.py +43 -23
  330. mindspore/profiler/analysis/viewer/ascend_step_trace_time_viewer.py +3 -2
  331. mindspore/profiler/analysis/viewer/ms_minddata_viewer.py +9 -5
  332. mindspore/profiler/analysis/viewer/ms_operator_details_viewer.py +132 -0
  333. mindspore/profiler/common/constant.py +16 -0
  334. mindspore/profiler/common/msprof_cmd_tool.py +2 -2
  335. mindspore/profiler/common/path_manager.py +9 -0
  336. mindspore/profiler/common/profiler_context.py +50 -29
  337. mindspore/profiler/common/profiler_info.py +0 -16
  338. mindspore/profiler/common/profiler_meta_data.py +1 -0
  339. mindspore/profiler/common/profiler_op_analyse.py +239 -0
  340. mindspore/profiler/common/profiler_output_path.py +23 -8
  341. mindspore/profiler/common/profiler_parameters.py +128 -35
  342. mindspore/profiler/dynamic_profile/__init__.py +0 -0
  343. mindspore/profiler/dynamic_profile/dynamic_monitor_proxy.py +39 -0
  344. mindspore/profiler/dynamic_profile/dynamic_profiler_config_context.py +666 -0
  345. mindspore/profiler/dynamic_profile/dynamic_profiler_utils.py +62 -0
  346. mindspore/profiler/dynamic_profiler.py +374 -338
  347. mindspore/profiler/envprofiler.py +42 -12
  348. mindspore/profiler/experimental_config.py +112 -7
  349. mindspore/profiler/mstx.py +33 -12
  350. mindspore/profiler/platform/__init__.py +2 -3
  351. mindspore/profiler/platform/cpu_profiler.py +10 -4
  352. mindspore/profiler/platform/npu_profiler.py +30 -20
  353. mindspore/profiler/profiler.py +218 -154
  354. mindspore/profiler/profiler_action_controller.py +65 -77
  355. mindspore/profiler/profiler_interface.py +2 -2
  356. mindspore/profiler/schedule.py +10 -4
  357. mindspore/rewrite/common/config.py +1 -0
  358. mindspore/rewrite/common/namer.py +1 -0
  359. mindspore/rewrite/common/namespace.py +1 -0
  360. mindspore/rewrite/node/node.py +31 -11
  361. mindspore/rewrite/parsers/assign_parser.py +1 -1
  362. mindspore/rewrite/symbol_tree/symbol_tree.py +2 -2
  363. mindspore/run_check/_check_version.py +7 -10
  364. mindspore/runtime/__init__.py +8 -6
  365. mindspore/runtime/event.py +10 -4
  366. mindspore/runtime/executor.py +87 -45
  367. mindspore/runtime/memory.py +31 -32
  368. mindspore/runtime/thread_bind_core.py +299 -165
  369. mindspore/safeguard/rewrite_obfuscation.py +12 -13
  370. mindspore/swresample-4.dll +0 -0
  371. mindspore/swscale-6.dll +0 -0
  372. mindspore/tbbmalloc.dll +0 -0
  373. mindspore/tinyxml2.dll +0 -0
  374. mindspore/train/_utils.py +17 -7
  375. mindspore/train/amp.py +43 -23
  376. mindspore/train/callback/__init__.py +5 -5
  377. mindspore/train/callback/_callback.py +2 -1
  378. mindspore/train/callback/_checkpoint.py +4 -14
  379. mindspore/train/callback/_flops_collector.py +11 -7
  380. mindspore/train/callback/_landscape.py +0 -1
  381. mindspore/train/callback/_train_fault_tolerance.py +98 -21
  382. mindspore/train/data_sink.py +15 -6
  383. mindspore/train/dataset_helper.py +14 -5
  384. mindspore/train/model.py +133 -69
  385. mindspore/train/serialization.py +168 -126
  386. mindspore/train/summary/summary_record.py +13 -2
  387. mindspore/train/train_thor/model_thor.py +2 -2
  388. mindspore/turbojpeg.dll +0 -0
  389. mindspore/utils/__init__.py +3 -2
  390. mindspore/utils/dryrun.py +0 -6
  391. mindspore/utils/runtime_execution_order_check.py +163 -77
  392. mindspore/utils/sdc_detect.py +68 -0
  393. mindspore/utils/utils.py +14 -17
  394. mindspore/vcmeta.dll +0 -0
  395. mindspore/vcruntime140.dll +0 -0
  396. mindspore/vcruntime140_1.dll +0 -0
  397. mindspore/version.py +1 -1
  398. {mindspore-2.6.0rc1.dist-info → mindspore-2.7.0.dist-info}/METADATA +5 -4
  399. {mindspore-2.6.0rc1.dist-info → mindspore-2.7.0.dist-info}/RECORD +403 -442
  400. mindspore/_deprecated/jit.py +0 -198
  401. mindspore/_extends/remote/kernel_build_server_ascend.py +0 -75
  402. mindspore/communication/_hccl_management.py +0 -297
  403. mindspore/experimental/es/embedding_service.py +0 -891
  404. mindspore/experimental/es/embedding_service_layer.py +0 -581
  405. mindspore/profiler/common/validator/__init__.py +0 -14
  406. mindspore/profiler/common/validator/validate_path.py +0 -84
  407. mindspore/profiler/parser/__init__.py +0 -14
  408. mindspore/profiler/parser/aicpu_data_parser.py +0 -272
  409. mindspore/profiler/parser/ascend_analysis/__init__.py +0 -14
  410. mindspore/profiler/parser/ascend_analysis/constant.py +0 -71
  411. mindspore/profiler/parser/ascend_analysis/file_manager.py +0 -180
  412. mindspore/profiler/parser/ascend_analysis/function_event.py +0 -185
  413. mindspore/profiler/parser/ascend_analysis/fwk_cann_parser.py +0 -136
  414. mindspore/profiler/parser/ascend_analysis/fwk_file_parser.py +0 -131
  415. mindspore/profiler/parser/ascend_analysis/msprof_timeline_parser.py +0 -104
  416. mindspore/profiler/parser/ascend_analysis/path_manager.py +0 -313
  417. mindspore/profiler/parser/ascend_analysis/profiler_info_parser.py +0 -123
  418. mindspore/profiler/parser/ascend_analysis/tlv_decoder.py +0 -86
  419. mindspore/profiler/parser/ascend_analysis/trace_event_manager.py +0 -75
  420. mindspore/profiler/parser/ascend_cluster_generator.py +0 -116
  421. mindspore/profiler/parser/ascend_communicate_generator.py +0 -314
  422. mindspore/profiler/parser/ascend_flops_generator.py +0 -116
  423. mindspore/profiler/parser/ascend_fpbp_generator.py +0 -82
  424. mindspore/profiler/parser/ascend_hccl_generator.py +0 -271
  425. mindspore/profiler/parser/ascend_integrate_generator.py +0 -42
  426. mindspore/profiler/parser/ascend_memory_generator.py +0 -185
  427. mindspore/profiler/parser/ascend_msprof_exporter.py +0 -282
  428. mindspore/profiler/parser/ascend_msprof_generator.py +0 -187
  429. mindspore/profiler/parser/ascend_op_generator.py +0 -334
  430. mindspore/profiler/parser/ascend_steptrace_generator.py +0 -94
  431. mindspore/profiler/parser/ascend_timeline_generator.py +0 -545
  432. mindspore/profiler/parser/base_timeline_generator.py +0 -483
  433. mindspore/profiler/parser/container.py +0 -229
  434. mindspore/profiler/parser/cpu_gpu_timeline_generator.py +0 -697
  435. mindspore/profiler/parser/flops_parser.py +0 -531
  436. mindspore/profiler/parser/framework_enum.py +0 -111
  437. mindspore/profiler/parser/framework_parser.py +0 -464
  438. mindspore/profiler/parser/framework_struct.py +0 -61
  439. mindspore/profiler/parser/gpu_analysis/__init__.py +0 -14
  440. mindspore/profiler/parser/gpu_analysis/function_event.py +0 -44
  441. mindspore/profiler/parser/gpu_analysis/fwk_file_parser.py +0 -89
  442. mindspore/profiler/parser/gpu_analysis/profiler_info_parser.py +0 -72
  443. mindspore/profiler/parser/hccl_parser.py +0 -573
  444. mindspore/profiler/parser/hwts_log_parser.py +0 -122
  445. mindspore/profiler/parser/integrator.py +0 -526
  446. mindspore/profiler/parser/memory_usage_parser.py +0 -277
  447. mindspore/profiler/parser/minddata_analyzer.py +0 -800
  448. mindspore/profiler/parser/minddata_parser.py +0 -186
  449. mindspore/profiler/parser/minddata_pipeline_parser.py +0 -299
  450. mindspore/profiler/parser/op_intermediate_parser.py +0 -149
  451. mindspore/profiler/parser/optime_parser.py +0 -250
  452. mindspore/profiler/parser/profiler_info.py +0 -213
  453. mindspore/profiler/parser/step_trace_parser.py +0 -666
  454. mindspore/utils/hooks.py +0 -81
  455. mindspore/common/{_auto_dynamic.py → dynamic_shape/_auto_dynamic.py} +0 -0
  456. {mindspore-2.6.0rc1.dist-info → mindspore-2.7.0.dist-info}/WHEEL +0 -0
  457. {mindspore-2.6.0rc1.dist-info → mindspore-2.7.0.dist-info}/entry_points.txt +0 -0
  458. {mindspore-2.6.0rc1.dist-info → mindspore-2.7.0.dist-info}/top_level.txt +0 -0
Deleted file: mindspore/profiler/parser/cpu_gpu_timeline_generator.py
@@ -1,697 +0,0 @@
1
- # Copyright 2022 Huawei Technologies Co., Ltd
2
- #
3
- # Licensed under the Apache License, Version 2.0 (the "License");
4
- # you may not use this file except in compliance with the License.
5
- # You may obtain a copy of the License at
6
- #
7
- # http://www.apache.org/licenses/LICENSE-2.0
8
- #
9
- # Unless required by applicable law or agreed to in writing, software
10
- # distributed under the License is distributed on an "AS IS" BASIS,
11
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
- # See the License for the specific language governing permissions and
13
- # limitations under the License.
14
- # ============================================================================
15
- """The integrator for integrating parsed profiling files."""
16
- import json
17
- import os
18
- import csv
19
-
20
- from mindspore import log as logger
21
- from mindspore.profiler.common.exceptions.exceptions import ProfilerIOException, ProfilerFileNotFoundException, \
22
- ProfilerParamValueErrorException
23
- from mindspore.profiler.parser.container import TimelineContainer
24
- from mindspore.profiler.parser.base_timeline_generator import BaseTimelineGenerator
25
- from mindspore.profiler.parser.integrator import DeviceTarget
26
- from mindspore.profiler.common.validator.validate_path import validate_and_normalize_path
27
- from mindspore.profiler.parser.gpu_analysis.fwk_file_parser import GPUFwkFileParser
28
-
29
-
30
- class GpuTimelineGenerator(BaseTimelineGenerator):
31
- """Generate gpu Timeline data from file."""
32
- _display_filename = 'gpu_timeline_display_{}.json'
33
- _timeline_summary_filename = 'gpu_timeline_summary_{}.json'
34
- _output_op_execute_time_file_path = "gpu_op_execute_timestamp_{}.txt"
35
- _output_activity_execute_time_file_path = "activity_execute_timestamp_{}.txt"
36
- _output_gpu_activity_info_file_path = "gpu_activity_data_{}.csv"
37
- _step_trace_original_filename = 'step_trace_profiling_{}.txt'
38
- _cluster_analyse_filename = 'gpu_cluster_analyse_{}_{}_{}_{}.csv'
39
- _activity_keys_list = []
40
-
41
- def __init__(self, profiling_dir, device_id, rank_size, model):
42
- super().__init__(DeviceTarget.GPU.value, model)
43
- self._device_id = device_id
44
- self._rank_size = rank_size
45
- self._profiling_dir = profiling_dir
46
- self._timeline_meta = []
47
- self._display_filename = self._display_filename.format(device_id)
48
- self._timeline_summary_filename = self._timeline_summary_filename.format(device_id)
49
- self._tid_dict = {
50
- "receive_op_not_overlapped": (self._RECEIVE_ALONE, self._OP_OVERLAP_PID),
51
- "exclude_receive_op": (self._ALLREDUCE_ALONE, self._OP_OVERLAP_PID),
52
- "computation_op": (self._MERGED_COMPUTATION_TID, self._OP_OVERLAP_PID),
53
- "communication_not_overlapped": (self._PURE_COMMUNICATION_TID, self._OP_OVERLAP_PID),
54
- "communication": (self._MERGED_COMMUNICATION_TID, self._OP_OVERLAP_PID),
55
- "free_time": (self._FREE_TIME_TID, self._OP_OVERLAP_PID)
56
- }
57
-
58
- def init_timeline(self, reduce_op_type):
59
- """Init timeline metadata, adding all collected info."""
60
- timeline_list = self._load_timeline_data(reduce_op_type)
61
-
62
- # Init a dict for counting the num of streams.
63
- stream_count_dict = {}
64
- for timeline in timeline_list:
65
- self._parse_timeline_data(timeline, 0)
66
- # Updating the collection of streams.
67
- if len(timeline) == 4:
68
- self._update_num_of_streams(timeline, stream_count_dict)
69
-
70
- # Update timeline summary info
71
- self._timeline_summary['num_of_streams'] += len(stream_count_dict)
72
-
73
- def check_op_name(self, op_name):
74
- """
75
- Check whether the operator name exists.
76
-
77
- Args:
78
- op_name (str): The operator name or operator name prefix.
79
-
80
- Returns:
81
- bool, `True` if the operator name does exist, else `False`.
82
- """
83
- if not op_name:
84
- raise ProfilerParamValueErrorException('The op_name should exist.')
85
- for op_time_info in self._timeline_meta:
86
- full_op_name = op_time_info['name']
87
- if full_op_name and full_op_name.startswith(op_name):
88
- return True
89
- return False
90
-
91
- def is_gpu_kernel_async_launch(self):
92
- """Recognize the solution that launch the gpu kernel async."""
93
- step_trace_profiling_path = self._get_and_validate_path(
94
- self._step_trace_original_filename
95
- )
96
- try:
97
- with open(step_trace_profiling_path, 'r') as f_obj:
98
- line = next(f_obj)
99
- first_string = line.strip().split()[0]
100
- # the data format of launch the gpu kernel async is "Default/op1,160123 op-name"
101
- # otherwise, the data format is "Default/op1 160123,12 "
102
- return bool(len(first_string.split(',')) == 2)
103
- except (IOError, OSError) as err:
104
- logger.critical(f'Error occurred when read {step_trace_profiling_path}: {err}')
105
- raise ProfilerIOException() from err
106
- except StopIteration:
107
- logger.warning('No step trace data exists.')
108
- return False
109
-
110
- def parse_fwk_data(self):
111
- """
112
- Get framework op range trace data
113
- """
114
-
115
- fwk_parser = GPUFwkFileParser(self._profiling_dir, self._device_id)
116
- fwk_data = fwk_parser.get_op_range_data()
117
- self._fwk_json = fwk_parser.get_fwk_trace_data(fwk_data)
118
-
119
- def write_fwk_timeline(self):
120
- display_file_path = os.path.join(self._profiling_dir, self._display_filename)
121
- timeline_data = self._fwk_json
122
- if os.path.exists(display_file_path):
123
- with os.fdopen(os.open(display_file_path, os.O_RDONLY, 0o600), 'r') as fr:
124
- device_data = fr.read()
125
- timeline_data.extend(json.loads(device_data))
126
-
127
- with os.fdopen(os.open(display_file_path, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600), 'w') as fw:
128
- json.dump(timeline_data, fw)
129
-
130
def _get_and_validate_path(self, file_name):
    """
    Generate op or activity file path from file name, and validate this path.

    Args:
        file_name (str): File name template; it is formatted with ``self._device_id``.

    Returns:
        str, the path returned by ``validate_and_normalize_path``.

    Raises:
        ProfilerFileNotFoundException: If the resulting path does not exist.
    """
    candidate = os.path.join(self._profiling_dir, file_name.format(self._device_id))
    file_path = validate_and_normalize_path(candidate)
    if os.path.exists(file_path):
        return file_path

    logger.critical(f"Failed to find parsed timeline file {file_path}.")
    raise ProfilerFileNotFoundException('parsed timeline file')
142
-
143
def _parse_timeline_data(self, timeline, min_cycle_counter):
    """
    Parse one timeline record into a display event and store it.

    The event is passed to ``self._update_format_meta_data`` and appended to
    ``self._timeline_meta``. Records that are neither activity data nor
    "Scope Name"/"Steps" entries also update ``self._timeline_summary``.

    Args:
        timeline (list): One timeline record; more than 4 fields means activity data.
        min_cycle_counter (int|float): Value subtracted from the start time.
    """
    # factor to convert the time unit of start_time(ts) from 1ns to 1us for timeline display
    factor = 1000
    op_meta = TimelineContainer(timeline)
    stream_id = op_meta.stream_id
    dur = op_meta.duration  # unit is us
    event = {
        'name': op_meta.op_name.split('/')[-1],
        'ph': 'X',
        'tid': stream_id,
        'ts': (op_meta.start_time - min_cycle_counter) / factor,
        'dur': dur,
        'pid': int(f'2{self._device_id}') if op_meta.pid is None else op_meta.pid,
    }

    host_label = self._host_cpu_op_label
    if stream_id == "Scope Name":
        # remove the level of scope name which has a format like "0-conv2-Conv2d".
        scope_parts = op_meta.op_name.split('-')
        event['name'] = "-".join(scope_parts[1:])
        event['scope_level'] = int(scope_parts[0])
    elif stream_id[:len(host_label)] == host_label:
        event['pid'] = self._HOST_CPU_PID

    if len(timeline) > 4:
        # len(timeline) > 4 refers to activity data, else op data.
        # Add args for activity data
        event['args'] = {
            self._activity_keys_list[ix]: value for ix, value in enumerate(timeline[4:])
        }
        event['tid'] = f"Stream #{event.get('tid', '0')}"
    elif stream_id not in ["Scope Name", "Steps"]:
        # Update total time of operator execution.
        self._timeline_summary['total_time'] += dur / factor
        self._timeline_summary['op_exe_times'] += 1

    self._update_format_meta_data(event)
    self._timeline_meta.append(event)
181
-
182
def _load_timeline_data(self, reduce_op_type):
    """
    Load timeline data from file.

    Combines operator records, host cpu operator records, scope name records,
    step records, cluster/communication records and cuda activity records into
    one list.

    Args:
        reduce_op_type: Container of communication operator names, forwarded to
            ``self._load_op_data`` to separate communication operators.

    Returns:
        list, all collected timeline records, sorted by the float value of field 2.
    """
    op_file_path = self._get_and_validate_path(
        self._output_op_execute_time_file_path)

    timeline_list, communication_info = self._load_op_data(op_file_path, reduce_op_type)
    communication_info.sort(key=lambda x: float(x[2]))
    # Add host cpu op timeline.
    cpu_timeline_generator = CpuTimelineGenerator(self._profiling_dir, self._device_id, self._model)
    cpu_timeline_list = cpu_timeline_generator.load_cpu_op_data()
    if cpu_timeline_list:
        # Shift the host start times by the gpu/host start time difference before merging.
        self._clock_synchronize_to_gpu(cpu_timeline_list)
        timeline_list.extend(cpu_timeline_list)
        timeline_list.sort(key=lambda x: float(x[2]))
    self._max_scope_name_num = self._get_max_scope_name_num(timeline_list)
    self._timeline_summary['max_scope_name_num'] = self._max_scope_name_num

    # Generate step time.
    factor_start_time_uint_to_duration = 1e-3
    self._set_step_start_and_end_op_name(timeline_list)
    # Fit gpu kernel async launch solution.
    if self.is_gpu_kernel_async_launch():
        step_time_list = self._get_step_time_list_from_step_trace()
    else:
        step_time_list = self._get_step_time_list(timeline_list, factor_start_time_uint_to_duration)

    # Add Scope Name.
    # The scope lists are computed here, before the cluster records are added to timeline_list.
    default_scope_name_time_list = self._get_scope_name_time_list(timeline_list, "Default",
                                                                  factor_start_time_uint_to_duration)
    gradient_scope_name_time_list = self._get_scope_name_time_list(timeline_list, "Gradients",
                                                                   factor_start_time_uint_to_duration)
    recompute_scope_name_time_list = self._get_scope_name_time_list(timeline_list, "recompute_Default",
                                                                    factor_start_time_uint_to_duration)
    # cuda_op_timeline[0] holds every activity record, cuda_op_timeline[1] only the non-nccl ones.
    cuda_op_timeline = self._load_activity_data()

    # Add AllReduce info to timeline temp list and sort by start time.
    if communication_info:
        logger.debug('Allreduce info found, Start adding info to timeline...')
        cluster_related_timeline = self._get_cluster_timeline(
            timeline_list, cuda_op_timeline[1], communication_info, step_time_list)
        timeline_list.extend(cluster_related_timeline)
        timeline_list.extend(communication_info)
        timeline_list.sort(key=lambda x: float(x[self._start_time_idx]))

    timeline_list.extend(default_scope_name_time_list)
    timeline_list.extend(gradient_scope_name_time_list)
    timeline_list.extend(recompute_scope_name_time_list)
    timeline_list.extend(step_time_list)

    timeline_list.sort(key=lambda x: (float(x[self._start_time_idx])))

    # Add cuda activity timeline.
    timeline_list.extend(cuda_op_timeline[0])
    timeline_list.sort(key=lambda x: float(x[2]))

    return timeline_list
238
-
239
- def _clock_synchronize_to_gpu(self, timeline_list):
240
- """Synchronize the timestamp from device to host."""
241
- start_time_file_path = os.path.join(self._profiling_dir, f"start_time_{self._device_id}.txt")
242
-
243
- try:
244
- with open(start_time_file_path) as f_obj:
245
- lines = f_obj.readlines()
246
- # lines[0] stores the host monotonic time of start training.
247
- host_monotonic_start_time = int(lines[0].strip().split(':')[-1])
248
- # lines[1] stores the gpu time of start training.
249
- gpu_start_time = int(lines[1].strip().split(':')[-1])
250
- except (IOError, OSError) as err:
251
- logger.critical(f'Error occurred when read {start_time_file_path}: {err}')
252
- raise ProfilerIOException() from err
253
-
254
- time_diff = gpu_start_time - host_monotonic_start_time
255
- for idx, time_item in enumerate(timeline_list):
256
- timeline_list[idx][self._start_time_idx] = int(time_item[self._start_time_idx]) + time_diff
257
-
258
- def _load_op_data(self, op_file_path, reduce_op_type):
259
- """Load operator data from file"""
260
- op_timeline_list = []
261
- communication_info = []
262
- try:
263
- with open(op_file_path, 'r') as f_obj:
264
- for line in f_obj:
265
- self._timeline_summary['num_of_ops'] += 1
266
- op_list = line.strip('\n').strip().split(';')
267
- time_arr = op_list[-1]
268
- time_arr = time_arr.split(" ")
269
- for time in time_arr:
270
- time = time.split(",")
271
- line_list = op_list[:2] + time
272
- communication_op_name = line_list[0].strip().split('/')[-1]
273
- if communication_op_name not in reduce_op_type:
274
- op_timeline_list.append(line_list)
275
- else:
276
- communication_info.append(line_list)
277
- except (IOError, OSError) as err:
278
- logger.critical('Error occurred when load operator timeline data intermediate file: %s', err)
279
- raise ProfilerIOException() from err
280
-
281
- return op_timeline_list, communication_info
282
-
283
- def _load_activity_data(self):
284
- """Load activity data from file"""
285
- activity_timeline_list = []
286
- cuda_compute_ops_timeline_list = []
287
- args_dict = {}
288
- activity_file_path = self._get_and_validate_path(
289
- self._output_activity_execute_time_file_path)
290
- activity_args_file_path = self._get_and_validate_path(
291
- self._output_gpu_activity_info_file_path)
292
-
293
- if not os.path.exists(activity_args_file_path):
294
- logger.error(f'The file {activity_args_file_path} does not exist.')
295
- raise ProfilerFileNotFoundException(activity_args_file_path)
296
- with open(activity_args_file_path, 'r') as args_file:
297
- csv_reader = csv.reader(args_file)
298
- keys_list = next(csv_reader)
299
- # keys_list format is: name, type, op_full_name, stream_id, block_dim, grid_dim, ...
300
- self._activity_keys_list = keys_list[1:3] + keys_list[4:6]
301
- for info in csv_reader:
302
- args_dict[info[0]] = info[1:3] + info[4:6]
303
-
304
- if not os.path.exists(activity_file_path):
305
- logger.error(f'The file {activity_file_path} does not exist.')
306
- raise ProfilerFileNotFoundException(activity_file_path)
307
- with open(activity_file_path, 'r') as f_obj:
308
- for line in f_obj:
309
- line_list = line.strip('\n').split(';')
310
- # concat activity args info.
311
- line_list += args_dict.get(line_list[0])
312
- if not line_list[0].startswith('nccl'):
313
- cuda_compute_ops_timeline_list.append(line_list)
314
- activity_timeline_list.append(line_list)
315
-
316
- return activity_timeline_list, cuda_compute_ops_timeline_list
317
-
318
- def _get_step_time_list_from_step_trace(self):
319
- """Produce the time of each step based on step_trace_profiling file."""
320
- # Record the time of each step.
321
- step_time_list = []
322
- step_start_op_name = []
323
- step_end_op_name = []
324
- step_num = 1
325
- tid = "Steps"
326
- step_trace_profiling_path = self._get_and_validate_path(
327
- self._step_trace_original_filename
328
- )
329
-
330
- try:
331
- with open(step_trace_profiling_path, 'r') as f_obj:
332
- for line in f_obj:
333
- line = line.strip().split()
334
- step_start_op_name.append(line[0].split(',')[0])
335
- step_end_op_name.append(line[3].split(',')[0])
336
- cur_step_start_time = float(line[0].split(',')[1])
337
- cur_step_end_time = float(line[3].split(',')[1])
338
- # convert duration time unit from ns to us.
339
- cur_step_duration_time = (cur_step_end_time - cur_step_start_time) / 1e3
340
- step_time_item = [str(step_num), tid, cur_step_start_time, cur_step_duration_time]
341
- step_time_list.append(step_time_item)
342
- step_num += 1
343
- except (IOError, OSError) as err:
344
- logger.critical(f'Error occurred when read {step_trace_profiling_path}: {err}')
345
- raise ProfilerIOException() from err
346
-
347
- return step_time_list
348
-
349
def _get_cluster_timeline(self, timeline, activity_info, comm_info, step_info):
    """
    Analyse the cluster communication and computation data, and write result to file.

    To analyse the cluster performance bottleneck based on timeline, define the time of a training
    step as "t_total", propose five metrics as follows:
        1) The time that "receive" operators not overlapped by others(t1)
        2) The time that is consumed inside the stage(t_total - t1)
        3) The time that "communication" operators not overlapped by others(t2)
        4) The time that consumed by computation(t_total - t2)
        5) The time that "collective communication" operators not overlapped by others(t3)
    In pipeline parallel mode, we can locate slow stage based on t_total - t1. Inside each stage,
    we can locate slow card based on t_total - t2. The value of t1 indicates the degree that
    communication time between stages slow down the training. The value of t3 indicates the degree
    that communication inside each stage slow down the training.

    Args:
        timeline (list): Operator timeline records.
        activity_info (list): Activity records; concatenated with `timeline` to form the
            computation timeline.
        comm_info (list): Communication operator records.
        step_info (list): Per-step records; the field at ``self._duration_idx`` is used as
            the step duration.

    Returns:
        list, the communication-not-overlapped segments followed by element 2 of the merged
        computation result, element 2 of the merged communication result and the free time list.

    Raises:
        ProfilerIOException: If writing the cluster metrics fails with IOError or OSError.
    """
    # NOTE: the "recieve_alone_time" spelling is used consistently as a key inside this method.
    time_info = {
        "stage_time": [], "computation_time": [], "recieve_alone_time": [], "comm_alone_time": [],
        "collective_comm_alone_time": []
    }
    is_pipeline_parallel = False
    comm_timeline = self._get_merged_time_list(
        comm_info,
        display_name="communication",
        factor=1e-3
    )
    # "+" builds a new list, so sorting does not reorder the caller's `timeline`.
    compute_op_timeline = timeline + activity_info
    compute_op_timeline.sort(key=lambda x: float(x[self._start_time_idx]))
    compute_timeline = self._get_merged_time_list(
        compute_op_timeline,
        get_interval_time=True,
        factor=1e-3
    )
    # Consider if the overlap will be 0 or not.
    comm_not_overlapped_timeline = self._get_intersection_time(
        compute_timeline[0],
        comm_timeline[0]
    )

    # Process receive part.
    all_timeline = timeline + comm_info
    all_timeline.sort(key=lambda x: float(x[self._start_time_idx]))
    # Element 0 holds the records whose op name contains "Receive-op".
    receive_op_timeline = self._produce_two_separated_timeline(
        all_timeline,
        "Receive-op"
    )[0]
    # The presence of any "Receive-op" record is what marks the run as pipeline parallel.
    if receive_op_timeline:
        is_pipeline_parallel = True
    receive_op_merged_timeline = self._get_merged_time_list(receive_op_timeline,
                                                            factor=1e-3)[0]

    receive_op_not_overlapped_timeline = self._get_intersection_time(
        compute_timeline[0],
        receive_op_merged_timeline,
        display_name="receive_op_not_overlapped"
    )

    # Process collective communication part.
    # The last element holds the communication records whose op name does not contain "Receive-op".
    collective_comm_timeline = self._produce_two_separated_timeline(
        comm_info,
        "Receive-op"
    )[-1]
    collective_comm_merged_timeline = self._get_merged_time_list(collective_comm_timeline,
                                                                 factor=1e-3)[0]
    collective_comm_not_overlapped_timeline = self._get_intersection_time(
        compute_timeline[0],
        collective_comm_merged_timeline,
        display_name="exclude_receive_op"
    )

    # Generate free time that exclude computation and communication time.
    all_timeline = compute_op_timeline + comm_info
    all_timeline.sort(key=lambda x: float(x[self._start_time_idx]))
    free_timeline = self._get_merged_time_list(
        all_timeline,
        get_interval_time=True,
        display_name="free_time",
        factor=1e-3
    )[1]

    # Compute these five metrics mentioned above per step.
    time_info["recieve_alone_time"] = self._compute_time_inside_step(receive_op_not_overlapped_timeline, step_info)
    time_info["comm_alone_time"] = self._compute_time_inside_step(comm_not_overlapped_timeline, step_info)
    time_info["collective_comm_alone_time"] = self._compute_time_inside_step(
        collective_comm_not_overlapped_timeline,
        step_info
    )
    step_num = len(step_info)
    for step in range(step_num):
        # The per-step lists may be shorter than step_info; a missing entry is logged, not raised.
        try:
            if is_pipeline_parallel:
                time_info.get("stage_time").append(
                    step_info[step][self._duration_idx] - time_info.get("recieve_alone_time")[step]
                )
        except IndexError as e:
            logger.error(e)
        try:
            time_info.get("computation_time").append(
                step_info[step][self._duration_idx] - time_info.get("comm_alone_time")[step]
            )
        except IndexError as e:
            logger.error(e)

    metrices_per_step_list = [
        time_info.get("computation_time"), time_info.get("comm_alone_time"),
        time_info.get("stage_time"), time_info.get("recieve_alone_time"),
        time_info.get("collective_comm_alone_time")
    ]
    if step_num > 1:
        for metric in metrices_per_step_list:
            # Append the average over all entries except the first one.
            metric.append(sum(metric[1:]) / (step_num - 1))
    try:
        self._write_cluster_metrices(metrices_per_step_list, is_pipeline_parallel, "Gpu", self._device_id)
    except (IOError, OSError) as err:
        logger.warning(err)
        raise ProfilerIOException from err

    res_timeline = []
    res_timeline.extend(comm_not_overlapped_timeline)
    res_timeline.extend(compute_timeline[2])
    res_timeline.extend(comm_timeline[2])
    res_timeline.extend(free_timeline)
    return res_timeline
472
-
473
- def _compute_time_inside_step(self, metric_timeline, step_time_list):
474
- """Compute per step time of metric_timeline."""
475
- per_step_time_list = []
476
- step = 0
477
- cur_step_metric_time = 0
478
- factor_us_to_ns = 1e3
479
- step_end_time = step_time_list[step][self._start_time_idx] + \
480
- step_time_list[step][self._duration_idx] * factor_us_to_ns
481
- for time_item in metric_timeline:
482
- start_time = time_item[self._start_time_idx]
483
- if start_time > step_end_time:
484
- per_step_time_list.append(cur_step_metric_time)
485
- step += 1
486
- if step >= len(step_time_list):
487
- logger.warning("Compute profiler compute_time_inside_step time, "
488
- "find the data length is more than step count, "
489
- "maybe current graph has multi sub graph, skip the last data.")
490
- break
491
- step_end_time = step_time_list[step][self._start_time_idx] + \
492
- step_time_list[step][self._duration_idx] * factor_us_to_ns
493
- cur_step_metric_time = 0
494
- cur_step_metric_time += time_item[self._duration_idx]
495
- per_step_time_list.append(cur_step_metric_time)
496
-
497
- return per_step_time_list
498
-
499
- def _get_intersection_time(self, first_time_list, second_time_list,
500
- display_name="communication_not_overlapped"):
501
- """Get intersection time of two time list."""
502
- first_list_idx, second_list_idx = 0, 0
503
- first_list_len = len(first_time_list)
504
- second_list_len = len(second_time_list)
505
- intersection_segment_display_list = []
506
- factor_ns_to_us = 1e-3
507
- while first_list_idx < first_list_len and second_list_idx < second_list_len:
508
- intersection_start = max(
509
- first_time_list[first_list_idx][self._start_time_idx],
510
- second_time_list[second_list_idx][self._start_time_idx]
511
- )
512
- intersection_end = min(
513
- first_time_list[first_list_idx][self._duration_idx],
514
- second_time_list[second_list_idx][self._duration_idx]
515
- )
516
- if intersection_start < intersection_end:
517
- intersection_segment_display_list.append(
518
- [display_name, self._tid_dict.get(display_name, ('',))[0],
519
- intersection_start, (intersection_end - intersection_start) * factor_ns_to_us,
520
- self._tid_dict.get(display_name, ('', ''))[1]]
521
- )
522
- if first_time_list[first_list_idx][self._duration_idx] >= \
523
- second_time_list[second_list_idx][self._duration_idx]:
524
- second_list_idx += 1
525
- else:
526
- first_list_idx += 1
527
-
528
- return intersection_segment_display_list
529
-
530
- def _produce_two_separated_timeline(self, timeline, op_name):
531
- """Produce two separated timeline based on op_name."""
532
- timeline_include_op_name = []
533
- timeline_exclude_op_name = []
534
- for time_item in timeline:
535
- if op_name in time_item[self._op_name_idx]:
536
- timeline_include_op_name.append(time_item)
537
- else:
538
- timeline_exclude_op_name.append(time_item)
539
- return timeline_include_op_name, timeline_exclude_op_name
540
-
541
-
542
- class CpuTimelineGenerator(GpuTimelineGenerator):
543
- """Generate cpu Timeline data from file."""
544
- _output_op_execute_time_file_path = "cpu_op_execute_timestamp_{}.txt"
545
- _display_filename = 'cpu_timeline_display_{}.json'
546
- _timeline_summary_filename = 'cpu_timeline_summary_{}.json'
547
-
548
def __init__(self, profiling_dir, device_id, model):
    """
    Initialize the generator through the parent constructor and mark the target as CPU.

    Args:
        profiling_dir: Forwarded to the parent constructor as first argument.
        device_id: Forwarded to the parent constructor as second argument.
        model: Forwarded to the parent constructor as fourth argument.
    """
    # The third positional argument of the parent constructor is fixed to 0 here.
    super().__init__(profiling_dir, device_id, 0, model)
    self._device_target = DeviceTarget.CPU.value
551
-
552
def get_timeline_data(self):
    """
    Get timeline data from file.

    Returns:
        list, the records of ``self.load_cpu_op_data()`` with the start time field
        divided by 1e6 and the duration field divided by 1e3 (both as float).
    """
    factor_ns_to_ms = 1e6
    factor_us_to_ms = 1e3
    records = self.load_cpu_op_data()
    for record in records:
        start_ns = float(record[self._start_time_idx])
        record[self._start_time_idx] = start_ns / factor_ns_to_ms
        duration_us = float(record[self._duration_idx])
        record[self._duration_idx] = duration_us / factor_us_to_ms

    return records
562
-
563
def init_timeline(self, pretty=False):
    """
    Init timeline metadata, adding all collected info.

    Args:
        pretty (bool): Stored in ``self._pretty``. Default: False.
    """
    self._pretty = pretty

    # Init a dict for counting the num of streams.
    stream_count_dict = {}
    for record in self._load_timeline_data():
        self._parse_timeline_data(record, 0)
        # Updating the collection of streams.
        if len(record) != 4:
            continue
        self._update_num_of_streams(record, stream_count_dict)

    # Update timeline summary info
    self._timeline_summary['num_of_streams'] += len(stream_count_dict)
578
-
579
def load_cpu_op_data(self):
    """
    Load cpu operator data from file.

    Returns:
        list, the records loaded by ``self._load_op_data`` with the duration field
        divided by 1e-3 (as float). Empty if the operator file does not exist.
    """
    op_file_path = self._get_and_validate_path(self._output_op_execute_time_file_path)
    if not os.path.exists(op_file_path):
        logger.info("No cpu operator info.")
        return []

    factor_ms_to_us = 1e-3
    records = self._load_op_data(op_file_path)
    for record in records:
        duration = float(record[self._duration_idx])
        record[self._duration_idx] = duration / factor_ms_to_us

    return records
592
-
593
def _get_and_validate_path(self, file_name):
    """
    Generate op or activity file path from file name, and validate this path.

    Unlike a lookup that requires the file, no existence check is made here.

    Args:
        file_name (str): File name template; it is formatted with ``self._device_id``.

    Returns:
        str, the path returned by ``validate_and_normalize_path``.
    """
    candidate = os.path.join(self._profiling_dir, file_name.format(self._device_id))
    return validate_and_normalize_path(candidate)
602
-
603
- def _load_op_data(self, op_file_path):
604
- """Load operator data from file"""
605
- op_timeline_list = []
606
- try:
607
- with open(op_file_path, 'r') as f_obj:
608
- for line in f_obj:
609
- self._timeline_summary['num_of_ops'] += 1
610
- op_list = line.strip('\n').strip().split(';')
611
- time_arr = op_list[-1]
612
- time_arr = time_arr.split(" ")
613
- for time in time_arr:
614
- time = time.split(",")
615
- if len(time) == 3:
616
- # for time value is [start_timestamp, duration, tid]
617
- # line_list[1] would be like "HostCpuOps" + str(tid)
618
- line_list = op_list[:1] + [op_list[1] + str(time[-1])] + time[:-1]
619
- else:
620
- # for time value is [start_timestamp, duration]
621
- line_list = op_list[:2] + time
622
- op_timeline_list.append(line_list)
623
- except (IOError, OSError) as err:
624
- logger.critical('Error occurred when load operator timeline data intermediate file: %s', err)
625
- raise ProfilerIOException() from err
626
-
627
- return op_timeline_list
628
-
629
def _load_timeline_data(self):
    """
    Load timeline data from file.

    Combines the cpu operator records with scope name records, step records,
    merged computation records and free time records.

    Returns:
        list, operator, scope name and step records sorted by the float value of
        field 2, followed by the merged computation records and the free time records.
    """
    timeline_list = self.load_cpu_op_data()

    timeline_list.sort(key=lambda x: float(x[self._start_time_idx]))
    self._max_scope_name_num = self._get_max_scope_name_num(timeline_list)
    self._timeline_summary['max_scope_name_num'] = self._max_scope_name_num

    # Generate step time.
    factor_start_time_uint_to_duration = 1e-3
    self._set_step_start_and_end_op_name(timeline_list)

    step_time_list = self._get_step_time_list(timeline_list, factor_start_time_uint_to_duration)

    # Add merge compute time and free time
    # Both are computed from the operator records only, before scope and step records are added.
    merge_compute_timeline = self._get_merged_time_list(
        timeline_list, False, "computation_op", factor_start_time_uint_to_duration)[2]
    free_time_timeline = self._get_merged_time_list(
        timeline_list, True, "free_time", factor_start_time_uint_to_duration)[1]

    # Add Scope Name.
    default_scope_name_time_list = self._get_scope_name_time_list(timeline_list, "Default",
                                                                  factor_start_time_uint_to_duration)
    gradient_scope_name_time_list = self._get_scope_name_time_list(timeline_list, "Gradients",
                                                                   factor_start_time_uint_to_duration)
    recompute_scope_name_time_list = self._get_scope_name_time_list(timeline_list, "recompute_Default",
                                                                    factor_start_time_uint_to_duration)
    timeline_list.extend(default_scope_name_time_list)
    timeline_list.extend(gradient_scope_name_time_list)
    timeline_list.extend(recompute_scope_name_time_list)
    timeline_list.extend(step_time_list)

    # list.sort is stable, so the second sort keeps the order of the first one among equal keys.
    timeline_list.sort(key=lambda x: (float(x[self._start_time_idx]), x[self._tid_idx]))
    timeline_list.sort(key=lambda x: float(x[2]))
    # Appended after sorting, so these records stay at the end of the list.
    timeline_list.extend(merge_compute_timeline)
    timeline_list.extend(free_time_timeline)

    return timeline_list
667
-
668
def _parse_timeline_data(self, timeline, min_cycle_counter):
    """
    Parse one timeline record into a display event and store it.

    The event is passed to ``self._update_format_meta_data`` and appended to
    ``self._timeline_meta``. Records that do not have 5 fields and are neither
    "Scope Name" nor "Steps" entries also update ``self._timeline_summary``.

    Args:
        timeline (list): One timeline record; 5 fields means analyse data.
        min_cycle_counter (int|float): Value subtracted from the start time.
    """
    # factor to convert the time unit of start_time(ts) from 1ns to 1us for timeline display
    factor = 1000
    op_meta = TimelineContainer(timeline)
    stream_id = op_meta.stream_id
    dur = op_meta.duration
    event = {
        'name': op_meta.op_name.split('/')[-1],
        'ph': 'X',
        'tid': stream_id,
        'ts': (op_meta.start_time - min_cycle_counter) / factor,
        'dur': dur,
        'pid': int(self._device_id),
    }

    host_label = self._host_cpu_op_label
    if stream_id == "Scope Name":
        # remove the level of scope name which has a format like "0-conv2-Conv2d".
        scope_parts = op_meta.op_name.split('-')
        event['name'] = "-".join(scope_parts[1:])
        event['scope_level'] = int(scope_parts[0])
    elif host_label == stream_id[:len(host_label)]:
        event['pid'] = self._HOST_CPU_PID

    if len(timeline) == 5:
        # len(timeline) == 5 refers to analyse data.
        event["pid"] = op_meta.pid
    elif stream_id not in ["Scope Name", "Steps"]:
        # Update total time of operator execution.
        self._timeline_summary['total_time'] += dur / factor
        self._timeline_summary['op_exe_times'] += 1

    self._update_format_meta_data(event)
    self._timeline_meta.append(event)