mindspore 2.6.0__cp310-cp310-win_amd64.whl → 2.7.0__cp310-cp310-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mindspore might be problematic. (The original page links to more details here; the link was not preserved in this text.)

Files changed (455)
  1. mindspore/.commit_id +1 -1
  2. mindspore/Microsoft.VisualStudio.Telemetry.dll +0 -0
  3. mindspore/Newtonsoft.Json.dll +0 -0
  4. mindspore/__init__.py +2 -2
  5. mindspore/_c_dataengine.cp310-win_amd64.pyd +0 -0
  6. mindspore/_c_expression.cp310-win_amd64.pyd +0 -0
  7. mindspore/_c_mindrecord.cp310-win_amd64.pyd +0 -0
  8. mindspore/_checkparam.py +42 -11
  9. mindspore/_extends/builtin_operations.py +3 -3
  10. mindspore/{_deprecated → _extends/optimize}/__init__.py +9 -3
  11. mindspore/_extends/optimize/cell_utils.py +96 -0
  12. mindspore/_extends/parallel_compile/akg_compiler/custom.py +1109 -0
  13. mindspore/_extends/parallel_compile/akg_compiler/gen_custom_op_files.py +1 -1
  14. mindspore/_extends/parse/__init__.py +3 -3
  15. mindspore/_extends/parse/compile_config.py +44 -22
  16. mindspore/_extends/parse/deprecated/deprecated_tensor_method.py +1 -2
  17. mindspore/_extends/parse/parser.py +64 -83
  18. mindspore/_extends/parse/resources.py +39 -0
  19. mindspore/_extends/parse/standard_method.py +47 -14
  20. mindspore/_extends/parse/trope.py +8 -1
  21. mindspore/_extends/pijit/__init__.py +1 -2
  22. mindspore/_extends/pijit/pijit_func_white_list.py +2 -5
  23. mindspore/amp.py +4 -22
  24. mindspore/atlprov.dll +0 -0
  25. mindspore/avcodec-59.dll +0 -0
  26. mindspore/avdevice-59.dll +0 -0
  27. mindspore/avfilter-8.dll +0 -0
  28. mindspore/avformat-59.dll +0 -0
  29. mindspore/avutil-57.dll +0 -0
  30. mindspore/boost/adasum.py +1 -1
  31. mindspore/boost/boost_cell_wrapper.py +4 -4
  32. mindspore/c1.dll +0 -0
  33. mindspore/c1xx.dll +0 -0
  34. mindspore/c2.dll +0 -0
  35. mindspore/common/__init__.py +43 -12
  36. mindspore/common/_grad_function.py +2 -1
  37. mindspore/common/_pijit_context.py +28 -7
  38. mindspore/common/_stub_tensor.py +1 -209
  39. mindspore/common/_tensor_cpp_method.py +1 -1
  40. mindspore/common/_tensor_docs.py +177 -52
  41. mindspore/common/_utils.py +9 -1
  42. mindspore/common/api.py +338 -208
  43. mindspore/common/dtype.py +108 -57
  44. mindspore/common/dump.py +11 -16
  45. mindspore/common/dynamic_shape/__init__.py +0 -0
  46. mindspore/common/{auto_dynamic_shape.py → dynamic_shape/auto_dynamic_shape.py} +17 -23
  47. mindspore/common/dynamic_shape/enable_dynamic.py +197 -0
  48. mindspore/common/file_system.py +59 -9
  49. mindspore/common/generator.py +2 -3
  50. mindspore/common/hook_handle.py +33 -5
  51. mindspore/common/jit_config.py +1 -1
  52. mindspore/common/jit_trace.py +84 -105
  53. mindspore/common/np_dtype.py +3 -3
  54. mindspore/common/parameter.py +27 -29
  55. mindspore/common/recompute.py +5 -7
  56. mindspore/common/sparse_tensor.py +0 -3
  57. mindspore/common/symbol.py +0 -1
  58. mindspore/common/tensor.py +84 -133
  59. mindspore/communication/_comm_helper.py +46 -4
  60. mindspore/communication/management.py +79 -7
  61. mindspore/context.py +47 -38
  62. mindspore/dataset/__init__.py +1 -1
  63. mindspore/dataset/audio/transforms.py +1 -1
  64. mindspore/dataset/core/config.py +38 -4
  65. mindspore/dataset/engine/datasets.py +350 -322
  66. mindspore/dataset/engine/datasets_user_defined.py +69 -23
  67. mindspore/dataset/engine/iterators.py +2 -2
  68. mindspore/dataset/engine/obs/config_loader.py +2 -2
  69. mindspore/dataset/engine/obs/obs_mindrecord_dataset.py +8 -0
  70. mindspore/dataset/transforms/c_transforms.py +2 -2
  71. mindspore/dataset/transforms/py_transforms.py +7 -3
  72. mindspore/dataset/transforms/transforms.py +10 -6
  73. mindspore/dataset/vision/__init__.py +1 -1
  74. mindspore/dataset/vision/py_transforms.py +8 -8
  75. mindspore/dataset/vision/transforms.py +17 -5
  76. mindspore/dataset/vision/utils.py +632 -21
  77. mindspore/dataset/vision/validators.py +1 -0
  78. mindspore/device_context/ascend/device.py +1 -1
  79. mindspore/device_context/ascend/op_tuning.py +35 -1
  80. mindspore/device_context/gpu/__init__.py +2 -2
  81. mindspore/device_context/gpu/device.py +1 -1
  82. mindspore/device_context/gpu/op_precision.py +4 -2
  83. mindspore/device_context/gpu/op_tuning.py +6 -3
  84. mindspore/device_manager.py +16 -9
  85. mindspore/dnnl.dll +0 -0
  86. mindspore/dpcmi.dll +0 -0
  87. mindspore/experimental/llm_boost/ascend_native/llama_boost_ascend_native.py +5 -4
  88. mindspore/experimental/llm_boost/atb/boost_base.py +2 -3
  89. mindspore/experimental/optim/adadelta.py +13 -20
  90. mindspore/experimental/optim/adagrad.py +15 -22
  91. mindspore/experimental/optim/adam.py +17 -24
  92. mindspore/experimental/optim/adamax.py +14 -22
  93. mindspore/experimental/optim/adamw.py +28 -34
  94. mindspore/experimental/optim/asgd.py +15 -25
  95. mindspore/experimental/optim/lr_scheduler.py +27 -45
  96. mindspore/experimental/optim/nadam.py +14 -24
  97. mindspore/experimental/optim/optimizer.py +13 -23
  98. mindspore/experimental/optim/radam.py +18 -24
  99. mindspore/experimental/optim/rmsprop.py +14 -25
  100. mindspore/experimental/optim/rprop.py +15 -26
  101. mindspore/experimental/optim/sgd.py +9 -19
  102. mindspore/hal/__init__.py +4 -4
  103. mindspore/hal/contiguous_tensors_handle.py +2 -2
  104. mindspore/hal/memory.py +1 -0
  105. mindspore/include/api/cell.h +65 -5
  106. mindspore/include/api/cfg.h +24 -7
  107. mindspore/include/api/context.h +1 -0
  108. mindspore/include/api/delegate.h +10 -2
  109. mindspore/include/api/dual_abi_helper.h +100 -19
  110. mindspore/include/api/graph.h +14 -1
  111. mindspore/include/api/kernel.h +16 -3
  112. mindspore/include/api/kernel_api.h +9 -1
  113. mindspore/include/api/metrics/accuracy.h +9 -0
  114. mindspore/include/api/model.h +8 -1
  115. mindspore/include/api/model_group.h +4 -0
  116. mindspore/include/api/model_parallel_runner.h +2 -0
  117. mindspore/include/api/status.h +48 -10
  118. mindspore/include/api/types.h +8 -3
  119. mindspore/include/c_api/model_c.h +0 -58
  120. mindspore/include/c_api/tensor_c.h +0 -26
  121. mindspore/include/dataset/constants.h +9 -0
  122. mindspore/include/dataset/vision_ascend.h +1 -1
  123. mindspore/jpeg62.dll +0 -0
  124. mindspore/mindrecord/tools/cifar10.py +61 -11
  125. mindspore/mindrecord/tools/cifar10_to_mr.py +5 -0
  126. mindspore/mindspore_backend_common.dll +0 -0
  127. mindspore/mindspore_backend_manager.dll +0 -0
  128. mindspore/mindspore_common.dll +0 -0
  129. mindspore/mindspore_core.dll +0 -0
  130. mindspore/mindspore_cpu_res_manager.dll +0 -0
  131. mindspore/mindspore_dump.dll +0 -0
  132. mindspore/mindspore_frontend.dll +0 -0
  133. mindspore/mindspore_glog.dll +0 -0
  134. mindspore/mindspore_memory_pool.dll +0 -0
  135. mindspore/mindspore_ms_backend.dll +0 -0
  136. mindspore/mindspore_ops.dll +0 -0
  137. mindspore/mindspore_ops_host.dll +0 -0
  138. mindspore/mindspore_ops_kernel_common.dll +0 -0
  139. mindspore/mindspore_profiler.dll +0 -0
  140. mindspore/mindspore_pyboost.dll +0 -0
  141. mindspore/mindspore_pynative.dll +0 -0
  142. mindspore/mindspore_res_manager.dll +0 -0
  143. mindspore/mindspore_runtime_pipeline.dll +0 -0
  144. mindspore/mint/__init__.py +4 -44
  145. mindspore/mint/distributed/__init__.py +5 -0
  146. mindspore/mint/distributed/distributed.py +425 -19
  147. mindspore/mint/nn/__init__.py +1 -1
  148. mindspore/mint/nn/functional.py +53 -6
  149. mindspore/mint/nn/layer/_functions.py +163 -294
  150. mindspore/mint/nn/layer/activation.py +8 -6
  151. mindspore/mint/nn/layer/conv.py +125 -101
  152. mindspore/mint/nn/layer/normalization.py +11 -25
  153. mindspore/mint/optim/adam.py +19 -18
  154. mindspore/mint/optim/adamw.py +14 -8
  155. mindspore/mint/optim/sgd.py +5 -5
  156. mindspore/msobj140.dll +0 -0
  157. mindspore/mspdb140.dll +0 -0
  158. mindspore/mspdbcore.dll +0 -0
  159. mindspore/mspdbst.dll +0 -0
  160. mindspore/mspft140.dll +0 -0
  161. mindspore/msvcdis140.dll +0 -0
  162. mindspore/msvcp140_1.dll +0 -0
  163. mindspore/msvcp140_2.dll +0 -0
  164. mindspore/msvcp140_atomic_wait.dll +0 -0
  165. mindspore/msvcp140_codecvt_ids.dll +0 -0
  166. mindspore/nn/cell.py +488 -620
  167. mindspore/nn/grad/cell_grad.py +11 -12
  168. mindspore/nn/layer/activation.py +36 -36
  169. mindspore/nn/layer/basic.py +74 -77
  170. mindspore/nn/layer/channel_shuffle.py +4 -4
  171. mindspore/nn/layer/combined.py +4 -2
  172. mindspore/nn/layer/conv.py +86 -85
  173. mindspore/nn/layer/dense.py +9 -7
  174. mindspore/nn/layer/embedding.py +50 -52
  175. mindspore/nn/layer/image.py +38 -40
  176. mindspore/nn/layer/math.py +111 -112
  177. mindspore/nn/layer/normalization.py +56 -44
  178. mindspore/nn/layer/pooling.py +58 -63
  179. mindspore/nn/layer/rnn_cells.py +33 -33
  180. mindspore/nn/layer/rnns.py +56 -56
  181. mindspore/nn/layer/thor_layer.py +74 -73
  182. mindspore/nn/layer/transformer.py +11 -1
  183. mindspore/nn/learning_rate_schedule.py +20 -20
  184. mindspore/nn/loss/loss.py +79 -81
  185. mindspore/nn/optim/adam.py +2 -4
  186. mindspore/nn/optim/adasum.py +2 -2
  187. mindspore/nn/optim/lamb.py +1 -3
  188. mindspore/nn/optim/optimizer.py +1 -1
  189. mindspore/nn/optim/tft_wrapper.py +2 -3
  190. mindspore/nn/optim/thor.py +2 -2
  191. mindspore/nn/probability/distribution/_utils/utils.py +2 -2
  192. mindspore/nn/probability/distribution/exponential.py +2 -1
  193. mindspore/nn/probability/distribution/poisson.py +2 -1
  194. mindspore/nn/sparse/sparse.py +3 -3
  195. mindspore/nn/wrap/cell_wrapper.py +73 -42
  196. mindspore/nn/wrap/grad_reducer.py +37 -52
  197. mindspore/nn/wrap/loss_scale.py +72 -74
  198. mindspore/numpy/array_creations.py +7 -7
  199. mindspore/numpy/fft.py +1 -1
  200. mindspore/numpy/math_ops.py +1 -1
  201. mindspore/numpy/utils_const.py +1 -1
  202. mindspore/opencv_core452.dll +0 -0
  203. mindspore/opencv_imgcodecs452.dll +0 -0
  204. mindspore/opencv_imgproc452.dll +0 -0
  205. mindspore/ops/_grad_experimental/grad_comm_ops.py +51 -13
  206. mindspore/ops/_grad_experimental/grad_debug_ops.py +14 -0
  207. mindspore/ops/_grad_experimental/grad_inner_ops.py +0 -9
  208. mindspore/ops/_op_impl/cpu/__init__.py +1 -0
  209. mindspore/{experimental/es/__init__.py → ops/_op_impl/cpu/joinedstr_op.py} +12 -6
  210. mindspore/ops/_vmap/vmap_array_ops.py +6 -13
  211. mindspore/ops/_vmap/vmap_nn_ops.py +8 -16
  212. mindspore/ops/auto_generate/cpp_create_prim_instance_helper.py +29 -10
  213. mindspore/ops/auto_generate/gen_extend_func.py +5 -55
  214. mindspore/ops/auto_generate/gen_ops_def.py +753 -273
  215. mindspore/ops/auto_generate/gen_ops_prim.py +1687 -958
  216. mindspore/ops/auto_generate/pyboost_inner_prim.py +31 -1
  217. mindspore/ops/composite/__init__.py +10 -0
  218. mindspore/ops/composite/base.py +9 -5
  219. mindspore/ops/composite/multitype_ops/__init__.py +12 -1
  220. mindspore/ops/composite/multitype_ops/_compile_utils.py +132 -108
  221. mindspore/ops/composite/multitype_ops/_constexpr_utils.py +1 -1
  222. mindspore/ops/composite/multitype_ops/add_impl.py +70 -2
  223. mindspore/ops/composite/multitype_ops/div_impl.py +49 -0
  224. mindspore/ops/composite/multitype_ops/floordiv_impl.py +29 -0
  225. mindspore/ops/composite/multitype_ops/getitem_impl.py +11 -0
  226. mindspore/ops/composite/multitype_ops/mod_impl.py +5 -3
  227. mindspore/ops/composite/multitype_ops/mul_impl.py +49 -0
  228. mindspore/ops/composite/multitype_ops/setitem_impl.py +57 -0
  229. mindspore/ops/composite/multitype_ops/sub_impl.py +34 -0
  230. mindspore/ops/composite/multitype_ops/zeros_like_impl.py +14 -0
  231. mindspore/ops/function/__init__.py +4 -1
  232. mindspore/ops/function/_add_attr_func.py +11 -6
  233. mindspore/ops/function/array_func.py +17 -100
  234. mindspore/ops/function/debug_func.py +8 -5
  235. mindspore/ops/function/grad/grad_func.py +5 -13
  236. mindspore/ops/function/math_func.py +65 -399
  237. mindspore/ops/function/nn_func.py +44 -61
  238. mindspore/ops/function/other_func.py +4 -1
  239. mindspore/ops/function/random_func.py +31 -4
  240. mindspore/ops/functional.py +2 -3
  241. mindspore/ops/functional_overload.py +486 -18
  242. mindspore/ops/op_info_register.py +21 -0
  243. mindspore/ops/operations/__init__.py +5 -2
  244. mindspore/ops/operations/_custom_ops_utils.py +675 -8
  245. mindspore/ops/operations/_inner_ops.py +14 -18
  246. mindspore/ops/operations/_sequence_ops.py +1 -1
  247. mindspore/ops/operations/array_ops.py +4 -50
  248. mindspore/ops/operations/comm_ops.py +186 -41
  249. mindspore/ops/operations/custom_ops.py +244 -175
  250. mindspore/ops/operations/debug_ops.py +55 -4
  251. mindspore/ops/operations/image_ops.py +13 -13
  252. mindspore/ops/operations/manually_defined/ops_def.py +27 -28
  253. mindspore/ops/operations/math_ops.py +8 -9
  254. mindspore/ops/operations/nn_ops.py +6 -7
  255. mindspore/ops/primitive.py +9 -20
  256. mindspore/ops/tensor_method.py +52 -11
  257. mindspore/ops_generate/api/cpp_create_prim_instance_helper_generator.py +1 -1
  258. mindspore/ops_generate/api/functional_map_cpp_generator.py +10 -9
  259. mindspore/ops_generate/api/functions_cc_generator.py +58 -10
  260. mindspore/ops_generate/api/tensor_func_reg_cpp_generator.py +1 -1
  261. mindspore/ops_generate/common/base_generator.py +14 -0
  262. mindspore/ops_generate/common/gen_constants.py +7 -2
  263. mindspore/ops_generate/common/gen_utils.py +0 -19
  264. mindspore/ops_generate/common/op_proto.py +11 -4
  265. mindspore/ops_generate/common/template.py +88 -11
  266. mindspore/ops_generate/gen_ops.py +1 -1
  267. mindspore/ops_generate/op_def/lite_ops_cpp_generator.py +4 -4
  268. mindspore/ops_generate/op_def/ops_name_h_generator.py +0 -3
  269. mindspore/ops_generate/op_def/ops_primitive_h_generator.py +0 -4
  270. mindspore/ops_generate/op_def_py/op_prim_py_generator.py +5 -2
  271. mindspore/ops_generate/pyboost/auto_grad_impl_cc_generator.py +49 -8
  272. mindspore/ops_generate/pyboost/auto_grad_reg_cc_generator.py +2 -2
  273. mindspore/ops_generate/pyboost/gen_pyboost_func.py +31 -16
  274. mindspore/ops_generate/pyboost/op_template_parser.py +98 -72
  275. mindspore/ops_generate/pyboost/pyboost_functions_cpp_generator.py +70 -273
  276. mindspore/ops_generate/pyboost/pyboost_functions_h_generator.py +14 -6
  277. mindspore/ops_generate/pyboost/pyboost_functions_impl_cpp_generator.py +316 -0
  278. mindspore/ops_generate/pyboost/pyboost_functions_py_generator.py +1 -1
  279. mindspore/ops_generate/pyboost/pyboost_grad_function_cpp_generator.py +5 -3
  280. mindspore/ops_generate/pyboost/pyboost_inner_prim_generator.py +1 -1
  281. mindspore/ops_generate/pyboost/pyboost_internal_functions_cpp_generator.py +76 -0
  282. mindspore/ops_generate/pyboost/pyboost_internal_functions_h_generator.py +76 -0
  283. mindspore/ops_generate/pyboost/pyboost_internal_kernel_info_adapter_generator.py +125 -0
  284. mindspore/ops_generate/pyboost/pyboost_native_grad_functions_generator.py +4 -3
  285. mindspore/ops_generate/pyboost/pyboost_op_cpp_code_generator.py +348 -61
  286. mindspore/ops_generate/pyboost/pyboost_overload_functions_cpp_generator.py +1 -1
  287. mindspore/ops_generate/pyboost/pyboost_utils.py +118 -9
  288. mindspore/ops_generate/tensor_py_cc_generator.py +1 -24
  289. mindspore/parallel/_auto_parallel_context.py +9 -17
  290. mindspore/parallel/_cell_wrapper.py +106 -40
  291. mindspore/parallel/_parallel_serialization.py +4 -3
  292. mindspore/parallel/_ps_context.py +4 -6
  293. mindspore/parallel/_tensor.py +167 -12
  294. mindspore/parallel/_transformer/moe.py +1 -1
  295. mindspore/parallel/_transformer/transformer.py +17 -12
  296. mindspore/parallel/_utils.py +5 -11
  297. mindspore/parallel/auto_parallel.py +33 -12
  298. mindspore/parallel/checkpoint_convert.py +3 -3
  299. mindspore/parallel/checkpoint_transform.py +5 -1
  300. mindspore/parallel/cluster/process_entity/_api.py +88 -49
  301. mindspore/parallel/cluster/process_entity/_utils.py +95 -7
  302. mindspore/parallel/cluster/run.py +48 -7
  303. mindspore/parallel/function/__init__.py +8 -1
  304. mindspore/parallel/function/reshard_func.py +7 -6
  305. mindspore/parallel/nn/__init__.py +15 -2
  306. mindspore/parallel/nn/parallel_cell_wrapper.py +50 -14
  307. mindspore/parallel/nn/parallel_grad_reducer.py +7 -14
  308. mindspore/parallel/shard.py +9 -23
  309. mindspore/parallel/transform_safetensors.py +468 -174
  310. mindspore/pgodb140.dll +0 -0
  311. mindspore/pgort140.dll +0 -0
  312. mindspore/profiler/__init__.py +2 -1
  313. mindspore/profiler/analysis/parser/timeline_assembly_factory/ascend_timeline_assembler.py +7 -7
  314. mindspore/profiler/analysis/parser/timeline_assembly_factory/base_timeline_assembler.py +3 -0
  315. mindspore/profiler/analysis/parser/timeline_assembly_factory/trace_view_container.py +3 -0
  316. mindspore/profiler/analysis/parser/timeline_creator/cpu_op_timeline_creator.py +3 -3
  317. mindspore/profiler/analysis/parser/timeline_creator/fwk_timeline_creator.py +3 -3
  318. mindspore/profiler/analysis/parser/timeline_creator/msprof_timeline_creator.py +4 -4
  319. mindspore/profiler/analysis/parser/timeline_creator/scope_layer_timeline_creator.py +3 -3
  320. mindspore/profiler/analysis/parser/timeline_event/fwk_event.py +4 -1
  321. mindspore/profiler/analysis/parser/timeline_event/timeline_event_pool.py +2 -1
  322. mindspore/profiler/analysis/task_manager.py +1 -1
  323. mindspore/profiler/analysis/viewer/ascend_communication_viewer.py +5 -1
  324. mindspore/profiler/analysis/viewer/ascend_integrate_viewer.py +2 -1
  325. mindspore/profiler/analysis/viewer/ascend_kernel_details_viewer.py +10 -9
  326. mindspore/profiler/analysis/viewer/ascend_op_memory_viewer.py +43 -23
  327. mindspore/profiler/analysis/viewer/ascend_step_trace_time_viewer.py +3 -2
  328. mindspore/profiler/analysis/viewer/ms_minddata_viewer.py +9 -5
  329. mindspore/profiler/analysis/viewer/ms_operator_details_viewer.py +132 -0
  330. mindspore/profiler/common/constant.py +16 -0
  331. mindspore/profiler/common/msprof_cmd_tool.py +2 -2
  332. mindspore/profiler/common/path_manager.py +9 -0
  333. mindspore/profiler/common/profiler_context.py +50 -29
  334. mindspore/profiler/common/profiler_info.py +0 -16
  335. mindspore/profiler/common/profiler_meta_data.py +1 -0
  336. mindspore/profiler/common/profiler_op_analyse.py +239 -0
  337. mindspore/profiler/common/profiler_output_path.py +23 -8
  338. mindspore/profiler/common/profiler_parameters.py +128 -35
  339. mindspore/profiler/dynamic_profile/__init__.py +0 -0
  340. mindspore/profiler/dynamic_profile/dynamic_monitor_proxy.py +39 -0
  341. mindspore/profiler/dynamic_profile/dynamic_profiler_config_context.py +666 -0
  342. mindspore/profiler/dynamic_profile/dynamic_profiler_utils.py +62 -0
  343. mindspore/profiler/dynamic_profiler.py +374 -338
  344. mindspore/profiler/envprofiler.py +42 -12
  345. mindspore/profiler/experimental_config.py +112 -7
  346. mindspore/profiler/mstx.py +33 -12
  347. mindspore/profiler/platform/__init__.py +2 -3
  348. mindspore/profiler/platform/cpu_profiler.py +10 -4
  349. mindspore/profiler/platform/npu_profiler.py +30 -20
  350. mindspore/profiler/profiler.py +218 -154
  351. mindspore/profiler/profiler_action_controller.py +65 -77
  352. mindspore/profiler/profiler_interface.py +2 -2
  353. mindspore/profiler/schedule.py +10 -4
  354. mindspore/rewrite/common/config.py +1 -0
  355. mindspore/rewrite/common/namer.py +1 -0
  356. mindspore/rewrite/common/namespace.py +1 -0
  357. mindspore/rewrite/node/node.py +31 -11
  358. mindspore/rewrite/parsers/assign_parser.py +1 -1
  359. mindspore/rewrite/symbol_tree/symbol_tree.py +2 -2
  360. mindspore/run_check/_check_version.py +7 -10
  361. mindspore/runtime/__init__.py +8 -6
  362. mindspore/runtime/event.py +10 -4
  363. mindspore/runtime/executor.py +87 -45
  364. mindspore/runtime/memory.py +22 -30
  365. mindspore/runtime/thread_bind_core.py +299 -165
  366. mindspore/safeguard/rewrite_obfuscation.py +12 -13
  367. mindspore/swresample-4.dll +0 -0
  368. mindspore/swscale-6.dll +0 -0
  369. mindspore/tbbmalloc.dll +0 -0
  370. mindspore/tinyxml2.dll +0 -0
  371. mindspore/train/_utils.py +9 -5
  372. mindspore/train/amp.py +43 -23
  373. mindspore/train/callback/__init__.py +5 -5
  374. mindspore/train/callback/_callback.py +2 -1
  375. mindspore/train/callback/_checkpoint.py +4 -14
  376. mindspore/train/callback/_flops_collector.py +11 -7
  377. mindspore/train/callback/_landscape.py +0 -1
  378. mindspore/train/callback/_train_fault_tolerance.py +72 -18
  379. mindspore/train/data_sink.py +15 -6
  380. mindspore/train/dataset_helper.py +14 -5
  381. mindspore/train/model.py +49 -47
  382. mindspore/train/serialization.py +168 -126
  383. mindspore/train/summary/summary_record.py +13 -2
  384. mindspore/train/train_thor/model_thor.py +2 -2
  385. mindspore/turbojpeg.dll +0 -0
  386. mindspore/utils/__init__.py +3 -2
  387. mindspore/utils/dryrun.py +0 -6
  388. mindspore/utils/runtime_execution_order_check.py +162 -78
  389. mindspore/utils/sdc_detect.py +68 -0
  390. mindspore/utils/utils.py +14 -17
  391. mindspore/vcmeta.dll +0 -0
  392. mindspore/vcruntime140.dll +0 -0
  393. mindspore/vcruntime140_1.dll +0 -0
  394. mindspore/version.py +1 -1
  395. {mindspore-2.6.0.dist-info → mindspore-2.7.0.dist-info}/METADATA +5 -4
  396. {mindspore-2.6.0.dist-info → mindspore-2.7.0.dist-info}/RECORD +400 -439
  397. mindspore/_deprecated/jit.py +0 -198
  398. mindspore/_extends/remote/kernel_build_server_ascend.py +0 -75
  399. mindspore/communication/_hccl_management.py +0 -297
  400. mindspore/experimental/es/embedding_service.py +0 -891
  401. mindspore/experimental/es/embedding_service_layer.py +0 -581
  402. mindspore/profiler/common/validator/__init__.py +0 -14
  403. mindspore/profiler/common/validator/validate_path.py +0 -84
  404. mindspore/profiler/parser/__init__.py +0 -14
  405. mindspore/profiler/parser/aicpu_data_parser.py +0 -272
  406. mindspore/profiler/parser/ascend_analysis/__init__.py +0 -14
  407. mindspore/profiler/parser/ascend_analysis/constant.py +0 -71
  408. mindspore/profiler/parser/ascend_analysis/file_manager.py +0 -180
  409. mindspore/profiler/parser/ascend_analysis/function_event.py +0 -185
  410. mindspore/profiler/parser/ascend_analysis/fwk_cann_parser.py +0 -136
  411. mindspore/profiler/parser/ascend_analysis/fwk_file_parser.py +0 -131
  412. mindspore/profiler/parser/ascend_analysis/msprof_timeline_parser.py +0 -104
  413. mindspore/profiler/parser/ascend_analysis/path_manager.py +0 -313
  414. mindspore/profiler/parser/ascend_analysis/profiler_info_parser.py +0 -123
  415. mindspore/profiler/parser/ascend_analysis/tlv_decoder.py +0 -86
  416. mindspore/profiler/parser/ascend_analysis/trace_event_manager.py +0 -75
  417. mindspore/profiler/parser/ascend_cluster_generator.py +0 -116
  418. mindspore/profiler/parser/ascend_communicate_generator.py +0 -314
  419. mindspore/profiler/parser/ascend_flops_generator.py +0 -116
  420. mindspore/profiler/parser/ascend_fpbp_generator.py +0 -82
  421. mindspore/profiler/parser/ascend_hccl_generator.py +0 -271
  422. mindspore/profiler/parser/ascend_integrate_generator.py +0 -42
  423. mindspore/profiler/parser/ascend_memory_generator.py +0 -185
  424. mindspore/profiler/parser/ascend_msprof_exporter.py +0 -282
  425. mindspore/profiler/parser/ascend_msprof_generator.py +0 -187
  426. mindspore/profiler/parser/ascend_op_generator.py +0 -334
  427. mindspore/profiler/parser/ascend_steptrace_generator.py +0 -94
  428. mindspore/profiler/parser/ascend_timeline_generator.py +0 -545
  429. mindspore/profiler/parser/base_timeline_generator.py +0 -483
  430. mindspore/profiler/parser/container.py +0 -229
  431. mindspore/profiler/parser/cpu_gpu_timeline_generator.py +0 -697
  432. mindspore/profiler/parser/flops_parser.py +0 -531
  433. mindspore/profiler/parser/framework_enum.py +0 -111
  434. mindspore/profiler/parser/framework_parser.py +0 -464
  435. mindspore/profiler/parser/framework_struct.py +0 -61
  436. mindspore/profiler/parser/gpu_analysis/__init__.py +0 -14
  437. mindspore/profiler/parser/gpu_analysis/function_event.py +0 -44
  438. mindspore/profiler/parser/gpu_analysis/fwk_file_parser.py +0 -89
  439. mindspore/profiler/parser/gpu_analysis/profiler_info_parser.py +0 -72
  440. mindspore/profiler/parser/hccl_parser.py +0 -573
  441. mindspore/profiler/parser/hwts_log_parser.py +0 -122
  442. mindspore/profiler/parser/integrator.py +0 -526
  443. mindspore/profiler/parser/memory_usage_parser.py +0 -277
  444. mindspore/profiler/parser/minddata_analyzer.py +0 -800
  445. mindspore/profiler/parser/minddata_parser.py +0 -186
  446. mindspore/profiler/parser/minddata_pipeline_parser.py +0 -299
  447. mindspore/profiler/parser/op_intermediate_parser.py +0 -149
  448. mindspore/profiler/parser/optime_parser.py +0 -250
  449. mindspore/profiler/parser/profiler_info.py +0 -213
  450. mindspore/profiler/parser/step_trace_parser.py +0 -666
  451. mindspore/utils/hooks.py +0 -81
  452. mindspore/common/{_auto_dynamic.py → dynamic_shape/_auto_dynamic.py} +0 -0
  453. {mindspore-2.6.0.dist-info → mindspore-2.7.0.dist-info}/WHEEL +0 -0
  454. {mindspore-2.6.0.dist-info → mindspore-2.7.0.dist-info}/entry_points.txt +0 -0
  455. {mindspore-2.6.0.dist-info → mindspore-2.7.0.dist-info}/top_level.txt +0 -0
Deleted file: mindspore/profiler/parser/gpu_analysis/profiler_info_parser.py
@@ -1,72 +0,0 @@
1
- # Copyright 2024 Huawei Technologies Co., Ltd
2
- #
3
- # Licensed under the Apache License, Version 2.0 (the "License");
4
- # you may not use this file except in compliance with the License.
5
- # You may obtain a copy of the License at
6
- #
7
- # http://www.apache.org/licenses/LICENSE-2.0
8
- #
9
- # Unless required by applicable law or agreed to in writing, software
10
- # distributed under the License is distributed on an "AS IS" BASIS,
11
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
- # See the License for the specific language governing permissions and
13
- # limitations under the License.
14
- # ============================================================================
15
- """Profiler host information parser"""
16
- import os
17
- import json
18
- from decimal import Decimal
19
- from mindspore.profiler.common.validator.validate_path import validate_and_normalize_path
20
- from mindspore.profiler.parser.ascend_analysis.constant import Constant
21
- from mindspore.profiler.parser.profiler_info import ProfilerInfo
22
-
23
-
24
- class GPUProfilerInfoParser:
25
- """Parse files that record information, such as profiler_info.json"""
26
-
27
- _freq = 2600000000
28
- _system_time = 0
29
- _system_cnt = 0
30
- _s_to_ns = 1e9
31
- # profiler information related files
32
- _source_path = None
33
- _loaded_frequency = False
34
- _rank_id = 0
35
-
36
- @classmethod
37
- def init_source_path(cls, source_path: str):
38
- """initialize the path of PROF_* directory."""
39
- source_path = validate_and_normalize_path(source_path)
40
- cls._source_path = source_path
41
-
42
- @classmethod
43
- def init_rank_id(cls, rank_id: int):
44
- """initialize the rank id."""
45
- cls._rank_id = rank_id
46
-
47
- @classmethod
48
- def get_local_time(cls, syscnt: int) -> Decimal:
49
- """Convert syscnt to local time."""
50
- if not cls._loaded_frequency:
51
- profiler_info_path = os.path.join(cls._source_path, f"profiler_info_{cls._rank_id}.json")
52
- if not os.path.isfile(profiler_info_path):
53
- raise RuntimeError(f"Can`t find the file {profiler_info_path}, please check !")
54
- with os.fdopen(os.open(profiler_info_path, os.O_RDONLY, 0o600),
55
- 'r') as fr:
56
- profiler_info_data = json.load(fr)
57
- cls._system_cnt = profiler_info_data.get('system_cnt')
58
- cls._system_time = profiler_info_data.get('system_time')
59
- ProfilerInfo.set_system_time(cls._system_cnt)
60
- ProfilerInfo.set_system_cnt(cls._system_time)
61
- cls._loaded_frequency = True
62
-
63
- start_ns = cls._get_timestamp(syscnt)
64
- return Decimal(start_ns).quantize(Decimal('0.000')) * Decimal(Constant.NS_TO_US).quantize(Decimal('0.000'))
65
-
66
- @classmethod
67
- def _get_timestamp(cls, syscnt: int):
68
- """Convert syscnt to time stamp."""
69
- ratio = cls._freq / cls._s_to_ns
70
- # The unit of timestamp is ns
71
- timestamp = (syscnt - cls._system_cnt) / ratio + cls._system_time
72
- return timestamp
@@ -1,573 +0,0 @@
1
- # Copyright 2021 Huawei Technologies Co., Ltd
2
- #
3
- # Licensed under the Apache License, Version 2.0 (the "License");
4
- # you may not use this file except in compliance with the License.
5
- # You may obtain a copy of the License at
6
- #
7
- # http://www.apache.org/licenses/LICENSE-2.0
8
- #
9
- # Unless required by applicable law or agreed to in writing, software
10
- # distributed under the License is distributed on an "AS IS" BASIS,
11
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
- # See the License for the specific language governing permissions and
13
- # limitations under the License.
14
- # ============================================================================
15
- """The parser for parsing hccl files."""
16
- import csv
17
- import json
18
- import os
19
- import stat
20
- from enum import Enum
21
- import numpy as np
22
-
23
- from mindspore.profiler.common.exceptions.exceptions import \
24
- ProfilerPathErrorException, ProfilerFileNotFoundException, \
25
- ProfilerDirNotFoundException, ProfilerRawFileException
26
- from mindspore import log as logger
27
- from mindspore.profiler.common.validator.validate_path import \
28
- validate_and_normalize_path
29
-
30
-
31
class CommunicationInfo(Enum):
    """
    String constants used when reading hccl trace events.

    The first three members are values of the ``transport type`` field, the
    remaining ones are values of the ``task type`` field.

    Enum:
        RDMA: Link between servers in cluster training.
        SDMA: Link inside a server in cluster training.
        LOCAL: Operation on this card without a transmission process.
        RDMASEND: Communication operator of the RDMA link.
        REDUCE_INLINE: Communication operator of the SDMA link.
        MEMCPY: Communication operator of the SDMA link.
        NOTIFY_RECORD: Communication operator of the SDMA link.
        NOTIFY_WAIT: Operator of LOCAL.
    """
    # Transport (link) types.
    RDMA = 'RDMA'
    SDMA = 'SDMA'
    LOCAL = 'LOCAL'
    # Task (operator) types.
    RDMASEND = 'RDMASend'
    REDUCE_INLINE = 'Reduce Inline'
    MEMCPY = 'Memcpy'
    NOTIFY_RECORD = 'Notify Record'
    NOTIFY_WAIT = 'Notify Wait'
53
-
54
-
55
class HcclParser:
    """
    The parser for parsing hccl file.

    Args:
        source_dir (str): The hccl source dir.
        device_id (str): The device ID.
        rank_id (str): The rank ID.
        output_path (str): The directory of the parsed file. The step trace file
            ``step_trace_raw_{rank_id}_detail_time.csv`` is also read from here.

    Raises:
        ProfilerPathErrorException: If the hccl file path or the output path is invalid.
        ProfilerFileNotFoundException: If the hccl file or the step trace file does not exist.
        ProfilerDirNotFoundException: If the source dir or the output dir does not exist.
    """
    _parsed_hccl_file_name = 'hccl_raw_{}.csv'
    _col_names = ['step_num', 'communication_cost', 'wait_cost', 'link_info', 'communication_operator_cost']

    def __init__(self, source_dir, device_id, rank_id, output_path):
        self._dev_id = device_id
        self._rank_id = rank_id
        self._source_dir = source_dir
        self._save_path = self._get_save_path(output_path)
        # [communication_operators_names, step_timestamps_info]
        self._step_trace_info = self._get_step_trace_info(output_path)
        self._communication_operator_name_mapping_info = self._get_communication_operator_name_mapping_info()

    @staticmethod
    def _divide_communication_info_by_thread(trace_events: list):
        """Group trace events by their ``tid`` field.

        Returns:
            dict, thread id -> list of events in their original order.
        """
        threads_dict = dict()
        for item in trace_events:
            threads_dict.setdefault(item.get("tid"), []).append(item)
        return threads_dict

    @staticmethod
    def _get_transfer_size(event):
        """Return the ``size`` argument of an event as int.

        A missing or falsy size counts as 0; a non-int size is parsed as a
        hexadecimal string.
        """
        size = event.get("args").get("size")
        if not size:
            return 0
        return size if isinstance(size, int) else int(size, 16)

    @staticmethod
    def _calculate_adma_link_info(trace_event: list):
        """
        Calculate RDMA link info.

        When the link is RDMA, three consecutive operators RDMASend, RDMASend
        and Notify Wait are matched, and the sum of their durations is taken as
        one communication time. Only the sizes of the two RDMASend events are
        added to the communication size.

        Returns:
            list, [communication_time, communication_size, bandwidth, wait_time].
        """
        rdma_communication_time = 0
        rdma_communication_size = 0
        rdma_communication_wait_time = 0
        rdma_send = CommunicationInfo.RDMASEND.value
        notify_wait = CommunicationInfo.NOTIFY_WAIT.value
        start_index = 0
        end_index = len(trace_event) - 1
        while start_index < end_index:
            first_task_type = trace_event[start_index].get("args").get("task type")
            # A triple needs two more events after start_index.
            if first_task_type == rdma_send and start_index < end_index - 1:
                first, second, third = trace_event[start_index:start_index + 3]
                second_task_type = second.get("args").get("task type")
                third_task_type = third.get("args").get("task type")
                if second_task_type == rdma_send and third_task_type == notify_wait:
                    notify_wait_cost = third.get("dur", 0)
                    rdma_communication_time += first.get("dur", 0) + second.get("dur", 0) + notify_wait_cost
                    rdma_communication_wait_time += notify_wait_cost
                    rdma_communication_size += HcclParser._get_transfer_size(first) + \
                        HcclParser._get_transfer_size(second)
                    # Skip the two events consumed by this triple.
                    start_index += 2
            start_index += 1

        # The unit of rdma_communication_wait_time is ms.
        # The unit of rdma_bandwidth is KB/s.
        # The unit of rdma_communication_size is k_byte and The unit of rdma_communication_time is ms.
        rdma_communication_wait_time = rdma_communication_wait_time / 1e3
        rdma_communication_size = rdma_communication_size / 1e3
        rdma_communication_time = rdma_communication_time / 1e3
        # Guard on the time as well: a non-zero size with zero total duration
        # used to raise ZeroDivisionError.
        rdma_bandwidth = rdma_communication_size / (rdma_communication_time / 1e3) \
            if rdma_communication_size and rdma_communication_time else 0

        return [rdma_communication_time, rdma_communication_size, rdma_bandwidth, rdma_communication_wait_time]

    @staticmethod
    def _calculate_notify_wait_time(trace_event: list):
        """Sum the durations of all Notify Wait events and return the result in ms."""
        total_notify_wait_time = sum(
            item.get("dur", 0) for item in trace_event
            if item.get("args").get("task type") == CommunicationInfo.NOTIFY_WAIT.value
        )
        # The unit of total_notify_wait_time is ms.
        return total_notify_wait_time / 1e3

    @staticmethod
    def _parser_link_dict(result_dict, src_dst_key, src_dst_value):
        """Append every link value of one src-dst entry to ``result_dict`` (modified in place)."""
        links = result_dict.setdefault(src_dst_key, dict())
        for link_key, link_value in src_dst_value.items():
            links.setdefault(link_key, list()).append(link_value)

    @staticmethod
    def _calculate_link_value(link_info: list, calculate_type):
        """Calculate link average or total value.

        Args:
            link_info (list): Dicts shaped ``{src_dst_key: {link_key: values}}``.
            calculate_type (str): ``'average'`` or ``'total'``. For any other
                value the collected values are returned without reduction.

        Returns:
            dict, ``{src_dst_key: {link_key: reduced values}}``.
        """
        result_dict = dict()
        for item in link_info:
            for src_dst_key, src_dst_value in item.items():
                HcclParser._parser_link_dict(result_dict, src_dst_key, src_dst_value)

        reducer = {'average': np.mean, 'total': np.sum}.get(calculate_type)
        if reducer is not None:
            for links in result_dict.values():
                for link_key in links:
                    # Element-wise reduction over the collected value lists.
                    links[link_key] = reducer(links[link_key], axis=0).tolist()

        return result_dict

    def parse(self):
        """Parse communication info."""
        self._parse_and_save(self._source_dir)

    def _parse_communication_cost(self, operators_cost_info, info, operators_dict):
        """Fill ``operators_dict`` with each operator's row for the step of ``info``."""
        for k, v in operators_cost_info.items():
            for item in v:
                # index0:step_num
                if info[0] == item[0]:
                    operators_dict[k] = item

    def _parse_and_save(self, dir_path):
        """Parse and save communication info.

        Writes one CSV row per step followed by an averages row whose first
        column is '-'. When no step data exists only the header is written.
        """
        operators_cost_info = self._get_communication_operators_cost_info(dir_path)
        communication_info_cache = [item for v in operators_cost_info.values() for item in v]
        communication_info_cache = self._merge_communication_info_by_step_num(communication_info_cache)
        for info in communication_info_cache:
            operators_dict = dict()
            self._parse_communication_cost(operators_cost_info, info, operators_dict)
            info.append(operators_dict)
        # Calculate device communication average.
        device_communication_average_value = self._calculate_communication_average_value(communication_info_cache)
        # Calculate operator communication average.
        operators_average_value = dict()
        for k, v in operators_cost_info.items():
            average_value = self._calculate_communication_average_value(v)
            # The symbol '-' is used to indicate that the line is average information.
            average_value.insert(0, '-')
            operators_average_value[k] = average_value
        device_communication_average_value.append(operators_average_value)
        # The symbol '-' is used to indicate that the line is average information.
        device_communication_average_value.insert(0, '-')
        with open(self._save_path, 'w', newline='') as save_file:
            csv_writer = csv.writer(save_file)
            csv_writer.writerow(self._col_names)
            for item in communication_info_cache:
                # item[3]:link_info which is a dictionary that needs to be encoded before it is written to a CSV file.
                # item[4]:it is a dictionary that needs to be encoded before it is written to a CSV file.
                item[3] = json.dumps(item[3])
                item[4] = json.dumps(item[4])
                csv_writer.writerow(item)
            # With no step data the averages row has only two entries, and
            # indexing [3] / [4] used to raise IndexError; skip the row then.
            if len(device_communication_average_value) == len(self._col_names):
                # device_communication_average_value[3]: average value for link info
                # device_communication_average_value[4]: average value for operator info
                device_communication_average_value[3] = json.dumps(device_communication_average_value[3])
                device_communication_average_value[4] = json.dumps(device_communication_average_value[4])
                csv_writer.writerow(device_communication_average_value)
        os.chmod(self._save_path, stat.S_IREAD | stat.S_IWRITE)

    def _get_save_path(self, output_path):
        """
        Get the save path.

        Args:
            output_path (str): The output dir.

        Returns:
            str, the save path.
        """
        output_path = self._validate_dir_path(output_path)
        return os.path.join(
            output_path, self._parsed_hccl_file_name.format(self._rank_id)
        )

    def _get_step_trace_info(self, source_dir):
        """Get the start and end timestamps in a step and communication operators names.

        Args:
            source_dir (str): Directory that holds ``step_trace_raw_{rank_id}_detail_time.csv``.

        Returns:
            list, ``[communication_operators_names, step_timestamps_info]`` where each
            timestamp entry is ``[step_num, start_point, end_point]``.

        Raises:
            ProfilerPathErrorException: If the file path is invalid.
            ProfilerFileNotFoundException: If the step trace file does not exist.
        """
        file_path = os.path.join(
            source_dir,
            f'step_trace_raw_{self._rank_id}_detail_time.csv'
        )
        try:
            file_path = validate_and_normalize_path(file_path)
        except RuntimeError as err:
            logger.warning('file path is invalid.')
            raise ProfilerPathErrorException('file path is invalid.') from err
        if not os.path.isfile(file_path):
            logger.warning('The step trace file <%s> not found.', file_path)
            raise ProfilerFileNotFoundException(file_path)

        with open(file_path, 'r') as src_file:
            csv_reader = csv.reader(src_file)
            # The first row of step trace file is like: step_num, start_point,...,communication_operator_name.
            # The position number of the first communication operator name is 9.
            communication_operators_names = next(csv_reader)[9:]

            # index_0:step_num, index_1:start_point, index_2:end_point
            # The unit of time stamp is 10ns. To convert it to μs, you need to divide it by 100.
            # csv.reader yields [] for blank lines, so check the row is non-empty first.
            step_timestamps_info = [
                [info[0], float(info[1]) / 100, float(info[2]) / 100]
                for info in csv_reader if info and info[0].isdigit()
            ]

        return [communication_operators_names, step_timestamps_info]

    def _get_communication_operator_name_mapping_info(self):
        """Get the name of communication operators mapping between hccl and step trace.

        Returns:
            dict, hccl operator type -> list of ``(hccl_dir_name, step_trace_name)``
            pairs, paired by position.
        """
        dir_path = self._validate_dir_path(self._source_dir)
        # The name of the operator in hccl is like: operatorName_{Ordered_number}_xx_xx.
        operators_names_in_hccl = [entry.name for entry in os.scandir(dir_path) if entry.is_dir()]
        op_names_in_hccl_dic = dict()
        for name in operators_names_in_hccl:
            op_names_in_hccl_dic.setdefault(name.split('_')[0], []).append(name)
        for names in op_names_in_hccl_dic.values():
            # Order the directories of one operator type by their ordered number.
            names.sort(key=lambda x: int(x.split('_')[1]))

        # The op_info in step trace is like: [op_name,op_name_start_point,op_name_end_point]
        # The name of the operator in step trace can be obtained every three.
        # The name of the operator in step trace is like: stream_xx_xx_operatorName-opxx.
        operators_names_in_step_trace = self._step_trace_info[0][::3]
        op_names_in_step_trace_dic = dict()
        for op_name in operators_names_in_step_trace:
            op_type = op_name.split('/')[-1].split('-')[0].split('_')[-1]
            op_names_in_step_trace_dic.setdefault(op_type, []).append(op_name)

        communication_operator_mapping_info = dict()
        for hccl_key, hccl_value in op_names_in_hccl_dic.items():
            for step_trace_key, step_trace_value in op_names_in_step_trace_dic.items():
                # the step_trace_key format is: operatorName
                if hccl_key.lower() == step_trace_key.lower().split('/')[-1]:
                    communication_operator_mapping_info[hccl_key] = list(zip(hccl_value, step_trace_value))

        logger.info("Communication operator name mapping info is %s", communication_operator_mapping_info)

        return communication_operator_mapping_info

    def _calculate_the_step_by_timestamp(self, timestamp):
        """Calculate the step according to the timestamp.

        A timestamp before the first step maps to "1"; one after the last step
        maps to the last step. A timestamp that lies inside no
        ``[start_point, end_point)`` interval (for example exactly on the last
        end point, or in a gap between two steps) maps to the latest step that
        has already started; this case used to raise UnboundLocalError.
        """
        # index0:communication_operator_name, index1:step_timestamps_info
        step_timestamps_info = self._step_trace_info[1]
        # index_0:step_num, index_1:start_point, index_2:end_point
        if timestamp < step_timestamps_info[0][1]:
            return "1"
        last_step = step_timestamps_info[-1]
        if last_step[2] < timestamp:
            return last_step[0]

        step_num = None
        for item in step_timestamps_info:
            if item[1] <= timestamp < item[2]:
                step_num = item[0]
        if step_num is None:
            # The first step always qualifies here because timestamp is not
            # smaller than its start point.
            for item in step_timestamps_info:
                if item[1] <= timestamp:
                    step_num = item[0]
        return step_num

    def _get_communication_operators_cost_info(self, dir_path):
        """Obtain time-consuming information of all communication operators.

        Returns:
            dict, operator name -> per-step cost rows. The step trace name is
            used as key when a mapping exists, otherwise the hccl dir name.
        """
        operators_cost_info = dict()
        dir_path = self._validate_dir_path(dir_path)
        operators_dir = [entry.name for entry in os.scandir(dir_path) if entry.is_dir()]
        for operator_name in operators_dir:
            operator_cost = self._calculate_communication_operator_cost(os.path.join(dir_path, operator_name))
            op_mapping_info = self._communication_operator_name_mapping_info.get(operator_name.split('_')[0], [])
            # index1: operator name in step trace.
            op_mapping_name = [item[1] for item in op_mapping_info if item[0] == operator_name]
            if not op_mapping_name:
                logger.warning("The mapping relationship between op name in hccl and op name in step trace "
                               "cannot be found. Use op name in hccl to show the name of the communication operator.")
            else:
                operator_name = op_mapping_name[0]
            operators_cost_info[operator_name] = operator_cost
        return operators_cost_info

    def _calculate_communication_operator_cost(self, dir_path):
        """Calculate communication operator cost. Such as allReduce_1,allReduce_2."""
        dir_path = self._validate_dir_path(dir_path)
        files_path = [os.path.join(dir_path, entry.name) for entry in os.scandir(dir_path) if entry.is_file()]
        operator_cost = [self._calculate_communication_operator_iter_cost(path) for path in files_path]
        # Add the same step_num merge.
        return self._merge_communication_info_by_step_num(operator_cost)

    def _merge_communication_info_by_step_num(self, communication_info: list):
        """According to step num to merge communication info.

        Rows whose step number is not a digit string are dropped. The result
        is ordered by ascending step number.
        """
        # index0:step_num,index1:communication_cost,index2:communication_wait_cost,index3:link_info
        step_numbers = sorted({int(item[0]) for item in communication_info if item[0].isdigit()})
        steps_communication_info = list()
        for number in step_numbers:
            step = str(number)
            step_communication_info = [info for info in communication_info if info[0] == step]
            step_communication_cost = sum([i[1] for i in step_communication_info])
            step_communication_wait_cost = sum([i[2] for i in step_communication_info])
            step_communication_link = self._calculate_link_value([i[3] for i in step_communication_info], "total")
            steps_communication_info.append([step, step_communication_cost,
                                             step_communication_wait_cost, step_communication_link])
        return steps_communication_info

    def _calculate_communication_operator_iter_cost(self, file_path):
        """Calculate the time-consuming of communication operator in one execution round.

        Returns:
            list, ``[step_id, communication_cost, wait_cost, link_info]``. The two
            costs come from the mainstream thread only, the link info from all events.

        Raises:
            ProfilerRawFileException: If the operator file is not valid JSON.
        """

        def _inner_calculate_communication_operator_iter_cost(events):
            total_notify_wait = HcclParser._calculate_notify_wait_time(events)
            # Divide information by src dst rank_id.
            src_dst_dict = self._divide_communication_info_by_src_dst_rank(events)
            src_dst_link_info = self._calculate_src_dst_link_info(src_dst_dict)
            communication_cost, communication_wait = self._calculate_device_communication_cost(src_dst_link_info)
            # Wait time already counted as communication is not reported as waiting.
            total_notify_wait -= communication_wait
            return [communication_cost, total_notify_wait, src_dst_link_info]

        file_path = self._validate_file_path(file_path)
        with open(file_path, 'r') as src_file:
            try:
                operator_info = json.load(src_file)
            except (json.JSONDecodeError, TypeError) as err:
                logger.warning(err)
                raise ProfilerRawFileException('Fail to parse operator file.') from err
        trace_events = operator_info.get("traceEvents")
        operator_timestamp = trace_events[0].get("ts", 0)
        step_id = self._calculate_the_step_by_timestamp(operator_timestamp)
        # Statistics of communication operators in all streams.
        total_communication_operator_iter_cost = \
            _inner_calculate_communication_operator_iter_cost(trace_events)
        # Statistics of communication operators in mainstream.
        # NOTE(review): the first pass swaps "src rank"/"dst rank" of LOCAL events in place,
        # so this second pass over the same event objects sees the swapped values - confirm intended.
        threads_dict = self._divide_communication_info_by_thread(trace_events)
        # The largest value is mainstream.
        major_thread = max(threads_dict)
        major_thread_trace_events = threads_dict.get(major_thread)
        mainstream_communication_operator_iter_cost = \
            _inner_calculate_communication_operator_iter_cost(major_thread_trace_events)
        # index0:communication_cost,index1:communication_wait_cost,index2:link_info
        return [step_id, mainstream_communication_operator_iter_cost[0],
                mainstream_communication_operator_iter_cost[1],
                total_communication_operator_iter_cost[2]]

    def _divide_communication_info_by_src_dst_rank(self, trace_event: list):
        """Divide information by src rank id and dst rank id.

        Events without a src or dst rank are skipped. For LOCAL transport the
        event's src/dst ranks are swapped in place.

        Returns:
            dict, ``"{src}-{dst}"`` -> list of events.
        """
        local_rank_marker = int('0xffffffff', 16)
        src_dst_dict = dict()
        for item in trace_event:
            args = item.get("args")
            src_rank = args.get("src rank")
            dst_rank = args.get("dst rank")
            if src_rank is None or dst_rank is None:
                continue

            # When the SDMA operation is in the card,
            # the source card or destination card is 0xffffffff, and it needs to be converted to localrank.
            if int(src_rank) == local_rank_marker:
                src_rank = dst_rank

            if int(dst_rank) == local_rank_marker:
                dst_rank = src_rank

            if args.get("transport type") == CommunicationInfo.LOCAL.value:
                item["args"]["src rank"] = dst_rank
                item["args"]["dst rank"] = src_rank
                src_dst_key = str(dst_rank) + '-' + str(src_rank)
            else:
                src_dst_key = str(src_rank) + '-' + str(dst_rank)

            src_dst_dict.setdefault(src_dst_key, []).append(item)
        return src_dst_dict

    def _divide_communication_info_by_link_type(self, trace_event: list):
        """Divide information by link type.

        RDMA and SDMA events are grouped under their transport type, except
        Notify Record events. LOCAL events are appended to the RDMA group, but
        only when that group already has entries.
        """
        rdma = CommunicationInfo.RDMA.value
        link_type_dict = dict()
        for item in trace_event:
            link_type_key = item.get("args").get("transport type")
            if link_type_key is None:
                continue
            if link_type_key in (rdma, CommunicationInfo.SDMA.value):
                task_type = item.get("args").get("task type")
                # Filter out the Notify Record operator in SDMA, because it does not transmit the actual amount of data.
                if task_type == CommunicationInfo.NOTIFY_RECORD.value:
                    continue
                if link_type_dict.get(link_type_key):
                    link_type_dict[link_type_key].append(item)
                else:
                    link_type_dict[link_type_key] = [item]
            if link_type_key == CommunicationInfo.LOCAL.value:
                if link_type_dict.get(rdma):
                    link_type_dict[rdma].append(item)
        return link_type_dict

    def _calculate_device_communication_cost(self, src_dst_link_info: dict):
        """Sum communication time and wait time over all src-dst links.

        Returns:
            tuple, ``(total_communication_time, total_wait_time)``.
        """
        total_communication_time = 0
        total_wait_time = 0
        for src_dst_value in src_dst_link_info.values():
            for link_type_value in src_dst_value.values():
                # time_cost:0,size_cost:1,brand_width:2,wait_time:3
                total_communication_time += link_type_value[0]
                # Only entries with a fourth element carry a wait time.
                if len(link_type_value) > 3:
                    total_wait_time += link_type_value[3]
        return total_communication_time, total_wait_time

    def _parse_link_cost(self, result_dict, key, link_type_dict):
        """Store the RDMA / SDMA cost of one src-dst pair in ``result_dict[key]``."""
        for link_type_key, link_type_value in link_type_dict.items():
            if link_type_key == CommunicationInfo.RDMA.value:
                # Divide information by thread.
                threads_dict = self._divide_communication_info_by_thread(link_type_value)
                rdma_infos = [self._calculate_adma_link_info(thread_value)
                              for thread_value in threads_dict.values()]
                result_dict[key][link_type_key] = np.sum(rdma_infos, axis=0).tolist()
            if link_type_key == CommunicationInfo.SDMA.value:
                result_dict[key][link_type_key] = self._calculate_sdma_link_info(link_type_value)

    def _calculate_src_dst_link_info(self, src_dst_dict: dict):
        """Calculate src dst link info.

        Returns:
            dict, src-dst key -> ``{link_type: cost list}``. Pairs without any
            grouped link event are omitted.
        """
        result_dict = dict()
        for k, v in src_dst_dict.items():
            # Divide information by link type.
            link_type_dict = self._divide_communication_info_by_link_type(v)
            if not link_type_dict:
                continue
            result_dict[k] = dict()
            self._parse_link_cost(result_dict, k, link_type_dict)
        return result_dict

    def _calculate_sdma_link_info(self, trace_event: list):
        """
        Calculate SDMA link info.

        When the link is SDMA, the communication time of the primary link is the
        sum of the execution time of Reduce inline and Memcpy operators.

        Returns:
            list, [communication_time, communication_size, bandwidth].
        """
        sdma_communication_time = 0
        sdma_communication_size = 0
        counted_task_types = (CommunicationInfo.REDUCE_INLINE.value, CommunicationInfo.MEMCPY.value)

        for item in trace_event:
            if item.get("args").get("task type") in counted_task_types:
                sdma_communication_time += item.get("dur", 0)
                sdma_communication_size += HcclParser._get_transfer_size(item)

        # The unit of sdma_bandwidth is KB/s.
        # The unit of sdma_communication_size is k_byte and The unit of sdma_communication_time is ms.
        sdma_communication_time = sdma_communication_time / 1e3
        sdma_communication_size = sdma_communication_size / 1e3
        # Guard on the time as well: a non-zero size with zero total duration
        # used to raise ZeroDivisionError.
        sdma_bandwidth = sdma_communication_size / (sdma_communication_time / 1e3) \
            if sdma_communication_size and sdma_communication_time else 0
        return [sdma_communication_time, sdma_communication_size, sdma_bandwidth]

    def _calculate_communication_average_value(self, communication_info: list):
        """Calculate communication average value.

        Returns:
            list, ``[communication_cost_average, wait_cost_average, link_average_info]``,
            or an empty list when ``communication_info`` is empty.
        """
        communication_info_size = len(communication_info)
        if communication_info_size == 0:
            return []
        # index1: communication_cost,index2:wait_cost,index3:link_info
        communication_cost_average = sum([i[1] for i in communication_info]) / communication_info_size
        wait_cost_average = sum([i[2] for i in communication_info]) / communication_info_size
        link_info = [i[3] for i in communication_info]
        link_average_info = HcclParser._calculate_link_value(link_info, 'average')
        return [communication_cost_average, wait_cost_average, link_average_info]

    def _validate_file_path(self, file_path):
        """Validate file path.

        Returns:
            str, the normalized file path.

        Raises:
            ProfilerPathErrorException: If the path is invalid.
            ProfilerFileNotFoundException: If the path is not an existing file.
        """
        try:
            file_path = validate_and_normalize_path(file_path)
        except RuntimeError as err:
            logger.warning('file path is invalid.')
            raise ProfilerPathErrorException('file path is invalid.') from err
        if not os.path.isfile(file_path):
            logger.warning('The file <%s> not found.', file_path)
            raise ProfilerFileNotFoundException(file_path)
        return file_path

    def _validate_dir_path(self, dir_path):
        """Validate dir path.

        Returns:
            str, the normalized dir path.

        Raises:
            ProfilerPathErrorException: If the path is invalid.
            ProfilerDirNotFoundException: If the path is not an existing directory.
        """
        try:
            dir_path = validate_and_normalize_path(dir_path)
        except RuntimeError as err:
            logger.warning('dir path is invalid.')
            raise ProfilerPathErrorException('dir path is invalid.') from err
        if not os.path.isdir(dir_path):
            logger.warning('The dir <%s> not found.', dir_path)
            raise ProfilerDirNotFoundException(dir_path)
        return dir_path