mindspore 2.1.0__cp38-cp38-manylinux1_x86_64.whl → 2.2.10__cp38-cp38-manylinux1_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mindspore might be problematic. Click here for more details.

Files changed (580)
  1. mindspore/.commit_id +1 -1
  2. mindspore/__init__.py +4 -1
  3. mindspore/_akg/akg/build_module.py +5 -6
  4. mindspore/_akg/akg/composite/build_module.py +46 -19
  5. mindspore/_akg/akg/composite/split_stitch.py +10 -11
  6. mindspore/_akg/akg/ms/info_version_adapt.py +67 -1
  7. mindspore/_akg/akg/tvm/api.py +4 -3
  8. mindspore/_akg/akg/tvm/autotvm/__init__.py +1 -2
  9. mindspore/_akg/akg/tvm/autotvm/graph_tuner/base_graph_tuner.py +1 -5
  10. mindspore/_akg/akg/tvm/autotvm/measure/__init__.py +1 -1
  11. mindspore/_akg/akg/tvm/autotvm/measure/measure.py +1 -10
  12. mindspore/_akg/akg/tvm/autotvm/measure/measure_methods.py +1 -372
  13. mindspore/_akg/akg/tvm/build_module.py +16 -1
  14. mindspore/_akg/akg/tvm/contrib/graph_runtime.py +0 -53
  15. mindspore/_akg/akg/tvm/hybrid/parser.py +7 -6
  16. mindspore/_akg/akg/tvm/ir_builder.py +1 -1
  17. mindspore/_akg/akg/tvm/module.py +1 -2
  18. mindspore/_akg/akg/tvm/stmt.py +2 -2
  19. mindspore/_akg/akg/utils/ascend_profilier/__init__.py +0 -0
  20. mindspore/_akg/akg/utils/ascend_profilier/cann_file_parser.py +76 -0
  21. mindspore/_akg/akg/utils/ascend_profilier/file_manager.py +56 -0
  22. mindspore/_akg/akg/utils/ascend_profilier/op_summary_bean.py +23 -0
  23. mindspore/_akg/akg/utils/ascend_profilier/op_summary_headers.py +8 -0
  24. mindspore/_akg/akg/utils/ascend_profilier/op_summary_parser.py +42 -0
  25. mindspore/_akg/akg/utils/ascend_profilier/path_manager.py +65 -0
  26. mindspore/_akg/akg/utils/composite_op_helper.py +9 -10
  27. mindspore/_akg/akg/utils/kernel_exec.py +98 -274
  28. mindspore/_akg/akg/utils/result_analysis.py +4 -24
  29. mindspore/_akg/akg/utils/tbe_codegen_utils.py +219 -0
  30. mindspore/_akg/akg/utils/util.py +38 -0
  31. mindspore/_c_dataengine.cpython-38-x86_64-linux-gnu.so +0 -0
  32. mindspore/_c_expression.cpython-38-x86_64-linux-gnu.so +0 -0
  33. mindspore/_c_mindrecord.cpython-38-x86_64-linux-gnu.so +0 -0
  34. mindspore/_check_jit_forbidden_api.py +3 -1
  35. mindspore/_checkparam.py +23 -29
  36. mindspore/_extends/graph_kernel/__init__.py +0 -1
  37. mindspore/_extends/graph_kernel/model/graph_split.py +84 -76
  38. mindspore/_extends/graph_kernel/model/model_builder.py +9 -50
  39. mindspore/_extends/graph_kernel/splitter.py +4 -11
  40. mindspore/_extends/parallel_compile/akg_compiler/akg_process.py +122 -15
  41. mindspore/_extends/parallel_compile/akg_compiler/build_tbe_kernel.py +84 -67
  42. mindspore/_extends/parallel_compile/akg_compiler/tbe_topi.py +4 -2
  43. mindspore/_extends/parallel_compile/akg_compiler/util.py +10 -7
  44. mindspore/_extends/parallel_compile/tbe_compiler/tbe_adapter.py +2 -2
  45. mindspore/_extends/parallel_compile/tbe_compiler/tbe_helper.py +6 -5
  46. mindspore/_extends/parallel_compile/tbe_compiler/tbe_job.py +1 -1
  47. mindspore/_extends/parallel_compile/tbe_compiler/tbe_job_manager.py +1 -1
  48. mindspore/_extends/parse/__init__.py +12 -15
  49. mindspore/_extends/parse/namespace.py +7 -33
  50. mindspore/_extends/parse/parser.py +61 -71
  51. mindspore/_extends/parse/resources.py +1 -1
  52. mindspore/_extends/parse/standard_method.py +74 -104
  53. mindspore/_extends/parse/trope.py +1 -1
  54. mindspore/_extends/remote/kernel_build_server.py +25 -7
  55. mindspore/_extends/remote/kernel_build_server_akg_v2.py +55 -0
  56. mindspore/_install_custom.py +43 -0
  57. mindspore/_mindspore_offline_debug.cpython-38-x86_64-linux-gnu.so +0 -0
  58. mindspore/amp.py +47 -11
  59. mindspore/bin/cache_admin +0 -0
  60. mindspore/bin/cache_server +0 -0
  61. mindspore/boost/boost.py +1 -8
  62. mindspore/boost/boost_cell_wrapper.py +3 -2
  63. mindspore/boost/grad_accumulation.py +1 -1
  64. mindspore/boost/group_loss_scale_manager.py +8 -7
  65. mindspore/common/__init__.py +5 -3
  66. mindspore/common/_jit_fallback_utils.py +6 -0
  67. mindspore/common/_register_for_adapter.py +2 -0
  68. mindspore/common/_register_for_tensor.py +2 -2
  69. mindspore/common/_stub_tensor.py +13 -0
  70. mindspore/common/_utils.py +13 -0
  71. mindspore/common/api.py +174 -259
  72. mindspore/common/auto_dynamic_shape.py +494 -0
  73. mindspore/common/dtype.py +18 -11
  74. mindspore/common/dump.py +6 -4
  75. mindspore/common/initializer.py +14 -14
  76. mindspore/common/jit_config.py +33 -15
  77. mindspore/common/lazy_inline.py +126 -7
  78. mindspore/common/mindir_util.py +101 -0
  79. mindspore/common/parameter.py +51 -41
  80. mindspore/common/seed.py +4 -4
  81. mindspore/common/sparse_tensor.py +13 -14
  82. mindspore/common/tensor.py +243 -165
  83. mindspore/communication/__init__.py +7 -4
  84. mindspore/communication/_comm_helper.py +83 -4
  85. mindspore/communication/management.py +152 -84
  86. mindspore/config/op_info.config +14 -3
  87. mindspore/config/super_bar_config.json +4 -2
  88. mindspore/context.py +152 -61
  89. mindspore/dataset/__init__.py +5 -5
  90. mindspore/dataset/audio/__init__.py +2 -2
  91. mindspore/dataset/audio/transforms.py +52 -52
  92. mindspore/dataset/callback/ds_callback.py +16 -2
  93. mindspore/dataset/core/config.py +68 -51
  94. mindspore/dataset/engine/cache_client.py +28 -5
  95. mindspore/dataset/engine/datasets.py +250 -112
  96. mindspore/dataset/engine/datasets_audio.py +43 -211
  97. mindspore/dataset/engine/datasets_standard_format.py +16 -35
  98. mindspore/dataset/engine/datasets_text.py +43 -67
  99. mindspore/dataset/engine/datasets_user_defined.py +86 -100
  100. mindspore/dataset/engine/datasets_vision.py +219 -1029
  101. mindspore/dataset/engine/iterators.py +11 -4
  102. mindspore/dataset/engine/obs/obs_mindrecord_dataset.py +4 -0
  103. mindspore/dataset/engine/obs/util.py +3 -0
  104. mindspore/dataset/engine/samplers.py +1 -1
  105. mindspore/dataset/engine/validators.py +19 -5
  106. mindspore/dataset/text/__init__.py +3 -3
  107. mindspore/dataset/text/transforms.py +101 -127
  108. mindspore/dataset/text/utils.py +205 -138
  109. mindspore/dataset/transforms/__init__.py +1 -1
  110. mindspore/dataset/transforms/py_transforms_util.py +40 -12
  111. mindspore/dataset/transforms/transforms.py +95 -40
  112. mindspore/dataset/utils/browse_dataset.py +8 -2
  113. mindspore/dataset/utils/line_reader.py +17 -19
  114. mindspore/dataset/vision/__init__.py +3 -3
  115. mindspore/dataset/vision/c_transforms.py +6 -3
  116. mindspore/dataset/vision/transforms.py +409 -287
  117. mindspore/dataset/vision/utils.py +13 -14
  118. mindspore/dataset/vision/validators.py +11 -1
  119. mindspore/experimental/map_parameter.py +14 -0
  120. mindspore/{nn/optim_ex → experimental/optim}/__init__.py +30 -29
  121. mindspore/{nn/optim_ex → experimental/optim}/adam.py +60 -67
  122. mindspore/{nn/optim_ex → experimental/optim}/adamw.py +181 -203
  123. mindspore/experimental/optim/lr_scheduler.py +1427 -0
  124. mindspore/{nn/optim_ex → experimental/optim}/optimizer.py +252 -259
  125. mindspore/{nn/optim_ex → experimental/optim}/sgd.py +147 -152
  126. mindspore/gen_ops.py +273 -0
  127. mindspore/include/OWNERS +0 -1
  128. mindspore/include/api/data_type.h +2 -1
  129. mindspore/include/api/graph.h +0 -15
  130. mindspore/include/api/kernel.h +2 -0
  131. mindspore/include/api/kernel_api.h +37 -12
  132. mindspore/include/api/model.h +17 -14
  133. mindspore/include/api/status.h +8 -3
  134. mindspore/include/api/types.h +37 -4
  135. mindspore/include/c_api/ms/abstract.h +67 -0
  136. mindspore/include/c_api/ms/attribute.h +197 -0
  137. mindspore/include/c_api/ms/base/handle_types.h +43 -0
  138. mindspore/include/c_api/ms/base/macros.h +32 -0
  139. mindspore/include/c_api/ms/base/status.h +33 -0
  140. mindspore/include/c_api/ms/base/types.h +282 -0
  141. mindspore/include/c_api/ms/context.h +102 -0
  142. mindspore/include/c_api/ms/graph.h +160 -0
  143. mindspore/include/c_api/ms/node.h +606 -0
  144. mindspore/include/c_api/ms/tensor.h +161 -0
  145. mindspore/include/c_api/ms/value.h +84 -0
  146. mindspore/include/dataset/constants.h +6 -5
  147. mindspore/include/dataset/execute.h +23 -13
  148. mindspore/include/dataset/text.h +26 -26
  149. mindspore/include/dataset/transforms.h +13 -13
  150. mindspore/include/dataset/vision.h +60 -60
  151. mindspore/include/dataset/vision_ascend.h +5 -6
  152. mindspore/include/dataset/vision_lite.h +17 -17
  153. mindspore/include/mindapi/base/type_id.h +1 -0
  154. mindspore/include/mindapi/base/types.h +1 -0
  155. mindspore/lib/libdnnl.so.2 +0 -0
  156. mindspore/lib/libjemalloc.so.2 +0 -0
  157. mindspore/lib/libmindspore.so +0 -0
  158. mindspore/lib/libmindspore_backend.so +0 -0
  159. mindspore/lib/libmindspore_common.so +0 -0
  160. mindspore/lib/libmindspore_core.so +0 -0
  161. mindspore/lib/libmindspore_glog.so.0 +0 -0
  162. mindspore/lib/libmindspore_gpr.so.15 +0 -0
  163. mindspore/lib/libmindspore_grpc++.so.1 +0 -0
  164. mindspore/lib/libmindspore_grpc.so.15 +0 -0
  165. mindspore/lib/libmindspore_shared_lib.so +0 -0
  166. mindspore/lib/libnnacl.so +0 -0
  167. mindspore/lib/libopencv_core.so.4.5 +0 -0
  168. mindspore/lib/libopencv_imgcodecs.so.4.5 +0 -0
  169. mindspore/lib/libopencv_imgproc.so.4.5 +0 -0
  170. mindspore/lib/libps_cache.so +0 -0
  171. mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/config/ascend310/aic-ascend310-ops-info.json +123 -0
  172. mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/config/ascend310p/aic-ascend310p-ops-info.json +123 -0
  173. mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/config/ascend910/aic-ascend910-ops-info.json +158 -0
  174. mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/config/ascend910b/aic-ascend910b-ops-info.json +37 -0
  175. mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/custom_aicore_ops_impl/add_dsl.py +46 -0
  176. mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/custom_aicore_ops_impl/add_tik.py +51 -0
  177. mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/custom_aicore_ops_impl/kv_cache_mgr.py +241 -0
  178. mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/custom_aicore_ops_impl/matmul_tik.py +212 -0
  179. mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/vector_core/tbe/custom_aicore_ops_impl/add_dsl.py +46 -0
  180. mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/vector_core/tbe/custom_aicore_ops_impl/add_tik.py +51 -0
  181. mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/vector_core/tbe/custom_aicore_ops_impl/kv_cache_mgr.py +241 -0
  182. mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/vector_core/tbe/custom_aicore_ops_impl/matmul_tik.py +212 -0
  183. mindspore/lib/plugin/ascend/custom_aicore_ops/op_proto/libop_proto.so +0 -0
  184. mindspore/lib/plugin/ascend/custom_aicpu_ops/op_impl/cpu/aicpu_kernel/impl/libcust_aicpu_kernels.so +0 -0
  185. mindspore/lib/plugin/ascend/custom_aicpu_ops/op_impl/cpu/aicpu_kernel/impl/libcust_cpu_kernels.so +0 -0
  186. mindspore/lib/plugin/ascend/custom_aicpu_ops/op_impl/cpu/config/cust_aicpu_kernel.json +8928 -0
  187. mindspore/lib/plugin/ascend/custom_aicpu_ops/op_proto/libcust_op_proto.so +0 -0
  188. mindspore/lib/plugin/ascend/libakg.so +0 -0
  189. mindspore/lib/plugin/ascend/libascend_collective.so +0 -0
  190. mindspore/lib/plugin/ascend/libdvpp_utils.so +0 -0
  191. mindspore/lib/plugin/ascend/libhccl_plugin.so +0 -0
  192. mindspore/lib/plugin/ascend/libmindspore_aicpu_kernels.so +0 -0
  193. mindspore/lib/plugin/ascend/libmindspore_cpu_kernels.so +0 -0
  194. mindspore/lib/plugin/cpu/libakg.so +0 -0
  195. mindspore/lib/plugin/gpu/libcuda_ops.so.10 +0 -0
  196. mindspore/lib/plugin/gpu/libcuda_ops.so.11 +0 -0
  197. mindspore/lib/plugin/gpu10.1/libakg.so +0 -0
  198. mindspore/lib/plugin/gpu10.1/libnccl.so.2 +0 -0
  199. mindspore/lib/plugin/gpu11.1/libakg.so +0 -0
  200. mindspore/lib/plugin/gpu11.1/libnccl.so.2 +0 -0
  201. mindspore/lib/plugin/gpu11.6/libakg.so +0 -0
  202. mindspore/lib/plugin/gpu11.6/libnccl.so.2 +0 -0
  203. mindspore/lib/plugin/libmindspore_ascend.so.1 +0 -0
  204. mindspore/lib/plugin/libmindspore_ascend.so.2 +0 -0
  205. mindspore/lib/plugin/libmindspore_gpu.so.10.1 +0 -0
  206. mindspore/lib/plugin/libmindspore_gpu.so.11.1 +0 -0
  207. mindspore/lib/plugin/libmindspore_gpu.so.11.6 +0 -0
  208. mindspore/mindrecord/tools/imagenet_to_mr.py +1 -1
  209. mindspore/mindrecord/tools/mnist_to_mr.py +2 -2
  210. mindspore/nn/__init__.py +0 -2
  211. mindspore/nn/cell.py +313 -74
  212. mindspore/nn/dynamic_lr.py +21 -21
  213. mindspore/nn/layer/activation.py +22 -30
  214. mindspore/nn/layer/basic.py +15 -13
  215. mindspore/nn/layer/channel_shuffle.py +1 -1
  216. mindspore/nn/layer/container.py +271 -9
  217. mindspore/nn/layer/conv.py +323 -204
  218. mindspore/nn/layer/dense.py +8 -5
  219. mindspore/nn/layer/embedding.py +33 -27
  220. mindspore/nn/layer/flash_attention.py +141 -88
  221. mindspore/nn/layer/image.py +8 -6
  222. mindspore/nn/layer/math.py +16 -25
  223. mindspore/nn/layer/normalization.py +107 -66
  224. mindspore/nn/layer/padding.py +1 -1
  225. mindspore/nn/layer/pooling.py +131 -109
  226. mindspore/nn/layer/rnn_cells.py +27 -22
  227. mindspore/nn/layer/rnns.py +13 -16
  228. mindspore/nn/layer/thor_layer.py +1 -1
  229. mindspore/nn/layer/transformer.py +221 -154
  230. mindspore/nn/learning_rate_schedule.py +9 -1
  231. mindspore/nn/loss/loss.py +235 -174
  232. mindspore/nn/optim/ada_grad.py +2 -1
  233. mindspore/nn/optim/adadelta.py +1 -0
  234. mindspore/nn/optim/adafactor.py +2 -1
  235. mindspore/nn/optim/adam.py +7 -4
  236. mindspore/nn/optim/adamax.py +3 -2
  237. mindspore/nn/optim/adasum.py +2 -2
  238. mindspore/nn/optim/asgd.py +2 -3
  239. mindspore/nn/optim/ftrl.py +6 -5
  240. mindspore/nn/optim/lamb.py +7 -4
  241. mindspore/nn/optim/lars.py +1 -1
  242. mindspore/nn/optim/lazyadam.py +5 -3
  243. mindspore/nn/optim/momentum.py +2 -1
  244. mindspore/nn/optim/optimizer.py +53 -4
  245. mindspore/nn/optim/proximal_ada_grad.py +3 -4
  246. mindspore/nn/optim/rmsprop.py +4 -3
  247. mindspore/nn/optim/rprop.py +23 -12
  248. mindspore/nn/optim/sgd.py +26 -11
  249. mindspore/nn/optim/thor.py +9 -7
  250. mindspore/nn/probability/bijector/bijector.py +5 -5
  251. mindspore/nn/probability/bijector/power_transform.py +27 -27
  252. mindspore/nn/probability/bijector/softplus.py +3 -3
  253. mindspore/nn/probability/distribution/_utils/custom_ops.py +3 -3
  254. mindspore/nn/probability/distribution/bernoulli.py +5 -5
  255. mindspore/nn/probability/distribution/beta.py +3 -3
  256. mindspore/nn/probability/distribution/categorical.py +7 -7
  257. mindspore/nn/probability/distribution/cauchy.py +0 -1
  258. mindspore/nn/probability/distribution/distribution.py +3 -3
  259. mindspore/nn/probability/distribution/gamma.py +3 -3
  260. mindspore/nn/probability/distribution/geometric.py +4 -4
  261. mindspore/nn/probability/distribution/gumbel.py +4 -4
  262. mindspore/nn/probability/distribution/log_normal.py +2 -2
  263. mindspore/nn/probability/distribution/logistic.py +2 -2
  264. mindspore/nn/probability/distribution/poisson.py +4 -4
  265. mindspore/nn/probability/distribution/transformed_distribution.py +3 -3
  266. mindspore/nn/probability/distribution/uniform.py +6 -6
  267. mindspore/nn/wrap/cell_wrapper.py +84 -34
  268. mindspore/nn/wrap/grad_reducer.py +8 -5
  269. mindspore/nn/wrap/loss_scale.py +105 -42
  270. mindspore/numpy/array_creations.py +1 -2
  271. mindspore/numpy/array_ops.py +3 -2
  272. mindspore/numpy/utils_const.py +5 -5
  273. mindspore/offline_debug/convert_async.py +2 -2
  274. mindspore/ops/_grad_experimental/__init__.py +0 -5
  275. mindspore/ops/_grad_experimental/grad_array_ops.py +2 -3
  276. mindspore/ops/_grad_experimental/grad_comm_ops.py +15 -2
  277. mindspore/ops/_grad_experimental/grad_debug_ops.py +0 -37
  278. mindspore/ops/_grad_experimental/grad_implementations.py +11 -1
  279. mindspore/ops/_grad_experimental/grad_inner_ops.py +2 -216
  280. mindspore/ops/_grad_experimental/grad_math_ops.py +19 -199
  281. mindspore/ops/_grad_experimental/grad_sparse.py +15 -0
  282. mindspore/ops/_grad_experimental/grad_sparse_ops.py +3 -3
  283. mindspore/ops/_op_impl/_custom_op/dsd_back_impl.py +1 -1
  284. mindspore/ops/_op_impl/_custom_op/flash_attention/attention.py +165 -109
  285. mindspore/ops/_op_impl/_custom_op/flash_attention/flash_attention_bwd.py +144 -86
  286. mindspore/ops/_op_impl/_custom_op/flash_attention/flash_attention_fwd.py +172 -187
  287. mindspore/ops/_op_impl/_custom_op/flash_attention/flash_attention_impl.py +51 -57
  288. mindspore/ops/_op_impl/_custom_op/flash_attention/tik_ops_utils.py +6 -17
  289. mindspore/ops/_op_impl/_custom_op/flash_attention/tiling_strategy/wukong_tiling.py +1 -1
  290. mindspore/ops/_op_impl/aicpu/__init__.py +14 -2
  291. mindspore/ops/_op_impl/aicpu/add.py +3 -3
  292. mindspore/ops/_op_impl/aicpu/bias_add_grad.py +0 -1
  293. mindspore/ops/_op_impl/aicpu/count_nonzero.py +43 -0
  294. mindspore/ops/_op_impl/aicpu/eps.py +32 -0
  295. mindspore/ops/_op_impl/aicpu/gamma.py +2 -2
  296. mindspore/ops/_op_impl/aicpu/log_uniform_candidate_sampler.py +6 -3
  297. mindspore/ops/_op_impl/aicpu/lu_unpack_grad.py +0 -1
  298. mindspore/ops/_op_impl/aicpu/multinomial.py +3 -3
  299. mindspore/ops/_op_impl/aicpu/parameterized_truncated_normal.py +15 -7
  300. mindspore/ops/_op_impl/aicpu/random_categorical.py +39 -19
  301. mindspore/ops/_op_impl/aicpu/random_choice_with_mask.py +5 -2
  302. mindspore/ops/_op_impl/aicpu/random_poisson.py +103 -52
  303. mindspore/ops/_op_impl/aicpu/random_shuffle.py +17 -15
  304. mindspore/ops/_op_impl/aicpu/{sparseaddmm.py → sparse_addmm.py} +2 -2
  305. mindspore/ops/_op_impl/aicpu/{sparsesparsemaximum.py → sparse_sparse_maximum.py} +4 -4
  306. mindspore/ops/_op_impl/aicpu/standard_laplace.py +5 -5
  307. mindspore/ops/_op_impl/aicpu/standard_normal.py +5 -5
  308. mindspore/ops/_op_impl/aicpu/truncated_normal.py +9 -7
  309. mindspore/ops/_op_impl/aicpu/uniform.py +5 -3
  310. mindspore/ops/_op_impl/aicpu/uniform_candidate_sampler.py +8 -4
  311. mindspore/ops/_op_impl/aicpu/uniform_int.py +5 -5
  312. mindspore/ops/_op_impl/aicpu/uniform_real.py +4 -4
  313. mindspore/ops/_op_impl/tbe/__init__.py +4 -4
  314. mindspore/ops/_op_impl/tbe/inplace_index_add.py +7 -3
  315. mindspore/ops/_op_impl/tbe/trans_data_ds.py +2 -0
  316. mindspore/ops/_primitive_cache.py +1 -1
  317. mindspore/ops/_tracefunc.py +45 -13
  318. mindspore/ops/_utils/utils.py +6 -1
  319. mindspore/ops/_vmap/vmap_array_ops.py +3 -3
  320. mindspore/ops/_vmap/vmap_base.py +3 -3
  321. mindspore/ops/_vmap/vmap_convolution_ops.py +1 -1
  322. mindspore/ops/_vmap/vmap_grad_math_ops.py +6 -4
  323. mindspore/ops/_vmap/vmap_math_ops.py +5 -2
  324. mindspore/ops/_vmap/vmap_nn_ops.py +61 -7
  325. mindspore/ops/arg_dtype_cast.py +54 -0
  326. mindspore/ops/composite/base.py +37 -10
  327. mindspore/ops/composite/math_ops.py +5 -4
  328. mindspore/ops/composite/multitype_ops/_compile_utils.py +275 -73
  329. mindspore/ops/composite/multitype_ops/_constexpr_utils.py +16 -9
  330. mindspore/ops/composite/multitype_ops/add_impl.py +43 -4
  331. mindspore/ops/composite/multitype_ops/getitem_impl.py +42 -4
  332. mindspore/ops/composite/multitype_ops/ones_like_impl.py +6 -0
  333. mindspore/ops/composite/multitype_ops/setitem_impl.py +2 -1
  334. mindspore/ops/composite/multitype_ops/zeros_like_impl.py +9 -0
  335. mindspore/ops/deprecated.py +304 -0
  336. mindspore/ops/function/__init__.py +4 -1
  337. mindspore/ops/function/array_func.py +174 -193
  338. mindspore/ops/function/clip_func.py +81 -13
  339. mindspore/ops/function/debug_func.py +1 -1
  340. mindspore/ops/function/grad/grad_func.py +18 -9
  341. mindspore/ops/function/image_func.py +10 -4
  342. mindspore/ops/function/linalg_func.py +5 -5
  343. mindspore/ops/function/math_func.py +575 -386
  344. mindspore/ops/function/nn_func.py +568 -260
  345. mindspore/ops/function/random_func.py +88 -57
  346. mindspore/ops/function/sparse_func.py +1 -1
  347. mindspore/ops/function/sparse_unary_func.py +14 -12
  348. mindspore/ops/function/vmap_func.py +6 -5
  349. mindspore/ops/functional.py +15 -10
  350. mindspore/ops/op_info_register.py +244 -25
  351. mindspore/ops/operations/__init__.py +28 -19
  352. mindspore/ops/operations/_grad_ops.py +72 -7
  353. mindspore/ops/operations/_inner_ops.py +350 -17
  354. mindspore/ops/operations/_quant_ops.py +4 -8
  355. mindspore/ops/operations/_sequence_ops.py +42 -0
  356. mindspore/ops/operations/array_ops.py +68 -282
  357. mindspore/ops/operations/comm_ops.py +107 -59
  358. mindspore/ops/operations/custom_ops.py +94 -70
  359. mindspore/ops/operations/debug_ops.py +8 -4
  360. mindspore/ops/operations/image_ops.py +18 -12
  361. mindspore/ops/operations/inner_ops.py +26 -3
  362. mindspore/ops/operations/math_ops.py +189 -141
  363. mindspore/ops/operations/nn_ops.py +794 -489
  364. mindspore/ops/operations/other_ops.py +0 -22
  365. mindspore/ops/operations/random_ops.py +53 -111
  366. mindspore/ops/operations/sparse_ops.py +3 -1
  367. mindspore/ops/primitive.py +24 -18
  368. mindspore/parallel/_auto_parallel_context.py +68 -8
  369. mindspore/parallel/_cost_model_context.py +2 -2
  370. mindspore/parallel/_offload_context.py +17 -3
  371. mindspore/parallel/_parallel_serialization.py +12 -5
  372. mindspore/parallel/_ps_context.py +12 -0
  373. mindspore/parallel/_tensor.py +18 -13
  374. mindspore/parallel/_transformer/layers.py +5 -3
  375. mindspore/parallel/_transformer/loss.py +1 -0
  376. mindspore/parallel/_transformer/moe.py +2 -2
  377. mindspore/parallel/_transformer/op_parallel_config.py +12 -1
  378. mindspore/parallel/_transformer/transformer.py +23 -3
  379. mindspore/parallel/_utils.py +11 -7
  380. mindspore/parallel/algo_parameter_config.py +85 -5
  381. mindspore/parallel/checkpoint_transform.py +19 -12
  382. mindspore/parallel/shard.py +21 -14
  383. mindspore/profiler/common/struct_type.py +3 -3
  384. mindspore/profiler/common/util.py +4 -2
  385. mindspore/profiler/envprofiling.py +1 -1
  386. mindspore/profiler/parser/aicpu_data_parser.py +5 -3
  387. mindspore/profiler/parser/ascend_flops_generator.py +2 -2
  388. mindspore/profiler/parser/ascend_fpbp_generator.py +1 -1
  389. mindspore/profiler/parser/ascend_hccl_generator.py +249 -12
  390. mindspore/profiler/parser/ascend_msprof_exporter.py +150 -255
  391. mindspore/profiler/parser/ascend_msprof_generator.py +204 -17
  392. mindspore/profiler/parser/ascend_op_generator.py +6 -6
  393. mindspore/profiler/parser/ascend_steptrace_generator.py +6 -4
  394. mindspore/profiler/parser/ascend_timeline_generator.py +14 -187
  395. mindspore/profiler/parser/base_timeline_generator.py +10 -8
  396. mindspore/profiler/parser/cpu_gpu_timeline_generator.py +16 -12
  397. mindspore/profiler/parser/flops_parser.py +15 -11
  398. mindspore/profiler/parser/framework_parser.py +38 -22
  399. mindspore/profiler/parser/hccl_parser.py +16 -12
  400. mindspore/profiler/parser/integrator.py +22 -11
  401. mindspore/profiler/parser/memory_usage_parser.py +2 -2
  402. mindspore/profiler/parser/minddata_analyzer.py +12 -14
  403. mindspore/profiler/parser/minddata_pipeline_parser.py +1 -1
  404. mindspore/profiler/parser/msadvisor_parser.py +8 -4
  405. mindspore/profiler/parser/op_intermediate_parser.py +5 -2
  406. mindspore/profiler/parser/optime_parser.py +1 -1
  407. mindspore/profiler/parser/profiler_info.py +21 -2
  408. mindspore/profiler/parser/step_trace_parser.py +11 -14
  409. mindspore/profiler/profiling.py +179 -89
  410. mindspore/rewrite/api/node.py +102 -19
  411. mindspore/rewrite/api/node_type.py +5 -1
  412. mindspore/rewrite/api/pattern_engine.py +1 -1
  413. mindspore/rewrite/api/scoped_value.py +9 -17
  414. mindspore/rewrite/api/symbol_tree.py +131 -47
  415. mindspore/rewrite/ast_helpers/__init__.py +2 -1
  416. mindspore/rewrite/ast_helpers/ast_finder.py +129 -0
  417. mindspore/rewrite/ast_helpers/ast_modifier.py +116 -104
  418. mindspore/rewrite/ast_transformers/flatten_recursive_stmt.py +93 -46
  419. mindspore/rewrite/common/rewrite_elog.py +5 -1
  420. mindspore/rewrite/namer.py +33 -24
  421. mindspore/rewrite/namespace.py +14 -5
  422. mindspore/{_extends/graph_kernel/expanders/complex → rewrite/node}/__init__.py +9 -9
  423. mindspore/rewrite/node/call_function.py +79 -0
  424. mindspore/rewrite/node/cell_container.py +135 -0
  425. mindspore/rewrite/node/control_flow.py +88 -0
  426. mindspore/rewrite/{node.py → node/node.py} +273 -234
  427. mindspore/rewrite/node/node_manager.py +254 -0
  428. mindspore/rewrite/{topological_manager.py → node/node_topological_manager.py} +13 -46
  429. mindspore/rewrite/parsers/arguments_parser.py +22 -21
  430. mindspore/rewrite/parsers/assign_parser.py +216 -221
  431. mindspore/rewrite/parsers/attribute_parser.py +9 -7
  432. mindspore/rewrite/parsers/class_def_parser.py +174 -113
  433. mindspore/rewrite/parsers/constant_parser.py +9 -6
  434. mindspore/rewrite/parsers/container_parser.py +9 -7
  435. mindspore/rewrite/parsers/for_parser.py +36 -15
  436. mindspore/rewrite/parsers/function_def_parser.py +24 -16
  437. mindspore/rewrite/parsers/if_parser.py +28 -24
  438. mindspore/rewrite/parsers/module_parser.py +196 -25
  439. mindspore/rewrite/{parser.py → parsers/parser.py} +4 -2
  440. mindspore/rewrite/{parser_register.py → parsers/parser_register.py} +1 -1
  441. mindspore/rewrite/parsers/return_parser.py +6 -6
  442. mindspore/rewrite/sparsify/sparse_transformer.py +12 -3
  443. mindspore/rewrite/sparsify/utils.py +1 -1
  444. mindspore/rewrite/symbol_tree.py +523 -578
  445. mindspore/rewrite/symbol_tree_builder.py +9 -193
  446. mindspore/rewrite/symbol_tree_dumper.py +2 -2
  447. mindspore/run_check/_check_version.py +6 -4
  448. mindspore/{ops/bprop_mindir → safeguard}/__init__.py +4 -3
  449. mindspore/safeguard/rewrite_obfuscation.py +541 -0
  450. mindspore/scipy/linalg.py +1 -1
  451. mindspore/scipy/optimize/minimize.py +7 -3
  452. mindspore/train/_utils.py +7 -3
  453. mindspore/train/amp.py +323 -123
  454. mindspore/train/anf_ir_pb2.py +14 -2
  455. mindspore/train/callback/_backup_and_restore.py +2 -12
  456. mindspore/train/callback/_callback.py +29 -4
  457. mindspore/train/callback/_checkpoint.py +23 -8
  458. mindspore/train/callback/_early_stop.py +2 -2
  459. mindspore/train/callback/_landscape.py +4 -4
  460. mindspore/train/callback/_loss_monitor.py +2 -2
  461. mindspore/train/callback/_on_request_exit.py +2 -2
  462. mindspore/train/callback/_reduce_lr_on_plateau.py +3 -4
  463. mindspore/train/callback/_summary_collector.py +15 -8
  464. mindspore/train/callback/_time_monitor.py +58 -5
  465. mindspore/train/data_sink.py +5 -11
  466. mindspore/train/dataset_helper.py +84 -57
  467. mindspore/train/loss_scale_manager.py +2 -2
  468. mindspore/train/metrics/__init__.py +3 -3
  469. mindspore/train/metrics/cosine_similarity.py +1 -1
  470. mindspore/train/metrics/hausdorff_distance.py +3 -2
  471. mindspore/train/metrics/mean_surface_distance.py +3 -2
  472. mindspore/train/metrics/metric.py +39 -19
  473. mindspore/train/metrics/roc.py +2 -2
  474. mindspore/train/metrics/root_mean_square_surface_distance.py +4 -3
  475. mindspore/train/mind_ir_pb2.py +85 -36
  476. mindspore/train/model.py +187 -47
  477. mindspore/train/serialization.py +487 -161
  478. mindspore/train/summary/_summary_adapter.py +1 -1
  479. mindspore/train/summary/_writer_pool.py +3 -2
  480. mindspore/train/summary/summary_record.py +37 -17
  481. mindspore/train/train_thor/convert_utils.py +3 -3
  482. mindspore/train/train_thor/dataset_helper.py +1 -1
  483. mindspore/version.py +1 -1
  484. {mindspore-2.1.0.dist-info → mindspore-2.2.10.dist-info}/METADATA +6 -7
  485. {mindspore-2.1.0.dist-info → mindspore-2.2.10.dist-info}/RECORD +488 -528
  486. {mindspore-2.1.0.dist-info → mindspore-2.2.10.dist-info}/entry_points.txt +0 -1
  487. mindspore/_akg/akg/tvm/contrib/debugger/__init__.py +0 -16
  488. mindspore/_akg/akg/tvm/contrib/debugger/debug_result.py +0 -274
  489. mindspore/_akg/akg/tvm/contrib/debugger/debug_runtime.py +0 -259
  490. mindspore/_akg/akg/tvm/contrib/peak.py +0 -341
  491. mindspore/_akg/akg/tvm/contrib/rpc.py +0 -25
  492. mindspore/_akg/akg/tvm/contrib/xcode.py +0 -257
  493. mindspore/_akg/akg/tvm/exec/__init__.py +0 -17
  494. mindspore/_akg/akg/tvm/exec/autotvm_log_editor.py +0 -60
  495. mindspore/_akg/akg/tvm/exec/measure_peak.py +0 -48
  496. mindspore/_akg/akg/tvm/exec/query_rpc_tracker.py +0 -48
  497. mindspore/_akg/akg/tvm/exec/rpc_proxy.py +0 -98
  498. mindspore/_akg/akg/tvm/exec/rpc_server.py +0 -88
  499. mindspore/_akg/akg/tvm/exec/rpc_tracker.py +0 -62
  500. mindspore/_akg/akg/tvm/rpc/__init__.py +0 -29
  501. mindspore/_akg/akg/tvm/rpc/base.py +0 -182
  502. mindspore/_akg/akg/tvm/rpc/client.py +0 -436
  503. mindspore/_akg/akg/tvm/rpc/proxy.py +0 -595
  504. mindspore/_akg/akg/tvm/rpc/server.py +0 -413
  505. mindspore/_akg/akg/tvm/rpc/tornado_util.py +0 -121
  506. mindspore/_akg/akg/tvm/rpc/tracker.py +0 -431
  507. mindspore/_extends/graph_kernel/expander.py +0 -80
  508. mindspore/_extends/graph_kernel/expanders/__init__.py +0 -54
  509. mindspore/_extends/graph_kernel/expanders/_utils.py +0 -269
  510. mindspore/_extends/graph_kernel/expanders/addn.py +0 -33
  511. mindspore/_extends/graph_kernel/expanders/batchnorm.py +0 -152
  512. mindspore/_extends/graph_kernel/expanders/batchnorm_grad.py +0 -105
  513. mindspore/_extends/graph_kernel/expanders/clip_by_norm_no_div_sum.py +0 -33
  514. mindspore/_extends/graph_kernel/expanders/complex/abs.py +0 -30
  515. mindspore/_extends/graph_kernel/expanders/complex/add.py +0 -44
  516. mindspore/_extends/graph_kernel/expanders/complex/div.py +0 -62
  517. mindspore/_extends/graph_kernel/expanders/complex/mul.py +0 -52
  518. mindspore/_extends/graph_kernel/expanders/complex/real_div.py +0 -62
  519. mindspore/_extends/graph_kernel/expanders/complex/sub.py +0 -45
  520. mindspore/_extends/graph_kernel/expanders/conv2d.py +0 -200
  521. mindspore/_extends/graph_kernel/expanders/dropout_grad.py +0 -30
  522. mindspore/_extends/graph_kernel/expanders/equal_count.py +0 -50
  523. mindspore/_extends/graph_kernel/expanders/erfc.py +0 -35
  524. mindspore/_extends/graph_kernel/expanders/expand_dims.py +0 -50
  525. mindspore/_extends/graph_kernel/expanders/fused_adam.py +0 -44
  526. mindspore/_extends/graph_kernel/expanders/fused_adam_weight_decay.py +0 -47
  527. mindspore/_extends/graph_kernel/expanders/fused_mul_add.py +0 -28
  528. mindspore/_extends/graph_kernel/expanders/gelu_grad.py +0 -70
  529. mindspore/_extends/graph_kernel/expanders/gkdropout.py +0 -40
  530. mindspore/_extends/graph_kernel/expanders/identity.py +0 -25
  531. mindspore/_extends/graph_kernel/expanders/layernorm.py +0 -93
  532. mindspore/_extends/graph_kernel/expanders/layernorm_grad.py +0 -113
  533. mindspore/_extends/graph_kernel/expanders/logsoftmax.py +0 -46
  534. mindspore/_extends/graph_kernel/expanders/logsoftmax_grad.py +0 -36
  535. mindspore/_extends/graph_kernel/expanders/matmul.py +0 -80
  536. mindspore/_extends/graph_kernel/expanders/maximum_grad.py +0 -59
  537. mindspore/_extends/graph_kernel/expanders/minimum_grad.py +0 -80
  538. mindspore/_extends/graph_kernel/expanders/oneslike.py +0 -26
  539. mindspore/_extends/graph_kernel/expanders/reduce_mean.py +0 -43
  540. mindspore/_extends/graph_kernel/expanders/relu_grad.py +0 -32
  541. mindspore/_extends/graph_kernel/expanders/sigmoid_cross_entropy_with_logits.py +0 -41
  542. mindspore/_extends/graph_kernel/expanders/sigmoid_cross_entropy_with_logits_grad.py +0 -35
  543. mindspore/_extends/graph_kernel/expanders/sigmoid_grad.py +0 -31
  544. mindspore/_extends/graph_kernel/expanders/slice.py +0 -35
  545. mindspore/_extends/graph_kernel/expanders/softmax_cross_entropy_with_logits.py +0 -42
  546. mindspore/_extends/graph_kernel/expanders/softmax_grad_ext.py +0 -41
  547. mindspore/_extends/graph_kernel/expanders/softsign.py +0 -28
  548. mindspore/_extends/graph_kernel/expanders/sqrt_grad.py +0 -29
  549. mindspore/_extends/graph_kernel/expanders/square_sum_all.py +0 -44
  550. mindspore/_extends/graph_kernel/expanders/square_sum_v1.py +0 -37
  551. mindspore/_extends/graph_kernel/expanders/squared_difference.py +0 -43
  552. mindspore/_extends/graph_kernel/expanders/tanh_grad.py +0 -31
  553. mindspore/_extends/graph_kernel/model/op_infer.py +0 -506
  554. mindspore/dataset/datapreprocess/__init__.py +0 -20
  555. mindspore/dataset/datapreprocess/preprocess_imagenet_validate_dataset.py +0 -54
  556. mindspore/include/api/net.h +0 -142
  557. mindspore/nn/lr_scheduler.py +0 -262
  558. mindspore/ops/_grad_experimental/grad_image_ops.py +0 -248
  559. mindspore/ops/_grad_experimental/grad_linalg_ops.py +0 -181
  560. mindspore/ops/_grad_experimental/grad_other_ops.py +0 -72
  561. mindspore/ops/_grad_experimental/grad_scalar_ops.py +0 -112
  562. mindspore/ops/_grad_experimental/grad_sequence_ops.py +0 -351
  563. mindspore/ops/bprop_mindir/BNTrainingReduce_bprop.mindir +0 -0
  564. mindspore/ops/bprop_mindir/Broadcast_bprop.mindir +0 -0
  565. mindspore/ops/bprop_mindir/Depend_bprop.mindir +0 -0
  566. mindspore/ops/bprop_mindir/DepthwiseConv2dNative_bprop.mindir +0 -138
  567. mindspore/ops/bprop_mindir/EmbeddingLookup_bprop.mindir +0 -0
  568. mindspore/ops/bprop_mindir/Load_bprop.mindir +0 -0
  569. mindspore/ops/bprop_mindir/ScatterNonAliasingAdd_bprop.mindir +0 -0
  570. mindspore/ops/bprop_mindir/SparseGatherV2_bprop.mindir +0 -0
  571. mindspore/ops/bprop_mindir/SparseSoftmaxCrossEntropyWithLogits_bprop.mindir +0 -0
  572. mindspore/ops/bprop_mindir/Switch_bprop.mindir +0 -0
  573. mindspore/ops/bprop_mindir/TransShape_bprop.mindir +0 -0
  574. mindspore/ops/bprop_mindir/TupleGetItem_bprop.mindir +0 -0
  575. mindspore/ops/bprop_mindir/Unique_bprop.mindir +0 -0
  576. mindspore/ops/bprop_mindir/Unstack_bprop.mindir +0 -0
  577. mindspore/ops/bprop_mindir/generate_mindir.py +0 -114
  578. mindspore/rewrite/node_visitor.py +0 -44
  579. {mindspore-2.1.0.dist-info → mindspore-2.2.10.dist-info}/WHEEL +0 -0
  580. {mindspore-2.1.0.dist-info → mindspore-2.2.10.dist-info}/top_level.txt +0 -0
@@ -21,6 +21,7 @@ import copy
21
21
  from collections import defaultdict
22
22
  import numpy as np
23
23
  import mindspore as ms
24
+ from mindspore.common import dtype as mstype
24
25
  from mindspore.parallel._parallel_serialization import _rank_list_for_transform_parallel_checkpoint, \
25
26
  _transform_parallel_checkpoint, _get_device_num_from_strategy, _make_dir, \
26
27
  _extract_layout_map, _extract_src_dst_layout_map, _parameter_not_in_local_stage, _extract_pipeline_stage_num, \
@@ -35,8 +36,7 @@ def merge_pipeline_strategys(src_strategy_dirs, dst_strategy_file):
35
36
  """
36
37
  Merge parallel strategy between all pipeline stages in pipeline parallel mode.
37
38
  For more details about converting distributed Checkpoint, please refer to
38
- `Distributed Resilience Training and
39
- Inference <https://www.mindspore.cn/tutorials/experts/en/r2.1/parallel/resilience_train_and_predict.html>`_.
39
+ `Model Transformation <https://www.mindspore.cn/tutorials/experts/en/r2.2/parallel/model_transformation.html>`_.
40
40
 
41
41
  Note:
42
42
  Strategy file of each pipeline stage should be included in src_strategy_dirs.
@@ -76,17 +76,16 @@ def rank_list_for_transform(rank_id, src_strategy_file=None, dst_strategy_file=N
76
76
  """
77
77
  List of original distributed checkpoint rank index for obtaining the target checkpoint of a rank_id
78
78
  during the distributed checkpoint conversion. For more details about converting distributed Checkpoint,
79
- please refer to `Distributed Resilience Training and
80
- Inference <https://www.mindspore.cn/tutorials/experts/en/r2.1/parallel/resilience_train_and_predict.html>`_.
79
+ please refer to `Model Transformation <https://www.mindspore.cn/tutorials/experts/en/r2.2/parallel/model_transformation.html>`_.
81
80
 
82
81
  Args:
83
82
  rank_id (int): The rank of which distributed checkpoint needs to be obtained after conversion.
84
83
  src_strategy_file (str): Name of source sharding strategy file which saved by
85
- 'mindspore.set_auto_parallel_context(strategy_ckpt_save_file)'.
84
+ `mindspore.set_auto_parallel_context(strategy_ckpt_save_file)`.
86
85
  when the 'src_strategy_file' is None, it means that the source sharding strategy is
87
86
  without any sharing for each parameter. Default:None.
88
87
  dst_strategy_file (str): Name of destination sharding strategy file which saved by
89
- 'mindspore.set_auto_parallel_context(strategy_ckpt_save_file)'.
88
+ `mindspore.set_auto_parallel_context(strategy_ckpt_save_file)`.
90
89
  when the 'dst_strategy_file' is None, it means that the destination sharding strategy
91
90
  is without any sharing for each parameter. Default:None.
92
91
 
@@ -139,8 +138,7 @@ def transform_checkpoint_by_rank(rank_id, checkpoint_files_map, save_checkpoint_
139
138
  """
140
139
  Transform distributed checkpoint from source sharding strategy to destination sharding strategy by rank
141
140
  for a network. For more details about converting distributed Checkpoint, please refer to
142
- `Distributed Resilience Training and
143
- Inference <https://www.mindspore.cn/tutorials/experts/en/r2.1/parallel/resilience_train_and_predict.html>`_.
141
+ `Model Transformation <https://www.mindspore.cn/tutorials/experts/en/r2.2/parallel/model_transformation.html>`_.
144
142
 
145
143
  Args:
146
144
  rank_id (int): The rank of which distributed checkpoint needs to be obtained after conversion.
@@ -195,6 +193,7 @@ def transform_checkpoint_by_rank(rank_id, checkpoint_files_map, save_checkpoint_
195
193
  raise ValueError("Checkpoint file {} in rank {} not exits: ".format(local_file, rank))
196
194
  param_total_dict = defaultdict(dict)
197
195
  param_attr_dict = defaultdict(dict)
196
+ param_type_dict = defaultdict(dict)
198
197
  src_strategy_list, dst_strategy_list = _extract_src_dst_layout_map(rank_id, src_strategy_file, dst_strategy_file)
199
198
  # src rank => local rank inside pipeline stage
200
199
  src_stage_device_num = np.prod(src_strategy_list.get(list(src_strategy_list.keys())[0])[0]) if src_strategy_list \
@@ -211,11 +210,15 @@ def transform_checkpoint_by_rank(rank_id, checkpoint_files_map, save_checkpoint_
211
210
  and _parameter_not_in_local_stage(param_name, origin_dst_strategy_list, dst_strategy_list):
212
211
  continue
213
212
  src_rank = rank % src_stage_device_num
213
+ param_type_dict[param_name][src_rank] = str(param.data.dtype)
214
+ if param.data.dtype == mstype.bfloat16:
215
+ param.set_dtype(mstype.float32)
214
216
  param_total_dict[param_name][src_rank] = param.data.asnumpy()
215
217
  param_attr_dict[param_name][src_rank] = (param.requires_grad, param.layerwise_parallel)
216
218
  local_rank_id = rank_id % dst_stage_device_num
217
219
  transform_param_list = _transform_parallel_checkpoint(local_rank_id, param_total_dict,
218
- param_attr_dict, src_strategy_list, dst_strategy_list)
220
+ param_attr_dict, src_strategy_list, dst_strategy_list,
221
+ param_type_dict)
219
222
  ms.save_checkpoint(transform_param_list, save_checkpoint_file_name)
220
223
 
221
224
 
@@ -224,8 +227,7 @@ def transform_checkpoints(src_checkpoints_dir, dst_checkpoints_dir, ckpt_prefix,
224
227
  """
225
228
  Transform distributed checkpoint from source sharding strategy to destination sharding strategy for a rank.
226
229
  For more details about converting distributed Checkpoint, please refer to
227
- `Distributed Resilience Training and
228
- Inference <https://www.mindspore.cn/tutorials/experts/en/r2.1/parallel/resilience_train_and_predict.html>`_.
230
+ `Model Transformation <https://www.mindspore.cn/tutorials/experts/en/r2.2/parallel/model_transformation.html>`_.
229
231
 
230
232
  Note:
231
233
  The `src_checkpoints_dir` directory structure should be organized like "src_checkpoints_dir/rank_0/a.ckpt", the
@@ -301,11 +303,15 @@ def transform_checkpoints(src_checkpoints_dir, dst_checkpoints_dir, ckpt_prefix,
301
303
  for needed_rank_list_key, transform_rank_list in needed_rank_list_map.items():
302
304
  param_total_dict = defaultdict(dict)
303
305
  param_attr_dict = defaultdict(dict)
306
+ param_type_dict = defaultdict(dict)
304
307
  needed_rank_list = needed_rank_list_key.split("-")
305
308
  for needed_rank in needed_rank_list:
306
309
  ckpt_dict = ms.load_checkpoint(all_checkpoint_files_map.get(int(needed_rank)))
307
310
  for param_name, param in ckpt_dict.items():
308
311
  src_rank = int(needed_rank) % src_stage_device_num
312
+ param_type_dict[param_name][src_rank] = str(param.data.dtype)
313
+ if param.data.dtype == mstype.bfloat16:
314
+ param.set_dtype(mstype.float32)
309
315
  param_total_dict[param_name][src_rank] = param.data.asnumpy()
310
316
  param_attr_dict[param_name][src_rank] = (param.requires_grad, param.layerwise_parallel)
311
317
  for transform_rank in transform_rank_list:
@@ -320,7 +326,8 @@ def transform_checkpoints(src_checkpoints_dir, dst_checkpoints_dir, ckpt_prefix,
320
326
 
321
327
  local_rank_id = transform_rank % dst_stage_device_num
322
328
  transform_param_list = _transform_parallel_checkpoint(local_rank_id, param_total_dict_copy,
323
- param_attr_dict, src_strategy_list, dst_strategy_list)
329
+ param_attr_dict, src_strategy_list, dst_strategy_list,
330
+ param_type_dict)
324
331
  save_checkpoint_file = "{}{}.ckpt".format(ckpt_prefix, transform_rank)
325
332
  save_checkpoint_file_dir = os.path.join(dst_checkpoints_dir, "rank_{}".format(transform_rank))
326
333
  if not os.path.exists(save_checkpoint_file_dir):
@@ -24,7 +24,7 @@ class Shard(Shard_):
24
24
 
25
25
  def __init__(self):
26
26
  """Initialize Shard."""
27
- Shard_.__init__(self, 'Shard')
27
+ super().__init__('Shard')
28
28
  self.shard_fn = None
29
29
  self.fn = None
30
30
  self.in_strategy = None
@@ -36,13 +36,17 @@ class Shard(Shard_):
36
36
  def __call__(self, fn, in_strategy, out_strategy=None, parameter_plan=None, device="Ascend", level=0):
37
37
  if ms.context.get_context("mode") != ms.context.PYNATIVE_MODE or \
38
38
  ms.context.get_auto_parallel_context("parallel_mode") not in ["auto_parallel"]:
39
- raise AssertionError(f"Cell shard only supports auto parallel under PyNative mode.")
39
+ raise AssertionError(
40
+ f"Cell shard only supports auto parallel under PyNative mode.")
40
41
  if ms.context.get_context("device_target") not in ["Ascend", "GPU"]:
41
- raise AssertionError(f"'Shard' now only supports 'Ascend' and 'GPU'")
42
+ raise AssertionError(
43
+ f"'Shard' now only supports 'Ascend' and 'GPU'")
42
44
  if ms.context.get_auto_parallel_context("search_mode") != "sharding_propagation":
43
- raise AssertionError(f"'search_mode' must be 'sharding_propagation' for 'Shard'")
45
+ raise AssertionError(
46
+ f"'search_mode' must be 'sharding_propagation' for 'Shard'")
44
47
  if not isinstance(in_strategy, tuple):
45
- raise TypeError(f"For 'Shard', the 'in_strategy' should be a tuple, but got {type(in_strategy).__name__}")
48
+ raise TypeError(
49
+ f"For 'Shard', the 'in_strategy' should be a tuple, but got {type(in_strategy).__name__}")
46
50
  if not isinstance(out_strategy, (type(None), tuple)):
47
51
  raise TypeError(f"For 'Shard', the 'out_strategy' should be None or tuple, "
48
52
  f"but got {type(out_strategy).__name__}")
@@ -117,7 +121,8 @@ class Shard(Shard_):
117
121
  return
118
122
  if isinstance(parameter_plan, dict):
119
123
  if not isinstance(fn, ms.nn.Cell):
120
- raise TypeError(f"If parameter_plan is set, type of fn must be mindspore.nn.Cell, but got {type(fn)}")
124
+ raise TypeError(
125
+ f"If parameter_plan is set, type of fn must be mindspore.nn.Cell, but got {type(fn)}")
121
126
  for k in parameter_plan.keys():
122
127
  v = parameter_plan[k]
123
128
  if not isinstance(k, str) or not isinstance(v, tuple):
@@ -131,10 +136,12 @@ class Shard(Shard_):
131
136
  param_strategy = parameter_plan[param_name]
132
137
  param = self._search_parameter_by_name(param_name, fn)
133
138
  if param is None:
134
- logger.warning(f"{param_name} is not exist, ignored its setting.")
139
+ logger.warning(
140
+ f"{param_name} is not exist, ignored its setting.")
135
141
  continue
136
142
 
137
- self._check_layout_is_valid(param_name, param.shape, param_strategy)
143
+ self._check_layout_is_valid(
144
+ param_name, param.shape, param_strategy)
138
145
  if param.param_info.param_strategy:
139
146
  logger.warning(f"The layout of parameter '{param_name}' "
140
147
  f"has been set to {param.param_info.param_strategy}, "
@@ -143,7 +150,7 @@ class Shard(Shard_):
143
150
 
144
151
  def _is_attrs_has_been_set(self, fn, in_strategy, out_strategy, device, level):
145
152
  return self.shard_fn is not None and self.fn == fn and self.in_strategy == in_strategy and \
146
- self.out_strategy == out_strategy and self.device == device and self.level == level
153
+ self.out_strategy == out_strategy and self.device == device and self.level == level
147
154
 
148
155
 
149
156
  def shard(fn, in_strategy, out_strategy=None, parameter_plan=None, device="Ascend", level=0):
@@ -159,8 +166,8 @@ def shard(fn, in_strategy, out_strategy=None, parameter_plan=None, device="Ascen
159
166
 
160
167
  Note:
161
168
  You need to set the execution mode to PyNative mode,
162
- set the parallel mode in `set_auto_parallel_context` to "auto_parallel"
163
- and the search mode to "sharding_propagation".
169
+ set the parallel mode in `set_auto_parallel_context` (parallel_mode) to "auto_parallel"
170
+ and the search mode (search_mode) to "sharding_propagation".
164
171
  If the input contain Parameter, its strategy should be set in `in_strategy`.
165
172
 
166
173
  Args:
@@ -216,15 +223,15 @@ def shard(fn, in_strategy, out_strategy=None, parameter_plan=None, device="Ascen
216
223
  ... device_num=2)
217
224
  >>> def test_shard(x, y):
218
225
  ... return x + y
219
- >>> x = Tensor(np.ones(shape=(32, 10)))
220
- >>> y = Tensor(np.ones(shape=(32, 10)))
226
+ >>> x = Tensor(np.ones(shape=(32, 10)), dtype=ms.float32)
227
+ >>> y = Tensor(np.ones(shape=(32, 10)), dtype=ms.float32)
221
228
  >>> output = ms.shard(test_shard, in_strategy=((2, 1), (2, 1)))(x, y)
222
229
  >>> print(output.shape)
223
230
  (32, 10)
224
231
 
225
232
  Tutorial Examples:
226
233
  - `Functional Operator Sharding
227
- <https://www.mindspore.cn/docs/en/r2.1/api_python/samples/mindspore/pynative_shard_function_parallel.html>`_
234
+ <https://www.mindspore.cn/tutorials/experts/en/r2.2/parallel/pynative_shard_function_parallel.html>`_
228
235
  """
229
236
  if not isinstance(fn, (ms.nn.Cell)):
230
237
  logger.warning("'fn' is not a mindspore.nn.Cell, and its definition cannot involve Parameter; "
@@ -72,7 +72,7 @@ class StructType(Enum):
72
72
  """
73
73
  Parse the binary data to get the unpacked data.
74
74
 
75
- Args
75
+ Args:
76
76
  data_struct (dict): Key is the data name, value is StructType.
77
77
  binary_data (str): This value should be a binary string.
78
78
  special_func (Callable): This is a callable function,
@@ -105,14 +105,14 @@ class StructType(Enum):
105
105
  for name, data_type in data_struct.items():
106
106
  data_size = StructType.sizeof(data_type)
107
107
  if special_process_func:
108
- unpack_data, success = special_process_func(binary_data[cursor:cursor+data_size], name,
108
+ unpack_data, success = special_process_func(binary_data[cursor:cursor + data_size], name,
109
109
  data_type, unpacked_data)
110
110
  if success:
111
111
  cursor += data_size
112
112
  unpacked_data[name] = unpack_data
113
113
  continue
114
114
 
115
- unpack_data = struct.unpack(data_type.value, binary_data[cursor: cursor+data_size])[0]
115
+ unpack_data = struct.unpack(data_type.value, binary_data[cursor: cursor + data_size])[0]
116
116
  cursor += data_size
117
117
  unpacked_data[name] = unpack_data
118
118
  return unpacked_data
@@ -23,6 +23,8 @@ import os
23
23
  import re
24
24
  import stat
25
25
 
26
+ from mindspore import log as logger
27
+
26
28
 
27
29
  def to_int(param, param_name):
28
30
  """
@@ -80,9 +82,9 @@ def fwrite_format(output_data_path, data_source=None, is_print=False, is_start=F
80
82
  if isinstance(raw_data, (list, tuple)):
81
83
  raw_data = map(str, raw_data)
82
84
  raw_data = " ".join(raw_data)
83
- print(raw_data)
85
+ logger.info(raw_data)
84
86
  else:
85
- print(data_source)
87
+ logger.info(data_source)
86
88
 
87
89
 
88
90
  def get_log_slice_id(file_name):
@@ -68,7 +68,7 @@ def construct_profiling_options():
68
68
  logger.error(
69
69
  "The format of MS_PROFILER_OPTIONS is incorrect. "
70
70
  "The MS_PROFILER_OPTIONS parameter configuration may refer to "
71
- "'https://www.mindspore.cn/mindinsight/docs/zh-CN/master/performance_profiling_ascend.html'."
71
+ "'https://www.mindspore.cn/mindinsight/docs/zh-CN/r2.2/performance_profiling_ascend.html'."
72
72
  )
73
73
  return error_config
74
74
  conbine_options = combine_profile_options(profiling_options)
@@ -72,9 +72,11 @@ class DataPreProcessParser:
72
72
  _source_file_target_old = 'DATA_PREPROCESS.dev.AICPU.'
73
73
  _source_file_target = 'DATA_PREPROCESS.AICPU.'
74
74
  _dst_file_title = 'title:DATA_PREPROCESS AICPU'
75
- _dst_file_column_title = ['serial_number', 'node_type_name', 'total_time(ms)',
76
- 'dispatch_time(ms)', 'execution_time(ms)', 'run_start',
77
- 'run_end']
75
+ _dst_file_column_title = [
76
+ 'serial_number', 'node_type_name', 'total_time(ms)',
77
+ 'dispatch_time(ms)', 'execution_time(ms)', 'run_start',
78
+ 'run_end'
79
+ ]
78
80
  _ms_unit = 1000
79
81
  _us_unit = 100 # Convert 10ns to 1us.
80
82
  _task_id_threshold = 65536
@@ -78,7 +78,7 @@ class AscendFlopsGenerator:
78
78
  writer.writerows(self.flops.tolist())
79
79
  except (IOError, OSError) as err:
80
80
  logging.critical('Errot occurred when write flops file: %s', err)
81
- raise ProfilerIOException()
81
+ raise ProfilerIOException() from err
82
82
  if os.path.exists(flops_path):
83
83
  os.chmod(flops_path, stat.S_IREAD | stat.S_IWRITE)
84
84
 
@@ -89,6 +89,6 @@ class AscendFlopsGenerator:
89
89
  json.dump(self.flops_summary, json_file)
90
90
  except (IOError, OSError) as err:
91
91
  logging.critical('Errot occurred when write step trace point info file: %s', err)
92
- raise ProfilerIOException()
92
+ raise ProfilerIOException() from err
93
93
  if os.path.exists(flops_summary_path):
94
94
  os.chmod(flops_summary_path, stat.S_IREAD | stat.S_IWRITE)
@@ -71,6 +71,6 @@ class AscendFPBPGenerator:
71
71
  json.dump(self.points, json_file)
72
72
  except (IOError, OSError) as err:
73
73
  logging.critical('Errot occurred when write step trace point info file: %s', err)
74
- raise ProfilerIOException()
74
+ raise ProfilerIOException() from err
75
75
  if os.path.exists(step_trace_point_info_path):
76
76
  os.chmod(step_trace_point_info_path, stat.S_IREAD | stat.S_IWRITE)
@@ -82,6 +82,238 @@ def count_average(data):
82
82
  class AscendHCCLGenerator:
83
83
  """Generate ascend hccl data from files."""
84
84
 
85
+ def __init__(self, source_path, steptrace):
86
+ self.root_path = source_path
87
+ self.steptrace = steptrace
88
+ self.hccl_raw = []
89
+ self.hccl_data_df = np.dtype(
90
+ [('model_id', int), ('iteration_id', int), ('name', object), ('pid', int), ('tid', int), ('ts', float),
91
+ ('te', float), ('dur', float), ('ph', object),
92
+ ('task_type', object), ('link_info', object), ('transport_type', object), ('size', int), ('tag', object)])
93
+
94
+ @staticmethod
95
+ def _cost_analyse(iteration):
96
+ """analyse communication cost and wait cost"""
97
+ communication_cost = np.sum(iteration[iteration['name'] != 'Notify_Wait']['dur'])
98
+ wait_cost = np.sum(iteration[iteration['name'] == 'Notify_Wait']['dur'])
99
+ return communication_cost, wait_cost
100
+
101
+ @staticmethod
102
+ def _rdma_analyse(groupby_transport):
103
+ """rdma analyse"""
104
+ thread_groups = np.unique(groupby_transport['tid'])
105
+ thread_information = []
106
+ for thread_index in thread_groups:
107
+ groupby_thread = groupby_transport[groupby_transport['tid'] == thread_index]
108
+ rdma_communication_time = 0
109
+ rdma_communication_size = 0
110
+ rdma_communication_wait_time = 0
111
+ start_index = 0
112
+ end_index = groupby_thread.size - 2
113
+ while start_index < end_index:
114
+ first_task_type = groupby_thread[start_index]['task_type']
115
+ if first_task_type == 'RDMASend':
116
+ second_index = start_index + 1
117
+ third_index = start_index + 2
118
+ second_task_type = groupby_thread[second_index]['task_type']
119
+ third_task_type = groupby_thread[third_index]['task_type']
120
+ if second_task_type == 'RDMASend' and third_task_type == 'Notify Wait':
121
+ rdma_send_cost = groupby_thread[start_index]['dur']
122
+ notify_record_cost = groupby_thread[second_index]['dur']
123
+ notify_wait_cost = groupby_thread[third_index]['dur']
124
+ rdma_communication_time += rdma_send_cost + notify_record_cost + notify_wait_cost
125
+ rdma_communication_wait_time += notify_wait_cost
126
+ rdma_communication_size += groupby_thread[start_index]['size'] + groupby_thread[second_index][
127
+ 'size']
128
+ start_index += 2
129
+ start_index += 1
130
+ rdma_communication_wait_time = rdma_communication_wait_time / 1e3
131
+ rdma_communication_size = rdma_communication_size / 1e3
132
+ rdma_communication_time = rdma_communication_time / 1e3
133
+ rdma_bandwidth = rdma_communication_size / (rdma_communication_time / 1e3) \
134
+ if rdma_communication_size else 0
135
+ thread_information.append(
136
+ [rdma_communication_time, rdma_communication_size, rdma_bandwidth, rdma_communication_wait_time])
137
+ if len(thread_information) > 1:
138
+ thread_information = np.sum(thread_information, axis=0).tolist()
139
+
140
+ return thread_information
141
+
142
+ def parse(self):
143
+ """Analyse the original hccl data generator hccl data."""
144
+ hccl_data = []
145
+ for hccl_file in find_files(self.root_path, "hccl_*.json"):
146
+ with open(hccl_file) as fr:
147
+ hccl_data.append(self._original_data_analyse(json.load(fr)))
148
+ hccl_data = np.concatenate(hccl_data)
149
+
150
+ for model_id in np.unique(hccl_data['model_id']):
151
+ hccl_data_model = hccl_data[hccl_data['model_id'] == model_id]
152
+ for iteration_id in np.unique(hccl_data_model['iteration_id']):
153
+ hccl_data_model_iteration = hccl_data_model[hccl_data_model['iteration_id'] == iteration_id]
154
+
155
+ hccl_abstract_data = hccl_data_model_iteration[hccl_data_model_iteration['task_type'] == '']
156
+ hccl_detail_data = hccl_data_model_iteration[hccl_data_model_iteration['task_type'] != '']
157
+ hccl_abstract_data = np.sort(hccl_abstract_data, order='ts')
158
+ hccl_detail_data = np.sort(hccl_detail_data, order='ts')
159
+
160
+ tag = np.searchsorted(hccl_abstract_data['ts'], hccl_detail_data['ts'], side='right') - 1
161
+
162
+ hccl_detail_data['tag'] = [x[-1] for x in
163
+ np.char.split(hccl_abstract_data[tag]['name'].astype(str), sep='/')]
164
+
165
+ self.hccl_raw.append(self._iteration_analyse(hccl_detail_data, iteration_id))
166
+
167
+ self.hccl_raw = sorted(self.hccl_raw, key=lambda x: x[0])
168
+ self.hccl_raw.append(copy.deepcopy(self.hccl_raw[-1]))
169
+ self.hccl_raw[-1][0] = '-'
170
+ for _, value in self.hccl_raw[-1][4].items():
171
+ value[0] = '-'
172
+
173
+ def write(self, hccl_raw_path):
174
+ """
175
+ Write the flops.csv and flops_summary.json
176
+
177
+ Args:
178
+ hccl_raw_path(str): hccl_raw.csv path.
179
+ """
180
+ try:
181
+ with os.fdopen(os.open(hccl_raw_path,
182
+ os.O_WRONLY | os.O_CREAT | os.O_TRUNC, stat.S_IWUSR | stat.S_IRUSR), 'w',
183
+ newline='') as hccl_row:
184
+ writer = csv.writer(hccl_row)
185
+ writer.writerow(
186
+ ['step_num', 'communication_cost', 'wait_cost', 'link_info', 'communication_operator_cost'])
187
+ for row in self.hccl_raw:
188
+ row[3] = json.dumps(row[3])
189
+ row[4] = json.dumps(row[4])
190
+ writer.writerows(self.hccl_raw)
191
+ except (IOError, OSError) as err:
192
+ logging.critical('Errot occurred when write aicore detail file: %s', err)
193
+ raise ProfilerIOException() from err
194
+ if os.path.exists(hccl_raw_path):
195
+ os.chmod(hccl_raw_path, stat.S_IREAD | stat.S_IWRITE)
196
+
197
+ def _original_data_analyse(self, original_data):
198
+ """analyse original data"""
199
+
200
+ groups_steptrace = {model_id: np.sort(self.steptrace[self.steptrace['Model ID'] == model_id],
201
+ order='Iteration ID')
202
+ for model_id in np.unique(self.steptrace['Model ID'])}
203
+
204
+ target_data = []
205
+ for row in original_data:
206
+ model_id = row.get('args', {}).get('model id')
207
+ if row.get('ph') == 'X' and model_id is not None:
208
+ name = row.get('name')
209
+ pid = row.get('pid')
210
+ tid = row.get('tid')
211
+ ts = row.get('ts')
212
+ dur = row.get('dur')
213
+ te = ts + dur
214
+ ph = row.get('ph')
215
+ task_type = row.get('args', {}).get('task type', '')
216
+ src_rank = row.get('args', {}).get('src rank', 0)
217
+ dst_rank = row.get('args', {}).get('dst rank', 0)
218
+ if src_rank == int('0xffffffff', 16):
219
+ src_rank = dst_rank
220
+ if dst_rank == int('0xffffffff', 16):
221
+ dst_rank = src_rank
222
+ transport_type = row.get('args', {}).get('transport type', '')
223
+ if transport_type == 'LOCAL':
224
+ src_rank, dst_rank = dst_rank, src_rank
225
+ link_info = str(src_rank) + '-' + str(dst_rank)
226
+ size = row.get('args', {}).get('size(Byte)', 0)
227
+ size = size if isinstance(size, int) else int(size, 16)
228
+ steptrace = groups_steptrace.get(model_id, None)
229
+ if steptrace is None:
230
+ logging.warning('Could not find model: %s in hccl json, skip.', model_id)
231
+ continue
232
+ tag = np.searchsorted(steptrace['Iteration End'], te * 1e-3, side='left')
233
+ iteration_id = steptrace[tag]['Iteration ID']
234
+ target_data.append(
235
+ tuple([model_id, iteration_id, name, pid, tid,
236
+ ts, te, dur, ph, task_type,
237
+ link_info, transport_type, size, -1]))
238
+
239
+ hccl_data = np.array(target_data, dtype=self.hccl_data_df)
240
+
241
+ return hccl_data
242
+
243
+ def _iteration_analyse(self, hccl_detail_data, iteration):
244
+ """analyse data by iteration """
245
+ communication_cost, wait_cost = self._cost_analyse(hccl_detail_data)
246
+ link_info = self._link_info_analyse(hccl_detail_data)
247
+ communication_operator_cost = self._communication_operator_cost_analyse(hccl_detail_data, iteration)
248
+ return [iteration, communication_cost, wait_cost, link_info, communication_operator_cost]
249
+
250
+ def _link_info_analyse(self, hccl_detail_data):
251
+ """analyse link info data"""
252
+ groupby_iteration = hccl_detail_data[hccl_detail_data['task_type'] != 'Notify Record']
253
+ link_info_groups = np.unique(groupby_iteration['link_info'])
254
+ link_info_information = dict()
255
+ for link_info_index in link_info_groups:
256
+ groupby_link_info = groupby_iteration[groupby_iteration['link_info'] == link_info_index]
257
+ transport_groups = np.unique(groupby_iteration['transport_type'])
258
+ transport_information = dict()
259
+ for transport_index in transport_groups:
260
+ groupby_transport = groupby_link_info[groupby_link_info['transport_type'] == transport_index]
261
+ if transport_index == 'SDMA' and groupby_transport.size > 0:
262
+ groupby_sdma = \
263
+ groupby_transport[np.isin(groupby_transport['task_type'], ['Memcpy', 'Reduce Inline'])][
264
+ ['dur', 'size']]
265
+ sdma_communication_time = np.sum(groupby_sdma['dur']) * 1e-3
266
+ sdma_communication_size = np.sum(groupby_sdma['size']) * 1e-3
267
+ sdma_bandwidth = sdma_communication_size / sdma_communication_time * 1e-3 \
268
+ if sdma_communication_time != 0 else 0
269
+ transport_information['SDMA'] = [sdma_communication_time, sdma_communication_size, sdma_bandwidth]
270
+ elif transport_index == 'RDMA' and groupby_transport.size > 0:
271
+ transport_information['RDMA'] = self._rdma_analyse(groupby_transport)
272
+ link_info_information[link_info_index] = transport_information
273
+ return link_info_information
274
+
275
+ def _communication_operator_cost_analyse(self, hccl_detail_data, iteration_index):
276
+ """analyse communication operator cost"""
277
+ groupby_iteration = hccl_detail_data[hccl_detail_data['task_type'] != 'Notify Record']
278
+ tag_groups = np.unique(groupby_iteration['tag'])
279
+ tag_information = dict()
280
+ for tag_index in tag_groups:
281
+ groupby_tag = groupby_iteration[groupby_iteration['tag'] == tag_index]
282
+ link_groups = np.unique(groupby_iteration['link_info'])
283
+ link_info_information = dict()
284
+ for link_info_index in link_groups:
285
+ groupby_link_info = groupby_tag[groupby_tag['link_info'] == link_info_index]
286
+ transport_groups = np.unique(groupby_link_info['transport_type'])
287
+ transport_information = dict()
288
+ for transport_index in transport_groups:
289
+ groupby_transport = groupby_link_info[groupby_link_info['transport_type'] == transport_index]
290
+ if transport_index == 'SDMA':
291
+ groupby_sdma = \
292
+ groupby_transport[np.isin(groupby_transport['task_type'], ['Memcpy', 'Reduce Inline'])][
293
+ ['dur', 'size']]
294
+ sdma_communication_time = np.sum(groupby_sdma['dur']) * 1e-3
295
+ sdma_communication_size = np.sum(groupby_sdma['size']) * 1e-3
296
+ sdma_bandwidth = sdma_communication_size / sdma_communication_time * 1e-3 \
297
+ if sdma_communication_time != 0 else 0
298
+ transport_information['SDMA'] = [
299
+ sdma_communication_time, sdma_communication_size,
300
+ sdma_bandwidth
301
+ ]
302
+ elif transport_index == 'RDMA':
303
+ transport_information['RDMA'] = self._rdma_analyse(groupby_transport)
304
+ link_info_information[link_info_index] = transport_information
305
+ communication_cost = np.sum(groupby_tag[groupby_tag['name'] != 'Notify_Wait']['dur'])
306
+ wait_cost = np.sum(groupby_tag[groupby_tag['name'] == 'Notify_Wait']['dur'])
307
+ tag_information[tag_index] = [
308
+ str(iteration_index), communication_cost, wait_cost,
309
+ link_info_information
310
+ ]
311
+ return tag_information
312
+
313
+
314
+ class AscendHCCLGeneratorOld:
315
+ """Generate ascend hccl data from files."""
316
+
85
317
  def __init__(self, source_path):
86
318
  self.root_path = source_path
87
319
  self.hccl_raw = []
@@ -99,7 +331,7 @@ class AscendHCCLGenerator:
99
331
  @staticmethod
100
332
  def _rdma_analyse(groupby_transport):
101
333
  """rdma analyse"""
102
- thread_groups, _, _, _ = np.unique(groupby_transport['tid'])
334
+ thread_groups = np.unique(groupby_transport['tid'])
103
335
  thread_information = []
104
336
  for thread_index in thread_groups:
105
337
  groupby_thread = groupby_transport[groupby_transport['tid'] == thread_index]
@@ -107,7 +339,7 @@ class AscendHCCLGenerator:
107
339
  rdma_communication_size = 0
108
340
  rdma_communication_wait_time = 0
109
341
  start_index = 0
110
- end_index = groupby_thread.size - 1
342
+ end_index = groupby_thread.size - 2
111
343
  while start_index < end_index:
112
344
  first_task_type = groupby_thread[start_index]['task_type']
113
345
  if first_task_type == 'RDMASend':
@@ -142,7 +374,8 @@ class AscendHCCLGenerator:
142
374
  file_list = find_files(self.root_path, "hccl_*.json")
143
375
 
144
376
  for hccl_file in file_list:
145
- iteration_id = int(hccl_file.split('_')[-1].split(('.'))[0])
377
+ _, relative_path = os.path.split(hccl_file)
378
+ iteration_id = int(relative_path.split('_')[3])
146
379
  with open(hccl_file) as f:
147
380
  _, hccl_detail_data = self._original_data_analyse(json.load(f))
148
381
  raw = self._iteration_analyse(hccl_detail_data, iteration_id)
@@ -173,7 +406,7 @@ class AscendHCCLGenerator:
173
406
  writer.writerows(self.hccl_raw)
174
407
  except (IOError, OSError) as err:
175
408
  logging.critical('Errot occurred when write aicore detail file: %s', err)
176
- raise ProfilerIOException()
409
+ raise ProfilerIOException() from err
177
410
  if os.path.exists(hccl_raw_path):
178
411
  os.chmod(hccl_raw_path, stat.S_IREAD | stat.S_IWRITE)
179
412
 
@@ -206,8 +439,8 @@ class AscendHCCLGenerator:
206
439
  tuple([name, pid, tid, ts, te, dur, ph, task_type, link_info, transport_type, size, -1]))
207
440
  hccl_data = np.array(target_data, dtype=self.hccl_data_df)
208
441
 
209
- hccl_abstract_data = hccl_data[hccl_data['tid'] >= 100]
210
- hccl_detail_data = hccl_data[hccl_data['tid'] < 100]
442
+ hccl_abstract_data = hccl_data[hccl_data['task_type'] == '']
443
+ hccl_detail_data = hccl_data[hccl_data['task_type'] != '']
211
444
 
212
445
  hccl_abstract_data = hccl_abstract_data[np.argsort(hccl_abstract_data['ts'])]
213
446
  hccl_detail_data = hccl_detail_data[np.argsort(hccl_detail_data['ts'])]
@@ -234,7 +467,7 @@ class AscendHCCLGenerator:
234
467
  transport_information = dict()
235
468
  for transport_index in transport_groups:
236
469
  groupby_transport = groupby_link_info[groupby_link_info['transport_type'] == transport_index]
237
- if transport_index == 'SDMA':
470
+ if transport_index == 'SDMA' and groupby_transport.size > 0:
238
471
  groupby_sdma = \
239
472
  groupby_transport[np.isin(groupby_transport['task_type'], ['Memcpy', 'Reduce Inline'])][
240
473
  ['dur', 'size']]
@@ -243,7 +476,7 @@ class AscendHCCLGenerator:
243
476
  sdma_bandwidth = sdma_communication_size / sdma_communication_time * 1e-3 \
244
477
  if sdma_communication_time != 0 else 0
245
478
  transport_information['SDMA'] = [sdma_communication_time, sdma_communication_size, sdma_bandwidth]
246
- elif transport_index == 'RDMA':
479
+ elif transport_index == 'RDMA' and groupby_transport.size > 0:
247
480
  transport_information['RDMA'] = self._rdma_analyse(groupby_transport)
248
481
  link_info_information[link_info_index] = transport_information
249
482
  return link_info_information
@@ -271,13 +504,17 @@ class AscendHCCLGenerator:
271
504
  sdma_communication_size = np.sum(groupby_sdma['size']) * 1e-3
272
505
  sdma_bandwidth = sdma_communication_size / sdma_communication_time * 1e-3 \
273
506
  if sdma_communication_time != 0 else 0
274
- transport_information['SDMA'] = [sdma_communication_time, sdma_communication_size,
275
- sdma_bandwidth]
507
+ transport_information['SDMA'] = [
508
+ sdma_communication_time, sdma_communication_size,
509
+ sdma_bandwidth
510
+ ]
276
511
  elif transport_index == 'RDMA':
277
512
  transport_information['RDMA'] = self._rdma_analyse(groupby_transport)
278
513
  link_info_information[link_info_index] = transport_information
279
514
  communication_cost = np.sum(groupby_tag[groupby_tag['name'] != 'Notify_Wait']['dur'])
280
515
  wait_cost = np.sum(groupby_tag[groupby_tag['name'] == 'Notify_Wait']['dur'])
281
- tag_information[tag_index] = [str(iteration_index), communication_cost, wait_cost,
282
- link_info_information]
516
+ tag_information[tag_index] = [
517
+ str(iteration_index), communication_cost, wait_cost,
518
+ link_info_information
519
+ ]
283
520
  return tag_information