mindspore 2.3.0__cp39-none-any.whl → 2.3.0rc2__cp39-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mindspore might be problematic. Click here for more details.

Files changed (423) hide show
  1. mindspore/.commit_id +1 -1
  2. mindspore/Third_Party_Open_Source_Software_Notice +0 -1512
  3. mindspore/__init__.py +1 -2
  4. mindspore/_c_dataengine.cpython-39-aarch64-linux-gnu.so +0 -0
  5. mindspore/_c_expression.cpython-39-aarch64-linux-gnu.so +0 -0
  6. mindspore/_c_mindrecord.cpython-39-aarch64-linux-gnu.so +0 -0
  7. mindspore/_checkparam.py +25 -5
  8. mindspore/_extends/graph_kernel/model/graph_parallel.py +1 -1
  9. mindspore/_extends/parse/__init__.py +2 -2
  10. mindspore/_extends/parse/compile_config.py +0 -29
  11. mindspore/_extends/parse/namespace.py +2 -2
  12. mindspore/_extends/parse/parser.py +5 -21
  13. mindspore/_extends/parse/resources.py +7 -5
  14. mindspore/_extends/parse/standard_method.py +59 -40
  15. mindspore/_mindspore_offline_debug.cpython-39-aarch64-linux-gnu.so +0 -0
  16. mindspore/amp.py +5 -26
  17. mindspore/bin/cache_admin +0 -0
  18. mindspore/bin/cache_server +0 -0
  19. mindspore/boost/adasum.py +1 -1
  20. mindspore/boost/base.py +1 -1
  21. mindspore/boost/boost_cell_wrapper.py +1 -1
  22. mindspore/boost/grad_freeze.py +2 -2
  23. mindspore/boost/less_batch_normalization.py +6 -9
  24. mindspore/common/__init__.py +1 -8
  25. mindspore/common/_register_for_tensor.py +9 -8
  26. mindspore/common/api.py +65 -275
  27. mindspore/common/dtype.py +4 -8
  28. mindspore/common/dump.py +5 -2
  29. mindspore/common/jit_config.py +1 -1
  30. mindspore/common/lazy_inline.py +2 -14
  31. mindspore/common/parameter.py +15 -14
  32. mindspore/common/recompute.py +5 -20
  33. mindspore/common/sparse_tensor.py +6 -21
  34. mindspore/common/tensor.py +52 -100
  35. mindspore/communication/__init__.py +11 -6
  36. mindspore/communication/management.py +94 -92
  37. mindspore/context.py +18 -180
  38. mindspore/dataset/engine/datasets.py +46 -69
  39. mindspore/dataset/engine/datasets_user_defined.py +53 -72
  40. mindspore/dataset/engine/datasets_vision.py +2 -2
  41. mindspore/dataset/engine/queue.py +38 -56
  42. mindspore/dataset/engine/validators.py +5 -11
  43. mindspore/dataset/vision/__init__.py +5 -5
  44. mindspore/dataset/vision/c_transforms.py +5 -5
  45. mindspore/dataset/vision/py_transforms_util.py +1 -1
  46. mindspore/dataset/vision/transforms.py +46 -591
  47. mindspore/dataset/vision/utils.py +1 -121
  48. mindspore/dataset/vision/validators.py +3 -9
  49. mindspore/hal/__init__.py +1 -7
  50. mindspore/hal/device.py +1 -1
  51. mindspore/include/api/model.h +0 -3
  52. mindspore/include/dataset/vision.h +2 -54
  53. mindspore/include/mindapi/base/types.h +0 -1
  54. mindspore/lib/libdnnl.so.2 +0 -0
  55. mindspore/lib/libmindspore.so +0 -0
  56. mindspore/lib/libmindspore_backend.so +0 -0
  57. mindspore/lib/libmindspore_common.so +0 -0
  58. mindspore/lib/libmindspore_core.so +0 -0
  59. mindspore/lib/libmindspore_glog.so.0 +0 -0
  60. mindspore/lib/libmindspore_gpr.so.15 +0 -0
  61. mindspore/lib/libmindspore_grpc++.so.1 +0 -0
  62. mindspore/lib/libmindspore_grpc.so.15 +0 -0
  63. mindspore/lib/libmindspore_shared_lib.so +0 -0
  64. mindspore/lib/libmpi_adapter.so +0 -0
  65. mindspore/lib/libmpi_collective.so +0 -0
  66. mindspore/lib/libnnacl.so +0 -0
  67. mindspore/lib/libopencv_core.so.4.5 +0 -0
  68. mindspore/lib/libps_cache.so +0 -0
  69. mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/config/ascend310p/aic-ascend310p-ops-info.json +0 -35
  70. mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/custom_aicore_ops_impl/kv_cache_mgr.py +0 -2
  71. mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/vector_core/tbe/custom_aicore_ops_impl/kv_cache_mgr.py +0 -2
  72. mindspore/lib/plugin/ascend/custom_aicpu_ops/op_impl/cpu/aicpu_kernel/impl/libcust_cpu_kernels.so +0 -0
  73. mindspore/lib/plugin/ascend/custom_aicpu_ops/op_impl/cpu/config/cust_aicpu_kernel.json +0 -72
  74. mindspore/lib/plugin/ascend/custom_aicpu_ops/op_proto/libcust_op_proto.so +0 -0
  75. mindspore/lib/plugin/ascend/custom_ascendc_ops/op_api/include/{aclnn_all_finite.h → aclnn_add_custom.h} +11 -9
  76. mindspore/lib/plugin/ascend/custom_ascendc_ops/op_api/include/aclnn_decoder_kv_cache.h +1 -1
  77. mindspore/lib/plugin/ascend/custom_ascendc_ops/op_api/include/aclnn_prompt_kv_cache.h +1 -1
  78. mindspore/lib/plugin/ascend/custom_ascendc_ops/op_api/lib/libcust_opapi.so +0 -0
  79. mindspore/lib/plugin/ascend/custom_ascendc_ops/op_impl/ai_core/tbe/config/ascend310p/aic-ascend310p-ops-info.json +12 -184
  80. mindspore/lib/plugin/ascend/custom_ascendc_ops/op_impl/ai_core/tbe/config/ascend910/aic-ascend910-ops-info.json +15 -7
  81. mindspore/lib/plugin/ascend/custom_ascendc_ops/op_impl/ai_core/tbe/config/ascend910b/aic-ascend910b-ops-info.json +15 -7
  82. mindspore/lib/plugin/ascend/custom_ascendc_ops/op_impl/ai_core/tbe/custom_ascendc_ops_impl/dynamic/add_custom.cpp +81 -0
  83. mindspore/lib/plugin/ascend/custom_ascendc_ops/op_impl/ai_core/tbe/custom_ascendc_ops_impl/dynamic/add_custom.py +134 -0
  84. mindspore/lib/plugin/ascend/custom_ascendc_ops/op_impl/ai_core/tbe/custom_ascendc_ops_impl/dynamic/decoder_kv_cache.py +31 -77
  85. mindspore/lib/plugin/ascend/custom_ascendc_ops/op_impl/ai_core/tbe/custom_ascendc_ops_impl/dynamic/prompt_kv_cache.py +31 -77
  86. mindspore/lib/plugin/ascend/custom_ascendc_ops/op_impl/ai_core/tbe/op_tiling/lib/linux/aarch64/libcust_opmaster_rt2.0.so +0 -0
  87. mindspore/lib/plugin/ascend/custom_ascendc_ops/op_impl/ai_core/tbe/op_tiling/liboptiling.so +0 -0
  88. mindspore/lib/plugin/ascend/custom_ascendc_ops/op_proto/inc/op_proto.h +5 -4
  89. mindspore/lib/plugin/ascend/custom_ascendc_ops/op_proto/lib/linux/aarch64/libcust_opsproto_rt2.0.so +0 -0
  90. mindspore/lib/plugin/ascend/libascend_collective.so +0 -0
  91. mindspore/lib/plugin/ascend/libdvpp_utils.so +0 -0
  92. mindspore/lib/plugin/ascend/libhccl_plugin.so +0 -0
  93. mindspore/lib/plugin/ascend/liblowlatency_collective.so +0 -0
  94. mindspore/lib/plugin/ascend/libmindspore_cpu_kernels.so +0 -0
  95. mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/bin/DeviceBin +0 -0
  96. mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/bin/PkgInspect +0 -0
  97. mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/bin/op_man +0 -0
  98. mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/device/ascend910b/bin/ascend910b.bin +286 -275
  99. mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/host/libasdops_cann_host.so +0 -0
  100. mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/host/libasdops_host.so +0 -0
  101. mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/lib/libasdops.so +0 -0
  102. mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/lib/libasdops_static.a +0 -0
  103. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/add/add_impl.h +0 -1
  104. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/apply_rotary_pos_emb/apply_rotary_pos_emb_impl.h +0 -1
  105. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/asdop/asd_op_impl.h +0 -3
  106. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/backend_param.h +0 -5
  107. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/cast/cast_tiling.h +45 -1
  108. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/compare/compare_impl.h +0 -1
  109. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/flash_attention_score/flash_attention_score_impl.h +4 -8
  110. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/flash_attention_score/flash_attention_score_tiling.h +4 -11
  111. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/flash_attention_score/kernel/flash_attention_score_mix_hwsync.h +0 -18
  112. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/internal_kernel.h +0 -6
  113. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/internal_rtbackend.h +75 -1
  114. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/matmul/kernel/matmul.h +5 -5
  115. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/matmul/matmul_impl.h +3 -18
  116. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/matmul_common/pp_matmul_common_tiling.h +5 -5
  117. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/matmul_common/pp_matmul_info.h +2 -2
  118. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/matmul_common/tiling_data.h +3 -36
  119. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/matmul_stridedslice/kernel/matmul_stridedslice_fusion.h +2 -2
  120. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/matmul_stridedslice/matmul_stridedslice_fusion_impl.h +4 -22
  121. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/op_param.h +2 -16
  122. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/paged_attention/kernel/paged_attention_mix_hwsync.h +3 -1
  123. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/paged_attention/paged_attention_impl.h +4 -5
  124. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/paged_attention/paged_attention_tiling.h +4 -9
  125. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/param/attention_param.h +2 -5
  126. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/param/matmul_ext_param.h +0 -1
  127. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/param/matmul_qkv_param.h +4 -10
  128. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/param/sub_param.h +12 -0
  129. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/rms_norm/rms_norm_impl.h +0 -1
  130. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/sub/sub_impl.h +0 -1
  131. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/tune_repo/matmul_table.h +1 -1
  132. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/utils/backend.h +2 -10
  133. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/utils/elewise_utils.h +1 -5
  134. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/utils/log/log.h +0 -1
  135. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/utils/log/log_tiling.h +0 -17
  136. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/utils/math.h +7 -2
  137. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libAdd_impl.so +0 -0
  138. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libSub_impl.so +0 -0
  139. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libadd_layernorm_impl.so +0 -0
  140. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libadd_rms_norm_impl.so +0 -0
  141. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libapply_rotary_pos_emb_impl.so +0 -0
  142. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libcast_impl.so +0 -0
  143. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libgelu_impl.so +0 -0
  144. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libmatmul_impl.so +0 -0
  145. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libmatmul_stridedslice_fusion_impl.so +0 -0
  146. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libms_kernels_internal.so +0 -0
  147. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libnot_equal_impl.so +0 -0
  148. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libreshape_and_cache_impl.so +0 -0
  149. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/librms_norm_impl.so +0 -0
  150. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/BSAttention/flash_attention_score_bf16_bnsd_full_mix.o +0 -0
  151. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/BSAttention/flash_attention_score_bf16_bnsd_tri_mix.o +0 -0
  152. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/BSAttention/flash_attention_score_bf16_bsh_full_mix.o +0 -0
  153. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/BSAttention/flash_attention_score_bf16_bsh_tri_mix.o +0 -0
  154. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/BSAttention/flash_attention_score_fp16_bnsd_full_mix.o +0 -0
  155. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/BSAttention/flash_attention_score_fp16_bnsd_tri_mix.o +0 -0
  156. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/BSAttention/flash_attention_score_fp16_bsh_full_mix.o +0 -0
  157. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/BSAttention/flash_attention_score_fp16_bsh_tri_mix.o +0 -0
  158. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/BSAttention/paged_attention_bf16_bnsd_full_mix.o +0 -0
  159. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/BSAttention/paged_attention_bf16_bsh_full_mix.o +0 -0
  160. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/BSAttention/paged_attention_fp16_bnsd_full_mix.o +0 -0
  161. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/BSAttention/paged_attention_fp16_bsh_full_mix.o +0 -0
  162. mindspore/lib/plugin/ascend/ms_kernels_internal/lccl/lib/liblcal.so +0 -0
  163. mindspore/lib/plugin/ascend/ms_kernels_internal/lccl/lib/liblccl_wrapper.so +0 -0
  164. mindspore/lib/plugin/libmindspore_ascend.so.2 +0 -0
  165. mindspore/mindrecord/filewriter.py +2 -2
  166. mindspore/mint/__init__.py +40 -720
  167. mindspore/mint/nn/__init__.py +7 -89
  168. mindspore/mint/nn/functional.py +16 -165
  169. mindspore/mint/optim/adamw.py +16 -15
  170. mindspore/nn/__init__.py +2 -0
  171. mindspore/nn/cell.py +98 -97
  172. mindspore/nn/extend/basic.py +2 -2
  173. mindspore/nn/extend/embedding.py +1 -1
  174. mindspore/nn/extend/layer/normalization.py +5 -7
  175. mindspore/nn/generator.py +297 -0
  176. mindspore/nn/layer/activation.py +3 -4
  177. mindspore/nn/layer/basic.py +16 -79
  178. mindspore/nn/layer/conv.py +8 -17
  179. mindspore/nn/layer/embedding.py +4 -1
  180. mindspore/nn/layer/math.py +1 -1
  181. mindspore/nn/layer/normalization.py +1 -1
  182. mindspore/nn/layer/pooling.py +0 -5
  183. mindspore/nn/layer/rnn_cells.py +2 -2
  184. mindspore/nn/loss/loss.py +19 -19
  185. mindspore/nn/optim/adasum.py +1 -1
  186. mindspore/nn/optim/sgd.py +2 -3
  187. mindspore/nn/probability/distribution/exponential.py +1 -1
  188. mindspore/nn/probability/distribution/geometric.py +1 -1
  189. mindspore/nn/probability/distribution/logistic.py +1 -1
  190. mindspore/nn/wrap/cell_wrapper.py +1 -25
  191. mindspore/nn/wrap/loss_scale.py +1 -24
  192. mindspore/numpy/array_ops.py +1 -5
  193. mindspore/numpy/dtypes.py +3 -3
  194. mindspore/numpy/math_ops.py +8 -8
  195. mindspore/ops/__init__.py +1 -1
  196. mindspore/ops/_grad_experimental/grad_comm_ops.py +16 -75
  197. mindspore/ops/_vmap/vmap_array_ops.py +0 -27
  198. mindspore/ops/_vmap/vmap_math_ops.py +1 -29
  199. mindspore/ops/_vmap/vmap_nn_ops.py +18 -19
  200. mindspore/ops/auto_generate/cpp_create_prim_instance_helper.py +8 -34
  201. mindspore/ops/auto_generate/gen_arg_dtype_cast.py +9 -2
  202. mindspore/ops/auto_generate/gen_arg_handler.py +0 -26
  203. mindspore/ops/auto_generate/gen_extend_func.py +27 -603
  204. mindspore/ops/auto_generate/gen_ops_def.py +203 -993
  205. mindspore/ops/auto_generate/gen_ops_prim.py +402 -1946
  206. mindspore/ops/auto_generate/pyboost_inner_prim.py +20 -90
  207. mindspore/ops/composite/base.py +6 -3
  208. mindspore/ops/composite/math_ops.py +1 -1
  209. mindspore/ops/composite/multitype_ops/_compile_utils.py +17 -24
  210. mindspore/ops/composite/multitype_ops/_constexpr_utils.py +1 -1
  211. mindspore/ops/extend/__init__.py +3 -2
  212. mindspore/ops/extend/array_func.py +51 -10
  213. mindspore/ops/extend/nn_func.py +78 -2
  214. mindspore/ops/function/__init__.py +13 -8
  215. mindspore/ops/function/array_func.py +179 -455
  216. mindspore/ops/function/clip_func.py +1 -1
  217. mindspore/ops/function/grad/grad_func.py +3 -3
  218. mindspore/ops/function/math_func.py +103 -117
  219. mindspore/ops/function/nn_func.py +163 -275
  220. mindspore/ops/function/other_func.py +2 -2
  221. mindspore/ops/function/random_func.py +69 -202
  222. mindspore/ops/function/sparse_func.py +4 -4
  223. mindspore/ops/functional.py +327 -332
  224. mindspore/ops/operations/__init__.py +3 -13
  225. mindspore/ops/operations/_grad_ops.py +27 -3
  226. mindspore/ops/operations/_inner_ops.py +356 -53
  227. mindspore/ops/operations/_rl_inner_ops.py +2 -2
  228. mindspore/ops/operations/_tensor_array.py +8 -8
  229. mindspore/ops/operations/array_ops.py +65 -82
  230. mindspore/ops/operations/comm_ops.py +93 -784
  231. mindspore/ops/operations/custom_ops.py +28 -51
  232. mindspore/ops/operations/debug_ops.py +4 -4
  233. mindspore/ops/operations/inner_ops.py +2 -2
  234. mindspore/ops/operations/manually_defined/ops_def.py +4 -304
  235. mindspore/ops/operations/math_ops.py +50 -3
  236. mindspore/ops/operations/nn_ops.py +247 -14
  237. mindspore/ops/operations/other_ops.py +3 -3
  238. mindspore/ops/operations/random_ops.py +1 -1
  239. mindspore/ops/operations/sparse_ops.py +1 -1
  240. mindspore/ops/primitive.py +8 -9
  241. mindspore/ops/silent_check.py +5 -5
  242. mindspore/ops_generate/arg_dtype_cast.py +9 -2
  243. mindspore/ops_generate/arg_handler.py +0 -26
  244. mindspore/ops_generate/gen_aclnn_implement.py +4 -1
  245. mindspore/ops_generate/gen_ops.py +4 -26
  246. mindspore/ops_generate/gen_pyboost_func.py +12 -41
  247. mindspore/ops_generate/gen_utils.py +0 -21
  248. mindspore/ops_generate/pyboost_utils.py +2 -7
  249. mindspore/ops_generate/template.py +0 -1
  250. mindspore/parallel/_auto_parallel_context.py +1 -21
  251. mindspore/parallel/_tensor.py +5 -0
  252. mindspore/parallel/_transformer/transformer.py +1 -1
  253. mindspore/parallel/_utils.py +1 -15
  254. mindspore/parallel/algo_parameter_config.py +3 -1
  255. mindspore/parallel/checkpoint_transform.py +9 -12
  256. mindspore/parallel/cluster/process_entity/_api.py +29 -28
  257. mindspore/parallel/cluster/process_entity/_utils.py +3 -13
  258. mindspore/parallel/cluster/run.py +16 -13
  259. mindspore/parallel/parameter_broadcast.py +2 -2
  260. mindspore/parallel/shard.py +17 -31
  261. mindspore/profiler/__init__.py +2 -3
  262. mindspore/profiler/common/util.py +2 -107
  263. mindspore/profiler/envprofiling.py +1 -1
  264. mindspore/profiler/parser/ascend_analysis/constant.py +21 -8
  265. mindspore/profiler/parser/ascend_analysis/file_manager.py +0 -82
  266. mindspore/profiler/parser/ascend_analysis/function_event.py +28 -43
  267. mindspore/profiler/parser/ascend_analysis/fwk_cann_parser.py +27 -49
  268. mindspore/profiler/parser/ascend_analysis/fwk_file_parser.py +10 -15
  269. mindspore/profiler/parser/ascend_analysis/msprof_timeline_parser.py +20 -25
  270. mindspore/profiler/parser/ascend_analysis/profiler_info_parser.py +5 -5
  271. mindspore/profiler/parser/ascend_analysis/trace_event_manager.py +1 -10
  272. mindspore/profiler/parser/ascend_hccl_generator.py +1 -4
  273. mindspore/profiler/parser/ascend_msprof_exporter.py +22 -43
  274. mindspore/profiler/parser/ascend_timeline_generator.py +5 -7
  275. mindspore/profiler/parser/minddata_parser.py +3 -72
  276. mindspore/profiler/profiling.py +59 -176
  277. mindspore/rewrite/api/node.py +1 -1
  278. mindspore/rewrite/common/namespace.py +5 -5
  279. mindspore/rewrite/parsers/assign_parser.py +0 -2
  280. mindspore/rewrite/parsers/class_def_parser.py +4 -8
  281. mindspore/run_check/_check_version.py +1 -1
  282. mindspore/scipy/fft.py +3 -1
  283. mindspore/scipy/linalg.py +3 -2
  284. mindspore/scipy/ops.py +3 -5
  285. mindspore/scipy/optimize/__init__.py +2 -2
  286. mindspore/train/__init__.py +4 -4
  287. mindspore/train/anf_ir_pb2.py +2 -8
  288. mindspore/train/callback/__init__.py +2 -5
  289. mindspore/train/callback/_backup_and_restore.py +2 -2
  290. mindspore/train/callback/_checkpoint.py +16 -104
  291. mindspore/train/callback/_landscape.py +1 -1
  292. mindspore/train/callback/_time_monitor.py +1 -1
  293. mindspore/train/data_sink.py +4 -5
  294. mindspore/train/dataset_helper.py +20 -45
  295. mindspore/train/model.py +38 -266
  296. mindspore/train/serialization.py +105 -256
  297. mindspore/train/summary/_summary_adapter.py +1 -1
  298. mindspore/version.py +1 -1
  299. {mindspore-2.3.0.dist-info → mindspore-2.3.0rc2.dist-info}/METADATA +2 -2
  300. {mindspore-2.3.0.dist-info → mindspore-2.3.0rc2.dist-info}/RECORD +303 -420
  301. mindspore/_extends/pijit/__init__.py +0 -23
  302. mindspore/_extends/pijit/pijit_func_white_list.py +0 -343
  303. mindspore/common/file_system.py +0 -48
  304. mindspore/common/generator.py +0 -260
  305. mindspore/common/no_inline.py +0 -54
  306. mindspore/common/np_dtype.py +0 -25
  307. mindspore/communication/comm_func.py +0 -1140
  308. mindspore/hal/memory.py +0 -326
  309. mindspore/lib/libavcodec.so.59 +0 -0
  310. mindspore/lib/libavdevice.so.59 +0 -0
  311. mindspore/lib/libavfilter.so.8 +0 -0
  312. mindspore/lib/libavformat.so.59 +0 -0
  313. mindspore/lib/libavutil.so.57 +0 -0
  314. mindspore/lib/libmindspore_np_dtype.so +0 -0
  315. mindspore/lib/libswresample.so.4 +0 -0
  316. mindspore/lib/libswscale.so.6 +0 -0
  317. mindspore/lib/plugin/ascend/custom_ascendc_ops/op_impl/ai_core/tbe/custom_ascendc_ops_impl/dynamic/all_finite.cpp +0 -326
  318. mindspore/lib/plugin/ascend/custom_ascendc_ops/op_impl/ai_core/tbe/custom_ascendc_ops_impl/dynamic/all_finite.py +0 -180
  319. mindspore/lib/plugin/ascend/custom_ascendc_ops/op_impl/ai_core/tbe/kernel/ascend910b/all_finite/AllFinite_576ceaeef5870c451cab59af55ea46ad.json +0 -58
  320. mindspore/lib/plugin/ascend/custom_ascendc_ops/op_impl/ai_core/tbe/kernel/ascend910b/all_finite/AllFinite_576ceaeef5870c451cab59af55ea46ad.o +0 -0
  321. mindspore/lib/plugin/ascend/custom_ascendc_ops/op_impl/ai_core/tbe/kernel/ascend910b/all_finite/AllFinite_86a73ff6e28d734c96bb8d3054f7dd18.json +0 -58
  322. mindspore/lib/plugin/ascend/custom_ascendc_ops/op_impl/ai_core/tbe/kernel/ascend910b/all_finite/AllFinite_86a73ff6e28d734c96bb8d3054f7dd18.o +0 -0
  323. mindspore/lib/plugin/ascend/custom_ascendc_ops/op_impl/ai_core/tbe/kernel/ascend910b/all_finite/AllFinite_f55e0ebaad1f2f572e43677336992fa0.json +0 -58
  324. mindspore/lib/plugin/ascend/custom_ascendc_ops/op_impl/ai_core/tbe/kernel/ascend910b/all_finite/AllFinite_f55e0ebaad1f2f572e43677336992fa0.o +0 -0
  325. mindspore/lib/plugin/ascend/custom_ascendc_ops/op_impl/ai_core/tbe/kernel/config/ascend910b/all_finite.json +0 -109
  326. mindspore/lib/plugin/ascend/custom_ascendc_ops/op_impl/ai_core/tbe/kernel/config/ascend910b/binary_info_config.json +0 -38
  327. mindspore/lib/plugin/ascend/custom_compiler/OWNERS +0 -12
  328. mindspore/lib/plugin/ascend/custom_compiler/setup.py +0 -255
  329. mindspore/lib/plugin/ascend/custom_compiler/start.sh +0 -26
  330. mindspore/lib/plugin/ascend/custom_compiler/template.json +0 -40
  331. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/include/acme.h +0 -24
  332. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/include/acme_op.h +0 -69
  333. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/include/base_type.h +0 -133
  334. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/include/op_creator.h +0 -32
  335. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/include/op_param.h +0 -35
  336. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/include/tiling_info.h +0 -60
  337. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/src/core/kernel_register.h +0 -37
  338. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/src/core/platform/platform_configs.h +0 -89
  339. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/src/core/platform/rt_funcs.h +0 -135
  340. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/src/ops/host_src/add_op.h +0 -34
  341. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/src/ops/host_src/asd_backoff_base.h +0 -62
  342. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/src/ops/host_src/asd_elewise_op.h +0 -33
  343. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/src/ops/host_src/asd_ops.h +0 -88
  344. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/src/ops/host_src/asd_pa_op.h +0 -45
  345. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/src/ops/host_src/cast_op.h +0 -52
  346. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/src/ops/host_src/matmul_op.h +0 -95
  347. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/src/utils/asd_utils.h +0 -84
  348. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/src/utils/comm_utils.h +0 -61
  349. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/apply_rotary_pos_emb/kernel/apply_rotary_pos_emb_fp32.h +0 -224
  350. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_binary/and_impl.h +0 -29
  351. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_binary/div_impl.h +0 -29
  352. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_binary/elewise_binary_impl.h +0 -48
  353. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_binary/elewise_binary_tiling.h +0 -25
  354. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_binary/kernel/and_kernel.h +0 -46
  355. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_binary/kernel/div_kernel.h +0 -46
  356. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_binary/kernel/elewise_binary_base.h +0 -260
  357. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_binary/kernel/elewise_binary_kernel.h +0 -35
  358. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_binary/kernel/max_kernel.h +0 -66
  359. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_binary/kernel/min_kernel.h +0 -66
  360. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_binary/kernel/mul_kernel.h +0 -66
  361. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_binary/kernel/or_kernel.h +0 -46
  362. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_binary/max_impl.h +0 -29
  363. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_binary/min_impl.h +0 -29
  364. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_binary/mul_impl.h +0 -29
  365. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_binary/or_impl.h +0 -29
  366. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_unary/abs_impl.h +0 -29
  367. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_unary/elewise_unary_impl.h +0 -47
  368. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_unary/elewise_unary_tiling.h +0 -24
  369. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_unary/exp_impl.h +0 -29
  370. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_unary/kernel/abs_kernel.h +0 -45
  371. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_unary/kernel/elewise_unary_base.h +0 -148
  372. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_unary/kernel/elewise_unary_kernel.h +0 -31
  373. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_unary/kernel/exp_kernel.h +0 -45
  374. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_unary/kernel/ln_kernel.h +0 -45
  375. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_unary/kernel/not_kernel.h +0 -45
  376. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_unary/kernel/reciprocal_kernel.h +0 -45
  377. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_unary/kernel/relu_kernel.h +0 -55
  378. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_unary/kernel/rsqrt_kernel.h +0 -45
  379. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_unary/kernel/sqrt_kernel.h +0 -45
  380. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_unary/ln_impl.h +0 -29
  381. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_unary/not_impl.h +0 -29
  382. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_unary/reciprocal_impl.h +0 -29
  383. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_unary/relu_impl.h +0 -29
  384. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_unary/rsqrt_impl.h +0 -29
  385. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_unary/sqrt_impl.h +0 -29
  386. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/grouped_matmul/grouped_matmul_impl.h +0 -45
  387. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/grouped_matmul/grouped_matmul_tiling.h +0 -187
  388. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/grouped_matmul/kernel/grouped_matmul.h +0 -245
  389. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/grouped_matmul/kernel/grouped_matmul_interface.h +0 -24
  390. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/grouped_matmul/kernel/grouped_matmul_utils.h +0 -111
  391. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/grouped_matmul/tiling_data.h +0 -54
  392. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/param/compare_param.h +0 -31
  393. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/param/elewise_param.h +0 -41
  394. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/param/grouped_matmul_param.h +0 -40
  395. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/profiling_util.h +0 -364
  396. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/utils/log/log_utils.h +0 -69
  397. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/utils/register/kernel_creator.h +0 -39
  398. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/utils/register/kernel_registry.h +0 -114
  399. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/utils/utils.h +0 -98
  400. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/MatMulPostFusionMixTactic/matmul_postfusion_mix.json +0 -19
  401. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/MatMulPostFusionMixTactic/matmul_postfusion_mix.o +0 -0
  402. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/MatMulPostFusionMixTactic/matmul_postfusion_mix_mix_aic_0.o +0 -0
  403. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/MatMulPostFusionMixTactic/matmul_postfusion_mix_mix_aiv_0.o +0 -0
  404. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/MultiMatMulPostFusionMixTactic/multi_matmul_postfusion_mix.json +0 -19
  405. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/MultiMatMulPostFusionMixTactic/multi_matmul_postfusion_mix.o +0 -0
  406. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/MultiMatMulPostFusionMixTactic/multi_matmul_postfusion_mix_mix_aic_0.o +0 -0
  407. mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/MultiMatMulPostFusionMixTactic/multi_matmul_postfusion_mix_mix_aiv_0.o +0 -0
  408. mindspore/mint/linalg/__init__.py +0 -22
  409. mindspore/nn/layer/embedding_service.py +0 -531
  410. mindspore/nn/layer/embedding_service_layer.py +0 -393
  411. mindspore/ops/function/reshard_func.py +0 -102
  412. mindspore/ops/operations/_infer_ops.py +0 -19
  413. mindspore/ops/operations/reshard_ops.py +0 -53
  414. mindspore/profiler/common/process_pool.py +0 -41
  415. mindspore/profiler/common/singleton.py +0 -28
  416. mindspore/profiler/parser/ascend_integrate_generator.py +0 -42
  417. mindspore/profiler/parser/ascend_memory_generator.py +0 -185
  418. mindspore/train/callback/_cluster_monitor.py +0 -201
  419. mindspore/train/callback/_flops_collector.py +0 -238
  420. mindspore/train/callback/_mindio_ttp.py +0 -443
  421. {mindspore-2.3.0.dist-info → mindspore-2.3.0rc2.dist-info}/WHEEL +0 -0
  422. {mindspore-2.3.0.dist-info → mindspore-2.3.0rc2.dist-info}/entry_points.txt +0 -0
  423. {mindspore-2.3.0.dist-info → mindspore-2.3.0rc2.dist-info}/top_level.txt +0 -0
@@ -1,443 +0,0 @@
1
- # Copyright 2024 Huawei Technologies Co., Ltd
2
- #
3
- # Licensed under the Apache License, Version 2.0 (the "License");
4
- # you may not use this file except in compliance with the License.
5
- # You may obtain a copy of the License at
6
- #
7
- # http://www.apache.org/licenses/LICENSE-2.0
8
- #
9
- # Unless required by applicable law or agreed to in writing, software
10
- # distributed under the License is distributed on an "AS IS" BASIS,
11
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
- # See the License for the specific language governing permissions and
13
- # limitations under the License.
14
- # ============================================================================
15
- """Checkpoint related classes and functions."""
16
-
17
- import os
18
- import sys
19
- import copy
20
- from mindspore.train.serialization import save_checkpoint, _convert_cell_param_and_names_to_dict, _get_merged_param_data
21
- from mindspore.parallel._auto_parallel_context import _get_auto_parallel_context
22
- from mindspore.parallel._utils import _get_device_num
23
- from mindspore import _checkparam as Validator
24
- from mindspore.train.callback._callback import Callback
25
- from mindspore.common.tensor import Tensor
26
- from mindspore import context
27
- import mindspore as ms
28
- from mindspore.communication import get_rank
29
- from mindspore.parallel.checkpoint_transform import sync_pipeline_shared_parameters
30
-
31
- from mindspore.train._utils import get_parameter_redundancy
32
- from mindspore import log as logger
33
- from mindspore.parallel._utils import _is_in_auto_parallel_mode
34
- from mindspore.common.api import _get_parameter_layout
35
-
36
-
37
- def _get_dp_from_layout(parameter_layout_dict):
38
- """ Get dp and tp from layout dict. """
39
- pp_num = _get_auto_parallel_context("pipeline_stages")
40
- dev_num = _get_device_num()
41
- global_rank = get_rank()
42
- pipe_size = dev_num // pp_num
43
- initial_rank = (global_rank // pipe_size) * pipe_size
44
- parameter_redundancy_dict = get_parameter_redundancy(
45
- parameter_layout_dict, initial_rank)
46
- value_len = sys.maxsize
47
- min_value = ()
48
- for key, value in parameter_redundancy_dict.items():
49
- if "accu_grads" in key or "inputs" in key:
50
- continue
51
- for item in value:
52
- if len(item) < value_len and global_rank in item:
53
- value_len = len(item)
54
- min_value = item
55
- return min_value
56
-
57
-
58
- def _get_ckpt_dir(append_dict, ckpt_save_path, is_tmp_file):
59
- """ Common func to generate ckpt dir name."""
60
- tmp = "_tmp" if is_tmp_file else ""
61
- mid_dir = f"ttp_saved_checkpoints-{str(append_dict['cur_epoch_num'])}_{str(append_dict['cur_step_num'])}{tmp}"
62
- return os.path.join(ckpt_save_path, mid_dir)
63
-
64
-
65
- def _flush_from_cache(cb_params):
66
- """ Flush cache data to host if tensor is cache enable."""
67
- params = cb_params.train_network.get_parameters()
68
- for param in params:
69
- if param.cache_enable:
70
- Tensor(param).flush_from_cache()
71
-
72
-
73
- def _save_checkpoint_on_failure(save_rank, step, rank_list, save_args):
74
- """ Callback used for TTP save ckpt function when errors occur."""
75
- logger.info("Enter _save_checkpoint_on_failure function")
76
- ckpt_save_path, save_params, append_dict = save_args
77
- ckpt_file = f"iteration-{str(append_dict['cur_epoch_num'])}_{str(append_dict['cur_step_num'])}.ckpt"
78
- cur_ckpt_dir = _get_ckpt_dir(
79
- append_dict, ckpt_save_path, True) + "/rank_" + str(save_rank)
80
- os.makedirs(cur_ckpt_dir)
81
- cur_file = os.path.join(cur_ckpt_dir, ckpt_file)
82
- save_checkpoint(save_params, cur_file,
83
- integrated_save=False, append_dict=append_dict)
84
- logger.info("Finish _save_checkpoint_on_failure function")
85
-
86
-
87
- def _convert_net_to_param_list(save_obj):
88
- """Convert nn.Cell to param_list."""
89
- sync_pipeline_shared_parameters(save_obj)
90
- param_list = []
91
- parameter_layout_dict = save_obj.parameter_layout_dict
92
- if _is_in_auto_parallel_mode() and not parameter_layout_dict:
93
- parameter_layout_dict = _get_parameter_layout()
94
- if not _is_in_auto_parallel_mode():
95
- save_obj.init_parameters_data()
96
- param_dict = _convert_cell_param_and_names_to_dict(save_obj, None)
97
- for (key, value) in param_dict.items():
98
- each_param = {"name": key}
99
- param_data = Tensor(value.asnumpy())
100
- # in automatic model parallel scenario, some parameters were split to all the devices,
101
- # which should be combined before saving
102
- if key in parameter_layout_dict:
103
- param_data = _get_merged_param_data(
104
- save_obj, parameter_layout_dict, key, param_data, False)
105
- each_param["data"] = param_data
106
- param_list.append(each_param)
107
- return param_list
108
-
109
-
110
- def _rename_save_result(rename_args):
111
- """ Callback used for TTP rename function after ckpt save callback was finished and successful."""
112
- logger.info("Enter _rename_save_result function")
113
- ckpt_save_path, _, append_dict = rename_args
114
-
115
- tmp_dir = _get_ckpt_dir(append_dict, ckpt_save_path, True)
116
- fin_dir = _get_ckpt_dir(append_dict, ckpt_save_path, False)
117
-
118
- os.rename(tmp_dir, fin_dir)
119
- logger.info("Finish _rename_save_result function")
120
-
121
-
122
- class MindIOTTPAdapter(Callback):
123
- """
124
- This callback is used to enable the feature
125
- `MindIO TTP <https://www.hiascend.com/document/detail/zh/mindx-dl/60rc1/mindio/mindiottp/mindiottp001.html>`_.
126
- This callback will execute TTP operations during training process, such as TTP init, report and exception handle.
127
-
128
- Note:
129
- Required for Ascend GE LazyInline mode only. And pipline size must be greater than 1.
130
-
131
- Args:
132
- controller_ip (str): TTP controller's ip address, used for init TTP controller.
133
- controller_port (int): TTP controller's ip port, used for init TTP controller and processor.
134
- ckpt_save_path (str): Checkpoint save directory when failure occurs, checkpoint file will save to directory
135
- named ttp_saved_checkpoints-{cur_epoch_num}_{cur_step_num} under this directory.
136
-
137
- Raises:
138
- Exception: TTP init failed.
139
- ModuleNotFoundError: Mindio TTP whl package is not installed.
140
-
141
- Examples:
142
- >>> import numpy as np
143
- >>> import os
144
- >>> import math
145
- >>> import mindspore as ms
146
- >>> import mindspore.dataset as ds
147
- >>> from mindspore import nn, ops, Parameter, train
148
- >>> from mindspore.communication import init
149
- >>> from mindspore.common.initializer import initializer, HeUniform
150
- >>> from mindspore.train import Model, MindIOTTPAdapter
151
- >>> from mindspore import dataset as ds
152
- >>> ms.set_context(mode=ms.GRAPH_MODE, jit_level='O2')
153
- >>> ms.set_auto_parallel_context(parallel_mode=ms.ParallelMode.SEMI_AUTO_PARALLEL, pipeline_stages=2)
154
- >>> init()
155
- >>> ms.set_seed(1)
156
- >>> ms.set_auto_parallel_context(strategy_ckpt_config={"save_file":
157
- >>> "./src_pipeline_strategys/src_strategy_{}.ckpt".format(get_rank())})
158
- >>> class MatMulCell(nn.Cell):
159
- ... def __init__(self, param=None, shape=None):
160
- ... super().__init__()
161
- ... if shape is None:
162
- ... shape = [28 * 28, 512]
163
- ... weight_init = HeUniform(math.sqrt(5))
164
- ... self.param = Parameter(initializer(weight_init, shape), name="param")
165
- ... if param is not None:
166
- ... self.param = param
167
- ... self.print = ops.Print()
168
- ... self.matmul = ops.MatMul()
169
- ...
170
- ... def construct(self, x):
171
- ... out = self.matmul(x, self.param)
172
- ... self.print("out is:", out)
173
- ... return out
174
- >>>
175
- >>> class Network(nn.Cell):
176
- ... def __init__(self):
177
- ... super().__init__()
178
- ... self.flatten = nn.Flatten()
179
- ... self.layer1 = MatMulCell()
180
- ... self.relu1 = nn.ReLU()
181
- ... self.layer2 = nn.Dense(512, 512)
182
- ... self.relu2 = nn.ReLU()
183
- ... self.layer3 = nn.Dense(512, 10)
184
- ...
185
- ... def construct(self, x):
186
- ... x = self.flatten(x)
187
- ... x = self.layer1(x)
188
- ... x = self.relu1(x)
189
- ... x = self.layer2(x)
190
- ... x = self.relu2(x)
191
- ... logits = self.layer3(x)
192
- ... return logits
193
- >>>
194
- >>> net = Network()
195
- >>> net.layer1.pipeline_stage = 0
196
- >>> net.relu1.pipeline_stage = 0
197
- >>> net.layer2.pipeline_stage = 0
198
- >>> net.relu2.pipeline_stage = 1
199
- >>> net.layer3.pipeline_stage = 1
200
- >>>
201
- >>> def create_dataset(batch_size):
202
- ... dataset_path = os.getenv("DATA_PATH")
203
- ... dataset = ds.MnistDataset(dataset_path)
204
- ... image_transforms = [
205
- ... ds.vision.Rescale(1.0 / 255.0, 0),
206
- ... ds.vision.Normalize(mean=(0.1307,), std=(0.3081,)),
207
- ... ds.vision.HWC2CHW()
208
- ... ]
209
- ... label_transform = ds.transforms.TypeCast(ms.int32)
210
- ... dataset = dataset.map(image_transforms, 'image')
211
- ... dataset = dataset.map(label_transform, 'label')
212
- ... dataset = dataset.batch(batch_size)
213
- ... return dataset
214
- >>>
215
- >>> data_set = create_dataset(32)
216
- >>>
217
- >>> optimizer = nn.SGD(net.trainable_params(), 1e-2)
218
- >>> loss_fn = nn.CrossEntropyLoss()
219
- >>>
220
- >>> net_with_loss = nn.PipelineCell(nn.WithLossCell(net, loss_fn), 4)
221
- >>> net_with_loss.set_train()
222
- >>> model = Model(net_with_loss, optimizer=optimizer)
223
- >>> ttp_cb = MindIOTTPAdapter("192.168.0.1", 2000, "./ttp_checkpoint/")
224
- >>> loss_cb = train.LossMonitor(1)
225
- >>> model.train(1, dataset, callbacks=[ttp_cb, loss_cb])
226
- """
227
-
228
- def __init__(self, controller_ip, controller_port, ckpt_save_path):
229
- super(MindIOTTPAdapter, self).__init__()
230
- # let it raises errors if not install mindio_ttp package
231
- from mindio_ttp import framework_ttp as ttp
232
- self.ttp = ttp
233
- Validator.check_non_negative_int(controller_port)
234
- self.has_init = False
235
- self.enable = False
236
- mode = context.get_context("mode")
237
- if context.get_context("device_target") != "Ascend" or mode != context.GRAPH_MODE:
238
- logger.warning(
239
- "MindIO adataper only support on Ascend device with GRAPH Mode.")
240
- return
241
- if os.getenv("MS_ENABLE_MINDIO_GRACEFUL_EXIT") != "true":
242
- logger.warning("MindIO adataper need custom switch on.")
243
- return
244
- ttp_lib_path = os.getenv("MS_MINDIO_TTP_LIB_PATH")
245
- if ttp_lib_path is None or os.path.isfile(ttp_lib_path) is False:
246
- logger.warning(
247
- "MindIO adataper switch on, but ttp library path is not correct.")
248
- return
249
- self.enable = True
250
- self._controller_ip = controller_ip
251
- self._controller_port = controller_port
252
- self._ckpt_save_path = ckpt_save_path
253
-
254
- def wrapper_ttp_persist(self, func):
255
- """
256
- This method is used to wrapper TTP exception handler for the input func.
257
-
258
- Args:
259
- func (function): train method that need to be wrapper.
260
-
261
- Returns:
262
- Function, if the TTP is enabled, return the encapsulated function,
263
- otherwise the original function is returned.
264
-
265
- """
266
- if self.enable:
267
- return self.ttp.ttp_to_persist(func)
268
- return func
269
-
270
- def _init_ttp(self, run_context):
271
- """ Init Mindio TTP, used internal. """
272
- logger.info("Begin to init ttp.")
273
- dev_num = _get_device_num()
274
-
275
- cb_params = run_context.original_args()
276
- param_layout_dict = cb_params.train_network.parameter_layout_dict
277
- dp = _get_dp_from_layout(param_layout_dict)
278
- logger.info("Init TTP with dp: {}.".format(dp))
279
-
280
- self.ttp.ttp_register_save_ckpt_handler(_save_checkpoint_on_failure)
281
- self.ttp.ttp_register_rename_handler(_rename_save_result)
282
-
283
- world_size = dev_num
284
- cur_rank = get_rank()
285
- is_odd = len(dp) % 2
286
- replica = 2 if is_odd else len(dp) // 2
287
- enable_local_copy = False
288
- if cur_rank == 0:
289
- logger.info("Begin to start ttp controller.")
290
- self.ttp.ttp_init_controller(
291
- cur_rank, world_size, replica, enable_local_copy)
292
- self.ttp.ttp_start_controller(
293
- self._controller_ip, self._controller_port)
294
- logger.info("Finish start ttp controller.")
295
-
296
- logger.info("Begin to start ttp processor.")
297
- self.ttp.ttp_init_processor(cur_rank, dp, len(
298
- dp), world_size, replica, enable_local_copy)
299
- self.ttp.ttp_start_processor(
300
- self._controller_ip, self._controller_port)
301
- logger.info("Finished start ttp processor.")
302
-
303
- logger.info("Finish init ttp.")
304
-
305
- def on_train_step_end(self, run_context):
306
- """
307
- Init TTP Controller only once after first step finished.
308
- And report status to MindIO TTP after every step finished.
309
-
310
- Args:
311
- run_context (RunContext): Context of the train running. Refer to
312
- :class:`mindspore.train.RunContext` for detail.
313
-
314
- """
315
-
316
- if self.enable is False:
317
- return
318
- pp_num = _get_auto_parallel_context("pipeline_stages")
319
- if pp_num < 2:
320
- self.enable = False
321
- return
322
- cb_params = run_context.original_args()
323
- if cb_params.dataset_sink_mode is True and cb_params.sink_size > 1:
324
- self.enable = False
325
- return
326
- if self.has_init is False:
327
- self.has_init = True
328
- self._init_ttp(run_context)
329
- _flush_from_cache(cb_params)
330
- cur_rank = get_rank()
331
- append_dict = {}
332
- append_dict["cur_epoch_num"] = cb_params.cur_epoch_num
333
- append_dict["cur_step_num"] = int(
334
- (cb_params.cur_step_num - 1) % cb_params.batch_num + 1)
335
- append_dict["cur_rank"] = cur_rank
336
- append_dict["batch_num"] = cb_params.batch_num
337
- append_dict["global_step"] = cb_params.cur_step_num
338
-
339
- save_params = _convert_net_to_param_list(cb_params.train_network)
340
- save_params_copy = copy.deepcopy(save_params)
341
-
342
- logger.info("Set ckpt args to TTP.")
343
- self.ttp.ttp_set_ckpt_args(
344
- (self._ckpt_save_path, save_params_copy, append_dict))
345
- logger.info("Set optimizer finish step status to TTP.")
346
- self.ttp.ttp_end_updating_os(cb_params.cur_step_num)
347
-
348
- @staticmethod
349
- def load_checkpoint_with_backup(ckpt_file_path, strategy_file_path, net):
350
- """
351
- Load checkpoint into network, and use strategy file to find backup checkpoint file
352
- when origin checkpoint file not found.
353
-
354
- Note:
355
- This API must be called after the communication is initialized because the cluster information
356
- needs to be obtained internally.
357
-
358
- Args:
359
- ckpt_file_path (str): the checkpoint file to be loaded.
360
- strategy_file_path (str): strategy file path for current rank.
361
- net (Cell): network that needs to load checkpoint.
362
-
363
- Returns:
364
- Dict, checkpoint weights after loaded.
365
-
366
- Raises:
367
- ValueError: Failed to load the checkpoint file.
368
-
369
- Examples:
370
- >>> import numpy as np
371
- >>> from mindspore import nn
372
- >>> from mindspore.train import Model, MindIOTTPAdapter
373
- >>> from mindspore import dataset as ds
374
- >>> ms.set_context(mode=ms.GRAPH_MODE)
375
- >>> ms.set_auto_parallel_context(parallel_mode=ms.ParallelMode.DATA_PARALLEL, gradients_mean=True)
376
- >>> init()
377
- >>> ms.set_seed(1)
378
- >>> class Network(nn.Cell):
379
- ... def __init__(self):
380
- ... super().__init__()
381
- ... self.flatten = nn.Flatten()
382
- ... self.fc = nn.Dense(28*28, 10, weight_init="normal", bias_init="zeros")
383
- ... self.relu = nn.ReLU()
384
- ...
385
- ... def construct(self, x):
386
- ... x = self.flatten(x)
387
- ... logits = self.relu(self.fc(x))
388
- ... return logits
389
- >>>
390
- >>> net = Network()
391
- >>>
392
- >>> def create_dataset(batch_size):
393
- ... dataset_path = os.getenv("DATA_PATH")
394
- ... rank_id = get_rank()
395
- ... rank_size = get_group_size()
396
- ... dataset = ds.MnistDataset(dataset_path, num_shards=rank_size, shard_id=rank_id)
397
- ... image_transforms = [
398
- ... ds.vision.Rescale(1.0 / 255.0, 0),
399
- ... ds.vision.Normalize(mean=(0.1307,), std=(0.3081,)),
400
- ... ds.vision.HWC2CHW()
401
- ... ]
402
- ... label_transform = ds.transforms.TypeCast(ms.int32)
403
- ... dataset = dataset.map(image_transforms, 'image')
404
- ... dataset = dataset.map(label_transform, 'label')
405
- ... dataset = dataset.batch(batch_size)
406
- ... return dataset
407
- >>> data_set = create_dataset(32)
408
- >>> ckpt_file= "./rank_5/iteration-1_40.ckpt"
409
- >>> strategy_file = "./src_pipeline_strategys/src_strategy_5.ckpt"
410
- >>> param_dict = MindIOTTPAdapter.load_checkpoint_with_backup(ckpt_file, stragegy_file, net)
411
- >>> data_set.set_init_step(param_dict["global_step"])
412
- """
413
- logger.info("Start load checkpoint with strategy file.")
414
- try:
415
- param_dict = ms.load_checkpoint(ckpt_file_path)
416
- except ValueError as e:
417
- logger.warning(
418
- "Loading origin checkpoint file failed, the reason is:{}.".format(str(e)))
419
- dp = _get_dp_from_layout(strategy_file_path)
420
- rank = get_rank()
421
- logger.info(
422
- "Can't load origin checkpoint file, found dp:{}.".format(dp))
423
- for i in dp:
424
- if i == rank:
425
- continue
426
- new_ckpt = ckpt_file_path.replace(
427
- f"/rank_{rank}/", f"/rank_{str(i)}/")
428
- if not os.path.isfile(new_ckpt):
429
- continue
430
- try:
431
- param_dict = ms.load_checkpoint(new_ckpt)
432
- except ValueError as e1:
433
- logger.warning(
434
- "Loading strategy checkpoint file failed, the reason is:{}.".format(str(e1)))
435
- param_dict = None
436
- if param_dict:
437
- logger.info("Found param dict, load it into network.")
438
- ms.load_param_into_net(net, param_dict)
439
- else:
440
- raise ValueError(
441
- "Load checkpoint file failed, please check your config is set correctly.")
442
- logger.info("Finish load checkpoint with strategy file.")
443
- return param_dict