mindspore-2.6.0-cp311-cp311-win_amd64.whl → mindspore-2.7.0-cp311-cp311-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mindspore might be problematic.

Files changed (455)
  1. mindspore/.commit_id +1 -1
  2. mindspore/Microsoft.VisualStudio.Telemetry.dll +0 -0
  3. mindspore/Newtonsoft.Json.dll +0 -0
  4. mindspore/__init__.py +2 -2
  5. mindspore/_c_dataengine.cp311-win_amd64.pyd +0 -0
  6. mindspore/_c_expression.cp311-win_amd64.pyd +0 -0
  7. mindspore/_c_mindrecord.cp311-win_amd64.pyd +0 -0
  8. mindspore/_checkparam.py +42 -11
  9. mindspore/_extends/builtin_operations.py +3 -3
  10. mindspore/{_deprecated → _extends/optimize}/__init__.py +9 -3
  11. mindspore/_extends/optimize/cell_utils.py +96 -0
  12. mindspore/_extends/parallel_compile/akg_compiler/custom.py +1109 -0
  13. mindspore/_extends/parallel_compile/akg_compiler/gen_custom_op_files.py +1 -1
  14. mindspore/_extends/parse/__init__.py +3 -3
  15. mindspore/_extends/parse/compile_config.py +44 -22
  16. mindspore/_extends/parse/deprecated/deprecated_tensor_method.py +1 -2
  17. mindspore/_extends/parse/parser.py +64 -83
  18. mindspore/_extends/parse/resources.py +39 -0
  19. mindspore/_extends/parse/standard_method.py +47 -14
  20. mindspore/_extends/parse/trope.py +8 -1
  21. mindspore/_extends/pijit/__init__.py +1 -2
  22. mindspore/_extends/pijit/pijit_func_white_list.py +2 -5
  23. mindspore/amp.py +4 -22
  24. mindspore/atlprov.dll +0 -0
  25. mindspore/avcodec-59.dll +0 -0
  26. mindspore/avdevice-59.dll +0 -0
  27. mindspore/avfilter-8.dll +0 -0
  28. mindspore/avformat-59.dll +0 -0
  29. mindspore/avutil-57.dll +0 -0
  30. mindspore/boost/adasum.py +1 -1
  31. mindspore/boost/boost_cell_wrapper.py +4 -4
  32. mindspore/c1.dll +0 -0
  33. mindspore/c1xx.dll +0 -0
  34. mindspore/c2.dll +0 -0
  35. mindspore/common/__init__.py +43 -12
  36. mindspore/common/_grad_function.py +2 -1
  37. mindspore/common/_pijit_context.py +28 -7
  38. mindspore/common/_stub_tensor.py +1 -209
  39. mindspore/common/_tensor_cpp_method.py +1 -1
  40. mindspore/common/_tensor_docs.py +177 -52
  41. mindspore/common/_utils.py +9 -1
  42. mindspore/common/api.py +338 -208
  43. mindspore/common/dtype.py +108 -57
  44. mindspore/common/dump.py +11 -16
  45. mindspore/common/dynamic_shape/__init__.py +0 -0
  46. mindspore/common/{auto_dynamic_shape.py → dynamic_shape/auto_dynamic_shape.py} +17 -23
  47. mindspore/common/dynamic_shape/enable_dynamic.py +197 -0
  48. mindspore/common/file_system.py +59 -9
  49. mindspore/common/generator.py +2 -3
  50. mindspore/common/hook_handle.py +33 -5
  51. mindspore/common/jit_config.py +1 -1
  52. mindspore/common/jit_trace.py +84 -105
  53. mindspore/common/np_dtype.py +3 -3
  54. mindspore/common/parameter.py +27 -29
  55. mindspore/common/recompute.py +5 -7
  56. mindspore/common/sparse_tensor.py +0 -3
  57. mindspore/common/symbol.py +0 -1
  58. mindspore/common/tensor.py +84 -133
  59. mindspore/communication/_comm_helper.py +46 -4
  60. mindspore/communication/management.py +79 -7
  61. mindspore/context.py +47 -38
  62. mindspore/dataset/__init__.py +1 -1
  63. mindspore/dataset/audio/transforms.py +1 -1
  64. mindspore/dataset/core/config.py +38 -4
  65. mindspore/dataset/engine/datasets.py +350 -322
  66. mindspore/dataset/engine/datasets_user_defined.py +69 -23
  67. mindspore/dataset/engine/iterators.py +2 -2
  68. mindspore/dataset/engine/obs/config_loader.py +2 -2
  69. mindspore/dataset/engine/obs/obs_mindrecord_dataset.py +8 -0
  70. mindspore/dataset/transforms/c_transforms.py +2 -2
  71. mindspore/dataset/transforms/py_transforms.py +7 -3
  72. mindspore/dataset/transforms/transforms.py +10 -6
  73. mindspore/dataset/vision/__init__.py +1 -1
  74. mindspore/dataset/vision/py_transforms.py +8 -8
  75. mindspore/dataset/vision/transforms.py +17 -5
  76. mindspore/dataset/vision/utils.py +632 -21
  77. mindspore/dataset/vision/validators.py +1 -0
  78. mindspore/device_context/ascend/device.py +1 -1
  79. mindspore/device_context/ascend/op_tuning.py +35 -1
  80. mindspore/device_context/gpu/__init__.py +2 -2
  81. mindspore/device_context/gpu/device.py +1 -1
  82. mindspore/device_context/gpu/op_precision.py +4 -2
  83. mindspore/device_context/gpu/op_tuning.py +6 -3
  84. mindspore/device_manager.py +16 -9
  85. mindspore/dnnl.dll +0 -0
  86. mindspore/dpcmi.dll +0 -0
  87. mindspore/experimental/llm_boost/ascend_native/llama_boost_ascend_native.py +5 -4
  88. mindspore/experimental/llm_boost/atb/boost_base.py +2 -3
  89. mindspore/experimental/optim/adadelta.py +13 -20
  90. mindspore/experimental/optim/adagrad.py +15 -22
  91. mindspore/experimental/optim/adam.py +17 -24
  92. mindspore/experimental/optim/adamax.py +14 -22
  93. mindspore/experimental/optim/adamw.py +28 -34
  94. mindspore/experimental/optim/asgd.py +15 -25
  95. mindspore/experimental/optim/lr_scheduler.py +27 -45
  96. mindspore/experimental/optim/nadam.py +14 -24
  97. mindspore/experimental/optim/optimizer.py +13 -23
  98. mindspore/experimental/optim/radam.py +18 -24
  99. mindspore/experimental/optim/rmsprop.py +14 -25
  100. mindspore/experimental/optim/rprop.py +15 -26
  101. mindspore/experimental/optim/sgd.py +9 -19
  102. mindspore/hal/__init__.py +4 -4
  103. mindspore/hal/contiguous_tensors_handle.py +2 -2
  104. mindspore/hal/memory.py +1 -0
  105. mindspore/include/api/cell.h +65 -5
  106. mindspore/include/api/cfg.h +24 -7
  107. mindspore/include/api/context.h +1 -0
  108. mindspore/include/api/delegate.h +10 -2
  109. mindspore/include/api/dual_abi_helper.h +100 -19
  110. mindspore/include/api/graph.h +14 -1
  111. mindspore/include/api/kernel.h +16 -3
  112. mindspore/include/api/kernel_api.h +9 -1
  113. mindspore/include/api/metrics/accuracy.h +9 -0
  114. mindspore/include/api/model.h +8 -1
  115. mindspore/include/api/model_group.h +4 -0
  116. mindspore/include/api/model_parallel_runner.h +2 -0
  117. mindspore/include/api/status.h +48 -10
  118. mindspore/include/api/types.h +8 -3
  119. mindspore/include/c_api/model_c.h +0 -58
  120. mindspore/include/c_api/tensor_c.h +0 -26
  121. mindspore/include/dataset/constants.h +9 -0
  122. mindspore/include/dataset/vision_ascend.h +1 -1
  123. mindspore/jpeg62.dll +0 -0
  124. mindspore/mindrecord/tools/cifar10.py +61 -11
  125. mindspore/mindrecord/tools/cifar10_to_mr.py +5 -0
  126. mindspore/mindspore_backend_common.dll +0 -0
  127. mindspore/mindspore_backend_manager.dll +0 -0
  128. mindspore/mindspore_common.dll +0 -0
  129. mindspore/mindspore_core.dll +0 -0
  130. mindspore/mindspore_cpu_res_manager.dll +0 -0
  131. mindspore/mindspore_dump.dll +0 -0
  132. mindspore/mindspore_frontend.dll +0 -0
  133. mindspore/mindspore_glog.dll +0 -0
  134. mindspore/mindspore_memory_pool.dll +0 -0
  135. mindspore/mindspore_ms_backend.dll +0 -0
  136. mindspore/mindspore_ops.dll +0 -0
  137. mindspore/mindspore_ops_host.dll +0 -0
  138. mindspore/mindspore_ops_kernel_common.dll +0 -0
  139. mindspore/mindspore_profiler.dll +0 -0
  140. mindspore/mindspore_pyboost.dll +0 -0
  141. mindspore/mindspore_pynative.dll +0 -0
  142. mindspore/mindspore_res_manager.dll +0 -0
  143. mindspore/mindspore_runtime_pipeline.dll +0 -0
  144. mindspore/mint/__init__.py +4 -44
  145. mindspore/mint/distributed/__init__.py +5 -0
  146. mindspore/mint/distributed/distributed.py +425 -19
  147. mindspore/mint/nn/__init__.py +1 -1
  148. mindspore/mint/nn/functional.py +53 -6
  149. mindspore/mint/nn/layer/_functions.py +163 -294
  150. mindspore/mint/nn/layer/activation.py +8 -6
  151. mindspore/mint/nn/layer/conv.py +125 -101
  152. mindspore/mint/nn/layer/normalization.py +11 -25
  153. mindspore/mint/optim/adam.py +19 -18
  154. mindspore/mint/optim/adamw.py +14 -8
  155. mindspore/mint/optim/sgd.py +5 -5
  156. mindspore/msobj140.dll +0 -0
  157. mindspore/mspdb140.dll +0 -0
  158. mindspore/mspdbcore.dll +0 -0
  159. mindspore/mspdbst.dll +0 -0
  160. mindspore/mspft140.dll +0 -0
  161. mindspore/msvcdis140.dll +0 -0
  162. mindspore/msvcp140_1.dll +0 -0
  163. mindspore/msvcp140_2.dll +0 -0
  164. mindspore/msvcp140_atomic_wait.dll +0 -0
  165. mindspore/msvcp140_codecvt_ids.dll +0 -0
  166. mindspore/nn/cell.py +488 -620
  167. mindspore/nn/grad/cell_grad.py +11 -12
  168. mindspore/nn/layer/activation.py +36 -36
  169. mindspore/nn/layer/basic.py +74 -77
  170. mindspore/nn/layer/channel_shuffle.py +4 -4
  171. mindspore/nn/layer/combined.py +4 -2
  172. mindspore/nn/layer/conv.py +86 -85
  173. mindspore/nn/layer/dense.py +9 -7
  174. mindspore/nn/layer/embedding.py +50 -52
  175. mindspore/nn/layer/image.py +38 -40
  176. mindspore/nn/layer/math.py +111 -112
  177. mindspore/nn/layer/normalization.py +56 -44
  178. mindspore/nn/layer/pooling.py +58 -63
  179. mindspore/nn/layer/rnn_cells.py +33 -33
  180. mindspore/nn/layer/rnns.py +56 -56
  181. mindspore/nn/layer/thor_layer.py +74 -73
  182. mindspore/nn/layer/transformer.py +11 -1
  183. mindspore/nn/learning_rate_schedule.py +20 -20
  184. mindspore/nn/loss/loss.py +79 -81
  185. mindspore/nn/optim/adam.py +2 -4
  186. mindspore/nn/optim/adasum.py +2 -2
  187. mindspore/nn/optim/lamb.py +1 -3
  188. mindspore/nn/optim/optimizer.py +1 -1
  189. mindspore/nn/optim/tft_wrapper.py +2 -3
  190. mindspore/nn/optim/thor.py +2 -2
  191. mindspore/nn/probability/distribution/_utils/utils.py +2 -2
  192. mindspore/nn/probability/distribution/exponential.py +2 -1
  193. mindspore/nn/probability/distribution/poisson.py +2 -1
  194. mindspore/nn/sparse/sparse.py +3 -3
  195. mindspore/nn/wrap/cell_wrapper.py +73 -42
  196. mindspore/nn/wrap/grad_reducer.py +37 -52
  197. mindspore/nn/wrap/loss_scale.py +72 -74
  198. mindspore/numpy/array_creations.py +7 -7
  199. mindspore/numpy/fft.py +1 -1
  200. mindspore/numpy/math_ops.py +1 -1
  201. mindspore/numpy/utils_const.py +1 -1
  202. mindspore/opencv_core452.dll +0 -0
  203. mindspore/opencv_imgcodecs452.dll +0 -0
  204. mindspore/opencv_imgproc452.dll +0 -0
  205. mindspore/ops/_grad_experimental/grad_comm_ops.py +51 -13
  206. mindspore/ops/_grad_experimental/grad_debug_ops.py +14 -0
  207. mindspore/ops/_grad_experimental/grad_inner_ops.py +0 -9
  208. mindspore/ops/_op_impl/cpu/__init__.py +1 -0
  209. mindspore/{experimental/es/__init__.py → ops/_op_impl/cpu/joinedstr_op.py} +12 -6
  210. mindspore/ops/_vmap/vmap_array_ops.py +6 -13
  211. mindspore/ops/_vmap/vmap_nn_ops.py +8 -16
  212. mindspore/ops/auto_generate/cpp_create_prim_instance_helper.py +29 -10
  213. mindspore/ops/auto_generate/gen_extend_func.py +5 -55
  214. mindspore/ops/auto_generate/gen_ops_def.py +753 -273
  215. mindspore/ops/auto_generate/gen_ops_prim.py +1687 -958
  216. mindspore/ops/auto_generate/pyboost_inner_prim.py +31 -1
  217. mindspore/ops/composite/__init__.py +10 -0
  218. mindspore/ops/composite/base.py +9 -5
  219. mindspore/ops/composite/multitype_ops/__init__.py +12 -1
  220. mindspore/ops/composite/multitype_ops/_compile_utils.py +132 -108
  221. mindspore/ops/composite/multitype_ops/_constexpr_utils.py +1 -1
  222. mindspore/ops/composite/multitype_ops/add_impl.py +70 -2
  223. mindspore/ops/composite/multitype_ops/div_impl.py +49 -0
  224. mindspore/ops/composite/multitype_ops/floordiv_impl.py +29 -0
  225. mindspore/ops/composite/multitype_ops/getitem_impl.py +11 -0
  226. mindspore/ops/composite/multitype_ops/mod_impl.py +5 -3
  227. mindspore/ops/composite/multitype_ops/mul_impl.py +49 -0
  228. mindspore/ops/composite/multitype_ops/setitem_impl.py +57 -0
  229. mindspore/ops/composite/multitype_ops/sub_impl.py +34 -0
  230. mindspore/ops/composite/multitype_ops/zeros_like_impl.py +14 -0
  231. mindspore/ops/function/__init__.py +4 -1
  232. mindspore/ops/function/_add_attr_func.py +11 -6
  233. mindspore/ops/function/array_func.py +17 -100
  234. mindspore/ops/function/debug_func.py +8 -5
  235. mindspore/ops/function/grad/grad_func.py +5 -13
  236. mindspore/ops/function/math_func.py +65 -399
  237. mindspore/ops/function/nn_func.py +44 -61
  238. mindspore/ops/function/other_func.py +4 -1
  239. mindspore/ops/function/random_func.py +31 -4
  240. mindspore/ops/functional.py +2 -3
  241. mindspore/ops/functional_overload.py +486 -18
  242. mindspore/ops/op_info_register.py +21 -0
  243. mindspore/ops/operations/__init__.py +5 -2
  244. mindspore/ops/operations/_custom_ops_utils.py +675 -8
  245. mindspore/ops/operations/_inner_ops.py +14 -18
  246. mindspore/ops/operations/_sequence_ops.py +1 -1
  247. mindspore/ops/operations/array_ops.py +4 -50
  248. mindspore/ops/operations/comm_ops.py +186 -41
  249. mindspore/ops/operations/custom_ops.py +244 -175
  250. mindspore/ops/operations/debug_ops.py +55 -4
  251. mindspore/ops/operations/image_ops.py +13 -13
  252. mindspore/ops/operations/manually_defined/ops_def.py +27 -28
  253. mindspore/ops/operations/math_ops.py +8 -9
  254. mindspore/ops/operations/nn_ops.py +6 -7
  255. mindspore/ops/primitive.py +9 -20
  256. mindspore/ops/tensor_method.py +52 -11
  257. mindspore/ops_generate/api/cpp_create_prim_instance_helper_generator.py +1 -1
  258. mindspore/ops_generate/api/functional_map_cpp_generator.py +10 -9
  259. mindspore/ops_generate/api/functions_cc_generator.py +58 -10
  260. mindspore/ops_generate/api/tensor_func_reg_cpp_generator.py +1 -1
  261. mindspore/ops_generate/common/base_generator.py +14 -0
  262. mindspore/ops_generate/common/gen_constants.py +7 -2
  263. mindspore/ops_generate/common/gen_utils.py +0 -19
  264. mindspore/ops_generate/common/op_proto.py +11 -4
  265. mindspore/ops_generate/common/template.py +88 -11
  266. mindspore/ops_generate/gen_ops.py +1 -1
  267. mindspore/ops_generate/op_def/lite_ops_cpp_generator.py +4 -4
  268. mindspore/ops_generate/op_def/ops_name_h_generator.py +0 -3
  269. mindspore/ops_generate/op_def/ops_primitive_h_generator.py +0 -4
  270. mindspore/ops_generate/op_def_py/op_prim_py_generator.py +5 -2
  271. mindspore/ops_generate/pyboost/auto_grad_impl_cc_generator.py +49 -8
  272. mindspore/ops_generate/pyboost/auto_grad_reg_cc_generator.py +2 -2
  273. mindspore/ops_generate/pyboost/gen_pyboost_func.py +31 -16
  274. mindspore/ops_generate/pyboost/op_template_parser.py +98 -72
  275. mindspore/ops_generate/pyboost/pyboost_functions_cpp_generator.py +70 -273
  276. mindspore/ops_generate/pyboost/pyboost_functions_h_generator.py +14 -6
  277. mindspore/ops_generate/pyboost/pyboost_functions_impl_cpp_generator.py +316 -0
  278. mindspore/ops_generate/pyboost/pyboost_functions_py_generator.py +1 -1
  279. mindspore/ops_generate/pyboost/pyboost_grad_function_cpp_generator.py +5 -3
  280. mindspore/ops_generate/pyboost/pyboost_inner_prim_generator.py +1 -1
  281. mindspore/ops_generate/pyboost/pyboost_internal_functions_cpp_generator.py +76 -0
  282. mindspore/ops_generate/pyboost/pyboost_internal_functions_h_generator.py +76 -0
  283. mindspore/ops_generate/pyboost/pyboost_internal_kernel_info_adapter_generator.py +125 -0
  284. mindspore/ops_generate/pyboost/pyboost_native_grad_functions_generator.py +4 -3
  285. mindspore/ops_generate/pyboost/pyboost_op_cpp_code_generator.py +348 -61
  286. mindspore/ops_generate/pyboost/pyboost_overload_functions_cpp_generator.py +1 -1
  287. mindspore/ops_generate/pyboost/pyboost_utils.py +118 -9
  288. mindspore/ops_generate/tensor_py_cc_generator.py +1 -24
  289. mindspore/parallel/_auto_parallel_context.py +9 -17
  290. mindspore/parallel/_cell_wrapper.py +106 -40
  291. mindspore/parallel/_parallel_serialization.py +4 -3
  292. mindspore/parallel/_ps_context.py +4 -6
  293. mindspore/parallel/_tensor.py +167 -12
  294. mindspore/parallel/_transformer/moe.py +1 -1
  295. mindspore/parallel/_transformer/transformer.py +17 -12
  296. mindspore/parallel/_utils.py +5 -11
  297. mindspore/parallel/auto_parallel.py +33 -12
  298. mindspore/parallel/checkpoint_convert.py +3 -3
  299. mindspore/parallel/checkpoint_transform.py +5 -1
  300. mindspore/parallel/cluster/process_entity/_api.py +88 -49
  301. mindspore/parallel/cluster/process_entity/_utils.py +95 -7
  302. mindspore/parallel/cluster/run.py +48 -7
  303. mindspore/parallel/function/__init__.py +8 -1
  304. mindspore/parallel/function/reshard_func.py +7 -6
  305. mindspore/parallel/nn/__init__.py +15 -2
  306. mindspore/parallel/nn/parallel_cell_wrapper.py +50 -14
  307. mindspore/parallel/nn/parallel_grad_reducer.py +7 -14
  308. mindspore/parallel/shard.py +9 -23
  309. mindspore/parallel/transform_safetensors.py +468 -174
  310. mindspore/pgodb140.dll +0 -0
  311. mindspore/pgort140.dll +0 -0
  312. mindspore/profiler/__init__.py +2 -1
  313. mindspore/profiler/analysis/parser/timeline_assembly_factory/ascend_timeline_assembler.py +7 -7
  314. mindspore/profiler/analysis/parser/timeline_assembly_factory/base_timeline_assembler.py +3 -0
  315. mindspore/profiler/analysis/parser/timeline_assembly_factory/trace_view_container.py +3 -0
  316. mindspore/profiler/analysis/parser/timeline_creator/cpu_op_timeline_creator.py +3 -3
  317. mindspore/profiler/analysis/parser/timeline_creator/fwk_timeline_creator.py +3 -3
  318. mindspore/profiler/analysis/parser/timeline_creator/msprof_timeline_creator.py +4 -4
  319. mindspore/profiler/analysis/parser/timeline_creator/scope_layer_timeline_creator.py +3 -3
  320. mindspore/profiler/analysis/parser/timeline_event/fwk_event.py +4 -1
  321. mindspore/profiler/analysis/parser/timeline_event/timeline_event_pool.py +2 -1
  322. mindspore/profiler/analysis/task_manager.py +1 -1
  323. mindspore/profiler/analysis/viewer/ascend_communication_viewer.py +5 -1
  324. mindspore/profiler/analysis/viewer/ascend_integrate_viewer.py +2 -1
  325. mindspore/profiler/analysis/viewer/ascend_kernel_details_viewer.py +10 -9
  326. mindspore/profiler/analysis/viewer/ascend_op_memory_viewer.py +43 -23
  327. mindspore/profiler/analysis/viewer/ascend_step_trace_time_viewer.py +3 -2
  328. mindspore/profiler/analysis/viewer/ms_minddata_viewer.py +9 -5
  329. mindspore/profiler/analysis/viewer/ms_operator_details_viewer.py +132 -0
  330. mindspore/profiler/common/constant.py +16 -0
  331. mindspore/profiler/common/msprof_cmd_tool.py +2 -2
  332. mindspore/profiler/common/path_manager.py +9 -0
  333. mindspore/profiler/common/profiler_context.py +50 -29
  334. mindspore/profiler/common/profiler_info.py +0 -16
  335. mindspore/profiler/common/profiler_meta_data.py +1 -0
  336. mindspore/profiler/common/profiler_op_analyse.py +239 -0
  337. mindspore/profiler/common/profiler_output_path.py +23 -8
  338. mindspore/profiler/common/profiler_parameters.py +128 -35
  339. mindspore/profiler/dynamic_profile/__init__.py +0 -0
  340. mindspore/profiler/dynamic_profile/dynamic_monitor_proxy.py +39 -0
  341. mindspore/profiler/dynamic_profile/dynamic_profiler_config_context.py +666 -0
  342. mindspore/profiler/dynamic_profile/dynamic_profiler_utils.py +62 -0
  343. mindspore/profiler/dynamic_profiler.py +374 -338
  344. mindspore/profiler/envprofiler.py +42 -12
  345. mindspore/profiler/experimental_config.py +112 -7
  346. mindspore/profiler/mstx.py +33 -12
  347. mindspore/profiler/platform/__init__.py +2 -3
  348. mindspore/profiler/platform/cpu_profiler.py +10 -4
  349. mindspore/profiler/platform/npu_profiler.py +30 -20
  350. mindspore/profiler/profiler.py +218 -154
  351. mindspore/profiler/profiler_action_controller.py +65 -77
  352. mindspore/profiler/profiler_interface.py +2 -2
  353. mindspore/profiler/schedule.py +10 -4
  354. mindspore/rewrite/common/config.py +1 -0
  355. mindspore/rewrite/common/namer.py +1 -0
  356. mindspore/rewrite/common/namespace.py +1 -0
  357. mindspore/rewrite/node/node.py +31 -11
  358. mindspore/rewrite/parsers/assign_parser.py +1 -1
  359. mindspore/rewrite/symbol_tree/symbol_tree.py +2 -2
  360. mindspore/run_check/_check_version.py +7 -10
  361. mindspore/runtime/__init__.py +8 -6
  362. mindspore/runtime/event.py +10 -4
  363. mindspore/runtime/executor.py +87 -45
  364. mindspore/runtime/memory.py +22 -30
  365. mindspore/runtime/thread_bind_core.py +299 -165
  366. mindspore/safeguard/rewrite_obfuscation.py +12 -13
  367. mindspore/swresample-4.dll +0 -0
  368. mindspore/swscale-6.dll +0 -0
  369. mindspore/tbbmalloc.dll +0 -0
  370. mindspore/tinyxml2.dll +0 -0
  371. mindspore/train/_utils.py +9 -5
  372. mindspore/train/amp.py +43 -23
  373. mindspore/train/callback/__init__.py +5 -5
  374. mindspore/train/callback/_callback.py +2 -1
  375. mindspore/train/callback/_checkpoint.py +4 -14
  376. mindspore/train/callback/_flops_collector.py +11 -7
  377. mindspore/train/callback/_landscape.py +0 -1
  378. mindspore/train/callback/_train_fault_tolerance.py +72 -18
  379. mindspore/train/data_sink.py +15 -6
  380. mindspore/train/dataset_helper.py +14 -5
  381. mindspore/train/model.py +49 -47
  382. mindspore/train/serialization.py +168 -126
  383. mindspore/train/summary/summary_record.py +13 -2
  384. mindspore/train/train_thor/model_thor.py +2 -2
  385. mindspore/turbojpeg.dll +0 -0
  386. mindspore/utils/__init__.py +3 -2
  387. mindspore/utils/dryrun.py +0 -6
  388. mindspore/utils/runtime_execution_order_check.py +162 -78
  389. mindspore/utils/sdc_detect.py +68 -0
  390. mindspore/utils/utils.py +14 -17
  391. mindspore/vcmeta.dll +0 -0
  392. mindspore/vcruntime140.dll +0 -0
  393. mindspore/vcruntime140_1.dll +0 -0
  394. mindspore/version.py +1 -1
  395. {mindspore-2.6.0.dist-info → mindspore-2.7.0.dist-info}/METADATA +5 -4
  396. {mindspore-2.6.0.dist-info → mindspore-2.7.0.dist-info}/RECORD +400 -439
  397. mindspore/_deprecated/jit.py +0 -198
  398. mindspore/_extends/remote/kernel_build_server_ascend.py +0 -75
  399. mindspore/communication/_hccl_management.py +0 -297
  400. mindspore/experimental/es/embedding_service.py +0 -891
  401. mindspore/experimental/es/embedding_service_layer.py +0 -581
  402. mindspore/profiler/common/validator/__init__.py +0 -14
  403. mindspore/profiler/common/validator/validate_path.py +0 -84
  404. mindspore/profiler/parser/__init__.py +0 -14
  405. mindspore/profiler/parser/aicpu_data_parser.py +0 -272
  406. mindspore/profiler/parser/ascend_analysis/__init__.py +0 -14
  407. mindspore/profiler/parser/ascend_analysis/constant.py +0 -71
  408. mindspore/profiler/parser/ascend_analysis/file_manager.py +0 -180
  409. mindspore/profiler/parser/ascend_analysis/function_event.py +0 -185
  410. mindspore/profiler/parser/ascend_analysis/fwk_cann_parser.py +0 -136
  411. mindspore/profiler/parser/ascend_analysis/fwk_file_parser.py +0 -131
  412. mindspore/profiler/parser/ascend_analysis/msprof_timeline_parser.py +0 -104
  413. mindspore/profiler/parser/ascend_analysis/path_manager.py +0 -313
  414. mindspore/profiler/parser/ascend_analysis/profiler_info_parser.py +0 -123
  415. mindspore/profiler/parser/ascend_analysis/tlv_decoder.py +0 -86
  416. mindspore/profiler/parser/ascend_analysis/trace_event_manager.py +0 -75
  417. mindspore/profiler/parser/ascend_cluster_generator.py +0 -116
  418. mindspore/profiler/parser/ascend_communicate_generator.py +0 -314
  419. mindspore/profiler/parser/ascend_flops_generator.py +0 -116
  420. mindspore/profiler/parser/ascend_fpbp_generator.py +0 -82
  421. mindspore/profiler/parser/ascend_hccl_generator.py +0 -271
  422. mindspore/profiler/parser/ascend_integrate_generator.py +0 -42
  423. mindspore/profiler/parser/ascend_memory_generator.py +0 -185
  424. mindspore/profiler/parser/ascend_msprof_exporter.py +0 -282
  425. mindspore/profiler/parser/ascend_msprof_generator.py +0 -187
  426. mindspore/profiler/parser/ascend_op_generator.py +0 -334
  427. mindspore/profiler/parser/ascend_steptrace_generator.py +0 -94
  428. mindspore/profiler/parser/ascend_timeline_generator.py +0 -545
  429. mindspore/profiler/parser/base_timeline_generator.py +0 -483
  430. mindspore/profiler/parser/container.py +0 -229
  431. mindspore/profiler/parser/cpu_gpu_timeline_generator.py +0 -697
  432. mindspore/profiler/parser/flops_parser.py +0 -531
  433. mindspore/profiler/parser/framework_enum.py +0 -111
  434. mindspore/profiler/parser/framework_parser.py +0 -464
  435. mindspore/profiler/parser/framework_struct.py +0 -61
  436. mindspore/profiler/parser/gpu_analysis/__init__.py +0 -14
  437. mindspore/profiler/parser/gpu_analysis/function_event.py +0 -44
  438. mindspore/profiler/parser/gpu_analysis/fwk_file_parser.py +0 -89
  439. mindspore/profiler/parser/gpu_analysis/profiler_info_parser.py +0 -72
  440. mindspore/profiler/parser/hccl_parser.py +0 -573
  441. mindspore/profiler/parser/hwts_log_parser.py +0 -122
  442. mindspore/profiler/parser/integrator.py +0 -526
  443. mindspore/profiler/parser/memory_usage_parser.py +0 -277
  444. mindspore/profiler/parser/minddata_analyzer.py +0 -800
  445. mindspore/profiler/parser/minddata_parser.py +0 -186
  446. mindspore/profiler/parser/minddata_pipeline_parser.py +0 -299
  447. mindspore/profiler/parser/op_intermediate_parser.py +0 -149
  448. mindspore/profiler/parser/optime_parser.py +0 -250
  449. mindspore/profiler/parser/profiler_info.py +0 -213
  450. mindspore/profiler/parser/step_trace_parser.py +0 -666
  451. mindspore/utils/hooks.py +0 -81
  452. /mindspore/common/{_auto_dynamic.py → dynamic_shape/_auto_dynamic.py} +0 -0
  453. {mindspore-2.6.0.dist-info → mindspore-2.7.0.dist-info}/WHEEL +0 -0
  454. {mindspore-2.6.0.dist-info → mindspore-2.7.0.dist-info}/entry_points.txt +0 -0
  455. {mindspore-2.6.0.dist-info → mindspore-2.7.0.dist-info}/top_level.txt +0 -0
@@ -21,9 +21,11 @@ import numpy as np
  from mindspore.common.tensor import Tensor
  from mindspore.communication.management import get_rank, get_group_size
  from mindspore._c_expression import TensorTransform
+ from mindspore import log as logger
 
  _tensor_transform = TensorTransform.get_instance()
-
+ COMM_TENSOR_CELL_CACHE = {}
+ RESHARD_OP_MAP_CACHE = {}
 
  def _get_tensor_strategy(dev_mat, tensor_map):
  """
@@ -348,7 +350,7 @@ def _extract_layout_item(layout_item):
  return dev_matrix, tensor_map, opt_shard_step, opt_shard_size
 
 
- def _transform_tensor_by_layout(from_layout, to_layout, device_list, rank_id):
+ def _transform_tensor_by_layout(from_layout, to_layout, device_list, rank_id, enable_redist_opt=False):
  """
  Transform tensor from source layout to the destination layout.
 
@@ -362,7 +364,7 @@ def _transform_tensor_by_layout(from_layout, to_layout, device_list, rank_id):
  """
  if not isinstance(from_layout, tuple) or not isinstance(to_layout, tuple):
  raise TypeError("The layout should be tuple! layout is {} and {}".format(from_layout, to_layout))
- return _tensor_transform.transform_tensor_sharding(from_layout, to_layout, device_list, rank_id)
+ return _tensor_transform.transform_tensor_sharding(from_layout, to_layout, device_list, enable_redist_opt, rank_id)
 
 
  def _construct_from_to_tensor_layout(from_full_tensor_shape, from_dev_matrix,
@@ -587,13 +589,15 @@ def _get_needed_rank_list_by_layouts(from_tensor_layout, to_tensor_layout, devic
  return result_list
 
 
- def _get_needed_rank_transform_operator_map_by_layouts(from_tensor_layout, to_tensor_layout, device_list, self_rank):
+ def _get_needed_rank_transform_operator_map_by_layouts(from_tensor_layout, to_tensor_layout, device_list, self_rank,
+ enable_redist_opt=False):
  """
  AllGather op: {op_name, group_ranks + axis}
  """
  stack = []
  index = 0
- transform_operators = _transform_tensor_by_layout(from_tensor_layout, to_tensor_layout, device_list, self_rank)
+ transform_operators = _transform_tensor_by_layout(from_tensor_layout, to_tensor_layout, device_list, self_rank,
+ enable_redist_opt)
  result_map = {self_rank: transform_operators}
  for operators in transform_operators:
  op_name = operators[0]
@@ -606,7 +610,7 @@ def _get_needed_rank_transform_operator_map_by_layouts(from_tensor_layout, to_te
  for rank in group_info[1]:
  if rank not in result_map:
  new_transform_operators = _transform_tensor_by_layout(from_tensor_layout, to_tensor_layout,
- device_list, rank)
+ device_list, rank, enable_redist_opt)
  result_map[rank] = new_transform_operators
  index = 0
  for operators in new_transform_operators:
@@ -710,8 +714,6 @@ def _apply_operator(operator_name):
  Returns:
  The data of tensor after apply operator.
  """
- if str(type(numpy_data)) == "<class 'builtins.PySafeSlice'>":
- numpy_data = numpy_data[:]
  if not isinstance(numpy_data, np.ndarray):
  raise TypeError("The data should be a numpy.ndarray.")
  _check_operator(reshape_op)
@@ -732,10 +734,7 @@ def _apply_operator(operator_name):
  raise TypeError("The data_list should be a list.")
  new_numpy_data_list = []
  for numpy_data in numpy_data_list:
- if str(type(numpy_data)) == "<class 'builtins.PySafeSlice'>":
- new_numpy_data_list.append(numpy_data[:])
- else:
- new_numpy_data_list.append(numpy_data)
+ new_numpy_data_list.append(numpy_data)
  numpy_data_list = new_numpy_data_list
  _check_operator(allgather_op)
  concat_group = allgather_op[1][:-1]
@@ -896,3 +895,159 @@ def _chunk_shape(np_tensor, strategy, depth):
  output.extend(
  _chunk_shape(ret_, strategy[len(strategy) - depth + 1:len(strategy)], depth - 1))
  return output
+
+
+ def _infer_pp_op_map(from_layout, to_layout, self_rank):
+ """
+ get the ops map for merging pp stages
+ """
+ from_rank_list = from_layout[3]
+ to_rank_list = to_layout[3]
+ from_dev_num_in_stage = len(from_rank_list)
+ current_rank_stage_id = self_rank // from_dev_num_in_stage
+ diff_rank_id = [
+ rank_id for rank_id in to_rank_list if rank_id not in from_rank_list]
+ end_stage = from_dev_num_in_stage * (current_rank_stage_id + 1)
+ start_stage = from_dev_num_in_stage * current_rank_stage_id
+ rank_pos_in_stage = list(range(start_stage, end_stage)).index(self_rank)
+ root_idx = from_rank_list[rank_pos_in_stage]
+ broadcast_rank_list = [root_idx]
+ while rank_pos_in_stage < len(diff_rank_id):
+ broadcast_rank_list.append(diff_rank_id[rank_pos_in_stage])
+ rank_pos_in_stage += from_dev_num_in_stage
+ broadcast_rank_list.sort()
+ broadcast_map = {rank_id: [('Broadcast', root_idx, broadcast_rank_list)] for rank_id in broadcast_rank_list}
+ return broadcast_map
+
+
+ def _get_pipeline_operator_map(from_layout, to_layout, self_rank):
+ """
+ If src_pp_stages is greater than dst_pp_stages, the weights of the corresponding cards need to
+ be communicated via broadcast to swap. Need to communicate src rank0's 01 to src rank2,
+ so that rank2 holds param0's data. Similarly, communicate rank1's 02 to rank3
+ rank0 01 01 11
+ rank1 02 02 12
+ pp2 -------> pp1
+ rank2 11 03 13
+ rank3 12 04 14
+
+ Args:
+ from_layout (tuple): Use tuple to present layout
+ (device_matrix(list), tensor_map(list), global_shape(list), rank_list(list))
+ to_layout (tuple): Use tuple to present layout
+ (device_matrix(list), tensor_map(list), global_shape(list), rank_list(list))
+ self_rank (int): rank_id
+ """
+ if len(from_layout[3]) < len(to_layout[3]):
+ logger.debug(f"from {from_layout} to {to_layout} need to broadcast data across pp stages")
+ comm_tensor_cache_key = (
+ f"{from_layout[0]}, {from_layout[1]}, {from_layout[2]}, {from_layout[3]}"
+ f" -> "
+ f"{to_layout[0]}, {to_layout[1]}, {from_layout[2]}, {to_layout[3]}")
+ global COMM_TENSOR_CELL_CACHE
+ if comm_tensor_cache_key not in COMM_TENSOR_CELL_CACHE:
+ logger.debug(f"comm_tensor_cache_key is {comm_tensor_cache_key}, not match cache")
+ broadcast_map = _infer_pp_op_map(from_layout, to_layout, self_rank)
+ broadcast_op_map_dict = {rank_id: broadcast_map for rank_id in broadcast_map.keys()}
+ COMM_TENSOR_CELL_CACHE[comm_tensor_cache_key] = broadcast_op_map_dict
+ else:
+ comm_tensor_cache_key_rank_list = COMM_TENSOR_CELL_CACHE[comm_tensor_cache_key]
+ if self_rank in comm_tensor_cache_key_rank_list:
+ logger.debug(f"comm_tensor_cache_key is {comm_tensor_cache_key}, match cache")
+ broadcast_map = comm_tensor_cache_key_rank_list[self_rank]
+ else:
+ logger.debug(f"comm_tensor_cache_key is {comm_tensor_cache_key}, but rank {self_rank} not match cache")
+ broadcast_map = _infer_pp_op_map(from_layout, to_layout, self_rank)
+ for rank_id in broadcast_map.keys():
+ COMM_TENSOR_CELL_CACHE[comm_tensor_cache_key][rank_id] = broadcast_map
+ return broadcast_map
+ logger.debug(f"from {from_layout} to {to_layout} no need to broadcast data across pp stages")
+ return {}
+
+
+ def _is_multi_shard(in_tensor_map):
+ """
+ whether the input tensor map is in multi shard
+ """
+ for tensor_map in in_tensor_map:
+ if isinstance(tensor_map, (list, tuple)) and len(tensor_map) > 1:
+ return True
+ return False
+
+
+ def _insert_expand_layout_reshape(param_rank_map, from_info_tuple, to_info_tuple,
+ insert_from_reshape, insert_to_reshape):
+ """ insert layout expand op reshape """
+ from_dev_matrix = from_info_tuple[0]
+ from_tensor_map = from_info_tuple[1]
+ from_full_tensor_shape = from_info_tuple[2]
+ to_dev_matrix_origin = to_info_tuple[0]
+ to_tensor_map_origin = to_info_tuple[1]
+ origin_tensor_shape = to_info_tuple[2]
+ for param_rank, _ in param_rank_map.items():
+ if insert_from_reshape:
+ from_slice_tensor_shape = ()
+ from_tensor_strategy = _get_tensor_strategy(from_dev_matrix, from_tensor_map)
+ for i, item in enumerate(from_full_tensor_shape):
+ from_slice_tensor_shape += (item // from_tensor_strategy[i],)
+ param_rank_map.get(param_rank).insert(0, ('Reshape', list(from_slice_tensor_shape)))
+ if insert_to_reshape:
+ to_tensor_strategy = _get_tensor_strategy(to_dev_matrix_origin, to_tensor_map_origin)
+ to_slice_tensor_shape = ()
+ for i, item in enumerate(origin_tensor_shape):
+ to_slice_tensor_shape += (item // to_tensor_strategy[i],)
+ param_rank_map.get(param_rank).append(('Reshape', list(to_slice_tensor_shape)))
+
+
+ def _infer_reshard_op_map(from_layout, to_layout, self_rank):
+ """infer reshard op map"""
+ from_layout_without_rank_list = from_layout[:-1]
+ to_layout_without_rank_list = to_layout[:-1]
+ if _is_multi_shard(from_layout[1]):
+ # ((2, 1), 1) --> (2, 1, 1) expand tensormap
+ new_layout = _expand_layout(from_layout[0], from_layout[1], from_layout[2])
+ from_layout_without_rank_list = (new_layout[0], new_layout[1], new_layout[2])
+ if _is_multi_shard(to_layout[1]):
+ new_layout = _expand_layout(to_layout[0], to_layout[1], to_layout[2])
+ to_layout_without_rank_list = (new_layout[0], new_layout[1], new_layout[2])
+ operator_map = _get_needed_rank_transform_operator_map_by_layouts(from_layout_without_rank_list,
+ to_layout_without_rank_list,
+ from_layout[3], self_rank,
+ True)
+ new_to_layout_info = to_layout[:-1]
+ _insert_expand_layout_reshape(operator_map, from_layout_without_rank_list, new_to_layout_info,
+ _is_multi_shard(from_layout[1]), _is_multi_shard(to_layout[1]))
+ return operator_map
+
+
+ def _get_resharding_operator_map(from_layout, to_layout, self_rank):
+ """
+ Args:
+ from_layout (tuple): Use tuple to present layout
+ (device_matrix(list), tensor_map(list), global_shape(list), rank_list(list))
+ to_layout (tuple): Use tuple to present layout
+ (device_matrix(list), tensor_map(list), global_shape(list), rank_list(list))
+ self_rank (int): rank_id
+ """
+ reshard_op_cache_key = (
+ f"{from_layout[0]}, {from_layout[1]}, {from_layout[2]}, {from_layout[3]}"
+ f" -> "
+ f"{to_layout[0]}, {to_layout[1]}, {from_layout[2]}, {to_layout[3]}")
+ global RESHARD_OP_MAP_CACHE
+ if reshard_op_cache_key not in RESHARD_OP_MAP_CACHE:
+ operator_map = _infer_reshard_op_map(from_layout, to_layout, self_rank)
+ op_map_dict = {rank_id: operator_map for rank_id in operator_map}
+ RESHARD_OP_MAP_CACHE[reshard_op_cache_key] = op_map_dict
+ logger.debug(f"reshard_op_cache_key is {reshard_op_cache_key}, not match cache")
+ else:
+ cache_rank_list_dict = RESHARD_OP_MAP_CACHE[reshard_op_cache_key]
+ if self_rank in cache_rank_list_dict:
+ operator_map = cache_rank_list_dict[self_rank]
+ logger.debug(f"reshard_op_cache_key is {reshard_op_cache_key}, match cache")
+ else:
+ logger.debug(f"reshard_op_cache_key is {reshard_op_cache_key}, "
+ f"but rank {self_rank} is not match cache")
+ operator_map = _infer_reshard_op_map(from_layout, to_layout, self_rank)
+ for rank_id in operator_map:
+ RESHARD_OP_MAP_CACHE[reshard_op_cache_key][rank_id] = operator_map
+ return operator_map
@@ -111,7 +111,7 @@ def _check_moe_config(moe_config=None, parallel_config=None):
  """
  if not isinstance(moe_config, MoEConfig):
  raise TypeError(f"'moe_config' must be an instance of MoEConfig, but got {type(moe_config).__name__}.")
- use_moe = (moe_config.expert_num > 1)
+ use_moe = moe_config.expert_num > 1
  if use_moe is False:
  return
  if moe_config.expert_num % parallel_config.expert_parallel != 0:
@@ -1365,7 +1365,12 @@ class MultiHeadAttention(Cell):
  class TransformerEncoderLayer(Cell):
  r"""
  Transformer Encoder Layer. This is an implementation of the single layer of the transformer
- encoder layer, including multihead attention and feedward layer.
+ encoder layer, mainly including Multi-Head Attention, Feed Forward, Add and LayerNorm layer.
+
+ The TransformerEncoderLayer structure is shown in the following figure:
+
+ .. image:: ../images/TransformerEncoderLayer.png
+ :align: center
 
  Args:
  batch_size(int): The batch size of the input tensor when do increnmental prediction. Should be a positive
@@ -1448,7 +1453,7 @@ class TransformerEncoderLayer(Cell):
  >>> # When use use_past=True, it includes two steps to implement the incremental prediction.
  >>> # Step 1: set is_first_iteration=True, and input the full sequence length's state.
  >>> batch_valid_length = Tensor(np.ones((2,)), mstype.int32)
- >>> init_reset = Tensor([True], mstype.bool_)
+ >>> init_reset = Tensor([True], mstype.bool)
  >>> # Set is_first_iteration=True to generate the full memory states
  >>> model = TransformerEncoderLayer(batch_size=2, hidden_size=8, ffn_hidden_size=64, seq_length=16,
  ... num_heads=2, use_past=True)
@@ -1462,7 +1467,7 @@ class TransformerEncoderLayer(Cell):
  (2, 2, 16, 4)
  >>> encoder_input_value = Tensor(np.ones((2, 1, 8)), mstype.float32)
  >>> encoder_input_mask = Tensor(np.ones((2, 1, 16)), mstype.float16)
- >>> init_reset = Tensor([False], mstype.bool_)
+ >>> init_reset = Tensor([False], mstype.bool)
  >>> # Step 2: set is_first_iteration=False, and pass the single word to run the prediction rather than
  >>> # the full sequence.
  >>> model.add_flags_recursive(is_first_iteration=False)
@@ -1532,7 +1537,7 @@ class TransformerEncoderLayer(Cell):
  "and parallel_config. model_parallel is {}."
  .format(ffn_hidden_size, parallel_config.model_parallel))
  _check_moe_config(moe_config, parallel_config)
- self.use_moe = (moe_config.expert_num > 1)
+ self.use_moe = moe_config.expert_num > 1
  self.use_past = use_past
  self.seq_length = seq_length
  self.hidden_size = hidden_size
@@ -1607,7 +1612,7 @@ class TransformerEncoderLayer(Cell):
  "and parallel_config. model_parallel is {}."
  .format(ffn_hidden_size, parallel_config.model_parallel))
  _check_moe_config(moe_config, parallel_config)
- self.use_moe = (moe_config.expert_num > 1)
+ self.use_moe = moe_config.expert_num > 1
  self.use_past = use_past
  self.seq_length = seq_length
  self.hidden_size = hidden_size
@@ -1902,7 +1907,7 @@ class TransformerDecoderLayer(Cell):
  parallel_config=default_dpmp_config):
  super(TransformerDecoderLayer, self).__init__()
  _check_moe_config(moe_config, parallel_config)
- self.use_moe = (moe_config.expert_num > 1)
+ self.use_moe = moe_config.expert_num > 1
  config_to_attention = parallel_config.dpmp if self.use_moe else parallel_config
  if batch_size or use_past:
  Validator.check_positive_int(batch_size)
@@ -2370,7 +2375,7 @@ class TransformerEncoder(Cell):
  >>> # When use use_past=True, it includes two steps to implement the incremental prediction.
  >>> # Step 1: set is_first_iteration=True, and input the full sequence length's state.
  >>> batch_valid_length = Tensor(np.ones((2,)), mstype.int32)
- >>> init_reset = Tensor([True], mstype.bool_)
+ >>> init_reset = Tensor([True], mstype.bool)
  >>> # Set is_first_iteration=True to generate the full memory states
  >>> model = TransformerEncoder(batch_size=2, hidden_size=8, ffn_hidden_size=64, seq_length=16,
  ... num_heads=2, num_layers=2, use_past=True)
@@ -2384,7 +2389,7 @@ class TransformerEncoder(Cell):
  (2, 2, 16, 4)
  >>> encoder_input_value = Tensor(np.ones((2, 1, 8)), mstype.float32)
  >>> encoder_input_mask = Tensor(np.ones((2, 1, 16)), mstype.float16)
- >>> init_reset = Tensor([False], mstype.bool_)
+ >>> init_reset = Tensor([False], mstype.bool)
  >>> # Step 2: set is_first_iteration=False, and pass the single word to run the prediction rather than
  >>> # the full sequence.
  >>> model.add_flags_recursive(is_first_iteration=False)
@@ -2440,7 +2445,7 @@ class TransformerEncoder(Cell):
  super(TransformerEncoder, self).__init__()
  _check_config(parallel_config)
  _check_moe_config(moe_config, parallel_config)
- self.use_moe = (moe_config.expert_num > 1)
+ self.use_moe = moe_config.expert_num > 1
  config_to_layer = parallel_config.moe_parallel_config if self.use_moe else parallel_config.dp_mp_config
  if _get_parallel_mode() in (ParallelMode.AUTO_PARALLEL,):
  self.add = P.Add()
@@ -2677,7 +2682,7 @@ class TransformerDecoder(Cell):
  super(TransformerDecoder, self).__init__()
  _check_moe_config(moe_config, parallel_config)
  _check_config(parallel_config)
- self.use_moe = (moe_config.expert_num > 1)
+ self.use_moe = moe_config.expert_num > 1
  config_to_layer = parallel_config.moe_parallel_config if self.use_moe else parallel_config.dp_mp_config
  if _get_parallel_mode() in (ParallelMode.AUTO_PARALLEL,):
  self.add = P.Add()
@@ -2959,7 +2964,7 @@ class Transformer(Cell):
  if not lambda_func:
  lambda_func = _get_lambda_func(total_layer=encoder_layers + decoder_layers)
  _check_moe_config(moe_config, parallel_config)
- self.use_moe = (moe_config.expert_num > 1)
+ self.use_moe = moe_config.expert_num > 1
  self.add = P.Add()
  self.aux_loss = Tensor(0.0, mstype.float32)
  if encoder_layers > 0:
@@ -3026,7 +3031,7 @@ class Transformer(Cell):
  if not lambda_func:
  lambda_func = _get_lambda_func(total_layer=encoder_layers + decoder_layers)
  _check_moe_config(moe_config, parallel_config)
- self.use_moe = (moe_config.expert_num > 1)
+ self.use_moe = moe_config.expert_num > 1
  self.add = P.Add().shard(((), ()))
  self.aux_loss = Tensor(0.0, mstype.float32)
  if encoder_layers > 0:
@@ -21,7 +21,7 @@ import mindspore as ms
  from mindspore import context, log as logger
  from mindspore._c_expression import reset_op_id, reset_op_id_with_offset
  from mindspore.common.tensor import Tensor
- from mindspore.common.dtype import dtype_to_nptype
+ from mindspore.common.dtype import _dtype_to_nptype
  from mindspore.common import dtype as mstype
  from mindspore.communication.management import get_group_size, get_rank
  from mindspore.communication._comm_helper import _is_initialized
@@ -156,7 +156,7 @@ def _is_in_auto_parallel_mode():
 
 
  def _is_parallel_mode():
- if not _is_initialized() or context.get_context('mode') == context.PYNATIVE_MODE:
+ if not _is_initialized():
  return False
  if os.getenv("RUN_MODE") != "predict":
  return False
@@ -173,12 +173,6 @@ def _is_in_hybrid_parallel_mode():
  return _get_parallel_mode() == ms.ParallelMode.HYBRID_PARALLEL
 
 
- def _is_pynative_parallel():
- parallel_mode = context.get_auto_parallel_context('parallel_mode')
- return context.get_context('mode') == context.PYNATIVE_MODE and parallel_mode in (
- context.ParallelMode.SEMI_AUTO_PARALLEL, context.ParallelMode.AUTO_PARALLEL)
-
-
  def _get_full_batch():
  """Get whether to use full_batch."""
  return auto_parallel_context().get_full_batch()
@@ -452,7 +446,7 @@ def _to_full_tensor(elem, global_device_num, global_rank, scaling_sens=None):
  batchsize_per_device = item
  else:
  new_shape += (item,)
- new_tensor_numpy = np.zeros(new_shape, dtype_to_nptype(type_))
+ new_tensor_numpy = np.zeros(new_shape, _dtype_to_nptype(type_)) # pylint:disable=protected-access
  start = stage_rank * batchsize_per_device
  new_tensor_numpy[start: start + batchsize_per_device] = data.asnumpy()
  else:
@@ -466,7 +460,7 @@ def _to_full_tensor(elem, global_device_num, global_rank, scaling_sens=None):
  end = (stage_rank % dataset_strategy[index][i] + 1) * item
  s = slice(start, end, 1)
  slice_index += (s,)
- new_tensor_numpy = np.zeros(new_shape, dtype_to_nptype(type_))
+ new_tensor_numpy = np.zeros(new_shape, _dtype_to_nptype(type_)) # pylint:disable=protected-access
  new_tensor_numpy[slice_index] = data.asnumpy()
  new_tensor = Tensor(new_tensor_numpy, dtype=type_)
  lst.append(new_tensor)
@@ -773,7 +767,7 @@ def _grads_divided_by_device_num_if_recomputation(grads):
  """
  If in pynative parallel and full_batch is True, divide grads by device num to ensure that the gradients is correct.
  """
- if not _is_pynative_parallel() or not _get_full_batch():
+ if not _get_full_batch():
  return grads
 
  device_num = _get_device_num()
@@ -14,6 +14,7 @@
  # ============================================================================
  """Cell of auto parallel"""
  import os
+ from mindspore import jit
  from mindspore.nn.cell import Cell
  from mindspore.parallel.shard import Layout
  from mindspore.communication.management import get_rank, get_group_size
@@ -162,6 +163,10 @@ class AutoParallel(Cell):
  super(AutoParallel, self).__init__(auto_prefix=False)
  self.network = network
 
+ if parallel_mode not in ["semi_auto", "sharding_propagation", "recursive_programming"]:
+ raise ValueError("the argument 'parallel_mode' must be one of ['semi_auto', 'sharding_propagation'," \
+ " 'recursive_programming'], but got the value : {} .".format(parallel_mode))
+
  self._parallel_mode = parallel_mode
 
  self._global_rank = get_rank()
@@ -260,8 +265,10 @@ class AutoParallel(Cell):
  self._save_strategy_file_path = file_path
 
  def disable_strategy_file_only_for_trainable_params(self):
- """By default, MindSpore only loads and saves trainable parameters. This API enables the loading and saving of
- non-trainable parameters as well."""
+ """
+ By default, MindSpore only loads and saves trainable parameters. This API enables the loading and saving of
+ non-trainable parameters as well.
+ """
  self._only_trainable_params = False
 
  def save_operator_strategy_file(self, file_path):
@@ -275,7 +282,8 @@ class AutoParallel(Cell):
  Note:
  - It only works when `parallel_mode=sharding_propagation`.
  - When performing distributed training, users can first save the strategy using dryrun on a single device
- and then load strategy to perform distributed training.
+ and then load strategy to perform distributed training. Note that only the first device of each node will
+ save the strategy file, so the simulated rank id specified by Dryrun must be divisible by 8.
 
  Args:
  file_path (str): Path to save parallel strategy json, must be an absolute path.
@@ -410,7 +418,7 @@ class AutoParallel(Cell):
  raise ValueError("For 'AutoParallel.dataset_strategy', the argument "
  "'config' must be 'full_batch' or 'data_parallel', but got the value : {}."
  .format(config))
- self._full_batch = (config == "full_batch")
+ self._full_batch = config == "full_batch"
  self._dataset_strategy_config = config
  return
  if not isinstance(config, tuple):
@@ -505,17 +513,18 @@ class AutoParallel(Cell):
  raise ValueError("For 'AutoParallel.pipeline', the argument 'stages' "
  "must be larger than zero, but got value: {}.".format(stages))
  if not isinstance(output_broadcast, bool):
- raise TypeError("For 'AutoParallel.pipeline', the argument 'stages' "
+ raise TypeError("For 'AutoParallel.pipeline', the argument 'output_broadcast' "
  "must be bool type, but got the type : {}.".format(type(output_broadcast)))
  if not isinstance(interleave, bool):
- raise TypeError("For 'AutoParallel.pipeline', the argument 'stages' "
+ raise TypeError("For 'AutoParallel.pipeline', the argument 'interleave' "
  "must be bool type, but got the type : {}.".format(type(interleave)))
  if not isinstance(scheduler, str):
- raise TypeError("For 'AutoParallel.pipeline', the argument 'stages' "
+ raise TypeError("For 'AutoParallel.pipeline', the argument 'scheduler' "
  "must be str type, but got the type : {}.".format(type(scheduler)))
- if scheduler not in ("1f1b", "gpipe"):
+ if scheduler not in ("1f1b", "gpipe", "seqpipe", "seqvpp", "seqsmartvpp", "zero_bubble_v"):
  raise ValueError("For 'AutoParallel.pipeline', the argument "
- "'scheduler' must be '1f1b' , 'gpipe' , but got the value : {}."
+ "'scheduler' must be '1f1b'/'gpipe'/'seqpipe'/'seqvpp'/'seqsmartvpp'/'zero_bubble_v' ," \
+ " but got the value : {}."
  .format(scheduler))
  self._pipeline_stages = stages
  self._pipeline_result_broadcast = output_broadcast
@@ -658,8 +667,11 @@ class AutoParallel(Cell):
  - recomputation_communication_overlap (bool): Enable overlap between recompute ops and communication ops
  if True.
  Default: False.
- - grad_matmul_communication_overlap (bool): Enable overlap between dw matmul and
- tensor parallel communication ops if True. Default: False.
+ - grad_matmul_communication_overlap (bool, str): When set to ``True``, it indicates that overlap
+ between dw matmul and tensor parallel communication is enabled. When set to ``False``, it indicates
+ that this feature is disabled. When set to str, it only optimizes the specified communication
+ operator types, with operators separated by ``,``. For example, "AlltoAll,AlltoAllV" indicates that
+ only ``AlltoAll`` and ``AlltoAllV`` are optimized. Default: ``False``.
  - grad_fa_allgather_overlap (bool): Enable overlap between duplicated allgather by recomputing
  in sequence parallel and flashattentionscoregrad ops if True. Default: False.
  - enable_communication_fusion (bool): Enable communication fusion to optimize the number of
@@ -674,7 +686,9 @@ class AutoParallel(Cell):
  and optimizer parallel allgather communication if True. Currently, do not support
  `O2 <https://www.mindspore.cn/docs/en/master/api_python/mindspore/mindspore.JitConfig.html>`_
  Default: False.
- - computation_communication_fusion_level (int): Enable the fusion between compute and communicate.
+ - computation_communication_fusion_level (int): Enable the fusion between compute and communicate,
+ which fuses communication tasks and computing tasks, allows for partial pipelining and parallel
+ execution of these tasks during operation, thereby enhancing performance.
  Default: ``0``. Note: This function must be used with Ascend Training Solution 24.0.RC2 or later.
  This is an experimental configuration, may be changed or canceled in the future.
 
@@ -685,6 +699,12 @@ class AutoParallel(Cell):
  - 2: Apply fusion to backward nodes.
 
  - 3: Apply fusion to all nodes.
+
+ .. warning::
+ After setting ``export MS_ENABLE_LCCL=on``, the fusion operator based on memory semantics will be
+ used. Please note that this operator is still in an experimental stage and may be changed or
+ removed in the future.
+
  - dataset_broadcast_opt_level (int): Optimize the scenario that the dataset repeated reading. Only
  support O0/O1 jit level. It doesn't work in O2 mode. Default: ``0``.
 
@@ -728,5 +748,6 @@ class AutoParallel(Cell):
  self._transformer_opt_config = file_path
  ctx.ascend_config['parallel_speed_up_json_path'] = file_path
 
+ @jit
  def construct(self, *args, **kwargs):
  return self.network(*args, **kwargs)
@@ -15,10 +15,10 @@
  """Convert distributed checkpoint"""
  from __future__ import absolute_import
 
- from mindspore.parallel.checkpoint_transform import rank_list_for_transform, transform_checkpoint_by_rank, transform_checkpoints
+ __all__ = ["rank_list_for_convert", "convert_checkpoint_by_rank", "convert_checkpoints"]
 
- __all__ = ["rank_list_for_convert",
- "convert_checkpoint_by_rank", "convert_checkpoints"]
+ from mindspore.parallel.checkpoint_transform import rank_list_for_transform, transform_checkpoint_by_rank, \
+ transform_checkpoints
 
 
  def rank_list_for_convert(rank_id, src_strategy_file=None, dst_strategy_file=None):
@@ -582,6 +582,8 @@ def transform_checkpoints(src_checkpoints_dir, dst_checkpoints_dir, ckpt_prefix,
  The number of multiprocess settings is related to the size of the host, and it is not recommended to set it
  too large, otherwise it may cause freezing.
 
+ This function does not support converting remove_redundancy's checkpoint file.
+
  Args:
  src_checkpoints_dir (str): The source checkpoints directory.
  dst_checkpoints_dir (str): The destination checkpoints directory to save the converted checkpoints.
@@ -1163,6 +1165,8 @@ def load_distributed_checkpoint(network, checkpoint_filenames=None, predict_stra
  train_strategy_filename = ms.context.get_auto_parallel_context("strategy_ckpt_load_file")
 
  _train_strategy = build_searched_strategy(train_strategy_filename)
+ if not _train_strategy:
+ return True
  train_strategy = _convert_to_list(_train_strategy)
 
  train_dev_count = 1
@@ -1187,7 +1191,7 @@ def load_distributed_checkpoint(network, checkpoint_filenames=None, predict_stra
  param_not_in_ckpt = []
  for _, param in network.parameters_and_names():
  sliced_params = []
- if param.name not in rank_list.keys():
+ if param.name not in rank_list:
  param_not_in_strategy.append(param.name)
  continue
  if param.name not in param_total_dict: