mindspore 2.6.0rc1__cp311-cp311-win_amd64.whl → 2.7.0__cp311-cp311-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of mindspore might be problematic.

Files changed (458)
  1. mindspore/.commit_id +1 -1
  2. mindspore/Microsoft.VisualStudio.Telemetry.dll +0 -0
  3. mindspore/Newtonsoft.Json.dll +0 -0
  4. mindspore/__init__.py +2 -2
  5. mindspore/_c_dataengine.cp311-win_amd64.pyd +0 -0
  6. mindspore/_c_expression.cp311-win_amd64.pyd +0 -0
  7. mindspore/_c_mindrecord.cp311-win_amd64.pyd +0 -0
  8. mindspore/_checkparam.py +42 -11
  9. mindspore/_extends/builtin_operations.py +3 -3
  10. mindspore/{_deprecated → _extends/optimize}/__init__.py +9 -3
  11. mindspore/_extends/optimize/cell_utils.py +96 -0
  12. mindspore/_extends/parallel_compile/akg_compiler/custom.py +1109 -0
  13. mindspore/_extends/parallel_compile/akg_compiler/gen_custom_op_files.py +1 -1
  14. mindspore/_extends/parse/__init__.py +3 -3
  15. mindspore/_extends/parse/compile_config.py +44 -22
  16. mindspore/_extends/parse/deprecated/deprecated_tensor_method.py +1 -2
  17. mindspore/_extends/parse/parser.py +65 -84
  18. mindspore/_extends/parse/resources.py +39 -0
  19. mindspore/_extends/parse/standard_method.py +58 -14
  20. mindspore/_extends/parse/trope.py +8 -1
  21. mindspore/_extends/pijit/__init__.py +1 -2
  22. mindspore/_extends/pijit/pijit_func_white_list.py +2 -5
  23. mindspore/amp.py +4 -22
  24. mindspore/atlprov.dll +0 -0
  25. mindspore/avcodec-59.dll +0 -0
  26. mindspore/avdevice-59.dll +0 -0
  27. mindspore/avfilter-8.dll +0 -0
  28. mindspore/avformat-59.dll +0 -0
  29. mindspore/avutil-57.dll +0 -0
  30. mindspore/boost/adasum.py +1 -1
  31. mindspore/boost/boost_cell_wrapper.py +4 -4
  32. mindspore/c1.dll +0 -0
  33. mindspore/c1xx.dll +0 -0
  34. mindspore/c2.dll +0 -0
  35. mindspore/common/__init__.py +43 -12
  36. mindspore/common/_grad_function.py +2 -1
  37. mindspore/common/_pijit_context.py +28 -7
  38. mindspore/common/_stub_tensor.py +1 -209
  39. mindspore/common/_tensor_cpp_method.py +1 -1
  40. mindspore/common/_tensor_docs.py +178 -53
  41. mindspore/common/_utils.py +9 -1
  42. mindspore/common/api.py +377 -203
  43. mindspore/common/dtype.py +108 -57
  44. mindspore/common/dump.py +11 -16
  45. mindspore/common/dynamic_shape/__init__.py +0 -0
  46. mindspore/common/{auto_dynamic_shape.py → dynamic_shape/auto_dynamic_shape.py} +17 -23
  47. mindspore/common/dynamic_shape/enable_dynamic.py +197 -0
  48. mindspore/common/file_system.py +59 -9
  49. mindspore/common/generator.py +5 -3
  50. mindspore/common/hook_handle.py +33 -5
  51. mindspore/common/jit_config.py +1 -1
  52. mindspore/common/jit_trace.py +84 -105
  53. mindspore/common/np_dtype.py +3 -3
  54. mindspore/common/parameter.py +27 -29
  55. mindspore/common/recompute.py +5 -7
  56. mindspore/common/sparse_tensor.py +0 -3
  57. mindspore/common/symbol.py +0 -1
  58. mindspore/common/tensor.py +117 -131
  59. mindspore/communication/_comm_helper.py +46 -4
  60. mindspore/communication/management.py +79 -7
  61. mindspore/context.py +67 -55
  62. mindspore/dataset/__init__.py +1 -1
  63. mindspore/dataset/audio/transforms.py +1 -1
  64. mindspore/dataset/core/config.py +38 -4
  65. mindspore/dataset/engine/datasets.py +350 -322
  66. mindspore/dataset/engine/datasets_user_defined.py +70 -24
  67. mindspore/dataset/engine/iterators.py +2 -2
  68. mindspore/dataset/engine/obs/config_loader.py +2 -2
  69. mindspore/dataset/engine/obs/obs_mindrecord_dataset.py +8 -0
  70. mindspore/dataset/transforms/c_transforms.py +2 -2
  71. mindspore/dataset/transforms/py_transforms.py +7 -3
  72. mindspore/dataset/transforms/transforms.py +10 -6
  73. mindspore/dataset/vision/__init__.py +1 -1
  74. mindspore/dataset/vision/py_transforms.py +8 -8
  75. mindspore/dataset/vision/transforms.py +17 -5
  76. mindspore/dataset/vision/utils.py +632 -21
  77. mindspore/dataset/vision/validators.py +1 -0
  78. mindspore/device_context/ascend/device.py +1 -1
  79. mindspore/device_context/ascend/op_tuning.py +35 -1
  80. mindspore/device_context/gpu/__init__.py +2 -2
  81. mindspore/device_context/gpu/device.py +1 -1
  82. mindspore/device_context/gpu/op_precision.py +4 -2
  83. mindspore/device_context/gpu/op_tuning.py +6 -3
  84. mindspore/device_manager.py +16 -9
  85. mindspore/dnnl.dll +0 -0
  86. mindspore/dpcmi.dll +0 -0
  87. mindspore/experimental/llm_boost/ascend_native/llama_boost_ascend_native.py +3 -4
  88. mindspore/experimental/llm_boost/atb/boost_base.py +2 -3
  89. mindspore/experimental/optim/adadelta.py +13 -20
  90. mindspore/experimental/optim/adagrad.py +15 -22
  91. mindspore/experimental/optim/adam.py +17 -24
  92. mindspore/experimental/optim/adamax.py +14 -22
  93. mindspore/experimental/optim/adamw.py +28 -34
  94. mindspore/experimental/optim/asgd.py +15 -25
  95. mindspore/experimental/optim/lr_scheduler.py +27 -45
  96. mindspore/experimental/optim/nadam.py +14 -24
  97. mindspore/experimental/optim/optimizer.py +13 -23
  98. mindspore/experimental/optim/radam.py +18 -24
  99. mindspore/experimental/optim/rmsprop.py +14 -25
  100. mindspore/experimental/optim/rprop.py +15 -26
  101. mindspore/experimental/optim/sgd.py +9 -19
  102. mindspore/hal/__init__.py +4 -4
  103. mindspore/hal/contiguous_tensors_handle.py +2 -2
  104. mindspore/hal/memory.py +27 -7
  105. mindspore/include/api/cell.h +65 -5
  106. mindspore/include/api/cfg.h +24 -7
  107. mindspore/include/api/context.h +1 -0
  108. mindspore/include/api/delegate.h +10 -2
  109. mindspore/include/api/dual_abi_helper.h +100 -19
  110. mindspore/include/api/graph.h +14 -1
  111. mindspore/include/api/kernel.h +16 -3
  112. mindspore/include/api/kernel_api.h +9 -1
  113. mindspore/include/api/metrics/accuracy.h +9 -0
  114. mindspore/include/api/model.h +8 -1
  115. mindspore/include/api/model_group.h +4 -0
  116. mindspore/include/api/model_parallel_runner.h +2 -0
  117. mindspore/include/api/status.h +48 -10
  118. mindspore/include/api/types.h +8 -3
  119. mindspore/include/c_api/model_c.h +0 -58
  120. mindspore/include/c_api/tensor_c.h +0 -26
  121. mindspore/include/dataset/constants.h +9 -0
  122. mindspore/include/dataset/vision_ascend.h +1 -1
  123. mindspore/jpeg62.dll +0 -0
  124. mindspore/mindrecord/tools/cifar10.py +61 -11
  125. mindspore/mindrecord/tools/cifar10_to_mr.py +5 -0
  126. mindspore/mindspore_backend_common.dll +0 -0
  127. mindspore/mindspore_backend_manager.dll +0 -0
  128. mindspore/mindspore_common.dll +0 -0
  129. mindspore/mindspore_core.dll +0 -0
  130. mindspore/mindspore_cpu_res_manager.dll +0 -0
  131. mindspore/mindspore_dump.dll +0 -0
  132. mindspore/mindspore_frontend.dll +0 -0
  133. mindspore/mindspore_glog.dll +0 -0
  134. mindspore/mindspore_memory_pool.dll +0 -0
  135. mindspore/mindspore_ms_backend.dll +0 -0
  136. mindspore/mindspore_ops.dll +0 -0
  137. mindspore/mindspore_ops_host.dll +0 -0
  138. mindspore/mindspore_ops_kernel_common.dll +0 -0
  139. mindspore/mindspore_profiler.dll +0 -0
  140. mindspore/mindspore_pyboost.dll +0 -0
  141. mindspore/mindspore_pynative.dll +0 -0
  142. mindspore/mindspore_res_manager.dll +0 -0
  143. mindspore/mindspore_runtime_pipeline.dll +0 -0
  144. mindspore/mint/__init__.py +6 -46
  145. mindspore/mint/distributed/__init__.py +5 -0
  146. mindspore/mint/distributed/distributed.py +429 -23
  147. mindspore/mint/nn/__init__.py +1 -1
  148. mindspore/mint/nn/functional.py +53 -6
  149. mindspore/mint/nn/layer/_functions.py +163 -294
  150. mindspore/mint/nn/layer/activation.py +8 -6
  151. mindspore/mint/nn/layer/conv.py +140 -104
  152. mindspore/mint/nn/layer/normalization.py +11 -25
  153. mindspore/mint/optim/adam.py +19 -18
  154. mindspore/mint/optim/adamw.py +14 -8
  155. mindspore/mint/optim/sgd.py +5 -5
  156. mindspore/msobj140.dll +0 -0
  157. mindspore/mspdb140.dll +0 -0
  158. mindspore/mspdbcore.dll +0 -0
  159. mindspore/mspdbst.dll +0 -0
  160. mindspore/mspft140.dll +0 -0
  161. mindspore/msvcdis140.dll +0 -0
  162. mindspore/msvcp140_1.dll +0 -0
  163. mindspore/msvcp140_2.dll +0 -0
  164. mindspore/msvcp140_atomic_wait.dll +0 -0
  165. mindspore/msvcp140_codecvt_ids.dll +0 -0
  166. mindspore/nn/cell.py +491 -623
  167. mindspore/nn/grad/cell_grad.py +11 -12
  168. mindspore/nn/layer/activation.py +36 -36
  169. mindspore/nn/layer/basic.py +74 -77
  170. mindspore/nn/layer/channel_shuffle.py +4 -4
  171. mindspore/nn/layer/combined.py +4 -2
  172. mindspore/nn/layer/conv.py +117 -110
  173. mindspore/nn/layer/dense.py +9 -7
  174. mindspore/nn/layer/embedding.py +50 -52
  175. mindspore/nn/layer/image.py +38 -40
  176. mindspore/nn/layer/math.py +111 -112
  177. mindspore/nn/layer/normalization.py +56 -44
  178. mindspore/nn/layer/pooling.py +58 -63
  179. mindspore/nn/layer/rnn_cells.py +33 -33
  180. mindspore/nn/layer/rnns.py +56 -56
  181. mindspore/nn/layer/thor_layer.py +74 -73
  182. mindspore/nn/layer/transformer.py +11 -1
  183. mindspore/nn/learning_rate_schedule.py +20 -20
  184. mindspore/nn/loss/loss.py +79 -81
  185. mindspore/nn/optim/adam.py +4 -6
  186. mindspore/nn/optim/adasum.py +2 -2
  187. mindspore/nn/optim/asgd.py +2 -0
  188. mindspore/nn/optim/lamb.py +1 -3
  189. mindspore/nn/optim/optimizer.py +1 -1
  190. mindspore/nn/optim/tft_wrapper.py +2 -3
  191. mindspore/nn/optim/thor.py +2 -2
  192. mindspore/nn/probability/distribution/_utils/utils.py +2 -2
  193. mindspore/nn/probability/distribution/exponential.py +2 -1
  194. mindspore/nn/probability/distribution/poisson.py +2 -1
  195. mindspore/nn/sparse/sparse.py +3 -3
  196. mindspore/nn/wrap/cell_wrapper.py +73 -42
  197. mindspore/nn/wrap/grad_reducer.py +37 -52
  198. mindspore/nn/wrap/loss_scale.py +72 -74
  199. mindspore/numpy/array_creations.py +7 -7
  200. mindspore/numpy/fft.py +1 -1
  201. mindspore/numpy/math_ops.py +5 -5
  202. mindspore/numpy/utils_const.py +1 -1
  203. mindspore/opencv_core452.dll +0 -0
  204. mindspore/opencv_imgcodecs452.dll +0 -0
  205. mindspore/opencv_imgproc452.dll +0 -0
  206. mindspore/ops/_grad_experimental/grad_comm_ops.py +51 -13
  207. mindspore/ops/_grad_experimental/grad_debug_ops.py +14 -0
  208. mindspore/ops/_grad_experimental/grad_inner_ops.py +0 -9
  209. mindspore/ops/_op_impl/cpu/__init__.py +1 -0
  210. mindspore/{experimental/es/__init__.py → ops/_op_impl/cpu/joinedstr_op.py} +12 -6
  211. mindspore/ops/_vmap/vmap_array_ops.py +31 -13
  212. mindspore/ops/_vmap/vmap_nn_ops.py +8 -16
  213. mindspore/ops/auto_generate/cpp_create_prim_instance_helper.py +54 -13
  214. mindspore/ops/auto_generate/gen_extend_func.py +27 -145
  215. mindspore/ops/auto_generate/gen_ops_def.py +1027 -347
  216. mindspore/ops/auto_generate/gen_ops_prim.py +2341 -1117
  217. mindspore/ops/auto_generate/pyboost_inner_prim.py +31 -1
  218. mindspore/ops/composite/__init__.py +10 -0
  219. mindspore/ops/composite/base.py +9 -5
  220. mindspore/ops/composite/multitype_ops/__init__.py +12 -1
  221. mindspore/ops/composite/multitype_ops/_compile_utils.py +133 -109
  222. mindspore/ops/composite/multitype_ops/_constexpr_utils.py +1 -1
  223. mindspore/ops/composite/multitype_ops/add_impl.py +70 -2
  224. mindspore/ops/composite/multitype_ops/div_impl.py +49 -0
  225. mindspore/ops/composite/multitype_ops/floordiv_impl.py +29 -0
  226. mindspore/ops/composite/multitype_ops/getitem_impl.py +11 -0
  227. mindspore/ops/composite/multitype_ops/mod_impl.py +5 -3
  228. mindspore/ops/composite/multitype_ops/mul_impl.py +49 -0
  229. mindspore/ops/composite/multitype_ops/setitem_impl.py +57 -0
  230. mindspore/ops/composite/multitype_ops/sub_impl.py +34 -0
  231. mindspore/ops/composite/multitype_ops/zeros_like_impl.py +14 -0
  232. mindspore/ops/function/__init__.py +4 -1
  233. mindspore/ops/function/_add_attr_func.py +11 -6
  234. mindspore/ops/function/array_func.py +19 -102
  235. mindspore/ops/function/debug_func.py +8 -5
  236. mindspore/ops/function/grad/grad_func.py +5 -13
  237. mindspore/ops/function/math_func.py +77 -572
  238. mindspore/ops/function/nn_func.py +46 -94
  239. mindspore/ops/function/other_func.py +4 -1
  240. mindspore/ops/function/random_func.py +44 -5
  241. mindspore/ops/function/vmap_func.py +2 -1
  242. mindspore/ops/functional.py +4 -4
  243. mindspore/ops/functional_overload.py +594 -18
  244. mindspore/ops/op_info_register.py +21 -0
  245. mindspore/ops/operations/__init__.py +16 -11
  246. mindspore/ops/operations/_custom_ops_utils.py +689 -34
  247. mindspore/ops/operations/_inner_ops.py +14 -18
  248. mindspore/ops/operations/_sequence_ops.py +1 -1
  249. mindspore/ops/operations/array_ops.py +5 -51
  250. mindspore/ops/operations/comm_ops.py +186 -41
  251. mindspore/ops/operations/custom_ops.py +303 -177
  252. mindspore/ops/operations/debug_ops.py +59 -4
  253. mindspore/ops/operations/image_ops.py +13 -13
  254. mindspore/ops/operations/manually_defined/ops_def.py +27 -28
  255. mindspore/ops/operations/math_ops.py +8 -9
  256. mindspore/ops/operations/nn_ops.py +8 -40
  257. mindspore/ops/primitive.py +9 -20
  258. mindspore/ops/tensor_method.py +63 -15
  259. mindspore/ops_generate/api/cpp_create_prim_instance_helper_generator.py +1 -1
  260. mindspore/ops_generate/api/functional_map_cpp_generator.py +10 -9
  261. mindspore/ops_generate/api/functions_cc_generator.py +58 -10
  262. mindspore/ops_generate/api/tensor_func_reg_cpp_generator.py +1 -1
  263. mindspore/ops_generate/common/base_generator.py +14 -0
  264. mindspore/ops_generate/common/gen_constants.py +8 -3
  265. mindspore/ops_generate/common/gen_utils.py +0 -19
  266. mindspore/ops_generate/common/op_proto.py +11 -4
  267. mindspore/ops_generate/common/template.py +88 -11
  268. mindspore/ops_generate/gen_ops.py +1 -1
  269. mindspore/ops_generate/op_def/lite_ops_cpp_generator.py +4 -4
  270. mindspore/ops_generate/op_def/ops_def_cc_generator.py +0 -3
  271. mindspore/ops_generate/op_def/ops_name_h_generator.py +0 -3
  272. mindspore/ops_generate/op_def/ops_primitive_h_generator.py +0 -4
  273. mindspore/ops_generate/op_def_py/op_prim_py_generator.py +5 -2
  274. mindspore/ops_generate/pyboost/auto_grad_impl_cc_generator.py +49 -8
  275. mindspore/ops_generate/pyboost/auto_grad_reg_cc_generator.py +2 -2
  276. mindspore/ops_generate/pyboost/gen_pyboost_func.py +31 -16
  277. mindspore/ops_generate/pyboost/op_template_parser.py +98 -72
  278. mindspore/ops_generate/pyboost/pyboost_functions_cpp_generator.py +70 -273
  279. mindspore/ops_generate/pyboost/pyboost_functions_h_generator.py +14 -6
  280. mindspore/ops_generate/pyboost/pyboost_functions_impl_cpp_generator.py +316 -0
  281. mindspore/ops_generate/pyboost/pyboost_functions_py_generator.py +1 -1
  282. mindspore/ops_generate/pyboost/pyboost_grad_function_cpp_generator.py +5 -3
  283. mindspore/ops_generate/pyboost/pyboost_inner_prim_generator.py +1 -1
  284. mindspore/ops_generate/pyboost/pyboost_internal_functions_cpp_generator.py +76 -0
  285. mindspore/ops_generate/pyboost/pyboost_internal_functions_h_generator.py +76 -0
  286. mindspore/ops_generate/pyboost/pyboost_internal_kernel_info_adapter_generator.py +125 -0
  287. mindspore/ops_generate/pyboost/pyboost_native_grad_functions_generator.py +4 -3
  288. mindspore/ops_generate/pyboost/pyboost_op_cpp_code_generator.py +348 -61
  289. mindspore/ops_generate/pyboost/pyboost_overload_functions_cpp_generator.py +1 -1
  290. mindspore/ops_generate/pyboost/pyboost_utils.py +118 -9
  291. mindspore/ops_generate/tensor_py_cc_generator.py +1 -24
  292. mindspore/parallel/_auto_parallel_context.py +16 -23
  293. mindspore/parallel/_cell_wrapper.py +113 -45
  294. mindspore/parallel/_parallel_serialization.py +4 -3
  295. mindspore/parallel/_ps_context.py +4 -6
  296. mindspore/parallel/_tensor.py +167 -12
  297. mindspore/parallel/_transformer/moe.py +1 -1
  298. mindspore/parallel/_transformer/transformer.py +17 -12
  299. mindspore/parallel/_utils.py +5 -11
  300. mindspore/parallel/auto_parallel.py +35 -14
  301. mindspore/parallel/checkpoint_convert.py +3 -3
  302. mindspore/parallel/checkpoint_transform.py +13 -7
  303. mindspore/parallel/cluster/process_entity/_api.py +88 -49
  304. mindspore/parallel/cluster/process_entity/_utils.py +95 -7
  305. mindspore/parallel/cluster/run.py +48 -7
  306. mindspore/parallel/function/__init__.py +8 -1
  307. mindspore/parallel/function/reshard_func.py +12 -12
  308. mindspore/parallel/nn/__init__.py +15 -2
  309. mindspore/parallel/nn/parallel_cell_wrapper.py +50 -14
  310. mindspore/parallel/nn/parallel_grad_reducer.py +7 -14
  311. mindspore/parallel/shard.py +10 -25
  312. mindspore/parallel/transform_safetensors.py +469 -174
  313. mindspore/pgodb140.dll +0 -0
  314. mindspore/pgort140.dll +0 -0
  315. mindspore/profiler/__init__.py +2 -1
  316. mindspore/profiler/analysis/parser/timeline_assembly_factory/ascend_timeline_assembler.py +7 -7
  317. mindspore/profiler/analysis/parser/timeline_assembly_factory/base_timeline_assembler.py +3 -0
  318. mindspore/profiler/analysis/parser/timeline_assembly_factory/trace_view_container.py +12 -6
  319. mindspore/profiler/analysis/parser/timeline_creator/cpu_op_timeline_creator.py +3 -3
  320. mindspore/profiler/analysis/parser/timeline_creator/fwk_timeline_creator.py +3 -3
  321. mindspore/profiler/analysis/parser/timeline_creator/msprof_timeline_creator.py +4 -4
  322. mindspore/profiler/analysis/parser/timeline_creator/scope_layer_timeline_creator.py +3 -3
  323. mindspore/profiler/analysis/parser/timeline_event/fwk_event.py +4 -1
  324. mindspore/profiler/analysis/parser/timeline_event/timeline_event_pool.py +2 -1
  325. mindspore/profiler/analysis/task_manager.py +1 -1
  326. mindspore/profiler/analysis/viewer/ascend_communication_viewer.py +5 -1
  327. mindspore/profiler/analysis/viewer/ascend_integrate_viewer.py +2 -1
  328. mindspore/profiler/analysis/viewer/ascend_kernel_details_viewer.py +10 -9
  329. mindspore/profiler/analysis/viewer/ascend_op_memory_viewer.py +43 -23
  330. mindspore/profiler/analysis/viewer/ascend_step_trace_time_viewer.py +3 -2
  331. mindspore/profiler/analysis/viewer/ms_minddata_viewer.py +9 -5
  332. mindspore/profiler/analysis/viewer/ms_operator_details_viewer.py +132 -0
  333. mindspore/profiler/common/constant.py +16 -0
  334. mindspore/profiler/common/msprof_cmd_tool.py +2 -2
  335. mindspore/profiler/common/path_manager.py +9 -0
  336. mindspore/profiler/common/profiler_context.py +50 -29
  337. mindspore/profiler/common/profiler_info.py +0 -16
  338. mindspore/profiler/common/profiler_meta_data.py +1 -0
  339. mindspore/profiler/common/profiler_op_analyse.py +239 -0
  340. mindspore/profiler/common/profiler_output_path.py +23 -8
  341. mindspore/profiler/common/profiler_parameters.py +128 -35
  342. mindspore/profiler/dynamic_profile/__init__.py +0 -0
  343. mindspore/profiler/dynamic_profile/dynamic_monitor_proxy.py +39 -0
  344. mindspore/profiler/dynamic_profile/dynamic_profiler_config_context.py +666 -0
  345. mindspore/profiler/dynamic_profile/dynamic_profiler_utils.py +62 -0
  346. mindspore/profiler/dynamic_profiler.py +374 -338
  347. mindspore/profiler/envprofiler.py +42 -12
  348. mindspore/profiler/experimental_config.py +112 -7
  349. mindspore/profiler/mstx.py +33 -12
  350. mindspore/profiler/platform/__init__.py +2 -3
  351. mindspore/profiler/platform/cpu_profiler.py +10 -4
  352. mindspore/profiler/platform/npu_profiler.py +30 -20
  353. mindspore/profiler/profiler.py +218 -154
  354. mindspore/profiler/profiler_action_controller.py +65 -77
  355. mindspore/profiler/profiler_interface.py +2 -2
  356. mindspore/profiler/schedule.py +10 -4
  357. mindspore/rewrite/common/config.py +1 -0
  358. mindspore/rewrite/common/namer.py +1 -0
  359. mindspore/rewrite/common/namespace.py +1 -0
  360. mindspore/rewrite/node/node.py +31 -11
  361. mindspore/rewrite/parsers/assign_parser.py +1 -1
  362. mindspore/rewrite/symbol_tree/symbol_tree.py +2 -2
  363. mindspore/run_check/_check_version.py +7 -10
  364. mindspore/runtime/__init__.py +8 -6
  365. mindspore/runtime/event.py +10 -4
  366. mindspore/runtime/executor.py +87 -45
  367. mindspore/runtime/memory.py +31 -32
  368. mindspore/runtime/thread_bind_core.py +299 -165
  369. mindspore/safeguard/rewrite_obfuscation.py +12 -13
  370. mindspore/swresample-4.dll +0 -0
  371. mindspore/swscale-6.dll +0 -0
  372. mindspore/tbbmalloc.dll +0 -0
  373. mindspore/tinyxml2.dll +0 -0
  374. mindspore/train/_utils.py +17 -7
  375. mindspore/train/amp.py +43 -23
  376. mindspore/train/callback/__init__.py +5 -5
  377. mindspore/train/callback/_callback.py +2 -1
  378. mindspore/train/callback/_checkpoint.py +4 -14
  379. mindspore/train/callback/_flops_collector.py +11 -7
  380. mindspore/train/callback/_landscape.py +0 -1
  381. mindspore/train/callback/_train_fault_tolerance.py +98 -21
  382. mindspore/train/data_sink.py +15 -6
  383. mindspore/train/dataset_helper.py +14 -5
  384. mindspore/train/model.py +133 -69
  385. mindspore/train/serialization.py +168 -126
  386. mindspore/train/summary/summary_record.py +13 -2
  387. mindspore/train/train_thor/model_thor.py +2 -2
  388. mindspore/turbojpeg.dll +0 -0
  389. mindspore/utils/__init__.py +3 -2
  390. mindspore/utils/dryrun.py +0 -6
  391. mindspore/utils/runtime_execution_order_check.py +163 -77
  392. mindspore/utils/sdc_detect.py +68 -0
  393. mindspore/utils/utils.py +14 -17
  394. mindspore/vcmeta.dll +0 -0
  395. mindspore/vcruntime140.dll +0 -0
  396. mindspore/vcruntime140_1.dll +0 -0
  397. mindspore/version.py +1 -1
  398. {mindspore-2.6.0rc1.dist-info → mindspore-2.7.0.dist-info}/METADATA +5 -4
  399. {mindspore-2.6.0rc1.dist-info → mindspore-2.7.0.dist-info}/RECORD +403 -442
  400. mindspore/_deprecated/jit.py +0 -198
  401. mindspore/_extends/remote/kernel_build_server_ascend.py +0 -75
  402. mindspore/communication/_hccl_management.py +0 -297
  403. mindspore/experimental/es/embedding_service.py +0 -891
  404. mindspore/experimental/es/embedding_service_layer.py +0 -581
  405. mindspore/profiler/common/validator/__init__.py +0 -14
  406. mindspore/profiler/common/validator/validate_path.py +0 -84
  407. mindspore/profiler/parser/__init__.py +0 -14
  408. mindspore/profiler/parser/aicpu_data_parser.py +0 -272
  409. mindspore/profiler/parser/ascend_analysis/__init__.py +0 -14
  410. mindspore/profiler/parser/ascend_analysis/constant.py +0 -71
  411. mindspore/profiler/parser/ascend_analysis/file_manager.py +0 -180
  412. mindspore/profiler/parser/ascend_analysis/function_event.py +0 -185
  413. mindspore/profiler/parser/ascend_analysis/fwk_cann_parser.py +0 -136
  414. mindspore/profiler/parser/ascend_analysis/fwk_file_parser.py +0 -131
  415. mindspore/profiler/parser/ascend_analysis/msprof_timeline_parser.py +0 -104
  416. mindspore/profiler/parser/ascend_analysis/path_manager.py +0 -313
  417. mindspore/profiler/parser/ascend_analysis/profiler_info_parser.py +0 -123
  418. mindspore/profiler/parser/ascend_analysis/tlv_decoder.py +0 -86
  419. mindspore/profiler/parser/ascend_analysis/trace_event_manager.py +0 -75
  420. mindspore/profiler/parser/ascend_cluster_generator.py +0 -116
  421. mindspore/profiler/parser/ascend_communicate_generator.py +0 -314
  422. mindspore/profiler/parser/ascend_flops_generator.py +0 -116
  423. mindspore/profiler/parser/ascend_fpbp_generator.py +0 -82
  424. mindspore/profiler/parser/ascend_hccl_generator.py +0 -271
  425. mindspore/profiler/parser/ascend_integrate_generator.py +0 -42
  426. mindspore/profiler/parser/ascend_memory_generator.py +0 -185
  427. mindspore/profiler/parser/ascend_msprof_exporter.py +0 -282
  428. mindspore/profiler/parser/ascend_msprof_generator.py +0 -187
  429. mindspore/profiler/parser/ascend_op_generator.py +0 -334
  430. mindspore/profiler/parser/ascend_steptrace_generator.py +0 -94
  431. mindspore/profiler/parser/ascend_timeline_generator.py +0 -545
  432. mindspore/profiler/parser/base_timeline_generator.py +0 -483
  433. mindspore/profiler/parser/container.py +0 -229
  434. mindspore/profiler/parser/cpu_gpu_timeline_generator.py +0 -697
  435. mindspore/profiler/parser/flops_parser.py +0 -531
  436. mindspore/profiler/parser/framework_enum.py +0 -111
  437. mindspore/profiler/parser/framework_parser.py +0 -464
  438. mindspore/profiler/parser/framework_struct.py +0 -61
  439. mindspore/profiler/parser/gpu_analysis/__init__.py +0 -14
  440. mindspore/profiler/parser/gpu_analysis/function_event.py +0 -44
  441. mindspore/profiler/parser/gpu_analysis/fwk_file_parser.py +0 -89
  442. mindspore/profiler/parser/gpu_analysis/profiler_info_parser.py +0 -72
  443. mindspore/profiler/parser/hccl_parser.py +0 -573
  444. mindspore/profiler/parser/hwts_log_parser.py +0 -122
  445. mindspore/profiler/parser/integrator.py +0 -526
  446. mindspore/profiler/parser/memory_usage_parser.py +0 -277
  447. mindspore/profiler/parser/minddata_analyzer.py +0 -800
  448. mindspore/profiler/parser/minddata_parser.py +0 -186
  449. mindspore/profiler/parser/minddata_pipeline_parser.py +0 -299
  450. mindspore/profiler/parser/op_intermediate_parser.py +0 -149
  451. mindspore/profiler/parser/optime_parser.py +0 -250
  452. mindspore/profiler/parser/profiler_info.py +0 -213
  453. mindspore/profiler/parser/step_trace_parser.py +0 -666
  454. mindspore/utils/hooks.py +0 -81
  455. /mindspore/common/{_auto_dynamic.py → dynamic_shape/_auto_dynamic.py} +0 -0
  456. {mindspore-2.6.0rc1.dist-info → mindspore-2.7.0.dist-info}/WHEEL +0 -0
  457. {mindspore-2.6.0rc1.dist-info → mindspore-2.7.0.dist-info}/entry_points.txt +0 -0
  458. {mindspore-2.6.0rc1.dist-info → mindspore-2.7.0.dist-info}/top_level.txt +0 -0
@@ -65,7 +65,6 @@ _set_ps_context_func_map = {
  "enable_ssl": ps_context().set_enable_ssl,
  "client_password": ps_context().set_client_password,
  "server_password": ps_context().set_server_password,
- "scheduler_manage_port": ps_context().set_scheduler_manage_port,
  "config_file_path": ps_context().set_config_file_path,
  }

@@ -78,12 +77,10 @@ _get_ps_context_func_map = {
  "scheduler_ip": ps_context().scheduler_ip,
  "scheduler_port": ps_context().scheduler_port,
  "enable_ssl": ps_context().enable_ssl,
- "client_password": ps_context().client_password,
- "server_password": ps_context().server_password,
- "scheduler_manage_port": ps_context().scheduler_manage_port,
  "config_file_path": ps_context().config_file_path,
  }

+
  def _get_ps_mode_rank():
  ps_rank = ps_context().ps_rank_id()
  if ps_rank == -1:
@@ -116,8 +113,9 @@ def _set_ps_context(**kwargs):
  Only after enable_ps is set True, the environment variables will be effective.
  Default: ``False``.
  config_file_path (str): Configuration file path used by recovery. Default: ''.
- scheduler_manage_port (int): scheduler manage port used to scale out/in. Default: 11202.
  enable_ssl (bool): Set PS SSL mode enabled or disabled. Default: ``False``.
+ When set to False, users need to review and confirm the security of network environment
+ where the distributed job is located.
  client_password (str): Password to decrypt the secret key stored in the client certificate. Default: ''.
  server_password (str): Password to decrypt the secret key stored in the server certificate. Default: ''.

@@ -126,7 +124,7 @@ def _set_ps_context(**kwargs):

  Examples:
  >>> import mindspore as ms
- >>> ms.set_ps_context(enable_ps=True, enable_ssl=True, client_password='123456', server_password='123456')
+ >>> ms.set_ps_context(enable_ps=True, enable_ssl=True, client_password='', server_password='')
  """
  for key, value in kwargs.items():
  if key not in _set_ps_context_func_map:
@@ -21,9 +21,11 @@ import numpy as np
  from mindspore.common.tensor import Tensor
  from mindspore.communication.management import get_rank, get_group_size
  from mindspore._c_expression import TensorTransform
+ from mindspore import log as logger

  _tensor_transform = TensorTransform.get_instance()
-
+ COMM_TENSOR_CELL_CACHE = {}
+ RESHARD_OP_MAP_CACHE = {}

  def _get_tensor_strategy(dev_mat, tensor_map):
  """
@@ -348,7 +350,7 @@ def _extract_layout_item(layout_item):
  return dev_matrix, tensor_map, opt_shard_step, opt_shard_size


- def _transform_tensor_by_layout(from_layout, to_layout, device_list, rank_id):
+ def _transform_tensor_by_layout(from_layout, to_layout, device_list, rank_id, enable_redist_opt=False):
  """
  Transform tensor from source layout to the destination layout.

@@ -362,7 +364,7 @@ def _transform_tensor_by_layout(from_layout, to_layout, device_list, rank_id):
  """
  if not isinstance(from_layout, tuple) or not isinstance(to_layout, tuple):
  raise TypeError("The layout should be tuple! layout is {} and {}".format(from_layout, to_layout))
- return _tensor_transform.transform_tensor_sharding(from_layout, to_layout, device_list, rank_id)
+ return _tensor_transform.transform_tensor_sharding(from_layout, to_layout, device_list, enable_redist_opt, rank_id)


  def _construct_from_to_tensor_layout(from_full_tensor_shape, from_dev_matrix,
@@ -587,13 +589,15 @@ def _get_needed_rank_list_by_layouts(from_tensor_layout, to_tensor_layout, devic
  return result_list


- def _get_needed_rank_transform_operator_map_by_layouts(from_tensor_layout, to_tensor_layout, device_list, self_rank):
+ def _get_needed_rank_transform_operator_map_by_layouts(from_tensor_layout, to_tensor_layout, device_list, self_rank,
+ enable_redist_opt=False):
  """
  AllGather op: {op_name, group_ranks + axis}
  """
  stack = []
  index = 0
- transform_operators = _transform_tensor_by_layout(from_tensor_layout, to_tensor_layout, device_list, self_rank)
+ transform_operators = _transform_tensor_by_layout(from_tensor_layout, to_tensor_layout, device_list, self_rank,
+ enable_redist_opt)
  result_map = {self_rank: transform_operators}
  for operators in transform_operators:
  op_name = operators[0]
@@ -606,7 +610,7 @@ def _get_needed_rank_transform_operator_map_by_layouts(from_tensor_layout, to_te
  for rank in group_info[1]:
  if rank not in result_map:
  new_transform_operators = _transform_tensor_by_layout(from_tensor_layout, to_tensor_layout,
- device_list, rank)
+ device_list, rank, enable_redist_opt)
  result_map[rank] = new_transform_operators
  index = 0
  for operators in new_transform_operators:
@@ -710,8 +714,6 @@ def _apply_operator(operator_name):
  Returns:
  The data of tensor after apply operator.
  """
- if str(type(numpy_data)) == "<class 'builtins.PySafeSlice'>":
- numpy_data = numpy_data[:]
  if not isinstance(numpy_data, np.ndarray):
  raise TypeError("The data should be a numpy.ndarray.")
  _check_operator(reshape_op)
@@ -732,10 +734,7 @@ def _apply_operator(operator_name):
  raise TypeError("The data_list should be a list.")
  new_numpy_data_list = []
  for numpy_data in numpy_data_list:
- if str(type(numpy_data)) == "<class 'builtins.PySafeSlice'>":
- new_numpy_data_list.append(numpy_data[:])
- else:
- new_numpy_data_list.append(numpy_data)
+ new_numpy_data_list.append(numpy_data)
  numpy_data_list = new_numpy_data_list
  _check_operator(allgather_op)
  concat_group = allgather_op[1][:-1]
@@ -896,3 +895,159 @@ def _chunk_shape(np_tensor, strategy, depth):
  output.extend(
  _chunk_shape(ret_, strategy[len(strategy) - depth + 1:len(strategy)], depth - 1))
  return output
+
+
+ def _infer_pp_op_map(from_layout, to_layout, self_rank):
+ """
+ get the ops map for merging pp stages
+ """
+ from_rank_list = from_layout[3]
+ to_rank_list = to_layout[3]
+ from_dev_num_in_stage = len(from_rank_list)
+ current_rank_stage_id = self_rank // from_dev_num_in_stage
+ diff_rank_id = [
+ rank_id for rank_id in to_rank_list if rank_id not in from_rank_list]
+ end_stage = from_dev_num_in_stage * (current_rank_stage_id + 1)
+ start_stage = from_dev_num_in_stage * current_rank_stage_id
+ rank_pos_in_stage = list(range(start_stage, end_stage)).index(self_rank)
+ root_idx = from_rank_list[rank_pos_in_stage]
+ broadcast_rank_list = [root_idx]
+ while rank_pos_in_stage < len(diff_rank_id):
+ broadcast_rank_list.append(diff_rank_id[rank_pos_in_stage])
+ rank_pos_in_stage += from_dev_num_in_stage
+ broadcast_rank_list.sort()
+ broadcast_map = {rank_id: [('Broadcast', root_idx, broadcast_rank_list)] for rank_id in broadcast_rank_list}
+ return broadcast_map
+
+
+ def _get_pipeline_operator_map(from_layout, to_layout, self_rank):
+ """
+ If src_pp_stages is greater than dst_pp_stages, the weights of the corresponding cards need to
+ be communicated via broadcast to swap. Need to communicate src rank0's 01 to src rank2,
+ so that rank2 holds param0's data. Similarly, communicate rank1's 02 to rank3
+ rank0 01 01 11
+ rank1 02 02 12
+ pp2 -------> pp1
+ rank2 11 03 13
+ rank3 12 04 14
+
+ Args:
+ from_layout (tuple): Use tuple to present layout
+ (device_matrix(list), tensor_map(list), global_shape(list), rank_list(list))
+ to_layout (tuple): Use tuple to present layout
+ (device_matrix(list), tensor_map(list), global_shape(list), rank_list(list))
+ self_rank (int): rank_id
+ """
+ if len(from_layout[3]) < len(to_layout[3]):
+ logger.debug(f"from {from_layout} to {to_layout} need to broadcast data across pp stages")
+ comm_tensor_cache_key = (
+ f"{from_layout[0]}, {from_layout[1]}, {from_layout[2]}, {from_layout[3]}"
+ f" -> "
+ f"{to_layout[0]}, {to_layout[1]}, {from_layout[2]}, {to_layout[3]}")
+ global COMM_TENSOR_CELL_CACHE
+ if comm_tensor_cache_key not in COMM_TENSOR_CELL_CACHE:
+ logger.debug(f"comm_tensor_cache_key is {comm_tensor_cache_key}, not match cache")
+ broadcast_map = _infer_pp_op_map(from_layout, to_layout, self_rank)
+ broadcast_op_map_dict = {rank_id: broadcast_map for rank_id in broadcast_map.keys()}
+ COMM_TENSOR_CELL_CACHE[comm_tensor_cache_key] = broadcast_op_map_dict
+ else:
+ comm_tensor_cache_key_rank_list = COMM_TENSOR_CELL_CACHE[comm_tensor_cache_key]
+ if self_rank in comm_tensor_cache_key_rank_list:
+ logger.debug(f"comm_tensor_cache_key is {comm_tensor_cache_key}, match cache")
+ broadcast_map = comm_tensor_cache_key_rank_list[self_rank]
+ else:
+ logger.debug(f"comm_tensor_cache_key is {comm_tensor_cache_key}, but rank {self_rank} not match cache")
+ broadcast_map = _infer_pp_op_map(from_layout, to_layout, self_rank)
+ for rank_id in broadcast_map.keys():
+ COMM_TENSOR_CELL_CACHE[comm_tensor_cache_key][rank_id] = broadcast_map
+ return broadcast_map
+ logger.debug(f"from {from_layout} to {to_layout} no need to broadcast data across pp stages")
+ return {}
+
+
+ def _is_multi_shard(in_tensor_map):
+ """
+ whether the input tensor map is in multi shard
+ """
+ for tensor_map in in_tensor_map:
+ if isinstance(tensor_map, (list, tuple)) and len(tensor_map) > 1:
+ return True
+ return False
+
+
+ def _insert_expand_layout_reshape(param_rank_map, from_info_tuple, to_info_tuple,
+ insert_from_reshape, insert_to_reshape):
+ """ insert layout expand op reshape """
+ from_dev_matrix = from_info_tuple[0]
+ from_tensor_map = from_info_tuple[1]
+ from_full_tensor_shape = from_info_tuple[2]
+ to_dev_matrix_origin = to_info_tuple[0]
+ to_tensor_map_origin = to_info_tuple[1]
+ origin_tensor_shape = to_info_tuple[2]
+ for param_rank, _ in param_rank_map.items():
+ if insert_from_reshape:
+ from_slice_tensor_shape = ()
+ from_tensor_strategy = _get_tensor_strategy(from_dev_matrix, from_tensor_map)
+ for i, item in enumerate(from_full_tensor_shape):
+ from_slice_tensor_shape += (item // from_tensor_strategy[i],)
+ param_rank_map.get(param_rank).insert(0, ('Reshape', list(from_slice_tensor_shape)))
+ if insert_to_reshape:
+ to_tensor_strategy = _get_tensor_strategy(to_dev_matrix_origin, to_tensor_map_origin)
+ to_slice_tensor_shape = ()
+ for i, item in enumerate(origin_tensor_shape):
+ to_slice_tensor_shape += (item // to_tensor_strategy[i],)
+ param_rank_map.get(param_rank).append(('Reshape', list(to_slice_tensor_shape)))
+
+
+ def _infer_reshard_op_map(from_layout, to_layout, self_rank):
+ """infer reshard op map"""
+ from_layout_without_rank_list = from_layout[:-1]
+ to_layout_without_rank_list = to_layout[:-1]
+ if _is_multi_shard(from_layout[1]):
+ # ((2, 1), 1) --> (2, 1, 1) expand tensormap
+ new_layout = _expand_layout(from_layout[0], from_layout[1], from_layout[2])
+ from_layout_without_rank_list = (new_layout[0], new_layout[1], new_layout[2])
+ if _is_multi_shard(to_layout[1]):
+ new_layout = _expand_layout(to_layout[0], to_layout[1], to_layout[2])
+ to_layout_without_rank_list = (new_layout[0], new_layout[1], new_layout[2])
+ operator_map = _get_needed_rank_transform_operator_map_by_layouts(from_layout_without_rank_list,
+ to_layout_without_rank_list,
+ from_layout[3], self_rank,
+ True)
+ new_to_layout_info = to_layout[:-1]
+ _insert_expand_layout_reshape(operator_map, from_layout_without_rank_list, new_to_layout_info,
+ _is_multi_shard(from_layout[1]), _is_multi_shard(to_layout[1]))
+ return operator_map
+
+
+ def _get_resharding_operator_map(from_layout, to_layout, self_rank):
+ """
+ Args:
+ from_layout (tuple): Use tuple to present layout
+ (device_matrix(list), tensor_map(list), global_shape(list), rank_list(list))
+ to_layout (tuple): Use tuple to present layout
+ (device_matrix(list), tensor_map(list), global_shape(list), rank_list(list))
+ self_rank (int): rank_id
+ """
+ reshard_op_cache_key = (
+ f"{from_layout[0]}, {from_layout[1]}, {from_layout[2]}, {from_layout[3]}"
+ f" -> "
+ f"{to_layout[0]}, {to_layout[1]}, {from_layout[2]}, {to_layout[3]}")
+ global RESHARD_OP_MAP_CACHE
+ if reshard_op_cache_key not in RESHARD_OP_MAP_CACHE:
+ operator_map = _infer_reshard_op_map(from_layout, to_layout, self_rank)
+ op_map_dict = {rank_id: operator_map for rank_id in operator_map}
+ RESHARD_OP_MAP_CACHE[reshard_op_cache_key] = op_map_dict
+ logger.debug(f"reshard_op_cache_key is {reshard_op_cache_key}, not match cache")
+ else:
+ cache_rank_list_dict = RESHARD_OP_MAP_CACHE[reshard_op_cache_key]
+ if self_rank in cache_rank_list_dict:
+ operator_map = cache_rank_list_dict[self_rank]
+ logger.debug(f"reshard_op_cache_key is {reshard_op_cache_key}, match cache")
+ else:
+ logger.debug(f"reshard_op_cache_key is {reshard_op_cache_key}, "
+ f"but rank {self_rank} is not match cache")
+ operator_map = _infer_reshard_op_map(from_layout, to_layout, self_rank)
+ for rank_id in operator_map:
+ RESHARD_OP_MAP_CACHE[reshard_op_cache_key][rank_id] = operator_map
+ return operator_map
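
The new _infer_pp_op_map helper reads only the rank lists (from_layout[3], to_layout[3]) and the caller's rank, so its output can be traced without a real cluster. Below is a minimal sketch of the pp2 -> pp1 case illustrated in the _get_pipeline_operator_map docstring above, assuming the helper stays importable from the private mindspore.parallel._tensor module; the device_matrix/tensor_map/global_shape entries are placeholders that this particular function ignores:

    from mindspore.parallel._tensor import _infer_pp_op_map

    # Source stage 0 owns ranks [0, 1]; the destination layout spans ranks [0, 1, 2, 3].
    from_layout = ([2], [0], [16], [0, 1])        # placeholder layout tuple
    to_layout = ([4], [0], [16], [0, 1, 2, 3])    # placeholder layout tuple

    # Rank 0 becomes the broadcast root of the group [0, 2], matching the figure in the
    # docstring (rank0's slice is broadcast to rank2).
    print(_infer_pp_op_map(from_layout, to_layout, self_rank=0))
    # {0: [('Broadcast', 0, [0, 2])], 2: [('Broadcast', 0, [0, 2])]}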
@@ -111,7 +111,7 @@ def _check_moe_config(moe_config=None, parallel_config=None):
  """
  if not isinstance(moe_config, MoEConfig):
  raise TypeError(f"'moe_config' must be an instance of MoEConfig, but got {type(moe_config).__name__}.")
- use_moe = (moe_config.expert_num > 1)
+ use_moe = moe_config.expert_num > 1
  if use_moe is False:
  return
  if moe_config.expert_num % parallel_config.expert_parallel != 0:
@@ -1365,7 +1365,12 @@ class MultiHeadAttention(Cell):
  class TransformerEncoderLayer(Cell):
  r"""
  Transformer Encoder Layer. This is an implementation of the single layer of the transformer
- encoder layer, including multihead attention and feedward layer.
+ encoder layer, mainly including Multi-Head Attention, Feed Forward, Add and LayerNorm layer.
+
+ The TransformerEncoderLayer structure is shown in the following figure:
+
+ .. image:: ../images/TransformerEncoderLayer.png
+ :align: center

  Args:
  batch_size(int): The batch size of the input tensor when do increnmental prediction. Should be a positive
@@ -1448,7 +1453,7 @@ class TransformerEncoderLayer(Cell):
  >>> # When use use_past=True, it includes two steps to implement the incremental prediction.
  >>> # Step 1: set is_first_iteration=True, and input the full sequence length's state.
  >>> batch_valid_length = Tensor(np.ones((2,)), mstype.int32)
- >>> init_reset = Tensor([True], mstype.bool_)
+ >>> init_reset = Tensor([True], mstype.bool)
  >>> # Set is_first_iteration=True to generate the full memory states
  >>> model = TransformerEncoderLayer(batch_size=2, hidden_size=8, ffn_hidden_size=64, seq_length=16,
  ... num_heads=2, use_past=True)
@@ -1462,7 +1467,7 @@ class TransformerEncoderLayer(Cell):
  (2, 2, 16, 4)
  >>> encoder_input_value = Tensor(np.ones((2, 1, 8)), mstype.float32)
  >>> encoder_input_mask = Tensor(np.ones((2, 1, 16)), mstype.float16)
- >>> init_reset = Tensor([False], mstype.bool_)
+ >>> init_reset = Tensor([False], mstype.bool)
  >>> # Step 2: set is_first_iteration=False, and pass the single word to run the prediction rather than
  >>> # the full sequence.
  >>> model.add_flags_recursive(is_first_iteration=False)
@@ -1532,7 +1537,7 @@ class TransformerEncoderLayer(Cell):
  "and parallel_config. model_parallel is {}."
  .format(ffn_hidden_size, parallel_config.model_parallel))
  _check_moe_config(moe_config, parallel_config)
- self.use_moe = (moe_config.expert_num > 1)
+ self.use_moe = moe_config.expert_num > 1
  self.use_past = use_past
  self.seq_length = seq_length
  self.hidden_size = hidden_size
@@ -1607,7 +1612,7 @@ class TransformerEncoderLayer(Cell):
  "and parallel_config. model_parallel is {}."
  .format(ffn_hidden_size, parallel_config.model_parallel))
  _check_moe_config(moe_config, parallel_config)
- self.use_moe = (moe_config.expert_num > 1)
+ self.use_moe = moe_config.expert_num > 1
  self.use_past = use_past
  self.seq_length = seq_length
  self.hidden_size = hidden_size
@@ -1902,7 +1907,7 @@ class TransformerDecoderLayer(Cell):
  parallel_config=default_dpmp_config):
  super(TransformerDecoderLayer, self).__init__()
  _check_moe_config(moe_config, parallel_config)
- self.use_moe = (moe_config.expert_num > 1)
+ self.use_moe = moe_config.expert_num > 1
  config_to_attention = parallel_config.dpmp if self.use_moe else parallel_config
  if batch_size or use_past:
  Validator.check_positive_int(batch_size)
@@ -2370,7 +2375,7 @@ class TransformerEncoder(Cell):
  >>> # When use use_past=True, it includes two steps to implement the incremental prediction.
  >>> # Step 1: set is_first_iteration=True, and input the full sequence length's state.
  >>> batch_valid_length = Tensor(np.ones((2,)), mstype.int32)
- >>> init_reset = Tensor([True], mstype.bool_)
+ >>> init_reset = Tensor([True], mstype.bool)
  >>> # Set is_first_iteration=True to generate the full memory states
  >>> model = TransformerEncoder(batch_size=2, hidden_size=8, ffn_hidden_size=64, seq_length=16,
  ... num_heads=2, num_layers=2, use_past=True)
@@ -2384,7 +2389,7 @@ class TransformerEncoder(Cell):
  (2, 2, 16, 4)
  >>> encoder_input_value = Tensor(np.ones((2, 1, 8)), mstype.float32)
  >>> encoder_input_mask = Tensor(np.ones((2, 1, 16)), mstype.float16)
- >>> init_reset = Tensor([False], mstype.bool_)
+ >>> init_reset = Tensor([False], mstype.bool)
  >>> # Step 2: set is_first_iteration=False, and pass the single word to run the prediction rather than
  >>> # the full sequence.
  >>> model.add_flags_recursive(is_first_iteration=False)
@@ -2440,7 +2445,7 @@ class TransformerEncoder(Cell):
  super(TransformerEncoder, self).__init__()
  _check_config(parallel_config)
  _check_moe_config(moe_config, parallel_config)
- self.use_moe = (moe_config.expert_num > 1)
+ self.use_moe = moe_config.expert_num > 1
  config_to_layer = parallel_config.moe_parallel_config if self.use_moe else parallel_config.dp_mp_config
  if _get_parallel_mode() in (ParallelMode.AUTO_PARALLEL,):
  self.add = P.Add()
@@ -2677,7 +2682,7 @@ class TransformerDecoder(Cell):
  super(TransformerDecoder, self).__init__()
  _check_moe_config(moe_config, parallel_config)
  _check_config(parallel_config)
- self.use_moe = (moe_config.expert_num > 1)
+ self.use_moe = moe_config.expert_num > 1
  config_to_layer = parallel_config.moe_parallel_config if self.use_moe else parallel_config.dp_mp_config
  if _get_parallel_mode() in (ParallelMode.AUTO_PARALLEL,):
  self.add = P.Add()
@@ -2959,7 +2964,7 @@ class Transformer(Cell):
  if not lambda_func:
  lambda_func = _get_lambda_func(total_layer=encoder_layers + decoder_layers)
  _check_moe_config(moe_config, parallel_config)
- self.use_moe = (moe_config.expert_num > 1)
+ self.use_moe = moe_config.expert_num > 1
  self.add = P.Add()
  self.aux_loss = Tensor(0.0, mstype.float32)
  if encoder_layers > 0:
@@ -3026,7 +3031,7 @@ class Transformer(Cell):
  if not lambda_func:
  lambda_func = _get_lambda_func(total_layer=encoder_layers + decoder_layers)
  _check_moe_config(moe_config, parallel_config)
- self.use_moe = (moe_config.expert_num > 1)
+ self.use_moe = moe_config.expert_num > 1
  self.add = P.Add().shard(((), ()))
  self.aux_loss = Tensor(0.0, mstype.float32)
  if encoder_layers > 0:
@@ -21,7 +21,7 @@ import mindspore as ms
  from mindspore import context, log as logger
  from mindspore._c_expression import reset_op_id, reset_op_id_with_offset
  from mindspore.common.tensor import Tensor
- from mindspore.common.dtype import dtype_to_nptype
+ from mindspore.common.dtype import _dtype_to_nptype
  from mindspore.common import dtype as mstype
  from mindspore.communication.management import get_group_size, get_rank
  from mindspore.communication._comm_helper import _is_initialized
@@ -156,7 +156,7 @@ def _is_in_auto_parallel_mode():


  def _is_parallel_mode():
- if not _is_initialized() or context.get_context('mode') == context.PYNATIVE_MODE:
+ if not _is_initialized():
  return False
  if os.getenv("RUN_MODE") != "predict":
  return False
@@ -173,12 +173,6 @@ def _is_in_hybrid_parallel_mode():
  return _get_parallel_mode() == ms.ParallelMode.HYBRID_PARALLEL


- def _is_pynative_parallel():
- parallel_mode = context.get_auto_parallel_context('parallel_mode')
- return context.get_context('mode') == context.PYNATIVE_MODE and parallel_mode in (
- context.ParallelMode.SEMI_AUTO_PARALLEL, context.ParallelMode.AUTO_PARALLEL)
-
-
  def _get_full_batch():
  """Get whether to use full_batch."""
  return auto_parallel_context().get_full_batch()
@@ -452,7 +446,7 @@ def _to_full_tensor(elem, global_device_num, global_rank, scaling_sens=None):
  batchsize_per_device = item
  else:
  new_shape += (item,)
- new_tensor_numpy = np.zeros(new_shape, dtype_to_nptype(type_))
+ new_tensor_numpy = np.zeros(new_shape, _dtype_to_nptype(type_)) # pylint:disable=protected-access
  start = stage_rank * batchsize_per_device
  new_tensor_numpy[start: start + batchsize_per_device] = data.asnumpy()
  else:
@@ -466,7 +460,7 @@ def _to_full_tensor(elem, global_device_num, global_rank, scaling_sens=None):
  end = (stage_rank % dataset_strategy[index][i] + 1) * item
  s = slice(start, end, 1)
  slice_index += (s,)
- new_tensor_numpy = np.zeros(new_shape, dtype_to_nptype(type_))
+ new_tensor_numpy = np.zeros(new_shape, _dtype_to_nptype(type_)) # pylint:disable=protected-access
  new_tensor_numpy[slice_index] = data.asnumpy()
  new_tensor = Tensor(new_tensor_numpy, dtype=type_)
  lst.append(new_tensor)
@@ -773,7 +767,7 @@ def _grads_divided_by_device_num_if_recomputation(grads):
  """
  If in pynative parallel and full_batch is True, divide grads by device num to ensure that the gradients is correct.
  """
- if not _is_pynative_parallel() or not _get_full_batch():
+ if not _get_full_batch():
  return grads

  device_num = _get_device_num()
@@ -14,6 +14,7 @@
  # ============================================================================
  """Cell of auto parallel"""
  import os
+ from mindspore import jit
  from mindspore.nn.cell import Cell
  from mindspore.parallel.shard import Layout
  from mindspore.communication.management import get_rank, get_group_size
@@ -162,6 +163,10 @@ class AutoParallel(Cell):
  super(AutoParallel, self).__init__(auto_prefix=False)
  self.network = network

+ if parallel_mode not in ["semi_auto", "sharding_propagation", "recursive_programming"]:
+ raise ValueError("the argument 'parallel_mode' must be one of ['semi_auto', 'sharding_propagation'," \
+ " 'recursive_programming'], but got the value : {} .".format(parallel_mode))
+
  self._parallel_mode = parallel_mode

  self._global_rank = get_rank()
@@ -260,8 +265,10 @@ class AutoParallel(Cell):
  self._save_strategy_file_path = file_path

  def disable_strategy_file_only_for_trainable_params(self):
- """By default, MindSpore only loads and saves trainable parameters. This API enables the loading and saving of
- non-trainable parameters as well."""
+ """
+ By default, MindSpore only loads and saves trainable parameters. This API enables the loading and saving of
+ non-trainable parameters as well.
+ """
  self._only_trainable_params = False

  def save_operator_strategy_file(self, file_path):
@@ -275,7 +282,8 @@ class AutoParallel(Cell):
  Note:
  - It only works when `parallel_mode=sharding_propagation`.
  - When performing distributed training, users can first save the strategy using dryrun on a single device
- and then load strategy to perform distributed training.
+ and then load strategy to perform distributed training. Note that only the first device of each node will
+ save the strategy file, so the simulated rank id specified by Dryrun must be divisible by 8.

  Args:
  file_path (str): Path to save parallel strategy json, must be an absolute path.
@@ -410,7 +418,7 @@ class AutoParallel(Cell):
  raise ValueError("For 'AutoParallel.dataset_strategy', the argument "
  "'config' must be 'full_batch' or 'data_parallel', but got the value : {}."
  .format(config))
- self._full_batch = (config == "full_batch")
+ self._full_batch = config == "full_batch"
  self._dataset_strategy_config = config
  return
  if not isinstance(config, tuple):
@@ -436,9 +444,9 @@ class AutoParallel(Cell):
  Args:
  shard_size (int, optional): Set the optimizer weight shard group size if you want to specific the
  maximum group size across devices when the parallel optimizer is
- enabled. The numerical range can be (0, device_num]. Default value
+ enabled. The numerical range can be (0, device_num] or -1. Default value
  is -1, which means the optimizer weight shard group size will
- the data parallel group of each parameter. Default ``-1``.
+ the data parallel group of each parameter.
  threshold (int, optional): Set the threshold of parallel optimizer. When parallel optimizer is
  enabled, parameters with size smaller than this threshold will not be
  sharded across the devices. Parameter size = shape[0] \* ... \*
@@ -505,17 +513,18 @@ class AutoParallel(Cell):
  raise ValueError("For 'AutoParallel.pipeline', the argument 'stages' "
  "must be larger than zero, but got value: {}.".format(stages))
  if not isinstance(output_broadcast, bool):
- raise TypeError("For 'AutoParallel.pipeline', the argument 'stages' "
+ raise TypeError("For 'AutoParallel.pipeline', the argument 'output_broadcast' "
  "must be bool type, but got the type : {}.".format(type(output_broadcast)))
  if not isinstance(interleave, bool):
- raise TypeError("For 'AutoParallel.pipeline', the argument 'stages' "
+ raise TypeError("For 'AutoParallel.pipeline', the argument 'interleave' "
  "must be bool type, but got the type : {}.".format(type(interleave)))
  if not isinstance(scheduler, str):
- raise TypeError("For 'AutoParallel.pipeline', the argument 'stages' "
+ raise TypeError("For 'AutoParallel.pipeline', the argument 'scheduler' "
  "must be str type, but got the type : {}.".format(type(scheduler)))
- if scheduler not in ("1f1b", "gpipe"):
+ if scheduler not in ("1f1b", "gpipe", "seqpipe", "seqvpp", "seqsmartvpp", "zero_bubble_v"):
  raise ValueError("For 'AutoParallel.pipeline', the argument "
- "'scheduler' must be '1f1b' , 'gpipe' , but got the value : {}."
+ "'scheduler' must be '1f1b'/'gpipe'/'seqpipe'/'seqvpp'/'seqsmartvpp'/'zero_bubble_v' ," \
+ " but got the value : {}."
  .format(scheduler))
  self._pipeline_stages = stages
  self._pipeline_result_broadcast = output_broadcast
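
Taken together with the stricter parallel_mode check added in the constructor hunk above, the widened scheduler whitelist can be exercised with a short sketch. This is only an illustration built from the signatures visible in these hunks, assuming the distributed job has been launched and initialized with mindspore.communication.init(); MyNetwork is a hypothetical user-defined Cell:

    from mindspore.communication import init
    from mindspore.parallel.auto_parallel import AutoParallel

    init()                                # AutoParallel.__init__ calls get_rank(), so communication must be up
    net = MyNetwork()                     # hypothetical Cell
    parallel_net = AutoParallel(net, parallel_mode="semi_auto")   # any other mode now raises ValueError
    parallel_net.pipeline(stages=2, scheduler="seqpipe")          # "seqpipe"/"seqvpp"/"seqsmartvpp"/"zero_bubble_v" are newly accepted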
@@ -658,8 +667,11 @@ class AutoParallel(Cell):
  - recomputation_communication_overlap (bool): Enable overlap between recompute ops and communication ops
  if True.
  Default: False.
- grad_matmul_communication_overlap (bool): Enable overlap between dw matmul and
- tensor parallel communication ops if True. Default: False.
+ grad_matmul_communication_overlap (bool, str): When set to ``True``, it indicates that overlap
+ between dw matmul and tensor parallel communication is enabled. When set to ``False``, it indicates
+ that this feature is disabled. When set to str, it only optimizes the specified communication
+ operator types, with operators separated by ``,``. For example, "AlltoAll,AlltoAllV" indicates that
+ only ``AlltoAll`` and ``AlltoAllV`` are optimized. Default: ``False``.
  - grad_fa_allgather_overlap (bool): Enable overlap between duplicated allgather by recomputing
  in sequence parallel and flashattentionscoregrad ops if True. Default: False.
  - enable_communication_fusion (bool): Enable communication fusion to optimize the number of
@@ -674,7 +686,9 @@ class AutoParallel(Cell):
  and optimizer parallel allgather communication if True. Currently, do not support
  `O2 <https://www.mindspore.cn/docs/en/master/api_python/mindspore/mindspore.JitConfig.html>`_
  Default: False.
- - computation_communication_fusion_level (int): Enable the fusion between compute and communicate.
+ - computation_communication_fusion_level (int): Enable the fusion between compute and communicate,
+ which fuses communication tasks and computing tasks, allows for partial pipelining and parallel
+ execution of these tasks during operation, thereby enhancing performance.
  Default: ``0``. Note: This function must be used with Ascend Training Solution 24.0.RC2 or later.
  This is an experimental configuration, may be changed or canceled in the future.

@@ -685,6 +699,12 @@ class AutoParallel(Cell):
  - 2: Apply fusion to backward nodes.

  - 3: Apply fusion to all nodes.
+
+ .. warning::
+ After setting ``export MS_ENABLE_LCCL=on``, the fusion operator based on memory semantics will be
+ used. Please note that this operator is still in an experimental stage and may be changed or
+ removed in the future.
+
  - dataset_broadcast_opt_level (int): Optimize the scenario that the dataset repeated reading. Only
  support O0/O1 jit level. It doesn't work in O2 mode. Default: ``0``.

@@ -728,5 +748,6 @@ class AutoParallel(Cell):
  self._transformer_opt_config = file_path
  ctx.ascend_config['parallel_speed_up_json_path'] = file_path

+ @jit
  def construct(self, *args, **kwargs):
  return self.network(*args, **kwargs)
@@ -15,10 +15,10 @@
  """Convert distributed checkpoint"""
  from __future__ import absolute_import

- from mindspore.parallel.checkpoint_transform import rank_list_for_transform, transform_checkpoint_by_rank, transform_checkpoints
+ __all__ = ["rank_list_for_convert", "convert_checkpoint_by_rank", "convert_checkpoints"]

- __all__ = ["rank_list_for_convert",
- "convert_checkpoint_by_rank", "convert_checkpoints"]
+ from mindspore.parallel.checkpoint_transform import rank_list_for_transform, transform_checkpoint_by_rank, \
+ transform_checkpoints


  def rank_list_for_convert(rank_id, src_strategy_file=None, dst_strategy_file=None):
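
The re-ordering above only moves __all__ ahead of the aliasing imports; the public entry points of the module are unchanged. A hedged usage sketch of the signature shown in this hunk, with placeholder strategy-file paths and assuming both strategy files were generated beforehand:

    from mindspore.parallel.checkpoint_convert import rank_list_for_convert

    # Source ranks whose checkpoint slices are needed to rebuild the checkpoint of
    # destination rank 0 under the new sharding strategy (semantics assumed to match
    # rank_list_for_transform, which this function aliases).
    needed_ranks = rank_list_for_convert(rank_id=0,
                                         src_strategy_file="./src_strategy.ckpt",
                                         dst_strategy_file="./dst_strategy.ckpt")
    print(needed_ranks)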