mindspore-2.6.0-cp311-cp311-win_amd64.whl → mindspore-2.7.0rc1-cp311-cp311-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release. This version of mindspore might be problematic.

Files changed (403)
  1. mindspore/.commit_id +1 -1
  2. mindspore/Microsoft.VisualStudio.Telemetry.dll +0 -0
  3. mindspore/Newtonsoft.Json.dll +0 -0
  4. mindspore/__init__.py +1 -1
  5. mindspore/_c_dataengine.cp311-win_amd64.pyd +0 -0
  6. mindspore/_c_expression.cp311-win_amd64.pyd +0 -0
  7. mindspore/_c_mindrecord.cp311-win_amd64.pyd +0 -0
  8. mindspore/_checkparam.py +40 -9
  9. mindspore/{_deprecated → _extends/optimize}/__init__.py +9 -3
  10. mindspore/_extends/optimize/cell_utils.py +96 -0
  11. mindspore/_extends/parse/__init__.py +2 -2
  12. mindspore/_extends/parse/compile_config.py +44 -22
  13. mindspore/_extends/parse/deprecated/deprecated_tensor_method.py +1 -1
  14. mindspore/_extends/parse/parser.py +36 -61
  15. mindspore/_extends/parse/resources.py +39 -0
  16. mindspore/_extends/parse/standard_method.py +32 -13
  17. mindspore/_extends/parse/trope.py +8 -1
  18. mindspore/_extends/pijit/__init__.py +1 -2
  19. mindspore/amp.py +4 -4
  20. mindspore/atlprov.dll +0 -0
  21. mindspore/avcodec-59.dll +0 -0
  22. mindspore/avdevice-59.dll +0 -0
  23. mindspore/avfilter-8.dll +0 -0
  24. mindspore/avformat-59.dll +0 -0
  25. mindspore/avutil-57.dll +0 -0
  26. mindspore/boost/adasum.py +1 -1
  27. mindspore/boost/boost_cell_wrapper.py +4 -4
  28. mindspore/c1.dll +0 -0
  29. mindspore/c1xx.dll +0 -0
  30. mindspore/c2.dll +0 -0
  31. mindspore/common/__init__.py +27 -2
  32. mindspore/common/_grad_function.py +2 -1
  33. mindspore/common/_pijit_context.py +28 -7
  34. mindspore/common/_stub_tensor.py +1 -209
  35. mindspore/common/_tensor_cpp_method.py +1 -1
  36. mindspore/common/_tensor_docs.py +76 -15
  37. mindspore/common/api.py +193 -112
  38. mindspore/common/dtype.py +21 -11
  39. mindspore/common/dump.py +10 -15
  40. mindspore/common/generator.py +2 -3
  41. mindspore/common/hook_handle.py +11 -2
  42. mindspore/common/jit_config.py +1 -1
  43. mindspore/common/jit_trace.py +84 -105
  44. mindspore/common/parameter.py +26 -12
  45. mindspore/common/recompute.py +3 -3
  46. mindspore/common/sparse_tensor.py +0 -3
  47. mindspore/common/symbol.py +0 -1
  48. mindspore/common/tensor.py +48 -83
  49. mindspore/communication/_comm_helper.py +46 -4
  50. mindspore/communication/management.py +79 -7
  51. mindspore/context.py +38 -23
  52. mindspore/dataset/core/config.py +3 -3
  53. mindspore/dataset/engine/datasets.py +20 -7
  54. mindspore/dataset/engine/datasets_user_defined.py +32 -2
  55. mindspore/dataset/engine/iterators.py +2 -2
  56. mindspore/dataset/engine/obs/config_loader.py +2 -2
  57. mindspore/dataset/engine/obs/obs_mindrecord_dataset.py +8 -0
  58. mindspore/dataset/transforms/py_transforms.py +7 -3
  59. mindspore/dataset/transforms/transforms.py +7 -3
  60. mindspore/dataset/vision/validators.py +1 -0
  61. mindspore/device_context/ascend/device.py +1 -1
  62. mindspore/device_context/gpu/__init__.py +2 -2
  63. mindspore/device_context/gpu/device.py +1 -1
  64. mindspore/device_context/gpu/op_precision.py +4 -2
  65. mindspore/device_context/gpu/op_tuning.py +6 -3
  66. mindspore/device_manager.py +16 -9
  67. mindspore/dnnl.dll +0 -0
  68. mindspore/dpcmi.dll +0 -0
  69. mindspore/experimental/llm_boost/ascend_native/llama_boost_ascend_native.py +3 -5
  70. mindspore/experimental/llm_boost/atb/boost_base.py +2 -3
  71. mindspore/experimental/optim/adadelta.py +13 -20
  72. mindspore/experimental/optim/adagrad.py +15 -22
  73. mindspore/experimental/optim/adam.py +17 -24
  74. mindspore/experimental/optim/adamax.py +14 -22
  75. mindspore/experimental/optim/adamw.py +28 -34
  76. mindspore/experimental/optim/asgd.py +15 -25
  77. mindspore/experimental/optim/lr_scheduler.py +27 -45
  78. mindspore/experimental/optim/nadam.py +14 -24
  79. mindspore/experimental/optim/optimizer.py +13 -23
  80. mindspore/experimental/optim/radam.py +18 -24
  81. mindspore/experimental/optim/rmsprop.py +14 -25
  82. mindspore/experimental/optim/rprop.py +15 -26
  83. mindspore/experimental/optim/sgd.py +9 -19
  84. mindspore/hal/__init__.py +4 -4
  85. mindspore/hal/contiguous_tensors_handle.py +2 -2
  86. mindspore/hal/memory.py +1 -0
  87. mindspore/include/api/cell.h +37 -1
  88. mindspore/include/api/delegate.h +10 -0
  89. mindspore/include/api/model.h +3 -0
  90. mindspore/include/api/types.h +2 -2
  91. mindspore/include/c_api/model_c.h +0 -58
  92. mindspore/include/c_api/tensor_c.h +0 -26
  93. mindspore/include/dataset/vision_ascend.h +1 -1
  94. mindspore/jpeg62.dll +0 -0
  95. mindspore/mindrecord/tools/cifar10.py +60 -11
  96. mindspore/mindrecord/tools/cifar10_to_mr.py +5 -0
  97. mindspore/mindspore_backend_common.dll +0 -0
  98. mindspore/mindspore_backend_manager.dll +0 -0
  99. mindspore/mindspore_common.dll +0 -0
  100. mindspore/mindspore_core.dll +0 -0
  101. mindspore/mindspore_cpu_res_manager.dll +0 -0
  102. mindspore/mindspore_dump.dll +0 -0
  103. mindspore/mindspore_frontend.dll +0 -0
  104. mindspore/mindspore_glog.dll +0 -0
  105. mindspore/mindspore_memory_pool.dll +0 -0
  106. mindspore/mindspore_ms_backend.dll +0 -0
  107. mindspore/mindspore_ops.dll +0 -0
  108. mindspore/mindspore_ops_host.dll +0 -0
  109. mindspore/mindspore_ops_kernel_common.dll +0 -0
  110. mindspore/mindspore_profiler.dll +0 -0
  111. mindspore/mindspore_pyboost.dll +0 -0
  112. mindspore/mindspore_pynative.dll +0 -0
  113. mindspore/mindspore_res_manager.dll +0 -0
  114. mindspore/mindspore_runtime_pipeline.dll +0 -0
  115. mindspore/mint/__init__.py +4 -44
  116. mindspore/mint/distributed/__init__.py +1 -0
  117. mindspore/mint/distributed/distributed.py +208 -5
  118. mindspore/mint/nn/__init__.py +1 -1
  119. mindspore/mint/nn/functional.py +53 -6
  120. mindspore/mint/nn/layer/_functions.py +164 -294
  121. mindspore/mint/nn/layer/activation.py +8 -6
  122. mindspore/mint/nn/layer/conv.py +122 -98
  123. mindspore/mint/nn/layer/normalization.py +8 -22
  124. mindspore/mint/optim/adam.py +19 -18
  125. mindspore/mint/optim/adamw.py +14 -8
  126. mindspore/mint/optim/sgd.py +5 -5
  127. mindspore/msobj140.dll +0 -0
  128. mindspore/mspdb140.dll +0 -0
  129. mindspore/mspdbcore.dll +0 -0
  130. mindspore/mspdbst.dll +0 -0
  131. mindspore/mspft140.dll +0 -0
  132. mindspore/msvcdis140.dll +0 -0
  133. mindspore/msvcp140_1.dll +0 -0
  134. mindspore/msvcp140_2.dll +0 -0
  135. mindspore/msvcp140_atomic_wait.dll +0 -0
  136. mindspore/msvcp140_codecvt_ids.dll +0 -0
  137. mindspore/nn/cell.py +325 -499
  138. mindspore/nn/grad/cell_grad.py +11 -12
  139. mindspore/nn/layer/activation.py +32 -34
  140. mindspore/nn/layer/basic.py +67 -64
  141. mindspore/nn/layer/channel_shuffle.py +4 -4
  142. mindspore/nn/layer/combined.py +4 -2
  143. mindspore/nn/layer/conv.py +86 -85
  144. mindspore/nn/layer/dense.py +9 -7
  145. mindspore/nn/layer/embedding.py +50 -52
  146. mindspore/nn/layer/image.py +37 -39
  147. mindspore/nn/layer/math.py +111 -112
  148. mindspore/nn/layer/normalization.py +56 -44
  149. mindspore/nn/layer/pooling.py +58 -63
  150. mindspore/nn/layer/rnn_cells.py +33 -33
  151. mindspore/nn/layer/rnns.py +56 -56
  152. mindspore/nn/layer/thor_layer.py +74 -73
  153. mindspore/nn/layer/transformer.py +11 -1
  154. mindspore/nn/learning_rate_schedule.py +20 -20
  155. mindspore/nn/loss/loss.py +79 -81
  156. mindspore/nn/optim/adam.py +1 -1
  157. mindspore/nn/optim/adasum.py +2 -2
  158. mindspore/nn/optim/optimizer.py +1 -1
  159. mindspore/nn/optim/thor.py +2 -2
  160. mindspore/nn/probability/distribution/exponential.py +2 -1
  161. mindspore/nn/probability/distribution/poisson.py +2 -1
  162. mindspore/nn/sparse/sparse.py +3 -3
  163. mindspore/nn/wrap/cell_wrapper.py +34 -37
  164. mindspore/nn/wrap/grad_reducer.py +37 -37
  165. mindspore/nn/wrap/loss_scale.py +72 -74
  166. mindspore/numpy/array_creations.py +5 -5
  167. mindspore/numpy/fft.py +1 -1
  168. mindspore/numpy/math_ops.py +1 -1
  169. mindspore/opencv_core452.dll +0 -0
  170. mindspore/opencv_imgcodecs452.dll +0 -0
  171. mindspore/opencv_imgproc452.dll +0 -0
  172. mindspore/ops/_grad_experimental/grad_comm_ops.py +51 -13
  173. mindspore/ops/_grad_experimental/grad_debug_ops.py +14 -0
  174. mindspore/ops/_vmap/vmap_array_ops.py +6 -13
  175. mindspore/ops/_vmap/vmap_nn_ops.py +8 -16
  176. mindspore/ops/auto_generate/cpp_create_prim_instance_helper.py +17 -8
  177. mindspore/ops/auto_generate/gen_extend_func.py +1 -51
  178. mindspore/ops/auto_generate/gen_ops_def.py +463 -257
  179. mindspore/ops/auto_generate/gen_ops_prim.py +1127 -885
  180. mindspore/ops/auto_generate/pyboost_inner_prim.py +31 -1
  181. mindspore/ops/composite/__init__.py +10 -0
  182. mindspore/ops/composite/base.py +8 -4
  183. mindspore/ops/composite/multitype_ops/__init__.py +12 -1
  184. mindspore/ops/composite/multitype_ops/_compile_utils.py +132 -108
  185. mindspore/ops/composite/multitype_ops/add_impl.py +70 -2
  186. mindspore/ops/composite/multitype_ops/div_impl.py +49 -0
  187. mindspore/ops/composite/multitype_ops/floordiv_impl.py +29 -0
  188. mindspore/ops/composite/multitype_ops/getitem_impl.py +11 -0
  189. mindspore/ops/composite/multitype_ops/mod_impl.py +5 -3
  190. mindspore/ops/composite/multitype_ops/mul_impl.py +49 -0
  191. mindspore/ops/composite/multitype_ops/setitem_impl.py +57 -0
  192. mindspore/ops/composite/multitype_ops/sub_impl.py +34 -0
  193. mindspore/ops/composite/multitype_ops/zeros_like_impl.py +14 -0
  194. mindspore/ops/function/__init__.py +3 -1
  195. mindspore/ops/function/_add_attr_func.py +11 -6
  196. mindspore/ops/function/array_func.py +7 -94
  197. mindspore/ops/function/debug_func.py +4 -3
  198. mindspore/ops/function/grad/grad_func.py +1 -1
  199. mindspore/ops/function/math_func.py +21 -367
  200. mindspore/ops/function/nn_func.py +26 -41
  201. mindspore/ops/function/other_func.py +4 -1
  202. mindspore/ops/function/random_func.py +31 -4
  203. mindspore/ops/functional.py +0 -2
  204. mindspore/ops/functional_overload.py +463 -6
  205. mindspore/ops/op_info_register.py +21 -0
  206. mindspore/ops/operations/__init__.py +5 -2
  207. mindspore/ops/operations/_custom_ops_utils.py +675 -8
  208. mindspore/ops/operations/_inner_ops.py +3 -6
  209. mindspore/ops/operations/_sequence_ops.py +1 -1
  210. mindspore/ops/operations/comm_ops.py +185 -26
  211. mindspore/ops/operations/custom_ops.py +235 -172
  212. mindspore/ops/operations/debug_ops.py +55 -4
  213. mindspore/ops/operations/image_ops.py +13 -13
  214. mindspore/ops/operations/manually_defined/ops_def.py +15 -16
  215. mindspore/ops/operations/math_ops.py +3 -4
  216. mindspore/ops/operations/nn_ops.py +5 -6
  217. mindspore/ops/primitive.py +6 -10
  218. mindspore/ops/tensor_method.py +36 -4
  219. mindspore/ops_generate/api/cpp_create_prim_instance_helper_generator.py +1 -1
  220. mindspore/ops_generate/api/functional_map_cpp_generator.py +10 -9
  221. mindspore/ops_generate/api/functions_cc_generator.py +58 -10
  222. mindspore/ops_generate/api/tensor_func_reg_cpp_generator.py +1 -1
  223. mindspore/ops_generate/common/base_generator.py +14 -0
  224. mindspore/ops_generate/common/gen_constants.py +7 -2
  225. mindspore/ops_generate/common/gen_utils.py +0 -19
  226. mindspore/ops_generate/common/op_proto.py +11 -4
  227. mindspore/ops_generate/common/template.py +88 -11
  228. mindspore/ops_generate/gen_ops.py +1 -1
  229. mindspore/ops_generate/op_def/lite_ops_cpp_generator.py +4 -4
  230. mindspore/ops_generate/op_def/ops_name_h_generator.py +0 -3
  231. mindspore/ops_generate/op_def/ops_primitive_h_generator.py +0 -4
  232. mindspore/ops_generate/op_def_py/op_prim_py_generator.py +5 -2
  233. mindspore/ops_generate/pyboost/auto_grad_impl_cc_generator.py +49 -8
  234. mindspore/ops_generate/pyboost/auto_grad_reg_cc_generator.py +2 -2
  235. mindspore/ops_generate/pyboost/gen_pyboost_func.py +31 -0
  236. mindspore/ops_generate/pyboost/op_template_parser.py +98 -72
  237. mindspore/ops_generate/pyboost/pyboost_functions_cpp_generator.py +70 -273
  238. mindspore/ops_generate/pyboost/pyboost_functions_h_generator.py +14 -6
  239. mindspore/ops_generate/pyboost/pyboost_functions_impl_cpp_generator.py +316 -0
  240. mindspore/ops_generate/pyboost/pyboost_functions_py_generator.py +1 -1
  241. mindspore/ops_generate/pyboost/pyboost_grad_function_cpp_generator.py +5 -3
  242. mindspore/ops_generate/pyboost/pyboost_inner_prim_generator.py +1 -1
  243. mindspore/ops_generate/pyboost/pyboost_internal_functions_cpp_generator.py +76 -0
  244. mindspore/ops_generate/pyboost/pyboost_internal_functions_h_generator.py +76 -0
  245. mindspore/ops_generate/pyboost/pyboost_internal_kernel_info_adapter_generator.py +125 -0
  246. mindspore/ops_generate/pyboost/pyboost_native_grad_functions_generator.py +4 -3
  247. mindspore/ops_generate/pyboost/pyboost_op_cpp_code_generator.py +348 -61
  248. mindspore/ops_generate/pyboost/pyboost_overload_functions_cpp_generator.py +1 -1
  249. mindspore/ops_generate/pyboost/pyboost_utils.py +118 -9
  250. mindspore/ops_generate/tensor_py_cc_generator.py +1 -24
  251. mindspore/parallel/_auto_parallel_context.py +4 -2
  252. mindspore/parallel/_cell_wrapper.py +106 -40
  253. mindspore/parallel/_parallel_serialization.py +1 -1
  254. mindspore/parallel/_ps_context.py +4 -6
  255. mindspore/parallel/_tensor.py +167 -12
  256. mindspore/parallel/_transformer/moe.py +1 -1
  257. mindspore/parallel/_transformer/transformer.py +13 -8
  258. mindspore/parallel/auto_parallel.py +12 -5
  259. mindspore/parallel/checkpoint_convert.py +3 -3
  260. mindspore/parallel/checkpoint_transform.py +3 -1
  261. mindspore/parallel/cluster/process_entity/_api.py +84 -48
  262. mindspore/parallel/cluster/process_entity/_utils.py +95 -7
  263. mindspore/parallel/cluster/run.py +43 -4
  264. mindspore/parallel/function/__init__.py +8 -1
  265. mindspore/parallel/function/reshard_func.py +1 -1
  266. mindspore/parallel/nn/__init__.py +15 -2
  267. mindspore/parallel/nn/parallel_cell_wrapper.py +9 -10
  268. mindspore/parallel/nn/parallel_grad_reducer.py +7 -6
  269. mindspore/parallel/shard.py +2 -2
  270. mindspore/parallel/transform_safetensors.py +462 -174
  271. mindspore/pgodb140.dll +0 -0
  272. mindspore/pgort140.dll +0 -0
  273. mindspore/profiler/__init__.py +2 -1
  274. mindspore/profiler/analysis/parser/timeline_assembly_factory/ascend_timeline_assembler.py +7 -7
  275. mindspore/profiler/analysis/parser/timeline_assembly_factory/base_timeline_assembler.py +3 -0
  276. mindspore/profiler/analysis/parser/timeline_assembly_factory/trace_view_container.py +3 -0
  277. mindspore/profiler/analysis/parser/timeline_creator/cpu_op_timeline_creator.py +3 -3
  278. mindspore/profiler/analysis/parser/timeline_creator/fwk_timeline_creator.py +3 -3
  279. mindspore/profiler/analysis/parser/timeline_creator/msprof_timeline_creator.py +4 -4
  280. mindspore/profiler/analysis/parser/timeline_creator/scope_layer_timeline_creator.py +3 -3
  281. mindspore/profiler/analysis/parser/timeline_event/fwk_event.py +4 -1
  282. mindspore/profiler/analysis/parser/timeline_event/timeline_event_pool.py +2 -1
  283. mindspore/profiler/analysis/task_manager.py +1 -1
  284. mindspore/profiler/analysis/viewer/ascend_communication_viewer.py +5 -1
  285. mindspore/profiler/analysis/viewer/ascend_integrate_viewer.py +2 -1
  286. mindspore/profiler/analysis/viewer/ascend_op_memory_viewer.py +42 -22
  287. mindspore/profiler/analysis/viewer/ascend_step_trace_time_viewer.py +3 -2
  288. mindspore/profiler/analysis/viewer/ms_minddata_viewer.py +9 -5
  289. mindspore/profiler/analysis/viewer/ms_operator_details_viewer.py +132 -0
  290. mindspore/profiler/common/constant.py +16 -0
  291. mindspore/profiler/common/profiler_context.py +25 -27
  292. mindspore/profiler/common/profiler_info.py +0 -16
  293. mindspore/profiler/common/profiler_op_analyse.py +235 -0
  294. mindspore/profiler/common/profiler_output_path.py +23 -8
  295. mindspore/profiler/common/profiler_parameters.py +128 -35
  296. mindspore/profiler/dynamic_profile/__init__.py +0 -0
  297. mindspore/profiler/dynamic_profile/dynamic_monitor_proxy.py +39 -0
  298. mindspore/profiler/dynamic_profile/dynamic_profiler_config_context.py +666 -0
  299. mindspore/profiler/dynamic_profile/dynamic_profiler_utils.py +62 -0
  300. mindspore/profiler/dynamic_profiler.py +305 -314
  301. mindspore/profiler/envprofiler.py +12 -7
  302. mindspore/profiler/experimental_config.py +96 -6
  303. mindspore/profiler/mstx.py +33 -12
  304. mindspore/profiler/platform/__init__.py +2 -3
  305. mindspore/profiler/platform/npu_profiler.py +29 -19
  306. mindspore/profiler/profiler.py +35 -19
  307. mindspore/profiler/profiler_action_controller.py +64 -76
  308. mindspore/profiler/schedule.py +10 -4
  309. mindspore/rewrite/common/config.py +1 -0
  310. mindspore/rewrite/common/namer.py +1 -0
  311. mindspore/rewrite/common/namespace.py +1 -0
  312. mindspore/rewrite/node/node.py +31 -11
  313. mindspore/rewrite/parsers/assign_parser.py +1 -1
  314. mindspore/rewrite/symbol_tree/symbol_tree.py +1 -1
  315. mindspore/run_check/_check_version.py +7 -10
  316. mindspore/runtime/__init__.py +5 -5
  317. mindspore/runtime/event.py +10 -4
  318. mindspore/runtime/executor.py +60 -45
  319. mindspore/runtime/memory.py +21 -30
  320. mindspore/runtime/thread_bind_core.py +298 -164
  321. mindspore/safeguard/rewrite_obfuscation.py +12 -13
  322. mindspore/swresample-4.dll +0 -0
  323. mindspore/swscale-6.dll +0 -0
  324. mindspore/tbbmalloc.dll +0 -0
  325. mindspore/tinyxml2.dll +0 -0
  326. mindspore/train/_utils.py +6 -2
  327. mindspore/train/amp.py +43 -20
  328. mindspore/train/callback/__init__.py +5 -5
  329. mindspore/train/callback/_checkpoint.py +3 -6
  330. mindspore/train/callback/_flops_collector.py +1 -1
  331. mindspore/train/callback/_landscape.py +0 -1
  332. mindspore/train/callback/_train_fault_tolerance.py +71 -13
  333. mindspore/train/data_sink.py +11 -2
  334. mindspore/train/dataset_helper.py +9 -0
  335. mindspore/train/model.py +51 -33
  336. mindspore/train/serialization.py +133 -111
  337. mindspore/train/summary/summary_record.py +13 -2
  338. mindspore/turbojpeg.dll +0 -0
  339. mindspore/utils/__init__.py +3 -2
  340. mindspore/utils/dryrun.py +0 -6
  341. mindspore/utils/runtime_execution_order_check.py +162 -78
  342. mindspore/utils/sdc_detect.py +68 -0
  343. mindspore/utils/utils.py +6 -9
  344. mindspore/vcmeta.dll +0 -0
  345. mindspore/vcruntime140.dll +0 -0
  346. mindspore/vcruntime140_1.dll +0 -0
  347. mindspore/version.py +1 -1
  348. {mindspore-2.6.0.dist-info → mindspore-2.7.0rc1.dist-info}/METADATA +5 -4
  349. {mindspore-2.6.0.dist-info → mindspore-2.7.0rc1.dist-info}/RECORD +352 -390
  350. mindspore/_deprecated/jit.py +0 -198
  351. mindspore/experimental/es/__init__.py +0 -22
  352. mindspore/experimental/es/embedding_service.py +0 -891
  353. mindspore/experimental/es/embedding_service_layer.py +0 -581
  354. mindspore/profiler/parser/__init__.py +0 -14
  355. mindspore/profiler/parser/aicpu_data_parser.py +0 -272
  356. mindspore/profiler/parser/ascend_analysis/__init__.py +0 -14
  357. mindspore/profiler/parser/ascend_analysis/constant.py +0 -71
  358. mindspore/profiler/parser/ascend_analysis/file_manager.py +0 -180
  359. mindspore/profiler/parser/ascend_analysis/function_event.py +0 -185
  360. mindspore/profiler/parser/ascend_analysis/fwk_cann_parser.py +0 -136
  361. mindspore/profiler/parser/ascend_analysis/fwk_file_parser.py +0 -131
  362. mindspore/profiler/parser/ascend_analysis/msprof_timeline_parser.py +0 -104
  363. mindspore/profiler/parser/ascend_analysis/path_manager.py +0 -313
  364. mindspore/profiler/parser/ascend_analysis/profiler_info_parser.py +0 -123
  365. mindspore/profiler/parser/ascend_analysis/tlv_decoder.py +0 -86
  366. mindspore/profiler/parser/ascend_analysis/trace_event_manager.py +0 -75
  367. mindspore/profiler/parser/ascend_cluster_generator.py +0 -116
  368. mindspore/profiler/parser/ascend_communicate_generator.py +0 -314
  369. mindspore/profiler/parser/ascend_flops_generator.py +0 -116
  370. mindspore/profiler/parser/ascend_fpbp_generator.py +0 -82
  371. mindspore/profiler/parser/ascend_hccl_generator.py +0 -271
  372. mindspore/profiler/parser/ascend_integrate_generator.py +0 -42
  373. mindspore/profiler/parser/ascend_memory_generator.py +0 -185
  374. mindspore/profiler/parser/ascend_msprof_exporter.py +0 -282
  375. mindspore/profiler/parser/ascend_msprof_generator.py +0 -187
  376. mindspore/profiler/parser/ascend_op_generator.py +0 -334
  377. mindspore/profiler/parser/ascend_steptrace_generator.py +0 -94
  378. mindspore/profiler/parser/ascend_timeline_generator.py +0 -545
  379. mindspore/profiler/parser/base_timeline_generator.py +0 -483
  380. mindspore/profiler/parser/container.py +0 -229
  381. mindspore/profiler/parser/cpu_gpu_timeline_generator.py +0 -697
  382. mindspore/profiler/parser/flops_parser.py +0 -531
  383. mindspore/profiler/parser/framework_enum.py +0 -111
  384. mindspore/profiler/parser/framework_parser.py +0 -464
  385. mindspore/profiler/parser/framework_struct.py +0 -61
  386. mindspore/profiler/parser/gpu_analysis/__init__.py +0 -14
  387. mindspore/profiler/parser/gpu_analysis/function_event.py +0 -44
  388. mindspore/profiler/parser/gpu_analysis/fwk_file_parser.py +0 -89
  389. mindspore/profiler/parser/gpu_analysis/profiler_info_parser.py +0 -72
  390. mindspore/profiler/parser/hccl_parser.py +0 -573
  391. mindspore/profiler/parser/hwts_log_parser.py +0 -122
  392. mindspore/profiler/parser/integrator.py +0 -526
  393. mindspore/profiler/parser/memory_usage_parser.py +0 -277
  394. mindspore/profiler/parser/minddata_analyzer.py +0 -800
  395. mindspore/profiler/parser/minddata_parser.py +0 -186
  396. mindspore/profiler/parser/minddata_pipeline_parser.py +0 -299
  397. mindspore/profiler/parser/op_intermediate_parser.py +0 -149
  398. mindspore/profiler/parser/optime_parser.py +0 -250
  399. mindspore/profiler/parser/profiler_info.py +0 -213
  400. mindspore/profiler/parser/step_trace_parser.py +0 -666
  401. {mindspore-2.6.0.dist-info → mindspore-2.7.0rc1.dist-info}/WHEEL +0 -0
  402. {mindspore-2.6.0.dist-info → mindspore-2.7.0rc1.dist-info}/entry_points.txt +0 -0
  403. {mindspore-2.6.0.dist-info → mindspore-2.7.0rc1.dist-info}/top_level.txt +0 -0
mindspore/parallel/_tensor.py:

@@ -21,9 +21,11 @@ import numpy as np
 from mindspore.common.tensor import Tensor
 from mindspore.communication.management import get_rank, get_group_size
 from mindspore._c_expression import TensorTransform
+from mindspore import log as logger
 
 _tensor_transform = TensorTransform.get_instance()
-
+COMM_TENSOR_CELL_CACHE = {}
+RESHARD_OP_MAP_CACHE = {}
 
 def _get_tensor_strategy(dev_mat, tensor_map):
     """
@@ -348,7 +350,7 @@ def _extract_layout_item(layout_item):
     return dev_matrix, tensor_map, opt_shard_step, opt_shard_size
 
 
-def _transform_tensor_by_layout(from_layout, to_layout, device_list, rank_id):
+def _transform_tensor_by_layout(from_layout, to_layout, device_list, rank_id, enable_redist_opt=False):
     """
     Transform tensor from source layout to the destination layout.
 
@@ -362,7 +364,7 @@ def _transform_tensor_by_layout(from_layout, to_layout, device_list, rank_id):
     """
     if not isinstance(from_layout, tuple) or not isinstance(to_layout, tuple):
         raise TypeError("The layout should be tuple! layout is {} and {}".format(from_layout, to_layout))
-    return _tensor_transform.transform_tensor_sharding(from_layout, to_layout, device_list, rank_id)
+    return _tensor_transform.transform_tensor_sharding(from_layout, to_layout, device_list, enable_redist_opt, rank_id)
 
 
 def _construct_from_to_tensor_layout(from_full_tensor_shape, from_dev_matrix,
@@ -587,13 +589,15 @@ def _get_needed_rank_list_by_layouts(from_tensor_layout, to_tensor_layout, devic
     return result_list
 
 
-def _get_needed_rank_transform_operator_map_by_layouts(from_tensor_layout, to_tensor_layout, device_list, self_rank):
+def _get_needed_rank_transform_operator_map_by_layouts(from_tensor_layout, to_tensor_layout, device_list, self_rank,
+                                                       enable_redist_opt=False):
     """
     AllGather op: {op_name, group_ranks + axis}
     """
     stack = []
     index = 0
-    transform_operators = _transform_tensor_by_layout(from_tensor_layout, to_tensor_layout, device_list, self_rank)
+    transform_operators = _transform_tensor_by_layout(from_tensor_layout, to_tensor_layout, device_list, self_rank,
+                                                      enable_redist_opt)
     result_map = {self_rank: transform_operators}
     for operators in transform_operators:
         op_name = operators[0]
@@ -606,7 +610,7 @@ def _get_needed_rank_transform_operator_map_by_layouts(from_tensor_layout, to_te
             for rank in group_info[1]:
                 if rank not in result_map:
                     new_transform_operators = _transform_tensor_by_layout(from_tensor_layout, to_tensor_layout,
-                                                                          device_list, rank)
+                                                                          device_list, rank, enable_redist_opt)
                     result_map[rank] = new_transform_operators
                     index = 0
                     for operators in new_transform_operators:
@@ -710,8 +714,6 @@ def _apply_operator(operator_name):
     Returns:
         The data of tensor after apply operator.
     """
-    if str(type(numpy_data)) == "<class 'builtins.PySafeSlice'>":
-        numpy_data = numpy_data[:]
     if not isinstance(numpy_data, np.ndarray):
         raise TypeError("The data should be a numpy.ndarray.")
     _check_operator(reshape_op)
@@ -732,10 +734,7 @@ def _apply_operator(operator_name):
         raise TypeError("The data_list should be a list.")
     new_numpy_data_list = []
     for numpy_data in numpy_data_list:
-        if str(type(numpy_data)) == "<class 'builtins.PySafeSlice'>":
-            new_numpy_data_list.append(numpy_data[:])
-        else:
-            new_numpy_data_list.append(numpy_data)
+        new_numpy_data_list.append(numpy_data)
     numpy_data_list = new_numpy_data_list
     _check_operator(allgather_op)
     concat_group = allgather_op[1][:-1]
@@ -896,3 +895,159 @@ def _chunk_shape(np_tensor, strategy, depth):
         output.extend(
             _chunk_shape(ret_, strategy[len(strategy) - depth + 1:len(strategy)], depth - 1))
     return output
+
+
+def _infer_pp_op_map(from_layout, to_layout, self_rank):
+    """
+    get the ops map for merging pp stages
+    """
+    from_rank_list = from_layout[3]
+    to_rank_list = to_layout[3]
+    from_dev_num_in_stage = len(from_rank_list)
+    current_rank_stage_id = self_rank // from_dev_num_in_stage
+    diff_rank_id = [
+        rank_id for rank_id in to_rank_list if rank_id not in from_rank_list]
+    end_stage = from_dev_num_in_stage * (current_rank_stage_id + 1)
+    start_stage = from_dev_num_in_stage * current_rank_stage_id
+    rank_pos_in_stage = list(range(start_stage, end_stage)).index(self_rank)
+    root_idx = from_rank_list[rank_pos_in_stage]
+    broadcast_rank_list = [root_idx]
+    while rank_pos_in_stage < len(diff_rank_id):
+        broadcast_rank_list.append(diff_rank_id[rank_pos_in_stage])
+        rank_pos_in_stage += from_dev_num_in_stage
+    broadcast_rank_list.sort()
+    broadcast_map = {rank_id: [('Broadcast', root_idx, broadcast_rank_list)] for rank_id in broadcast_rank_list}
+    return broadcast_map
+
+
+def _get_pipeline_operator_map(from_layout, to_layout, self_rank):
+    """
+    If src_pp_stages is greater than dst_pp_stages, the weights of the corresponding cards need to
+    be communicated via broadcast to swap. Need to communicate src rank0's 01 to src rank2,
+    so that rank2 holds param0's data. Similarly, communicate rank1's 02 to rank3
+        rank0  01        01 11
+        rank1  02        02 12
+              pp2 -------> pp1
+        rank2  11        03 13
+        rank3  12        04 14
+
+    Args:
+        from_layout (tuple): Use tuple to present layout
+            (device_matrix(list), tensor_map(list), global_shape(list), rank_list(list))
+        to_layout (tuple): Use tuple to present layout
+            (device_matrix(list), tensor_map(list), global_shape(list), rank_list(list))
+        self_rank (int): rank_id
+    """
+    if len(from_layout[3]) < len(to_layout[3]):
+        logger.debug(f"from {from_layout} to {to_layout} need to broadcast data across pp stages")
+        comm_tensor_cache_key = (
+            f"{from_layout[0]}, {from_layout[1]}, {from_layout[2]}, {from_layout[3]}"
+            f" -> "
+            f"{to_layout[0]}, {to_layout[1]}, {from_layout[2]}, {to_layout[3]}")
+        global COMM_TENSOR_CELL_CACHE
+        if comm_tensor_cache_key not in COMM_TENSOR_CELL_CACHE:
+            logger.debug(f"comm_tensor_cache_key is {comm_tensor_cache_key}, not match cache")
+            broadcast_map = _infer_pp_op_map(from_layout, to_layout, self_rank)
+            broadcast_op_map_dict = {rank_id: broadcast_map for rank_id in broadcast_map.keys()}
+            COMM_TENSOR_CELL_CACHE[comm_tensor_cache_key] = broadcast_op_map_dict
+        else:
+            comm_tensor_cache_key_rank_list = COMM_TENSOR_CELL_CACHE[comm_tensor_cache_key]
+            if self_rank in comm_tensor_cache_key_rank_list:
+                logger.debug(f"comm_tensor_cache_key is {comm_tensor_cache_key}, match cache")
+                broadcast_map = comm_tensor_cache_key_rank_list[self_rank]
+            else:
+                logger.debug(f"comm_tensor_cache_key is {comm_tensor_cache_key}, but rank {self_rank} not match cache")
+                broadcast_map = _infer_pp_op_map(from_layout, to_layout, self_rank)
+                for rank_id in broadcast_map.keys():
+                    COMM_TENSOR_CELL_CACHE[comm_tensor_cache_key][rank_id] = broadcast_map
+        return broadcast_map
+    logger.debug(f"from {from_layout} to {to_layout} no need to broadcast data across pp stages")
+    return {}
+
+
+def _is_multi_shard(in_tensor_map):
+    """
+    whether the input tensor map is in multi shard
+    """
+    for tensor_map in in_tensor_map:
+        if isinstance(tensor_map, (list, tuple)) and len(tensor_map) > 1:
+            return True
+    return False
+
+
+def _insert_expand_layout_reshape(param_rank_map, from_info_tuple, to_info_tuple,
+                                  insert_from_reshape, insert_to_reshape):
+    """ insert layout expand op reshape """
+    from_dev_matrix = from_info_tuple[0]
+    from_tensor_map = from_info_tuple[1]
+    from_full_tensor_shape = from_info_tuple[2]
+    to_dev_matrix_origin = to_info_tuple[0]
+    to_tensor_map_origin = to_info_tuple[1]
+    origin_tensor_shape = to_info_tuple[2]
+    for param_rank, _ in param_rank_map.items():
+        if insert_from_reshape:
+            from_slice_tensor_shape = ()
+            from_tensor_strategy = _get_tensor_strategy(from_dev_matrix, from_tensor_map)
+            for i, item in enumerate(from_full_tensor_shape):
+                from_slice_tensor_shape += (item // from_tensor_strategy[i],)
+            param_rank_map.get(param_rank).insert(0, ('Reshape', list(from_slice_tensor_shape)))
+        if insert_to_reshape:
+            to_tensor_strategy = _get_tensor_strategy(to_dev_matrix_origin, to_tensor_map_origin)
+            to_slice_tensor_shape = ()
+            for i, item in enumerate(origin_tensor_shape):
+                to_slice_tensor_shape += (item // to_tensor_strategy[i],)
+            param_rank_map.get(param_rank).append(('Reshape', list(to_slice_tensor_shape)))
+
+
+def _infer_reshard_op_map(from_layout, to_layout, self_rank):
+    """infer reshard op map"""
+    from_layout_without_rank_list = from_layout[:-1]
+    to_layout_without_rank_list = to_layout[:-1]
+    if _is_multi_shard(from_layout[1]):
+        # ((2, 1), 1) --> (2, 1, 1) expand tensormap
+        new_layout = _expand_layout(from_layout[0], from_layout[1], from_layout[2])
+        from_layout_without_rank_list = (new_layout[0], new_layout[1], new_layout[2])
+    if _is_multi_shard(to_layout[1]):
+        new_layout = _expand_layout(to_layout[0], to_layout[1], to_layout[2])
+        to_layout_without_rank_list = (new_layout[0], new_layout[1], new_layout[2])
+    operator_map = _get_needed_rank_transform_operator_map_by_layouts(from_layout_without_rank_list,
+                                                                      to_layout_without_rank_list,
+                                                                      from_layout[3], self_rank,
+                                                                      True)
+    new_to_layout_info = to_layout[:-1]
+    _insert_expand_layout_reshape(operator_map, from_layout_without_rank_list, new_to_layout_info,
+                                  _is_multi_shard(from_layout[1]), _is_multi_shard(to_layout[1]))
+    return operator_map
+
+
+def _get_resharding_operator_map(from_layout, to_layout, self_rank):
+    """
+    Args:
+        from_layout (tuple): Use tuple to present layout
+            (device_matrix(list), tensor_map(list), global_shape(list), rank_list(list))
+        to_layout (tuple): Use tuple to present layout
+            (device_matrix(list), tensor_map(list), global_shape(list), rank_list(list))
+        self_rank (int): rank_id
+    """
+    reshard_op_cache_key = (
+        f"{from_layout[0]}, {from_layout[1]}, {from_layout[2]}, {from_layout[3]}"
+        f" -> "
+        f"{to_layout[0]}, {to_layout[1]}, {from_layout[2]}, {to_layout[3]}")
+    global RESHARD_OP_MAP_CACHE
+    if reshard_op_cache_key not in RESHARD_OP_MAP_CACHE:
+        operator_map = _infer_reshard_op_map(from_layout, to_layout, self_rank)
+        op_map_dict = {rank_id: operator_map for rank_id in operator_map}
+        RESHARD_OP_MAP_CACHE[reshard_op_cache_key] = op_map_dict
+        logger.debug(f"reshard_op_cache_key is {reshard_op_cache_key}, not match cache")
+    else:
+        cache_rank_list_dict = RESHARD_OP_MAP_CACHE[reshard_op_cache_key]
+        if self_rank in cache_rank_list_dict:
+            operator_map = cache_rank_list_dict[self_rank]
+            logger.debug(f"reshard_op_cache_key is {reshard_op_cache_key}, match cache")
+        else:
+            logger.debug(f"reshard_op_cache_key is {reshard_op_cache_key}, "
+                         f"but rank {self_rank} is not match cache")
+            operator_map = _infer_reshard_op_map(from_layout, to_layout, self_rank)
+            for rank_id in operator_map:
+                RESHARD_OP_MAP_CACHE[reshard_op_cache_key][rank_id] = operator_map
    return operator_map
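
The new pipeline-merge helper above is self-contained enough to sanity-check in isolation. The following is a hedged sketch (not part of the diff) reproducing the pp2 -> pp1 case drawn in the _get_pipeline_operator_map docstring; the import path mirrors the file location in the list above and is an assumption, and the stub layout tuples rely on the fact that _infer_pp_op_map only dereferences index 3 (the rank list) of each layout.

# Hypothetical usage sketch; assumes mindspore 2.7.0rc1 is installed.
from mindspore.parallel._tensor import _infer_pp_op_map

# Only index 3 (the rank list) of each layout tuple is read by this helper,
# so the device matrix, tensor map and global shape are stubbed with None.
from_layout = (None, None, None, [0, 1])      # source stage holds ranks 0 and 1 (pp2)
to_layout = (None, None, None, [0, 1, 2, 3])  # destination spans ranks 0..3 (pp1)

print(_infer_pp_op_map(from_layout, to_layout, 0))
# {0: [('Broadcast', 0, [0, 2])], 2: [('Broadcast', 0, [0, 2])]}  -> rank 0 broadcasts to rank 2
print(_infer_pp_op_map(from_layout, to_layout, 1))
# {1: [('Broadcast', 1, [1, 3])], 3: [('Broadcast', 1, [1, 3])]}  -> rank 1 broadcasts to rank 3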
mindspore/parallel/_transformer/moe.py:

@@ -111,7 +111,7 @@ def _check_moe_config(moe_config=None, parallel_config=None):
     """
     if not isinstance(moe_config, MoEConfig):
         raise TypeError(f"'moe_config' must be an instance of MoEConfig, but got {type(moe_config).__name__}.")
-    use_moe = (moe_config.expert_num > 1)
+    use_moe = moe_config.expert_num > 1
     if use_moe is False:
         return
     if moe_config.expert_num % parallel_config.expert_parallel != 0:

mindspore/parallel/_transformer/transformer.py:

@@ -1365,7 +1365,12 @@ class MultiHeadAttention(Cell):
 class TransformerEncoderLayer(Cell):
     r"""
     Transformer Encoder Layer. This is an implementation of the single layer of the transformer
-    encoder layer, including multihead attention and feedward layer.
+    encoder layer, mainly including Multi-Head Attention, Feed Forward, Add and LayerNorm layer.
+
+    The TransformerEncoderLayer structure is shown in the following figure:
+
+    .. image:: ../images/TransformerEncoderLayer.png
+        :align: center
 
     Args:
         batch_size(int): The batch size of the input tensor when do increnmental prediction. Should be a positive
@@ -1532,7 +1537,7 @@ class TransformerEncoderLayer(Cell):
                                  "and parallel_config. model_parallel is {}."
                                  .format(ffn_hidden_size, parallel_config.model_parallel))
         _check_moe_config(moe_config, parallel_config)
-        self.use_moe = (moe_config.expert_num > 1)
+        self.use_moe = moe_config.expert_num > 1
         self.use_past = use_past
         self.seq_length = seq_length
         self.hidden_size = hidden_size
@@ -1607,7 +1612,7 @@ class TransformerEncoderLayer(Cell):
                                  "and parallel_config. model_parallel is {}."
                                  .format(ffn_hidden_size, parallel_config.model_parallel))
         _check_moe_config(moe_config, parallel_config)
-        self.use_moe = (moe_config.expert_num > 1)
+        self.use_moe = moe_config.expert_num > 1
         self.use_past = use_past
         self.seq_length = seq_length
         self.hidden_size = hidden_size
@@ -1902,7 +1907,7 @@ class TransformerDecoderLayer(Cell):
                  parallel_config=default_dpmp_config):
         super(TransformerDecoderLayer, self).__init__()
         _check_moe_config(moe_config, parallel_config)
-        self.use_moe = (moe_config.expert_num > 1)
+        self.use_moe = moe_config.expert_num > 1
         config_to_attention = parallel_config.dpmp if self.use_moe else parallel_config
         if batch_size or use_past:
             Validator.check_positive_int(batch_size)
@@ -2440,7 +2445,7 @@ class TransformerEncoder(Cell):
         super(TransformerEncoder, self).__init__()
         _check_config(parallel_config)
         _check_moe_config(moe_config, parallel_config)
-        self.use_moe = (moe_config.expert_num > 1)
+        self.use_moe = moe_config.expert_num > 1
         config_to_layer = parallel_config.moe_parallel_config if self.use_moe else parallel_config.dp_mp_config
         if _get_parallel_mode() in (ParallelMode.AUTO_PARALLEL,):
             self.add = P.Add()
@@ -2677,7 +2682,7 @@ class TransformerDecoder(Cell):
         super(TransformerDecoder, self).__init__()
         _check_moe_config(moe_config, parallel_config)
         _check_config(parallel_config)
-        self.use_moe = (moe_config.expert_num > 1)
+        self.use_moe = moe_config.expert_num > 1
         config_to_layer = parallel_config.moe_parallel_config if self.use_moe else parallel_config.dp_mp_config
         if _get_parallel_mode() in (ParallelMode.AUTO_PARALLEL,):
             self.add = P.Add()
@@ -2959,7 +2964,7 @@ class Transformer(Cell):
         if not lambda_func:
             lambda_func = _get_lambda_func(total_layer=encoder_layers + decoder_layers)
         _check_moe_config(moe_config, parallel_config)
-        self.use_moe = (moe_config.expert_num > 1)
+        self.use_moe = moe_config.expert_num > 1
         self.add = P.Add()
         self.aux_loss = Tensor(0.0, mstype.float32)
         if encoder_layers > 0:
@@ -3026,7 +3031,7 @@ class Transformer(Cell):
         if not lambda_func:
             lambda_func = _get_lambda_func(total_layer=encoder_layers + decoder_layers)
         _check_moe_config(moe_config, parallel_config)
-        self.use_moe = (moe_config.expert_num > 1)
+        self.use_moe = moe_config.expert_num > 1
         self.add = P.Add().shard(((), ()))
         self.aux_loss = Tensor(0.0, mstype.float32)
         if encoder_layers > 0:
mindspore/parallel/auto_parallel.py:

@@ -162,6 +162,10 @@ class AutoParallel(Cell):
         super(AutoParallel, self).__init__(auto_prefix=False)
         self.network = network
 
+        if parallel_mode not in ["semi_auto", "sharding_propagation", "recursive_programming"]:
+            raise ValueError("the argument 'parallel_mode' must be one of ['semi_auto', 'sharding_propagation'," \
+                             " 'recursive_programming'], but got the value : {} .".format(parallel_mode))
+
         self._parallel_mode = parallel_mode
 
         self._global_rank = get_rank()
@@ -260,8 +264,10 @@ class AutoParallel(Cell):
         self._save_strategy_file_path = file_path
 
     def disable_strategy_file_only_for_trainable_params(self):
-        """By default, MindSpore only loads and saves trainable parameters. This API enables the loading and saving of
-        non-trainable parameters as well."""
+        """
+        By default, MindSpore only loads and saves trainable parameters. This API enables the loading and saving of
+        non-trainable parameters as well.
+        """
         self._only_trainable_params = False
 
     def save_operator_strategy_file(self, file_path):
@@ -410,7 +416,7 @@ class AutoParallel(Cell):
                 raise ValueError("For 'AutoParallel.dataset_strategy', the argument "
                                  "'config' must be 'full_batch' or 'data_parallel', but got the value : {}."
                                  .format(config))
-            self._full_batch = (config == "full_batch")
+            self._full_batch = config == "full_batch"
             self._dataset_strategy_config = config
             return
         if not isinstance(config, tuple):
@@ -513,9 +519,10 @@ class AutoParallel(Cell):
         if not isinstance(scheduler, str):
             raise TypeError("For 'AutoParallel.pipeline', the argument 'stages' "
                             "must be str type, but got the type : {}.".format(type(scheduler)))
-        if scheduler not in ("1f1b", "gpipe"):
+        if scheduler not in ("1f1b", "gpipe", "seqpipe", "seqvpp", "seqsmartvpp"):
             raise ValueError("For 'AutoParallel.pipeline', the argument "
-                             "'scheduler' must be '1f1b' , 'gpipe' , but got the value : {}."
+                             "'scheduler' must be '1f1b'/'gpipe'/'seqpipe'/'seqvpp'/'seqsmartvpp' ," \
+                             " but got the value : {}."
                              .format(scheduler))
         self._pipeline_stages = stages
         self._pipeline_result_broadcast = output_broadcast
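
For orientation, here is a hedged usage sketch (not part of the diff) that exercises the two validations added above. The keyword names (parallel_mode, stages, scheduler) follow the identifiers visible in the diff; the Dense cell is a stand-in for any user network, and init() assumes the script is launched through a distributed launcher such as msrun, since AutoParallel queries the global rank at construction.

import mindspore.nn as nn
from mindspore.communication import init
from mindspore.parallel.auto_parallel import AutoParallel

init()  # assumes a distributed launch (e.g. msrun); AutoParallel calls get_rank() in __init__

net = nn.Dense(16, 16)  # placeholder network
# Accepted modes: "semi_auto", "sharding_propagation", "recursive_programming";
# anything else now raises ValueError immediately instead of failing later.
parallel_net = AutoParallel(net, parallel_mode="semi_auto")
# "seqpipe", "seqvpp" and "seqsmartvpp" join "1f1b" and "gpipe" as accepted pipeline schedulers.
parallel_net.pipeline(stages=2, scheduler="gpipe")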
mindspore/parallel/checkpoint_convert.py:

@@ -15,10 +15,10 @@
 """Convert distributed checkpoint"""
 from __future__ import absolute_import
 
-from mindspore.parallel.checkpoint_transform import rank_list_for_transform, transform_checkpoint_by_rank, transform_checkpoints
+__all__ = ["rank_list_for_convert", "convert_checkpoint_by_rank", "convert_checkpoints"]
 
-__all__ = ["rank_list_for_convert",
-           "convert_checkpoint_by_rank", "convert_checkpoints"]
+from mindspore.parallel.checkpoint_transform import rank_list_for_transform, transform_checkpoint_by_rank, \
+    transform_checkpoints
 
 
 def rank_list_for_convert(rank_id, src_strategy_file=None, dst_strategy_file=None):
mindspore/parallel/checkpoint_transform.py:

@@ -582,6 +582,8 @@ def transform_checkpoints(src_checkpoints_dir, dst_checkpoints_dir, ckpt_prefix,
         The number of multiprocess settings is related to the size of the host, and it is not recommended to set it
         too large, otherwise it may cause freezing.
 
+        This function does not support converting remove_redundancy's checkpoint file.
+
     Args:
         src_checkpoints_dir (str): The source checkpoints directory.
         dst_checkpoints_dir (str): The destination checkpoints directory to save the converted checkpoints.
@@ -1187,7 +1189,7 @@ def load_distributed_checkpoint(network, checkpoint_filenames=None, predict_stra
     param_not_in_ckpt = []
     for _, param in network.parameters_and_names():
         sliced_params = []
-        if param.name not in rank_list.keys():
+        if param.name not in rank_list:
             param_not_in_strategy.append(param.name)
             continue
         if param.name not in param_total_dict:
mindspore/parallel/cluster/process_entity/_api.py:

@@ -22,7 +22,8 @@ import socket
 import psutil
 import mindspore.log as logger
 from ._utils import _generate_cmd_args_list, _generate_cmd_args_list_with_core, _generate_url, \
-    _is_local_ip, _convert_addr_to_ip, _send_scale_num, _get_local_ip
+    _is_local_ip, _convert_addr_to_ip, _send_scale_num, _get_local_ip, _generate_auto_bind_core_strategy, \
+    _generate_bind_core_strategy
 
 
 class _Node:
@@ -79,11 +80,12 @@ class _ComputeGraphNode(_Node):
     Worker node for dynamic networking. Inherits from the Node class.
     """
 
-    def __init__(self, worker_num, sched_host, sched_port, timeout, node_id, args_list, output_file,
+    def __init__(self, worker_num, sched_host, sched_port, timeout, node_id, node_rank, args_list, output_file,
                  tail_worker_log, join, is_simulation):
         super().__init__(worker_num, sched_host, sched_port, timeout, args_list, output_file,
                          tail_worker_log, join, is_simulation)
         self.node_id = node_id
+        self.node_rank = node_rank
 
     def run(self):
         """
@@ -95,6 +97,8 @@ class _ComputeGraphNode(_Node):
         super().run()
         if self.node_id is not None:
             os.environ["MS_NODE_ID"] = str(self.node_id)
+        if self.node_rank is not None:
+            os.environ["MS_NODE_RANK"] = str(self.node_rank)
         # If simulation level is set, environment variable 'MS_ROLE' will not be set.
         if not self.is_simulation:
             os.environ["MS_ROLE"] = "MS_WORKER"
@@ -119,6 +123,9 @@ class _ComputeGraphNode(_Node):
         return subprocess.Popen(['/usr/bin/tail', '-f', self.output_file])
 
     def enable_tail_worker_log(self):
+        """
+        Get valid rank ID for tailing the corresponding worker log.
+        """
         tail_worker_log_list = []
         if self.tail_worker_log != "-1":
             tail_worker_log_list.extend([int(num) for num in self.tail_worker_log.split(',')])
@@ -169,7 +176,7 @@ class _ProcessManager:
 
         self.sim_level = args.sim_level
         self.sim_rank_id = args.sim_rank_id
-        self.is_simulation = (self.sim_level != -1)
+        self.is_simulation = self.sim_level != -1
         if self.is_simulation:
             os.environ["MS_SIMULATION_LEVEL"] = str(self.sim_level)
         elif os.getenv("MS_SIMULATION_LEVEL"):
@@ -205,15 +212,24 @@ class _ProcessManager:
         finally:
             os.umask(origin_mask)
 
+        self.device_to_cpu_map = {}
+        if self.bind_core is True:
+            self.device_to_cpu_map = _generate_auto_bind_core_strategy(self.local_worker_num)
+
         self.proc_rank_map = {}
         self.enable_mindx = False
+        self._check_taskd()
+
+    def _check_taskd(self):
+        """check if enable taskd."""
         tft_env = os.getenv("MS_ENABLE_TFT", "")
-        if ("TTP:1" in tft_env) or ("UCE:1" in tft_env) or ("ARF:1" in tft_env):
+        if any(v in tft_env for v in ('TTP:1', 'UCE:1', 'ARF:1', 'TSP:1', 'RSC:1', 'HCCE:1')):
             try:
                 from taskd.python.framework.agent.ms_mgr.msrun_plugin import MSRunPlugin
                 self.msmgr = MSRunPlugin()
                 self.msmgr.register_callbacks("KILL_WORKER", self.kill_workers)
                 self.msmgr.register_callbacks("START_ALL_WORKER", self.start_all_workers)
+                self.msmgr.register_callbacks("START_WORKER_LIST", self.start_worker_list)
                 self.msmgr.register_callbacks("MONITOR", self.monitor_rank_status)
                 self.enable_mindx = True
                 os.environ["MS_ENABLE_RECOVERY"] = str(1)
@@ -261,6 +277,45 @@ class _ProcessManager:
                                          self.is_simulation)
         self.msn_process = msn.run()
 
+    def _start_single_worker(self, local_rank):
+        """
+        Start worker processor
+
+        Args:
+            local_rank: local rank id.
+        """
+        os.environ["DEVICE_ID"] = str(local_rank)
+        node_id, log_name = self._get_node_id_and_log_path(local_rank)
+        if node_id is None:
+            logger.warning(f"Rank ids will be assigned automatically, "
+                           "please use 'grep -rn 'rank id:' command to check each worker log's rank id.")
+        else:
+            # If node_id is generated in '_get_node_id_and_log_path' method, export 'RANK_ID' environment variable.
+            # This is for rank_table method's compatibility consideration.
+            os.environ["RANK_ID"] = str(node_id)
+            print(f"Start worker process with rank id:{node_id}, log file:{log_name}. "
+                  f"Environment variable [RANK_ID={node_id}] is exported.", flush=True)
+        if self.is_simulation and (self.sim_rank_id != -1):
+            # Reset RANK_ID env to sim_rank_id if sim_rank_id is set.
+            os.environ["RANK_ID"] = str(self.sim_rank_id)
+            logger.warning(f"In dryrun case, RANK_ID is assigned to {self.sim_rank_id}.")
+
+        if self.bind_core:
+            affinity_cpu_str = _generate_bind_core_strategy(local_rank, self.device_to_cpu_map, self.bind_core)
+            if affinity_cpu_str is not None:
+                cmd = _generate_cmd_args_list_with_core(self.cmd, self.cmd_args, affinity_cpu_str)
+            else:
+                cmd = _generate_cmd_args_list(self.cmd, self.cmd_args)
+        else:
+            cmd = _generate_cmd_args_list(self.cmd, self.cmd_args)
+        cgn = _ComputeGraphNode(self.worker_num, self.master_addr, self.master_port, self.cluster_time_out,
+                                node_id, self.node_rank, cmd, log_name, self.tail_worker_log, self.join,
+                                self.is_simulation)
+        process, tail_process = cgn.run()
+        self.cgn_processes.append(process)
+        self.tail_cgn_processes.append(tail_process)
+        self.proc_rank_map[local_rank] = process
+
     def start_workers(self):
         """
         Starts the worker nodes.
@@ -275,40 +330,8 @@ class _ProcessManager:
                        "'rank_id' of each process will be assigned after cluster is successfully built.\n"
                        "You can access 'RANK_ID' environment variable after calling "
                        "'mindspore.communication.init()'")
-
         for i in range(self.local_worker_num):
-            os.environ["DEVICE_ID"] = str(i)
-            node_id, log_name = self._get_node_id_and_log_path(i)
-            if node_id is None:
-                logger.warning(f"Rank ids will be assigned automatically, "
-                               "please use 'grep -rn 'rank id:' command to check each worker log's rank id.")
-            else:
-                # If node_id is generated in '_get_node_id_and_log_path' method, export 'RANK_ID' environment variable.
-                # This is for rank_table method's compatibility consideration.
-                os.environ["RANK_ID"] = str(node_id)
-                print(f"Start worker process with rank id:{node_id}, log file:{log_name}. "
-                      f"Environment variable [RANK_ID={node_id}] is exported.", flush=True)
-            if self.is_simulation and (self.sim_rank_id != -1):
-                # Reset RANK_ID env to sim_rank_id if sim_rank_id is set.
-                os.environ["RANK_ID"] = str(self.sim_rank_id)
-                logger.warning(f"In dryrun case, RANK_ID is assigned to {self.sim_rank_id}.")
-
-            if self.bind_core:
-                cpu_num = subprocess.getoutput("cat /proc/cpuinfo|grep processor|wc -l")
-                if not cpu_num.isdigit():
-                    raise RuntimeError(f"Got cpu number from '/proc/cpuinfo' is {cpu_num}, failed to bind core.")
-                avg = int(cpu_num) // self.local_worker_num
-                cpu_start = avg * i
-                cpu_end = cpu_start + avg - 1
-                cmd = _generate_cmd_args_list_with_core(self.cmd, self.cmd_args, cpu_start, cpu_end)
-            else:
-                cmd = _generate_cmd_args_list(self.cmd, self.cmd_args)
-            cgn = _ComputeGraphNode(self.worker_num, self.master_addr, self.master_port, self.cluster_time_out,
-                                    node_id, cmd, log_name, self.tail_worker_log, self.join, self.is_simulation)
-            process, tail_process = cgn.run()
-            self.cgn_processes.append(process)
-            self.tail_cgn_processes.append(tail_process)
-            self.proc_rank_map[i] = process
+            self._start_single_worker(i)
 
     def join_processes(self):
         """
@@ -334,7 +357,7 @@ class _ProcessManager:
                     continue
                 elif ret_code != 0:
                     has_exception = True
-                    logger.error(f"Worker process {p.pid} exit with exception.")
+                    logger.error(f"Worker process {p.pid} exit with exception. Error code: {ret_code}.")
                     break
                 else:
                     success_cgn_processes.add(p)
@@ -420,14 +443,9 @@ class _ProcessManager:
         Args:
             NA.
         """
-        for p in self.cgn_processes:
-            if p.poll() is None:
-                p.kill()
+        self.kill_worker_processes()
+        self.kill_tail_log_processes()
         self.cgn_processes.clear()
-
-        for p in self.tail_cgn_processes:
-            if p is not None:
-                p.kill()
         self.tail_cgn_processes.clear()
 
     def kill_single_worker(self, pid):
@@ -441,7 +459,7 @@ class _ProcessManager:
         for i in range(len(self.cgn_processes)):
            p = self.cgn_processes[i]
            if p.pid == pid and p.poll() is None:
-                p.kill()
+                os.killpg(os.getpgid(p.pid), signal.SIGKILL)
                del self.cgn_processes[i]
                tail_p = self.tail_cgn_processes[i]
                if tail_p is not None:
@@ -499,7 +517,8 @@ class _ProcessManager:
             p_status = p.poll()
             if (not psutil.pid_exists(p.pid)) and (p_status != 0):
                 p_status = 300
-            return {"pid": p.pid, "status": p_status, "global_rank": global_rank_id}
+            return {"pid": p.pid, "status": p_status, "global_rank": global_rank_id, "local_rank": rank_id,
+                    "node_id": self.node_rank}
         except KeyError:
             logger.info(f"Process rank {rank_id} has not been initialized.")
             return {"pid": None, "status": 200, "global_rank": global_rank_id}
@@ -519,7 +538,24 @@ class _ProcessManager:
         self.start_workers()
         worker_status = self.monitor_rank_status([-1])
         for i in range(self.local_worker_num):
-            if worker_status[i]["status"] != None:  # pylint: disable=singleton-comparison
+            if worker_status[i]["status"] is not None:
+                return 1
+        return 0
+
+    def start_worker_list(self, rank_ids):
+        """
+        Start worker processor by rank list.
+
+        Args:
+            rank_ids: worker process's local rank list, which is also device_id.
+        """
+        if not isinstance(rank_ids, list):
+            raise TypeError(f"The type of 'rank_ids' must be a list, but got:{rank_ids}")
+        for idx in rank_ids:
+            self._start_single_worker(idx)
+        worker_status = self.monitor_rank_status(rank_ids)
+        for i in rank_ids:
+            if worker_status[i]["status"] is not None:
                 return 1
         return 0
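
The start_workers() rewrite above also replaces the inline CPU-range arithmetic (reading /proc/cpuinfo and splitting the cores evenly) with the _generate_auto_bind_core_strategy / _generate_bind_core_strategy helpers imported from _utils. As a hedged illustration of what that removed logic computed, here is a standalone re-statement of the even-split rule, with os.cpu_count() standing in for the original shell call; the helper name even_cpu_ranges is hypothetical and not part of MindSpore.

import os

def even_cpu_ranges(local_worker_num, cpu_num=None):
    """Reproduce the removed even-split rule: {local_rank: (cpu_start, cpu_end)}."""
    cpu_num = cpu_num if cpu_num is not None else os.cpu_count()
    avg = cpu_num // local_worker_num          # cores per worker, as in the removed code
    return {rank: (avg * rank, avg * rank + avg - 1) for rank in range(local_worker_num)}

print(even_cpu_ranges(local_worker_num=8, cpu_num=192))
# {0: (0, 23), 1: (24, 47), 2: (48, 71), ...}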