mindspore-2.4.1-cp311-cp311-win_amd64.whl → mindspore-2.5.0-cp311-cp311-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mindspore might be problematic.

Files changed (395)
  1. mindspore/.commit_id +1 -1
  2. mindspore/Microsoft.VisualStudio.Telemetry.dll +0 -0
  3. mindspore/Newtonsoft.Json.dll +0 -0
  4. mindspore/__init__.py +8 -3
  5. mindspore/_c_dataengine.cp311-win_amd64.pyd +0 -0
  6. mindspore/_c_expression.cp311-win_amd64.pyd +0 -0
  7. mindspore/_c_mindrecord.cp311-win_amd64.pyd +0 -0
  8. mindspore/_checkparam.py +0 -5
  9. mindspore/_extends/parallel_compile/akg_compiler/gen_custom_op_files.py +1 -1
  10. mindspore/_extends/parse/compile_config.py +64 -0
  11. mindspore/_extends/parse/deprecated/__init__.py +0 -0
  12. mindspore/_extends/parse/deprecated/deprecated_tensor_method.py +375 -0
  13. mindspore/_extends/parse/parser.py +23 -5
  14. mindspore/_extends/parse/standard_method.py +123 -27
  15. mindspore/_extends/pijit/pijit_func_white_list.py +1 -1
  16. mindspore/amp.py +7 -1
  17. mindspore/atlprov.dll +0 -0
  18. mindspore/avcodec-59.dll +0 -0
  19. mindspore/avdevice-59.dll +0 -0
  20. mindspore/avfilter-8.dll +0 -0
  21. mindspore/avformat-59.dll +0 -0
  22. mindspore/avutil-57.dll +0 -0
  23. mindspore/boost/boost_cell_wrapper.py +136 -41
  24. mindspore/c1.dll +0 -0
  25. mindspore/c1xx.dll +0 -0
  26. mindspore/c2.dll +0 -0
  27. mindspore/common/__init__.py +3 -1
  28. mindspore/common/_register_for_tensor.py +0 -1
  29. mindspore/common/_stub_tensor.py +25 -4
  30. mindspore/common/_tensor_cpp_method.py +17 -0
  31. mindspore/common/_tensor_docs.py +6132 -0
  32. mindspore/common/api.py +99 -25
  33. mindspore/common/dtype.py +34 -34
  34. mindspore/common/dump.py +2 -1
  35. mindspore/common/file_system.py +8 -1
  36. mindspore/common/generator.py +2 -0
  37. mindspore/common/hook_handle.py +3 -1
  38. mindspore/common/initializer.py +3 -4
  39. mindspore/common/lazy_inline.py +8 -2
  40. mindspore/common/mindir_util.py +10 -2
  41. mindspore/common/parameter.py +30 -27
  42. mindspore/common/tensor.py +713 -1337
  43. mindspore/communication/__init__.py +1 -1
  44. mindspore/communication/_comm_helper.py +10 -0
  45. mindspore/communication/comm_func.py +215 -173
  46. mindspore/communication/management.py +23 -20
  47. mindspore/context.py +292 -193
  48. mindspore/dataset/__init__.py +23 -19
  49. mindspore/dataset/callback/ds_callback.py +2 -1
  50. mindspore/dataset/core/config.py +84 -3
  51. mindspore/dataset/engine/cache_admin.py +3 -3
  52. mindspore/dataset/engine/cache_client.py +5 -4
  53. mindspore/dataset/engine/datasets.py +192 -149
  54. mindspore/dataset/engine/datasets_audio.py +14 -0
  55. mindspore/dataset/engine/datasets_standard_format.py +28 -11
  56. mindspore/dataset/engine/datasets_text.py +38 -1
  57. mindspore/dataset/engine/datasets_user_defined.py +125 -65
  58. mindspore/dataset/engine/datasets_vision.py +81 -8
  59. mindspore/dataset/engine/iterators.py +281 -63
  60. mindspore/dataset/engine/obs/util.py +8 -0
  61. mindspore/dataset/engine/queue.py +40 -0
  62. mindspore/dataset/engine/samplers.py +26 -2
  63. mindspore/dataset/engine/serializer_deserializer.py +1 -1
  64. mindspore/dataset/engine/validators.py +43 -11
  65. mindspore/dataset/transforms/py_transforms_util.py +17 -0
  66. mindspore/dataset/transforms/transforms.py +29 -12
  67. mindspore/dataset/vision/validators.py +1 -2
  68. mindspore/device_context/__init__.py +21 -0
  69. mindspore/device_context/ascend/__init__.py +25 -0
  70. mindspore/device_context/ascend/device.py +72 -0
  71. mindspore/device_context/ascend/op_debug.py +94 -0
  72. mindspore/device_context/ascend/op_precision.py +193 -0
  73. mindspore/device_context/ascend/op_tuning.py +127 -0
  74. mindspore/device_context/cpu/__init__.py +25 -0
  75. mindspore/device_context/cpu/device.py +62 -0
  76. mindspore/device_context/cpu/op_tuning.py +43 -0
  77. mindspore/device_context/gpu/__init__.py +21 -0
  78. mindspore/device_context/gpu/device.py +70 -0
  79. mindspore/device_context/gpu/op_precision.py +67 -0
  80. mindspore/device_context/gpu/op_tuning.py +175 -0
  81. mindspore/device_manager.py +134 -0
  82. mindspore/dnnl.dll +0 -0
  83. mindspore/dpcmi.dll +0 -0
  84. mindspore/experimental/llm_boost/__init__.py +3 -2
  85. mindspore/experimental/llm_boost/ascend_native/__init__.py +22 -0
  86. mindspore/experimental/llm_boost/ascend_native/llama_boost_ascend_native.py +211 -0
  87. mindspore/experimental/llm_boost/ascend_native/llm_boost.py +52 -0
  88. mindspore/experimental/llm_boost/atb/boost_base.py +239 -64
  89. mindspore/experimental/llm_boost/atb/llama_boost.py +52 -30
  90. mindspore/experimental/llm_boost/atb/qwen_boost.py +47 -24
  91. mindspore/experimental/llm_boost/register.py +1 -0
  92. mindspore/experimental/optim/adadelta.py +26 -22
  93. mindspore/experimental/optim/adam.py +3 -0
  94. mindspore/experimental/optim/lr_scheduler.py +33 -24
  95. mindspore/experimental/optim/radam.py +33 -30
  96. mindspore/hal/device.py +28 -0
  97. mindspore/hal/event.py +17 -0
  98. mindspore/hal/memory.py +94 -3
  99. mindspore/hal/stream.py +91 -6
  100. mindspore/include/api/context.h +1 -2
  101. mindspore/include/dataset/constants.h +2 -2
  102. mindspore/jpeg62.dll +0 -0
  103. mindspore/log.py +12 -0
  104. mindspore/mindrecord/__init__.py +1 -1
  105. mindspore/mindrecord/config.py +17 -316
  106. mindspore/mindrecord/filereader.py +1 -9
  107. mindspore/mindrecord/filewriter.py +5 -15
  108. mindspore/mindrecord/mindpage.py +1 -9
  109. mindspore/mindspore_backend.dll +0 -0
  110. mindspore/mindspore_common.dll +0 -0
  111. mindspore/mindspore_core.dll +0 -0
  112. mindspore/mindspore_glog.dll +0 -0
  113. mindspore/mindspore_ops.dll +0 -0
  114. mindspore/mint/__init__.py +824 -218
  115. mindspore/mint/distributed/__init__.py +66 -4
  116. mindspore/mint/distributed/distributed.py +2594 -44
  117. mindspore/mint/linalg/__init__.py +6 -0
  118. mindspore/mint/nn/__init__.py +473 -14
  119. mindspore/mint/nn/functional.py +486 -11
  120. mindspore/mint/nn/layer/__init__.py +17 -4
  121. mindspore/mint/nn/layer/_functions.py +330 -0
  122. mindspore/mint/nn/layer/activation.py +169 -1
  123. mindspore/mint/nn/layer/basic.py +123 -0
  124. mindspore/mint/nn/layer/conv.py +727 -0
  125. mindspore/mint/nn/layer/normalization.py +215 -19
  126. mindspore/mint/nn/layer/padding.py +797 -0
  127. mindspore/mint/nn/layer/pooling.py +170 -0
  128. mindspore/mint/optim/__init__.py +2 -1
  129. mindspore/mint/optim/adam.py +223 -0
  130. mindspore/mint/optim/adamw.py +26 -19
  131. mindspore/mint/special/__init__.py +2 -1
  132. mindspore/msobj140.dll +0 -0
  133. mindspore/mspdb140.dll +0 -0
  134. mindspore/mspdbcore.dll +0 -0
  135. mindspore/mspdbst.dll +0 -0
  136. mindspore/mspft140.dll +0 -0
  137. mindspore/msvcdis140.dll +0 -0
  138. mindspore/msvcp140_1.dll +0 -0
  139. mindspore/msvcp140_2.dll +0 -0
  140. mindspore/msvcp140_atomic_wait.dll +0 -0
  141. mindspore/msvcp140_codecvt_ids.dll +0 -0
  142. mindspore/multiprocessing/__init__.py +5 -0
  143. mindspore/nn/__init__.py +2 -0
  144. mindspore/nn/cell.py +142 -21
  145. mindspore/nn/dynamic_lr.py +2 -1
  146. mindspore/nn/layer/activation.py +6 -6
  147. mindspore/nn/layer/basic.py +35 -25
  148. mindspore/nn/layer/channel_shuffle.py +3 -3
  149. mindspore/nn/layer/conv.py +3 -0
  150. mindspore/nn/layer/embedding.py +3 -3
  151. mindspore/nn/layer/normalization.py +8 -7
  152. mindspore/nn/layer/padding.py +4 -3
  153. mindspore/nn/layer/pooling.py +55 -23
  154. mindspore/nn/layer/rnn_cells.py +1 -1
  155. mindspore/nn/layer/rnns.py +2 -1
  156. mindspore/nn/layer/timedistributed.py +5 -5
  157. mindspore/nn/layer/transformer.py +48 -26
  158. mindspore/nn/learning_rate_schedule.py +5 -3
  159. mindspore/nn/loss/loss.py +31 -36
  160. mindspore/nn/optim/ada_grad.py +1 -0
  161. mindspore/nn/optim/adadelta.py +2 -2
  162. mindspore/nn/optim/adam.py +1 -1
  163. mindspore/nn/optim/lars.py +1 -4
  164. mindspore/nn/optim/optimizer.py +1 -1
  165. mindspore/nn/optim/rprop.py +2 -2
  166. mindspore/nn/optim/thor.py +2 -1
  167. mindspore/nn/utils/__init__.py +22 -0
  168. mindspore/nn/utils/init.py +73 -0
  169. mindspore/nn/wrap/cell_wrapper.py +4 -6
  170. mindspore/nn/wrap/loss_scale.py +3 -4
  171. mindspore/numpy/array_creations.py +60 -62
  172. mindspore/numpy/array_ops.py +148 -143
  173. mindspore/numpy/logic_ops.py +41 -42
  174. mindspore/numpy/math_ops.py +361 -359
  175. mindspore/numpy/utils.py +16 -16
  176. mindspore/numpy/utils_const.py +4 -4
  177. mindspore/opencv_core452.dll +0 -0
  178. mindspore/opencv_imgcodecs452.dll +0 -0
  179. mindspore/opencv_imgproc452.dll +0 -0
  180. mindspore/ops/__init__.py +2 -1
  181. mindspore/ops/_grad_experimental/grad_comm_ops.py +107 -8
  182. mindspore/ops/_grad_experimental/grad_debug_ops.py +6 -1
  183. mindspore/ops/_grad_experimental/grad_inner_ops.py +9 -0
  184. mindspore/ops/_grad_experimental/grad_math_ops.py +2 -1
  185. mindspore/ops/_op_impl/cpu/__init__.py +1 -0
  186. mindspore/ops/_op_impl/cpu/raise_op.py +28 -0
  187. mindspore/ops/_vmap/vmap_array_ops.py +20 -19
  188. mindspore/ops/_vmap/vmap_base.py +0 -2
  189. mindspore/ops/_vmap/vmap_grad_nn_ops.py +19 -13
  190. mindspore/ops/_vmap/vmap_math_ops.py +11 -9
  191. mindspore/ops/_vmap/vmap_nn_ops.py +20 -34
  192. mindspore/ops/auto_generate/cpp_create_prim_instance_helper.py +149 -12
  193. mindspore/ops/auto_generate/gen_arg_handler.py +0 -61
  194. mindspore/ops/auto_generate/gen_extend_func.py +554 -60
  195. mindspore/ops/auto_generate/gen_ops_def.py +1621 -115
  196. mindspore/ops/auto_generate/gen_ops_prim.py +8027 -3411
  197. mindspore/ops/auto_generate/pyboost_inner_prim.py +183 -79
  198. mindspore/ops/composite/base.py +1 -1
  199. mindspore/ops/composite/multitype_ops/_compile_utils.py +229 -30
  200. mindspore/ops/composite/multitype_ops/pow_impl.py +0 -29
  201. mindspore/ops/function/__init__.py +12 -0
  202. mindspore/ops/function/array_func.py +561 -159
  203. mindspore/ops/function/clip_func.py +64 -0
  204. mindspore/ops/function/debug_func.py +28 -20
  205. mindspore/ops/function/image_func.py +1 -1
  206. mindspore/ops/function/linalg_func.py +5 -4
  207. mindspore/ops/function/math_func.py +1664 -294
  208. mindspore/ops/function/nn_func.py +988 -317
  209. mindspore/ops/function/parameter_func.py +3 -56
  210. mindspore/ops/function/random_func.py +243 -33
  211. mindspore/ops/function/sparse_unary_func.py +1 -1
  212. mindspore/ops/functional.py +18 -5
  213. mindspore/ops/functional_overload.py +897 -0
  214. mindspore/ops/operations/__init__.py +3 -2
  215. mindspore/ops/operations/_embedding_cache_ops.py +4 -4
  216. mindspore/ops/operations/_grad_ops.py +2 -34
  217. mindspore/ops/operations/_infer_ops.py +2 -1
  218. mindspore/ops/operations/_inner_ops.py +38 -8
  219. mindspore/ops/operations/array_ops.py +45 -303
  220. mindspore/ops/operations/comm_ops.py +23 -17
  221. mindspore/ops/operations/custom_ops.py +7 -49
  222. mindspore/ops/operations/debug_ops.py +42 -47
  223. mindspore/ops/operations/inner_ops.py +6 -4
  224. mindspore/ops/operations/linalg_ops.py +3 -2
  225. mindspore/ops/operations/manually_defined/ops_def.py +185 -104
  226. mindspore/ops/operations/math_ops.py +11 -216
  227. mindspore/ops/operations/nn_ops.py +153 -310
  228. mindspore/ops/primitive.py +23 -21
  229. mindspore/ops/tensor_method.py +1669 -0
  230. mindspore/ops_generate/aclnn_kernel_register_auto_cc_generator.py +110 -0
  231. mindspore/ops_generate/add_tensor_docs_generator.py +54 -0
  232. mindspore/ops_generate/arg_handler.py +0 -61
  233. mindspore/ops_generate/auto_grad_impl_cc_generator.py +135 -0
  234. mindspore/ops_generate/auto_grad_reg_cc_generator.py +93 -0
  235. mindspore/ops_generate/base_generator.py +11 -0
  236. mindspore/ops_generate/cpp_create_prim_instance_helper_generator.py +108 -0
  237. mindspore/ops_generate/functional_map_cpp_generator.py +491 -0
  238. mindspore/ops_generate/functional_overload_py_generator.py +110 -0
  239. mindspore/ops_generate/functions_cc_generator.py +233 -0
  240. mindspore/ops_generate/gen_aclnn_implement.py +110 -114
  241. mindspore/ops_generate/gen_constants.py +157 -3
  242. mindspore/ops_generate/gen_ops.py +245 -990
  243. mindspore/ops_generate/gen_pyboost_func.py +97 -998
  244. mindspore/ops_generate/gen_utils.py +119 -33
  245. mindspore/ops_generate/lite_ops_cpp_generator.py +155 -0
  246. mindspore/ops_generate/op_api_proto.py +206 -0
  247. mindspore/ops_generate/op_def_py_generator.py +131 -0
  248. mindspore/ops_generate/op_prim_py_generator.py +480 -0
  249. mindspore/ops_generate/op_proto.py +373 -108
  250. mindspore/ops_generate/op_template_parser.py +436 -0
  251. mindspore/ops_generate/ops_def_cc_generator.py +288 -0
  252. mindspore/ops_generate/ops_def_h_generator.py +74 -0
  253. mindspore/ops_generate/ops_name_h_generator.py +68 -0
  254. mindspore/ops_generate/ops_primitive_h_generator.py +81 -0
  255. mindspore/ops_generate/pyboost_functions_cpp_generator.py +370 -0
  256. mindspore/ops_generate/pyboost_functions_h_generator.py +68 -0
  257. mindspore/ops_generate/pyboost_functions_py_generator.py +148 -0
  258. mindspore/ops_generate/pyboost_grad_function_cpp_generator.py +154 -0
  259. mindspore/ops_generate/pyboost_inner_prim_generator.py +131 -0
  260. mindspore/ops_generate/pyboost_native_grad_functions_generator.py +268 -0
  261. mindspore/ops_generate/pyboost_op_cpp_code_generator.py +851 -0
  262. mindspore/ops_generate/pyboost_overload_functions_cpp_generator.py +344 -0
  263. mindspore/ops_generate/pyboost_utils.py +92 -33
  264. mindspore/ops_generate/template.py +294 -44
  265. mindspore/ops_generate/tensor_func_reg_cpp_generator.py +422 -0
  266. mindspore/parallel/__init__.py +3 -3
  267. mindspore/parallel/_auto_parallel_context.py +44 -34
  268. mindspore/parallel/_cell_wrapper.py +22 -3
  269. mindspore/parallel/_parallel_serialization.py +13 -2
  270. mindspore/parallel/_utils.py +4 -2
  271. mindspore/parallel/algo_parameter_config.py +1 -1
  272. mindspore/parallel/checkpoint_transform.py +44 -0
  273. mindspore/parallel/cluster/process_entity/_api.py +131 -37
  274. mindspore/parallel/cluster/process_entity/_utils.py +41 -6
  275. mindspore/parallel/cluster/run.py +20 -3
  276. mindspore/parallel/parameter_broadcast.py +1 -1
  277. mindspore/parallel/shard.py +3 -0
  278. mindspore/parallel/transform_safetensors.py +119 -253
  279. mindspore/pgodb140.dll +0 -0
  280. mindspore/pgort140.dll +0 -0
  281. mindspore/profiler/__init__.py +17 -4
  282. mindspore/profiler/analysis/__init__.py +0 -0
  283. mindspore/profiler/analysis/parser/__init__.py +0 -0
  284. mindspore/profiler/analysis/parser/ascend_cann_parser.py +166 -0
  285. mindspore/profiler/analysis/parser/base_parser.py +158 -0
  286. mindspore/profiler/analysis/parser/framework_cann_relation_parser.py +45 -0
  287. mindspore/profiler/analysis/parser/ms_framework_parser.py +142 -0
  288. mindspore/profiler/analysis/parser/ms_minddata_parser.py +145 -0
  289. mindspore/profiler/analysis/parser/timeline_assembly_factory/__init__.py +0 -0
  290. mindspore/profiler/analysis/parser/timeline_assembly_factory/ascend_timeline_assembler.py +261 -0
  291. mindspore/profiler/analysis/parser/timeline_assembly_factory/base_timeline_assembler.py +40 -0
  292. mindspore/profiler/analysis/parser/timeline_assembly_factory/trace_view_container.py +84 -0
  293. mindspore/profiler/analysis/parser/timeline_creator/__init__.py +0 -0
  294. mindspore/profiler/analysis/parser/timeline_creator/base_timeline_creator.py +44 -0
  295. mindspore/profiler/analysis/parser/timeline_creator/cpu_op_timeline_creator.py +90 -0
  296. mindspore/profiler/analysis/parser/timeline_creator/fwk_timeline_creator.py +76 -0
  297. mindspore/profiler/analysis/parser/timeline_creator/msprof_timeline_creator.py +103 -0
  298. mindspore/profiler/analysis/parser/timeline_creator/scope_layer_timeline_creator.py +134 -0
  299. mindspore/profiler/analysis/parser/timeline_event/__init__.py +0 -0
  300. mindspore/profiler/analysis/parser/timeline_event/base_event.py +233 -0
  301. mindspore/profiler/analysis/parser/timeline_event/cpu_op_event.py +47 -0
  302. mindspore/profiler/analysis/parser/timeline_event/flow_event.py +36 -0
  303. mindspore/profiler/analysis/parser/timeline_event/fwk_event.py +260 -0
  304. mindspore/profiler/analysis/parser/timeline_event/msprof_event.py +73 -0
  305. mindspore/profiler/analysis/parser/timeline_event/scope_layer_event.py +53 -0
  306. mindspore/profiler/analysis/parser/timeline_event/timeline_event_pool.py +146 -0
  307. mindspore/profiler/analysis/task_manager.py +131 -0
  308. mindspore/profiler/analysis/time_converter.py +84 -0
  309. mindspore/profiler/analysis/viewer/__init__.py +0 -0
  310. mindspore/profiler/analysis/viewer/ascend_communication_viewer.py +333 -0
  311. mindspore/profiler/analysis/viewer/ascend_integrate_viewer.py +87 -0
  312. mindspore/profiler/analysis/viewer/ascend_kernel_details_viewer.py +252 -0
  313. mindspore/profiler/analysis/viewer/ascend_memory_viewer.py +313 -0
  314. mindspore/profiler/analysis/viewer/ascend_op_memory_viewer.py +322 -0
  315. mindspore/profiler/analysis/viewer/ascend_step_trace_time_viewer.py +265 -0
  316. mindspore/profiler/analysis/viewer/ascend_timeline_viewer.py +58 -0
  317. mindspore/profiler/analysis/viewer/base_viewer.py +26 -0
  318. mindspore/profiler/analysis/viewer/ms_dataset_viewer.py +97 -0
  319. mindspore/profiler/analysis/viewer/ms_minddata_viewer.py +581 -0
  320. mindspore/profiler/analysis/work_flow.py +73 -0
  321. mindspore/profiler/common/ascend_msprof_exporter.py +138 -0
  322. mindspore/profiler/common/command_executor.py +90 -0
  323. mindspore/profiler/common/constant.py +174 -3
  324. mindspore/profiler/common/file_manager.py +208 -0
  325. mindspore/profiler/common/log.py +130 -0
  326. mindspore/profiler/common/msprof_cmd_tool.py +202 -0
  327. mindspore/profiler/common/path_manager.py +371 -0
  328. mindspore/profiler/common/process_bar.py +168 -0
  329. mindspore/profiler/common/process_pool.py +9 -3
  330. mindspore/profiler/common/profiler_context.py +476 -0
  331. mindspore/profiler/common/profiler_info.py +304 -0
  332. mindspore/profiler/common/profiler_output_path.py +284 -0
  333. mindspore/profiler/common/profiler_parameters.py +210 -0
  334. mindspore/profiler/common/profiler_path_manager.py +120 -0
  335. mindspore/profiler/common/record_function.py +76 -0
  336. mindspore/profiler/common/tlv_decoder.py +76 -0
  337. mindspore/profiler/common/util.py +75 -2
  338. mindspore/profiler/dynamic_profiler.py +270 -37
  339. mindspore/profiler/envprofiler.py +138 -0
  340. mindspore/profiler/mstx.py +199 -0
  341. mindspore/profiler/platform/__init__.py +21 -0
  342. mindspore/profiler/platform/base_profiler.py +40 -0
  343. mindspore/profiler/platform/cpu_profiler.py +124 -0
  344. mindspore/profiler/platform/gpu_profiler.py +74 -0
  345. mindspore/profiler/platform/npu_profiler.py +309 -0
  346. mindspore/profiler/profiler.py +580 -93
  347. mindspore/profiler/profiler_action_controller.py +187 -0
  348. mindspore/profiler/profiler_interface.py +114 -0
  349. mindspore/profiler/schedule.py +208 -0
  350. mindspore/rewrite/api/symbol_tree.py +1 -2
  351. mindspore/run_check/_check_version.py +18 -13
  352. mindspore/runtime/__init__.py +37 -0
  353. mindspore/runtime/device.py +27 -0
  354. mindspore/runtime/event.py +209 -0
  355. mindspore/runtime/executor.py +148 -0
  356. mindspore/runtime/memory.py +392 -0
  357. mindspore/runtime/stream.py +460 -0
  358. mindspore/runtime/thread_bind_core.py +401 -0
  359. mindspore/swresample-4.dll +0 -0
  360. mindspore/swscale-6.dll +0 -0
  361. mindspore/tbbmalloc.dll +0 -0
  362. mindspore/tinyxml2.dll +0 -0
  363. mindspore/train/__init__.py +2 -2
  364. mindspore/train/_utils.py +53 -18
  365. mindspore/train/amp.py +8 -4
  366. mindspore/train/callback/_checkpoint.py +32 -18
  367. mindspore/train/callback/_early_stop.py +1 -1
  368. mindspore/train/callback/_flops_collector.py +105 -69
  369. mindspore/train/callback/_history.py +1 -1
  370. mindspore/train/callback/_summary_collector.py +44 -6
  371. mindspore/train/callback/_tft_register.py +37 -15
  372. mindspore/train/dataset_helper.py +11 -11
  373. mindspore/train/metrics/precision.py +4 -5
  374. mindspore/train/mind_ir_pb2.py +167 -46
  375. mindspore/train/model.py +13 -14
  376. mindspore/train/serialization.py +461 -72
  377. mindspore/train/summary/summary_record.py +1 -2
  378. mindspore/train/train_thor/model_thor.py +1 -1
  379. mindspore/turbojpeg.dll +0 -0
  380. mindspore/utils/__init__.py +4 -2
  381. mindspore/utils/dryrun.py +138 -0
  382. mindspore/utils/runtime_execution_order_check.py +550 -0
  383. mindspore/vcmeta.dll +0 -0
  384. mindspore/vcruntime140.dll +0 -0
  385. mindspore/vcruntime140_1.dll +0 -0
  386. mindspore/version.py +1 -1
  387. {mindspore-2.4.1.dist-info → mindspore-2.5.0.dist-info}/METADATA +3 -4
  388. {mindspore-2.4.1.dist-info → mindspore-2.5.0.dist-info}/RECORD +391 -265
  389. {mindspore-2.4.1.dist-info → mindspore-2.5.0.dist-info}/entry_points.txt +1 -1
  390. mindspore/common/_tensor_overload.py +0 -139
  391. mindspore/mindspore_np_dtype.dll +0 -0
  392. mindspore/profiler/envprofiling.py +0 -254
  393. mindspore/profiler/profiling.py +0 -1926
  394. {mindspore-2.4.1.dist-info → mindspore-2.5.0.dist-info}/WHEEL +0 -0
  395. {mindspore-2.4.1.dist-info → mindspore-2.5.0.dist-info}/top_level.txt +0 -0
mindspore/parallel/algo_parameter_config.py
@@ -370,7 +370,7 @@ def get_algo_parameters(attr_key):
     Examples:
         >>> import mindspore as ms
         >>> ms.get_algo_parameters("fully_use_devices")
-        True
+        False
     """
     if attr_key not in get_algo_parameters_config_func_map:
         raise ValueError("Get context keyword %s is not recognized!" % attr_key)
mindspore/parallel/checkpoint_transform.py
@@ -28,6 +28,7 @@ from mindspore.parallel._parallel_serialization import _rank_list_for_transform_
     _extract_layout_map, _extract_src_dst_layout_map, _parameter_not_in_local_stage, _extract_pipeline_stage_num, \
     _merge_protobuf_strategy, _merge_json_strategy, _extract_src_dst_layout_map_by_src
 from mindspore.parallel.transform_safetensors import _transform_safetensors, _collect_safetensor_files
+from mindspore._c_expression import AutoParallelContext

 __all__ = ["merge_pipeline_strategys", "rank_list_for_transform", "transform_checkpoint_by_rank",
            "transform_checkpoints", "sync_pipeline_shared_parameters", "load_segmented_checkpoints"]
@@ -648,3 +649,46 @@ def load_segmented_checkpoints(ckpt_file_dir, net=None, strict_load=False, filte
         parameter_dict.update(ms.load_checkpoint(checkpoint_file, net, strict_load, filter_prefix, dec_key,
                                                  dec_mode, specify_prefix, choice_func))
     return parameter_dict
+
+
+def set_op_strategy_config(mode="SAVE", path=""):
+    """
+    Set strategy json configuration when using sharding propagation.
+
+    .. warning::
+        This is an experimental interface, may be changed or canceled in the future;
+        This interface currently doesn't support saving or loading strategies using layout.
+
+    Note:
+        - It only works when `parallel_mode=ParallelMode.AUTO_PARALLEL` and `search_mode='sharding_propagation'`.
+        - It only supports saving and reloading with the same configuration for the same network. If the network
+          or training hyperparameters are modified after using the `SAVE` mode to save the strategies of operator
+          to the setting json file, which may lead to the failure of using the `LOAD` mode to load operator
+          strategies from json.
+        - When performing distributed training, users can first save the strategy using dryrun on a single device
+          and then load strategy to perform distributed training.
+
+    Args:
+        mode (str): The parameter for choosing save or load .json file. Default value: ``"SAVE"`` .
+        path (str): Path to save or load parallel strategy json, must be an absolute path. Default value: ``""`` .
+
+    Raises:
+        KeyError: When type is not ``"SAVE"`` or ``"LOAD"`` .
+        KeyError: When path does not end in ``".json"`` .
+        KeyError: When path is not an absolute path.
+    """
+    if not os.path.isabs(path):
+        raise KeyError("File path must be an absolute path")
+    _, file_type = os.path.splitext(path)
+    if file_type != ".json":
+        raise KeyError("File type must be .json")
+    dir_path = os.path.dirname(path)
+    if dir_path and not os.path.exists(dir_path):
+        os.makedirs(dir_path, mode=0o700, exist_ok=True)
+    check_mode_type = ["SAVE", "LOAD"]
+    if mode in check_mode_type:
+        if AutoParallelContext.get_instance() is None:
+            raise ValueError("Get AutoParallelContext instance failed!!!")
+        AutoParallelContext.get_instance().set_ops_strategy_json_config(mode, path, "all")
+    else:
+        raise KeyError("Type must be 'SAVE' or 'LOAD'")
mindspore/parallel/cluster/process_entity/_api.py
@@ -16,23 +16,30 @@
 import os
 import re
 import sys
+import signal
 import subprocess
+import socket
 import mindspore.log as logger
 from ._utils import _generate_cmd_args_list, _generate_cmd_args_list_with_core, _generate_url,\
-    _is_local_ip, _send_scale_num
+    _is_local_ip, _convert_addr_to_ip, _send_scale_num, _get_local_ip


 class _Node:
     """
     Base class for dynamic networking nodes.

     """
-    def __init__(self, worker_num, sched_host, sched_port, timeout, args_list, output_file):
+    def __init__(self, worker_num, sched_host, sched_port, timeout, args_list, output_file, tail_worker_log,
+                 join, is_simulation):
         self.worker_num = worker_num
         self.sched_host = sched_host
         self.sched_port = sched_port
         self.args_list = args_list
         self.output_file = output_file
         self.timeout = timeout
+        self.tail_worker_log = tail_worker_log
+        self.join = join
+        self.is_simulation = is_simulation
+

     def run(self):
@@ -40,9 +47,11 @@ class _Node:

         """
         os.environ["MS_WORKER_NUM"] = str(self.worker_num)
-        os.environ["MS_SCHED_HOST"] = self.sched_host
-        os.environ["MS_SCHED_PORT"] = str(self.sched_port)
-        os.environ["MS_TOPO_TIMEOUT"] = str(self.timeout)
+        # If simulation level is set, environment variables for dynamic networking will not be set and scheduler will not be started.
+        if not self.is_simulation:
+            os.environ["MS_SCHED_HOST"] = self.sched_host
+            os.environ["MS_SCHED_PORT"] = str(self.sched_port)
+            os.environ["MS_TOPO_TIMEOUT"] = str(self.timeout)


 class _MetaServerNode(_Node):
     """
@@ -63,8 +72,10 @@ class _ComputeGraphNode(_Node):
     """
     Worker node for dynamic networking. Inherits from the Node class.
     """
-    def __init__(self, worker_num, sched_host, sched_port, timeout, node_id, args_list, output_file):
-        super().__init__(worker_num, sched_host, sched_port, timeout, args_list, output_file)
+    def __init__(self, worker_num, sched_host, sched_port, timeout, node_id, args_list, output_file,
+                 tail_worker_log, join, is_simulation):
+        super().__init__(worker_num, sched_host, sched_port, timeout, args_list, output_file,
+                         tail_worker_log, join, is_simulation)
         self.node_id = node_id


@@ -78,9 +89,36 @@ class _ComputeGraphNode(_Node):
         super().run()
         if self.node_id is not None:
             os.environ["MS_NODE_ID"] = str(self.node_id)
-        os.environ["MS_ROLE"] = "MS_WORKER"
+        # If simulation level is set, environment variable 'MS_ROLE' will not be set.
+        if not self.is_simulation:
+            os.environ["MS_ROLE"] = "MS_WORKER"
+        tail_worker_process = None
+        is_tail_worker_log = self.enable_tail_worker_log()
+        if self.join and not is_tail_worker_log:
+            logger.warning(f"The '--tail_worker_log' is:{self.tail_worker_log}, "
+                           f"which doesn't contain this worker {self.node_id}."
+                           f" So this worker {self.node_id}'s log will not be output to console. Reset "
+                           "'--tail_worker_log', if you want to output this worker's log to console.")
         with open(self.output_file, "w") as file_handle:
-            return subprocess.Popen(self.args_list, stdout=file_handle, stderr=subprocess.STDOUT)
+            worker_process = subprocess.Popen(self.args_list, preexec_fn=os.setsid, stdout=file_handle,
+                                              stderr=subprocess.STDOUT)
+            if self.join and is_tail_worker_log:
+                tail_worker_process = self.output_to_console()
+            return worker_process, tail_worker_process
+
+    def output_to_console(self):
+        """
+        Output worker log file to console.
+        """
+        return subprocess.Popen(['/usr/bin/tail', '-f', self.output_file])
+
+    def enable_tail_worker_log(self):
+        tail_worker_log_list = []
+        if self.tail_worker_log != "-1":
+            tail_worker_log_list.extend([int(num) for num in self.tail_worker_log.split(',')])
+        if self.tail_worker_log != "-1" and self.node_id not in tail_worker_log_list:
+            return False
+        return True


 class _ProcessManager:
@@ -99,13 +137,14 @@ class _ProcessManager:
         """
         self.msn_process = None
         self.cgn_processes = []
+        self.tail_cgn_processes = []

-        """`is_master` flags whether the current node is the master node."""
-        self.is_master = _is_local_ip(args.master_addr)
-
-        self.master_addr = args.master_addr
+        self.master_addr = _convert_addr_to_ip(args.master_addr)
         self.master_port = args.master_port

+        """`is_master` flags whether the current node is the master node."""
+        self.is_master = _is_local_ip(self.master_addr)
+
         self.worker_num = args.worker_num
         if self.worker_num <= 0:
             raise ValueError(f"worker_num must be greater than 0, but got {self.worker_num}.")
@@ -115,6 +154,8 @@ class _ProcessManager:

         self.log_dir = args.log_dir
         self.join = args.join
+        self.worker_log_name = args.worker_log_name
+        self.tail_worker_log = args.tail_worker_log
         self.cluster_time_out = args.cluster_time_out
         self.bind_core = args.bind_core
         self.rank_table_file = args.rank_table_file
@@ -123,19 +164,21 @@ class _ProcessManager:
         self.sim_rank_id = args.sim_rank_id
         self.is_simulation = (self.sim_level != -1)
         if self.is_simulation:
-            # If simulation level is set, reset the worker_num and local_worker_num to 1
-            # so that host cluster could be initialized.
-            self.worker_num = 1
-            self.local_worker_num = 1
             os.environ["MS_SIMULATION_LEVEL"] = str(self.sim_level)
         elif os.getenv("MS_SIMULATION_LEVEL"):
-            # If simulation level env is set, load RANK_ID and RANK_SIZE envs.
-            self.worker_num = 1
-            self.local_worker_num = 1
             self.is_simulation = True
-            self.sim_rank_id = os.getenv("RANK_ID", "0")
+            self.sim_rank_id = int(os.getenv("RANK_ID", "-1"))
             if os.getenv("RANK_SIZE"):
                 self.exported_rank_size = os.getenv("RANK_SIZE")
+        # If sim_rank_id is set, single worker can be started.
+        if self.is_simulation and (self.sim_rank_id != -1):
+            logger.info(f"Simulation rank id is set to {self.sim_rank_id}, will dryrun a single process.")
+            self.local_worker_num = 1
+        if self.is_simulation and self.local_worker_num > 128:
+            self.local_worker_num = 1
+            self.sim_rank_id = 0
+            logger.warning(f"In dryrun case, local worker num is set to larger than 128. "
+                           "To avoid a system clash, local worker num is set to 1.")

         self.cmd = args.task_script
         self.cmd_args = args.task_script_args
@@ -173,7 +216,7 @@ class _ProcessManager:
             else:
                 sys.exit()
         else:
-            if self.is_master:
+            if self.is_master and not self.is_simulation:
                 self.start_scheduler()
             self.start_workers()

@@ -190,7 +233,8 @@ class _ProcessManager:
         os.environ['RANK_ID'] = str(0)
         msn = _MetaServerNode(self.worker_num, self.master_addr, self.master_port, self.cluster_time_out,
                               _generate_cmd_args_list(self.cmd, self.cmd_args),
-                              os.path.join(self.log_dir, "scheduler.log"))
+                              os.path.join(self.log_dir, "scheduler.log"), self.tail_worker_log, self.join,
+                              self.is_simulation)
         self.msn_process = msn.run()

     def start_workers(self):
@@ -208,9 +252,6 @@ class _ProcessManager:
                            "You can access 'RANK_ID' environment variable after calling "
                            "'mindspore.communication.init()'")

-        if self.is_simulation and self.worker_num != 1:
-            raise ValueError(f"Simulation level is set, worker_num must be 1, but got {self.worker_num}.")
-
         for i in range(self.local_worker_num):
             os.environ["DEVICE_ID"] = str(i)
             node_id, log_name = self._get_node_id_and_log_path(i)
@@ -223,9 +264,10 @@ class _ProcessManager:
                 os.environ["RANK_ID"] = str(node_id)
                 logger.warning(f"Start worker process with rank id:{node_id}, log file:{log_name}. "
                                "Environment variable [RANK_ID] is exported.")
-            if self.is_simulation:
-                # Reset RANK_ID env to sim_rank_id.
+            if self.is_simulation and (self.sim_rank_id != -1):
+                # Reset RANK_ID env to sim_rank_id if sim_rank_id is set.
                 os.environ["RANK_ID"] = str(self.sim_rank_id)
+                logger.warning(f"In dryrun case, RANK_ID is assigned to {self.sim_rank_id}.")

             cpu_num = subprocess.getoutput("cat /proc/cpuinfo|grep processor|wc -l")
             if not cpu_num.isdigit():
@@ -238,9 +280,11 @@ class _ProcessManager:
             else:
                 cmd = _generate_cmd_args_list(self.cmd, self.cmd_args)
             cgn = _ComputeGraphNode(self.worker_num, self.master_addr, self.master_port, self.cluster_time_out,
-                                    node_id, cmd, log_name)
-            process = cgn.run()
+                                    node_id, cmd, log_name, self.tail_worker_log, self.join, self.is_simulation)
+            process, tail_process = cgn.run()
             self.cgn_processes.append(process)
+            self.tail_cgn_processes.append(tail_process)
+

     def join_processes(self):
         """
@@ -248,8 +292,14 @@ class _ProcessManager:
         If there's any process does not exit normally, logs will be analyzed
         so that understandable root cause of exception could be returned.
         """
+        def signal_handler(sig, frame):
+            logger.warning("msrun process received SIGNIN (Ctrl+C), terminating all workers.")
+            self.kill_all_processes()
+            sys.exit(0)
+
         has_exception = False
         success_cgn_processes = set()
+        signal.signal(signal.SIGINT, signal_handler)
         while True:
             # Traversal all workers and kill immediately if any exception happens.
             for p in self.cgn_processes:
@@ -266,15 +316,14 @@ class _ProcessManager:

             if has_exception:
                 logger.warning("There's worker exits with exception, kill all other workers.")
-                for p in self.cgn_processes:
-                    if p.poll() is None:
-                        p.kill()
+                self.kill_worker_processes()
+                self.kill_tail_log_processes()
                 break
             elif len(success_cgn_processes) == len(self.cgn_processes):
                 logger.info("All workers successfully exit!")
+                self.kill_tail_log_processes()
                 break

-
         if self.msn_process:
             self.msn_process.wait()
             if self.msn_process.returncode != 0:
@@ -287,6 +336,35 @@ class _ProcessManager:
             raise RuntimeError("Distributed job exited with exception. Please check logs in "
                                f"directory: {self.log_dir}.")

+    def kill_tail_log_processes(self):
+        """
+        Kills all tail worker log processes.
+
+        """
+        for p_tail in self.tail_cgn_processes:
+            if p_tail is not None:
+                logger.debug("Tail worker log process:{p_tail.pid} has been killed!")
+                p_tail.kill()
+
+    def kill_worker_processes(self):
+        """
+        Kills all worker processes.
+
+        """
+        for p in self.cgn_processes:
+            if p.poll() is None:
+                os.killpg(os.getpgid(p.pid), signal.SIGKILL)
+
+    def kill_all_processes(self):
+        """
+        Kills all running processes, including scheduler, worker and tail log.
+
+        """
+        self.kill_worker_processes()
+        self.kill_tail_log_processes()
+        if self.msn_process.poll() is None:
+            self.msn_process.kill()
+
     def stop_processes(self):
         """
         Stops all running processes.
@@ -310,26 +388,29 @@ class _ProcessManager:
         self.start_scheduler()
         self.start_workers()

+
     def _get_node_id_and_log_path(self, index):
         """
         Generate node id and log path for corresponding process.
         """
+        formatted_log_name = self.format_worker_log_name()
         if self.local_worker_num > self.worker_num:
             raise ValueError(f"Total worker number is {self.worker_num}, "
                              f"but got exceeded local worker number: {self.local_worker_num}.")
         if self.local_worker_num == self.worker_num:
-            return index, os.path.join(self.log_dir, "worker_" + str(index) + ".log")
+            return index, os.path.join(self.log_dir, formatted_log_name + "_" + str(index) + ".log")

         if self.node_rank >= 0:
             # We assume that each node has same process number.
             node_id = self.node_rank * self.local_worker_num + index
-            log_name = os.path.join(self.log_dir, "worker_" + str(node_id) + ".log")
+            log_name = os.path.join(self.log_dir, formatted_log_name + "_" + str(node_id) + ".log")
         else:
             # If node_rank is default value -1, let MindSpore assign rank id.
             node_id = None
-            log_name = os.path.join(self.log_dir, "worker_" + str(index) + ".log")
+            log_name = os.path.join(self.log_dir, formatted_log_name + "_" + str(index) + ".log")
         return node_id, log_name

+
     def _analyze_log(self):
         """
         Analyze exception logs.
@@ -350,3 +431,16 @@ class _ProcessManager:
         logger.error(f"Time out nodes are {time_out_node_ids}")

         os.system(f"grep -rn -E 'ERROR|CRITICAL|Traceback|Error' -C 5 {self.log_dir}")
+
+
+    def format_worker_log_name(self):
+        """
+        Format worker log files' name.
+        """
+        if not self.worker_log_name:
+            formatted_worker_log_name = "worker"
+        else:
+            current_ip = _get_local_ip(self.master_addr)
+            formatted_worker_log_name = re.sub(r'\{ip\}', current_ip, self.worker_log_name)
+            formatted_worker_log_name = re.sub(r'\{hostname\}', socket.gethostname(), formatted_worker_log_name)
+        return formatted_worker_log_name
mindspore/parallel/cluster/process_entity/_utils.py
@@ -16,8 +16,11 @@
 import os
 import json
 import socket
+import ipaddress
 import mindspore.log as logger

+CURRENT_IP = None
+
 def _generate_cmd(cmd, cmd_args, output_name):
     """
     Generates a command string to execute a Python script in the background, r
@@ -67,6 +70,24 @@ def _generate_url(addr, port):
     return url


+def _get_local_ip(ip_address):
+    """
+    Get current IP address.
+
+    """
+    global CURRENT_IP
+    if CURRENT_IP is None:
+        try:
+            s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
+            s.connect((ip_address, 0))
+            CURRENT_IP = s.getsockname()[0]
+            s.close()
+        except Exception as e:
+            raise RuntimeError(f"Get local ip failed: {e}. Please check whether an accessible address "
+                               "is input by '--master_address'.")
+    return CURRENT_IP
+
+
 def _is_local_ip(ip_address):
     """
     Check if the current input IP address is a local IP address.
@@ -75,13 +96,8 @@ def _is_local_ip(ip_address):
     p = os.popen("ip -j addr")
     addr_info_str = p.read()
     p.close()
+    current_ip = _get_local_ip(ip_address)
     if not addr_info_str:
-        # This means this host has no "ip -j addr" command.
-        # We use socket module to get local ip address.
-        s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
-        s.connect((ip_address, 0))
-        current_ip = s.getsockname()[0]
-        s.close()
         return current_ip == ip_address

     addr_infos = json.loads(addr_info_str)
@@ -93,6 +109,25 @@
     return False


+def _convert_addr_to_ip(master_addr):
+    """
+    Check whether the input parameter 'master_addr' is IPv4. If a hostname is inserted, it will be converted
+    to IP and then set as master host's IP.
+
+    """
+    try:
+        ipaddress.IPv4Address(master_addr)
+        return master_addr
+    except ipaddress.AddressValueError:
+        try:
+            ip_address = socket.gethostbyname(master_addr)
+            logger.info(f"Convert input host name:{master_addr} to ip address:{ip_address}.")
+            return ip_address
+        except socket.gaierror as e:
+            raise RuntimeError(f"DNS resolution failed: {e}. Please check whether a correct host name "
+                               "is input by '--master_address'.")
+
+
 def _send_scale_num(url, scale_num):
     """
     Send an HTTP request to a specified URL, informing scale_num.
mindspore/parallel/cluster/run.py
@@ -37,8 +37,8 @@ def get_args():
     parser.add_argument(
         "--master_addr",
         default="127.0.0.1", type=str,
-        help="specifies the IP address of the scheduler and its data type is string."
-             " Allowed values: valid IP addresses."
+        help="specifies the IP address or the host name of the scheduler and its data type is string."
+             " Allowed values: valid IP addresses or valid host name."
    )
     parser.add_argument(
         "--master_port", default=8118, type=int,
@@ -91,7 +91,7 @@ def get_args():
     )
     parser.add_argument(
         "--sim_rank_id",
-        default=0,
+        default=-1,
         type=int,
         help="specifies simulation process's rank id. Only one process is spawned in simulation scenario."
     )
@@ -102,6 +102,23 @@ def get_args():
         help="specifies rank table file path. This path is not used to initialize distributed job in "
              "'rank table file manner' but to help support other features."
     )
+    parser.add_argument(
+        "--worker_log_name",
+        default="",
+        type=str,
+        help="Specifies the worker log file name as a string for current node; the default is worker_[rankid]. "
+             "Support configuring the current IP address and host name by using {ip} and {hostname} respectively. "
+             "e.g. --worker_log_name=worker_{ip}_{hostname}_test, worker [rankid] log name for current node "
+             "will be worker_[real IP address]_[real host name]_test_[rankid]."
+    )
+    parser.add_argument(
+        "--tail_worker_log",
+        default="-1",
+        type=str,
+        help="Only tail worker log to console when '--join=True' and the configured value should be within "
+             "[0, local_worker_num], otherwise worker log will not be tail. All worker logs will be tail by "
+             "default. Support tail the specified worker log (e.g. --tail_log=0 tail the worker 0 log to console)."
+    )
     parser.add_argument(
         "task_script",
         type=str,
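Together, the two new options let msrun customize per-node worker log names and stream selected worker logs to the console when '--join=True'. A purely hypothetical invocation (worker counts, host name and training script are made up for illustration) could look like:

    msrun --worker_num=8 --local_worker_num=8 --master_addr=node01 --join=True \
          --worker_log_name=worker_{ip}_{hostname} --tail_worker_log=0,1 train.py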
mindspore/parallel/parameter_broadcast.py
@@ -56,7 +56,7 @@ def parameter_broadcast(net, layout, cur_rank=0, initial_rank=0):
         >>> from mindspore.parallel.parameter_broadcast import parameter_broadcast
         >>> from mindspore.train.serialization import load_checkpoint, load_param_into_net
         >>> ms.set_context(mode=ms.GRAPH_MODE)
-        >>> ms.set_context(max_device_memory="28GB")
+        >>> ms.runtime.set_memory(max_size="28GB")
         >>> ms.set_auto_parallel_context(parallel_mode=ms.ParallelMode.SEMI_AUTO_PARALLEL)
         >>> init()
         >>> ms.set_seed(1)
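The doctest change tracks the new mindspore.runtime module added in 2.5.0 (see mindspore/runtime/memory.py in the file list). A hedged migration sketch:

    import mindspore as ms

    # 2.4.x style, as the example previously read:
    # ms.set_context(max_device_memory="28GB")
    # 2.5.0 style, matching the updated doctest above:
    ms.runtime.set_memory(max_size="28GB")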
mindspore/parallel/shard.py
@@ -24,6 +24,9 @@ class Layout:
     """
     Parallel layout describes the detailed sharding information.

+    For more detailed information, refer to the file `Higher-order Operator-level Parallelism
+    <https://www.mindspore.cn/docs/en/master/model_train/parallel/advanced_operator_parallel.html>`_.
+
     Note:
         - It is valid only in semi auto parallel or auto parallel mode.
         - The multiplication result of the `device_matrix` must be equal to the device count in a pipeline stage.