mindspore 2.6.0-cp311-cp311-win_amd64.whl → 2.7.0-cp311-cp311-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of mindspore might be problematic.

Files changed (455)
  1. mindspore/.commit_id +1 -1
  2. mindspore/Microsoft.VisualStudio.Telemetry.dll +0 -0
  3. mindspore/Newtonsoft.Json.dll +0 -0
  4. mindspore/__init__.py +2 -2
  5. mindspore/_c_dataengine.cp311-win_amd64.pyd +0 -0
  6. mindspore/_c_expression.cp311-win_amd64.pyd +0 -0
  7. mindspore/_c_mindrecord.cp311-win_amd64.pyd +0 -0
  8. mindspore/_checkparam.py +42 -11
  9. mindspore/_extends/builtin_operations.py +3 -3
  10. mindspore/{_deprecated → _extends/optimize}/__init__.py +9 -3
  11. mindspore/_extends/optimize/cell_utils.py +96 -0
  12. mindspore/_extends/parallel_compile/akg_compiler/custom.py +1109 -0
  13. mindspore/_extends/parallel_compile/akg_compiler/gen_custom_op_files.py +1 -1
  14. mindspore/_extends/parse/__init__.py +3 -3
  15. mindspore/_extends/parse/compile_config.py +44 -22
  16. mindspore/_extends/parse/deprecated/deprecated_tensor_method.py +1 -2
  17. mindspore/_extends/parse/parser.py +64 -83
  18. mindspore/_extends/parse/resources.py +39 -0
  19. mindspore/_extends/parse/standard_method.py +47 -14
  20. mindspore/_extends/parse/trope.py +8 -1
  21. mindspore/_extends/pijit/__init__.py +1 -2
  22. mindspore/_extends/pijit/pijit_func_white_list.py +2 -5
  23. mindspore/amp.py +4 -22
  24. mindspore/atlprov.dll +0 -0
  25. mindspore/avcodec-59.dll +0 -0
  26. mindspore/avdevice-59.dll +0 -0
  27. mindspore/avfilter-8.dll +0 -0
  28. mindspore/avformat-59.dll +0 -0
  29. mindspore/avutil-57.dll +0 -0
  30. mindspore/boost/adasum.py +1 -1
  31. mindspore/boost/boost_cell_wrapper.py +4 -4
  32. mindspore/c1.dll +0 -0
  33. mindspore/c1xx.dll +0 -0
  34. mindspore/c2.dll +0 -0
  35. mindspore/common/__init__.py +43 -12
  36. mindspore/common/_grad_function.py +2 -1
  37. mindspore/common/_pijit_context.py +28 -7
  38. mindspore/common/_stub_tensor.py +1 -209
  39. mindspore/common/_tensor_cpp_method.py +1 -1
  40. mindspore/common/_tensor_docs.py +177 -52
  41. mindspore/common/_utils.py +9 -1
  42. mindspore/common/api.py +338 -208
  43. mindspore/common/dtype.py +108 -57
  44. mindspore/common/dump.py +11 -16
  45. mindspore/common/dynamic_shape/__init__.py +0 -0
  46. mindspore/common/{auto_dynamic_shape.py → dynamic_shape/auto_dynamic_shape.py} +17 -23
  47. mindspore/common/dynamic_shape/enable_dynamic.py +197 -0
  48. mindspore/common/file_system.py +59 -9
  49. mindspore/common/generator.py +2 -3
  50. mindspore/common/hook_handle.py +33 -5
  51. mindspore/common/jit_config.py +1 -1
  52. mindspore/common/jit_trace.py +84 -105
  53. mindspore/common/np_dtype.py +3 -3
  54. mindspore/common/parameter.py +27 -29
  55. mindspore/common/recompute.py +5 -7
  56. mindspore/common/sparse_tensor.py +0 -3
  57. mindspore/common/symbol.py +0 -1
  58. mindspore/common/tensor.py +84 -133
  59. mindspore/communication/_comm_helper.py +46 -4
  60. mindspore/communication/management.py +79 -7
  61. mindspore/context.py +47 -38
  62. mindspore/dataset/__init__.py +1 -1
  63. mindspore/dataset/audio/transforms.py +1 -1
  64. mindspore/dataset/core/config.py +38 -4
  65. mindspore/dataset/engine/datasets.py +350 -322
  66. mindspore/dataset/engine/datasets_user_defined.py +69 -23
  67. mindspore/dataset/engine/iterators.py +2 -2
  68. mindspore/dataset/engine/obs/config_loader.py +2 -2
  69. mindspore/dataset/engine/obs/obs_mindrecord_dataset.py +8 -0
  70. mindspore/dataset/transforms/c_transforms.py +2 -2
  71. mindspore/dataset/transforms/py_transforms.py +7 -3
  72. mindspore/dataset/transforms/transforms.py +10 -6
  73. mindspore/dataset/vision/__init__.py +1 -1
  74. mindspore/dataset/vision/py_transforms.py +8 -8
  75. mindspore/dataset/vision/transforms.py +17 -5
  76. mindspore/dataset/vision/utils.py +632 -21
  77. mindspore/dataset/vision/validators.py +1 -0
  78. mindspore/device_context/ascend/device.py +1 -1
  79. mindspore/device_context/ascend/op_tuning.py +35 -1
  80. mindspore/device_context/gpu/__init__.py +2 -2
  81. mindspore/device_context/gpu/device.py +1 -1
  82. mindspore/device_context/gpu/op_precision.py +4 -2
  83. mindspore/device_context/gpu/op_tuning.py +6 -3
  84. mindspore/device_manager.py +16 -9
  85. mindspore/dnnl.dll +0 -0
  86. mindspore/dpcmi.dll +0 -0
  87. mindspore/experimental/llm_boost/ascend_native/llama_boost_ascend_native.py +5 -4
  88. mindspore/experimental/llm_boost/atb/boost_base.py +2 -3
  89. mindspore/experimental/optim/adadelta.py +13 -20
  90. mindspore/experimental/optim/adagrad.py +15 -22
  91. mindspore/experimental/optim/adam.py +17 -24
  92. mindspore/experimental/optim/adamax.py +14 -22
  93. mindspore/experimental/optim/adamw.py +28 -34
  94. mindspore/experimental/optim/asgd.py +15 -25
  95. mindspore/experimental/optim/lr_scheduler.py +27 -45
  96. mindspore/experimental/optim/nadam.py +14 -24
  97. mindspore/experimental/optim/optimizer.py +13 -23
  98. mindspore/experimental/optim/radam.py +18 -24
  99. mindspore/experimental/optim/rmsprop.py +14 -25
  100. mindspore/experimental/optim/rprop.py +15 -26
  101. mindspore/experimental/optim/sgd.py +9 -19
  102. mindspore/hal/__init__.py +4 -4
  103. mindspore/hal/contiguous_tensors_handle.py +2 -2
  104. mindspore/hal/memory.py +1 -0
  105. mindspore/include/api/cell.h +65 -5
  106. mindspore/include/api/cfg.h +24 -7
  107. mindspore/include/api/context.h +1 -0
  108. mindspore/include/api/delegate.h +10 -2
  109. mindspore/include/api/dual_abi_helper.h +100 -19
  110. mindspore/include/api/graph.h +14 -1
  111. mindspore/include/api/kernel.h +16 -3
  112. mindspore/include/api/kernel_api.h +9 -1
  113. mindspore/include/api/metrics/accuracy.h +9 -0
  114. mindspore/include/api/model.h +8 -1
  115. mindspore/include/api/model_group.h +4 -0
  116. mindspore/include/api/model_parallel_runner.h +2 -0
  117. mindspore/include/api/status.h +48 -10
  118. mindspore/include/api/types.h +8 -3
  119. mindspore/include/c_api/model_c.h +0 -58
  120. mindspore/include/c_api/tensor_c.h +0 -26
  121. mindspore/include/dataset/constants.h +9 -0
  122. mindspore/include/dataset/vision_ascend.h +1 -1
  123. mindspore/jpeg62.dll +0 -0
  124. mindspore/mindrecord/tools/cifar10.py +61 -11
  125. mindspore/mindrecord/tools/cifar10_to_mr.py +5 -0
  126. mindspore/mindspore_backend_common.dll +0 -0
  127. mindspore/mindspore_backend_manager.dll +0 -0
  128. mindspore/mindspore_common.dll +0 -0
  129. mindspore/mindspore_core.dll +0 -0
  130. mindspore/mindspore_cpu_res_manager.dll +0 -0
  131. mindspore/mindspore_dump.dll +0 -0
  132. mindspore/mindspore_frontend.dll +0 -0
  133. mindspore/mindspore_glog.dll +0 -0
  134. mindspore/mindspore_memory_pool.dll +0 -0
  135. mindspore/mindspore_ms_backend.dll +0 -0
  136. mindspore/mindspore_ops.dll +0 -0
  137. mindspore/mindspore_ops_host.dll +0 -0
  138. mindspore/mindspore_ops_kernel_common.dll +0 -0
  139. mindspore/mindspore_profiler.dll +0 -0
  140. mindspore/mindspore_pyboost.dll +0 -0
  141. mindspore/mindspore_pynative.dll +0 -0
  142. mindspore/mindspore_res_manager.dll +0 -0
  143. mindspore/mindspore_runtime_pipeline.dll +0 -0
  144. mindspore/mint/__init__.py +4 -44
  145. mindspore/mint/distributed/__init__.py +5 -0
  146. mindspore/mint/distributed/distributed.py +425 -19
  147. mindspore/mint/nn/__init__.py +1 -1
  148. mindspore/mint/nn/functional.py +53 -6
  149. mindspore/mint/nn/layer/_functions.py +163 -294
  150. mindspore/mint/nn/layer/activation.py +8 -6
  151. mindspore/mint/nn/layer/conv.py +125 -101
  152. mindspore/mint/nn/layer/normalization.py +11 -25
  153. mindspore/mint/optim/adam.py +19 -18
  154. mindspore/mint/optim/adamw.py +14 -8
  155. mindspore/mint/optim/sgd.py +5 -5
  156. mindspore/msobj140.dll +0 -0
  157. mindspore/mspdb140.dll +0 -0
  158. mindspore/mspdbcore.dll +0 -0
  159. mindspore/mspdbst.dll +0 -0
  160. mindspore/mspft140.dll +0 -0
  161. mindspore/msvcdis140.dll +0 -0
  162. mindspore/msvcp140_1.dll +0 -0
  163. mindspore/msvcp140_2.dll +0 -0
  164. mindspore/msvcp140_atomic_wait.dll +0 -0
  165. mindspore/msvcp140_codecvt_ids.dll +0 -0
  166. mindspore/nn/cell.py +488 -620
  167. mindspore/nn/grad/cell_grad.py +11 -12
  168. mindspore/nn/layer/activation.py +36 -36
  169. mindspore/nn/layer/basic.py +74 -77
  170. mindspore/nn/layer/channel_shuffle.py +4 -4
  171. mindspore/nn/layer/combined.py +4 -2
  172. mindspore/nn/layer/conv.py +86 -85
  173. mindspore/nn/layer/dense.py +9 -7
  174. mindspore/nn/layer/embedding.py +50 -52
  175. mindspore/nn/layer/image.py +38 -40
  176. mindspore/nn/layer/math.py +111 -112
  177. mindspore/nn/layer/normalization.py +56 -44
  178. mindspore/nn/layer/pooling.py +58 -63
  179. mindspore/nn/layer/rnn_cells.py +33 -33
  180. mindspore/nn/layer/rnns.py +56 -56
  181. mindspore/nn/layer/thor_layer.py +74 -73
  182. mindspore/nn/layer/transformer.py +11 -1
  183. mindspore/nn/learning_rate_schedule.py +20 -20
  184. mindspore/nn/loss/loss.py +79 -81
  185. mindspore/nn/optim/adam.py +2 -4
  186. mindspore/nn/optim/adasum.py +2 -2
  187. mindspore/nn/optim/lamb.py +1 -3
  188. mindspore/nn/optim/optimizer.py +1 -1
  189. mindspore/nn/optim/tft_wrapper.py +2 -3
  190. mindspore/nn/optim/thor.py +2 -2
  191. mindspore/nn/probability/distribution/_utils/utils.py +2 -2
  192. mindspore/nn/probability/distribution/exponential.py +2 -1
  193. mindspore/nn/probability/distribution/poisson.py +2 -1
  194. mindspore/nn/sparse/sparse.py +3 -3
  195. mindspore/nn/wrap/cell_wrapper.py +73 -42
  196. mindspore/nn/wrap/grad_reducer.py +37 -52
  197. mindspore/nn/wrap/loss_scale.py +72 -74
  198. mindspore/numpy/array_creations.py +7 -7
  199. mindspore/numpy/fft.py +1 -1
  200. mindspore/numpy/math_ops.py +1 -1
  201. mindspore/numpy/utils_const.py +1 -1
  202. mindspore/opencv_core452.dll +0 -0
  203. mindspore/opencv_imgcodecs452.dll +0 -0
  204. mindspore/opencv_imgproc452.dll +0 -0
  205. mindspore/ops/_grad_experimental/grad_comm_ops.py +51 -13
  206. mindspore/ops/_grad_experimental/grad_debug_ops.py +14 -0
  207. mindspore/ops/_grad_experimental/grad_inner_ops.py +0 -9
  208. mindspore/ops/_op_impl/cpu/__init__.py +1 -0
  209. mindspore/{experimental/es/__init__.py → ops/_op_impl/cpu/joinedstr_op.py} +12 -6
  210. mindspore/ops/_vmap/vmap_array_ops.py +6 -13
  211. mindspore/ops/_vmap/vmap_nn_ops.py +8 -16
  212. mindspore/ops/auto_generate/cpp_create_prim_instance_helper.py +29 -10
  213. mindspore/ops/auto_generate/gen_extend_func.py +5 -55
  214. mindspore/ops/auto_generate/gen_ops_def.py +753 -273
  215. mindspore/ops/auto_generate/gen_ops_prim.py +1687 -958
  216. mindspore/ops/auto_generate/pyboost_inner_prim.py +31 -1
  217. mindspore/ops/composite/__init__.py +10 -0
  218. mindspore/ops/composite/base.py +9 -5
  219. mindspore/ops/composite/multitype_ops/__init__.py +12 -1
  220. mindspore/ops/composite/multitype_ops/_compile_utils.py +132 -108
  221. mindspore/ops/composite/multitype_ops/_constexpr_utils.py +1 -1
  222. mindspore/ops/composite/multitype_ops/add_impl.py +70 -2
  223. mindspore/ops/composite/multitype_ops/div_impl.py +49 -0
  224. mindspore/ops/composite/multitype_ops/floordiv_impl.py +29 -0
  225. mindspore/ops/composite/multitype_ops/getitem_impl.py +11 -0
  226. mindspore/ops/composite/multitype_ops/mod_impl.py +5 -3
  227. mindspore/ops/composite/multitype_ops/mul_impl.py +49 -0
  228. mindspore/ops/composite/multitype_ops/setitem_impl.py +57 -0
  229. mindspore/ops/composite/multitype_ops/sub_impl.py +34 -0
  230. mindspore/ops/composite/multitype_ops/zeros_like_impl.py +14 -0
  231. mindspore/ops/function/__init__.py +4 -1
  232. mindspore/ops/function/_add_attr_func.py +11 -6
  233. mindspore/ops/function/array_func.py +17 -100
  234. mindspore/ops/function/debug_func.py +8 -5
  235. mindspore/ops/function/grad/grad_func.py +5 -13
  236. mindspore/ops/function/math_func.py +65 -399
  237. mindspore/ops/function/nn_func.py +44 -61
  238. mindspore/ops/function/other_func.py +4 -1
  239. mindspore/ops/function/random_func.py +31 -4
  240. mindspore/ops/functional.py +2 -3
  241. mindspore/ops/functional_overload.py +486 -18
  242. mindspore/ops/op_info_register.py +21 -0
  243. mindspore/ops/operations/__init__.py +5 -2
  244. mindspore/ops/operations/_custom_ops_utils.py +675 -8
  245. mindspore/ops/operations/_inner_ops.py +14 -18
  246. mindspore/ops/operations/_sequence_ops.py +1 -1
  247. mindspore/ops/operations/array_ops.py +4 -50
  248. mindspore/ops/operations/comm_ops.py +186 -41
  249. mindspore/ops/operations/custom_ops.py +244 -175
  250. mindspore/ops/operations/debug_ops.py +55 -4
  251. mindspore/ops/operations/image_ops.py +13 -13
  252. mindspore/ops/operations/manually_defined/ops_def.py +27 -28
  253. mindspore/ops/operations/math_ops.py +8 -9
  254. mindspore/ops/operations/nn_ops.py +6 -7
  255. mindspore/ops/primitive.py +9 -20
  256. mindspore/ops/tensor_method.py +52 -11
  257. mindspore/ops_generate/api/cpp_create_prim_instance_helper_generator.py +1 -1
  258. mindspore/ops_generate/api/functional_map_cpp_generator.py +10 -9
  259. mindspore/ops_generate/api/functions_cc_generator.py +58 -10
  260. mindspore/ops_generate/api/tensor_func_reg_cpp_generator.py +1 -1
  261. mindspore/ops_generate/common/base_generator.py +14 -0
  262. mindspore/ops_generate/common/gen_constants.py +7 -2
  263. mindspore/ops_generate/common/gen_utils.py +0 -19
  264. mindspore/ops_generate/common/op_proto.py +11 -4
  265. mindspore/ops_generate/common/template.py +88 -11
  266. mindspore/ops_generate/gen_ops.py +1 -1
  267. mindspore/ops_generate/op_def/lite_ops_cpp_generator.py +4 -4
  268. mindspore/ops_generate/op_def/ops_name_h_generator.py +0 -3
  269. mindspore/ops_generate/op_def/ops_primitive_h_generator.py +0 -4
  270. mindspore/ops_generate/op_def_py/op_prim_py_generator.py +5 -2
  271. mindspore/ops_generate/pyboost/auto_grad_impl_cc_generator.py +49 -8
  272. mindspore/ops_generate/pyboost/auto_grad_reg_cc_generator.py +2 -2
  273. mindspore/ops_generate/pyboost/gen_pyboost_func.py +31 -16
  274. mindspore/ops_generate/pyboost/op_template_parser.py +98 -72
  275. mindspore/ops_generate/pyboost/pyboost_functions_cpp_generator.py +70 -273
  276. mindspore/ops_generate/pyboost/pyboost_functions_h_generator.py +14 -6
  277. mindspore/ops_generate/pyboost/pyboost_functions_impl_cpp_generator.py +316 -0
  278. mindspore/ops_generate/pyboost/pyboost_functions_py_generator.py +1 -1
  279. mindspore/ops_generate/pyboost/pyboost_grad_function_cpp_generator.py +5 -3
  280. mindspore/ops_generate/pyboost/pyboost_inner_prim_generator.py +1 -1
  281. mindspore/ops_generate/pyboost/pyboost_internal_functions_cpp_generator.py +76 -0
  282. mindspore/ops_generate/pyboost/pyboost_internal_functions_h_generator.py +76 -0
  283. mindspore/ops_generate/pyboost/pyboost_internal_kernel_info_adapter_generator.py +125 -0
  284. mindspore/ops_generate/pyboost/pyboost_native_grad_functions_generator.py +4 -3
  285. mindspore/ops_generate/pyboost/pyboost_op_cpp_code_generator.py +348 -61
  286. mindspore/ops_generate/pyboost/pyboost_overload_functions_cpp_generator.py +1 -1
  287. mindspore/ops_generate/pyboost/pyboost_utils.py +118 -9
  288. mindspore/ops_generate/tensor_py_cc_generator.py +1 -24
  289. mindspore/parallel/_auto_parallel_context.py +9 -17
  290. mindspore/parallel/_cell_wrapper.py +106 -40
  291. mindspore/parallel/_parallel_serialization.py +4 -3
  292. mindspore/parallel/_ps_context.py +4 -6
  293. mindspore/parallel/_tensor.py +167 -12
  294. mindspore/parallel/_transformer/moe.py +1 -1
  295. mindspore/parallel/_transformer/transformer.py +17 -12
  296. mindspore/parallel/_utils.py +5 -11
  297. mindspore/parallel/auto_parallel.py +33 -12
  298. mindspore/parallel/checkpoint_convert.py +3 -3
  299. mindspore/parallel/checkpoint_transform.py +5 -1
  300. mindspore/parallel/cluster/process_entity/_api.py +88 -49
  301. mindspore/parallel/cluster/process_entity/_utils.py +95 -7
  302. mindspore/parallel/cluster/run.py +48 -7
  303. mindspore/parallel/function/__init__.py +8 -1
  304. mindspore/parallel/function/reshard_func.py +7 -6
  305. mindspore/parallel/nn/__init__.py +15 -2
  306. mindspore/parallel/nn/parallel_cell_wrapper.py +50 -14
  307. mindspore/parallel/nn/parallel_grad_reducer.py +7 -14
  308. mindspore/parallel/shard.py +9 -23
  309. mindspore/parallel/transform_safetensors.py +468 -174
  310. mindspore/pgodb140.dll +0 -0
  311. mindspore/pgort140.dll +0 -0
  312. mindspore/profiler/__init__.py +2 -1
  313. mindspore/profiler/analysis/parser/timeline_assembly_factory/ascend_timeline_assembler.py +7 -7
  314. mindspore/profiler/analysis/parser/timeline_assembly_factory/base_timeline_assembler.py +3 -0
  315. mindspore/profiler/analysis/parser/timeline_assembly_factory/trace_view_container.py +3 -0
  316. mindspore/profiler/analysis/parser/timeline_creator/cpu_op_timeline_creator.py +3 -3
  317. mindspore/profiler/analysis/parser/timeline_creator/fwk_timeline_creator.py +3 -3
  318. mindspore/profiler/analysis/parser/timeline_creator/msprof_timeline_creator.py +4 -4
  319. mindspore/profiler/analysis/parser/timeline_creator/scope_layer_timeline_creator.py +3 -3
  320. mindspore/profiler/analysis/parser/timeline_event/fwk_event.py +4 -1
  321. mindspore/profiler/analysis/parser/timeline_event/timeline_event_pool.py +2 -1
  322. mindspore/profiler/analysis/task_manager.py +1 -1
  323. mindspore/profiler/analysis/viewer/ascend_communication_viewer.py +5 -1
  324. mindspore/profiler/analysis/viewer/ascend_integrate_viewer.py +2 -1
  325. mindspore/profiler/analysis/viewer/ascend_kernel_details_viewer.py +10 -9
  326. mindspore/profiler/analysis/viewer/ascend_op_memory_viewer.py +43 -23
  327. mindspore/profiler/analysis/viewer/ascend_step_trace_time_viewer.py +3 -2
  328. mindspore/profiler/analysis/viewer/ms_minddata_viewer.py +9 -5
  329. mindspore/profiler/analysis/viewer/ms_operator_details_viewer.py +132 -0
  330. mindspore/profiler/common/constant.py +16 -0
  331. mindspore/profiler/common/msprof_cmd_tool.py +2 -2
  332. mindspore/profiler/common/path_manager.py +9 -0
  333. mindspore/profiler/common/profiler_context.py +50 -29
  334. mindspore/profiler/common/profiler_info.py +0 -16
  335. mindspore/profiler/common/profiler_meta_data.py +1 -0
  336. mindspore/profiler/common/profiler_op_analyse.py +239 -0
  337. mindspore/profiler/common/profiler_output_path.py +23 -8
  338. mindspore/profiler/common/profiler_parameters.py +128 -35
  339. mindspore/profiler/dynamic_profile/__init__.py +0 -0
  340. mindspore/profiler/dynamic_profile/dynamic_monitor_proxy.py +39 -0
  341. mindspore/profiler/dynamic_profile/dynamic_profiler_config_context.py +666 -0
  342. mindspore/profiler/dynamic_profile/dynamic_profiler_utils.py +62 -0
  343. mindspore/profiler/dynamic_profiler.py +374 -338
  344. mindspore/profiler/envprofiler.py +42 -12
  345. mindspore/profiler/experimental_config.py +112 -7
  346. mindspore/profiler/mstx.py +33 -12
  347. mindspore/profiler/platform/__init__.py +2 -3
  348. mindspore/profiler/platform/cpu_profiler.py +10 -4
  349. mindspore/profiler/platform/npu_profiler.py +30 -20
  350. mindspore/profiler/profiler.py +218 -154
  351. mindspore/profiler/profiler_action_controller.py +65 -77
  352. mindspore/profiler/profiler_interface.py +2 -2
  353. mindspore/profiler/schedule.py +10 -4
  354. mindspore/rewrite/common/config.py +1 -0
  355. mindspore/rewrite/common/namer.py +1 -0
  356. mindspore/rewrite/common/namespace.py +1 -0
  357. mindspore/rewrite/node/node.py +31 -11
  358. mindspore/rewrite/parsers/assign_parser.py +1 -1
  359. mindspore/rewrite/symbol_tree/symbol_tree.py +2 -2
  360. mindspore/run_check/_check_version.py +7 -10
  361. mindspore/runtime/__init__.py +8 -6
  362. mindspore/runtime/event.py +10 -4
  363. mindspore/runtime/executor.py +87 -45
  364. mindspore/runtime/memory.py +22 -30
  365. mindspore/runtime/thread_bind_core.py +299 -165
  366. mindspore/safeguard/rewrite_obfuscation.py +12 -13
  367. mindspore/swresample-4.dll +0 -0
  368. mindspore/swscale-6.dll +0 -0
  369. mindspore/tbbmalloc.dll +0 -0
  370. mindspore/tinyxml2.dll +0 -0
  371. mindspore/train/_utils.py +9 -5
  372. mindspore/train/amp.py +43 -23
  373. mindspore/train/callback/__init__.py +5 -5
  374. mindspore/train/callback/_callback.py +2 -1
  375. mindspore/train/callback/_checkpoint.py +4 -14
  376. mindspore/train/callback/_flops_collector.py +11 -7
  377. mindspore/train/callback/_landscape.py +0 -1
  378. mindspore/train/callback/_train_fault_tolerance.py +72 -18
  379. mindspore/train/data_sink.py +15 -6
  380. mindspore/train/dataset_helper.py +14 -5
  381. mindspore/train/model.py +49 -47
  382. mindspore/train/serialization.py +168 -126
  383. mindspore/train/summary/summary_record.py +13 -2
  384. mindspore/train/train_thor/model_thor.py +2 -2
  385. mindspore/turbojpeg.dll +0 -0
  386. mindspore/utils/__init__.py +3 -2
  387. mindspore/utils/dryrun.py +0 -6
  388. mindspore/utils/runtime_execution_order_check.py +162 -78
  389. mindspore/utils/sdc_detect.py +68 -0
  390. mindspore/utils/utils.py +14 -17
  391. mindspore/vcmeta.dll +0 -0
  392. mindspore/vcruntime140.dll +0 -0
  393. mindspore/vcruntime140_1.dll +0 -0
  394. mindspore/version.py +1 -1
  395. {mindspore-2.6.0.dist-info → mindspore-2.7.0.dist-info}/METADATA +5 -4
  396. {mindspore-2.6.0.dist-info → mindspore-2.7.0.dist-info}/RECORD +400 -439
  397. mindspore/_deprecated/jit.py +0 -198
  398. mindspore/_extends/remote/kernel_build_server_ascend.py +0 -75
  399. mindspore/communication/_hccl_management.py +0 -297
  400. mindspore/experimental/es/embedding_service.py +0 -891
  401. mindspore/experimental/es/embedding_service_layer.py +0 -581
  402. mindspore/profiler/common/validator/__init__.py +0 -14
  403. mindspore/profiler/common/validator/validate_path.py +0 -84
  404. mindspore/profiler/parser/__init__.py +0 -14
  405. mindspore/profiler/parser/aicpu_data_parser.py +0 -272
  406. mindspore/profiler/parser/ascend_analysis/__init__.py +0 -14
  407. mindspore/profiler/parser/ascend_analysis/constant.py +0 -71
  408. mindspore/profiler/parser/ascend_analysis/file_manager.py +0 -180
  409. mindspore/profiler/parser/ascend_analysis/function_event.py +0 -185
  410. mindspore/profiler/parser/ascend_analysis/fwk_cann_parser.py +0 -136
  411. mindspore/profiler/parser/ascend_analysis/fwk_file_parser.py +0 -131
  412. mindspore/profiler/parser/ascend_analysis/msprof_timeline_parser.py +0 -104
  413. mindspore/profiler/parser/ascend_analysis/path_manager.py +0 -313
  414. mindspore/profiler/parser/ascend_analysis/profiler_info_parser.py +0 -123
  415. mindspore/profiler/parser/ascend_analysis/tlv_decoder.py +0 -86
  416. mindspore/profiler/parser/ascend_analysis/trace_event_manager.py +0 -75
  417. mindspore/profiler/parser/ascend_cluster_generator.py +0 -116
  418. mindspore/profiler/parser/ascend_communicate_generator.py +0 -314
  419. mindspore/profiler/parser/ascend_flops_generator.py +0 -116
  420. mindspore/profiler/parser/ascend_fpbp_generator.py +0 -82
  421. mindspore/profiler/parser/ascend_hccl_generator.py +0 -271
  422. mindspore/profiler/parser/ascend_integrate_generator.py +0 -42
  423. mindspore/profiler/parser/ascend_memory_generator.py +0 -185
  424. mindspore/profiler/parser/ascend_msprof_exporter.py +0 -282
  425. mindspore/profiler/parser/ascend_msprof_generator.py +0 -187
  426. mindspore/profiler/parser/ascend_op_generator.py +0 -334
  427. mindspore/profiler/parser/ascend_steptrace_generator.py +0 -94
  428. mindspore/profiler/parser/ascend_timeline_generator.py +0 -545
  429. mindspore/profiler/parser/base_timeline_generator.py +0 -483
  430. mindspore/profiler/parser/container.py +0 -229
  431. mindspore/profiler/parser/cpu_gpu_timeline_generator.py +0 -697
  432. mindspore/profiler/parser/flops_parser.py +0 -531
  433. mindspore/profiler/parser/framework_enum.py +0 -111
  434. mindspore/profiler/parser/framework_parser.py +0 -464
  435. mindspore/profiler/parser/framework_struct.py +0 -61
  436. mindspore/profiler/parser/gpu_analysis/__init__.py +0 -14
  437. mindspore/profiler/parser/gpu_analysis/function_event.py +0 -44
  438. mindspore/profiler/parser/gpu_analysis/fwk_file_parser.py +0 -89
  439. mindspore/profiler/parser/gpu_analysis/profiler_info_parser.py +0 -72
  440. mindspore/profiler/parser/hccl_parser.py +0 -573
  441. mindspore/profiler/parser/hwts_log_parser.py +0 -122
  442. mindspore/profiler/parser/integrator.py +0 -526
  443. mindspore/profiler/parser/memory_usage_parser.py +0 -277
  444. mindspore/profiler/parser/minddata_analyzer.py +0 -800
  445. mindspore/profiler/parser/minddata_parser.py +0 -186
  446. mindspore/profiler/parser/minddata_pipeline_parser.py +0 -299
  447. mindspore/profiler/parser/op_intermediate_parser.py +0 -149
  448. mindspore/profiler/parser/optime_parser.py +0 -250
  449. mindspore/profiler/parser/profiler_info.py +0 -213
  450. mindspore/profiler/parser/step_trace_parser.py +0 -666
  451. mindspore/utils/hooks.py +0 -81
  452. /mindspore/common/{_auto_dynamic.py → dynamic_shape/_auto_dynamic.py} +0 -0
  453. {mindspore-2.6.0.dist-info → mindspore-2.7.0.dist-info}/WHEEL +0 -0
  454. {mindspore-2.6.0.dist-info → mindspore-2.7.0.dist-info}/entry_points.txt +0 -0
  455. {mindspore-2.6.0.dist-info → mindspore-2.7.0.dist-info}/top_level.txt +0 -0
--- a/mindspore/parallel/cluster/process_entity/_api.py
+++ b/mindspore/parallel/cluster/process_entity/_api.py
@@ -22,7 +22,8 @@ import socket
 import psutil
 import mindspore.log as logger
 from ._utils import _generate_cmd_args_list, _generate_cmd_args_list_with_core, _generate_url, \
-    _is_local_ip, _convert_addr_to_ip, _send_scale_num, _get_local_ip
+    _is_local_ip, _convert_addr_to_ip, _send_scale_num, _get_local_ip, _generate_auto_bind_core_strategy, \
+    _generate_bind_core_strategy


 class _Node:
@@ -79,11 +80,12 @@ class _ComputeGraphNode(_Node):
     Worker node for dynamic networking. Inherits from the Node class.
     """

-    def __init__(self, worker_num, sched_host, sched_port, timeout, node_id, args_list, output_file,
+    def __init__(self, worker_num, sched_host, sched_port, timeout, node_id, node_rank, args_list, output_file,
                  tail_worker_log, join, is_simulation):
         super().__init__(worker_num, sched_host, sched_port, timeout, args_list, output_file,
                          tail_worker_log, join, is_simulation)
         self.node_id = node_id
+        self.node_rank = node_rank

     def run(self):
         """
@@ -95,6 +97,8 @@ class _ComputeGraphNode(_Node):
         super().run()
         if self.node_id is not None:
             os.environ["MS_NODE_ID"] = str(self.node_id)
+        if self.node_rank is not None:
+            os.environ["MS_NODE_RANK"] = str(self.node_rank)
         # If simulation level is set, environment variable 'MS_ROLE' will not be set.
         if not self.is_simulation:
             os.environ["MS_ROLE"] = "MS_WORKER"
@@ -119,6 +123,9 @@
         return subprocess.Popen(['/usr/bin/tail', '-f', self.output_file])

     def enable_tail_worker_log(self):
+        """
+        Get valid rank ID for tailing the corresponding worker log.
+        """
         tail_worker_log_list = []
         if self.tail_worker_log != "-1":
             tail_worker_log_list.extend([int(num) for num in self.tail_worker_log.split(',')])
@@ -169,12 +176,15 @@ class _ProcessManager:

         self.sim_level = args.sim_level
         self.sim_rank_id = args.sim_rank_id
-        self.is_simulation = (self.sim_level != -1)
+        self.is_simulation = self.sim_level != -1
         if self.is_simulation:
             os.environ["MS_SIMULATION_LEVEL"] = str(self.sim_level)
+            if self.sim_rank_id == -1:
+                self.sim_rank_id = int(os.getenv("RANK_ID", "-1"))
         elif os.getenv("MS_SIMULATION_LEVEL"):
             self.is_simulation = True
-            self.sim_rank_id = int(os.getenv("RANK_ID", "-1"))
+            if self.sim_rank_id == -1:
+                self.sim_rank_id = int(os.getenv("RANK_ID", "-1"))
         if os.getenv("RANK_SIZE"):
             self.exported_rank_size = os.getenv("RANK_SIZE")
         # If sim_rank_id is set, single worker can be started.
@@ -205,15 +215,24 @@
         finally:
             os.umask(origin_mask)

+        self.device_to_cpu_map = {}
+        if self.bind_core is True:
+            self.device_to_cpu_map = _generate_auto_bind_core_strategy(self.local_worker_num)
+
         self.proc_rank_map = {}
         self.enable_mindx = False
+        self._check_taskd()
+
+    def _check_taskd(self):
+        """check if enable taskd."""
         tft_env = os.getenv("MS_ENABLE_TFT", "")
-        if ("TTP:1" in tft_env) or ("UCE:1" in tft_env) or ("ARF:1" in tft_env):
+        if any(v in tft_env for v in ('TTP:1', 'UCE:1', 'ARF:1', 'TSP:1', 'RSC:1', 'HCCE:1')):
             try:
                 from taskd.python.framework.agent.ms_mgr.msrun_plugin import MSRunPlugin
                 self.msmgr = MSRunPlugin()
                 self.msmgr.register_callbacks("KILL_WORKER", self.kill_workers)
                 self.msmgr.register_callbacks("START_ALL_WORKER", self.start_all_workers)
+                self.msmgr.register_callbacks("START_WORKER_LIST", self.start_worker_list)
                 self.msmgr.register_callbacks("MONITOR", self.monitor_rank_status)
                 self.enable_mindx = True
                 os.environ["MS_ENABLE_RECOVERY"] = str(1)
@@ -261,6 +280,45 @@
                                      self.is_simulation)
         self.msn_process = msn.run()

+    def _start_single_worker(self, local_rank):
+        """
+        Start worker processor
+
+        Args:
+            local_rank: local rank id.
+        """
+        os.environ["DEVICE_ID"] = str(local_rank)
+        node_id, log_name = self._get_node_id_and_log_path(local_rank)
+        if node_id is None:
+            logger.warning(f"Rank ids will be assigned automatically, "
+                           "please use 'grep -rn 'rank id:' command to check each worker log's rank id.")
+        else:
+            # If node_id is generated in '_get_node_id_and_log_path' method, export 'RANK_ID' environment variable.
+            # This is for rank_table method's compatibility consideration.
+            os.environ["RANK_ID"] = str(node_id)
+            print(f"Start worker process with rank id:{node_id}, log file:{log_name}. "
+                  f"Environment variable [RANK_ID={node_id}] is exported.", flush=True)
+        if self.is_simulation and (self.sim_rank_id != -1):
+            # Reset RANK_ID env to sim_rank_id if sim_rank_id is set.
+            os.environ["RANK_ID"] = str(self.sim_rank_id)
+            logger.warning(f"In dryrun case, RANK_ID is assigned to {self.sim_rank_id}.")
+
+        if self.bind_core:
+            affinity_cpu_str = _generate_bind_core_strategy(local_rank, self.device_to_cpu_map, self.bind_core)
+            if affinity_cpu_str is not None:
+                cmd = _generate_cmd_args_list_with_core(self.cmd, self.cmd_args, affinity_cpu_str)
+            else:
+                cmd = _generate_cmd_args_list(self.cmd, self.cmd_args)
+        else:
+            cmd = _generate_cmd_args_list(self.cmd, self.cmd_args)
+        cgn = _ComputeGraphNode(self.worker_num, self.master_addr, self.master_port, self.cluster_time_out,
+                                node_id, self.node_rank, cmd, log_name, self.tail_worker_log, self.join,
+                                self.is_simulation)
+        process, tail_process = cgn.run()
+        self.cgn_processes.append(process)
+        self.tail_cgn_processes.append(tail_process)
+        self.proc_rank_map[local_rank] = process
+
     def start_workers(self):
         """
         Starts the worker nodes.
@@ -275,40 +333,8 @@
                        "'rank_id' of each process will be assigned after cluster is successfully built.\n"
                        "You can access 'RANK_ID' environment variable after calling "
                        "'mindspore.communication.init()'")
-
         for i in range(self.local_worker_num):
-            os.environ["DEVICE_ID"] = str(i)
-            node_id, log_name = self._get_node_id_and_log_path(i)
-            if node_id is None:
-                logger.warning(f"Rank ids will be assigned automatically, "
-                               "please use 'grep -rn 'rank id:' command to check each worker log's rank id.")
-            else:
-                # If node_id is generated in '_get_node_id_and_log_path' method, export 'RANK_ID' environment variable.
-                # This is for rank_table method's compatibility consideration.
-                os.environ["RANK_ID"] = str(node_id)
-                print(f"Start worker process with rank id:{node_id}, log file:{log_name}. "
-                      f"Environment variable [RANK_ID={node_id}] is exported.", flush=True)
-            if self.is_simulation and (self.sim_rank_id != -1):
-                # Reset RANK_ID env to sim_rank_id if sim_rank_id is set.
-                os.environ["RANK_ID"] = str(self.sim_rank_id)
-                logger.warning(f"In dryrun case, RANK_ID is assigned to {self.sim_rank_id}.")
-
-            if self.bind_core:
-                cpu_num = subprocess.getoutput("cat /proc/cpuinfo|grep processor|wc -l")
-                if not cpu_num.isdigit():
-                    raise RuntimeError(f"Got cpu number from '/proc/cpuinfo' is {cpu_num}, failed to bind core.")
-                avg = int(cpu_num) // self.local_worker_num
-                cpu_start = avg * i
-                cpu_end = cpu_start + avg - 1
-                cmd = _generate_cmd_args_list_with_core(self.cmd, self.cmd_args, cpu_start, cpu_end)
-            else:
-                cmd = _generate_cmd_args_list(self.cmd, self.cmd_args)
-            cgn = _ComputeGraphNode(self.worker_num, self.master_addr, self.master_port, self.cluster_time_out,
-                                    node_id, cmd, log_name, self.tail_worker_log, self.join, self.is_simulation)
-            process, tail_process = cgn.run()
-            self.cgn_processes.append(process)
-            self.tail_cgn_processes.append(tail_process)
-            self.proc_rank_map[i] = process
+            self._start_single_worker(i)

     def join_processes(self):
         """
@@ -334,7 +360,7 @@
                 continue
             elif ret_code != 0:
                 has_exception = True
-                logger.error(f"Worker process {p.pid} exit with exception.")
+                logger.error(f"Worker process {p.pid} exit with exception. Error code: {ret_code}.")
                 break
             else:
                 success_cgn_processes.add(p)
@@ -420,14 +446,9 @@
         Args:
             NA.
         """
-        for p in self.cgn_processes:
-            if p.poll() is None:
-                p.kill()
+        self.kill_worker_processes()
+        self.kill_tail_log_processes()
         self.cgn_processes.clear()
-
-        for p in self.tail_cgn_processes:
-            if p is not None:
-                p.kill()
         self.tail_cgn_processes.clear()

     def kill_single_worker(self, pid):
@@ -441,7 +462,7 @@
         for i in range(len(self.cgn_processes)):
             p = self.cgn_processes[i]
             if p.pid == pid and p.poll() is None:
-                p.kill()
+                os.killpg(os.getpgid(p.pid), signal.SIGKILL)
                 del self.cgn_processes[i]
                 tail_p = self.tail_cgn_processes[i]
                 if tail_p is not None:
@@ -499,7 +520,8 @@
             p_status = p.poll()
             if (not psutil.pid_exists(p.pid)) and (p_status != 0):
                 p_status = 300
-            return {"pid": p.pid, "status": p_status, "global_rank": global_rank_id}
+            return {"pid": p.pid, "status": p_status, "global_rank": global_rank_id, "local_rank": rank_id,
+                    "node_id": self.node_rank}
         except KeyError:
             logger.info(f"Process rank {rank_id} has not been initialized.")
             return {"pid": None, "status": 200, "global_rank": global_rank_id}
@@ -519,7 +541,24 @@
             self.start_workers()
         worker_status = self.monitor_rank_status([-1])
         for i in range(self.local_worker_num):
-            if worker_status[i]["status"] != None: # pylint: disable=singleton-comparison
+            if worker_status[i]["status"] is not None:
+                return 1
+        return 0
+
+    def start_worker_list(self, rank_ids):
+        """
+        Start worker processor by rank list.
+
+        Args:
+            rank_ids: worker process's local rank list, which is also device_id.
+        """
+        if not isinstance(rank_ids, list):
+            raise TypeError(f"The type of 'rank_ids' must be a list, but got:{rank_ids}")
+        for idx in rank_ids:
+            self._start_single_worker(idx)
+        worker_status = self.monitor_rank_status(rank_ids)
+        for i in rank_ids:
+            if worker_status[i]["status"] is not None:
                 return 1
         return 0
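start_worker_list backs the new 'START_WORKER_LIST' taskd callback: it respawns only the given local ranks and then polls their status. A hedged sketch of a call, assuming an already-constructed _ProcessManager named manager:

    # Restart only local ranks 2 and 5 (their DEVICE_IDs); returns 0 when all
    # restarted workers are alive, 1 if any reports a non-None exit status.
    ret = manager.start_worker_list([2, 5])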
--- a/mindspore/parallel/cluster/process_entity/_utils.py
+++ b/mindspore/parallel/cluster/process_entity/_utils.py
@@ -18,6 +18,8 @@ import json
 import socket
 import ipaddress
 import mindspore.log as logger
+from mindspore.runtime.thread_bind_core import _get_physical_device_id, _get_cpu_available, \
+    _auto_generate_strategy, _equal_distribution_strategy

 CURRENT_IP = None

@@ -45,19 +47,19 @@ def _generate_cmd_args_list(cmd, cmd_args):
     return [cmd] + cmd_args


-def _generate_cmd_args_list_with_core(cmd, cmd_args, cpu_start, cpu_end):
+def _generate_cmd_args_list_with_core(cmd, cmd_args, affinity_cpu_str):
     """
     Generates arguments list for 'Popen'. It consists of a binary file name and subsequential arguments.
     """
     # Bind cpu cores to this process.
-    taskset_args = ['taskset'] + ['-c'] + [str(cpu_start) + '-' + str(cpu_end)]
+    taskset_args = ['taskset'] + ['-c'] + [affinity_cpu_str]
     final_cmd = []
     if cmd not in ['python', 'pytest', 'python3']:
         # If user don't set binary file name, defaulty use 'python' to launch the job.
         final_cmd = taskset_args + ['python'] + [cmd] + cmd_args
     else:
         final_cmd = taskset_args + [cmd] + cmd_args
-    logger.info(f"Launch process with command: {' '.join(final_cmd)}")
+    logger.warning(f"Launch process with command: {' '.join(final_cmd)}")
     return final_cmd
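Because the affinity argument is now a free-form taskset CPU list, discontiguous core sets are possible. A sketch of the resulting Popen argument list (script name, arguments, and core set are hypothetical):

    _generate_cmd_args_list_with_core('train.py', ['--epochs', '10'], '0-7,96-103')
    # -> ['taskset', '-c', '0-7,96-103', 'python', 'train.py', '--epochs', '10']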
@@ -83,8 +85,8 @@ def _get_local_ip(ip_address):
         CURRENT_IP = s.getsockname()[0]
         s.close()
     except Exception as e:
-        raise RuntimeError(f"Get local ip failed: {e}. Please check whether an accessible address "
-                           "is input by '--master_address'.")
+        raise RuntimeError("Get local ip has failed. Please verify that the accessible address has been "
+                           "specified in the '--master_address' parameter") from e
     return CURRENT_IP


@@ -124,8 +126,8 @@ def _convert_addr_to_ip(master_addr):
         logger.info(f"Convert input host name:{master_addr} to ip address:{ip_address}.")
         return ip_address
     except socket.gaierror as e:
-        raise RuntimeError(f"DNS resolution failed: {e}. Please check whether a correct host name "
-                           "is input by '--master_address'.")
+        raise RuntimeError("DNS resolution has failed. Please verify that the correct hostname has been "
+                           "specified in the '--master_address' parameter") from e


 def _send_scale_num(url, scale_num):
@@ -134,3 +136,89 @@ def _send_scale_num(url, scale_num):

     """
     return ""
+
+
+def _parse_global_device_to_cpu_map(local_rank_id, physical_device_id, device_to_cpu_map):
+    """
+    Parse the global device_to_cpu_map and return a cpu list for assigned local_rank_id.
+
+    """
+    input_device_id = int(list(device_to_cpu_map.keys())[local_rank_id].replace("device", ""))
+    if physical_device_id != input_device_id:
+        return ""
+    affinity_cpu_list = list(device_to_cpu_map.values())[local_rank_id]
+    affinity_cpu_str = ",".join(affinity_cpu_list)
+    return affinity_cpu_str
+
+
+def _generate_auto_bind_core_strategy(local_worker_num):
+    """
+    Get device to core range assigned for the all processes.
+
+    """
+    simulation_level = os.getenv("MS_SIMULATION_LEVEL", "").strip()
+
+    try:
+        available_cpus = _get_cpu_available()
+    except RuntimeError as e:
+        logger.warning(f"Failed to acquire available cpu info, error: {e} Will not launch process with taskset.")
+        return {}
+
+    if not simulation_level:
+        device_to_cpu_map = _auto_generate_strategy(local_worker_num, available_cpus)
+    else:
+        device_to_cpu_map = _equal_distribution_strategy(local_worker_num, available_cpus)
+
+    return device_to_cpu_map
+
+
+def ranges_to_str(num_list):
+    """
+    Convert a num list to a range string.
+
+    """
+    ranges = []
+    start = num_list[0]
+    for i in range(1, len(num_list)):
+        if num_list[i] != num_list[i-1] + 1:
+            ranges.append((start, num_list[i-1]))
+            start = num_list[i]
+    ranges.append((start, num_list[-1]))
+
+    parts = []
+    for start, end in ranges:
+        if start == end:
+            parts.append(str(start))
+        else:
+            parts.append(f"{start}-{end}")
+    return ",".join(parts)
+
+
+def _generate_bind_core_strategy(local_rank_id, device_to_cpu_map, arg_bind_core):
+    """
+    Get device to core range assigned for the all processes.
+
+    """
+    affinity_cpu_str = ""
+    cpu_list_for_device = []
+    simulation_level = os.getenv("MS_SIMULATION_LEVEL", "").strip()
+
+    try:
+        physical_device_id = _get_physical_device_id(local_rank_id, simulation_level)
+    except RuntimeError as e:
+        logger.warning(f"Failed to acquire device id, error: {e} Will not launch process with taskset.")
+        return None
+
+    if isinstance(arg_bind_core, dict):
+        affinity_cpu_str = _parse_global_device_to_cpu_map(local_rank_id, physical_device_id, arg_bind_core)
+        if not affinity_cpu_str:
+            logger.warning(f"Failed to find physical_device_id[{physical_device_id}] for "
+                           f"process[{local_rank_id}]. Will not launch process with taskset.")
+            return None
+    elif arg_bind_core is True:
+        cpu_list_for_device = device_to_cpu_map.get(physical_device_id, [])
+        if not cpu_list_for_device:
+            return None
+        os.environ["MSRUN_CPU_LIST"] = str(cpu_list_for_device)
+        affinity_cpu_str = ranges_to_str(cpu_list_for_device)
+    return affinity_cpu_str
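ranges_to_str collapses a sorted list of CPU ids into taskset's compact syntax: consecutive runs become 'start-end' and isolated ids stay bare. For example:

    ranges_to_str([0, 1, 2, 3, 8, 10, 11])  # -> '0-3,8,10-11'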
--- a/mindspore/parallel/cluster/run.py
+++ b/mindspore/parallel/cluster/run.py
@@ -14,9 +14,47 @@
 # ============================================================================
 """Entrypoint of ms_run"""
 import ast
-from argparse import REMAINDER, ArgumentParser
+import re
+import json
+from argparse import REMAINDER, ArgumentParser, ArgumentTypeError
 from .process_entity import _ProcessManager

+
+def parse_and_validate_bind_core(value):
+    """
+    Parse input argument of --bind_core.
+
+    """
+    if value.lower() == "true":
+        return True
+    if value.lower() == "false":
+        return False
+
+    try:
+        value_dict = json.loads(value)
+    except json.JSONDecodeError as e:
+        raise ArgumentTypeError("Failed to parse JSON into a dictionary") from e
+
+    if isinstance(value_dict, dict):
+        range_pattern = re.compile(r'^\d+-\d+$')
+        for device_id, affinity_cpu_list in value_dict.items():
+            if not re.fullmatch(r"device\d+", device_id):
+                raise ArgumentTypeError(f"Key '{device_id}' must be in format 'deviceX' (X ≥ 0).")
+            if not isinstance(affinity_cpu_list, list):
+                raise ArgumentTypeError(f"Value for '{device_id}':{affinity_cpu_list} should be a list, "
+                                        f"but got {type(affinity_cpu_list)}.")
+
+            for cpu_range in affinity_cpu_list:
+                if not isinstance(cpu_range, str):
+                    raise ArgumentTypeError(f"CPU range '{cpu_range}' in '{affinity_cpu_list}' should be a string.")
+                if not range_pattern.match(cpu_range):
+                    raise ArgumentTypeError(f"CPU range '{cpu_range}' in '{affinity_cpu_list}' should be "
+                                            "in format 'cpuidX-cpuidY'.")
+        return value_dict
+
+    raise ArgumentTypeError(f"Type of {value} should be bool or dict, but got {type(value)}.")
+
+
 def get_args():
     """
     Parses and retrieves command-line arguments.
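parse_and_validate_bind_core keeps the old boolean spellings and adds a JSON form whose keys must match 'deviceX' and whose values are lists of 'start-end' strings, so an invocation such as msrun --bind_core='{"device0": ["0-7"]}' ... becomes possible (illustrative command line). A few sketched inputs:

    parse_and_validate_bind_core("True")    # -> True
    parse_and_validate_bind_core('{"device0": ["0-7"], "device1": ["8-15"]}')
    # -> {'device0': ['0-7'], 'device1': ['8-15']}
    parse_and_validate_bind_core('{"npu0": ["0-7"]}')  # raises ArgumentTypeError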
@@ -77,23 +115,26 @@ def get_args():
     parser.add_argument(
         "--bind_core",
         default=False,
-        type=ast.literal_eval,
-        choices=[True, False],
-        help="specifies whether msrun should bind cpu cores to spawned processes."
+        type=parse_and_validate_bind_core,
+        help="specifies whether msrun should bind CPU cores to spawned processes. "
+             "If set to True, msrun will bind core based on the environment automatically, "
+             "and if passed a dict, msrun will bind core based on this dict information."
     )
     parser.add_argument(
         "--sim_level",
         default=-1,
         type=int,
         choices=[0, 1, 2, 3],
-        help="specifies simulation level. When this argument is set, msrun only spawns one process "
-             "but export RANK_SIZE with value worker_num and RANK_ID with value sim_rank_id."
+        help="specifies simulation level. This argument activates dryrun mode, functioning "
+             "equivalently to environment variable 'MS_SIMULATION_LEVEL' while having higher priority."
     )
     parser.add_argument(
         "--sim_rank_id",
         default=-1,
         type=int,
-        help="specifies simulation process's rank id. Only one process is spawned in simulation scenario."
+        help="specifies simulation process's rank id. When this argument is set, only one process "
+             "is spawned on dryrun mode, functioning equivalently to environment variable 'RANK_ID' "
+             "while having higher priority."
     )
     parser.add_argument(
         "--rank_table_file",

--- a/mindspore/parallel/function/__init__.py
+++ b/mindspore/parallel/function/__init__.py
@@ -16,8 +16,15 @@
 """
 Parallel function operator
 """
+from __future__ import absolute_import

-from mindspore.parallel.function.reshard_func import reshard
+from . import (
+    reshard_func
+)
+
+from .reshard_func import (
+    reshard
+)

 __all__ = []
 __all__.extend(reshard_func.__all__)

--- a/mindspore/parallel/function/reshard_func.py
+++ b/mindspore/parallel/function/reshard_func.py
@@ -42,11 +42,12 @@ def reshard(tensor, layout):
         can check :class:`mindspore.parallel.Layout` for reference.

     Note:
-        - In the Graph mode, this function can set the sharding propagation strategy of a tensor.
-          For those tensor do not manually be set, their strategies are decided by the sharding
-          strategy propagation algorithm automatically.
-        - In PyNative mode, you can use this method to arrange tensors in a cell (that is, cells
-          that use Cell.shard/F.shard in PyNative mode) that is executed in parallel in graph mode.
+        In the Graph mode, this function can set the sharding propagation strategy of a tensor.
+        For those tensor do not manually be set, their strategies are decided by the sharding
+        strategy propagation algorithm automatically.
+
+    .. warning::
+        The method is currently not supported in PyNative mode.

     Args:
         tensor (Tensor): The tensor to be set the sharding strategy.
@@ -235,7 +236,7 @@ def _redistribute(tensor, dst_dtensor_info):
     global REDIST_CELL_CACHE
     redist_cache_key = (f"{src_layout_info['device_matrix']}, {src_layout_info['tensor_map']} -> "
                         f"{dst_layout_info['device_matrix']}, {dst_layout_info['tensor_map']}")
-    if redist_cache_key in REDIST_CELL_CACHE.keys():
+    if redist_cache_key in REDIST_CELL_CACHE:
         logger.debug(f"redist_cache_key is {redist_cache_key}, match cache")
         redist_func = REDIST_CELL_CACHE[redist_cache_key]
     else:

--- a/mindspore/parallel/nn/__init__.py
+++ b/mindspore/parallel/nn/__init__.py
@@ -17,8 +17,21 @@ Interfaces for parallel-related functionality
 """
 from __future__ import absolute_import

-from mindspore.parallel.nn.parallel_grad_reducer import PipelineGradReducer
-from mindspore.parallel.nn.parallel_cell_wrapper import PipelineCell, Pipeline, MicroBatchInterleaved, GradAccumulation
+from . import (
+    parallel_grad_reducer,
+    parallel_cell_wrapper
+)
+
+from .parallel_grad_reducer import (
+    PipelineGradReducer
+)
+
+from .parallel_cell_wrapper import (
+    PipelineCell,
+    Pipeline,
+    MicroBatchInterleaved,
+    GradAccumulation
+)

 __all__ = []
 __all__.extend(parallel_grad_reducer.__all__)

--- a/mindspore/parallel/nn/parallel_cell_wrapper.py
+++ b/mindspore/parallel/nn/parallel_cell_wrapper.py
@@ -17,6 +17,8 @@
 from __future__ import absolute_import
 from __future__ import division

+__all__ = ['PipelineCell', 'Pipeline', 'MicroBatchInterleaved', 'GradAccumulation']
+
 from mindspore import nn
 from mindspore.ops import operations as P
 from mindspore.nn.cell import Cell
@@ -24,12 +26,10 @@ from mindspore.nn.wrap.cell_wrapper import _MicroBatch
 from mindspore import log as logger


-__all__ = ['PipelineCell', 'Pipeline', 'MicroBatchInterleaved', 'GradAccumulation']
-
-
 class PipelineCell(Cell):
     """
-    Slice MiniBatch into finer-grained MicroBatch for use in pipeline-parallel training.
+    Slice MiniBatch into finer-grained MicroBatch for use in pipeline-parallel training,
+    and specify the segment info.

     Note:
         micro_size must be greater or equal to pipeline stages.
@@ -38,6 +38,8 @@ class PipelineCell(Cell):
         network (Cell): The target network to wrap.
         micro_size (int): MicroBatch size.
         stage_config (dict, optional): The stage configuration for each cell's execution in pipeline parallel.
+        segment_config (dict, optional): The segment configuration for each cell's execution in pipeline parallel.
+            Default ``None``.

     Supported Platforms:
         ``Ascend``
@@ -49,7 +51,7 @@
     >>> net = LeNet5()
     >>> net = nn.PipelineCell(net, 4, stage_config={"cell_name_0": 0, "cell_name_1": 1})
     """
-    def __init__(self, network, micro_size, stage_config=None):
+    def __init__(self, network, micro_size, stage_config=None, segment_config=None):
         super(PipelineCell, self).__init__(auto_prefix=False)
         self.network = network
         self.micro_inputs = nn.CellList()
@@ -101,15 +103,46 @@
                                " config stage num:" + str(config_stage_num))
             logger.warning("network:" + str(self.network))
             logger.warning("cell name available:")
-            for cell_name, cell in self.network.cells_and_names():
+            for cell_name, _ in self.network.cells_and_names():
                 logger.warning(cell_name)
             raise KeyError("For 'PipelineCell', the argument 'stage_config' : {} is not "
                            "found in 'network' : {}".format(config_dict, network))
-
-    def construct(self, *inputs):
+        if segment_config is None:
+            return
+        self._config_segment(segment_config)
+
+
+    def _config_segment(self, segment_config):
+        """
+        Config segment num for cell.
+        """
+        config_dict = segment_config.copy()
+
+        for cell_name, cell in self.network.cells_and_names():
+            if cell_name in segment_config:
+                setattr(cell, "pipeline_segment", segment_config[cell_name])
+                del config_dict[cell_name]
+        if str(self.network) in segment_config:
+            setattr(self.network, "pipeline_segment", segment_config[str(self.network)])
+            del config_dict[str(self.network)]
+        # if there are any config elements left, print them
+        if config_dict:
+            for config_cell_name, config_segment_num in config_dict.items():
+                logger.error("pipeline_cell segment_config set pipeline_segment fail!")
+                logger.warning("config cell name:" + str(config_cell_name) +
+                               " config segment num:" + str(config_segment_num))
+            logger.warning("network:" + str(self.network))
+            logger.warning("cell name available:")
+            for cell_name, _ in self.network.cells_and_names():
+                logger.warning(cell_name)
+            raise KeyError("For 'PipelineCell', the argument 'segment_config' : {} is not "
+                           "found in 'network' : {}".format(config_dict, self.network))
+
+
+    def construct(self, *args, **kwargs):
         ret = None
         for i in range(self.micro_size):
-            micro_input = self.micro_inputs[i](i, *inputs)
+            micro_input = self.micro_inputs[i](i, *args, **kwargs)
             output = self.network(*micro_input)
             if ret is not None:
                 ret = self.add_list[i](ret, output)
@@ -120,7 +153,8 @@

 class Pipeline(PipelineCell):
     """
-    Specify the number of micro_batch for pipeline parallelism and the division rules for stage.
+    Specify the number of micro_batch for pipeline parallelism and the division rules for stage,
+    and specify the segment info.

     Note:
         micro_size must be greater or equal to pipeline stages.
@@ -129,6 +163,8 @@ class Pipeline(PipelineCell):
         network (Cell): The target network to wrap.
         micro_size (int): MicroBatch size.
         stage_config (dict, optional): Stage configuration for cell's execution in pipeline parallel. Default ``None``.
+        segment_config (dict, optional): The segment configuration for each cell's execution in pipeline parallel.
+            Default ``None``.

     Raises:
         TypeError: The type of `net` is not cell.
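PipelineCell and Pipeline now accept segment_config alongside stage_config, attaching a pipeline_segment attribute to each named sub-cell. A hedged sketch extending the docstring example (cell names and segment values are hypothetical):

    >>> net = LeNet5()
    >>> net = nn.PipelineCell(net, 4,
    ...                       stage_config={"cell_name_0": 0, "cell_name_1": 1},
    ...                       segment_config={"cell_name_0": 0, "cell_name_1": 1})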
@@ -197,10 +233,10 @@
         self.interleave_inputs.append(interleave_data)
         self._get_attr_from_cell(network)

-    def construct(self, *inputs):
+    def construct(self, *args, **kwargs):
         output = 0.0
         for i in range(self.interleave_num):
-            interleave_input = self.interleave_inputs[i](i, *inputs)
+            interleave_input = self.interleave_inputs[i](i, *args, **kwargs)
             output = self.add(output, self.network(*interleave_input))
         return output

@@ -251,10 +287,10 @@
         self.add_list.append(self.add)
         self._get_attr_from_cell(network)

-    def construct(self, *inputs):
+    def construct(self, *args, **kwargs):
         ret = None
         for i in range(self.micro_size):
-            micro_input = self.micro_inputs[i](i, *inputs)
+            micro_input = self.micro_inputs[i](i, *args, **kwargs)
             output = self.network(*micro_input)
             if ret is not None:
                 ret = self.add_list[i](ret, output)