mindspore 2.5.0-cp310-cp310-win_amd64.whl → 2.6.0-cp310-cp310-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: the registry flags this version of mindspore as possibly problematic.

Files changed (493)
  1. mindspore/.commit_id +1 -1
  2. mindspore/Microsoft.VisualStudio.Telemetry.dll +0 -0
  3. mindspore/Newtonsoft.Json.dll +0 -0
  4. mindspore/__init__.py +6 -4
  5. mindspore/_c_dataengine.cp310-win_amd64.pyd +0 -0
  6. mindspore/_c_expression.cp310-win_amd64.pyd +0 -0
  7. mindspore/_c_mindrecord.cp310-win_amd64.pyd +0 -0
  8. mindspore/_check_jit_forbidden_api.py +3 -0
  9. mindspore/_checkparam.py +3 -33
  10. mindspore/_deprecated/__init__.py +17 -0
  11. mindspore/_deprecated/jit.py +198 -0
  12. mindspore/_extends/builtin_operations.py +1 -1
  13. mindspore/_extends/parse/__init__.py +6 -7
  14. mindspore/_extends/parse/compile_config.py +19 -0
  15. mindspore/_extends/parse/deprecated/deprecated_tensor_method.py +22 -3
  16. mindspore/_extends/parse/jit_fallback_modules/__init__.py +0 -0
  17. mindspore/_extends/parse/jit_fallback_modules/check_utils.py +123 -0
  18. mindspore/_extends/parse/jit_fallback_modules/third_party_modules.py +50 -0
  19. mindspore/_extends/parse/parser.py +25 -194
  20. mindspore/_extends/parse/resources.py +1 -5
  21. mindspore/_extends/parse/standard_method.py +109 -75
  22. mindspore/_extends/pijit/__init__.py +2 -2
  23. mindspore/_extends/pijit/pijit_func_white_list.py +16 -11
  24. mindspore/_extends/pijit/tensor_func_list.py +27 -0
  25. mindspore/_extends/utils.py +1 -1
  26. mindspore/amp.py +4 -4
  27. mindspore/atlprov.dll +0 -0
  28. mindspore/avcodec-59.dll +0 -0
  29. mindspore/avdevice-59.dll +0 -0
  30. mindspore/avfilter-8.dll +0 -0
  31. mindspore/avformat-59.dll +0 -0
  32. mindspore/avutil-57.dll +0 -0
  33. mindspore/boost/__init__.py +2 -2
  34. mindspore/boost/base.py +3 -7
  35. mindspore/boost/boost_cell_wrapper.py +2 -2
  36. mindspore/c1.dll +0 -0
  37. mindspore/c1xx.dll +0 -0
  38. mindspore/c2.dll +0 -0
  39. mindspore/common/__init__.py +4 -3
  40. mindspore/common/_grad_function.py +56 -0
  41. mindspore/common/_pijit_context.py +14 -5
  42. mindspore/common/_register_for_tensor.py +1 -1
  43. mindspore/common/_stub_tensor.py +5 -10
  44. mindspore/common/_tensor_cpp_method.py +1 -1
  45. mindspore/common/_tensor_docs.py +2014 -3386
  46. mindspore/common/api.py +386 -355
  47. mindspore/common/auto_dynamic_shape.py +41 -44
  48. mindspore/common/dtype.py +5 -2
  49. mindspore/common/dump.py +7 -5
  50. mindspore/common/file_system.py +3 -0
  51. mindspore/common/generator.py +3 -0
  52. mindspore/common/hook_handle.py +5 -3
  53. mindspore/common/initializer.py +10 -6
  54. mindspore/common/jit_begin_end.py +94 -0
  55. mindspore/common/jit_config.py +6 -1
  56. mindspore/common/jit_context.py +76 -0
  57. mindspore/common/jit_trace.py +378 -0
  58. mindspore/common/lazy_inline.py +2 -2
  59. mindspore/common/mutable.py +5 -4
  60. mindspore/common/parameter.py +106 -39
  61. mindspore/common/seed.py +2 -2
  62. mindspore/common/sparse_tensor.py +23 -17
  63. mindspore/common/tensor.py +332 -714
  64. mindspore/communication/__init__.py +7 -5
  65. mindspore/communication/_comm_helper.py +47 -2
  66. mindspore/communication/comm_func.py +70 -53
  67. mindspore/communication/management.py +83 -17
  68. mindspore/context.py +228 -571
  69. mindspore/dataset/__init__.py +44 -20
  70. mindspore/dataset/audio/__init__.py +2 -8
  71. mindspore/dataset/audio/transforms.py +3 -17
  72. mindspore/dataset/core/config.py +3 -3
  73. mindspore/dataset/engine/cache_client.py +1 -1
  74. mindspore/dataset/engine/datasets.py +102 -120
  75. mindspore/dataset/engine/datasets_audio.py +22 -22
  76. mindspore/dataset/engine/datasets_standard_format.py +43 -24
  77. mindspore/dataset/engine/datasets_text.py +78 -85
  78. mindspore/dataset/engine/datasets_user_defined.py +109 -77
  79. mindspore/dataset/engine/datasets_vision.py +111 -108
  80. mindspore/dataset/engine/iterators.py +5 -3
  81. mindspore/dataset/engine/obs/obs_mindrecord_dataset.py +1 -1
  82. mindspore/dataset/engine/samplers.py +279 -57
  83. mindspore/dataset/engine/serializer_deserializer.py +2 -1
  84. mindspore/dataset/engine/validators.py +10 -0
  85. mindspore/dataset/text/__init__.py +7 -6
  86. mindspore/dataset/text/transforms.py +6 -5
  87. mindspore/dataset/text/utils.py +3 -3
  88. mindspore/dataset/transforms/__init__.py +0 -9
  89. mindspore/dataset/transforms/transforms.py +3 -3
  90. mindspore/dataset/utils/browse_dataset.py +1 -1
  91. mindspore/dataset/vision/__init__.py +2 -9
  92. mindspore/dataset/vision/transforms.py +202 -158
  93. mindspore/dataset/vision/utils.py +7 -5
  94. mindspore/device_context/ascend/op_debug.py +60 -1
  95. mindspore/device_context/ascend/op_tuning.py +0 -4
  96. mindspore/device_manager.py +39 -3
  97. mindspore/dnnl.dll +0 -0
  98. mindspore/dpcmi.dll +0 -0
  99. mindspore/experimental/es/embedding_service.py +35 -27
  100. mindspore/experimental/llm_boost/ascend_native/llama_boost_ascend_native.py +0 -2
  101. mindspore/experimental/map_parameter.py +4 -4
  102. mindspore/experimental/optim/adadelta.py +22 -26
  103. mindspore/experimental/optim/adagrad.py +4 -4
  104. mindspore/experimental/optim/adam.py +4 -0
  105. mindspore/experimental/optim/adamax.py +4 -4
  106. mindspore/experimental/optim/adamw.py +4 -0
  107. mindspore/experimental/optim/asgd.py +1 -1
  108. mindspore/experimental/optim/lr_scheduler.py +40 -22
  109. mindspore/experimental/optim/radam.py +5 -5
  110. mindspore/experimental/optim/rprop.py +1 -1
  111. mindspore/experimental/optim/sgd.py +1 -1
  112. mindspore/hal/contiguous_tensors_handle.py +6 -10
  113. mindspore/hal/device.py +55 -81
  114. mindspore/hal/event.py +38 -55
  115. mindspore/hal/memory.py +115 -147
  116. mindspore/hal/stream.py +81 -125
  117. mindspore/include/dataset/constants.h +7 -4
  118. mindspore/include/dataset/execute.h +2 -2
  119. mindspore/jpeg62.dll +0 -0
  120. mindspore/log.py +40 -2
  121. mindspore/mindrecord/__init__.py +20 -7
  122. mindspore/mindspore_backend_common.dll +0 -0
  123. mindspore/mindspore_backend_manager.dll +0 -0
  124. mindspore/mindspore_common.dll +0 -0
  125. mindspore/mindspore_core.dll +0 -0
  126. mindspore/mindspore_dump.dll +0 -0
  127. mindspore/mindspore_frontend.dll +0 -0
  128. mindspore/mindspore_glog.dll +0 -0
  129. mindspore/mindspore_memory_pool.dll +0 -0
  130. mindspore/mindspore_ms_backend.dll +0 -0
  131. mindspore/mindspore_ops.dll +0 -0
  132. mindspore/{mindspore_backend.dll → mindspore_ops_host.dll} +0 -0
  133. mindspore/mindspore_ops_kernel_common.dll +0 -0
  134. mindspore/mindspore_profiler.dll +0 -0
  135. mindspore/mindspore_pyboost.dll +0 -0
  136. mindspore/mindspore_pynative.dll +0 -0
  137. mindspore/mindspore_res_manager.dll +0 -0
  138. mindspore/mindspore_runtime_pipeline.dll +0 -0
  139. mindspore/mint/__init__.py +133 -702
  140. mindspore/mint/distributed/__init__.py +5 -1
  141. mindspore/mint/distributed/distributed.py +198 -113
  142. mindspore/mint/linalg/__init__.py +2 -0
  143. mindspore/mint/nn/__init__.py +280 -18
  144. mindspore/mint/nn/functional.py +282 -64
  145. mindspore/mint/nn/layer/__init__.py +4 -0
  146. mindspore/mint/nn/layer/_functions.py +7 -3
  147. mindspore/mint/nn/layer/activation.py +120 -13
  148. mindspore/mint/nn/layer/conv.py +234 -28
  149. mindspore/mint/nn/layer/normalization.py +15 -16
  150. mindspore/mint/nn/layer/padding.py +1 -1
  151. mindspore/mint/nn/layer/pooling.py +66 -1
  152. mindspore/mint/optim/__init__.py +2 -1
  153. mindspore/mint/optim/sgd.py +171 -0
  154. mindspore/msobj140.dll +0 -0
  155. mindspore/mspdb140.dll +0 -0
  156. mindspore/mspdbcore.dll +0 -0
  157. mindspore/mspdbst.dll +0 -0
  158. mindspore/mspft140.dll +0 -0
  159. mindspore/msvcdis140.dll +0 -0
  160. mindspore/msvcp140_1.dll +0 -0
  161. mindspore/msvcp140_2.dll +0 -0
  162. mindspore/msvcp140_atomic_wait.dll +0 -0
  163. mindspore/msvcp140_codecvt_ids.dll +0 -0
  164. mindspore/nn/__init__.py +4 -1
  165. mindspore/nn/cell.py +1253 -179
  166. mindspore/nn/layer/activation.py +23 -21
  167. mindspore/nn/layer/basic.py +22 -16
  168. mindspore/nn/layer/container.py +1 -1
  169. mindspore/nn/layer/conv.py +53 -42
  170. mindspore/nn/layer/embedding.py +9 -8
  171. mindspore/nn/layer/normalization.py +48 -42
  172. mindspore/nn/layer/pooling.py +75 -31
  173. mindspore/nn/layer/transformer.py +11 -10
  174. mindspore/nn/learning_rate_schedule.py +4 -2
  175. mindspore/nn/loss/loss.py +27 -19
  176. mindspore/nn/optim/ada_grad.py +6 -5
  177. mindspore/nn/optim/adadelta.py +9 -7
  178. mindspore/nn/optim/adafactor.py +1 -1
  179. mindspore/nn/optim/adam.py +18 -14
  180. mindspore/nn/optim/adamax.py +8 -7
  181. mindspore/nn/optim/adasum.py +5 -5
  182. mindspore/nn/optim/asgd.py +3 -1
  183. mindspore/nn/optim/ftrl.py +11 -9
  184. mindspore/nn/optim/lamb.py +1 -1
  185. mindspore/nn/optim/lazyadam.py +12 -10
  186. mindspore/nn/optim/momentum.py +7 -6
  187. mindspore/nn/optim/optimizer.py +2 -2
  188. mindspore/nn/optim/proximal_ada_grad.py +12 -10
  189. mindspore/nn/optim/rmsprop.py +13 -12
  190. mindspore/nn/optim/rprop.py +9 -7
  191. mindspore/nn/optim/sgd.py +9 -6
  192. mindspore/nn/optim/tft_wrapper.py +5 -2
  193. mindspore/nn/probability/bijector/bijector.py +17 -11
  194. mindspore/nn/probability/bijector/gumbel_cdf.py +5 -5
  195. mindspore/nn/probability/bijector/invert.py +2 -2
  196. mindspore/nn/probability/bijector/scalar_affine.py +3 -3
  197. mindspore/nn/probability/bijector/softplus.py +3 -2
  198. mindspore/nn/probability/distribution/beta.py +3 -3
  199. mindspore/nn/probability/distribution/categorical.py +1 -1
  200. mindspore/nn/probability/distribution/cauchy.py +4 -2
  201. mindspore/nn/probability/distribution/exponential.py +6 -7
  202. mindspore/nn/probability/distribution/gamma.py +2 -2
  203. mindspore/nn/probability/distribution/gumbel.py +2 -2
  204. mindspore/nn/probability/distribution/half_normal.py +5 -3
  205. mindspore/nn/probability/distribution/logistic.py +5 -3
  206. mindspore/nn/probability/distribution/poisson.py +1 -1
  207. mindspore/nn/probability/distribution/uniform.py +5 -3
  208. mindspore/nn/reinforcement/_tensors_queue.py +1 -1
  209. mindspore/nn/reinforcement/tensor_array.py +1 -1
  210. mindspore/nn/wrap/__init__.py +6 -6
  211. mindspore/nn/wrap/cell_wrapper.py +178 -117
  212. mindspore/nn/wrap/grad_reducer.py +45 -36
  213. mindspore/nn/wrap/loss_scale.py +3 -3
  214. mindspore/numpy/array_creations.py +3 -3
  215. mindspore/numpy/array_ops.py +1 -1
  216. mindspore/numpy/utils.py +1 -2
  217. mindspore/numpy/utils_const.py +1 -2
  218. mindspore/opencv_core452.dll +0 -0
  219. mindspore/opencv_imgcodecs452.dll +0 -0
  220. mindspore/opencv_imgproc452.dll +0 -0
  221. mindspore/ops/__init__.py +3 -2
  222. mindspore/ops/_grad_experimental/grad_comm_ops.py +18 -3
  223. mindspore/ops/_grad_experimental/grad_debug_ops.py +8 -1
  224. mindspore/ops/_grad_experimental/taylor_rule.py +29 -0
  225. mindspore/ops/_register_for_op.py +0 -11
  226. mindspore/{ops_generate → ops/_utils}/arg_dtype_cast.py +123 -4
  227. mindspore/{ops_generate → ops/_utils}/arg_handler.py +3 -4
  228. mindspore/ops/_vmap/vmap_array_ops.py +32 -6
  229. mindspore/ops/_vmap/vmap_grad_nn_ops.py +2 -1
  230. mindspore/ops/_vmap/vmap_math_ops.py +4 -7
  231. mindspore/ops/_vmap/vmap_nn_ops.py +9 -8
  232. mindspore/ops/auto_generate/__init__.py +4 -3
  233. mindspore/ops/auto_generate/cpp_create_prim_instance_helper.py +127 -52
  234. mindspore/ops/auto_generate/gen_extend_func.py +286 -208
  235. mindspore/ops/auto_generate/gen_ops_def.py +2783 -2335
  236. mindspore/ops/auto_generate/gen_ops_prim.py +8992 -2686
  237. mindspore/ops/auto_generate/pyboost_inner_prim.py +106 -76
  238. mindspore/ops/composite/__init__.py +2 -1
  239. mindspore/ops/composite/base.py +19 -24
  240. mindspore/ops/composite/math_ops.py +6 -16
  241. mindspore/ops/composite/multitype_ops/__init__.py +5 -2
  242. mindspore/ops/composite/multitype_ops/_compile_utils.py +4 -5
  243. mindspore/ops/composite/multitype_ops/_constexpr_utils.py +1 -2
  244. mindspore/ops/composite/multitype_ops/add_impl.py +2 -1
  245. mindspore/ops/composite/multitype_ops/bitwise_and_impl.py +2 -1
  246. mindspore/ops/composite/multitype_ops/bitwise_or_impl.py +2 -1
  247. mindspore/ops/composite/multitype_ops/bitwise_xor_impl.py +2 -1
  248. mindspore/ops/composite/multitype_ops/div_impl.py +6 -4
  249. mindspore/ops/composite/multitype_ops/equal_impl.py +4 -3
  250. mindspore/ops/composite/multitype_ops/floordiv_impl.py +2 -1
  251. mindspore/ops/composite/multitype_ops/getitem_impl.py +3 -2
  252. mindspore/ops/composite/multitype_ops/greater_equal_impl.py +4 -3
  253. mindspore/ops/composite/multitype_ops/greater_impl.py +4 -3
  254. mindspore/ops/composite/multitype_ops/in_impl.py +2 -1
  255. mindspore/ops/composite/multitype_ops/invert_impl.py +50 -0
  256. mindspore/ops/composite/multitype_ops/left_shift_impl.py +2 -1
  257. mindspore/ops/composite/multitype_ops/less_equal_impl.py +4 -3
  258. mindspore/ops/composite/multitype_ops/less_impl.py +4 -3
  259. mindspore/ops/composite/multitype_ops/logic_not_impl.py +3 -2
  260. mindspore/ops/composite/multitype_ops/logical_and_impl.py +2 -1
  261. mindspore/ops/composite/multitype_ops/logical_or_impl.py +2 -1
  262. mindspore/ops/composite/multitype_ops/mod_impl.py +2 -1
  263. mindspore/ops/composite/multitype_ops/mul_impl.py +3 -2
  264. mindspore/ops/composite/multitype_ops/negative_impl.py +2 -1
  265. mindspore/ops/composite/multitype_ops/not_equal_impl.py +2 -1
  266. mindspore/ops/composite/multitype_ops/not_in_impl.py +2 -1
  267. mindspore/ops/composite/multitype_ops/ones_like_impl.py +18 -0
  268. mindspore/ops/composite/multitype_ops/pow_impl.py +2 -1
  269. mindspore/ops/composite/multitype_ops/right_shift_impl.py +2 -1
  270. mindspore/ops/composite/multitype_ops/setitem_impl.py +2 -1
  271. mindspore/ops/composite/multitype_ops/sub_impl.py +2 -1
  272. mindspore/ops/function/__init__.py +28 -2
  273. mindspore/ops/function/_add_attr_func.py +58 -0
  274. mindspore/ops/function/array_func.py +1631 -2347
  275. mindspore/ops/function/clip_func.py +38 -45
  276. mindspore/ops/function/debug_func.py +36 -44
  277. mindspore/ops/function/grad/__init__.py +1 -0
  278. mindspore/ops/function/grad/grad_func.py +104 -71
  279. mindspore/ops/function/image_func.py +1 -1
  280. mindspore/ops/function/linalg_func.py +46 -78
  281. mindspore/ops/function/math_func.py +3024 -3855
  282. mindspore/ops/function/nn_func.py +678 -274
  283. mindspore/ops/function/other_func.py +159 -1
  284. mindspore/ops/function/parameter_func.py +17 -30
  285. mindspore/ops/function/random_func.py +216 -361
  286. mindspore/ops/function/reshard_func.py +4 -70
  287. mindspore/ops/function/sparse_func.py +3 -3
  288. mindspore/ops/function/sparse_unary_func.py +5 -5
  289. mindspore/ops/function/spectral_func.py +25 -58
  290. mindspore/ops/function/vmap_func.py +26 -18
  291. mindspore/ops/functional.py +8 -5
  292. mindspore/ops/functional_overload.py +655 -4
  293. mindspore/ops/op_info_register.py +32 -244
  294. mindspore/ops/operations/__init__.py +21 -14
  295. mindspore/ops/operations/_custom_ops_utils.py +235 -0
  296. mindspore/ops/operations/_grad_ops.py +1 -10
  297. mindspore/ops/operations/_inner_ops.py +5 -76
  298. mindspore/ops/operations/_ms_kernel.py +4 -10
  299. mindspore/ops/operations/_rl_inner_ops.py +1 -1
  300. mindspore/ops/operations/_scalar_ops.py +3 -2
  301. mindspore/ops/operations/_sequence_ops.py +1 -1
  302. mindspore/ops/operations/_tensor_array.py +1 -1
  303. mindspore/ops/operations/array_ops.py +39 -24
  304. mindspore/ops/operations/comm_ops.py +150 -107
  305. mindspore/ops/operations/custom_ops.py +287 -32
  306. mindspore/ops/operations/debug_ops.py +119 -16
  307. mindspore/ops/operations/inner_ops.py +1 -1
  308. mindspore/ops/operations/linalg_ops.py +1 -58
  309. mindspore/ops/operations/manually_defined/_inner.py +1 -1
  310. mindspore/ops/operations/manually_defined/ops_def.py +746 -79
  311. mindspore/ops/operations/math_ops.py +21 -18
  312. mindspore/ops/operations/nn_ops.py +67 -224
  313. mindspore/ops/operations/other_ops.py +62 -9
  314. mindspore/ops/operations/random_ops.py +13 -7
  315. mindspore/ops/operations/reshard_ops.py +1 -1
  316. mindspore/ops/operations/sparse_ops.py +2 -2
  317. mindspore/ops/primitive.py +43 -32
  318. mindspore/ops/tensor_method.py +243 -17
  319. mindspore/ops_generate/__init__.py +0 -5
  320. mindspore/ops_generate/aclnn/__init__.py +0 -0
  321. mindspore/ops_generate/{aclnn_kernel_register_auto_cc_generator.py → aclnn/aclnn_kernel_register_auto_cc_generator.py} +43 -18
  322. mindspore/ops_generate/{gen_aclnn_implement.py → aclnn/gen_aclnn_implement.py} +49 -51
  323. mindspore/ops_generate/api/__init__.py +0 -0
  324. mindspore/ops_generate/{add_tensor_docs_generator.py → api/add_tensor_docs_generator.py} +9 -7
  325. mindspore/ops_generate/{cpp_create_prim_instance_helper_generator.py → api/cpp_create_prim_instance_helper_generator.py} +6 -9
  326. mindspore/ops_generate/{functional_map_cpp_generator.py → api/functional_map_cpp_generator.py} +25 -12
  327. mindspore/ops_generate/{functional_overload_py_generator.py → api/functional_overload_py_generator.py} +8 -6
  328. mindspore/ops_generate/{functions_cc_generator.py → api/functions_cc_generator.py} +14 -10
  329. mindspore/ops_generate/api/gen_api.py +103 -0
  330. mindspore/ops_generate/{op_api_proto.py → api/op_api_proto.py} +98 -69
  331. mindspore/ops_generate/{tensor_func_reg_cpp_generator.py → api/tensor_func_reg_cpp_generator.py} +82 -43
  332. mindspore/ops_generate/common/__init__.py +0 -0
  333. mindspore/ops_generate/common/gen_constants.py +91 -0
  334. mindspore/ops_generate/{gen_utils.py → common/gen_utils.py} +72 -19
  335. mindspore/ops_generate/{op_proto.py → common/op_proto.py} +64 -1
  336. mindspore/ops_generate/{template.py → common/template.py} +96 -84
  337. mindspore/ops_generate/gen_ops.py +23 -325
  338. mindspore/ops_generate/op_def/__init__.py +0 -0
  339. mindspore/ops_generate/op_def/gen_op_def.py +90 -0
  340. mindspore/ops_generate/{lite_ops_cpp_generator.py → op_def/lite_ops_cpp_generator.py} +47 -11
  341. mindspore/ops_generate/{ops_def_cc_generator.py → op_def/ops_def_cc_generator.py} +18 -10
  342. mindspore/ops_generate/{ops_def_h_generator.py → op_def/ops_def_h_generator.py} +5 -5
  343. mindspore/ops_generate/{ops_name_h_generator.py → op_def/ops_name_h_generator.py} +30 -15
  344. mindspore/ops_generate/op_def/ops_primitive_h_generator.py +125 -0
  345. mindspore/ops_generate/op_def_py/__init__.py +0 -0
  346. mindspore/ops_generate/op_def_py/gen_op_def_py.py +47 -0
  347. mindspore/ops_generate/{op_def_py_generator.py → op_def_py/op_def_py_generator.py} +6 -5
  348. mindspore/ops_generate/{op_prim_py_generator.py → op_def_py/op_prim_py_generator.py} +24 -15
  349. mindspore/ops_generate/pyboost/__init__.py +0 -0
  350. mindspore/ops_generate/{auto_grad_impl_cc_generator.py → pyboost/auto_grad_impl_cc_generator.py} +11 -7
  351. mindspore/ops_generate/{auto_grad_reg_cc_generator.py → pyboost/auto_grad_reg_cc_generator.py} +7 -7
  352. mindspore/ops_generate/{gen_pyboost_func.py → pyboost/gen_pyboost_func.py} +40 -16
  353. mindspore/ops_generate/{op_template_parser.py → pyboost/op_template_parser.py} +105 -24
  354. mindspore/ops_generate/{pyboost_functions_cpp_generator.py → pyboost/pyboost_functions_cpp_generator.py} +55 -18
  355. mindspore/ops_generate/{pyboost_functions_h_generator.py → pyboost/pyboost_functions_h_generator.py} +42 -10
  356. mindspore/ops_generate/{pyboost_functions_py_generator.py → pyboost/pyboost_functions_py_generator.py} +6 -6
  357. mindspore/ops_generate/{pyboost_grad_function_cpp_generator.py → pyboost/pyboost_grad_function_cpp_generator.py} +11 -10
  358. mindspore/ops_generate/{pyboost_inner_prim_generator.py → pyboost/pyboost_inner_prim_generator.py} +8 -7
  359. mindspore/ops_generate/{pyboost_native_grad_functions_generator.py → pyboost/pyboost_native_grad_functions_generator.py} +14 -10
  360. mindspore/ops_generate/{pyboost_op_cpp_code_generator.py → pyboost/pyboost_op_cpp_code_generator.py} +140 -53
  361. mindspore/ops_generate/{pyboost_overload_functions_cpp_generator.py → pyboost/pyboost_overload_functions_cpp_generator.py} +28 -15
  362. mindspore/ops_generate/{pyboost_utils.py → pyboost/pyboost_utils.py} +88 -4
  363. mindspore/ops_generate/resources/__init__.py +0 -0
  364. mindspore/ops_generate/resources/resource_list.py +30 -0
  365. mindspore/ops_generate/resources/resource_loader.py +36 -0
  366. mindspore/ops_generate/resources/resource_manager.py +64 -0
  367. mindspore/ops_generate/resources/yaml_loader.py +88 -0
  368. mindspore/ops_generate/tensor_py_cc_generator.py +122 -0
  369. mindspore/parallel/__init__.py +6 -2
  370. mindspore/parallel/_auto_parallel_context.py +140 -12
  371. mindspore/parallel/_cell_wrapper.py +132 -15
  372. mindspore/parallel/_parallel_serialization.py +95 -4
  373. mindspore/parallel/_ps_context.py +1 -1
  374. mindspore/parallel/_recovery_context.py +7 -2
  375. mindspore/parallel/_tensor.py +142 -18
  376. mindspore/parallel/_utils.py +198 -25
  377. mindspore/parallel/algo_parameter_config.py +3 -3
  378. mindspore/parallel/auto_parallel.py +732 -0
  379. mindspore/parallel/checkpoint_convert.py +159 -0
  380. mindspore/parallel/checkpoint_transform.py +658 -37
  381. mindspore/parallel/cluster/process_entity/_api.py +151 -19
  382. mindspore/parallel/cluster/run.py +1 -1
  383. mindspore/parallel/function/__init__.py +24 -0
  384. mindspore/parallel/function/reshard_func.py +258 -0
  385. mindspore/parallel/nn/__init__.py +25 -0
  386. mindspore/parallel/nn/parallel_cell_wrapper.py +263 -0
  387. mindspore/parallel/nn/parallel_grad_reducer.py +169 -0
  388. mindspore/parallel/parameter_broadcast.py +24 -13
  389. mindspore/parallel/shard.py +137 -62
  390. mindspore/parallel/transform_safetensors.py +288 -95
  391. mindspore/pgodb140.dll +0 -0
  392. mindspore/pgort140.dll +0 -0
  393. mindspore/profiler/__init__.py +9 -5
  394. mindspore/profiler/analysis/parser/ascend_cann_parser.py +6 -2
  395. mindspore/profiler/analysis/parser/ms_framework_parser.py +4 -4
  396. mindspore/profiler/analysis/parser/timeline_assembly_factory/ascend_timeline_assembler.py +7 -4
  397. mindspore/profiler/analysis/parser/timeline_assembly_factory/trace_view_container.py +25 -0
  398. mindspore/profiler/analysis/parser/timeline_creator/fwk_timeline_creator.py +3 -3
  399. mindspore/profiler/analysis/parser/timeline_event/fwk_event.py +241 -86
  400. mindspore/profiler/analysis/viewer/ascend_communication_viewer.py +41 -2
  401. mindspore/profiler/analysis/viewer/ascend_kernel_details_viewer.py +33 -35
  402. mindspore/profiler/analysis/viewer/ascend_memory_viewer.py +7 -0
  403. mindspore/profiler/analysis/viewer/ascend_op_memory_viewer.py +8 -3
  404. mindspore/profiler/analysis/viewer/ascend_step_trace_time_viewer.py +141 -30
  405. mindspore/profiler/analysis/viewer/ms_dataset_viewer.py +5 -6
  406. mindspore/profiler/common/ascend_msprof_exporter.py +5 -4
  407. mindspore/profiler/common/constant.py +12 -0
  408. mindspore/profiler/common/msprof_cmd_tool.py +42 -23
  409. mindspore/profiler/common/path_manager.py +24 -0
  410. mindspore/profiler/common/profiler_context.py +26 -2
  411. mindspore/profiler/common/profiler_meta_data.py +74 -0
  412. mindspore/profiler/common/profiler_parameters.py +59 -18
  413. mindspore/profiler/common/profiler_path_manager.py +66 -7
  414. mindspore/profiler/dynamic_profiler.py +112 -79
  415. mindspore/profiler/envprofiler.py +26 -1
  416. mindspore/profiler/experimental_config.py +197 -0
  417. mindspore/profiler/mstx.py +57 -14
  418. mindspore/profiler/platform/npu_profiler.py +33 -7
  419. mindspore/profiler/profiler.py +541 -45
  420. mindspore/profiler/profiler_action_controller.py +1 -1
  421. mindspore/profiler/profiler_interface.py +4 -0
  422. mindspore/profiler/schedule.py +57 -22
  423. mindspore/rewrite/api/node.py +15 -13
  424. mindspore/rewrite/api/symbol_tree.py +1 -1
  425. mindspore/run_check/_check_version.py +25 -14
  426. mindspore/run_check/run_check.py +1 -1
  427. mindspore/runtime/__init__.py +2 -2
  428. mindspore/runtime/executor.py +40 -11
  429. mindspore/runtime/memory.py +37 -13
  430. mindspore/safeguard/rewrite_obfuscation.py +12 -9
  431. mindspore/swresample-4.dll +0 -0
  432. mindspore/swscale-6.dll +0 -0
  433. mindspore/tbbmalloc.dll +0 -0
  434. mindspore/tinyxml2.dll +0 -0
  435. mindspore/train/__init__.py +8 -8
  436. mindspore/train/_utils.py +43 -9
  437. mindspore/train/amp.py +1 -1
  438. mindspore/train/callback/__init__.py +2 -2
  439. mindspore/train/callback/_callback.py +2 -16
  440. mindspore/train/callback/_checkpoint.py +24 -40
  441. mindspore/train/callback/_cluster_monitor.py +14 -18
  442. mindspore/train/callback/_flops_collector.py +2 -3
  443. mindspore/train/callback/_history.py +7 -4
  444. mindspore/train/callback/_lambda_callback.py +2 -2
  445. mindspore/train/callback/_landscape.py +0 -3
  446. mindspore/train/callback/_loss_monitor.py +2 -1
  447. mindspore/train/callback/_on_request_exit.py +6 -5
  448. mindspore/train/callback/_reduce_lr_on_plateau.py +11 -6
  449. mindspore/train/callback/_summary_collector.py +8 -13
  450. mindspore/train/callback/_time_monitor.py +2 -1
  451. mindspore/train/callback/{_tft_register.py → _train_fault_tolerance.py} +204 -105
  452. mindspore/train/data_sink.py +25 -2
  453. mindspore/train/dataset_helper.py +4 -5
  454. mindspore/train/loss_scale_manager.py +8 -7
  455. mindspore/train/metrics/accuracy.py +3 -3
  456. mindspore/train/metrics/confusion_matrix.py +9 -9
  457. mindspore/train/metrics/error.py +3 -3
  458. mindspore/train/metrics/hausdorff_distance.py +4 -4
  459. mindspore/train/metrics/mean_surface_distance.py +3 -3
  460. mindspore/train/metrics/metric.py +0 -12
  461. mindspore/train/metrics/occlusion_sensitivity.py +4 -2
  462. mindspore/train/metrics/precision.py +8 -6
  463. mindspore/train/metrics/recall.py +9 -9
  464. mindspore/train/metrics/root_mean_square_surface_distance.py +2 -2
  465. mindspore/train/mind_ir_pb2.py +19 -12
  466. mindspore/train/model.py +262 -127
  467. mindspore/train/serialization.py +246 -988
  468. mindspore/train/summary/_summary_adapter.py +2 -2
  469. mindspore/train/summary/summary_record.py +1 -1
  470. mindspore/turbojpeg.dll +0 -0
  471. mindspore/utils/__init__.py +3 -2
  472. mindspore/utils/dryrun.py +4 -2
  473. mindspore/utils/hooks.py +81 -0
  474. mindspore/utils/runtime_execution_order_check.py +2 -0
  475. mindspore/utils/utils.py +138 -4
  476. mindspore/vcmeta.dll +0 -0
  477. mindspore/vcruntime140.dll +0 -0
  478. mindspore/vcruntime140_1.dll +0 -0
  479. mindspore/version.py +1 -1
  480. {mindspore-2.5.0.dist-info → mindspore-2.6.0.dist-info}/METADATA +2 -1
  481. {mindspore-2.5.0.dist-info → mindspore-2.6.0.dist-info}/RECORD +485 -440
  482. mindspore/_install_custom.py +0 -43
  483. mindspore/common/_register_for_adapter.py +0 -74
  484. mindspore/ops/auto_generate/gen_arg_dtype_cast.py +0 -252
  485. mindspore/ops/auto_generate/gen_arg_handler.py +0 -136
  486. mindspore/ops/operations/_opaque_predicate_registry.py +0 -41
  487. mindspore/ops_generate/gen_constants.py +0 -190
  488. mindspore/ops_generate/gen_ops_inner_prim.py +0 -131
  489. mindspore/ops_generate/ops_primitive_h_generator.py +0 -81
  490. /mindspore/ops_generate/{base_generator.py → common/base_generator.py} +0 -0
  491. {mindspore-2.5.0.dist-info → mindspore-2.6.0.dist-info}/WHEEL +0 -0
  492. {mindspore-2.5.0.dist-info → mindspore-2.6.0.dist-info}/entry_points.txt +0 -0
  493. {mindspore-2.5.0.dist-info → mindspore-2.6.0.dist-info}/top_level.txt +0 -0
mindspore/train/__init__.py CHANGED
@@ -25,12 +25,12 @@ from mindspore.train import amp
  from mindspore.train.amp import build_train_network
  from mindspore.train.loss_scale_manager import LossScaleManager, FixedLossScaleManager, DynamicLossScaleManager
  from mindspore.train.serialization import save_checkpoint, load_checkpoint, load_param_into_net, export, \
-     load, parse_print, build_searched_strategy, merge_sliced_parameter, load_distributed_checkpoint, \
-     async_ckpt_thread_status, restore_group_info_list, convert_model, obfuscate_model, export_split_mindir, \
-     load_checkpoint_async, check_checkpoint, get_ckpt_path_with_strategy, ckpt_to_safetensors, safetensors_to_ckpt
+     load, parse_print, async_ckpt_thread_status, convert_model, export_split_mindir, \
+     load_checkpoint_async, check_checkpoint, get_ckpt_path_with_strategy, ckpt_to_safetensors, safetensors_to_ckpt, \
+     build_searched_strategy, merge_sliced_parameter, load_distributed_checkpoint, restore_group_info_list
  from mindspore.train.callback import Callback, LossMonitor, TimeMonitor, ModelCheckpoint, SummaryCollector, \
      CheckpointConfig, RunContext, LearningRateScheduler, SummaryLandscape, FlopsUtilizationCollector, \
-     History, LambdaCallback, ReduceLROnPlateau, EarlyStopping, OnRequestExit, BackupAndRestore, TFTRegister
+     History, LambdaCallback, ReduceLROnPlateau, EarlyStopping, OnRequestExit, BackupAndRestore, TrainFaultTolerance
  from mindspore.train.summary import SummaryRecord
  from mindspore.train.train_thor import ConvertNetUtils, ConvertModelUtils
  from mindspore.train.metrics import *
@@ -38,10 +38,10 @@ from mindspore.train.data_sink import data_sink
 
  __all__ = ["Model", "DatasetHelper", "connect_network_with_dataset", "build_train_network", "LossScaleManager",
             "FixedLossScaleManager", "DynamicLossScaleManager", "save_checkpoint", "load_checkpoint", "check_checkpoint",
-            "load_param_into_net", "export", "load", "export_split_mindir", "parse_print", "build_searched_strategy",
-            "merge_sliced_parameter", "load_distributed_checkpoint", "async_ckpt_thread_status",
-            "restore_group_info_list", "convert_model", "data_sink", "obfuscate_model", "load_checkpoint_async",
-            "get_ckpt_path_with_strategy", "ckpt_to_safetensors", "safetensors_to_ckpt"]
+            "load_param_into_net", "export", "load", "export_split_mindir", "parse_print", "async_ckpt_thread_status",
+            "convert_model", "data_sink", "load_checkpoint_async", "get_ckpt_path_with_strategy", "ckpt_to_safetensors",
+            "safetensors_to_ckpt", "build_searched_strategy", "merge_sliced_parameter", "load_distributed_checkpoint",
+            "restore_group_info_list"]
  __all__.extend(callback.__all__)
  __all__.extend(summary.__all__)
  __all__.extend(train_thor.__all__)
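
The user-visible API change in this file is the rename of the fault-tolerance callback: `TFTRegister` becomes `TrainFaultTolerance` (and `obfuscate_model` drops out of the re-exports). A minimal migration sketch, assuming only the class name changed as the diff indicates:

# Version-tolerant import covering both 2.5.0 and 2.6.0.
try:
    from mindspore.train.callback import TrainFaultTolerance  # 2.6.0 name
except ImportError:
    # Pre-2.6 releases exported the same callback as TFTRegister.
    from mindspore.train.callback import TFTRegister as TrainFaultTolerance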
mindspore/train/_utils.py CHANGED
@@ -16,6 +16,7 @@
  from __future__ import absolute_import
 
  import os
+ import sys
  import json
  from collections.abc import Iterable
 
@@ -23,7 +24,7 @@ import time
  import numpy as np
 
  from mindspore.common.tensor import Tensor
- from mindspore._c_expression import Tensor as Tensor_
+ from mindspore._c_expression import TensorPy as Tensor_
  from mindspore._c_expression import MSContext, ms_ctx_param
  from mindspore.common.dtype import dtype_to_nptype, pytype_to_dtype
  from mindspore.common import dtype as mstype
@@ -31,7 +32,7 @@ from mindspore import context
  from mindspore import log as logger
  from mindspore import _checkparam as Validator
  from mindspore.common.api import _cell_graph_executor
- from mindspore.communication import get_group_size
+ from mindspore.communication.management import get_rank, get_group_size
  from mindspore.train.mind_ir_pb2 import ModelProto as mindir_model
  from mindspore.train.checkpoint_pb2 import Checkpoint
  from mindspore.train.node_strategy_pb2 import ParallelStrategyMap as ckpt_strategy
@@ -64,6 +65,7 @@ def _get_types_and_shapes(dataset):
      dataset_shapes = dataset.output_shapes()
      return dataset_types, dataset_shapes
 
+
  def enable_data_broadcast():
      """Get status to indicate if enable dataset broadcast."""
      return MSContext.get_instance().get_param(ms_ctx_param.dataset_broadcast_opt_level) > 0
@@ -321,9 +323,15 @@ def parse_strategy_ckpt(file_name):
  def _get_strategy_opt_shard(param_redundancy_dict, parameter_layout_opt_shard):
      """Strategy ckpt append opt shard."""
      for key, value in parameter_layout_opt_shard.items():
-         if value[1] not in (-1, 0):
-             opt_para_num = value[1]
+         if value[1] != 0:
              param_redundancy_ranks = param_redundancy_dict.get(key)
+             if value[1] != -1:
+                 opt_para_num = value[1]
+             elif param_redundancy_ranks:
+                 opt_para_num = len(param_redundancy_ranks) * len(param_redundancy_ranks[0]) // value[0]
+             else:
+                 raise ValueError(f"For get_parameter_redundancy, the format of the parallel communication domain for "
+                                  f"the optimizer is incorrect.")
              res = []
              for param_ranks in param_redundancy_ranks:
                  if len(param_ranks) % opt_para_num == 0:
@@ -375,20 +383,40 @@ def _get_parameter_redundancy_without_opt_shard(parameter_layout, param_redundan
          param_redundancy_dict[key] = tuple(redundancy_list)
 
 
- def get_parameter_redundancy(layout_obj, initial_rank=0):
+ def _get_initial_rank(parameter_layout):
+     """Get the initial rank of pp."""
+     for k, _ in parameter_layout.items():
+         dev_matrix = parameter_layout[k][0]
+         break
+     dev_num = 1
+     if dev_matrix:
+         for i in dev_matrix:
+             dev_num *= i
+     rank_id = get_rank()
+     initial_rank = (rank_id // dev_num) * dev_num
+     return initial_rank
+
+
+ def _get_pp_size_from_redundancy_map(param_redundancy):
+     """Get pp size from redundancy map."""
+     for _, v in param_redundancy.items():
+         return len(v) * len(v[0])
+
+
+ def get_parameter_redundancy(layout_obj, initial_rank=None):
      """
      Get parameter redundancy map.
 
      Args:
          layout_obj (Union[str, layout): File name of `strategy.ckpt` or net.parameter_layout_dict.
-         initial_rank (int): Start rank id for each pipeline. Default: 0.
+         initial_rank (int): Start rank id for each pipeline. Default: ``None``.
 
      Returns:
          Dict, dict of parameter redundancy info.
 
      Examples:
          >>> from mindspore.train.utils import get_parameter_redundancy
-         >>> param_redundancy_dict = get_parameter_redundancy("/path/to/strategy.ckpt")
+         >>> param_redundancy_dict = get_parameter_redundancy("/path/to/strategy.ckpt", initial_rank=0)
          {'param1': ((0, 1, 2, 3, 4, 5, 6, 7),),
          'param2': ((0, 4, 8, 12), (1, 5, 9, 13), (2, 6, 10, 14), (3, 7, 11, 15)),
          'param3': ((0, 4, 8, 12), (1, 5, 9, 13), (2, 6, 10, 14), (3, 7, 11, 15)),
@@ -405,7 +433,8 @@ def get_parameter_redundancy(layout_obj, initial_rank=0):
          from mindspore.communication.management import get_process_group_ranks
          groups_ranks = (tuple(get_process_group_ranks()),)
          param_redundancy_dict = {param.name: groups_ranks for _, param in layout_obj.parameters_and_names()}
-         return param_redundancy_dict
+         sorted_param_redundancy_dict = {key: param_redundancy_dict[key] for key in sorted(param_redundancy_dict.keys())}
+         return sorted_param_redundancy_dict
      else:
          parameter_layout = {}
          for k, v in layout_obj.items():
@@ -413,6 +442,9 @@
 
      param_redundancy_dict = {}
 
+     if initial_rank is None:
+         initial_rank = _get_initial_rank(parameter_layout)
+
      _get_parameter_redundancy_without_opt_shard(parameter_layout, param_redundancy_dict, initial_rank)
 
      if isinstance(layout_obj, str):
@@ -420,7 +452,8 @@
      else:
          _get_layout_opt_shard(layout_obj, param_redundancy_dict)
 
-     return param_redundancy_dict
+     sorted_param_redundancy_dict = {key: param_redundancy_dict[key] for key in sorted(param_redundancy_dict.keys())}
+     return sorted_param_redundancy_dict
 
 
  def _collect_settings_by_rank(redundancy_map):
@@ -539,6 +572,7 @@ def _progress_bar(iterable, total=None):
          elapsed_time_str = time.strftime("%H:%M:%S", time.gmtime(elapsed_time))
          remaining_time_str = time.strftime("%H:%M:%S", time.gmtime(remaining_time))
 
+         sys.stdout.reconfigure(encoding="utf-8")
          print(f'\r{percent}%|{bar}|[{elapsed_time_str}<{remaining_time_str}]', end='')
          if iteration == total:
              print()
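
`get_parameter_redundancy` now defaults `initial_rank` to ``None`` and infers it from the device matrix and the calling rank via the new `_get_initial_rank` helper; the returned dict is also sorted by parameter name. A usage sketch based on the docstring above (the strategy-file path is a placeholder, and the import path follows the docstring):

from mindspore.train.utils import get_parameter_redundancy

# 2.6.0: initial_rank may be omitted; it is derived from the current rank.
param_redundancy_dict = get_parameter_redundancy("/path/to/strategy.ckpt")

# Passing initial_rank=0 explicitly reproduces the 2.5.0 default behaviour.
param_redundancy_dict = get_parameter_redundancy("/path/to/strategy.ckpt", initial_rank=0)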
mindspore/train/amp.py CHANGED
@@ -638,7 +638,7 @@ def _add_loss_network(network, loss_fn, cast_model_type):
 
 
  def _is_grad_accumulation(mcell):
-     if mcell.cls_name == "GradAccumulationCell":
+     if mcell.cls_name == "GradAccumulationCell" or mcell.cls_name == "GradAccumulation":
          return True
      for cell in mcell.cells():
          if _is_grad_accumulation(cell):
mindspore/train/callback/__init__.py CHANGED
@@ -36,9 +36,9 @@ from mindspore.train.callback._reduce_lr_on_plateau import ReduceLROnPlateau
  from mindspore.train.callback._on_request_exit import OnRequestExit
  from mindspore.train.callback._backup_and_restore import BackupAndRestore
  from mindspore.train.callback._flops_collector import FlopsUtilizationCollector
- from mindspore.train.callback._tft_register import TFTRegister
+ from mindspore.train.callback._train_fault_tolerance import TrainFaultTolerance
 
  __all__ = ["Callback", "LossMonitor", "TimeMonitor", "ModelCheckpoint", "FlopsUtilizationCollector",
             "SummaryCollector", "CheckpointConfig", "RunContext", "LearningRateScheduler", "SummaryLandscape",
             "History", "LambdaCallback", "ReduceLROnPlateau", "EarlyStopping", "OnRequestExit", "BackupAndRestore",
-            "TFTRegister"]
+            "TrainFaultTolerance"]
mindspore/train/callback/_callback.py CHANGED
@@ -121,10 +121,7 @@ class Callback:
      When creating a custom Callback, model context information can be obtained in Callback
      methods by calling `RunContext.original_args()`, which is a dictionary varivable
      recording current attributes. Users can add custimized attributes to the information.
-     Training process can also be stopped by calling `request_stop` method. For details
-     of custom Callback, please check
-     `Callback tutorial <https://www.mindspore.cn/docs/en/master/model_train/train_process/model/
-     callback.html#customized-callback-mechanism>`_.
+     Training process can also be stopped by calling `request_stop` method.
 
      Examples:
          >>> import numpy as np
@@ -491,9 +488,7 @@ class RunContext:
 
      Callback objects not only can obtain the Model context information by calling by
      `RunContext.original_args()` and add extra attributes to the information, but also can stop the
-     training process by calling `request_stop` method. For details of custom Callback,
-     please check
-     `Callback Mechanism <https://www.mindspore.cn/docs/en/master/model_train/train_process/model/callback.html>`_.
+     training process by calling `request_stop` method.
 
      `RunContext.original_args()` holds the model context information as a dictionary variable, and
      different attributes of the dictionary are stored in training or eval process. Details are as follows:
@@ -572,10 +567,6 @@ class RunContext:
 
          Returns:
              Dict, an object that holds the original arguments of model.
-
-         Tutorial Examples:
-             - `Callback Mechanism - Customized Callback Mechanism
-               <https://mindspore.cn/docs/en/master/model_train/train_process/model/callback.html#customized-callback-mechanism>`_
          """
          return self._original_args
 
@@ -585,11 +576,6 @@ class RunContext:
 
          Callbacks can use this function to request stop of iterations.
          model.train() checks whether this is called or not.
-
-         Tutorial Examples:
-             - `Callback Mechanism - Customized Training Termination Time
-               <https://mindspore.cn/docs/en/master/model_train/train_process/model/callback.html#
-               customized-training-termination-time>`_
          """
          self._stop_requested = True
 
mindspore/train/callback/_checkpoint.py CHANGED
@@ -28,15 +28,12 @@ from mindspore.train.serialization import save_checkpoint, _save_graph, _wait_as
      _wait_async_thread_save_ckpt, _check_async_save
  from mindspore.parallel._cell_wrapper import destroy_allgather_cell
  from mindspore.parallel._recovery_context import _set_recovery_context, _get_recovery_context
- from mindspore.parallel._auto_parallel_context import _get_auto_parallel_context
- from mindspore.parallel._utils import _get_device_num
- from mindspore.communication.management import get_rank
- from mindspore.train._utils import get_parameter_redundancy, remove_param_redundancy
- from mindspore.train.callback._callback import Callback, set_cur_net
+ from mindspore.communication.management import get_rank, get_group_size
+ from mindspore.train._utils import get_parameter_redundancy, remove_param_redundancy, _get_pp_size_from_redundancy_map
+ from mindspore.train.callback._callback import Callback
  from mindspore.common.tensor import Tensor
  from mindspore.common.parameter import Parameter
  from mindspore.common.generator import Generator
- from mindspore.common.api import _cell_graph_executor
  from mindspore._c_expression import collect_host_info, get_clock_syscnt
 
  _cur_dir = os.getcwd()
@@ -87,7 +84,7 @@ def _chg_ckpt_file_name_if_same_exist(directory, prefix, exception=False):
          name_ext = os.path.splitext(filename)
          if exception and filename[-16:] != "_breakpoint.ckpt":
              continue
-         if not exception and (name_ext[-1] != ".ckpt" or filename[-16:] == "_breakpoint.ckpt"):
+         if not exception and (name_ext[-1] not in (".ckpt", ".safetensors") or filename[-16:] == "_breakpoint.ckpt"):
              continue
          # find same prefix file
          if filename.find(prefix) == 0 and not filename[pre_len].isalpha():
@@ -106,10 +103,10 @@ def _chg_ckpt_file_name_if_same_exist(directory, prefix, exception=False):
      return prefix
 
 
- def _check_format_and_other_params(format, enc_key, enc_mode, crc_check=False, async_save=False, exception_save=False,
+ def _check_format_and_other_params(format, enc_key, enc_mode, crc_check=False, exception_save=False,
                                     map_param_inc=False, global_step_num=None):
-     param_not_default = (enc_key is not None or enc_mode != "AES-GCM" or crc_check or async_save
-                          or exception_save or map_param_inc or global_step_num is not None)
+     param_not_default = (enc_key is not None or enc_mode != "AES-GCM" or crc_check or exception_save or map_param_inc
+                          or global_step_num is not None)
      if format == "safetensors" and param_not_default:
          raise ValueError("For 'save_checkpoint', when format is 'safetensors', other param must be default.")
 
@@ -139,9 +136,9 @@ class CheckpointConfig:
          integrated_save (bool): Whether to merge and save the split Tensor in the automatic parallel scenario.
              Integrated save function is only supported in automatic parallel scene, not supported
              in manual parallel. Default: ``True`` .
-         async_save (Union[bool, str]):Whether to use asynchronous saving of the checkpoint file, if True,
-             the asynchronous thread is used by default. If the type is string,
-             the method of asynchronous saving, it can be "process" or "thread".
+         async_save (Union[bool, str], optional):Whether to use asynchronous saving of the checkpoint file or
+             safetensors file, if True, the asynchronous thread is used by default. If the type
+             is string, the method of asynchronous saving, it can be "process" or "thread".
              Default: ``False`` .
          saved_network (Cell): Network to be saved in checkpoint file. If the saved_network has no relation
              with the network in training, the initial value of saved_network will be saved. Default: ``None`` .
@@ -261,8 +258,7 @@ class CheckpointConfig:
          self.enable_redundance = kwargs.get('enable_redundance', False)
          self.remove_redundancy = Validator.check_isinstance('remove_redundancy', remove_redundancy, bool)
 
-         _check_format_and_other_params(format, enc_key, enc_mode, crc_check, async_save, exception_save,
-                                        self._map_param_inc)
+         _check_format_and_other_params(format, enc_key, enc_mode, crc_check, exception_save, self._map_param_inc)
 
      @property
      def save_checkpoint_steps(self):
@@ -452,8 +448,9 @@ class ModelCheckpoint(Callback):
      Note:
          In the distributed training scenario, please specify different directories for each training process
          to save the checkpoint file. Otherwise, the training may fail.
-         If this callback is used in the `model` function, the checkpoint file will saved
-         parameters of the optimizer by default.
+         If this callback is used in the
+         `Model <https://www.mindspore.cn/docs/en/master/api_python/train/mindspore.train.Model.html>`_ function,
+         the checkpoint file will saved parameters of the optimizer by default.
 
      Args:
          prefix (Union[str, callable object]): The prefix name or callable object to generate name of checkpoint files.
@@ -514,7 +511,7 @@ class ModelCheckpoint(Callback):
          if callable(prefix):
              self._prefix_func = prefix
 
-         if _get_recovery_context("enable_recovery"):
+         if context.get_context("device_target") == "GPU" and _get_recovery_context("enable_recovery"):
              _set_recovery_context(ckpt_path=self._directory)
 
          if config is None:
@@ -556,19 +553,17 @@ class ModelCheckpoint(Callback):
              from aiturbo.checkpoint import aiturbo_mindspore as aiturbo
              ckpt_storage_path = self._directory
              rank_id = get_rank()
-             stage_num = _get_auto_parallel_context("pipeline_stages")
-             stage_rank_num = _get_device_num() // stage_num
+             device_num = get_group_size()
              param_layout = cb_params.train_network.parameter_layout_dict
              if not param_layout:
-                 layout = {"stage_num": stage_num, "stage_rank_num": stage_rank_num, "stage_layout": None}
+                 layout = {"stage_num": 1, "stage_rank_num": device_num, "stage_layout": None}
                  aiturbo.init(ckpt_storage_path, rank_id, layout, None, False, None)
              else:
-                 device_num = _get_device_num()
-                 chunk_size = device_num // stage_num
-                 initial_rank = (rank_id // chunk_size) * chunk_size
-                 param_redundancy_dict = get_parameter_redundancy(param_layout, initial_rank)
+                 param_redundancy_dict = get_parameter_redundancy(param_layout)
+                 pp_size = _get_pp_size_from_redundancy_map(param_redundancy_dict)
+                 stage_num = device_num // pp_size
                  dp, _ = _get_dp_tp_from_layout(param_redundancy_dict)
-                 layout = {"stage_num": stage_num, "stage_rank_num": stage_rank_num,
+                 layout = {"stage_num": stage_num, "stage_rank_num": pp_size,
                            "stage_layout": param_redundancy_dict}
                  single_params = remove_param_redundancy(param_redundancy_dict)
                  single_params = {device_id: list(params) for device_id, params in single_params.items()}
@@ -684,12 +679,6 @@ class ModelCheckpoint(Callback):
              self._last_time_for_keep = time.time()
              self._last_triggered_step = cb_params.cur_step_num
 
-             # TODO(MS_DISABLE_REF_MODE): Delete when remove MS_DISABLE_REF_MODE env.
-             if context.get_context("enable_ge") and os.getenv('MS_DISABLE_REF_MODE') \
-                     and context.get_context("mode") == context.GRAPH_MODE:
-                 set_cur_net(cb_params.train_network)
-                 cb_params.train_network.add_flags(ge_sync_data=True)
-                 _cell_graph_executor(cb_params.train_network, phase='save')
              self._append_dict_content(cb_params.cur_epoch_num, cb_params.cur_step_num)
              network = self._config.saved_network if self._config.saved_network is not None else cb_params.train_network
              if os.getenv("AITURBO") == "1":
@@ -698,18 +687,13 @@ class ModelCheckpoint(Callback):
                                  crc_check=self._config.crc_check, incremental=self._map_param_inc,
                                  global_step_num=cb_params.cur_step_num)
              elif self._config.remove_redundancy:
-                 parallel_mode = context.get_auto_parallel_context("parallel_mode")
-                 if parallel_mode == "stand_alone":
+                 if get_group_size() == 1:
                      raise TypeError(f"The deduplication feature for saving checkpoint can only be used "
-                                     f"in parallel scenarios, but got {parallel_mode}.")
+                                     f"in parallel scenarios, but got 'stand_alone'.")
                  param_layout = network.parameter_layout_dict
                  rank_id = get_rank()
                  if param_layout:
-                     device_num = _get_device_num()
-                     stage_num = _get_auto_parallel_context("pipeline_stages")
-                     chunk_size = device_num // stage_num
-                     initial_rank = (rank_id // chunk_size) * chunk_size
-                     param_redundancy_dict = get_parameter_redundancy(param_layout, initial_rank)
+                     param_redundancy_dict = get_parameter_redundancy(param_layout)
                      single_params = remove_param_redundancy(param_redundancy_dict)
                      save_param_names = single_params.get(rank_id)
                      param_layout_set = set(param_layout.keys())
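
Dropping `async_save` from `_check_format_and_other_params` means asynchronous saving no longer counts as a "non-default param" that is rejected for the safetensors format; the updated `async_save` docstring above now covers both file types. A hedged sketch, assuming `CheckpointConfig` accepts the `format` keyword that its call to `_check_format_and_other_params` implies:

from mindspore.train import CheckpointConfig

# In 2.5.0 this combination raised: "when format is 'safetensors',
# other param must be default." From 2.6.0, async_save is exempt.
config = CheckpointConfig(save_checkpoint_steps=100,
                          async_save="thread",    # bool, or "thread"/"process"
                          format="safetensors")   # assumed keyword, per the __init__ call above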
mindspore/train/callback/_cluster_monitor.py CHANGED
@@ -24,9 +24,8 @@ from threading import RLock
  from mindspore.train.callback._callback import Callback
  from mindspore.communication.management import get_rank, get_local_rank
  from mindspore import log as logger
- from mindspore.parallel._auto_parallel_context import _get_auto_parallel_context
  from mindspore.parallel._utils import _get_device_num
- from mindspore.train._utils import get_parameter_redundancy
+ from mindspore.train._utils import get_parameter_redundancy, _get_pp_size_from_redundancy_map
 
  _perf_mutex = RLock()
 
@@ -42,7 +41,7 @@ def _get_dp_tp_from_redundancy(redundancy_tuple):
      return dp, tp
 
 
- def _get_dp_tp_from_layout(parameter_layout_dict, initial_rank=0):
+ def _get_dp_tp_from_layout(parameter_layout_dict, initial_rank=None):
      """From layout dict get dp and tp"""
      tp = []
      dp = []
@@ -132,21 +131,9 @@ class ClusterMonitor(Callback):
          self.full_path = self.log_path + self.log_name
 
          self.write_dp_tp_flag = True
-         self.initial_rank = 0
 
      def begin(self, run_context):
          _remove_pre_log()
-         pp_num = _get_auto_parallel_context("pipeline_stages")
-         device_num = _get_device_num()
-
-         original_list = list(range(device_num))
-         chunk_size = device_num // pp_num
-         split_pp_lists = []
-         for i in range(0, device_num, chunk_size):
-             end_index = i + chunk_size if i + chunk_size <= device_num else device_num
-             split_pp_lists.append(original_list[i:end_index])
-
-         self.initial_rank = (self.global_rank // chunk_size) * chunk_size
          with _perf_mutex:
              dir_path = os.path.dirname(self.full_path)
              if not os.path.exists(dir_path):
@@ -157,8 +144,6 @@ class ClusterMonitor(Callback):
              with open(self.full_path, 'w') as file:
                  log_message = f'UUID:{self.uuid_value}\nFRAMEWORK:{self.frame_work}\nGLOBAL RANKID:{self.global_rank}\n'
                  file.write(log_message)
-                 for _, split_pp_list in enumerate(split_pp_lists):
-                     file.write(f'PP:{split_pp_list}\n')
              os.chmod(self.full_path, stat.S_IRUSR)
 
      def step_begin(self, run_context):
@@ -183,10 +168,21 @@ class ClusterMonitor(Callback):
          if self.enabled and self.enabled_dtp_group and self.write_dp_tp_flag:
              cb_params = run_context.original_args()
              param_layout_dict = cb_params.train_network.parameter_layout_dict
-             dp, tp = _get_dp_tp_from_layout(param_layout_dict, self.initial_rank)
+             device_num = _get_device_num()
+             original_list = list(range(device_num))
+             param_redundancy_dict = get_parameter_redundancy(param_layout_dict)
+             pp_size = _get_pp_size_from_redundancy_map(param_redundancy_dict)
+             split_pp_lists = []
+             for i in range(0, device_num, pp_size):
+                 end_index = i + pp_size if i + pp_size <= device_num else device_num
+                 split_pp_lists.append(original_list[i:end_index])
+             dp, tp = _get_dp_tp_from_layout(param_layout_dict)
+
              with _perf_mutex:
                  os.chmod(self.full_path, stat.S_IWUSR)
                  with open(self.full_path, 'a') as file:
+                     for _, split_pp_list in enumerate(split_pp_lists):
+                         file.write(f'PP:{split_pp_list}\n')
                      for dp_value in dp:
                          file.write(f'dp:{dp_value}\n')
                      for tp_value in tp:
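
The PP grouping written to the log is now derived from the parameter-redundancy map instead of the `pipeline_stages` context value. A worked sketch of the `_get_pp_size_from_redundancy_map` helper, restated from the diff above and applied to the `param2` layout from the `get_parameter_redundancy` docstring:

# Self-contained restatement of the helper shown in mindspore/train/_utils.py:
# pp size = (number of redundancy groups) * (ranks per group), from the first entry.
def _get_pp_size_from_redundancy_map(param_redundancy):
    for _, v in param_redundancy.items():
        return len(v) * len(v[0])

param_redundancy = {
    'param2': ((0, 4, 8, 12), (1, 5, 9, 13), (2, 6, 10, 14), (3, 7, 11, 15)),
}
print(_get_pp_size_from_redundancy_map(param_redundancy))  # 4 groups * 4 ranks -> 16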
mindspore/train/callback/_flops_collector.py CHANGED
@@ -89,7 +89,7 @@ class FlopsUtilizationCollector(Callback):
          Train per step time: 135.572 ms, mfu:0.47% hfu:0.47%
          Train per step time: 1.317 ms, mfu:48.59% hfu:48.59%
      """
-     def __init__(self, data_size=None, computility=1, full_flops=True, enable_ma_collector=False):
+     def __init__(self, data_size, computility=1, full_flops=True, enable_ma_collector=False):
          super(FlopsUtilizationCollector, self).__init__()
          self.step_time = time.time()
          self.computility = computility
@@ -110,8 +110,7 @@
          self.batch_step_size = None
          Validator.check_bool(full_flops, "full_flops")
          Validator.check_bool(enable_ma_collector, "enable_ma_collector")
-         if data_size:
-             Validator.check_positive_int(data_size, "data_size")
+         Validator.check_positive_int(data_size, "data_size")
 
      def step_begin(self, run_context):
          """
mindspore/train/callback/_history.py CHANGED
@@ -25,10 +25,13 @@ class History(Callback):
      """
      Records the network outputs and metrics information into a `History` object.
 
-     The network outputs information will be the loss value if not custimizing the train network or eval network;
-     if the custimized network returns a `Tensor` or `numpy.ndarray`, the mean value of network output
-     will be recorded, if the custimized network returns a `tuple` or `list`, the first element of network
-     outputs will be recorded.
+     - The network outputs information will be the loss value if not custimizing the train network or eval network;
+     - If the train network or eval network is custimized:
+
+       - if the custimized network returns a `Tensor` or `numpy.ndarray`, the mean value of network output
+         will be recorded.
+       - if the custimized network returns a `tuple` or `list`, the first element of network
+         outputs will be recorded.
 
      Note:
          Normally used in :func:`mindspore.train.Model.train` or :func:`mindspore.train.Model.fit`.
mindspore/train/callback/_lambda_callback.py CHANGED
@@ -36,8 +36,8 @@ class LambdaCallback(Callback):
          on_train_step_end (Function): called at each train step end. Default: ``None`` .
          on_train_begin (Function): called at the beginning of model train. Default: ``None`` .
          on_train_end (Function): called at the end of model train. Default: ``None`` .
-         on_eval_epoch_begin (Function): called at eval epoch begin. Default: ``None`` .
-         on_eval_epoch_end (Function): called at eval epoch end. Default: ``None`` .
+         on_eval_epoch_begin (Function): called at each eval epoch begin. Default: ``None`` .
+         on_eval_epoch_end (Function): called at each eval epoch end. Default: ``None`` .
          on_eval_step_begin (Function): called at each eval step begin. Default: ``None`` .
          on_eval_step_end (Function): called at each eval step end. Default: ``None`` .
          on_eval_begin (Function): called at the beginning of model eval. Default: ``None`` .
mindspore/train/callback/_landscape.py CHANGED
@@ -256,9 +256,6 @@ class SummaryLandscape:
          """
          Clean the checkpoint.
 
-         Tutorial Examples:
-             - `Training Optimization Process Visualization
-               <https://www.mindspore.cn/mindinsight/docs/en/master/landscape.html>`_
          """
          shutil.rmtree(self._ckpt_dir, ignore_errors=True)
 
mindspore/train/callback/_loss_monitor.py CHANGED
@@ -93,7 +93,8 @@ class LossMonitor(Callback):
 
      def on_train_epoch_end(self, run_context):
          """
-         When LossMonitor used in `model.fit`, print eval metrics at the end of epoch if current epoch
+         When LossMonitor used in :func:`mindspore.train.Model.fit`, print eval metrics
+         at the end of epoch if current epoch
          should do evaluation.
 
          Args:
mindspore/train/callback/_on_request_exit.py CHANGED
@@ -26,6 +26,7 @@ from mindspore.common.tensor import Tensor
  from mindspore.train._utils import _make_directory
  from mindspore import _checkparam as Validator
  from mindspore.train.serialization import load_checkpoint, save_checkpoint, export
+ from mindspore.communication.management import get_group_size
  from mindspore.train.callback._callback import Callback
  from mindspore.parallel._utils import _get_parallel_mode
  from mindspore.context import ParallelMode
@@ -37,7 +38,7 @@ class OnRequestExit(Callback):
 
      Register OnRequestExit Callback before training, when the user want to exit the training process
      and save the training data, could send the registered exit signal 'sig' to the training process or modify the
-     'GracefulExit' that a key in the json file specified by the 'config_file' to '1'.
+     'GracefulExit' that a key in the JSON file specified by the 'config_file' to '1'.
      After the training process executes the current step, saves the current training status,
      including checkpoint and mindir, and then exit the training process.
 
@@ -58,7 +59,7 @@ class OnRequestExit(Callback):
          ValueError: If the 'save_mindir' is not a bool.
          ValueError: If the 'file_name' is not a str.
          ValueError: If the 'directory' is not a str.
-         ValueError: If the 'sig' is not an int or the 'sig' is signal.SIGKILL.
+         ValueError: If the 'sig' is not an int or the 'sig' is ``signal.SIGTERM``.
 
      Examples:
          >>> from mindspore import nn
@@ -92,10 +93,8 @@ class OnRequestExit(Callback):
          self.key = "GracefulExit"
          self.remote_config_file = config_file  # used config file to save checkpoint and exit training process
          self.use_graceful = os.environ.get("MS_ENABLE_GRACEFUL_EXIT") == "1"
-         self.is_distributed = _get_parallel_mode() != ParallelMode.STAND_ALONE
+         self.is_distributed = get_group_size() > 1
          self.integrated_save = True
-         if self.is_distributed:
-             self.integrated_save = _get_parallel_mode() == ParallelMode.AUTO_PARALLEL
          self.stop_train = False
          self.need_do_step_end = False
          if self.save_ckpt or self.save_mindir:
@@ -250,6 +249,8 @@ class OnRequestExit(Callback):
          else:
              global_step = int(call_params.network.optimizer.global_step.data)
          append_dict["global_step"] = global_step
+         if self.is_distributed:
+             self.integrated_save = _get_parallel_mode() == ParallelMode.AUTO_PARALLEL
          save_checkpoint(net, self.train_name, integrated_save=self.integrated_save,
                          append_dict=append_dict)
          if self.save_mindir:
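
`is_distributed` is now determined by the communication world size rather than the parallel mode, and `integrated_save` is resolved lazily at checkpoint time. A registration sketch; the constructor keywords mirror the Raises section above, and the argument values are illustrative:

import signal
from mindspore.train.callback import OnRequestExit

exit_cb = OnRequestExit(save_ckpt=True, save_mindir=True,
                        file_name='net', directory='./ckpt',
                        sig=signal.SIGTERM)
# model.train(epoch, train_dataset, callbacks=[exit_cb])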
mindspore/train/callback/_reduce_lr_on_plateau.py CHANGED
@@ -63,12 +63,17 @@ class ReduceLROnPlateau(Callback):
              will be reduced. Default: ``10`` .
          verbose (bool): If False: quiet, if True: print related information.
              Default: ``False`` .
-         mode (str): one of `{'auto', 'min', 'max'}`. In "min" mode,
-             the learning rate will be reduced when the
-             quantity monitored has stopped decreasing; in "max" mode it will be
-             reduced when the quantity monitored has stopped increasing; in "auto"
-             mode, the direction is automatically inferred from the name of the
-             monitored quantity. Default: ``'auto'`` .
+         mode (str): one of `{'auto', 'min', 'max'}`. Default: ``'auto'`` .
+
+             - In ``'min'`` mode,
+               the learning rate will be reduced when the
+               quantity monitored has stopped decreasing.
+             - In ``'max'`` mode it will be
+               reduced when the quantity monitored has stopped increasing.
+             - In ``'auto'``
+               mode, the direction is automatically inferred from the name of the
+               monitored quantity.
+
          min_delta (float): threshold for measuring the new optimum, to only focus on
              significant changes. Default: ``1e-4`` .
          cooldown (int): number of epochs to wait before resuming normal operation after
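
The reflowed `mode` description maps to three behaviours. A usage sketch; `monitor` and its metric name are assumed keywords for illustration (they are implied by "the quantity monitored" in the docstring but not shown in this hunk):

from mindspore.train.callback import ReduceLROnPlateau

# 'min': reduce the learning rate once the monitored quantity stops decreasing.
plateau_cb = ReduceLROnPlateau(monitor='eval_loss', mode='min',  # assumed keywords
                               patience=10, min_delta=1e-4, cooldown=0)
# model.fit(epoch, train_dataset, eval_dataset, callbacks=[plateau_cb])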