mindspore 2.5.0__cp311-cp311-win_amd64.whl → 2.6.0rc1__cp311-cp311-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of mindspore might be problematic.

Files changed (491)
  1. mindspore/.commit_id +1 -1
  2. mindspore/Microsoft.VisualStudio.Telemetry.dll +0 -0
  3. mindspore/Newtonsoft.Json.dll +0 -0
  4. mindspore/__init__.py +6 -4
  5. mindspore/_c_dataengine.cp311-win_amd64.pyd +0 -0
  6. mindspore/_c_expression.cp311-win_amd64.pyd +0 -0
  7. mindspore/_c_mindrecord.cp311-win_amd64.pyd +0 -0
  8. mindspore/_check_jit_forbidden_api.py +3 -0
  9. mindspore/_checkparam.py +3 -33
  10. mindspore/_deprecated/__init__.py +17 -0
  11. mindspore/_deprecated/jit.py +198 -0
  12. mindspore/_extends/builtin_operations.py +1 -1
  13. mindspore/_extends/parse/__init__.py +6 -7
  14. mindspore/_extends/parse/compile_config.py +19 -0
  15. mindspore/_extends/parse/deprecated/deprecated_tensor_method.py +22 -3
  16. mindspore/_extends/parse/jit_fallback_modules/__init__.py +0 -0
  17. mindspore/_extends/parse/jit_fallback_modules/check_utils.py +123 -0
  18. mindspore/_extends/parse/jit_fallback_modules/third_party_modules.py +50 -0
  19. mindspore/_extends/parse/parser.py +24 -193
  20. mindspore/_extends/parse/resources.py +1 -5
  21. mindspore/_extends/parse/standard_method.py +97 -74
  22. mindspore/_extends/pijit/__init__.py +2 -2
  23. mindspore/_extends/pijit/pijit_func_white_list.py +16 -11
  24. mindspore/_extends/pijit/tensor_func_list.py +27 -0
  25. mindspore/_extends/utils.py +1 -1
  26. mindspore/amp.py +4 -4
  27. mindspore/atlprov.dll +0 -0
  28. mindspore/avcodec-59.dll +0 -0
  29. mindspore/avdevice-59.dll +0 -0
  30. mindspore/avfilter-8.dll +0 -0
  31. mindspore/avformat-59.dll +0 -0
  32. mindspore/avutil-57.dll +0 -0
  33. mindspore/boost/__init__.py +2 -2
  34. mindspore/boost/base.py +3 -7
  35. mindspore/boost/boost_cell_wrapper.py +2 -2
  36. mindspore/c1.dll +0 -0
  37. mindspore/c1xx.dll +0 -0
  38. mindspore/c2.dll +0 -0
  39. mindspore/common/__init__.py +4 -3
  40. mindspore/common/_grad_function.py +56 -0
  41. mindspore/common/_pijit_context.py +14 -5
  42. mindspore/common/_register_for_tensor.py +1 -1
  43. mindspore/common/_stub_tensor.py +5 -10
  44. mindspore/common/_tensor_cpp_method.py +1 -1
  45. mindspore/common/_tensor_docs.py +1915 -3287
  46. mindspore/common/api.py +341 -354
  47. mindspore/common/auto_dynamic_shape.py +41 -44
  48. mindspore/common/dtype.py +5 -2
  49. mindspore/common/dump.py +7 -5
  50. mindspore/common/file_system.py +3 -0
  51. mindspore/common/hook_handle.py +5 -3
  52. mindspore/common/initializer.py +10 -6
  53. mindspore/common/jit_begin_end.py +94 -0
  54. mindspore/common/jit_config.py +6 -1
  55. mindspore/common/jit_context.py +76 -0
  56. mindspore/common/jit_trace.py +378 -0
  57. mindspore/common/lazy_inline.py +2 -2
  58. mindspore/common/mutable.py +5 -4
  59. mindspore/common/parameter.py +106 -39
  60. mindspore/common/seed.py +2 -2
  61. mindspore/common/sparse_tensor.py +23 -17
  62. mindspore/common/tensor.py +297 -714
  63. mindspore/communication/__init__.py +7 -5
  64. mindspore/communication/_comm_helper.py +47 -2
  65. mindspore/communication/comm_func.py +70 -53
  66. mindspore/communication/management.py +83 -17
  67. mindspore/context.py +214 -560
  68. mindspore/dataset/__init__.py +44 -20
  69. mindspore/dataset/audio/__init__.py +2 -8
  70. mindspore/dataset/audio/transforms.py +3 -17
  71. mindspore/dataset/core/config.py +3 -3
  72. mindspore/dataset/engine/cache_client.py +1 -1
  73. mindspore/dataset/engine/datasets.py +102 -120
  74. mindspore/dataset/engine/datasets_audio.py +22 -22
  75. mindspore/dataset/engine/datasets_standard_format.py +43 -24
  76. mindspore/dataset/engine/datasets_text.py +78 -85
  77. mindspore/dataset/engine/datasets_user_defined.py +108 -76
  78. mindspore/dataset/engine/datasets_vision.py +111 -108
  79. mindspore/dataset/engine/iterators.py +5 -3
  80. mindspore/dataset/engine/obs/obs_mindrecord_dataset.py +1 -1
  81. mindspore/dataset/engine/samplers.py +279 -57
  82. mindspore/dataset/engine/serializer_deserializer.py +2 -1
  83. mindspore/dataset/engine/validators.py +10 -0
  84. mindspore/dataset/text/__init__.py +7 -6
  85. mindspore/dataset/text/transforms.py +6 -5
  86. mindspore/dataset/text/utils.py +3 -3
  87. mindspore/dataset/transforms/__init__.py +0 -9
  88. mindspore/dataset/transforms/transforms.py +3 -3
  89. mindspore/dataset/utils/browse_dataset.py +1 -1
  90. mindspore/dataset/vision/__init__.py +2 -9
  91. mindspore/dataset/vision/transforms.py +202 -158
  92. mindspore/dataset/vision/utils.py +7 -5
  93. mindspore/device_context/ascend/op_debug.py +60 -1
  94. mindspore/device_context/ascend/op_tuning.py +0 -4
  95. mindspore/device_manager.py +39 -3
  96. mindspore/dnnl.dll +0 -0
  97. mindspore/dpcmi.dll +0 -0
  98. mindspore/experimental/es/embedding_service.py +35 -27
  99. mindspore/experimental/map_parameter.py +4 -4
  100. mindspore/experimental/optim/adadelta.py +22 -26
  101. mindspore/experimental/optim/adagrad.py +4 -4
  102. mindspore/experimental/optim/adam.py +4 -0
  103. mindspore/experimental/optim/adamax.py +4 -4
  104. mindspore/experimental/optim/adamw.py +4 -0
  105. mindspore/experimental/optim/asgd.py +1 -1
  106. mindspore/experimental/optim/lr_scheduler.py +40 -22
  107. mindspore/experimental/optim/radam.py +5 -5
  108. mindspore/experimental/optim/rprop.py +1 -1
  109. mindspore/experimental/optim/sgd.py +1 -1
  110. mindspore/hal/contiguous_tensors_handle.py +6 -10
  111. mindspore/hal/device.py +55 -81
  112. mindspore/hal/event.py +38 -55
  113. mindspore/hal/memory.py +93 -144
  114. mindspore/hal/stream.py +81 -125
  115. mindspore/include/dataset/constants.h +7 -4
  116. mindspore/include/dataset/execute.h +2 -2
  117. mindspore/jpeg62.dll +0 -0
  118. mindspore/log.py +40 -2
  119. mindspore/mindrecord/__init__.py +20 -7
  120. mindspore/mindspore_backend_common.dll +0 -0
  121. mindspore/mindspore_backend_manager.dll +0 -0
  122. mindspore/mindspore_common.dll +0 -0
  123. mindspore/mindspore_core.dll +0 -0
  124. mindspore/mindspore_dump.dll +0 -0
  125. mindspore/mindspore_frontend.dll +0 -0
  126. mindspore/mindspore_glog.dll +0 -0
  127. mindspore/mindspore_memory_pool.dll +0 -0
  128. mindspore/mindspore_ms_backend.dll +0 -0
  129. mindspore/mindspore_ops.dll +0 -0
  130. mindspore/{mindspore_backend.dll → mindspore_ops_host.dll} +0 -0
  131. mindspore/mindspore_ops_kernel_common.dll +0 -0
  132. mindspore/mindspore_profiler.dll +0 -0
  133. mindspore/mindspore_pyboost.dll +0 -0
  134. mindspore/mindspore_pynative.dll +0 -0
  135. mindspore/mindspore_res_manager.dll +0 -0
  136. mindspore/mindspore_runtime_pipeline.dll +0 -0
  137. mindspore/mint/__init__.py +131 -700
  138. mindspore/mint/distributed/__init__.py +5 -1
  139. mindspore/mint/distributed/distributed.py +194 -109
  140. mindspore/mint/linalg/__init__.py +2 -0
  141. mindspore/mint/nn/__init__.py +280 -18
  142. mindspore/mint/nn/functional.py +282 -64
  143. mindspore/mint/nn/layer/__init__.py +4 -0
  144. mindspore/mint/nn/layer/_functions.py +7 -3
  145. mindspore/mint/nn/layer/activation.py +120 -13
  146. mindspore/mint/nn/layer/conv.py +218 -24
  147. mindspore/mint/nn/layer/normalization.py +15 -16
  148. mindspore/mint/nn/layer/padding.py +1 -1
  149. mindspore/mint/nn/layer/pooling.py +66 -1
  150. mindspore/mint/optim/__init__.py +2 -1
  151. mindspore/mint/optim/sgd.py +171 -0
  152. mindspore/msobj140.dll +0 -0
  153. mindspore/mspdb140.dll +0 -0
  154. mindspore/mspdbcore.dll +0 -0
  155. mindspore/mspdbst.dll +0 -0
  156. mindspore/mspft140.dll +0 -0
  157. mindspore/msvcdis140.dll +0 -0
  158. mindspore/msvcp140_1.dll +0 -0
  159. mindspore/msvcp140_2.dll +0 -0
  160. mindspore/msvcp140_atomic_wait.dll +0 -0
  161. mindspore/msvcp140_codecvt_ids.dll +0 -0
  162. mindspore/nn/__init__.py +4 -1
  163. mindspore/nn/cell.py +1250 -176
  164. mindspore/nn/layer/activation.py +23 -21
  165. mindspore/nn/layer/basic.py +22 -16
  166. mindspore/nn/layer/container.py +1 -1
  167. mindspore/nn/layer/conv.py +22 -17
  168. mindspore/nn/layer/embedding.py +9 -8
  169. mindspore/nn/layer/normalization.py +48 -42
  170. mindspore/nn/layer/pooling.py +75 -31
  171. mindspore/nn/layer/transformer.py +11 -10
  172. mindspore/nn/learning_rate_schedule.py +4 -2
  173. mindspore/nn/loss/loss.py +27 -19
  174. mindspore/nn/optim/ada_grad.py +6 -5
  175. mindspore/nn/optim/adadelta.py +9 -7
  176. mindspore/nn/optim/adafactor.py +1 -1
  177. mindspore/nn/optim/adam.py +16 -12
  178. mindspore/nn/optim/adamax.py +8 -7
  179. mindspore/nn/optim/adasum.py +5 -5
  180. mindspore/nn/optim/asgd.py +1 -1
  181. mindspore/nn/optim/ftrl.py +11 -9
  182. mindspore/nn/optim/lamb.py +1 -1
  183. mindspore/nn/optim/lazyadam.py +12 -10
  184. mindspore/nn/optim/momentum.py +7 -6
  185. mindspore/nn/optim/optimizer.py +2 -2
  186. mindspore/nn/optim/proximal_ada_grad.py +12 -10
  187. mindspore/nn/optim/rmsprop.py +13 -12
  188. mindspore/nn/optim/rprop.py +9 -7
  189. mindspore/nn/optim/sgd.py +9 -6
  190. mindspore/nn/optim/tft_wrapper.py +5 -2
  191. mindspore/nn/probability/bijector/bijector.py +17 -11
  192. mindspore/nn/probability/bijector/gumbel_cdf.py +5 -5
  193. mindspore/nn/probability/bijector/invert.py +2 -2
  194. mindspore/nn/probability/bijector/scalar_affine.py +3 -3
  195. mindspore/nn/probability/bijector/softplus.py +3 -2
  196. mindspore/nn/probability/distribution/beta.py +3 -3
  197. mindspore/nn/probability/distribution/categorical.py +1 -1
  198. mindspore/nn/probability/distribution/cauchy.py +4 -2
  199. mindspore/nn/probability/distribution/exponential.py +6 -7
  200. mindspore/nn/probability/distribution/gamma.py +2 -2
  201. mindspore/nn/probability/distribution/gumbel.py +2 -2
  202. mindspore/nn/probability/distribution/half_normal.py +5 -3
  203. mindspore/nn/probability/distribution/logistic.py +5 -3
  204. mindspore/nn/probability/distribution/poisson.py +1 -1
  205. mindspore/nn/probability/distribution/uniform.py +5 -3
  206. mindspore/nn/reinforcement/_tensors_queue.py +1 -1
  207. mindspore/nn/reinforcement/tensor_array.py +1 -1
  208. mindspore/nn/wrap/__init__.py +6 -6
  209. mindspore/nn/wrap/cell_wrapper.py +178 -117
  210. mindspore/nn/wrap/grad_reducer.py +45 -36
  211. mindspore/nn/wrap/loss_scale.py +3 -3
  212. mindspore/numpy/array_creations.py +3 -3
  213. mindspore/numpy/array_ops.py +1 -1
  214. mindspore/numpy/math_ops.py +4 -4
  215. mindspore/numpy/utils.py +1 -2
  216. mindspore/numpy/utils_const.py +1 -2
  217. mindspore/opencv_core452.dll +0 -0
  218. mindspore/opencv_imgcodecs452.dll +0 -0
  219. mindspore/opencv_imgproc452.dll +0 -0
  220. mindspore/ops/__init__.py +3 -2
  221. mindspore/ops/_grad_experimental/grad_comm_ops.py +18 -3
  222. mindspore/ops/_grad_experimental/grad_debug_ops.py +8 -1
  223. mindspore/ops/_grad_experimental/taylor_rule.py +29 -0
  224. mindspore/ops/_register_for_op.py +0 -11
  225. mindspore/{ops_generate → ops/_utils}/arg_dtype_cast.py +123 -4
  226. mindspore/{ops_generate → ops/_utils}/arg_handler.py +3 -4
  227. mindspore/ops/_vmap/vmap_array_ops.py +7 -6
  228. mindspore/ops/_vmap/vmap_grad_nn_ops.py +2 -1
  229. mindspore/ops/_vmap/vmap_math_ops.py +4 -7
  230. mindspore/ops/_vmap/vmap_nn_ops.py +9 -8
  231. mindspore/ops/auto_generate/__init__.py +4 -3
  232. mindspore/ops/auto_generate/cpp_create_prim_instance_helper.py +102 -49
  233. mindspore/ops/auto_generate/gen_extend_func.py +281 -135
  234. mindspore/ops/auto_generate/gen_ops_def.py +2574 -2326
  235. mindspore/ops/auto_generate/gen_ops_prim.py +8566 -2755
  236. mindspore/ops/auto_generate/pyboost_inner_prim.py +106 -76
  237. mindspore/ops/composite/__init__.py +2 -1
  238. mindspore/ops/composite/base.py +19 -24
  239. mindspore/ops/composite/math_ops.py +6 -16
  240. mindspore/ops/composite/multitype_ops/__init__.py +5 -2
  241. mindspore/ops/composite/multitype_ops/_compile_utils.py +2 -3
  242. mindspore/ops/composite/multitype_ops/_constexpr_utils.py +1 -2
  243. mindspore/ops/composite/multitype_ops/add_impl.py +2 -1
  244. mindspore/ops/composite/multitype_ops/bitwise_and_impl.py +2 -1
  245. mindspore/ops/composite/multitype_ops/bitwise_or_impl.py +2 -1
  246. mindspore/ops/composite/multitype_ops/bitwise_xor_impl.py +2 -1
  247. mindspore/ops/composite/multitype_ops/div_impl.py +6 -4
  248. mindspore/ops/composite/multitype_ops/equal_impl.py +4 -3
  249. mindspore/ops/composite/multitype_ops/floordiv_impl.py +2 -1
  250. mindspore/ops/composite/multitype_ops/getitem_impl.py +3 -2
  251. mindspore/ops/composite/multitype_ops/greater_equal_impl.py +4 -3
  252. mindspore/ops/composite/multitype_ops/greater_impl.py +4 -3
  253. mindspore/ops/composite/multitype_ops/in_impl.py +2 -1
  254. mindspore/ops/composite/multitype_ops/invert_impl.py +50 -0
  255. mindspore/ops/composite/multitype_ops/left_shift_impl.py +2 -1
  256. mindspore/ops/composite/multitype_ops/less_equal_impl.py +4 -3
  257. mindspore/ops/composite/multitype_ops/less_impl.py +4 -3
  258. mindspore/ops/composite/multitype_ops/logic_not_impl.py +3 -2
  259. mindspore/ops/composite/multitype_ops/logical_and_impl.py +2 -1
  260. mindspore/ops/composite/multitype_ops/logical_or_impl.py +2 -1
  261. mindspore/ops/composite/multitype_ops/mod_impl.py +2 -1
  262. mindspore/ops/composite/multitype_ops/mul_impl.py +3 -2
  263. mindspore/ops/composite/multitype_ops/negative_impl.py +2 -1
  264. mindspore/ops/composite/multitype_ops/not_equal_impl.py +2 -1
  265. mindspore/ops/composite/multitype_ops/not_in_impl.py +2 -1
  266. mindspore/ops/composite/multitype_ops/ones_like_impl.py +18 -0
  267. mindspore/ops/composite/multitype_ops/pow_impl.py +2 -1
  268. mindspore/ops/composite/multitype_ops/right_shift_impl.py +2 -1
  269. mindspore/ops/composite/multitype_ops/setitem_impl.py +2 -1
  270. mindspore/ops/composite/multitype_ops/sub_impl.py +2 -1
  271. mindspore/ops/function/__init__.py +28 -2
  272. mindspore/ops/function/_add_attr_func.py +58 -0
  273. mindspore/ops/function/array_func.py +1629 -2345
  274. mindspore/ops/function/clip_func.py +38 -45
  275. mindspore/ops/function/debug_func.py +36 -44
  276. mindspore/ops/function/grad/__init__.py +1 -0
  277. mindspore/ops/function/grad/grad_func.py +104 -71
  278. mindspore/ops/function/image_func.py +1 -1
  279. mindspore/ops/function/linalg_func.py +46 -78
  280. mindspore/ops/function/math_func.py +3035 -3705
  281. mindspore/ops/function/nn_func.py +676 -241
  282. mindspore/ops/function/other_func.py +159 -1
  283. mindspore/ops/function/parameter_func.py +17 -30
  284. mindspore/ops/function/random_func.py +204 -361
  285. mindspore/ops/function/reshard_func.py +4 -70
  286. mindspore/ops/function/sparse_func.py +3 -3
  287. mindspore/ops/function/sparse_unary_func.py +5 -5
  288. mindspore/ops/function/spectral_func.py +25 -58
  289. mindspore/ops/function/vmap_func.py +24 -17
  290. mindspore/ops/functional.py +6 -4
  291. mindspore/ops/functional_overload.py +547 -4
  292. mindspore/ops/op_info_register.py +32 -244
  293. mindspore/ops/operations/__init__.py +10 -5
  294. mindspore/ops/operations/_custom_ops_utils.py +247 -0
  295. mindspore/ops/operations/_grad_ops.py +1 -10
  296. mindspore/ops/operations/_inner_ops.py +5 -76
  297. mindspore/ops/operations/_ms_kernel.py +4 -10
  298. mindspore/ops/operations/_rl_inner_ops.py +1 -1
  299. mindspore/ops/operations/_scalar_ops.py +3 -2
  300. mindspore/ops/operations/_sequence_ops.py +1 -1
  301. mindspore/ops/operations/_tensor_array.py +1 -1
  302. mindspore/ops/operations/array_ops.py +37 -22
  303. mindspore/ops/operations/comm_ops.py +150 -107
  304. mindspore/ops/operations/custom_ops.py +221 -23
  305. mindspore/ops/operations/debug_ops.py +115 -16
  306. mindspore/ops/operations/inner_ops.py +1 -1
  307. mindspore/ops/operations/linalg_ops.py +1 -58
  308. mindspore/ops/operations/manually_defined/_inner.py +1 -1
  309. mindspore/ops/operations/manually_defined/ops_def.py +746 -79
  310. mindspore/ops/operations/math_ops.py +21 -18
  311. mindspore/ops/operations/nn_ops.py +65 -191
  312. mindspore/ops/operations/other_ops.py +62 -9
  313. mindspore/ops/operations/random_ops.py +13 -7
  314. mindspore/ops/operations/reshard_ops.py +1 -1
  315. mindspore/ops/operations/sparse_ops.py +2 -2
  316. mindspore/ops/primitive.py +43 -32
  317. mindspore/ops/tensor_method.py +232 -13
  318. mindspore/ops_generate/__init__.py +0 -5
  319. mindspore/ops_generate/aclnn/__init__.py +0 -0
  320. mindspore/ops_generate/{aclnn_kernel_register_auto_cc_generator.py → aclnn/aclnn_kernel_register_auto_cc_generator.py} +43 -18
  321. mindspore/ops_generate/{gen_aclnn_implement.py → aclnn/gen_aclnn_implement.py} +49 -51
  322. mindspore/ops_generate/api/__init__.py +0 -0
  323. mindspore/ops_generate/{add_tensor_docs_generator.py → api/add_tensor_docs_generator.py} +9 -7
  324. mindspore/ops_generate/{cpp_create_prim_instance_helper_generator.py → api/cpp_create_prim_instance_helper_generator.py} +6 -9
  325. mindspore/ops_generate/{functional_map_cpp_generator.py → api/functional_map_cpp_generator.py} +25 -12
  326. mindspore/ops_generate/{functional_overload_py_generator.py → api/functional_overload_py_generator.py} +8 -6
  327. mindspore/ops_generate/{functions_cc_generator.py → api/functions_cc_generator.py} +14 -10
  328. mindspore/ops_generate/api/gen_api.py +103 -0
  329. mindspore/ops_generate/{op_api_proto.py → api/op_api_proto.py} +98 -69
  330. mindspore/ops_generate/{tensor_func_reg_cpp_generator.py → api/tensor_func_reg_cpp_generator.py} +82 -43
  331. mindspore/ops_generate/common/__init__.py +0 -0
  332. mindspore/ops_generate/common/gen_constants.py +91 -0
  333. mindspore/ops_generate/{gen_utils.py → common/gen_utils.py} +72 -19
  334. mindspore/ops_generate/{op_proto.py → common/op_proto.py} +64 -1
  335. mindspore/ops_generate/{template.py → common/template.py} +96 -84
  336. mindspore/ops_generate/gen_ops.py +23 -325
  337. mindspore/ops_generate/op_def/__init__.py +0 -0
  338. mindspore/ops_generate/op_def/gen_op_def.py +90 -0
  339. mindspore/ops_generate/{lite_ops_cpp_generator.py → op_def/lite_ops_cpp_generator.py} +47 -11
  340. mindspore/ops_generate/{ops_def_cc_generator.py → op_def/ops_def_cc_generator.py} +18 -7
  341. mindspore/ops_generate/{ops_def_h_generator.py → op_def/ops_def_h_generator.py} +5 -5
  342. mindspore/ops_generate/{ops_name_h_generator.py → op_def/ops_name_h_generator.py} +30 -15
  343. mindspore/ops_generate/op_def/ops_primitive_h_generator.py +125 -0
  344. mindspore/ops_generate/op_def_py/__init__.py +0 -0
  345. mindspore/ops_generate/op_def_py/gen_op_def_py.py +47 -0
  346. mindspore/ops_generate/{op_def_py_generator.py → op_def_py/op_def_py_generator.py} +6 -5
  347. mindspore/ops_generate/{op_prim_py_generator.py → op_def_py/op_prim_py_generator.py} +24 -15
  348. mindspore/ops_generate/pyboost/__init__.py +0 -0
  349. mindspore/ops_generate/{auto_grad_impl_cc_generator.py → pyboost/auto_grad_impl_cc_generator.py} +11 -7
  350. mindspore/ops_generate/{auto_grad_reg_cc_generator.py → pyboost/auto_grad_reg_cc_generator.py} +7 -7
  351. mindspore/ops_generate/{gen_pyboost_func.py → pyboost/gen_pyboost_func.py} +40 -16
  352. mindspore/ops_generate/{op_template_parser.py → pyboost/op_template_parser.py} +105 -24
  353. mindspore/ops_generate/{pyboost_functions_cpp_generator.py → pyboost/pyboost_functions_cpp_generator.py} +55 -18
  354. mindspore/ops_generate/{pyboost_functions_h_generator.py → pyboost/pyboost_functions_h_generator.py} +42 -10
  355. mindspore/ops_generate/{pyboost_functions_py_generator.py → pyboost/pyboost_functions_py_generator.py} +6 -6
  356. mindspore/ops_generate/{pyboost_grad_function_cpp_generator.py → pyboost/pyboost_grad_function_cpp_generator.py} +11 -10
  357. mindspore/ops_generate/{pyboost_inner_prim_generator.py → pyboost/pyboost_inner_prim_generator.py} +8 -7
  358. mindspore/ops_generate/{pyboost_native_grad_functions_generator.py → pyboost/pyboost_native_grad_functions_generator.py} +14 -10
  359. mindspore/ops_generate/{pyboost_op_cpp_code_generator.py → pyboost/pyboost_op_cpp_code_generator.py} +140 -53
  360. mindspore/ops_generate/{pyboost_overload_functions_cpp_generator.py → pyboost/pyboost_overload_functions_cpp_generator.py} +28 -15
  361. mindspore/ops_generate/{pyboost_utils.py → pyboost/pyboost_utils.py} +88 -4
  362. mindspore/ops_generate/resources/__init__.py +0 -0
  363. mindspore/ops_generate/resources/resource_list.py +30 -0
  364. mindspore/ops_generate/resources/resource_loader.py +36 -0
  365. mindspore/ops_generate/resources/resource_manager.py +64 -0
  366. mindspore/ops_generate/resources/yaml_loader.py +88 -0
  367. mindspore/ops_generate/tensor_py_cc_generator.py +122 -0
  368. mindspore/parallel/__init__.py +6 -2
  369. mindspore/parallel/_auto_parallel_context.py +133 -6
  370. mindspore/parallel/_cell_wrapper.py +130 -15
  371. mindspore/parallel/_parallel_serialization.py +95 -4
  372. mindspore/parallel/_ps_context.py +1 -1
  373. mindspore/parallel/_recovery_context.py +7 -2
  374. mindspore/parallel/_tensor.py +142 -18
  375. mindspore/parallel/_utils.py +198 -25
  376. mindspore/parallel/algo_parameter_config.py +3 -3
  377. mindspore/parallel/auto_parallel.py +732 -0
  378. mindspore/parallel/checkpoint_convert.py +159 -0
  379. mindspore/parallel/checkpoint_transform.py +656 -37
  380. mindspore/parallel/cluster/process_entity/_api.py +151 -19
  381. mindspore/parallel/cluster/run.py +1 -1
  382. mindspore/parallel/function/__init__.py +24 -0
  383. mindspore/parallel/function/reshard_func.py +259 -0
  384. mindspore/parallel/nn/__init__.py +25 -0
  385. mindspore/parallel/nn/parallel_cell_wrapper.py +263 -0
  386. mindspore/parallel/nn/parallel_grad_reducer.py +169 -0
  387. mindspore/parallel/parameter_broadcast.py +24 -13
  388. mindspore/parallel/shard.py +137 -61
  389. mindspore/parallel/transform_safetensors.py +287 -95
  390. mindspore/pgodb140.dll +0 -0
  391. mindspore/pgort140.dll +0 -0
  392. mindspore/profiler/__init__.py +9 -5
  393. mindspore/profiler/analysis/parser/ascend_cann_parser.py +6 -2
  394. mindspore/profiler/analysis/parser/ms_framework_parser.py +4 -4
  395. mindspore/profiler/analysis/parser/timeline_assembly_factory/ascend_timeline_assembler.py +7 -4
  396. mindspore/profiler/analysis/parser/timeline_assembly_factory/trace_view_container.py +22 -0
  397. mindspore/profiler/analysis/parser/timeline_creator/fwk_timeline_creator.py +3 -3
  398. mindspore/profiler/analysis/parser/timeline_event/fwk_event.py +241 -86
  399. mindspore/profiler/analysis/viewer/ascend_communication_viewer.py +41 -2
  400. mindspore/profiler/analysis/viewer/ascend_kernel_details_viewer.py +33 -35
  401. mindspore/profiler/analysis/viewer/ascend_memory_viewer.py +7 -0
  402. mindspore/profiler/analysis/viewer/ascend_op_memory_viewer.py +8 -3
  403. mindspore/profiler/analysis/viewer/ascend_step_trace_time_viewer.py +141 -30
  404. mindspore/profiler/analysis/viewer/ms_dataset_viewer.py +5 -6
  405. mindspore/profiler/common/ascend_msprof_exporter.py +5 -4
  406. mindspore/profiler/common/constant.py +12 -0
  407. mindspore/profiler/common/msprof_cmd_tool.py +42 -23
  408. mindspore/profiler/common/path_manager.py +24 -0
  409. mindspore/profiler/common/profiler_context.py +26 -2
  410. mindspore/profiler/common/profiler_meta_data.py +74 -0
  411. mindspore/profiler/common/profiler_parameters.py +59 -18
  412. mindspore/profiler/common/profiler_path_manager.py +66 -7
  413. mindspore/profiler/dynamic_profiler.py +112 -79
  414. mindspore/profiler/envprofiler.py +26 -1
  415. mindspore/profiler/experimental_config.py +197 -0
  416. mindspore/profiler/mstx.py +57 -14
  417. mindspore/profiler/platform/npu_profiler.py +33 -7
  418. mindspore/profiler/profiler.py +541 -45
  419. mindspore/profiler/profiler_action_controller.py +1 -1
  420. mindspore/profiler/profiler_interface.py +4 -0
  421. mindspore/profiler/schedule.py +57 -22
  422. mindspore/rewrite/api/node.py +15 -13
  423. mindspore/rewrite/api/symbol_tree.py +1 -1
  424. mindspore/run_check/_check_version.py +25 -14
  425. mindspore/run_check/run_check.py +1 -1
  426. mindspore/runtime/__init__.py +2 -2
  427. mindspore/runtime/executor.py +40 -11
  428. mindspore/runtime/memory.py +25 -8
  429. mindspore/safeguard/rewrite_obfuscation.py +12 -9
  430. mindspore/swresample-4.dll +0 -0
  431. mindspore/swscale-6.dll +0 -0
  432. mindspore/tbbmalloc.dll +0 -0
  433. mindspore/tinyxml2.dll +0 -0
  434. mindspore/train/__init__.py +8 -8
  435. mindspore/train/_utils.py +35 -7
  436. mindspore/train/amp.py +1 -1
  437. mindspore/train/callback/__init__.py +2 -2
  438. mindspore/train/callback/_callback.py +2 -16
  439. mindspore/train/callback/_checkpoint.py +24 -40
  440. mindspore/train/callback/_cluster_monitor.py +14 -18
  441. mindspore/train/callback/_flops_collector.py +2 -3
  442. mindspore/train/callback/_history.py +7 -4
  443. mindspore/train/callback/_lambda_callback.py +2 -2
  444. mindspore/train/callback/_landscape.py +0 -3
  445. mindspore/train/callback/_loss_monitor.py +2 -1
  446. mindspore/train/callback/_on_request_exit.py +6 -5
  447. mindspore/train/callback/_reduce_lr_on_plateau.py +11 -6
  448. mindspore/train/callback/_summary_collector.py +8 -13
  449. mindspore/train/callback/_time_monitor.py +2 -1
  450. mindspore/train/callback/{_tft_register.py → _train_fault_tolerance.py} +179 -103
  451. mindspore/train/data_sink.py +25 -2
  452. mindspore/train/dataset_helper.py +4 -5
  453. mindspore/train/loss_scale_manager.py +8 -7
  454. mindspore/train/metrics/accuracy.py +3 -3
  455. mindspore/train/metrics/confusion_matrix.py +9 -9
  456. mindspore/train/metrics/error.py +3 -3
  457. mindspore/train/metrics/hausdorff_distance.py +4 -4
  458. mindspore/train/metrics/mean_surface_distance.py +3 -3
  459. mindspore/train/metrics/metric.py +0 -12
  460. mindspore/train/metrics/occlusion_sensitivity.py +4 -2
  461. mindspore/train/metrics/precision.py +8 -6
  462. mindspore/train/metrics/recall.py +9 -9
  463. mindspore/train/metrics/root_mean_square_surface_distance.py +2 -2
  464. mindspore/train/mind_ir_pb2.py +19 -12
  465. mindspore/train/model.py +176 -103
  466. mindspore/train/serialization.py +246 -988
  467. mindspore/train/summary/_summary_adapter.py +2 -2
  468. mindspore/train/summary/summary_record.py +1 -1
  469. mindspore/turbojpeg.dll +0 -0
  470. mindspore/utils/__init__.py +3 -2
  471. mindspore/utils/dryrun.py +4 -2
  472. mindspore/utils/hooks.py +81 -0
  473. mindspore/utils/utils.py +138 -4
  474. mindspore/vcmeta.dll +0 -0
  475. mindspore/vcruntime140.dll +0 -0
  476. mindspore/vcruntime140_1.dll +0 -0
  477. mindspore/version.py +1 -1
  478. {mindspore-2.5.0.dist-info → mindspore-2.6.0rc1.dist-info}/METADATA +2 -1
  479. {mindspore-2.5.0.dist-info → mindspore-2.6.0rc1.dist-info}/RECORD +483 -438
  480. mindspore/_install_custom.py +0 -43
  481. mindspore/common/_register_for_adapter.py +0 -74
  482. mindspore/ops/auto_generate/gen_arg_dtype_cast.py +0 -252
  483. mindspore/ops/auto_generate/gen_arg_handler.py +0 -136
  484. mindspore/ops/operations/_opaque_predicate_registry.py +0 -41
  485. mindspore/ops_generate/gen_constants.py +0 -190
  486. mindspore/ops_generate/gen_ops_inner_prim.py +0 -131
  487. mindspore/ops_generate/ops_primitive_h_generator.py +0 -81
  488. /mindspore/ops_generate/{base_generator.py → common/base_generator.py} +0 -0
  489. {mindspore-2.5.0.dist-info → mindspore-2.6.0rc1.dist-info}/WHEEL +0 -0
  490. {mindspore-2.5.0.dist-info → mindspore-2.6.0rc1.dist-info}/entry_points.txt +0 -0
  491. {mindspore-2.5.0.dist-info → mindspore-2.6.0rc1.dist-info}/top_level.txt +0 -0
mindspore/train/_utils.py CHANGED
@@ -16,6 +16,7 @@
  from __future__ import absolute_import

  import os
+ import sys
  import json
  from collections.abc import Iterable

@@ -23,7 +24,7 @@ import time
  import numpy as np

  from mindspore.common.tensor import Tensor
- from mindspore._c_expression import Tensor as Tensor_
+ from mindspore._c_expression import TensorPy as Tensor_
  from mindspore._c_expression import MSContext, ms_ctx_param
  from mindspore.common.dtype import dtype_to_nptype, pytype_to_dtype
  from mindspore.common import dtype as mstype
@@ -31,7 +32,7 @@ from mindspore import context
  from mindspore import log as logger
  from mindspore import _checkparam as Validator
  from mindspore.common.api import _cell_graph_executor
- from mindspore.communication import get_group_size
+ from mindspore.communication.management import get_rank, get_group_size
  from mindspore.train.mind_ir_pb2 import ModelProto as mindir_model
  from mindspore.train.checkpoint_pb2 import Checkpoint
  from mindspore.train.node_strategy_pb2 import ParallelStrategyMap as ckpt_strategy
@@ -64,6 +65,7 @@ def _get_types_and_shapes(dataset):
      dataset_shapes = dataset.output_shapes()
      return dataset_types, dataset_shapes

+
  def enable_data_broadcast():
      """Get status to indicate if enable dataset broadcast."""
      return MSContext.get_instance().get_param(ms_ctx_param.dataset_broadcast_opt_level) > 0
@@ -375,20 +377,40 @@ def _get_parameter_redundancy_without_opt_shard(parameter_layout, param_redundan
          param_redundancy_dict[key] = tuple(redundancy_list)


- def get_parameter_redundancy(layout_obj, initial_rank=0):
+ def _get_initial_rank(parameter_layout):
+     """Get the initial rank of pp."""
+     for k, _ in parameter_layout.items():
+         dev_matrix = parameter_layout[k][0]
+         break
+     dev_num = 1
+     if dev_matrix:
+         for i in dev_matrix:
+             dev_num *= i
+     rank_id = get_rank()
+     initial_rank = (rank_id // dev_num) * dev_num
+     return initial_rank
+
+
+ def _get_pp_size_from_redundancy_map(param_redundancy):
+     """Get pp size from redundancy map."""
+     for _, v in param_redundancy.items():
+         return len(v) * len(v[0])
+
+
+ def get_parameter_redundancy(layout_obj, initial_rank=None):
      """
      Get parameter redundancy map.

      Args:
          layout_obj (Union[str, layout): File name of `strategy.ckpt` or net.parameter_layout_dict.
-         initial_rank (int): Start rank id for each pipeline. Default: 0.
+         initial_rank (int): Start rank id for each pipeline. Default: ``None``.

      Returns:
          Dict, dict of parameter redundancy info.

      Examples:
          >>> from mindspore.train.utils import get_parameter_redundancy
-         >>> param_redundancy_dict = get_parameter_redundancy("/path/to/strategy.ckpt")
+         >>> param_redundancy_dict = get_parameter_redundancy("/path/to/strategy.ckpt", initial_rank=0)
          {'param1': ((0, 1, 2, 3, 4, 5, 6, 7),),
           'param2': ((0, 4, 8, 12), (1, 5, 9, 13), (2, 6, 10, 14), (3, 7, 11, 15)),
           'param3': ((0, 4, 8, 12), (1, 5, 9, 13), (2, 6, 10, 14), (3, 7, 11, 15)),
@@ -405,7 +427,8 @@ def get_parameter_redundancy(layout_obj, initial_rank=0):
          from mindspore.communication.management import get_process_group_ranks
          groups_ranks = (tuple(get_process_group_ranks()),)
          param_redundancy_dict = {param.name: groups_ranks for _, param in layout_obj.parameters_and_names()}
-         return param_redundancy_dict
+         sorted_param_redundancy_dict = {key: param_redundancy_dict[key] for key in sorted(param_redundancy_dict.keys())}
+         return sorted_param_redundancy_dict
      else:
          parameter_layout = {}
          for k, v in layout_obj.items():
@@ -413,6 +436,9 @@ def get_parameter_redundancy(layout_obj, initial_rank=0):

      param_redundancy_dict = {}

+     if initial_rank is None:
+         initial_rank = _get_initial_rank(parameter_layout)
+
      _get_parameter_redundancy_without_opt_shard(parameter_layout, param_redundancy_dict, initial_rank)


@@ -420,7 +446,8 @@ def get_parameter_redundancy(layout_obj, initial_rank=0):
      else:
          _get_layout_opt_shard(layout_obj, param_redundancy_dict)

-     return param_redundancy_dict
+     sorted_param_redundancy_dict = {key: param_redundancy_dict[key] for key in sorted(param_redundancy_dict.keys())}
+     return sorted_param_redundancy_dict


  def _collect_settings_by_rank(redundancy_map):
@@ -539,6 +566,7 @@ def _progress_bar(iterable, total=None):
          elapsed_time_str = time.strftime("%H:%M:%S", time.gmtime(elapsed_time))
          remaining_time_str = time.strftime("%H:%M:%S", time.gmtime(remaining_time))

+         sys.stdout.reconfigure(encoding="utf-8")
          print(f'\r{percent}%|{bar}|[{elapsed_time_str}<{remaining_time_str}]', end='')
          if iteration == total:
              print()
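Note: the new `_get_initial_rank` helper infers the pipeline start rank from the device matrix instead of requiring callers to pass `initial_rank` explicitly. A minimal standalone sketch of that arithmetic, with a plain dict standing in for a real `parameter_layout` and a hypothetical rank id in place of the `get_rank()` call:

from functools import reduce

def infer_initial_rank(parameter_layout, rank_id):
    # Mirrors _get_initial_rank: take any parameter's dev_matrix (all params
    # share one device arrangement), multiply it out, and round the rank down
    # to a multiple of that device count.
    dev_matrix = next(iter(parameter_layout.values()))[0]
    dev_num = reduce(lambda a, b: a * b, dev_matrix, 1)
    return (rank_id // dev_num) * dev_num

# Hypothetical layout: a 2x4 device matrix covers 8 devices per pipeline stage.
layout = {"param1": ([2, 4], [1, 0])}
print(infer_initial_rank(layout, rank_id=11))  # -> 8 (rank 11 is in stage 1)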
mindspore/train/amp.py CHANGED
@@ -638,7 +638,7 @@ def _add_loss_network(network, loss_fn, cast_model_type):


  def _is_grad_accumulation(mcell):
-     if mcell.cls_name == "GradAccumulationCell":
+     if mcell.cls_name == "GradAccumulationCell" or mcell.cls_name == "GradAccumulation":
          return True
      for cell in mcell.cells():
          if _is_grad_accumulation(cell):
mindspore/train/callback/__init__.py CHANGED
@@ -36,9 +36,9 @@ from mindspore.train.callback._reduce_lr_on_plateau import ReduceLROnPlateau
  from mindspore.train.callback._on_request_exit import OnRequestExit
  from mindspore.train.callback._backup_and_restore import BackupAndRestore
  from mindspore.train.callback._flops_collector import FlopsUtilizationCollector
- from mindspore.train.callback._tft_register import TFTRegister
+ from mindspore.train.callback._train_fault_tolerance import TrainFaultTolerance

  __all__ = ["Callback", "LossMonitor", "TimeMonitor", "ModelCheckpoint", "FlopsUtilizationCollector",
             "SummaryCollector", "CheckpointConfig", "RunContext", "LearningRateScheduler", "SummaryLandscape",
             "History", "LambdaCallback", "ReduceLROnPlateau", "EarlyStopping", "OnRequestExit", "BackupAndRestore",
-            "TFTRegister"]
+            "TrainFaultTolerance"]
mindspore/train/callback/_callback.py CHANGED
@@ -121,10 +121,7 @@ class Callback:
      When creating a custom Callback, model context information can be obtained in Callback
      methods by calling `RunContext.original_args()`, which is a dictionary varivable
      recording current attributes. Users can add custimized attributes to the information.
-     Training process can also be stopped by calling `request_stop` method. For details
-     of custom Callback, please check
-     `Callback tutorial <https://www.mindspore.cn/docs/en/master/model_train/train_process/model/
-     callback.html#customized-callback-mechanism>`_.
+     Training process can also be stopped by calling `request_stop` method.

      Examples:
          >>> import numpy as np
@@ -491,9 +488,7 @@ class RunContext:

      Callback objects not only can obtain the Model context information by calling by
      `RunContext.original_args()` and add extra attributes to the information, but also can stop the
-     training process by calling `request_stop` method. For details of custom Callback,
-     please check
-     `Callback Mechanism <https://www.mindspore.cn/docs/en/master/model_train/train_process/model/callback.html>`_.
+     training process by calling `request_stop` method.

      `RunContext.original_args()` holds the model context information as a dictionary variable, and
      different attributes of the dictionary are stored in training or eval process. Details are as follows:
@@ -572,10 +567,6 @@ class RunContext:

          Returns:
              Dict, an object that holds the original arguments of model.
-
-         Tutorial Examples:
-             - `Callback Mechanism - Customized Callback Mechanism
-               <https://mindspore.cn/docs/en/master/model_train/train_process/model/callback.html#customized-callback-mechanism>`_
          """
          return self._original_args

@@ -585,11 +576,6 @@ class RunContext:

          Callbacks can use this function to request stop of iterations.
          model.train() checks whether this is called or not.
-
-         Tutorial Examples:
-             - `Callback Mechanism - Customized Training Termination Time
-               <https://mindspore.cn/docs/en/master/model_train/train_process/model/callback.html#
-               customized-training-termination-time>`_
          """
          self._stop_requested = True

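Note: the deleted tutorial links documented behavior that is unchanged, a custom Callback can still inspect `RunContext.original_args()` and end training via `request_stop`. A minimal sketch, assuming the default train network reports a scalar loss through the `net_outputs` attribute (the threshold is illustrative):

from mindspore.train.callback import Callback

class StopOnLowLoss(Callback):
    """Request a training stop once the reported loss falls below a threshold."""
    def __init__(self, threshold=0.05):
        super().__init__()
        self.threshold = threshold

    def on_train_step_end(self, run_context):
        cb_params = run_context.original_args()
        if float(cb_params.net_outputs) < self.threshold:  # assumes scalar loss
            run_context.request_stop()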
mindspore/train/callback/_checkpoint.py CHANGED
@@ -28,15 +28,12 @@ from mindspore.train.serialization import save_checkpoint, _save_graph, _wait_as
      _wait_async_thread_save_ckpt, _check_async_save
  from mindspore.parallel._cell_wrapper import destroy_allgather_cell
  from mindspore.parallel._recovery_context import _set_recovery_context, _get_recovery_context
- from mindspore.parallel._auto_parallel_context import _get_auto_parallel_context
- from mindspore.parallel._utils import _get_device_num
- from mindspore.communication.management import get_rank
- from mindspore.train._utils import get_parameter_redundancy, remove_param_redundancy
- from mindspore.train.callback._callback import Callback, set_cur_net
+ from mindspore.communication.management import get_rank, get_group_size
+ from mindspore.train._utils import get_parameter_redundancy, remove_param_redundancy, _get_pp_size_from_redundancy_map
+ from mindspore.train.callback._callback import Callback
  from mindspore.common.tensor import Tensor
  from mindspore.common.parameter import Parameter
  from mindspore.common.generator import Generator
- from mindspore.common.api import _cell_graph_executor
  from mindspore._c_expression import collect_host_info, get_clock_syscnt

  _cur_dir = os.getcwd()
@@ -87,7 +84,7 @@ def _chg_ckpt_file_name_if_same_exist(directory, prefix, exception=False):
          name_ext = os.path.splitext(filename)
          if exception and filename[-16:] != "_breakpoint.ckpt":
              continue
-         if not exception and (name_ext[-1] != ".ckpt" or filename[-16:] == "_breakpoint.ckpt"):
+         if not exception and (name_ext[-1] not in (".ckpt", ".safetensors") or filename[-16:] == "_breakpoint.ckpt"):
              continue
          # find same prefix file
          if filename.find(prefix) == 0 and not filename[pre_len].isalpha():
@@ -106,10 +103,10 @@ def _chg_ckpt_file_name_if_same_exist(directory, prefix, exception=False):
      return prefix


- def _check_format_and_other_params(format, enc_key, enc_mode, crc_check=False, async_save=False, exception_save=False,
+ def _check_format_and_other_params(format, enc_key, enc_mode, crc_check=False, exception_save=False,
                                     map_param_inc=False, global_step_num=None):
-     param_not_default = (enc_key is not None or enc_mode != "AES-GCM" or crc_check or async_save
-                          or exception_save or map_param_inc or global_step_num is not None)
+     param_not_default = (enc_key is not None or enc_mode != "AES-GCM" or crc_check or exception_save or map_param_inc
+                          or global_step_num is not None)
      if format == "safetensors" and param_not_default:
          raise ValueError("For 'save_checkpoint', when format is 'safetensors', other param must be default.")

@@ -139,9 +136,9 @@ class CheckpointConfig:
          integrated_save (bool): Whether to merge and save the split Tensor in the automatic parallel scenario.
              Integrated save function is only supported in automatic parallel scene, not supported
              in manual parallel. Default: ``True`` .
-         async_save (Union[bool, str]):Whether to use asynchronous saving of the checkpoint file, if True,
-             the asynchronous thread is used by default. If the type is string,
-             the method of asynchronous saving, it can be "process" or "thread".
+         async_save (Union[bool, str], optional):Whether to use asynchronous saving of the checkpoint file or
+             safetensors file, if True, the asynchronous thread is used by default. If the type
+             is string, the method of asynchronous saving, it can be "process" or "thread".
              Default: ``False`` .
          saved_network (Cell): Network to be saved in checkpoint file. If the saved_network has no relation
              with the network in training, the initial value of saved_network will be saved. Default: ``None`` .
@@ -261,8 +258,7 @@ class CheckpointConfig:
          self.enable_redundance = kwargs.get('enable_redundance', False)
          self.remove_redundancy = Validator.check_isinstance('remove_redundancy', remove_redundancy, bool)

-         _check_format_and_other_params(format, enc_key, enc_mode, crc_check, async_save, exception_save,
-                                        self._map_param_inc)
+         _check_format_and_other_params(format, enc_key, enc_mode, crc_check, exception_save, self._map_param_inc)

      @property
      def save_checkpoint_steps(self):
@@ -452,8 +448,9 @@ class ModelCheckpoint(Callback):
      Note:
          In the distributed training scenario, please specify different directories for each training process
          to save the checkpoint file. Otherwise, the training may fail.
-         If this callback is used in the `model` function, the checkpoint file will saved
-         parameters of the optimizer by default.
+         If this callback is used in the
+         `Model <https://www.mindspore.cn/docs/en/master/api_python/train/mindspore.train.Model.html>`_ function,
+         the checkpoint file will saved parameters of the optimizer by default.

      Args:
          prefix (Union[str, callable object]): The prefix name or callable object to generate name of checkpoint files.
@@ -514,7 +511,7 @@ class ModelCheckpoint(Callback):
          if callable(prefix):
              self._prefix_func = prefix

-         if _get_recovery_context("enable_recovery"):
+         if context.get_context("device_target") == "GPU" and _get_recovery_context("enable_recovery"):
              _set_recovery_context(ckpt_path=self._directory)

          if config is None:
@@ -556,19 +553,17 @@ class ModelCheckpoint(Callback):
              from aiturbo.checkpoint import aiturbo_mindspore as aiturbo
              ckpt_storage_path = self._directory
              rank_id = get_rank()
-             stage_num = _get_auto_parallel_context("pipeline_stages")
-             stage_rank_num = _get_device_num() // stage_num
+             device_num = get_group_size()
              param_layout = cb_params.train_network.parameter_layout_dict
              if not param_layout:
-                 layout = {"stage_num": stage_num, "stage_rank_num": stage_rank_num, "stage_layout": None}
+                 layout = {"stage_num": 1, "stage_rank_num": device_num, "stage_layout": None}
                  aiturbo.init(ckpt_storage_path, rank_id, layout, None, False, None)
              else:
-                 device_num = _get_device_num()
-                 chunk_size = device_num // stage_num
-                 initial_rank = (rank_id // chunk_size) * chunk_size
-                 param_redundancy_dict = get_parameter_redundancy(param_layout, initial_rank)
+                 param_redundancy_dict = get_parameter_redundancy(param_layout)
+                 pp_size = _get_pp_size_from_redundancy_map(param_redundancy_dict)
+                 stage_num = device_num // pp_size
                  dp, _ = _get_dp_tp_from_layout(param_redundancy_dict)
-                 layout = {"stage_num": stage_num, "stage_rank_num": stage_rank_num,
+                 layout = {"stage_num": stage_num, "stage_rank_num": pp_size,
                            "stage_layout": param_redundancy_dict}
                  single_params = remove_param_redundancy(param_redundancy_dict)
                  single_params = {device_id: list(params) for device_id, params in single_params.items()}
@@ -684,12 +679,6 @@ class ModelCheckpoint(Callback):
              self._last_time_for_keep = time.time()
              self._last_triggered_step = cb_params.cur_step_num

-             # TODO(MS_DISABLE_REF_MODE): Delete when remove MS_DISABLE_REF_MODE env.
-             if context.get_context("enable_ge") and os.getenv('MS_DISABLE_REF_MODE') \
-                     and context.get_context("mode") == context.GRAPH_MODE:
-                 set_cur_net(cb_params.train_network)
-                 cb_params.train_network.add_flags(ge_sync_data=True)
-                 _cell_graph_executor(cb_params.train_network, phase='save')
              self._append_dict_content(cb_params.cur_epoch_num, cb_params.cur_step_num)
              network = self._config.saved_network if self._config.saved_network is not None else cb_params.train_network
              if os.getenv("AITURBO") == "1":
@@ -698,18 +687,13 @@ class ModelCheckpoint(Callback):
                                  crc_check=self._config.crc_check, incremental=self._map_param_inc,
                                  global_step_num=cb_params.cur_step_num)
              elif self._config.remove_redundancy:
-                 parallel_mode = context.get_auto_parallel_context("parallel_mode")
-                 if parallel_mode == "stand_alone":
+                 if get_group_size() == 1:
                      raise TypeError(f"The deduplication feature for saving checkpoint can only be used "
-                                     f"in parallel scenarios, but got {parallel_mode}.")
+                                     f"in parallel scenarios, but got 'stand_alone'.")
                  param_layout = network.parameter_layout_dict
                  rank_id = get_rank()
                  if param_layout:
-                     device_num = _get_device_num()
-                     stage_num = _get_auto_parallel_context("pipeline_stages")
-                     chunk_size = device_num // stage_num
-                     initial_rank = (rank_id // chunk_size) * chunk_size
-                     param_redundancy_dict = get_parameter_redundancy(param_layout, initial_rank)
+                     param_redundancy_dict = get_parameter_redundancy(param_layout)
                      single_params = remove_param_redundancy(param_redundancy_dict)
                      save_param_names = single_params.get(rank_id)
                      param_layout_set = set(param_layout.keys())
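Note: with `async_save` dropped from `_check_format_and_other_params`, asynchronous saving no longer has to stay at its default when `format` is "safetensors". A hedged usage sketch of the public API, with parameter names as documented above (in 2.5.0 this combination raised ValueError):

from mindspore.train import CheckpointConfig, ModelCheckpoint

# Asynchronous saving of safetensors files, newly permitted in 2.6.0rc1;
# the remaining parameters (enc_key, enc_mode, crc_check, ...) must stay default.
config = CheckpointConfig(save_checkpoint_steps=100,
                          format="safetensors",
                          async_save="process")  # or True / "thread"
ckpt_cb = ModelCheckpoint(prefix="net", directory="./ckpt", config=config)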
mindspore/train/callback/_cluster_monitor.py CHANGED
@@ -24,9 +24,8 @@ from threading import RLock
  from mindspore.train.callback._callback import Callback
  from mindspore.communication.management import get_rank, get_local_rank
  from mindspore import log as logger
- from mindspore.parallel._auto_parallel_context import _get_auto_parallel_context
  from mindspore.parallel._utils import _get_device_num
- from mindspore.train._utils import get_parameter_redundancy
+ from mindspore.train._utils import get_parameter_redundancy, _get_pp_size_from_redundancy_map

  _perf_mutex = RLock()

@@ -42,7 +41,7 @@ def _get_dp_tp_from_redundancy(redundancy_tuple):
      return dp, tp


- def _get_dp_tp_from_layout(parameter_layout_dict, initial_rank=0):
+ def _get_dp_tp_from_layout(parameter_layout_dict, initial_rank=None):
      """From layout dict get dp and tp"""
      tp = []
      dp = []
@@ -132,21 +131,9 @@ class ClusterMonitor(Callback):
          self.full_path = self.log_path + self.log_name

          self.write_dp_tp_flag = True
-         self.initial_rank = 0

      def begin(self, run_context):
          _remove_pre_log()
-         pp_num = _get_auto_parallel_context("pipeline_stages")
-         device_num = _get_device_num()
-
-         original_list = list(range(device_num))
-         chunk_size = device_num // pp_num
-         split_pp_lists = []
-         for i in range(0, device_num, chunk_size):
-             end_index = i + chunk_size if i + chunk_size <= device_num else device_num
-             split_pp_lists.append(original_list[i:end_index])
-
-         self.initial_rank = (self.global_rank // chunk_size) * chunk_size
          with _perf_mutex:
              dir_path = os.path.dirname(self.full_path)
              if not os.path.exists(dir_path):
@@ -157,8 +144,6 @@ class ClusterMonitor(Callback):
              with open(self.full_path, 'w') as file:
                  log_message = f'UUID:{self.uuid_value}\nFRAMEWORK:{self.frame_work}\nGLOBAL RANKID:{self.global_rank}\n'
                  file.write(log_message)
-                 for _, split_pp_list in enumerate(split_pp_lists):
-                     file.write(f'PP:{split_pp_list}\n')
              os.chmod(self.full_path, stat.S_IRUSR)

      def step_begin(self, run_context):
@@ -183,10 +168,21 @@ class ClusterMonitor(Callback):
          if self.enabled and self.enabled_dtp_group and self.write_dp_tp_flag:
              cb_params = run_context.original_args()
              param_layout_dict = cb_params.train_network.parameter_layout_dict
-             dp, tp = _get_dp_tp_from_layout(param_layout_dict, self.initial_rank)
+             device_num = _get_device_num()
+             original_list = list(range(device_num))
+             param_redundancy_dict = get_parameter_redundancy(param_layout_dict)
+             pp_size = _get_pp_size_from_redundancy_map(param_redundancy_dict)
+             split_pp_lists = []
+             for i in range(0, device_num, pp_size):
+                 end_index = i + pp_size if i + pp_size <= device_num else device_num
+                 split_pp_lists.append(original_list[i:end_index])
+             dp, tp = _get_dp_tp_from_layout(param_layout_dict)
+
              with _perf_mutex:
                  os.chmod(self.full_path, stat.S_IWUSR)
                  with open(self.full_path, 'a') as file:
+                     for _, split_pp_list in enumerate(split_pp_lists):
+                         file.write(f'PP:{split_pp_list}\n')
                      for dp_value in dp:
                          file.write(f'dp:{dp_value}\n')
                      for tp_value in tp:
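Note: the PP grouping itself is unchanged; only `pp_size` now comes from the redundancy map instead of the `pipeline_stages` context. A standalone sketch of the splitting step (values illustrative):

def split_pp_groups(device_num, pp_size):
    """Split ranks [0, device_num) into consecutive pipeline groups."""
    ranks = list(range(device_num))
    return [ranks[i:i + pp_size] for i in range(0, device_num, pp_size)]

print(split_pp_groups(device_num=8, pp_size=4))  # -> [[0, 1, 2, 3], [4, 5, 6, 7]]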
mindspore/train/callback/_flops_collector.py CHANGED
@@ -89,7 +89,7 @@ class FlopsUtilizationCollector(Callback):
          Train per step time: 135.572 ms, mfu:0.47% hfu:0.47%
          Train per step time: 1.317 ms, mfu:48.59% hfu:48.59%
      """
-     def __init__(self, data_size=None, computility=1, full_flops=True, enable_ma_collector=False):
+     def __init__(self, data_size, computility=1, full_flops=True, enable_ma_collector=False):
          super(FlopsUtilizationCollector, self).__init__()
          self.step_time = time.time()
          self.computility = computility
@@ -110,8 +110,7 @@ class FlopsUtilizationCollector(Callback):
          self.batch_step_size = None
          Validator.check_bool(full_flops, "full_flops")
          Validator.check_bool(enable_ma_collector, "enable_ma_collector")
-         if data_size:
-             Validator.check_positive_int(data_size, "data_size")
+         Validator.check_positive_int(data_size, "data_size")

      def step_begin(self, run_context):
          """
mindspore/train/callback/_history.py CHANGED
@@ -25,10 +25,13 @@ class History(Callback):
      """
      Records the network outputs and metrics information into a `History` object.

-     The network outputs information will be the loss value if not custimizing the train network or eval network;
-     if the custimized network returns a `Tensor` or `numpy.ndarray`, the mean value of network output
-     will be recorded, if the custimized network returns a `tuple` or `list`, the first element of network
-     outputs will be recorded.
+     - The network outputs information will be the loss value if not custimizing the train network or eval network;
+     - If the train network or eval network is custimized:
+
+       - if the custimized network returns a `Tensor` or `numpy.ndarray`, the mean value of network output
+         will be recorded.
+       - if the custimized network returns a `tuple` or `list`, the first element of network
+         outputs will be recorded.

      Note:
          Normally used in :func:`mindspore.train.Model.train` or :func:`mindspore.train.Model.fit`.
mindspore/train/callback/_lambda_callback.py CHANGED
@@ -36,8 +36,8 @@ class LambdaCallback(Callback):
          on_train_step_end (Function): called at each train step end. Default: ``None`` .
          on_train_begin (Function): called at the beginning of model train. Default: ``None`` .
          on_train_end (Function): called at the end of model train. Default: ``None`` .
-         on_eval_epoch_begin (Function): called at eval epoch begin. Default: ``None`` .
-         on_eval_epoch_end (Function): called at eval epoch end. Default: ``None`` .
+         on_eval_epoch_begin (Function): called at each eval epoch begin. Default: ``None`` .
+         on_eval_epoch_end (Function): called at each eval epoch end. Default: ``None`` .
          on_eval_step_begin (Function): called at each eval step begin. Default: ``None`` .
          on_eval_step_end (Function): called at each eval step end. Default: ``None`` .
          on_eval_begin (Function): called at the beginning of model eval. Default: ``None`` .
mindspore/train/callback/_landscape.py CHANGED
@@ -256,9 +256,6 @@ class SummaryLandscape:
          """
          Clean the checkpoint.

-         Tutorial Examples:
-             - `Training Optimization Process Visualization
-               <https://www.mindspore.cn/mindinsight/docs/en/master/landscape.html>`_
          """
          shutil.rmtree(self._ckpt_dir, ignore_errors=True)

mindspore/train/callback/_loss_monitor.py CHANGED
@@ -93,7 +93,8 @@ class LossMonitor(Callback):

      def on_train_epoch_end(self, run_context):
          """
-         When LossMonitor used in `model.fit`, print eval metrics at the end of epoch if current epoch
+         When LossMonitor used in :func:`mindspore.train.Model.fit`, print eval metrics
+         at the end of epoch if current epoch
          should do evaluation.

          Args:
mindspore/train/callback/_on_request_exit.py CHANGED
@@ -26,6 +26,7 @@ from mindspore.common.tensor import Tensor
  from mindspore.train._utils import _make_directory
  from mindspore import _checkparam as Validator
  from mindspore.train.serialization import load_checkpoint, save_checkpoint, export
+ from mindspore.communication.management import get_group_size
  from mindspore.train.callback._callback import Callback
  from mindspore.parallel._utils import _get_parallel_mode
  from mindspore.context import ParallelMode
@@ -37,7 +38,7 @@ class OnRequestExit(Callback):

      Register OnRequestExit Callback before training, when the user want to exit the training process
      and save the training data, could send the registered exit signal 'sig' to the training process or modify the
-     'GracefulExit' that a key in the json file specified by the 'config_file' to '1'.
+     'GracefulExit' that a key in the JSON file specified by the 'config_file' to '1'.
      After the training process executes the current step, saves the current training status,
      including checkpoint and mindir, and then exit the training process.

@@ -58,7 +59,7 @@ class OnRequestExit(Callback):
          ValueError: If the 'save_mindir' is not a bool.
          ValueError: If the 'file_name' is not a str.
          ValueError: If the 'directory' is not a str.
-         ValueError: If the 'sig' is not an int or the 'sig' is signal.SIGKILL.
+         ValueError: If the 'sig' is not an int or the 'sig' is ``signal.SIGTERM``.

      Examples:
          >>> from mindspore import nn
@@ -92,10 +93,8 @@ class OnRequestExit(Callback):
          self.key = "GracefulExit"
          self.remote_config_file = config_file  # used config file to save checkpoint and exit training process
          self.use_graceful = os.environ.get("MS_ENABLE_GRACEFUL_EXIT") == "1"
-         self.is_distributed = _get_parallel_mode() != ParallelMode.STAND_ALONE
+         self.is_distributed = get_group_size() > 1
          self.integrated_save = True
-         if self.is_distributed:
-             self.integrated_save = _get_parallel_mode() == ParallelMode.AUTO_PARALLEL
          self.stop_train = False
          self.need_do_step_end = False
          if self.save_ckpt or self.save_mindir:
@@ -250,6 +249,8 @@ class OnRequestExit(Callback):
              else:
                  global_step = int(call_params.network.optimizer.global_step.data)
              append_dict["global_step"] = global_step
+             if self.is_distributed:
+                 self.integrated_save = _get_parallel_mode() == ParallelMode.AUTO_PARALLEL
              save_checkpoint(net, self.train_name, integrated_save=self.integrated_save,
                              append_dict=append_dict)
          if self.save_mindir:
mindspore/train/callback/_reduce_lr_on_plateau.py CHANGED
@@ -63,12 +63,17 @@ class ReduceLROnPlateau(Callback):
              will be reduced. Default: ``10`` .
          verbose (bool): If False: quiet, if True: print related information.
              Default: ``False`` .
-         mode (str): one of `{'auto', 'min', 'max'}`. In "min" mode,
-             the learning rate will be reduced when the
-             quantity monitored has stopped decreasing; in "max" mode it will be
-             reduced when the quantity monitored has stopped increasing; in "auto"
-             mode, the direction is automatically inferred from the name of the
-             monitored quantity. Default: ``'auto'`` .
+         mode (str): one of `{'auto', 'min', 'max'}`. Default: ``'auto'`` .
+
+             - In ``'min'`` mode,
+               the learning rate will be reduced when the
+               quantity monitored has stopped decreasing.
+             - In ``'max'`` mode it will be
+               reduced when the quantity monitored has stopped increasing.
+             - In ``'auto'``
+               mode, the direction is automatically inferred from the name of the
+               monitored quantity.
+
          min_delta (float): threshold for measuring the new optimum, to only focus on
              significant changes. Default: ``1e-4`` .
          cooldown (int): number of epochs to wait before resuming normal operation after
mindspore/train/callback/_summary_collector.py CHANGED
@@ -107,22 +107,18 @@ class SummaryCollector(Callback):
              The first output will be treated as the loss and it will be averaged. Default: ``True`` .
          - collect_graph (bool): Whether to collect the computational graph. Currently, only
              training computational graph is collected. Default: ``True`` .
-         - collect_train_lineage (bool): Whether to collect lineage data for the training phase,
-             this field will be displayed on the `lineage page \
-             <https://www.mindspore.cn/mindinsight/docs/en/master/lineage_and_scalars_comparison.html>`_
-             of MindInsight. Default: ``True`` .
-         - collect_eval_lineage (bool): Whether to collect lineage data for the evaluation phase,
-             this field will be displayed on the `lineage page
-             <https://www.mindspore.cn/mindinsight/docs/en/master/lineage_and_scalars_comparison.html>`_
-             of MindInsight. Default: ``True`` .
+         - collect_train_lineage (bool): Whether to collect lineage data for the training phase.
+             Default: ``True`` .
+         - collect_eval_lineage (bool): Whether to collect lineage data for the evaluation phase.
+             Default: ``True`` .
          - collect_input_data (bool): Whether to collect dataset for each training.
              Currently only image data is supported.
              If there are multiple columns of data in the dataset, the first column should be image data.
              Default: ``True`` .
          - collect_dataset_graph (bool): Whether to collect dataset graph for the training phase.
              Default: ``True`` .
-         - histogram_regular (Union[str, None]): Collect weight and bias for parameter distribution page
-             and displayed in MindInsight. This field allows regular strings to control which parameters to collect.
+         - histogram_regular (Union[str, None]): Collect weight and bias for parameter distribution page.
+             This field allows regular strings to control which parameters to collect.
              It is not recommended to collect too many parameters at once, as it can affect performance.
              Note that if you collect too many parameters and run out of memory, the training will fail.
              Default: ``None`` , it means only the first five parameters are collected.
@@ -153,8 +149,7 @@ class SummaryCollector(Callback):
              True: it means that after specified data is set, non-specified data is collected as the default behavior.
              False: it means that after specified data is set, only the specified data is collected,
              and the others are not collected. Default: ``True`` .
-         custom_lineage_data (Union[dict, None]): Allows you to customize the data and present it on the MingInsight
-             `lineage page <https://www.mindspore.cn/mindinsight/docs/en/master/lineage_and_scalars_comparison.html>`_ .
+         custom_lineage_data (Union[dict, None]): Allows you to customize the data.
              In the custom data, the type of the key supports str, and the type of value supports str, int
              and float. Default: ``None`` , it means there is no custom data.
          collect_tensor_freq (Optional[int]): The same semantics as the `collect_freq`, but controls TensorSummary only.
@@ -168,7 +163,7 @@ class SummaryCollector(Callback):
              affect the number of steps TensorSummary will be collected.
              Default: ``None`` , which means to follow the behavior as described above.
          max_file_size (Optional[int]): The maximum size in bytes of each file that can be written to the disk.
-             For example, to write not larger than 4GB, specify `max_file_size=4*1024**3`.
+             For example, to write not larger than 4GB, specify `max_file_size=4*1024*3`.
              Default: ``None`` , which means no limit.
          export_options (Union[None, dict]): Perform custom operations on the export data.
              Note that the size of export files is not limited by the max_file_size.
mindspore/train/callback/_time_monitor.py CHANGED
@@ -28,7 +28,8 @@ class TimeMonitor(Callback):
      Args:
          data_size (int): How many steps are the intervals between print information each time.
              if the program get `batch_num` during training, `data_size` will be set to `batch_num`,
-             otherwise `data_size` will be used. Default: ``None`` .
+             otherwise `data_size` will be used. If the program does not get `batch_num` during training,
+             meanwhile `data_size` does not set, the program will report an error. Default: ``None`` .

          data_time (bool): Whether to show the average time of fetching data in Host.
              Note that data fetch and network compute are processed sequentially in non dataset sink mode, while