mindspore 2.4.10-cp310-cp310-win_amd64.whl → 2.6.0-cp310-cp310-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of mindspore has been flagged as potentially problematic.
- mindspore/.commit_id +1 -1
- mindspore/Microsoft.VisualStudio.Telemetry.dll +0 -0
- mindspore/Newtonsoft.Json.dll +0 -0
- mindspore/__init__.py +13 -6
- mindspore/_c_dataengine.cp310-win_amd64.pyd +0 -0
- mindspore/_c_expression.cp310-win_amd64.pyd +0 -0
- mindspore/_c_mindrecord.cp310-win_amd64.pyd +0 -0
- mindspore/_check_jit_forbidden_api.py +3 -0
- mindspore/_checkparam.py +3 -38
- mindspore/_deprecated/__init__.py +17 -0
- mindspore/_deprecated/jit.py +198 -0
- mindspore/_extends/builtin_operations.py +1 -1
- mindspore/_extends/parallel_compile/akg_compiler/gen_custom_op_files.py +1 -1
- mindspore/_extends/parse/__init__.py +6 -7
- mindspore/_extends/parse/compile_config.py +83 -0
- mindspore/_extends/parse/deprecated/__init__.py +0 -0
- mindspore/_extends/parse/deprecated/deprecated_tensor_method.py +394 -0
- mindspore/_extends/parse/jit_fallback_modules/__init__.py +0 -0
- mindspore/_extends/parse/jit_fallback_modules/check_utils.py +123 -0
- mindspore/_extends/parse/jit_fallback_modules/third_party_modules.py +50 -0
- mindspore/_extends/parse/parser.py +47 -198
- mindspore/_extends/parse/resources.py +1 -5
- mindspore/_extends/parse/standard_method.py +229 -99
- mindspore/_extends/pijit/__init__.py +2 -2
- mindspore/_extends/pijit/pijit_func_white_list.py +17 -12
- mindspore/_extends/pijit/tensor_func_list.py +27 -0
- mindspore/_extends/utils.py +1 -1
- mindspore/amp.py +11 -5
- mindspore/atlprov.dll +0 -0
- mindspore/avcodec-59.dll +0 -0
- mindspore/avdevice-59.dll +0 -0
- mindspore/avfilter-8.dll +0 -0
- mindspore/avformat-59.dll +0 -0
- mindspore/avutil-57.dll +0 -0
- mindspore/boost/__init__.py +2 -2
- mindspore/boost/base.py +3 -7
- mindspore/boost/boost_cell_wrapper.py +138 -43
- mindspore/c1.dll +0 -0
- mindspore/c1xx.dll +0 -0
- mindspore/c2.dll +0 -0
- mindspore/common/__init__.py +6 -3
- mindspore/common/_grad_function.py +56 -0
- mindspore/common/_pijit_context.py +14 -5
- mindspore/common/_register_for_tensor.py +1 -2
- mindspore/common/_stub_tensor.py +30 -14
- mindspore/common/_tensor_cpp_method.py +17 -0
- mindspore/common/_tensor_docs.py +4760 -0
- mindspore/common/api.py +480 -372
- mindspore/common/auto_dynamic_shape.py +41 -44
- mindspore/common/dtype.py +39 -36
- mindspore/common/dump.py +9 -6
- mindspore/common/file_system.py +9 -1
- mindspore/common/generator.py +5 -0
- mindspore/common/hook_handle.py +6 -2
- mindspore/common/initializer.py +13 -10
- mindspore/common/jit_begin_end.py +94 -0
- mindspore/common/jit_config.py +6 -1
- mindspore/common/jit_context.py +76 -0
- mindspore/common/jit_trace.py +378 -0
- mindspore/common/lazy_inline.py +9 -3
- mindspore/common/mindir_util.py +10 -2
- mindspore/common/mutable.py +5 -4
- mindspore/common/parameter.py +135 -52
- mindspore/common/seed.py +2 -2
- mindspore/common/sparse_tensor.py +23 -17
- mindspore/common/tensor.py +975 -1981
- mindspore/communication/__init__.py +7 -5
- mindspore/communication/_comm_helper.py +52 -2
- mindspore/communication/comm_func.py +240 -181
- mindspore/communication/management.py +95 -26
- mindspore/context.py +324 -573
- mindspore/dataset/__init__.py +65 -37
- mindspore/dataset/audio/__init__.py +2 -8
- mindspore/dataset/audio/transforms.py +3 -17
- mindspore/dataset/callback/ds_callback.py +2 -1
- mindspore/dataset/core/config.py +87 -6
- mindspore/dataset/engine/cache_admin.py +3 -3
- mindspore/dataset/engine/cache_client.py +6 -5
- mindspore/dataset/engine/datasets.py +292 -267
- mindspore/dataset/engine/datasets_audio.py +22 -8
- mindspore/dataset/engine/datasets_standard_format.py +46 -27
- mindspore/dataset/engine/datasets_text.py +78 -48
- mindspore/dataset/engine/datasets_user_defined.py +183 -117
- mindspore/dataset/engine/datasets_vision.py +120 -44
- mindspore/dataset/engine/iterators.py +283 -63
- mindspore/dataset/engine/obs/obs_mindrecord_dataset.py +1 -1
- mindspore/dataset/engine/obs/util.py +8 -0
- mindspore/dataset/engine/queue.py +40 -0
- mindspore/dataset/engine/samplers.py +289 -43
- mindspore/dataset/engine/serializer_deserializer.py +3 -2
- mindspore/dataset/engine/validators.py +53 -11
- mindspore/dataset/text/__init__.py +7 -6
- mindspore/dataset/text/transforms.py +6 -5
- mindspore/dataset/text/utils.py +3 -3
- mindspore/dataset/transforms/__init__.py +0 -9
- mindspore/dataset/transforms/py_transforms_util.py +17 -0
- mindspore/dataset/transforms/transforms.py +31 -14
- mindspore/dataset/utils/browse_dataset.py +1 -1
- mindspore/dataset/vision/__init__.py +2 -9
- mindspore/dataset/vision/transforms.py +202 -158
- mindspore/dataset/vision/utils.py +7 -5
- mindspore/dataset/vision/validators.py +1 -2
- mindspore/device_context/__init__.py +21 -0
- mindspore/device_context/ascend/__init__.py +25 -0
- mindspore/device_context/ascend/device.py +72 -0
- mindspore/device_context/ascend/op_debug.py +153 -0
- mindspore/device_context/ascend/op_precision.py +193 -0
- mindspore/device_context/ascend/op_tuning.py +123 -0
- mindspore/{ops_generate/gen_constants.py → device_context/cpu/__init__.py} +6 -17
- mindspore/device_context/cpu/device.py +62 -0
- mindspore/device_context/cpu/op_tuning.py +43 -0
- mindspore/device_context/gpu/__init__.py +21 -0
- mindspore/device_context/gpu/device.py +70 -0
- mindspore/device_context/gpu/op_precision.py +67 -0
- mindspore/device_context/gpu/op_tuning.py +175 -0
- mindspore/device_manager.py +170 -0
- mindspore/dnnl.dll +0 -0
- mindspore/dpcmi.dll +0 -0
- mindspore/experimental/es/embedding_service.py +35 -27
- mindspore/experimental/llm_boost/__init__.py +1 -0
- mindspore/experimental/llm_boost/ascend_native/__init__.py +22 -0
- mindspore/experimental/llm_boost/ascend_native/llama_boost_ascend_native.py +209 -0
- mindspore/experimental/llm_boost/ascend_native/llm_boost.py +52 -0
- mindspore/experimental/llm_boost/atb/boost_base.py +2 -3
- mindspore/experimental/llm_boost/atb/llama_boost.py +6 -1
- mindspore/experimental/llm_boost/register.py +1 -0
- mindspore/experimental/map_parameter.py +4 -4
- mindspore/experimental/optim/adadelta.py +6 -6
- mindspore/experimental/optim/adagrad.py +4 -4
- mindspore/experimental/optim/adam.py +7 -0
- mindspore/experimental/optim/adamax.py +4 -4
- mindspore/experimental/optim/adamw.py +4 -0
- mindspore/experimental/optim/asgd.py +1 -1
- mindspore/experimental/optim/lr_scheduler.py +73 -46
- mindspore/experimental/optim/radam.py +34 -31
- mindspore/experimental/optim/rprop.py +1 -1
- mindspore/experimental/optim/sgd.py +1 -1
- mindspore/hal/contiguous_tensors_handle.py +6 -10
- mindspore/hal/device.py +55 -53
- mindspore/hal/event.py +52 -52
- mindspore/hal/memory.py +179 -120
- mindspore/hal/stream.py +150 -109
- mindspore/include/api/context.h +0 -1
- mindspore/include/dataset/constants.h +7 -4
- mindspore/include/dataset/execute.h +2 -2
- mindspore/jpeg62.dll +0 -0
- mindspore/log.py +50 -0
- mindspore/mindrecord/__init__.py +21 -8
- mindspore/mindrecord/config.py +17 -316
- mindspore/mindrecord/filereader.py +1 -9
- mindspore/mindrecord/filewriter.py +5 -15
- mindspore/mindrecord/mindpage.py +1 -9
- mindspore/mindspore_backend_common.dll +0 -0
- mindspore/mindspore_backend_manager.dll +0 -0
- mindspore/mindspore_common.dll +0 -0
- mindspore/mindspore_core.dll +0 -0
- mindspore/mindspore_dump.dll +0 -0
- mindspore/mindspore_frontend.dll +0 -0
- mindspore/mindspore_glog.dll +0 -0
- mindspore/mindspore_memory_pool.dll +0 -0
- mindspore/mindspore_ms_backend.dll +0 -0
- mindspore/mindspore_ops.dll +0 -0
- mindspore/{mindspore_backend.dll → mindspore_ops_host.dll} +0 -0
- mindspore/mindspore_ops_kernel_common.dll +0 -0
- mindspore/mindspore_profiler.dll +0 -0
- mindspore/mindspore_pyboost.dll +0 -0
- mindspore/mindspore_pynative.dll +0 -0
- mindspore/mindspore_res_manager.dll +0 -0
- mindspore/mindspore_runtime_pipeline.dll +0 -0
- mindspore/mint/__init__.py +798 -761
- mindspore/mint/distributed/__init__.py +70 -4
- mindspore/mint/distributed/distributed.py +2679 -44
- mindspore/mint/linalg/__init__.py +8 -0
- mindspore/mint/nn/__init__.py +743 -22
- mindspore/mint/nn/functional.py +716 -23
- mindspore/mint/nn/layer/__init__.py +21 -4
- mindspore/mint/nn/layer/_functions.py +334 -0
- mindspore/mint/nn/layer/activation.py +276 -1
- mindspore/mint/nn/layer/basic.py +123 -0
- mindspore/mint/nn/layer/conv.py +933 -0
- mindspore/mint/nn/layer/normalization.py +223 -28
- mindspore/mint/nn/layer/padding.py +797 -0
- mindspore/mint/nn/layer/pooling.py +235 -0
- mindspore/mint/optim/__init__.py +3 -1
- mindspore/mint/optim/adam.py +223 -0
- mindspore/mint/optim/adamw.py +26 -19
- mindspore/mint/optim/sgd.py +171 -0
- mindspore/mint/special/__init__.py +2 -1
- mindspore/msobj140.dll +0 -0
- mindspore/mspdb140.dll +0 -0
- mindspore/mspdbcore.dll +0 -0
- mindspore/mspdbst.dll +0 -0
- mindspore/mspft140.dll +0 -0
- mindspore/msvcdis140.dll +0 -0
- mindspore/msvcp140_1.dll +0 -0
- mindspore/msvcp140_2.dll +0 -0
- mindspore/msvcp140_atomic_wait.dll +0 -0
- mindspore/msvcp140_codecvt_ids.dll +0 -0
- mindspore/multiprocessing/__init__.py +5 -0
- mindspore/nn/__init__.py +4 -1
- mindspore/nn/cell.py +1373 -192
- mindspore/nn/dynamic_lr.py +2 -1
- mindspore/nn/layer/activation.py +29 -27
- mindspore/nn/layer/basic.py +51 -35
- mindspore/nn/layer/channel_shuffle.py +3 -3
- mindspore/nn/layer/container.py +1 -1
- mindspore/nn/layer/conv.py +53 -42
- mindspore/nn/layer/embedding.py +12 -11
- mindspore/nn/layer/normalization.py +56 -49
- mindspore/nn/layer/padding.py +4 -3
- mindspore/nn/layer/pooling.py +120 -42
- mindspore/nn/layer/rnn_cells.py +1 -1
- mindspore/nn/layer/rnns.py +2 -1
- mindspore/nn/layer/timedistributed.py +5 -5
- mindspore/nn/layer/transformer.py +59 -36
- mindspore/nn/learning_rate_schedule.py +8 -4
- mindspore/nn/loss/loss.py +58 -55
- mindspore/nn/optim/ada_grad.py +7 -5
- mindspore/nn/optim/adadelta.py +11 -9
- mindspore/nn/optim/adafactor.py +1 -1
- mindspore/nn/optim/adam.py +19 -15
- mindspore/nn/optim/adamax.py +8 -7
- mindspore/nn/optim/adasum.py +5 -5
- mindspore/nn/optim/asgd.py +3 -1
- mindspore/nn/optim/ftrl.py +11 -9
- mindspore/nn/optim/lamb.py +1 -1
- mindspore/nn/optim/lars.py +1 -4
- mindspore/nn/optim/lazyadam.py +12 -10
- mindspore/nn/optim/momentum.py +7 -6
- mindspore/nn/optim/optimizer.py +3 -3
- mindspore/nn/optim/proximal_ada_grad.py +12 -10
- mindspore/nn/optim/rmsprop.py +13 -12
- mindspore/nn/optim/rprop.py +11 -9
- mindspore/nn/optim/sgd.py +9 -6
- mindspore/nn/optim/tft_wrapper.py +5 -2
- mindspore/nn/optim/thor.py +2 -1
- mindspore/nn/probability/bijector/bijector.py +17 -11
- mindspore/nn/probability/bijector/gumbel_cdf.py +5 -5
- mindspore/nn/probability/bijector/invert.py +2 -2
- mindspore/nn/probability/bijector/scalar_affine.py +3 -3
- mindspore/nn/probability/bijector/softplus.py +3 -2
- mindspore/nn/probability/distribution/beta.py +3 -3
- mindspore/nn/probability/distribution/categorical.py +1 -1
- mindspore/nn/probability/distribution/cauchy.py +4 -2
- mindspore/nn/probability/distribution/exponential.py +6 -7
- mindspore/nn/probability/distribution/gamma.py +2 -2
- mindspore/nn/probability/distribution/gumbel.py +2 -2
- mindspore/nn/probability/distribution/half_normal.py +5 -3
- mindspore/nn/probability/distribution/logistic.py +5 -3
- mindspore/nn/probability/distribution/poisson.py +1 -1
- mindspore/nn/probability/distribution/uniform.py +5 -3
- mindspore/nn/reinforcement/_tensors_queue.py +1 -1
- mindspore/nn/reinforcement/tensor_array.py +1 -1
- mindspore/nn/utils/init.py +13 -11
- mindspore/nn/wrap/__init__.py +6 -6
- mindspore/nn/wrap/cell_wrapper.py +181 -122
- mindspore/nn/wrap/grad_reducer.py +45 -36
- mindspore/nn/wrap/loss_scale.py +6 -7
- mindspore/numpy/array_creations.py +63 -65
- mindspore/numpy/array_ops.py +149 -144
- mindspore/numpy/logic_ops.py +41 -42
- mindspore/numpy/math_ops.py +361 -359
- mindspore/numpy/utils.py +17 -18
- mindspore/numpy/utils_const.py +5 -6
- mindspore/opencv_core452.dll +0 -0
- mindspore/opencv_imgcodecs452.dll +0 -0
- mindspore/opencv_imgproc452.dll +0 -0
- mindspore/ops/__init__.py +5 -3
- mindspore/ops/_grad_experimental/grad_comm_ops.py +112 -16
- mindspore/ops/_grad_experimental/grad_debug_ops.py +14 -2
- mindspore/ops/_grad_experimental/grad_inner_ops.py +9 -0
- mindspore/ops/_grad_experimental/grad_math_ops.py +2 -1
- mindspore/ops/_grad_experimental/taylor_rule.py +29 -0
- mindspore/ops/_op_impl/cpu/__init__.py +1 -0
- mindspore/ops/_op_impl/cpu/raise_op.py +28 -0
- mindspore/ops/_register_for_op.py +0 -11
- mindspore/{ops_generate → ops/_utils}/arg_dtype_cast.py +123 -4
- mindspore/{ops_generate → ops/_utils}/arg_handler.py +3 -65
- mindspore/ops/_vmap/vmap_array_ops.py +52 -25
- mindspore/ops/_vmap/vmap_base.py +0 -2
- mindspore/ops/_vmap/vmap_grad_nn_ops.py +21 -14
- mindspore/ops/_vmap/vmap_math_ops.py +15 -16
- mindspore/ops/_vmap/vmap_nn_ops.py +29 -42
- mindspore/ops/auto_generate/__init__.py +4 -3
- mindspore/ops/auto_generate/cpp_create_prim_instance_helper.py +258 -46
- mindspore/ops/auto_generate/gen_extend_func.py +757 -185
- mindspore/ops/auto_generate/gen_ops_def.py +4197 -2243
- mindspore/ops/auto_generate/gen_ops_prim.py +16976 -6055
- mindspore/ops/auto_generate/pyboost_inner_prim.py +221 -87
- mindspore/ops/composite/__init__.py +2 -1
- mindspore/ops/composite/base.py +20 -25
- mindspore/ops/composite/math_ops.py +6 -16
- mindspore/ops/composite/multitype_ops/__init__.py +5 -2
- mindspore/ops/composite/multitype_ops/_compile_utils.py +228 -30
- mindspore/ops/composite/multitype_ops/_constexpr_utils.py +1 -2
- mindspore/ops/composite/multitype_ops/add_impl.py +2 -1
- mindspore/ops/composite/multitype_ops/bitwise_and_impl.py +2 -1
- mindspore/ops/composite/multitype_ops/bitwise_or_impl.py +2 -1
- mindspore/ops/composite/multitype_ops/bitwise_xor_impl.py +2 -1
- mindspore/ops/composite/multitype_ops/div_impl.py +6 -4
- mindspore/ops/composite/multitype_ops/equal_impl.py +4 -3
- mindspore/ops/composite/multitype_ops/floordiv_impl.py +2 -1
- mindspore/ops/composite/multitype_ops/getitem_impl.py +3 -2
- mindspore/ops/composite/multitype_ops/greater_equal_impl.py +4 -3
- mindspore/ops/composite/multitype_ops/greater_impl.py +4 -3
- mindspore/ops/composite/multitype_ops/in_impl.py +2 -1
- mindspore/ops/composite/multitype_ops/invert_impl.py +50 -0
- mindspore/ops/composite/multitype_ops/left_shift_impl.py +2 -1
- mindspore/ops/composite/multitype_ops/less_equal_impl.py +4 -3
- mindspore/ops/composite/multitype_ops/less_impl.py +4 -3
- mindspore/ops/composite/multitype_ops/logic_not_impl.py +3 -2
- mindspore/ops/composite/multitype_ops/logical_and_impl.py +2 -1
- mindspore/ops/composite/multitype_ops/logical_or_impl.py +2 -1
- mindspore/ops/composite/multitype_ops/mod_impl.py +2 -1
- mindspore/ops/composite/multitype_ops/mul_impl.py +3 -2
- mindspore/ops/composite/multitype_ops/negative_impl.py +2 -1
- mindspore/ops/composite/multitype_ops/not_equal_impl.py +2 -1
- mindspore/ops/composite/multitype_ops/not_in_impl.py +2 -1
- mindspore/ops/composite/multitype_ops/ones_like_impl.py +18 -0
- mindspore/ops/composite/multitype_ops/pow_impl.py +2 -30
- mindspore/ops/composite/multitype_ops/right_shift_impl.py +2 -1
- mindspore/ops/composite/multitype_ops/setitem_impl.py +2 -1
- mindspore/ops/composite/multitype_ops/sub_impl.py +2 -1
- mindspore/ops/function/__init__.py +40 -2
- mindspore/ops/function/_add_attr_func.py +58 -0
- mindspore/ops/function/array_func.py +2089 -2403
- mindspore/ops/function/clip_func.py +80 -23
- mindspore/ops/function/debug_func.py +57 -57
- mindspore/ops/function/grad/__init__.py +1 -0
- mindspore/ops/function/grad/grad_func.py +104 -71
- mindspore/ops/function/image_func.py +2 -2
- mindspore/ops/function/linalg_func.py +47 -78
- mindspore/ops/function/math_func.py +4351 -3813
- mindspore/ops/function/nn_func.py +1712 -637
- mindspore/ops/function/other_func.py +159 -1
- mindspore/ops/function/parameter_func.py +18 -84
- mindspore/ops/function/random_func.py +452 -387
- mindspore/ops/function/reshard_func.py +4 -70
- mindspore/ops/function/sparse_func.py +3 -3
- mindspore/ops/function/sparse_unary_func.py +6 -6
- mindspore/ops/function/spectral_func.py +25 -58
- mindspore/ops/function/vmap_func.py +26 -18
- mindspore/ops/functional.py +23 -7
- mindspore/ops/functional_overload.py +1548 -0
- mindspore/ops/op_info_register.py +32 -244
- mindspore/ops/operations/__init__.py +23 -15
- mindspore/ops/operations/_custom_ops_utils.py +235 -0
- mindspore/ops/operations/_embedding_cache_ops.py +4 -4
- mindspore/ops/operations/_grad_ops.py +2 -43
- mindspore/ops/operations/_infer_ops.py +2 -1
- mindspore/ops/operations/_inner_ops.py +43 -84
- mindspore/ops/operations/_ms_kernel.py +4 -10
- mindspore/ops/operations/_rl_inner_ops.py +1 -1
- mindspore/ops/operations/_scalar_ops.py +3 -2
- mindspore/ops/operations/_sequence_ops.py +1 -1
- mindspore/ops/operations/_tensor_array.py +1 -1
- mindspore/ops/operations/array_ops.py +81 -324
- mindspore/ops/operations/comm_ops.py +154 -108
- mindspore/ops/operations/custom_ops.py +298 -87
- mindspore/ops/operations/debug_ops.py +157 -59
- mindspore/ops/operations/inner_ops.py +7 -5
- mindspore/ops/operations/linalg_ops.py +1 -57
- mindspore/ops/operations/manually_defined/_inner.py +1 -1
- mindspore/ops/operations/manually_defined/ops_def.py +928 -180
- mindspore/ops/operations/math_ops.py +32 -234
- mindspore/ops/operations/nn_ops.py +212 -531
- mindspore/ops/operations/other_ops.py +62 -9
- mindspore/ops/operations/random_ops.py +13 -7
- mindspore/ops/operations/reshard_ops.py +1 -1
- mindspore/ops/operations/sparse_ops.py +2 -2
- mindspore/ops/primitive.py +66 -53
- mindspore/ops/tensor_method.py +1895 -0
- mindspore/ops_generate/__init__.py +0 -5
- mindspore/ops_generate/aclnn/__init__.py +0 -0
- mindspore/ops_generate/aclnn/aclnn_kernel_register_auto_cc_generator.py +135 -0
- mindspore/ops_generate/aclnn/gen_aclnn_implement.py +257 -0
- mindspore/ops_generate/api/__init__.py +0 -0
- mindspore/ops_generate/api/add_tensor_docs_generator.py +56 -0
- mindspore/ops_generate/api/cpp_create_prim_instance_helper_generator.py +105 -0
- mindspore/ops_generate/api/functional_map_cpp_generator.py +504 -0
- mindspore/ops_generate/api/functional_overload_py_generator.py +112 -0
- mindspore/ops_generate/api/functions_cc_generator.py +237 -0
- mindspore/ops_generate/api/gen_api.py +103 -0
- mindspore/ops_generate/api/op_api_proto.py +235 -0
- mindspore/ops_generate/api/tensor_func_reg_cpp_generator.py +461 -0
- mindspore/ops_generate/common/__init__.py +0 -0
- mindspore/ops_generate/common/base_generator.py +11 -0
- mindspore/ops_generate/common/gen_constants.py +91 -0
- mindspore/ops_generate/common/gen_utils.py +348 -0
- mindspore/ops_generate/common/op_proto.py +473 -0
- mindspore/ops_generate/common/template.py +523 -0
- mindspore/ops_generate/gen_ops.py +22 -1069
- mindspore/ops_generate/op_def/__init__.py +0 -0
- mindspore/ops_generate/op_def/gen_op_def.py +90 -0
- mindspore/ops_generate/op_def/lite_ops_cpp_generator.py +191 -0
- mindspore/ops_generate/op_def/ops_def_cc_generator.py +296 -0
- mindspore/ops_generate/op_def/ops_def_h_generator.py +74 -0
- mindspore/ops_generate/op_def/ops_name_h_generator.py +83 -0
- mindspore/ops_generate/op_def/ops_primitive_h_generator.py +125 -0
- mindspore/ops_generate/op_def_py/__init__.py +0 -0
- mindspore/ops_generate/op_def_py/gen_op_def_py.py +47 -0
- mindspore/ops_generate/op_def_py/op_def_py_generator.py +132 -0
- mindspore/ops_generate/op_def_py/op_prim_py_generator.py +489 -0
- mindspore/ops_generate/pyboost/__init__.py +0 -0
- mindspore/ops_generate/pyboost/auto_grad_impl_cc_generator.py +139 -0
- mindspore/ops_generate/pyboost/auto_grad_reg_cc_generator.py +93 -0
- mindspore/ops_generate/pyboost/gen_pyboost_func.py +175 -0
- mindspore/ops_generate/pyboost/op_template_parser.py +517 -0
- mindspore/ops_generate/pyboost/pyboost_functions_cpp_generator.py +407 -0
- mindspore/ops_generate/pyboost/pyboost_functions_h_generator.py +100 -0
- mindspore/ops_generate/pyboost/pyboost_functions_py_generator.py +148 -0
- mindspore/ops_generate/pyboost/pyboost_grad_function_cpp_generator.py +155 -0
- mindspore/ops_generate/pyboost/pyboost_inner_prim_generator.py +132 -0
- mindspore/ops_generate/pyboost/pyboost_native_grad_functions_generator.py +272 -0
- mindspore/ops_generate/pyboost/pyboost_op_cpp_code_generator.py +938 -0
- mindspore/ops_generate/pyboost/pyboost_overload_functions_cpp_generator.py +357 -0
- mindspore/ops_generate/{pyboost_utils.py → pyboost/pyboost_utils.py} +179 -36
- mindspore/ops_generate/resources/__init__.py +0 -0
- mindspore/ops_generate/resources/resource_list.py +30 -0
- mindspore/ops_generate/resources/resource_loader.py +36 -0
- mindspore/ops_generate/resources/resource_manager.py +64 -0
- mindspore/ops_generate/resources/yaml_loader.py +88 -0
- mindspore/ops_generate/tensor_py_cc_generator.py +122 -0
- mindspore/parallel/__init__.py +7 -3
- mindspore/parallel/_auto_parallel_context.py +159 -40
- mindspore/parallel/_cell_wrapper.py +132 -15
- mindspore/parallel/_parallel_serialization.py +107 -5
- mindspore/parallel/_ps_context.py +1 -1
- mindspore/parallel/_recovery_context.py +7 -2
- mindspore/parallel/_tensor.py +142 -18
- mindspore/parallel/_utils.py +199 -23
- mindspore/parallel/algo_parameter_config.py +4 -4
- mindspore/parallel/auto_parallel.py +732 -0
- mindspore/parallel/checkpoint_convert.py +159 -0
- mindspore/parallel/checkpoint_transform.py +700 -35
- mindspore/parallel/cluster/process_entity/_api.py +276 -50
- mindspore/parallel/cluster/process_entity/_utils.py +41 -6
- mindspore/parallel/cluster/run.py +21 -4
- mindspore/parallel/function/__init__.py +24 -0
- mindspore/parallel/function/reshard_func.py +258 -0
- mindspore/parallel/nn/__init__.py +25 -0
- mindspore/parallel/nn/parallel_cell_wrapper.py +263 -0
- mindspore/parallel/nn/parallel_grad_reducer.py +169 -0
- mindspore/parallel/parameter_broadcast.py +25 -14
- mindspore/parallel/shard.py +137 -59
- mindspore/parallel/transform_safetensors.py +364 -305
- mindspore/pgodb140.dll +0 -0
- mindspore/pgort140.dll +0 -0
- mindspore/profiler/__init__.py +22 -5
- mindspore/profiler/analysis/__init__.py +0 -0
- mindspore/profiler/analysis/parser/__init__.py +0 -0
- mindspore/profiler/analysis/parser/ascend_cann_parser.py +170 -0
- mindspore/profiler/analysis/parser/base_parser.py +158 -0
- mindspore/profiler/analysis/parser/framework_cann_relation_parser.py +45 -0
- mindspore/profiler/analysis/parser/ms_framework_parser.py +142 -0
- mindspore/profiler/analysis/parser/ms_minddata_parser.py +145 -0
- mindspore/profiler/analysis/parser/timeline_assembly_factory/__init__.py +0 -0
- mindspore/profiler/analysis/parser/timeline_assembly_factory/ascend_timeline_assembler.py +264 -0
- mindspore/profiler/analysis/parser/timeline_assembly_factory/base_timeline_assembler.py +40 -0
- mindspore/profiler/analysis/parser/timeline_assembly_factory/trace_view_container.py +109 -0
- mindspore/profiler/analysis/parser/timeline_creator/__init__.py +0 -0
- mindspore/profiler/analysis/parser/timeline_creator/base_timeline_creator.py +44 -0
- mindspore/profiler/analysis/parser/timeline_creator/cpu_op_timeline_creator.py +90 -0
- mindspore/profiler/analysis/parser/timeline_creator/fwk_timeline_creator.py +76 -0
- mindspore/profiler/analysis/parser/timeline_creator/msprof_timeline_creator.py +103 -0
- mindspore/profiler/analysis/parser/timeline_creator/scope_layer_timeline_creator.py +134 -0
- mindspore/profiler/analysis/parser/timeline_event/__init__.py +0 -0
- mindspore/profiler/analysis/parser/timeline_event/base_event.py +233 -0
- mindspore/profiler/analysis/parser/timeline_event/cpu_op_event.py +47 -0
- mindspore/profiler/analysis/parser/timeline_event/flow_event.py +36 -0
- mindspore/profiler/analysis/parser/timeline_event/fwk_event.py +415 -0
- mindspore/profiler/analysis/parser/timeline_event/msprof_event.py +73 -0
- mindspore/profiler/analysis/parser/timeline_event/scope_layer_event.py +53 -0
- mindspore/profiler/analysis/parser/timeline_event/timeline_event_pool.py +146 -0
- mindspore/profiler/analysis/task_manager.py +131 -0
- mindspore/profiler/analysis/time_converter.py +84 -0
- mindspore/profiler/analysis/viewer/__init__.py +0 -0
- mindspore/profiler/analysis/viewer/ascend_communication_viewer.py +372 -0
- mindspore/profiler/analysis/viewer/ascend_integrate_viewer.py +87 -0
- mindspore/profiler/analysis/viewer/ascend_kernel_details_viewer.py +250 -0
- mindspore/profiler/analysis/viewer/ascend_memory_viewer.py +320 -0
- mindspore/profiler/analysis/viewer/ascend_op_memory_viewer.py +327 -0
- mindspore/profiler/analysis/viewer/ascend_step_trace_time_viewer.py +376 -0
- mindspore/profiler/analysis/viewer/ascend_timeline_viewer.py +58 -0
- mindspore/profiler/analysis/viewer/base_viewer.py +26 -0
- mindspore/profiler/analysis/viewer/ms_dataset_viewer.py +96 -0
- mindspore/profiler/analysis/viewer/ms_minddata_viewer.py +581 -0
- mindspore/profiler/analysis/work_flow.py +73 -0
- mindspore/profiler/common/ascend_msprof_exporter.py +139 -0
- mindspore/profiler/common/command_executor.py +90 -0
- mindspore/profiler/common/constant.py +186 -3
- mindspore/profiler/common/file_manager.py +208 -0
- mindspore/profiler/common/log.py +130 -0
- mindspore/profiler/common/msprof_cmd_tool.py +221 -0
- mindspore/profiler/common/path_manager.py +395 -0
- mindspore/profiler/common/process_bar.py +168 -0
- mindspore/profiler/common/process_pool.py +9 -3
- mindspore/profiler/common/profiler_context.py +500 -0
- mindspore/profiler/common/profiler_info.py +304 -0
- mindspore/profiler/common/profiler_meta_data.py +74 -0
- mindspore/profiler/common/profiler_output_path.py +284 -0
- mindspore/profiler/common/profiler_parameters.py +251 -0
- mindspore/profiler/common/profiler_path_manager.py +179 -0
- mindspore/profiler/common/record_function.py +76 -0
- mindspore/profiler/common/tlv_decoder.py +76 -0
- mindspore/profiler/common/util.py +75 -2
- mindspore/profiler/dynamic_profiler.py +341 -75
- mindspore/profiler/envprofiler.py +163 -0
- mindspore/profiler/experimental_config.py +197 -0
- mindspore/profiler/mstx.py +242 -0
- mindspore/profiler/platform/__init__.py +21 -0
- mindspore/profiler/platform/base_profiler.py +40 -0
- mindspore/profiler/platform/cpu_profiler.py +124 -0
- mindspore/profiler/platform/gpu_profiler.py +74 -0
- mindspore/profiler/platform/npu_profiler.py +335 -0
- mindspore/profiler/profiler.py +1073 -90
- mindspore/profiler/profiler_action_controller.py +187 -0
- mindspore/profiler/profiler_interface.py +118 -0
- mindspore/profiler/schedule.py +243 -0
- mindspore/rewrite/api/node.py +15 -13
- mindspore/rewrite/api/symbol_tree.py +2 -3
- mindspore/run_check/_check_version.py +27 -20
- mindspore/run_check/run_check.py +1 -1
- mindspore/runtime/__init__.py +37 -0
- mindspore/runtime/device.py +27 -0
- mindspore/runtime/event.py +209 -0
- mindspore/runtime/executor.py +177 -0
- mindspore/runtime/memory.py +416 -0
- mindspore/runtime/stream.py +460 -0
- mindspore/runtime/thread_bind_core.py +401 -0
- mindspore/safeguard/rewrite_obfuscation.py +12 -9
- mindspore/swresample-4.dll +0 -0
- mindspore/swscale-6.dll +0 -0
- mindspore/tbbmalloc.dll +0 -0
- mindspore/tinyxml2.dll +0 -0
- mindspore/train/__init__.py +8 -8
- mindspore/train/_utils.py +96 -27
- mindspore/train/amp.py +9 -5
- mindspore/train/callback/__init__.py +2 -2
- mindspore/train/callback/_callback.py +2 -16
- mindspore/train/callback/_checkpoint.py +53 -55
- mindspore/train/callback/_cluster_monitor.py +14 -18
- mindspore/train/callback/_early_stop.py +1 -1
- mindspore/train/callback/_flops_collector.py +103 -68
- mindspore/train/callback/_history.py +8 -5
- mindspore/train/callback/_lambda_callback.py +2 -2
- mindspore/train/callback/_landscape.py +0 -3
- mindspore/train/callback/_loss_monitor.py +2 -1
- mindspore/train/callback/_on_request_exit.py +6 -5
- mindspore/train/callback/_reduce_lr_on_plateau.py +11 -6
- mindspore/train/callback/_summary_collector.py +52 -19
- mindspore/train/callback/_time_monitor.py +2 -1
- mindspore/train/callback/{_tft_register.py → _train_fault_tolerance.py} +228 -108
- mindspore/train/data_sink.py +25 -2
- mindspore/train/dataset_helper.py +15 -16
- mindspore/train/loss_scale_manager.py +8 -7
- mindspore/train/metrics/accuracy.py +3 -3
- mindspore/train/metrics/confusion_matrix.py +9 -9
- mindspore/train/metrics/error.py +3 -3
- mindspore/train/metrics/hausdorff_distance.py +4 -4
- mindspore/train/metrics/mean_surface_distance.py +3 -3
- mindspore/train/metrics/metric.py +0 -12
- mindspore/train/metrics/occlusion_sensitivity.py +4 -2
- mindspore/train/metrics/precision.py +11 -10
- mindspore/train/metrics/recall.py +9 -9
- mindspore/train/metrics/root_mean_square_surface_distance.py +2 -2
- mindspore/train/mind_ir_pb2.py +174 -46
- mindspore/train/model.py +269 -136
- mindspore/train/serialization.py +622 -978
- mindspore/train/summary/_summary_adapter.py +2 -2
- mindspore/train/summary/summary_record.py +2 -3
- mindspore/train/train_thor/model_thor.py +1 -1
- mindspore/turbojpeg.dll +0 -0
- mindspore/utils/__init__.py +6 -3
- mindspore/utils/dryrun.py +140 -0
- mindspore/utils/hooks.py +81 -0
- mindspore/utils/runtime_execution_order_check.py +552 -0
- mindspore/utils/utils.py +138 -4
- mindspore/vcmeta.dll +0 -0
- mindspore/vcruntime140.dll +0 -0
- mindspore/vcruntime140_1.dll +0 -0
- mindspore/version.py +1 -1
- {mindspore-2.4.10.dist-info → mindspore-2.6.0.dist-info}/METADATA +3 -3
- {mindspore-2.4.10.dist-info → mindspore-2.6.0.dist-info}/RECORD +587 -418
- {mindspore-2.4.10.dist-info → mindspore-2.6.0.dist-info}/entry_points.txt +1 -1
- mindspore/_install_custom.py +0 -43
- mindspore/common/_register_for_adapter.py +0 -74
- mindspore/common/_tensor_overload.py +0 -139
- mindspore/mindspore_np_dtype.dll +0 -0
- mindspore/ops/auto_generate/gen_arg_dtype_cast.py +0 -252
- mindspore/ops/auto_generate/gen_arg_handler.py +0 -197
- mindspore/ops/operations/_opaque_predicate_registry.py +0 -41
- mindspore/ops_generate/gen_aclnn_implement.py +0 -263
- mindspore/ops_generate/gen_ops_inner_prim.py +0 -131
- mindspore/ops_generate/gen_pyboost_func.py +0 -1052
- mindspore/ops_generate/gen_utils.py +0 -209
- mindspore/ops_generate/op_proto.py +0 -145
- mindspore/ops_generate/template.py +0 -261
- mindspore/profiler/envprofiling.py +0 -254
- mindspore/profiler/profiling.py +0 -1926
- {mindspore-2.4.10.dist-info → mindspore-2.6.0.dist-info}/WHEEL +0 -0
- {mindspore-2.4.10.dist-info → mindspore-2.6.0.dist-info}/top_level.txt +0 -0
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# Copyright 2020-
|
|
1
|
+
# Copyright 2020-2024 Huawei Technologies Co., Ltd
|
|
2
2
|
#
|
|
3
3
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
4
|
# you may not use this file except in compliance with the License.
|
|
@@ -31,15 +31,18 @@ from mindspore.ops.primitive import PrimitiveWithInfer
|
|
|
31
31
|
from mindspore.ops.primitive import PrimitiveWithCheck
|
|
32
32
|
from mindspore.ops.primitive import prim_attr_register
|
|
33
33
|
from mindspore.run_check._check_version import AscendEnvChecker
|
|
34
|
-
from
|
|
34
|
+
from mindspore._c_expression import pyboost_all_finite
|
|
35
|
+
from mindspore.common._stub_tensor import _convert_stub
|
|
36
|
+
from ..auto_generate import (CeLU, Flatten, LogSoftmax, LogSoftmaxExt, GLU, ReLU, ReLU6, Dense, Tanh,
|
|
35
37
|
Elu, Sigmoid, Softmax, SoftplusExt, HSwish, HSigmoid, AvgPool, BiasAdd,
|
|
36
38
|
NLLLoss, OneHot, GeLU, FastGeLU, PReLU, RmsNorm, IncreFlashAttention, MSELossExt,
|
|
37
39
|
GridSampler3D, GridSampler2D, LayerNorm, LayerNormExt, HShrink, AdamWeightDecay, Dropout,
|
|
38
|
-
ApplyRotaryPosEmb, PagedAttention, PagedAttentionMask, ReshapeAndCache,
|
|
39
|
-
FlashAttentionScore, Embedding, UpsampleNearest1D, UpsampleNearest2D,
|
|
40
|
+
ApplyRotaryPosEmb, GroupTopk, PagedAttention, PagedAttentionMask, ReshapeAndCache,
|
|
41
|
+
FlashAttentionScore, PromptFlashAttention, Embedding, UpsampleNearest1D, UpsampleNearest2D,
|
|
40
42
|
UpsampleNearest3D, UpsampleTrilinear3D,
|
|
41
|
-
UpsampleBilinear2D, UpsampleLinear1D,
|
|
42
|
-
BinaryCrossEntropy, BCEWithLogitsLoss, SoftShrink
|
|
43
|
+
SoftMarginLoss, UpsampleBilinear2D, UpsampleLinear1D,
|
|
44
|
+
BinaryCrossEntropy, BCEWithLogitsLoss, SoftShrink, AdaptiveMaxPool2D,
|
|
45
|
+
SmoothL1Loss)
|
|
43
46
|
from .manually_defined import BatchNorm
|
|
44
47
|
|
|
45
48
|
|
|
@@ -246,78 +249,6 @@ class AdaptiveAvgPool2D(Primitive):
|
|
|
246
249
|
self.add_prim_attr('output_size', self.output_size)
|
|
247
250
|
|
|
248
251
|
|
|
249
|
-
class AdaptiveMaxPool2D(Primitive):
|
|
250
|
-
r"""
|
|
251
|
-
Performs 2D adaptive max pooling on a multi-plane input signal.
|
|
252
|
-
|
|
253
|
-
Refer to :func:`mindspore.ops.adaptive_max_pool2d` for more details.
|
|
254
|
-
|
|
255
|
-
Args:
|
|
256
|
-
output_size (Union[int, tuple]): The target output size. `output_size` can be a tuple :math:`(H, W)`,
|
|
257
|
-
or an int H for :math:`(H, H)`. :math:`H` and :math:`W` can be int or None.
|
|
258
|
-
If it is None, it means the output size is the same as the input size.
|
|
259
|
-
|
|
260
|
-
Inputs:
|
|
261
|
-
- **input_x** (Tensor) - The input of AdaptiveMaxPool2D, which is a 3D or 4D tensor,
|
|
262
|
-
with float16, float32 or float64 data type.
|
|
263
|
-
|
|
264
|
-
Outputs:
|
|
265
|
-
Tensor, with the same type as the `input_x`.
|
|
266
|
-
|
|
267
|
-
Supported Platforms:
|
|
268
|
-
``Ascend`` ``GPU`` ``CPU``
|
|
269
|
-
|
|
270
|
-
Examples:
|
|
271
|
-
>>> # case 1: output_size=(None, 2)
|
|
272
|
-
>>> input_x = Tensor(np.array([[[[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]],
|
|
273
|
-
... [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]],
|
|
274
|
-
... [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]]]]), mindspore.float32)
|
|
275
|
-
>>> adaptive_max_pool_2d = ops.AdaptiveMaxPool2D((None, 2))
|
|
276
|
-
>>> output = adaptive_max_pool_2d(input_x)
|
|
277
|
-
>>> print(output[0])
|
|
278
|
-
[[[[2. 3.]
|
|
279
|
-
[5. 6.]
|
|
280
|
-
[8. 9.]]
|
|
281
|
-
[[2. 3.]
|
|
282
|
-
[5. 6.]
|
|
283
|
-
[8. 9.]]
|
|
284
|
-
[[2. 3.]
|
|
285
|
-
[5. 6.]
|
|
286
|
-
[8. 9.]]]]
|
|
287
|
-
>>> # case 2: output_size=2
|
|
288
|
-
>>> adaptive_max_pool_2d = ops.AdaptiveMaxPool2D(2)
|
|
289
|
-
>>> output = adaptive_max_pool_2d(input_x)
|
|
290
|
-
>>> print(output[0])
|
|
291
|
-
[[[[5. 6.]
|
|
292
|
-
[8. 9.]]
|
|
293
|
-
[[5. 6.]
|
|
294
|
-
[8. 9.]]
|
|
295
|
-
[[5. 6.]
|
|
296
|
-
[8. 9.]]]]
|
|
297
|
-
>>> # case 3: output_size=(1, 2)
|
|
298
|
-
>>> adaptive_max_pool_2d = ops.AdaptiveMaxPool2D((1, 2))
|
|
299
|
-
>>> output = adaptive_max_pool_2d(input_x)
|
|
300
|
-
>>> print(output[0])
|
|
301
|
-
[[[[8. 9.]]
|
|
302
|
-
[[8. 9.]]
|
|
303
|
-
[[8. 9.]]]]
|
|
304
|
-
"""
|
|
305
|
-
|
|
306
|
-
@prim_attr_register
|
|
307
|
-
def __init__(self, output_size):
|
|
308
|
-
"""Initialize AdaptiveMaxPool2D."""
|
|
309
|
-
validator.check_value_type("output_size", output_size, [int, tuple], self.name)
|
|
310
|
-
if isinstance(output_size, tuple):
|
|
311
|
-
validator.check_int(len(output_size), 2, validator.EQ,
|
|
312
|
-
'length of output_size', self.name)
|
|
313
|
-
self.output_size = (output_size, output_size) if isinstance(self.output_size, int) else output_size
|
|
314
|
-
self.output_size = (-1 if self.output_size[0] is None else self.output_size[0],
|
|
315
|
-
-1 if self.output_size[1] is None else self.output_size[1])
|
|
316
|
-
for size in self.output_size:
|
|
317
|
-
validator.check_number("output_size", size, -1, validator.GE, None)
|
|
318
|
-
self.add_prim_attr('output_size', self.output_size)
|
|
319
|
-
|
|
320
|
-
|
|
321
252
|
class AdaptiveMaxPool3D(Primitive):
|
|
322
253
|
r"""
|
|
323
254
|
Performs 3D adaptive max pooling on a multi-plane input signal.
|
|
@@ -612,12 +543,12 @@ class InstanceNorm(PrimitiveWithInfer):
|
|
|
612
543
|
Inputs:
|
|
613
544
|
- **input_x** (Tensor) - The input of InstanceNorm, Tensor of shape :math:`(N, C)`,
|
|
614
545
|
data type: float16 or float32.
|
|
615
|
-
- **gamma** (Parameter) - Scale, Tensor of shape :math:`(C,)`,
|
|
546
|
+
- **gamma** (Union[Parameter, Tensor])) - Scale, Tensor of shape :math:`(C,)`,
|
|
616
547
|
data type: float32.
|
|
617
|
-
- **beta** (Parameter) - Bias, Tensor of shape :math:`(C,)`,
|
|
548
|
+
- **beta** (Union[Parameter, Tensor])) - Bias, Tensor of shape :math:`(C,)`,
|
|
618
549
|
data type: float32.
|
|
619
|
-
- **mean** (Parameter) - Mean value, Tensor of shape :math:`(C,)`, data type: float32.
|
|
620
|
-
- **variance** (Parameter) - Variance value, Tensor of shape :math:`(C,)`, data type: float32.
|
|
550
|
+
- **mean** (Union[Parameter, Tensor])) - Mean value, Tensor of shape :math:`(C,)`, data type: float32.
|
|
551
|
+
- **variance** (Union[Parameter, Tensor])) - Variance value, Tensor of shape :math:`(C,)`, data type: float32.
|
|
621
552
|
|
|
622
553
|
Outputs:
|
|
623
554
|
Tuple of 3 Tensors, the normalized input, the updated parameters.
|
|
@@ -880,13 +811,13 @@ class Conv2D(Primitive):
|
|
|
880
811
|
|
|
881
812
|
Inputs:
|
|
882
813
|
- **x** (Tensor) - Input tensor of shape :math:`(N, C_{in}, H_{in}, W_{in})` or
|
|
883
|
-
:math:`(N, H_{in}, W_{in}, C_{in}
|
|
814
|
+
:math:`(N, H_{in}, W_{in}, C_{in})` depending on `data_format` .
|
|
884
815
|
- **weight** (Tensor) - The convolutional kernel value, it should has shape
|
|
885
816
|
:math:`(C_{out}, C_{in} / \text{group}, \text{kernel_size[0]}, \text{kernel_size[1]})` .
|
|
886
817
|
|
|
887
818
|
Outputs:
|
|
888
819
|
Tensor, the value that applied 2D convolution. The shape is :math:`(N, C_{out}, H_{out}, W_{out})`
|
|
889
|
-
or :math:`(N, H_{out}, W_{out}, C_{out}
|
|
820
|
+
or :math:`(N, H_{out}, W_{out}, C_{out})`.
|
|
890
821
|
To see how different pad modes affect the output shape, please refer to
|
|
891
822
|
:class:`mindspore.nn.Conv2d` for more details.
|
|
892
823
|
|
|
@@ -2052,17 +1983,18 @@ class Conv2DTranspose(Conv2DBackpropInput):
|
|
|
2052
1983
|
If this mode is set, `pad` must be greater than or equal to 0.
|
|
2053
1984
|
|
|
2054
1985
|
Please refer to :class:`mindspore.nn.Conv2dTranspose` for more specifications about `pad_mode`.
|
|
2055
|
-
pad (Union[int, tuple[int]]): The pad value to be filled. Default: ``0`` .
|
|
2056
|
-
|
|
2057
|
-
|
|
2058
|
-
|
|
2059
|
-
|
|
2060
|
-
|
|
2061
|
-
|
|
2062
|
-
|
|
1986
|
+
pad (Union[int, tuple[int]], optional): The pad value to be filled. Default: ``0`` .
|
|
1987
|
+
If `pad` is an integer, the paddings
|
|
1988
|
+
of top, bottom, left and right are the same, equal to pad. If `pad` is a tuple of four integers,
|
|
1989
|
+
the padding of top, bottom, left and right equal to pad[0], pad[1], pad[2], and pad[3]
|
|
1990
|
+
correspondingly.
|
|
1991
|
+
pad_list (Union[str, None], optional): The pad list like (top, bottom, left, right). Default: ``None`` .
|
|
1992
|
+
mode (int, optional): Modes for different convolutions. The value is currently not used. Default: ``1`` .
|
|
1993
|
+
stride (Union[int, tuple[int]], optional): The stride to be applied to the convolution filter. Default: ``1`` .
|
|
1994
|
+
dilation (Union[int, tuple[int]], optional): Specifies the dilation rate to be used for the dilated convolution.
|
|
2063
1995
|
Default: ``1`` .
|
|
2064
|
-
group (int): Splits input into groups. Default: ``1`` .
|
|
2065
|
-
data_format (str): The format of input and output data. It should be ``'NHWC'`` or ``'NCHW'`` .
|
|
1996
|
+
group (int, optional): Splits input into groups. Default: ``1`` .
|
|
1997
|
+
data_format (str, optional): The format of input and output data. It should be ``'NHWC'`` or ``'NCHW'`` .
|
|
2066
1998
|
Default is ``'NCHW'`` .
|
|
2067
1999
|
|
|
2068
2000
|
Inputs:
|
|
@@ -2130,7 +2062,7 @@ class SoftmaxCrossEntropyWithLogits(Primitive):
|
|
|
2130
2062
|
- **labels** (Tensor) - Ground truth labels, with shape :math:`(N, C)`, has the same data type with `logits`.
|
|
2131
2063
|
|
|
2132
2064
|
Outputs:
|
|
2133
|
-
Tuple of 2 tensors(loss, dlogits), the `loss` shape is :math:`(N,)`,
|
|
2065
|
+
Tuple of 2 tensors( `loss` , `dlogits` ), the `loss` shape is :math:`(N,)`,
|
|
2134
2066
|
and the `dlogits` with the same shape as `logits`.
|
|
2135
2067
|
|
|
2136
2068
|
Raises:
|
|
@@ -2164,7 +2096,7 @@ class SparseSoftmaxCrossEntropyWithLogits(Primitive):
|
|
|
2164
2096
|
r"""
|
|
2165
2097
|
Computes the softmax cross-entropy value between logits and sparse encoding labels.
|
|
2166
2098
|
|
|
2167
|
-
Sets input logits as `X`, input label as `Y`, output as `loss`.
|
|
2099
|
+
Sets input logits as `X`, input label as `Y`, output as `loss`. The formula is as follows:
|
|
2168
2100
|
|
|
2169
2101
|
.. math::
|
|
2170
2102
|
\begin{array}{ll} \\
|
|
@@ -2174,7 +2106,7 @@ class SparseSoftmaxCrossEntropyWithLogits(Primitive):
|
|
|
2174
2106
|
\end{array}
|
|
2175
2107
|
|
|
2176
2108
|
Args:
|
|
2177
|
-
is_grad (bool): If ``True`` , this operation returns the computed gradient. Default: ``False`` .
|
|
2109
|
+
is_grad (bool, optional): If ``True`` , this operation returns the computed gradient. Default: ``False`` .
|
|
2178
2110
|
|
|
2179
2111
|
Inputs:
|
|
2180
2112
|
- **logits** (Tensor) - Input logits, with shape :math:`(N, C)`. Data type must be float16 or float32.
|
|
@@ -2182,7 +2114,7 @@ class SparseSoftmaxCrossEntropyWithLogits(Primitive):
|
|
|
2182
2114
|
Data type must be int32 or int64.
|
|
2183
2115
|
|
|
2184
2116
|
Outputs:
|
|
2185
|
-
Tensor, if `is_grad` is False
|
|
2117
|
+
Tensor, if `is_grad` is ``False``, the output tensor is the value of loss;
|
|
2186
2118
|
if `is_grad` is ``True`` , the output tensor is the gradient of input with the same shape as `logits`.
|
|
2187
2119
|
|
|
2188
2120
|
Raises:
|
|
@@ -2281,15 +2213,15 @@ class ApplyMomentum(Primitive):
|
|
|
2281
2213
|
Refer to :class:`mindspore.nn.Momentum` for more details about the formula and usage.
|
|
2282
2214
|
|
|
2283
2215
|
Args:
|
|
2284
|
-
use_locking (bool): Whether to enable a lock to protect the variable and accumulation tensors
|
|
2216
|
+
use_locking (bool, optional): Whether to enable a lock to protect the variable and accumulation tensors
|
|
2285
2217
|
from being updated. Default: ``False`` .
|
|
2286
|
-
use_nesterov (bool): Enable Nesterov momentum. Default: ``False`` .
|
|
2287
|
-
gradient_scale (float): The scale of the gradient. Default: ``1.0`` .
|
|
2218
|
+
use_nesterov (bool, optional): Enable Nesterov momentum. Default: ``False`` .
|
|
2219
|
+
gradient_scale (float, optional): The scale of the gradient. Default: ``1.0`` .
|
|
2288
2220
|
|
|
2289
2221
|
Inputs:
|
|
2290
|
-
- **variable** (Parameter) - Weights to be updated. Data type must be float64, int64, float,
|
|
2291
|
-
int16, int32, int8, uint16, uint32, uint64, uint8, complex64, complex128.
|
|
2292
|
-
- **accumulation** (Parameter) - Accumulated gradient value by moment weight,
|
|
2222
|
+
- **variable** (Union[Parameter, Tensor]) - Weights to be updated. Data type must be float64, int64, float,
|
|
2223
|
+
float16, int16, int32, int8, uint16, uint32, uint64, uint8, complex64, complex128.
|
|
2224
|
+
- **accumulation** (Union[Parameter, Tensor]) - Accumulated gradient value by moment weight,
|
|
2293
2225
|
has the same data type with `variable`.
|
|
2294
2226
|
- **learning_rate** (Union[Number, Tensor]) - The learning rate value, must be a float64, int64, float,
|
|
2295
2227
|
float16, int16, int32, int8, uint16, uint32, uint64, uint8, complex64, complex128 number or
|
|
@@ -2306,7 +2238,7 @@ class ApplyMomentum(Primitive):
|
|
|
2306
2238
|
|
|
2307
2239
|
Raises:
|
|
2308
2240
|
TypeError: If the `use_locking` or `use_nesterov` is not a bool or `gradient_scale` is not a float.
|
|
2309
|
-
TypeError: If the data type of `var`, `accum` and `grad` conversion
|
|
2241
|
+
TypeError: If the data type of `var`, `accum` and `grad` conversion is not supported.
|
|
2310
2242
|
|
|
2311
2243
|
Supported Platforms:
|
|
2312
2244
|
``Ascend`` ``GPU`` ``CPU``
|
|
@@ -2354,55 +2286,6 @@ class ApplyMomentum(Primitive):
|
|
|
2354
2286
|
self.add_prim_attr('side_effect_mem', True)
|
|
2355
2287
|
|
|
2356
2288
|
|
|
2357
|
-
class SmoothL1Loss(Primitive):
|
|
2358
|
-
r"""
|
|
2359
|
-
Calculate the smooth L1 loss, and the L1 loss function has robustness.
|
|
2360
|
-
|
|
2361
|
-
Refer to :func:`mindspore.ops.smooth_l1_loss` for more details.
|
|
2362
|
-
|
|
2363
|
-
Args:
|
|
2364
|
-
beta (float, optional): A parameter used to control the point where the function will change between
|
|
2365
|
-
L1 to L2 loss. The value should be greater than zero. Default: ``1.0`` .
|
|
2366
|
-
reduction (str, optional): Apply specific reduction method to the output: ``'none'`` , ``'mean'`` ,
|
|
2367
|
-
``'sum'`` . Default: ``'none'`` .
|
|
2368
|
-
|
|
2369
|
-
- ``'none'``: no reduction will be applied.
|
|
2370
|
-
- ``'mean'``: compute and return the mean of elements in the output.
|
|
2371
|
-
- ``'sum'``: the output elements will be summed.
|
|
2372
|
-
|
|
2373
|
-
Inputs:
|
|
2374
|
-
- **logits** (Tensor) - Input Tensor of any dimension. Data type must be float16, float32 or float64.
|
|
2375
|
-
- **labels** (Tensor) - Ground truth data, has the same shape and dtype as the `logits`.
|
|
2376
|
-
|
|
2377
|
-
Outputs:
|
|
2378
|
-
Tensor, loss float tensor, same shape and dtype as the `logits`.
|
|
2379
|
-
|
|
2380
|
-
Supported Platforms:
|
|
2381
|
-
``Ascend`` ``GPU`` ``CPU``
|
|
2382
|
-
|
|
2383
|
-
Examples:
|
|
2384
|
-
>>> import mindspore
|
|
2385
|
-
>>> import numpy as np
|
|
2386
|
-
>>> from mindspore import Tensor, ops
|
|
2387
|
-
>>> loss = ops.SmoothL1Loss()
|
|
2388
|
-
>>> logits = Tensor(np.array([1, 2, 3]), mindspore.float32)
|
|
2389
|
-
>>> labels = Tensor(np.array([1, 2, 2]), mindspore.float32)
|
|
2390
|
-
>>> output = loss(logits, labels)
|
|
2391
|
-
>>> print(output)
|
|
2392
|
-
[0. 0. 0.5]
|
|
2393
|
-
"""
|
|
2394
|
-
|
|
2395
|
-
@prim_attr_register
|
|
2396
|
-
def __init__(self, beta=1.0, reduction='none'):
|
|
2397
|
-
"""Initialize SmoothL1Loss."""
|
|
2398
|
-
validator.check_value_type('beta', beta, [float], self.name)
|
|
2399
|
-
validator.check('beta', beta, '', 0, validator.GT, self.name)
|
|
2400
|
-
validator.check_string(
|
|
2401
|
-
reduction, ['none', 'sum', 'mean'], 'reduction', self.name)
|
|
2402
|
-
self.add_prim_attr('sigma', self.beta)
|
|
2403
|
-
self.init_prim_io_names(inputs=['prediction', 'target'], outputs=['output'])
|
|
2404
|
-
|
|
2405
|
-
|
|
2406
2289
|
class MultiMarginLoss(Primitive):
|
|
2407
2290
|
r"""
|
|
2408
2291
|
Creates a loss function that minimizes the hinge loss
|
|
@@ -2470,63 +2353,6 @@ class MultiMarginLoss(Primitive):
|
|
|
2470
2353
|
return super().__call__(x, target, weight)
|
|
2471
2354
|
|
|
2472
2355
|
|
|
2473
|
-
class SoftMarginLoss(Primitive):
|
|
2474
|
-
r"""
|
|
2475
|
-
SoftMarginLoss operation.
|
|
2476
|
-
|
|
2477
|
-
Creates a criterion that optimizes a two-class classification
|
|
2478
|
-
logistic loss between input tensor :math:`x` and target tensor :math:`y`
|
|
2479
|
-
(containing 1 or -1).
|
|
2480
|
-
|
|
2481
|
-
.. math::
|
|
2482
|
-
\text{loss}(x, y) = \sum_i \frac{\log(1 + \exp(-y[i]*x[i]))}{\text{x.nelement}()}
|
|
2483
|
-
|
|
2484
|
-
where :math:`x.nelement()` is the number of elements of x.
|
|
2485
|
-
|
|
2486
|
-
Args:
|
|
2487
|
-
reduction (str, optional): Apply specific reduction method to the output: ``'none'`` , ``'mean'`` ,
|
|
2488
|
-
``'sum'`` . Default: ``'mean'`` .
|
|
2489
|
-
|
|
2490
|
-
- ``'none'``: no reduction will be applied.
|
|
2491
|
-
- ``'mean'``: compute and return the mean of elements in the output.
|
|
2492
|
-
- ``'sum'``: the output elements will be summed.
|
|
2493
|
-
|
|
2494
|
-
Inputs:
|
|
2495
|
-
- **logits** (Tensor) - Predict data. Data type must be float16 or float32.
|
|
2496
|
-
- **labels** (Tensor) - Ground truth data, with the same type and shape as `logits`.
|
|
2497
|
-
|
|
2498
|
-
Outputs:
|
|
2499
|
-
Tensor or Scalar, if `reduction` is ``"none"``, its shape is the same as `logits`.
|
|
2500
|
-
Otherwise, a scalar value will be returned.
|
|
2501
|
-
|
|
2502
|
-
Raises:
|
|
2503
|
-
TypeError: If `logits` or `labels` is not a Tensor.
|
|
2504
|
-
TypeError: If dtype of `logits` or `labels` is neither float16 nor float32.
|
|
2505
|
-
ValueError: If shape of `logits` is not the same as `labels`.
|
|
2506
|
-
ValueError: If `reduction` is not one of ``"none"`` , ``"mean"`` or ``"sum"`` .
|
|
2507
|
-
|
|
2508
|
-
Supported Platforms:
|
|
2509
|
-
``Ascend`` ``GPU``
|
|
2510
|
-
|
|
2511
|
-
Examples:
|
|
2512
|
-
>>> import mindspore
|
|
2513
|
-
>>> import numpy as np
|
|
2514
|
-
>>> from mindspore import Tensor, ops
|
|
2515
|
-
>>> loss = ops.SoftMarginLoss()
|
|
2516
|
-
>>> logits = Tensor(np.array([[0.3, 0.7], [0.5, 0.5]]), mindspore.float32)
|
|
2517
|
-
>>> labels = Tensor(np.array([[-1, 1], [1, -1]]), mindspore.float32)
|
|
2518
|
-
>>> output = loss(logits, labels)
|
|
2519
|
-
>>> print(output)
|
|
2520
|
-
0.6764238
|
|
2521
|
-
"""
|
|
2522
|
-
|
|
2523
|
-
@prim_attr_register
|
|
2524
|
-
def __init__(self, reduction="mean"):
|
|
2525
|
-
"""Initialize SoftMarginLoss"""
|
|
2526
|
-
self.init_prim_io_names(inputs=['predict', 'label'], outputs=['loss'])
|
|
2527
|
-
self.reduction = validator.check_string(reduction, ['none', 'sum', 'mean'], 'reduction', self.name)
|
|
2528
|
-
|
|
2529
|
-
|
|
2530
2356
|
class L2Loss(Primitive):
|
|
2531
2357
|
r"""
|
|
2532
2358
|
Calculates half of the L2 norm, but do not square the result.
|
|
@@ -2790,12 +2616,12 @@ class ApplyRMSProp(PrimitiveWithInfer):
|
|
|
2790
2616
|
:math:`\eta` represents `learning_rate`. :math:`\nabla Q_{i}(w)` represents `grad`.
|
|
2791
2617
|
|
|
2792
2618
|
.. warning::
|
|
2793
|
-
Note that in dense implementation of this algorithm,
|
|
2794
|
-
but in this sparse implementation,
|
|
2795
|
-
in iterations during which
|
|
2619
|
+
Note that in dense implementation of this algorithm, `mean_square` and `moment` will update even if `grad` is 0,
|
|
2620
|
+
but in this sparse implementation, `mean_square` and `moment` will not update
|
|
2621
|
+
in iterations during which `grad` is 0.
|
|
2796
2622
|
|
|
2797
2623
|
Args:
|
|
2798
|
-
use_locking (bool): Whether to enable a lock to protect the variable and accumulation tensors
|
|
2624
|
+
use_locking (bool, optional): Whether to enable a lock to protect the variable and accumulation tensors
|
|
2799
2625
|
from being updated. Default: ``False`` .
|
|
2800
2626
|
|
|
2801
2627
|
Inputs:
|
|
@@ -3453,7 +3279,7 @@ class ComputeAccidentalHits(Primitive):
|
|
|
3453
3279
|
the weight is FLOAT_MAX. FLOAT_MAX indicates the max value in the type of Float
|
|
3454
3280
|
|
|
3455
3281
|
Args:
|
|
3456
|
-
num_true (int): The number of target classes per training example. Default: ``1`` .
|
|
3282
|
+
num_true (int, optional): The number of target classes per training example. Default: ``1`` .
|
|
3457
3283
|
|
|
3458
3284
|
Inputs:
|
|
3459
3285
|
- **true_classes** (Tensor) - The target classes. With data type of int64
|
|
@@ -3610,11 +3436,11 @@ class Adam(Primitive):
|
|
|
3610
3436
|
If ``False`` , update the gradients without using NAG. Default: ``False`` .
|
|
3611
3437
|
|
|
3612
3438
|
Inputs:
|
|
3613
|
-
- **var** (Parameter) - Weights to be updated. The shape is :math:`(N, *)` where :math:`*` means,
|
|
3439
|
+
- **var** (Union[Parameter, Tensor]) - Weights to be updated. The shape is :math:`(N, *)` where :math:`*` means,
|
|
3614
3440
|
any number of additional dimensions. The data type can be float16 or float32.
|
|
3615
|
-
- **m** (Parameter) - The 1st moment vector in the updating formula,
|
|
3441
|
+
- **m** (Union[Parameter, Tensor]) - The 1st moment vector in the updating formula,
|
|
3616
3442
|
the shape should be the same as `var`.
|
|
3617
|
-
- **v** (Parameter) - the 2nd moment vector in the updating formula,
|
|
3443
|
+
- **v** (Union[Parameter, Tensor]) - the 2nd moment vector in the updating formula,
|
|
3618
3444
|
the shape should be the same as `var`.
|
|
3619
3445
|
- **beta1_power** (float) - :math:`beta_1^t(\beta_1^{t})` in the updating formula.
|
|
3620
3446
|
- **beta2_power** (float) - :math:`beta_2^t(\beta_2^{t})` in the updating formula.
|
|
@@ -3785,8 +3611,8 @@ class AdamNoUpdateParam(Primitive):
|
|
|
3785
3611
|
|
|
3786
3612
|
class FusedSparseAdam(Primitive):
|
|
3787
3613
|
r"""
|
|
3788
|
-
Merges the duplicate value of the gradient and then updates parameters by the Adaptive Moment Estimation
|
|
3789
|
-
algorithm. This operator is used when the gradient is sparse.
|
|
3614
|
+
Merges the duplicate value of the gradient and then updates parameters or tensors by the Adaptive Moment Estimation
|
|
3615
|
+
(Adam) algorithm. This operator is used when the gradient is sparse.
|
|
3790
3616
|
|
|
3791
3617
|
The Adam algorithm is proposed in `Adam: A Method for Stochastic Optimization <https://arxiv.org/abs/1412.6980>`_.
|
|
3792
3618
|
|
|
@@ -3819,11 +3645,12 @@ class FusedSparseAdam(Primitive):
|
|
|
3819
3645
|
If ``False`` , update the gradients without using NAG. Default: ``False`` .
|
|
3820
3646
|
|
|
3821
3647
|
Inputs:
|
|
3822
|
-
- **var** (Parameter) - Parameters to be updated with float32 data type. The shape is
|
|
3823
|
-
where :math:`*` means, any number of additional dimensions.
|
|
3824
|
-
- **m** (Parameter) - The 1st moment vector in the updating formula, has the same shape and data
|
|
3825
|
-
|
|
3826
|
-
|
|
3648
|
+
- **var** (Union[Parameter, Tensor]) - Parameters or tensors to be updated with float32 data type. The shape is:
|
|
3649
|
+
math:`(N, *)` where :math:`*` means, any number of additional dimensions.
|
|
3650
|
+
- **m** (Union[Parameter, Tensor]) - The 1st moment vector in the updating formula, has the same shape and data
|
|
3651
|
+
type as `var`.
|
|
3652
|
+
- **v** (Union[Parameter, Tensor]) - The 2nd moment vector in the updating formula, has the same shape and data
|
|
3653
|
+
type as `var`. Mean square gradients, has the same type as `var` with float32 data type.
|
|
3827
3654
|
- **beta1_power** (Tensor) - :math:`beta_1^t` in the updating formula with float32 data type.
|
|
3828
3655
|
The shape is :math:`(1, )`.
|
|
3829
3656
|
- **beta2_power** (Tensor) - :math:`beta_2^t` in the updating formula with float32 data type.
|
|
@@ -3841,7 +3668,7 @@ class FusedSparseAdam(Primitive):
|
|
|
3841
3668
|
- **indices** (Tensor) - Gradient indices with int32 data type and indices.shape[0] = gradient.shape[0].
|
|
3842
3669
|
|
|
3843
3670
|
Outputs:
|
|
3844
|
-
Tuple of 3 Tensors, this operator will update the input parameters directly, the outputs are useless.
|
|
3671
|
+
Tuple of 3 Tensors, this operator will update the input parameters or tensors directly, the outputs are useless.
|
|
3845
3672
|
|
|
3846
3673
|
- **var** (Tensor) - A Tensor with shape :math:`(N, *)`.
|
|
3847
3674
|
- **m** (Tensor) - A Tensor with shape :math:`(1, )`.
|
|
@@ -3911,8 +3738,8 @@ class FusedSparseAdam(Primitive):
|
|
|
3911
3738
|
|
|
3912
3739
|
class FusedSparseLazyAdam(Primitive):
|
|
3913
3740
|
r"""
|
|
3914
|
-
Merges the duplicate value of the gradient and then updates parameters by the Adaptive Moment Estimation
|
|
3915
|
-
algorithm. This operator is used when the gradient is sparse. The behavior is not equivalent to the
|
|
3741
|
+
Merges the duplicate value of the gradient and then updates parameters or tensors by the Adaptive Moment Estimation
|
|
3742
|
+
(Adam) algorithm. This operator is used when the gradient is sparse. The behavior is not equivalent to the
|
|
3916
3743
|
original Adam algorithm, as only the current indices parameters will be updated.
|
|
3917
3744
|
|
|
3918
3745
|
The Adam algorithm is proposed in `Adam: A Method for Stochastic Optimization <https://arxiv.org/abs/1412.6980>`_.
|
|
@@ -3946,11 +3773,12 @@ class FusedSparseLazyAdam(Primitive):
|
|
|
3946
3773
|
If ``False`` , update the gradients without using NAG. Default: ``False`` .
|
|
3947
3774
|
|
|
3948
3775
|
Inputs:
|
|
3949
|
-
- **var** (Parameter) - Parameters to be updated with float32 data type. The shape is
|
|
3950
|
-
where :math:`*` means, any number of additional dimensions.
|
|
3951
|
-
- **m** (Parameter) - The 1st moment vector in the updating formula, has the same shape and data
|
|
3952
|
-
|
|
3953
|
-
|
|
3776
|
+
- **var** (Union[Parameter, Tensor]) - Parameters or tensors to be updated with float32 data type. The shape is:
|
|
3777
|
+
:math:`(N, *)` where :math:`*` means, any number of additional dimensions.
|
|
3778
|
+
- **m** (Union[Parameter, Tensor]) - The 1st moment vector in the updating formula, has the same shape and data
|
|
3779
|
+
type as `var`.
|
|
3780
|
+
- **v** (Union[Parameter, Tensor]) - The 2nd moment vector in the updating formula, has the same shape and data
|
|
3781
|
+
type as `var`. It holds the mean square of gradients, with float32 data type.
|
|
3954
3782
|
- **beta1_power** (Tensor) - :math:`beta_1^t` in the updating formula with float32 data type.
|
|
3955
3783
|
The shape is :math:`(1, )`.
|
|
3956
3784
|
- **beta2_power** (Tensor) - :math:`beta_2^t` in the updating formula with float32 data type.
|
|
@@ -3968,7 +3796,7 @@ class FusedSparseLazyAdam(Primitive):
|
|
|
3968
3796
|
- **indices** (Tensor) - Gradient indices with int32 data type and indices.shape[0] = gradient.shape[0].
|
|
3969
3797
|
|
|
3970
3798
|
Outputs:
|
|
3971
|
-
Tuple of 3 Tensors, this operator will update the input parameters directly, the outputs are useless.
|
|
3799
|
+
Tuple of 3 Tensors, this operator will update the input parameters or tensors directly, the outputs are useless.
|
|
3972
3800
|
|
|
3973
3801
|
- **var** (Tensor) - A Tensor with shape :math:`(N, *)`.
|
|
3974
3802
|
- **m** (Tensor) - A Tensor with shape :math:`(1, )`.
|
|
@@ -4054,17 +3882,18 @@ class FusedSparseFtrl(Primitive):
|
|
|
4054
3882
|
use_locking (bool): Use locks for updating operation if True . Default: ``False`` .
|
|
4055
3883
|
|
|
4056
3884
|
Inputs:
|
|
4057
|
-
- **var** (Parameter) - The variable to be updated. The data type must be float32. The shape is
|
|
4058
|
-
where :math:`*` means, any number of additional dimensions.
|
|
4059
|
-
- **accum** (Parameter) - The accumulation to be updated, must be same type and shape as `var`.
|
|
4060
|
-
- **linear** (Parameter) - the linear coefficient to be updated, must be same type and shape as
|
|
3885
|
+
- **var** (Union[Parameter, Tensor]) - The variable to be updated. The data type must be float32. The shape is
|
|
3886
|
+
:math:`(N, *)` where :math:`*` means, any number of additional dimensions.
|
|
3887
|
+
- **accum** (Union[Parameter, Tensor]) - The accumulation to be updated, must be same type and shape as `var`.
|
|
3888
|
+
- **linear** (Union[Parameter, Tensor]) - The linear coefficient to be updated, must be same type and shape as
|
|
3889
|
+
`var`.
|
|
4061
3890
|
- **grad** (Tensor) - A tensor of the same type as `var` and
|
|
4062
3891
|
grad.shape[1:] = var.shape[1:] if var.shape > 1.
|
|
4063
3892
|
- **indices** (Tensor) - A vector of indices into the first dimension of `var` and `accum`.
|
|
4064
3893
|
The type must be int32 and indices.shape[0] = grad.shape[0].
|
|
4065
3894
|
|
|
4066
3895
|
Outputs:
|
|
4067
|
-
Tuple of 3 Tensor, this operator will update the input parameters directly, the outputs are useless.
|
|
3896
|
+
Tuple of 3 Tensors, this operator will update the input parameters or tensors directly, the outputs are useless.
|
|
4068
3897
|
|
|
4069
3898
|
- **var** (Tensor) - A Tensor with shape :math:`(N, *)`.
|
|
4070
3899
|
- **accum** (Tensor) - A Tensor with shape :math:`(1, )`.
|
|
@@ -4151,9 +3980,10 @@ class FusedSparseProximalAdagrad(Primitive):
|
|
|
4151
3980
|
Default: ``False`` .
|
|
4152
3981
|
|
|
4153
3982
|
Inputs:
|
|
4154
|
-
- **var** (Parameter) - Variable tensor to be updated. The data type must be float32.
|
|
3983
|
+
- **var** (Union[Parameter, Tensor]) - Variable tensor to be updated. The data type must be float32.
|
|
4155
3984
|
The shape is :math:`(N, *)` where :math:`*` means, any number of additional dimensions.
|
|
4156
|
-
- **accum** (Parameter) - Variable tensor to be updated, has the same shape and data type as
|
|
3985
|
+
- **accum** (Union[Parameter, Tensor]) - Variable tensor to be updated, has the same shape and data type as
|
|
3986
|
+
`var`.
|
|
4157
3987
|
- **lr** (Tensor) - The learning rate value. The data type must be float32. The shape is :math:`(1, )`.
|
|
4158
3988
|
- **l1** (Tensor) - l1 regularization strength. The data type must be float32. The shape is :math:`(1, )`.
|
|
4159
3989
|
- **l2** (Tensor) - l2 regularization strength. The data type must be float32. The shape is :math:`(1, )`.
|
|
@@ -4163,7 +3993,7 @@ class FusedSparseProximalAdagrad(Primitive):
|
|
|
4163
3993
|
The type must be int32 and indices.shape[0] = grad.shape[0].
|
|
4164
3994
|
|
|
4165
3995
|
Outputs:
|
|
4166
|
-
Tuple of 2 Tensors, this operator will update the input parameters directly, the outputs are useless.
|
|
3996
|
+
Tuple of 2 Tensors, this operator will update the input parameters or tensors directly, the outputs are useless.
|
|
4167
3997
|
|
|
4168
3998
|
- **var** (Tensor) - A Tensor with shape :math:`(N, *)`.
|
|
4169
3999
|
- **accum** (Tensor) - A Tensor with shape :math:`(1, )`.
|
|
@@ -4254,7 +4084,7 @@ class KLDivLoss(Primitive):
|
|
|
4254
4084
|
or ``'sum'``.
|
|
4255
4085
|
|
|
4256
4086
|
Args:
|
|
4257
|
-
reduction (str): Specifies the reduction to be applied to the output.
|
|
4087
|
+
reduction (str, optional): Specifies the reduction to be applied to the output.
|
|
4258
4088
|
Default: ``'mean'`` .
|
|
4259
4089
|
|
|
4260
4090
|
- ``'none'``: no reduction will be applied.
|
|
@@ -4275,7 +4105,7 @@ class KLDivLoss(Primitive):
|
|
|
4275
4105
|
TypeError: If neither `logits` nor `labels` is a Tensor.
|
|
4276
4106
|
TypeError: If dtype of `logits` or `labels` is not currently supported.
|
|
4277
4107
|
ValueError: If shape of `logits` is not the same as `labels`.
|
|
4278
|
-
RuntimeError: If `logits` or `labels` is a scalar when `reduction` is 'batchmean'
|
|
4108
|
+
RuntimeError: If `logits` or `labels` is a scalar when `reduction` is ``'batchmean'``.
|
|
4279
4109
|
|
|
4280
4110
|
Supported Platforms:
|
|
4281
4111
|
``Ascend`` ``GPU`` ``CPU``
|
|
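Since `reduction` is now documented as optional, a short sketch of calling the primitive with the default ``'mean'`` reduction (values are illustrative only):

import numpy as np
import mindspore as ms
from mindspore import ops, Tensor

kldiv = ops.KLDivLoss(reduction='mean')
# logits are expected to be log-probabilities, labels are probabilities.
logits = Tensor(np.array([0.2, 0.7, 0.1]), ms.float32)
labels = Tensor(np.array([0.0, 1.0, 0.0]), ms.float32)
loss = kldiv(logits, labels)   # scalar Tensor for 'mean'/'sum', same shape as logits for 'none'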
@@ -4342,11 +4172,11 @@ class ApplyAdaMax(Primitive):
|
|
|
4342
4172
|
the relatively highest priority data type.
|
|
4343
4173
|
|
|
4344
4174
|
Inputs:
|
|
4345
|
-
- **var** (Parameter) - Variable to be updated. With float32 or float16 data type.
|
|
4175
|
+
- **var** (Union[Parameter, Tensor]) - Variable to be updated. With float32 or float16 data type.
|
|
4346
4176
|
The shape is :math:`(N, *)` where :math:`*` means, any number of additional dimensions.
|
|
4347
|
-
- **m** (Parameter) - The 1st moment vector in the updating formula, has the same shape as `var`.
|
|
4177
|
+
- **m** (Union[Parameter, Tensor]) - The 1st moment vector in the updating formula, has the same shape as `var`.
|
|
4348
4178
|
With float32 or float16 data type.
|
|
4349
|
-
- **v** (Parameter) - The 2nd moment vector in the updating formula. Mean square gradients
|
|
4179
|
+
- **v** (Union[Parameter, Tensor]) - The 2nd moment vector in the updating formula. Mean square gradients
|
|
4350
4180
|
with the same shape as `var`. With float32 or float16 data type.
|
|
4351
4181
|
- **beta1_power** (Union[Number, Tensor]) - :math:`beta_1^t` in the updating formula, must be a scalar.
|
|
4352
4182
|
With float32 or float16 data type.
|
|
@@ -4362,7 +4192,7 @@ class ApplyAdaMax(Primitive):
|
|
|
4362
4192
|
With float32 or float16 data type.
|
|
4363
4193
|
|
|
4364
4194
|
Outputs:
|
|
4365
|
-
Tuple of 3 Tensor, the updated parameters.
|
|
4195
|
+
Tuple of 3 Tensors, the updated parameters or tensors.
|
|
4366
4196
|
|
|
4367
4197
|
- **var** (Tensor) - The same shape and data type as `var`.
|
|
4368
4198
|
- **m** (Tensor) - The same shape and data type as `m`.
|
|
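For reference, the typical way ApplyAdaMax is wired into a Cell; with this change `var`, `m` and `v` may also be plain Tensors rather than Parameters. A sketch with illustrative shapes, not copied from the package:

import numpy as np
import mindspore as ms
from mindspore import nn, ops, Tensor, Parameter

class AdaMaxNet(nn.Cell):
    def __init__(self):
        super().__init__()
        self.apply_ada_max = ops.ApplyAdaMax()
        self.var = Parameter(Tensor(np.random.rand(2, 2), ms.float32), name="var")
        self.m = Parameter(Tensor(np.random.rand(2, 2), ms.float32), name="m")
        self.v = Parameter(Tensor(np.random.rand(2, 2), ms.float32), name="v")

    def construct(self, beta1_power, lr, beta1, beta2, epsilon, grad):
        # Returns the updated (var, m, v); the inputs are updated in place as well.
        return self.apply_ada_max(self.var, self.m, self.v, beta1_power,
                                  lr, beta1, beta2, epsilon, grad)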
@@ -4456,10 +4286,11 @@ class ApplyAdadelta(Primitive):
|
|
|
4456
4286
|
the relatively highest priority data type.
|
|
4457
4287
|
|
|
4458
4288
|
Inputs:
|
|
4459
|
-
- **var** (Parameter) - Weights to be updated. With float32 or float16 data type.
|
|
4289
|
+
- **var** (Union[Parameter, Tensor]) - Weights to be updated. With float32 or float16 data type.
|
|
4460
4290
|
The shape is :math:`(N, *)` where :math:`*` means, any number of additional dimensions.
|
|
4461
|
-
- **accum** (Parameter) - Accumulation to be updated, has the same shape and data type as `var`.
|
|
4462
|
-
- **accum_update** (Parameter) - Accum_update to be updated, has the same shape and data type as
|
|
4291
|
+
- **accum** (Union[Parameter, Tensor]) - Accumulation to be updated, has the same shape and data type as `var`.
|
|
4292
|
+
- **accum_update** (Union[Parameter, Tensor]) - Accum_update to be updated, has the same shape and data type as
|
|
4293
|
+
`var`.
|
|
4463
4294
|
- **lr** (Union[Number, Tensor]) - Learning rate, must be a scalar. With float32 or float16 data type.
|
|
4464
4295
|
- **rho** (Union[Number, Tensor]) - Decay rate, must be a scalar. With float32 or float16 data type.
|
|
4465
4296
|
- **epsilon** (Union[Number, Tensor]) - A small value added for numerical stability, must be a scalar.
|
|
@@ -4467,7 +4298,7 @@ class ApplyAdadelta(Primitive):
|
|
|
4467
4298
|
- **grad** (Tensor) - Gradients, has the same shape and data type as `var`.
|
|
4468
4299
|
|
|
4469
4300
|
Outputs:
|
|
4470
|
-
Tuple of 3 Tensor, the updated parameters.
|
|
4301
|
+
Tuple of 3 Tensors, the updated parameters or tensors.
|
|
4471
4302
|
|
|
4472
4303
|
- **var** (Tensor) - The same shape and data type as `var`.
|
|
4473
4304
|
- **accum** (Tensor) - The same shape and data type as `accum`.
|
|
@@ -4558,14 +4389,14 @@ class ApplyAdagrad(Primitive):
|
|
|
4558
4389
|
update_slots (bool): If ``True`` , `accum` will be updated. Default: ``True`` .
|
|
4559
4390
|
|
|
4560
4391
|
Inputs:
|
|
4561
|
-
- **var** (Parameter) - Variable to be updated. With float or complex data type.
|
|
4392
|
+
- **var** (Union[Parameter, Tensor]) - Variable to be updated. With float or complex data type.
|
|
4562
4393
|
The shape is :math:`(N, *)` where :math:`*` means, any number of additional dimensions.
|
|
4563
|
-
- **accum** (Parameter) - Accumulation to be updated. The shape must be the same as `var`.
|
|
4394
|
+
- **accum** (Union[Parameter, Tensor]) - Accumulation to be updated. The shape must be the same as `var`.
|
|
4564
4395
|
- **lr** (Union[Number, Tensor]) - The learning rate value, must be a scalar. With float or complex data type.
|
|
4565
4396
|
- **grad** (Tensor) - A tensor for gradient. The shape must be the same as `var`.
|
|
4566
4397
|
|
|
4567
4398
|
Outputs:
|
|
4568
|
-
Tuple of 2 Tensors, the updated parameters.
|
|
4399
|
+
Tuple of 2 Tensors, the updated parameters or tensors.
|
|
4569
4400
|
|
|
4570
4401
|
- **var** (Tensor) - The same shape and data type as `var`.
|
|
4571
4402
|
- **accum** (Tensor) - The same shape and data type as `accum`.
|
|
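A minimal ApplyAdagrad sketch under the same Union[Parameter, Tensor] relaxation; the shapes and learning rate below are illustrative assumptions:

import numpy as np
import mindspore as ms
from mindspore import nn, ops, Tensor, Parameter

class AdagradNet(nn.Cell):
    def __init__(self):
        super().__init__()
        self.apply_adagrad = ops.ApplyAdagrad(update_slots=True)
        self.var = Parameter(Tensor(np.random.rand(2, 2), ms.float32), name="var")
        self.accum = Parameter(Tensor(np.random.rand(2, 2), ms.float32), name="accum")

    def construct(self, lr, grad):
        # Returns the updated (var, accum).
        return self.apply_adagrad(self.var, self.accum, lr, grad)

net = AdagradNet()
out = net(Tensor(0.01, ms.float32), Tensor(np.random.rand(2, 2), ms.float32))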
@@ -4645,15 +4476,15 @@ class ApplyAdagradV2(Primitive):
|
|
|
4645
4476
|
update_slots (bool): If ``True`` , `accum` will be updated. Default: ``True`` .
|
|
4646
4477
|
|
|
4647
4478
|
Inputs:
|
|
4648
|
-
- **var** (Parameter) - Variable to be updated. With float16 or float32 data type.
|
|
4479
|
+
- **var** (Union[Parameter, Tensor]) - Variable to be updated. With float16 or float32 data type.
|
|
4649
4480
|
The shape is :math:`(N, *)` where :math:`*` means, any number of additional dimensions.
|
|
4650
|
-
- **accum** (Parameter) - Accumulation to be updated. The shape must be the same as `var`.
|
|
4481
|
+
- **accum** (Union[Parameter, Tensor]) - Accumulation to be updated. The shape must be the same as `var`.
|
|
4651
4482
|
- **lr** (Union[Number, Tensor]) - The learning rate value, must be a float number or
|
|
4652
4483
|
a scalar tensor with float16 or float32 data type.
|
|
4653
4484
|
- **grad** (Tensor) - A tensor for gradient. The shape must be the same as `var`.
|
|
4654
4485
|
|
|
4655
4486
|
Outputs:
|
|
4656
|
-
Tuple of 2 Tensors, the updated parameters.
|
|
4487
|
+
Tuple of 2 Tensors, the updated parameters or tensors.
|
|
4657
4488
|
|
|
4658
4489
|
- **var** (Tensor) - The same shape and data type as `var`.
|
|
4659
4490
|
- **accum** (Tensor) - The same shape and data type as `accum`.
|
|
@@ -4751,14 +4582,15 @@ class SparseApplyAdagradV2(Primitive):
|
|
|
4751
4582
|
Args:
|
|
4752
4583
|
lr (float): Learning rate.
|
|
4753
4584
|
epsilon (float): A small value added for numerical stability.
|
|
4754
|
-
use_locking (bool): If ``True`` , the `var` and `accum` tensors will be protected from being updated.
|
|
4585
|
+
use_locking (bool, optional): If ``True`` , the `var` and `accum` tensors will be protected from being updated.
|
|
4755
4586
|
Default: ``False`` .
|
|
4756
|
-
update_slots (bool): If ``True`` , the computation logic will be different to `False`.
|
|
4587
|
+
update_slots (bool, optional): If ``True`` , the computation logic differs from that when it is ``False``.
|
|
4588
|
+
Default: ``True`` .
|
|
4757
4589
|
|
|
4758
4590
|
Inputs:
|
|
4759
|
-
- **var** (Parameter) - Variable to be updated. The data type must be float16 or float32.
|
|
4591
|
+
- **var** (Union[Parameter, Tensor]) - Variable to be updated. The data type must be float16 or float32.
|
|
4760
4592
|
The shape is :math:`(N, *)` where :math:`*` means, any number of additional dimensions.
|
|
4761
|
-
- **accum** (Parameter) - Accumulation to be updated. The shape must be the same as `var`.
|
|
4593
|
+
- **accum** (Union[Parameter, Tensor]) - Accumulation to be updated. The shape must be the same as `var`.
|
|
4762
4594
|
- **grad** (Tensor) - Gradients has the same shape as `var` and
|
|
4763
4595
|
:math:`grad.shape[1:] = var.shape[1:]` if var.shape > 1.
|
|
4764
4596
|
- **indices** (Tensor) - A vector of indices into the first dimension of `var` and `accum`.
|
|
@@ -4766,7 +4598,7 @@ class SparseApplyAdagradV2(Primitive):
|
|
|
4766
4598
|
must be unique. Otherwise, the result is unpredictable.
|
|
4767
4599
|
|
|
4768
4600
|
Outputs:
|
|
4769
|
-
Tuple of 2 tensors, the updated parameters.
|
|
4601
|
+
Tuple of 2 tensors, the updated parameters or tensors.
|
|
4770
4602
|
|
|
4771
4603
|
- **var** (Tensor) - The same shape and data type as `var`.
|
|
4772
4604
|
- **accum** (Tensor) - The same shape and data type as `accum`.
|
|
@@ -4842,13 +4674,14 @@ class ApplyProximalAdagrad(Primitive):
|
|
|
4842
4674
|
the relatively highest priority data type.
|
|
4843
4675
|
|
|
4844
4676
|
Args:
|
|
4845
|
-
use_locking (bool): If ``True`` , the var and accumulation tensors will be protected
|
|
4846
|
-
Default: ``False`` .
|
|
4677
|
+
use_locking (bool, optional): If ``True`` , the var and accumulation tensors will be protected
|
|
4678
|
+
from being updated. Default: ``False`` .
|
|
4847
4679
|
|
|
4848
4680
|
Inputs:
|
|
4849
|
-
- **var** (Parameter) - Variable to be updated. The data type must be float16 or float32.
|
|
4681
|
+
- **var** (Union[Parameter, Tensor]) - Variable to be updated. The data type must be float16 or float32.
|
|
4850
4682
|
The shape is :math:`(N, *)` where :math:`*` means, any number of additional dimensions.
|
|
4851
|
-
- **accum** (Parameter) - Accumulation to be updated, must have the same shape and dtype as
|
|
4683
|
+
- **accum** (Union[Parameter, Tensor]) - Accumulation to be updated, must have the same shape and dtype as
|
|
4684
|
+
`var`.
|
|
4852
4685
|
- **lr** (Union[Number, Tensor]) - The learning rate value, must be a scalar. The data type must be
|
|
4853
4686
|
float16 or float32.
|
|
4854
4687
|
- **l1** (Union[Number, Tensor]) - l1 regularization strength, must be a scalar. The data type must be
|
|
@@ -4858,7 +4691,7 @@ class ApplyProximalAdagrad(Primitive):
|
|
|
4858
4691
|
- **grad** (Tensor) - Gradient with the same shape and dtype as `var`.
|
|
4859
4692
|
|
|
4860
4693
|
Outputs:
|
|
4861
|
-
Tuple of 2 Tensors, the updated parameters.
|
|
4694
|
+
Tuple of 2 Tensors, the updated parameters or tensors.
|
|
4862
4695
|
|
|
4863
4696
|
- **var** (Tensor) - The same shape and data type as `var`.
|
|
4864
4697
|
- **accum** (Tensor) - The same shape and data type as `accum`.
|
|
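The ApplyProximalAdagrad call order follows the Inputs list above; a hedged sketch with constants chosen only for illustration:

import numpy as np
import mindspore as ms
from mindspore import nn, ops, Tensor, Parameter

class ProximalAdagradNet(nn.Cell):
    def __init__(self):
        super().__init__()
        self.op = ops.ApplyProximalAdagrad()
        self.var = Parameter(Tensor(np.random.rand(2, 2), ms.float32), name="var")
        self.accum = Parameter(Tensor(np.random.rand(2, 2), ms.float32), name="accum")

    def construct(self, lr, l1, l2, grad):
        # Returns the updated (var, accum).
        return self.op(self.var, self.accum, lr, l1, l2, grad)

net = ProximalAdagradNet()
out = net(Tensor(0.01, ms.float32), Tensor(0.0, ms.float32), Tensor(0.0, ms.float32),
          Tensor(np.random.rand(2, 2), ms.float32))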
@@ -4943,9 +4776,9 @@ class SparseApplyProximalAdagrad(Primitive):
|
|
|
4943
4776
|
Default: ``False`` .
|
|
4944
4777
|
|
|
4945
4778
|
Inputs:
|
|
4946
|
-
- **var** (Parameter) - Variable tensor to be updated. The data type must be float16 or float32.
|
|
4779
|
+
- **var** (Union[Parameter, Tensor]) - Variable tensor to be updated. The data type must be float16 or float32.
|
|
4947
4780
|
The shape is :math:`(N, *)` where :math:`*` means, any number of additional dimensions.
|
|
4948
|
-
- **accum** (
|
|
4781
|
+
- **accum** (Union[Parameter, Tensor]) - Variable tensor to be updated, has the same shape as `var`.
|
|
4949
4782
|
- **lr** (Union[Number, Tensor]) - The learning rate value, must be a float number or
|
|
4950
4783
|
a scalar tensor with float16 or float32 data type. It must be positive.
|
|
4951
4784
|
- **l1** (Union[Number, Tensor]) - l1 regularization strength, must be a float number or
|
|
@@ -4959,7 +4792,7 @@ class SparseApplyProximalAdagrad(Primitive):
|
|
|
4959
4792
|
following types: int32, int64 and :math:`indices.shape[0] = grad.shape[0]`.
|
|
4960
4793
|
|
|
4961
4794
|
Outputs:
|
|
4962
|
-
Tuple of 2 tensors, the updated parameters.
|
|
4795
|
+
Tuple of 2 tensors, the updated parameters or tensors.
|
|
4963
4796
|
|
|
4964
4797
|
- **var** (Tensor) - The same shape and data type as `var`.
|
|
4965
4798
|
- **accum** (Tensor) - The same shape and data type as `accum`.
|
|
@@ -5045,9 +4878,9 @@ class ApplyAddSign(Primitive):
|
|
|
5045
4878
|
the relatively highest priority data type.
|
|
5046
4879
|
|
|
5047
4880
|
Inputs:
|
|
5048
|
-
- **var** (Parameter) - Variable tensor to be updated.
|
|
4881
|
+
- **var** (Union[Parameter, Tensor]) - Variable tensor to be updated.
|
|
5049
4882
|
The shape is :math:`(N, *)` where :math:`*` means, any number of additional dimensions.
|
|
5050
|
-
- **m** (Parameter) - Variable tensor to be updated, has the same data type as `var`.
|
|
4883
|
+
- **m** (Union[Parameter, Tensor]) - Variable tensor to be updated, has the same data type as `var`.
|
|
5051
4884
|
- **lr** (Union[Number, Tensor]) - The learning rate value, must be a scalar.
|
|
5052
4885
|
- **alpha** (Union[Number, Tensor]) - Must be a scalar.
|
|
5053
4886
|
- **sign_decay** (Union[Number, Tensor]) - Must be a scalar.
|
|
@@ -5055,7 +4888,7 @@ class ApplyAddSign(Primitive):
|
|
|
5055
4888
|
- **grad** (Tensor) - A tensor of the same shape as `var`, for the gradient.
|
|
5056
4889
|
|
|
5057
4890
|
Outputs:
|
|
5058
|
-
Tuple of 2 Tensors, the updated parameters.
|
|
4891
|
+
Tuple of 2 Tensors, the updated parameters or tensors.
|
|
5059
4892
|
|
|
5060
4893
|
- **var** (Tensor) - The same shape and data type as `var`.
|
|
5061
4894
|
- **m** (Tensor) - The same shape and data type as `m`.
|
|
@@ -5144,10 +4977,10 @@ class ApplyPowerSign(Primitive):
|
|
|
5144
4977
|
On Ascend, input data type of float64 is currently not supported.
|
|
5145
4978
|
|
|
5146
4979
|
Inputs:
|
|
5147
|
-
- **var** (Parameter) - Variable tensor to be updated. With float64, float32 or float16 data
|
|
5148
|
-
If data type of `var` is float16, all inputs must have the same data type as `var`.
|
|
4980
|
+
- **var** (Union[Parameter, Tensor]) - Variable tensor to be updated. With float64, float32 or float16 data
|
|
4981
|
+
type. If data type of `var` is float16, all inputs must have the same data type as `var`.
|
|
5149
4982
|
The shape is :math:`(N, *)` where :math:`*` means, any number of additional dimensions.
|
|
5150
|
-
- **m** (Parameter) - Variable tensor to be updated, has the same shape as `var`.
|
|
4983
|
+
- **m** (Union[Parameter, Tensor]) - Variable tensor to be updated, has the same shape as `var`.
|
|
5151
4984
|
- **lr** (Union[Number, Tensor]) - The learning rate value, should be a scalar or Tensor
|
|
5152
4985
|
with float64, float32 or float16 data type.
|
|
5153
4986
|
- **logbase** (Union[Number, Tensor]) - Should be a scalar or Tensor with float64, float32 or float16 data type.
|
|
@@ -5158,7 +4991,7 @@ class ApplyPowerSign(Primitive):
|
|
|
5158
4991
|
- **grad** (Tensor) - A tensor of the same shape as `var`, for the gradient.
|
|
5159
4992
|
|
|
5160
4993
|
Outputs:
|
|
5161
|
-
Tuple of 2 Tensors, the updated parameters.
|
|
4994
|
+
Tuple of 2 Tensors, the updated parameters or tensors.
|
|
5162
4995
|
|
|
5163
4996
|
- **var** (Tensor) - The same shape and data type as `var`.
|
|
5164
4997
|
- **m** (Tensor) - The same shape and data type as `m`.
|
|
@@ -5235,7 +5068,7 @@ class ApplyGradientDescent(Primitive):
|
|
|
5235
5068
|
the relatively highest priority data type.
|
|
5236
5069
|
|
|
5237
5070
|
Inputs:
|
|
5238
|
-
- **var** (Parameter) - Variable tensor to be updated. With float32 or float16 data type.
|
|
5071
|
+
- **var** (Union[Parameter, Tensor]) - Variable tensor to be updated. With float32 or float16 data type.
|
|
5239
5072
|
The shape is :math:`(N, *)` where :math:`*` means, any number of additional dimensions.
|
|
5240
5073
|
- **alpha** (Union[Number, Tensor]) - Scaling factor, must be a scalar. With float32 or float16 data type.
|
|
5241
5074
|
- **delta** (Tensor) - A tensor for the change, has the same shape as `var`.
|
|
@@ -5304,7 +5137,7 @@ class ApplyProximalGradientDescent(Primitive):
|
|
|
5304
5137
|
the relatively highest priority data type.
|
|
5305
5138
|
|
|
5306
5139
|
Inputs:
|
|
5307
|
-
- **var** (Parameter) - Variable tensor to be updated. With float32 or float16 data type.
|
|
5140
|
+
- **var** (Union[Parameter, Tensor]) - Variable tensor to be updated. With float32 or float16 data type.
|
|
5308
5141
|
The shape is :math:`(N, *)` where :math:`*` means, any number of additional dimensions.
|
|
5309
5142
|
- **alpha** (Union[Number, Tensor]) - Scaling factor, must be a scalar. With float32 or float16 data type.
|
|
5310
5143
|
- **l1** (Union[Number, Tensor]) - l1 regularization strength, must be a scalar.
|
|
@@ -5448,10 +5281,10 @@ class ApplyFtrl(Primitive):
|
|
|
5448
5281
|
use_locking (bool): Use locks for updating operation if ``True`` . Default: ``False`` .
|
|
5449
5282
|
|
|
5450
5283
|
Inputs:
|
|
5451
|
-
- **var** (Parameter) - The variable to be updated. The data type must be float16 or float32.
|
|
5284
|
+
- **var** (Union[Parameter, Tensor]) - The variable to be updated. The data type must be float16 or float32.
|
|
5452
5285
|
The shape is :math:`(N, *)` where :math:`*` means, any number of additional dimensions.
|
|
5453
|
-
- **accum** (Parameter) - The accumulation to be updated, must be same shape as `var`.
|
|
5454
|
-
- **linear** (Parameter) - The linear coefficient to be updated, must be same shape as `var`.
|
|
5286
|
+
- **accum** (Union[Parameter, Tensor]) - The accumulation to be updated, must be same shape as `var`.
|
|
5287
|
+
- **linear** (Union[Parameter, Tensor]) - The linear coefficient to be updated, must be same shape as `var`.
|
|
5455
5288
|
- **grad** (Tensor) - Gradient. The data type must be float16 or float32.
|
|
5456
5289
|
- **lr** (Union[Number, Tensor]) - The learning rate value, must be positive. Default: ``0.001`` .
|
|
5457
5290
|
It must be a float number or a scalar tensor with float16 or float32 data type.
|
|
@@ -5464,16 +5297,16 @@ class ApplyFtrl(Primitive):
|
|
|
5464
5297
|
Default: ``-0.5`` . It must be a float number or a scalar tensor with float16 or float32 data type.
|
|
5465
5298
|
|
|
5466
5299
|
Outputs:
|
|
5467
|
-
- **var** (Tensor) - Represents the updated `var`. As the input parameters has been updated in-place,
|
|
5468
|
-
value is always zero when the platform is GPU.
|
|
5300
|
+
- **var** (Tensor) - Represents the updated `var`. As the input parameters or tensors have been updated in-place,
|
|
5301
|
+
this value is always zero when the platform is GPU.
|
|
5469
5302
|
|
|
5470
5303
|
Raises:
|
|
5471
5304
|
TypeError: If `use_locking` is not a bool.
|
|
5472
5305
|
TypeError: If dtype of `var`, `grad`, `lr`, `l1`, `l2` or `lr_power` is neither float16 nor float32.
|
|
5473
5306
|
TypeError: If `lr`, `l1`, `l2` or `lr_power` is neither a Number nor a Tensor.
|
|
5474
5307
|
TypeError: If `grad` is not a Tensor.
|
|
5475
|
-
TypeError: If the parameter types of `var`, `accum` and `linear` are inconsistent.
|
|
5476
|
-
TypeError: If the parameter types of `grad`, `lr`, `l1`, `l2`, `lr_power` are inconsistent with `var`
|
|
5308
|
+
TypeError: If the parameter or tensor types of `var`, `accum` and `linear` are inconsistent.
|
|
5309
|
+
TypeError: If the parameter or tensor types of `grad`, `lr`, `l1`, `l2`, `lr_power` are inconsistent with `var`
|
|
5477
5310
|
and the precision is greater than `var`.
|
|
5478
5311
|
|
|
5479
5312
|
Supported Platforms:
|
|
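A sketch of driving ApplyFtrl with the argument order from the Inputs list above; the scalar hyper-parameters mirror the documented defaults and are illustrative only:

import numpy as np
import mindspore as ms
from mindspore import nn, ops, Tensor, Parameter

class FtrlNet(nn.Cell):
    def __init__(self):
        super().__init__()
        self.apply_ftrl = ops.ApplyFtrl(use_locking=False)
        self.var = Parameter(Tensor(np.random.rand(2, 2), ms.float32), name="var")
        self.accum = Parameter(Tensor(np.random.rand(2, 2), ms.float32), name="accum")
        self.linear = Parameter(Tensor(np.random.rand(2, 2), ms.float32), name="linear")

    def construct(self, grad):
        # lr, l1, l2, lr_power may be Numbers or scalar Tensors.
        lr, l1, l2, lr_power = 0.001, 0.0, 0.0, -0.5
        return self.apply_ftrl(self.var, self.accum, self.linear, grad, lr, l1, l2, lr_power)

net = FtrlNet()
out = net(Tensor(np.random.rand(2, 2), ms.float32))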
@@ -5548,10 +5381,10 @@ class SparseApplyFtrl(Primitive):
|
|
|
5548
5381
|
use_locking (bool, optional): Use locks for updating operation if ``True`` . Default: ``False`` .
|
|
5549
5382
|
|
|
5550
5383
|
Inputs:
|
|
5551
|
-
- **var** (Parameter) - The variable to be updated. The data type must be float16 or float32.
|
|
5384
|
+
- **var** (Union[Parameter, Tensor]) - The variable to be updated. The data type must be float16 or float32.
|
|
5552
5385
|
The shape is :math:`(N, *)` where :math:`*` means, any number of additional dimensions.
|
|
5553
|
-
- **accum** (Parameter) - The accumulation to be updated, must be same shape as `var`.
|
|
5554
|
-
- **linear** (Parameter) - The linear coefficient to be updated, must be the same shape as `var`.
|
|
5386
|
+
- **accum** (Union[Parameter, Tensor]) - The accumulation to be updated, must be same shape as `var`.
|
|
5387
|
+
- **linear** (Union[Parameter, Tensor]) - The linear coefficient to be updated, must be the same shape as `var`.
|
|
5555
5388
|
- **grad** (Tensor) - A tensor must meet with :math:`grad.shape[1:] = var.shape[1:]`
|
|
5556
5389
|
if var.shape > 1.
|
|
5557
5390
|
- **indices** (Tensor) - A tensor of indices in the first dimension of `var` and `accum`.
|
|
@@ -5739,7 +5572,7 @@ class Dropout3D(PrimitiveWithInfer):
|
|
|
5739
5572
|
Dropout3D can improve the independence between channel feature maps.
|
|
5740
5573
|
|
|
5741
5574
|
Args:
|
|
5742
|
-
keep_prob (float): The keep probability of a channel, between 0 and 1, e.g. `keep_prob` = 0.8,
|
|
5575
|
+
keep_prob (float, optional): The keep probability of a channel, between 0 and 1, e.g. `keep_prob` = 0.8,
|
|
5743
5576
|
means dropping out 20% of channels. Default: ``0.5`` .
|
|
5744
5577
|
|
|
5745
5578
|
Inputs:
|
|
@@ -5791,12 +5624,14 @@ class CTCLoss(Primitive):
|
|
|
5791
5624
|
such that the length of target series must be less than or equal to the length of input.
|
|
5792
5625
|
|
|
5793
5626
|
Args:
|
|
5794
|
-
preprocess_collapse_repeated (bool): If ``True`` , repeated labels will be collapsed prior to the CTC
|
|
5627
|
+
preprocess_collapse_repeated (bool, optional): If ``True`` , repeated labels will be collapsed prior to the CTC
|
|
5795
5628
|
calculation. Default: ``False`` .
|
|
5796
|
-
ctc_merge_repeated (bool): If ``False`` , during CTC calculation,
|
|
5629
|
+
ctc_merge_repeated (bool, optional): If ``False`` , during CTC calculation,
|
|
5630
|
+
repeated non-blank labels will not be merged
|
|
5797
5631
|
and these labels will be interpreted as individual ones. This is a simplified
|
|
5798
5632
|
version of CTC. Default: ``True`` .
|
|
5799
|
-
ignore_longer_outputs_than_inputs (bool): If ``True`` ,
|
|
5633
|
+
ignore_longer_outputs_than_inputs (bool, optional): If ``True`` ,
|
|
5634
|
+
sequences with longer outputs than inputs will be
|
|
5800
5635
|
ignored. Default: ``False`` .
|
|
5801
5636
|
|
|
5802
5637
|
Inputs:
|
|
@@ -6370,10 +6205,7 @@ class AvgPool3D(Primitive):
|
|
|
6370
6205
|
|
|
6371
6206
|
Typically the input is of shape :math:`(N, C, D_{in}, H_{in}, W_{in})`, AvgPool3D outputs
|
|
6372
6207
|
regional average in the :math:`(D_{in}, H_{in}, W_{in})`-dimension. Given kernel size
|
|
6373
|
-
:math:`ks = (d_{ker}, h_{ker}, w_{ker})` and stride :math:`s = (s_0, s_1, s_2)`, the operation is as follows
|
|
6374
|
-
|
|
6375
|
-
.. warning::
|
|
6376
|
-
"kernel_size" is in the range [1, 255]. "strides" is in the range [1, 63].
|
|
6208
|
+
:math:`ks = (d_{ker}, h_{ker}, w_{ker})` and stride :math:`s = (s_0, s_1, s_2)`, the operation is as follows:
|
|
6377
6209
|
|
|
6378
6210
|
.. math::
|
|
6379
6211
|
\text{output}(N_i, C_j, d, h, w) =
|
|
@@ -6384,12 +6216,13 @@ class AvgPool3D(Primitive):
|
|
|
6384
6216
|
This interface currently does not support Atlas A2 training series products.
|
|
6385
6217
|
|
|
6386
6218
|
Args:
|
|
6387
|
-
kernel_size (Union[int, tuple[int]]): The size of kernel used to take the average value,
|
|
6219
|
+
kernel_size (Union[int, tuple[int]], optional): The size of kernel used to take the average value,
|
|
6388
6220
|
is an int number that represents depth, height and width are both kernel_size, or a tuple
|
|
6389
|
-
of three int numbers that represent depth, height and width respectively.
|
|
6390
|
-
|
|
6221
|
+
of three int numbers that represent depth, height and width respectively.
|
|
6222
|
+
Default: ``1`` . The value range is: [1, 255].
|
|
6223
|
+
strides (Union[int, tuple[int]], optional): The distance of kernel moving, an int number that represents
|
|
6391
6224
|
the depth, height and width of movement are both strides, or a tuple of three int numbers that
|
|
6392
|
-
represent depth, height and width of movement respectively. Default: ``1`` .
|
|
6225
|
+
represent depth, height and width of movement respectively. Default: ``1`` . The value range is: [1, 63].
|
|
6393
6226
|
pad_mode (str, optional): Specifies the padding mode with a padding value of 0. It can be set to:
|
|
6394
6227
|
``"same"`` , ``"valid"`` or ``"pad"`` . Default: ``"valid"`` .
|
|
6395
6228
|
|
|
@@ -6406,16 +6239,18 @@ class AvgPool3D(Primitive):
|
|
|
6406
6239
|
in the depth, height and width dimension is determined by the `pad` parameter.
|
|
6407
6240
|
If this mode is set, `pad` must be greater than or equal to 0.
|
|
6408
6241
|
|
|
6409
|
-
pad (Union(int, tuple[int], list[int])): The pad value to be filled. Default: ``0`` .
|
|
6242
|
+
pad (Union(int, tuple[int], list[int]), optional): The pad value to be filled. Default: ``0`` .
|
|
6243
|
+
If `pad` is an integer,
|
|
6410
6244
|
the paddings of head, tail, top, bottom, left and right are the same, equal to pad.
|
|
6411
6245
|
If `pad` is a tuple of six integers, the padding of head, tail, top, bottom, left and right equal to
|
|
6412
6246
|
pad[0], pad[1], pad[2], pad[3], pad[4] and pad[5] correspondingly.
|
|
6413
|
-
ceil_mode (bool): If ``True`` , ceil instead of floor to compute the output shape.
|
|
6414
|
-
|
|
6247
|
+
ceil_mode (bool, optional): If ``True`` , use ceil instead of floor to compute the output shape.
|
|
6248
|
+
Default: ``False`` .
|
|
6249
|
+
count_include_pad (bool, optional): If ``True`` , averaging calculation will include the zero-padding.
|
|
6415
6250
|
Default: ``True`` .
|
|
6416
|
-
divisor_override (int): If specified, it will be used as divisor in the averaging calculation,
|
|
6251
|
+
divisor_override (int, optional): If specified, it will be used as divisor in the averaging calculation,
|
|
6417
6252
|
otherwise kernel_size will be used. Default: ``0`` .
|
|
6418
|
-
data_format (str)
|
|
6253
|
+
data_format (str, optional): The optional value for data format. Currently only ``'NCDHW'`` is supported.
|
|
6419
6254
|
Default: ``'NCDHW'`` .
|
|
6420
6255
|
|
|
6421
6256
|
Inputs:
|
|
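To make the kernel_size and strides ranges above concrete, a small AvgPool3D sketch in the default 'NCDHW' layout (shapes are illustrative):

import numpy as np
import mindspore as ms
from mindspore import ops, Tensor

# kernel_size must stay within [1, 255] and strides within [1, 63].
avg_pool3d = ops.AvgPool3D(kernel_size=2, strides=1, pad_mode="valid")
x = Tensor(np.random.rand(1, 2, 4, 4, 4), ms.float32)   # (N, C, D, H, W)
y = avg_pool3d(x)                                        # (1, 2, 3, 3, 3) for these settings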
@@ -6599,39 +6434,8 @@ class Conv3D(Primitive):
|
|
|
6599
6434
|
|
|
6600
6435
|
Outputs:
|
|
6601
6436
|
Tensor, the value that applied 3D convolution. The shape is :math:`(N, C_{out}, D_{out}, H_{out}, W_{out})`.
|
|
6602
|
-
|
|
6603
|
-
`
|
|
6604
|
-
|
|
6605
|
-
.. math::
|
|
6606
|
-
\begin{array}{ll} \\
|
|
6607
|
-
D_{out} = \left \lceil{\frac{D_{in}}{\text{stride[0]}}} \right \rceil \\
|
|
6608
|
-
H_{out} = \left \lceil{\frac{H_{in}}{\text{stride[1]}}} \right \rceil \\
|
|
6609
|
-
W_{out} = \left \lceil{\frac{W_{in}}{\text{stride[2]}}} \right \rceil \\
|
|
6610
|
-
\end{array}
|
|
6611
|
-
|
|
6612
|
-
`pad_mode` is ``"valid"``:
|
|
6613
|
-
|
|
6614
|
-
.. math::
|
|
6615
|
-
\begin{array}{ll} \\
|
|
6616
|
-
D_{out} = \left \lfloor{\frac{D_{in} - \text{dilation[0]} \times (\text{kernel_size[0]} - 1) }
|
|
6617
|
-
{\text{stride[0]}} + 1} \right \rfloor \\
|
|
6618
|
-
H_{out} = \left \lfloor{\frac{H_{in} - \text{dilation[1]} \times (\text{kernel_size[1]} - 1) }
|
|
6619
|
-
{\text{stride[1]}} + 1} \right \rfloor \\
|
|
6620
|
-
W_{out} = \left \lfloor{\frac{W_{in} - \text{dilation[2]} \times (\text{kernel_size[2]} - 1) }
|
|
6621
|
-
{\text{stride[2]}} + 1} \right \rfloor \\
|
|
6622
|
-
\end{array}
|
|
6623
|
-
|
|
6624
|
-
`pad_mode` is ``"pad"``:
|
|
6625
|
-
|
|
6626
|
-
.. math::
|
|
6627
|
-
\begin{array}{ll} \\
|
|
6628
|
-
D_{out} = \left \lfloor{\frac{D_{in} + pad[0] + pad[1] - (\text{dilation[0]} - 1) \times
|
|
6629
|
-
\text{kernel_size[0]} - 1 }{\text{stride[0]}} + 1} \right \rfloor \\
|
|
6630
|
-
H_{out} = \left \lfloor{\frac{H_{in} + pad[2] + pad[3] - (\text{dilation[1]} - 1) \times
|
|
6631
|
-
\text{kernel_size[1]} - 1 }{\text{stride[1]}} + 1} \right \rfloor \\
|
|
6632
|
-
W_{out} = \left \lfloor{\frac{W_{in} + pad[4] + pad[5] - (\text{dilation[2]} - 1) \times
|
|
6633
|
-
\text{kernel_size[2]} - 1 }{\text{stride[2]}} + 1} \right \rfloor \\
|
|
6634
|
-
\end{array}
|
|
6437
|
+
To see how different pad modes affect the output shape, please refer to
|
|
6438
|
+
:class:`mindspore.nn.Conv3d` for more details.
|
|
6635
6439
|
|
|
6636
6440
|
Raises:
|
|
6637
6441
|
TypeError: If `out_channel` or `group` is not an int.
|
|
@@ -6908,7 +6712,7 @@ class SparseApplyAdadelta(Primitive):
|
|
|
6908
6712
|
to make the data types consistent. Besides, inputs of 'lr' and 'rho' also support implicit type conversion.
|
|
6909
6713
|
If they have different data types, the lower priority data type will be converted to
|
|
6910
6714
|
relatively highest priority data type.
|
|
6911
|
-
RuntimeError exception will be thrown when the data type conversion of Parameter is required.
|
|
6715
|
+
A RuntimeError exception will be thrown when data type conversion of a Parameter or Tensor is required.
|
|
6912
6716
|
|
|
6913
6717
|
Note:
|
|
6914
6718
|
If there are negative values or values greater than or equal to var.shape[0] in `indices`,
|
|
@@ -6920,11 +6724,11 @@ class SparseApplyAdadelta(Primitive):
|
|
|
6920
6724
|
Default: ``False`` .
|
|
6921
6725
|
|
|
6922
6726
|
Inputs:
|
|
6923
|
-
- **var** (Parameter) - Weights to be updated. With float32 or float16 data type.
|
|
6924
|
-
- **accum** (Parameter) - Accumulation to be updated. Mush have the same shape and dtype as
|
|
6925
|
-
With float32 or float16 data type.
|
|
6926
|
-
- **accum_update** (Parameter) - Accum_update to be updated. Must have the same shape and dtype
|
|
6927
|
-
With float32 or float16 data type.
|
|
6727
|
+
- **var** (Union[Parameter, Tensor]) - Weights to be updated. With float32 or float16 data type.
|
|
6728
|
+
- **accum** (Union[Parameter, Tensor]) - Accumulation to be updated. Must have the same shape and dtype as
|
|
6729
|
+
`var`. With float32 or float16 data type.
|
|
6730
|
+
- **accum_update** (Union[Parameter, Tensor]) - Accum_update to be updated. Must have the same shape and dtype
|
|
6731
|
+
as `var`. With float32 or float16 data type.
|
|
6928
6732
|
- **lr** (Union[float, Tensor]) - Learning rate, must be a scalar. With float32 or float16 data type.
|
|
6929
6733
|
- **rho** (Union[float, Tensor]) - Decay rate, must be a scalar. With float32 or float16 data type.
|
|
6930
6734
|
- **grad** (Tensor) - A tensor for gradient. Must have the same shape and dtype as `var`.
|
|
@@ -6932,7 +6736,7 @@ class SparseApplyAdadelta(Primitive):
|
|
|
6932
6736
|
Must be one of the following types: int32, int64 and indices.shape[0] = grad.shape[0].
|
|
6933
6737
|
|
|
6934
6738
|
Outputs:
|
|
6935
|
-
Tuple of 3 Tensor, the updated parameters.
|
|
6739
|
+
Tuple of 3 Tensors, the updated parameters or tensors.
|
|
6936
6740
|
|
|
6937
6741
|
- **var** (Tensor) - The same shape and data type as `var`.
|
|
6938
6742
|
- **accum** (Tensor) - The same shape and data type as `accum`.
|
|
@@ -7020,9 +6824,9 @@ class CTCLossV2(Primitive):
|
|
|
7020
6824
|
and its correlated gradient to zero. Default: ``False`` .
|
|
7021
6825
|
|
|
7022
6826
|
Inputs:
|
|
7023
|
-
- **log_probs** (Tensor) - A tensor of shape :math:`(T, N, C)`, where :math:`T` is input length, :math:`N` is
|
|
6827
|
+
- **log_probs** (Tensor) - A 3D tensor of shape :math:`(T, N, C)`, where :math:`T` is input length, :math:`N` is
|
|
7024
6828
|
batch size and :math:`C` is number of classes (including blank). Supported dtypes: float32, float64.
|
|
7025
|
-
- **targets** (Tensor) - A tensor of shape :math:`(N, S)`, where :math:`S` is max target length,
|
|
6829
|
+
- **targets** (Tensor) - A 2D tensor of shape :math:`(N, S)`, where :math:`S` is max target length,
|
|
7026
6830
|
means the target sequences. Supported dtypes: int32, int64.
|
|
7027
6831
|
- **input_lengths** (Union(Tuple, Tensor)) - A tuple or Tensor of shape :math:`(N)`.
|
|
7028
6832
|
It means the lengths of the input. Supported dtypes: int32, int64.
|
|
@@ -7093,7 +6897,7 @@ class CTCLossV2Grad(Primitive):
|
|
|
7093
6897
|
|
|
7094
6898
|
Args:
|
|
7095
6899
|
blank (int): The blank label. Default: ``0`` .
|
|
7096
|
-
reduction (
|
|
6900
|
+
reduction (str): Apply specific reduction method to the output. Currently only ``'none'`` is supported.
|
|
7097
6901
|
Default: ``"none"`` .
|
|
7098
6902
|
zero_infinity (bool): Whether to set infinite loss and correlation gradient to zero. Default: ``False`` .
|
|
7099
6903
|
|
|
@@ -7209,12 +7013,15 @@ class Conv3DTranspose(Primitive):
|
|
|
7209
7013
|
Inputs:
|
|
7210
7014
|
- **dout** (Tensor) - The gradients with respect to the output of the convolution.
|
|
7211
7015
|
The shape conforms to the default.
|
|
7212
|
-
data_format :math:`(N, C_{in}, D_{out}, H_{out}, W_{out})`.
|
|
7213
|
-
|
|
7016
|
+
data_format :math:`(N, C_{in}, D_{out}, H_{out}, W_{out})`.
|
|
7017
|
+
Supported dtypes:
|
|
7018
|
+
|
|
7019
|
+
- Ascend: float16.
|
|
7020
|
+
- GPU/CPU: float16, float32.
|
|
7214
7021
|
- **weight** (Tensor) - Set size of kernel is :math:`(K_d, K_h, K_w)`, then the shape is
|
|
7215
7022
|
:math:`(C_{in}, C_{out}//group, K_d, K_h, K_w)`. Where :math:`group` is the Args parameter,
|
|
7216
7023
|
:math:`//` is the symbol for integer division.
|
|
7217
|
-
|
|
7024
|
+
It has the same dtype as `dout`.
|
|
7218
7025
|
- **bias** (Tensor) - Tensor of shape :math:`C_{out}`. Currently, only support none. Default: ``None`` .
|
|
7219
7026
|
|
|
7220
7027
|
Outputs:
|
|
@@ -7500,12 +7307,12 @@ class ApplyAdagradDA(Primitive):
|
|
|
7500
7307
|
Otherwise the behavior is undefined, but may exhibit less contention. Default: ``False`` .
|
|
7501
7308
|
|
|
7502
7309
|
Inputs:
|
|
7503
|
-
- **var** (Parameter) - Variable to be updated. The data type must be float16 or float32.
|
|
7310
|
+
- **var** (Union[Parameter, Tensor]) - Variable to be updated. The data type must be float16 or float32.
|
|
7504
7311
|
The shape is :math:`(N, *)` where :math:`*` means, any number of additional dimensions.
|
|
7505
|
-
- **gradient_accumulator** (Parameter) - The dict of mutable tensor :math:`grad\_accum`.
|
|
7506
|
-
shape as `var`.
|
|
7507
|
-
- **gradient_squared_accumulator** (Parameter) - The dict of mutable tensor :math:`grad\_squared\_accum`.
|
|
7312
|
+
- **gradient_accumulator** (Union[Parameter, Tensor]) - The dict of mutable tensor :math:`grad\_accum`.
|
|
7508
7313
|
Must have the same shape as `var`.
|
|
7314
|
+
- **gradient_squared_accumulator** (Union[Parameter, Tensor]) - The dict of mutable tensor
|
|
7315
|
+
:math:`grad\_squared\_accum`. Must have the same shape as `var`.
|
|
7509
7316
|
- **grad** (Tensor) - A tensor for gradient. Must have the same shape as `var`.
|
|
7510
7317
|
- **lr** ([Number, Tensor]) - Scaling factor. Must be a scalar. With float32 or float16 data type.
|
|
7511
7318
|
- **l1** ([Number, Tensor]) - L1 regularization. Must be a scalar. With float32 or float16 data type.
|
|
@@ -7513,12 +7320,12 @@ class ApplyAdagradDA(Primitive):
|
|
|
7513
7320
|
- **global_step** ([Number, Tensor]) - Training step number. Must be a scalar. With int32 or int64 data type.
|
|
7514
7321
|
|
|
7515
7322
|
Outputs:
|
|
7516
|
-
Tuple of 1 Tensors, the updated parameters.
|
|
7323
|
+
Tuple of 1 Tensor, the updated parameter or tensor.
|
|
7517
7324
|
|
|
7518
7325
|
- **var** (Tensor) - The same shape and data type as `var`.
|
|
7519
7326
|
|
|
7520
7327
|
Raises:
|
|
7521
|
-
TypeError: If `var`, `gradient_accumulator` or `gradient_squared_accumulator`
|
|
7328
|
+
TypeError: If `var`, `gradient_accumulator` or `gradient_squared_accumulator` is neither a Parameter nor a Tensor.
|
|
7522
7329
|
TypeError: If `grad` is not a Tensor.
|
|
7523
7330
|
TypeError: If `lr`, `l1`, `l2` or `global_step` is neither a Number nor a Tensor.
|
|
7524
7331
|
TypeError: If use_locking is not a bool.
|
|
@@ -7564,9 +7371,8 @@ class ApplyAdagradDA(Primitive):
|
|
|
7564
7371
|
>>> global_step = Tensor(2, mstype.int32)
|
|
7565
7372
|
>>> output = net(grad, lr, l1, l2, global_step)
|
|
7566
7373
|
>>> print(output)
|
|
7567
|
-
|
|
7568
|
-
|
|
7569
|
-
[-5.96988888e-04, -1.42478070e-03]]))
|
|
7374
|
+
[[-0.00073906, -0.00136889],
|
|
7375
|
+
[-0.00059699, -0.00142478]]
|
|
7570
7376
|
"""
|
|
7571
7377
|
|
|
7572
7378
|
__mindspore_signature__ = (
|
|
@@ -7612,10 +7418,12 @@ class SparseApplyRMSProp(Primitive):
|
|
|
7612
7418
|
otherwise the behavior is undefined, but may exhibit less contention. Default: ``False`` .
|
|
7613
7419
|
|
|
7614
7420
|
Inputs:
|
|
7615
|
-
- **var** (Parameter) - Variable to be updated. The data type must be float16 or float32.
|
|
7421
|
+
- **var** (Union[Parameter, Tensor]) - Variable to be updated. The data type must be float16 or float32.
|
|
7616
7422
|
The shape is :math:`(N, *)` where :math:`*` means, any number of additional dimensions.
|
|
7617
|
-
- **ms** (Parameter) - The dict of mutable tensor ms. Must have the same shape and dtype as
|
|
7618
|
-
|
|
7423
|
+
- **ms** (Union[Parameter, Tensor]) - The dict of mutable tensor ms. Must have the same shape and dtype as
|
|
7424
|
+
`var`.
|
|
7425
|
+
- **mom** (Union[Parameter, Tensor]) - The dict of mutable tensor mom. Must have the same shape and dtype as
|
|
7426
|
+
`var`.
|
|
7619
7427
|
- **lr** ([Number, Tensor]) - Learning rate. Must be a scalar. With float16 or float32 data type.
|
|
7620
7428
|
- **grad** (Tensor) - A tensor for gradient. Must have the same shape and dtype as `var`.
|
|
7621
7429
|
- **indices** (Tensor) - A tensor of indices in the first dimension of `var`, `ms` and `mom`.
|
|
@@ -7623,7 +7431,7 @@ class SparseApplyRMSProp(Primitive):
|
|
|
7623
7431
|
following types: int32, int64 and indices.shape[0] = var.shape[0].
|
|
7624
7432
|
|
|
7625
7433
|
Outputs:
|
|
7626
|
-
Tuple of 3 Tensors, the updated parameters.
|
|
7434
|
+
Tuple of 3 Tensors, the updated parameters or tensors.
|
|
7627
7435
|
|
|
7628
7436
|
- **var** (Tensor) - The same shape and data type as `var`.
|
|
7629
7437
|
- **ms** (Tensor) - The same shape and data type as `ms`.
|
|
@@ -7729,12 +7537,12 @@ class SparseApplyCenteredRMSProp(Primitive):
|
|
|
7729
7537
|
Default: ``False`` .
|
|
7730
7538
|
|
|
7731
7539
|
Inputs:
|
|
7732
|
-
- **var** (Parameter) - Variable tensor to be updated. The data type must be int8, int16, int32,
|
|
7733
|
-
uint8, uint16, uint32, uint64, float16, float32 or float64.
|
|
7540
|
+
- **var** (Union[Parameter, Tensor]) - Variable tensor to be updated. The data type must be int8, int16, int32,
|
|
7541
|
+
int64, uint8, uint16, uint32, uint64, float16, float32 or float64.
|
|
7734
7542
|
The shape is :math:`(N, *)` where :math:`*` means, any number of additional dimensions.
|
|
7735
|
-
- **mg** (Parameter) - Mean gradients. Must have the same shape and dtype as `var`.
|
|
7736
|
-
- **ms** (Parameter) - Mean square gradients. Must have the same shape and dtype as `var`.
|
|
7737
|
-
- **mom** (Parameter) - Delta of `var`. Must have the same shape and dtype as `var`.
|
|
7543
|
+
- **mg** (Union[Parameter, Tensor]) - Mean gradients. Must have the same shape and dtype as `var`.
|
|
7544
|
+
- **ms** (Union[Parameter, Tensor]) - Mean square gradients. Must have the same shape and dtype as `var`.
|
|
7545
|
+
- **mom** (Union[Parameter, Tensor]) - Delta of `var`. Must have the same shape and dtype as `var`.
|
|
7738
7546
|
- **lr** (Union[Number, Tensor]) - Learning rate. Must be a float number or a scalar tensor.
|
|
7739
7547
|
Must have the same type as `var`.
|
|
7740
7548
|
- **rho** (Union[Number, Tensor]) - Decay rate. Must be a float number or a scalar tensor.
|
|
@@ -7837,8 +7645,9 @@ class ApplyKerasMomentum(Primitive):
|
|
|
7837
7645
|
so in the end, the var you get is actually var + momentum * accum. Default: ``False`` .
|
|
7838
7646
|
|
|
7839
7647
|
Inputs:
|
|
7840
|
-
- **var** (Parameter) - Variable to be updated. With float16 or float32 data type.
|
|
7841
|
-
- **accum** (Parameter) - Must have the same shape and type as `var`. With float16 or float32
|
|
7648
|
+
- **var** (Union[Parameter, Tensor]) - Variable to be updated. With float16 or float32 data type.
|
|
7649
|
+
- **accum** (Union[Parameter, Tensor]) - Must have the same shape and type as `var`. With float16 or float32
|
|
7650
|
+
data type.
|
|
7842
7651
|
- **lr** (Union[Number, Tensor]) - Scaling factor. Must be a scalar. With float16 or float32 data type.
|
|
7843
7652
|
- **grad** (Tensor) - The gradient. Must have the same shape and type as `var`.
|
|
7844
7653
|
With float16 or float32 data type.
|
|
@@ -7989,12 +7798,12 @@ class ApplyAdamWithAmsgrad(Primitive):
|
|
|
7989
7798
|
Default: ``False`` .
|
|
7990
7799
|
|
|
7991
7800
|
Inputs:
|
|
7992
|
-
- **var** (Parameter) - Variable to be updated. The data type can be float16 or float32.
|
|
7993
|
-
- **m** (Parameter) - The 1st moment vector in the updating formula,
|
|
7801
|
+
- **var** (Union[Parameter, Tensor]) - Variable to be updated. The data type can be float16 or float32.
|
|
7802
|
+
- **m** (Union[Parameter, Tensor]) - The 1st moment vector in the updating formula,
|
|
7994
7803
|
the shape and data type value should be the same as `var`.
|
|
7995
|
-
- **v** (Parameter) - the 2nd moment vector in the updating formula,
|
|
7804
|
+
- **v** (Union[Parameter, Tensor]) - The 2nd moment vector in the updating formula,
|
|
7996
7805
|
the shape and data type value should be the same as `var`.
|
|
7997
|
-
- **vhat** (Parameter) - :math:`\hat v_t` in the updating formula,
|
|
7806
|
+
- **vhat** (Union[Parameter, Tensor]) - :math:`\hat v_t` in the updating formula,
|
|
7998
7807
|
the shape and data type value should be the same as `var`.
|
|
7999
7808
|
- **beta1_power** (Union[float, Tensor]) - :math:`beta_1^t(\beta_1^{t})` in the updating formula,
|
|
8000
7809
|
a scalar tensor with float16 or float32 data type.
|
|
@@ -8004,7 +7813,7 @@ class ApplyAdamWithAmsgrad(Primitive):
|
|
|
8004
7813
|
- **grad** (Tensor) - The gradient, has the same shape and data type as `var`.
|
|
8005
7814
|
|
|
8006
7815
|
Outputs:
|
|
8007
|
-
Tuple of 4 Tensors, the updated parameters.
|
|
7816
|
+
Tuple of 4 Tensors, the updated parameters or tensors.
|
|
8008
7817
|
|
|
8009
7818
|
- **var** (Tensor) - The same shape and data type as `var`.
|
|
8010
7819
|
- **m** (Tensor) - The same shape and data type as `m`.
|
|
@@ -8012,7 +7821,7 @@ class ApplyAdamWithAmsgrad(Primitive):
|
|
|
8012
7821
|
- **vhat** (Tensor) - The same shape and data type as `vhat`.
|
|
8013
7822
|
|
|
8014
7823
|
Raises:
|
|
8015
|
-
TypeError: If `var`, `m`, `v`, `vhat`
|
|
7824
|
+
TypeError: If `var`, `m`, `v` or `vhat` is neither a Parameter nor a Tensor.
|
|
8016
7825
|
TypeError: If `beta1_power`, `beta2_power`, `lr` is neither a Number nor a Tensor.
|
|
8017
7826
|
TypeError: If `grad` is not a Tensor.
|
|
8018
7827
|
TypeError: If dtype of `var`, `m`, `v`, `vhat`, `beta1_power`, `beta2_power`,
|
|
@@ -8092,16 +7901,16 @@ class ApplyAdamWithAmsgradV2(Primitive):
|
|
|
8092
7901
|
|
|
8093
7902
|
Args:
|
|
8094
7903
|
use_locking (bool): If ``True`` , updating of the `var`, `m`, and `v` tensors will
|
|
8095
|
-
be protected by a lock; Otherwise
|
|
7904
|
+
be protected by a lock; otherwise some contention may occur.
|
|
8096
7905
|
Default: ``False`` .
|
|
8097
7906
|
|
|
8098
7907
|
Inputs:
|
|
8099
|
-
- **var** (Parameter) - Variable to be updated. The data type can be float16, float32 or float64.
|
|
8100
|
-
- **m** (Parameter) - The 1st moment vector in the updating formula,
|
|
7908
|
+
- **var** (Union[Parameter, Tensor]) - Variable to be updated. The data type can be float16, float32 or float64.
|
|
7909
|
+
- **m** (Union[Parameter, Tensor]) - The 1st moment vector in the updating formula,
|
|
8101
7910
|
the shape should be the same as `var`.
|
|
8102
|
-
- **v** (Parameter) - The 2nd moment vector in the updating formula,
|
|
7911
|
+
- **v** (Union[Parameter, Tensor]) - The 2nd moment vector in the updating formula,
|
|
8103
7912
|
the shape should be the same as `var`.
|
|
8104
|
-
- **vhat** (Parameter) - :math:`\hat v_t` in the updating formula,
|
|
7913
|
+
- **vhat** (Union[Parameter, Tensor]) - :math:`\hat v_t` in the updating formula,
|
|
8105
7914
|
the shape and data type value should be the same as `var`.
|
|
8106
7915
|
- **beta1_power** (Union[float, Tensor]) - :math:`beta_1^t(\beta_1^{t})` in the updating formula,
|
|
8107
7916
|
with float16, float32 or float64 data type.
|
|
@@ -8117,7 +7926,7 @@ class ApplyAdamWithAmsgradV2(Primitive):
|
|
|
8117
7926
|
- **grad** (Tensor) - The gradient, has the same shape as `var`.
|
|
8118
7927
|
|
|
8119
7928
|
Outputs:
|
|
8120
|
-
Tuple of 4 Tensors, the updated parameters.
|
|
7929
|
+
Tuple of 4 Tensors, the updated parameters or tensors.
|
|
8121
7930
|
|
|
8122
7931
|
- **var** (Tensor) - The same shape and data type as `var`.
|
|
8123
7932
|
- **m** (Tensor) - The same shape and data type as `m`.
|
|
@@ -8125,7 +7934,7 @@ class ApplyAdamWithAmsgradV2(Primitive):
|
|
|
8125
7934
|
- **vhat** (Tensor) - The same shape and data type as `vhat`.
|
|
8126
7935
|
|
|
8127
7936
|
Raises:
|
|
8128
|
-
TypeError: If `var`, `m`, `v`, `vhat`
|
|
7937
|
+
TypeError: If `var`, `m`, `v` or `vhat` is neither a Parameter nor a Tensor.
|
|
8129
7938
|
TypeError: If dtype of `var`, `m`, `v`, `vhat`, `beta1_power`, `beta2_power`,
|
|
8130
7939
|
`lr`, `beta1` , `beta2` , `epsilon` or `grad` is not float64, float32 or float16.
|
|
8131
7940
|
RuntimeError: If the data type of `var`, `m`, `v` , `vhat` and `grad` conversion of Parameter is not supported.
|
|
@@ -8640,13 +8449,13 @@ class TripletMarginLoss(Primitive):
|
|
|
8640
8449
|
- **margin** (Tensor) - Make a margin between the positive pair and the negative pair.
|
|
8641
8450
|
|
|
8642
8451
|
Outputs:
|
|
8643
|
-
Union[Tensor, Scalar], if `reduction` is ``"none"``,
|
|
8452
|
+
Union[Tensor, Scalar], if `reduction` is ``"none"``, a Tensor will be returned with a shape of :math:`(N)`.
|
|
8644
8453
|
Otherwise, a scalar value will be returned.
|
|
8645
8454
|
|
|
8646
8455
|
Raises:
|
|
8647
|
-
TypeError: If `x
|
|
8648
|
-
TypeError: If dtype of `x
|
|
8649
|
-
TypeError: If
|
|
8456
|
+
TypeError: If `x`, `positive`, `negative`, or `margin` is not a Tensor.
|
|
8457
|
+
TypeError: If dtype of `x`, `positive`, or `negative` is not BasicType.
|
|
8458
|
+
TypeError: If dtypes of `x`, `positive` and `negative` are not the same.
|
|
8650
8459
|
TypeError: If `margin` is not float32.
|
|
8651
8460
|
TypeError: If `p` is not an int.
|
|
8652
8461
|
TypeError: If `eps` is not a float.
|
|
@@ -8656,7 +8465,7 @@ class TripletMarginLoss(Primitive):
|
|
|
8656
8465
|
ValueError: If the dimension of input `x` or `positive` or `negative`
|
|
8657
8466
|
is bigger than or equal to 8.
|
|
8658
8467
|
ValueError: If length of shape of `margin` is not 0.
|
|
8659
|
-
ValueError: If
|
|
8468
|
+
ValueError: If shapes of `x`, `positive` and `negative` cannot be broadcast.
|
|
8660
8469
|
ValueError: If `reduction` is not one of ``'none'``, ``'mean'``, ``'sum'``.
|
|
8661
8470
|
|
|
8662
8471
|
Supported Platforms:
|
|
@@ -8805,11 +8614,11 @@ class SparseApplyAdagradDA(Primitive):
|
|
|
8805
8614
|
Otherwise the behavior is undefined, but may exhibit less contention. Default: ``False`` .
|
|
8806
8615
|
|
|
8807
8616
|
Inputs:
|
|
8808
|
-
- **var** (Parameter) - Variable to be updated.
|
|
8617
|
+
- **var** (Union[Parameter, Tensor]) - Variable to be updated.
|
|
8809
8618
|
The shape is :math:`(N, *)` where :math:`*` means, any number of additional dimensions.
|
|
8810
|
-
- **grad_accum** (Parameter) - The dict of mutable tensor grad_accum. Must have the same
|
|
8619
|
+
- **grad_accum** (Union[Parameter, Tensor]) - The dict of mutable tensor grad_accum. Must have the same
|
|
8811
8620
|
shape and dtype as `var`.
|
|
8812
|
-
- **grad_square_accum** (Parameter) - The dict of mutable tensor grad_square_accum.
|
|
8621
|
+
- **grad_square_accum** (Union[Parameter, Tensor]) - The dict of mutable tensor grad_square_accum.
|
|
8813
8622
|
Must have the same shape and dtype as `var`.
|
|
8814
8623
|
- **grad** (Tensor) - A tensor of the same type as `var` and grad.shape[1:] = var.shape[1:] if rank(var) > 1.
|
|
8815
8624
|
- **indices** (Tensor) - A tensor of indices in the first dimension of `var` and `accum`.
|
|
@@ -8987,8 +8796,8 @@ class SparseApplyProximalGradientDescent(Primitive):
|
|
|
8987
8796
|
Default: ``False`` .
|
|
8988
8797
|
|
|
8989
8798
|
Inputs:
|
|
8990
|
-
- **var** (Parameter) - Variable tensor to be updated. The data type must be int8, int16, int32,
|
|
8991
|
-
uint8, uint16, uint32, uint64, float16, float32 or float64.
|
|
8799
|
+
- **var** (Union[Parameter, Tensor]) - Variable tensor to be updated. The data type must be int8, int16, int32,
|
|
8800
|
+
int64, uint8, uint16, uint32, uint64, float16, float32 or float64.
|
|
8992
8801
|
The shape is :math:`(N, *)` where :math:`*` means, any number of additional dimensions.
|
|
8993
8802
|
- **alpha** (Union[Number, Tensor]) - Scaling factor. Must be a scalar with same type as `var`.
|
|
8994
8803
|
- **l1** (Union[Number, Tensor]) - L1 regularization. Must be a scalar with same type as `var`.
|
|
@@ -9003,7 +8812,7 @@ class SparseApplyProximalGradientDescent(Primitive):
|
|
|
9003
8812
|
- **var** (Tensor) - Tensor, has the same shape and type as 'var'.
|
|
9004
8813
|
|
|
9005
8814
|
Raises:
|
|
9006
|
-
TypeError: If `var
|
|
8815
|
+
TypeError: If `var` is neither a Parameter nor a Tensor.
|
|
9007
8816
|
TypeError: If `alpha`, `l1`, `l2` is neither a Number nor a Tensor.
|
|
9008
8817
|
TypeError: If `use_locking` is not a bool.
|
|
9009
8818
|
TypeError: If dtype of `var`, `alpha`, `l1`, `l2` or `grad` is not one of int8, int16,
|
|
@@ -9139,51 +8948,6 @@ class NuclearNorm(Primitive):
         validator.check_value_type("keepdim", keepdim, [bool], self.name)
 
 
-class GLU(Primitive):
-    r"""
-    Computes GLU (Gated Linear Unit activation function) of input tensors.
-
-    .. warning::
-        This is an experimental API that is subject to change or deletion.
-
-    Refer to :func:`mindspore.ops.glu` for more details.
-
-    Args:
-        axis (int, optional): Axis on which to split the input.
-            The value of `axis` must be an int within range [-rank(`x`), rank(`x`)).
-            Default: ``-1`` , specifying the last dimension.
-
-    Inputs:
-        - **x** (Tensor) - Input tensor. `x.shape[axis]` must be even.
-
-    Outputs:
-        Tensor, has the same data type with `x`.
-
-    Supported Platforms:
-        ``Ascend`` ``CPU``
-
-    Examples:
-        >>> from mindspore import ops, Tensor
-        >>> from mindspore import dtype as mstype
-        >>> import numpy as np
-        >>> axis = 0
-        >>> x = Tensor(np.array([0.3220, 0.9545, 0.7879, 0.0975, 0.3698,
-        ...                      0.5135, 0.5740, 0.3435, 0.1895, 0.8764,
-        ...                      0.4980, 0.9673, 0.9879, 0.6988, 0.9022,
-        ...                      0.9304, 0.1558, 0.0153, 0.1559, 0.9852]).reshape([2, 2, 5]), mstype.float32)
-        >>> glu = ops.GLU(axis=axis)
-        >>> y = glu(x)
-        >>> print(y)
-        [[[0.20028052 0.6916126 0.57412136 0.06512236 0.26307625]
-          [0.3682598 0.3093122 0.17306386 0.10212085 0.63814086]]]
-    """
-
-    @prim_attr_register
-    def __init__(self, axis=-1):
-        """Initialize GLU"""
-        validator.check_value_type("axis", axis, [int], self.name)
-
-
 class FractionalMaxPoolWithFixedKsize(Primitive):
     r"""
     Applies a 2D fractional max pooling to an input signal composed of multiple input planes.
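The `GLU` primitive class is removed in 2.6.0. Its docstring pointed to the functional :func:`mindspore.ops.glu`, which performs the same split-and-gate computation (split `x` along `axis` into `a` and `b`, return `a * sigmoid(b)`). A minimal sketch, not taken from the diff and assuming that functional interface is still present in 2.6.0:

    >>> # Hedged sketch using the functional ops.glu the removed docstring refers to.
    >>> import numpy as np
    >>> from mindspore import Tensor, ops
    >>> from mindspore import dtype as mstype
    >>> x = Tensor(np.arange(8).reshape(2, 4), mstype.float32)
    >>> y = ops.glu(x, axis=-1)          # halves the last axis: a * sigmoid(b)
    >>> print(y.shape)
    (2, 2)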
@@ -9267,7 +9031,8 @@ class FractionalMaxPoolWithFixedKsize(Primitive):
 class ChannelShuffle(Primitive):
     r"""
     Divide the channels in a tensor of shape :math:`(*, C, H, W)` into :math:`g` group and
-    rearrange them as :math:`(*, \frac
+    rearrange them as :math:`(*, \frac{C}{g}, g, H*W)`, while retaining the original tensor
+    shape in the final output.
 
     .. warning::
         This is an experimental API that is subject to change or deletion.
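The corrected lines spell out the rearrangement `ChannelShuffle` performs. A NumPy-only sketch of that reshape/transpose/reshape, illustrating the formula rather than the MindSpore primitive itself:

    >>> import numpy as np
    >>> N, C, H, W, g = 1, 6, 2, 2, 3
    >>> x = np.arange(N * C * H * W).reshape(N, C, H, W)
    >>> y = x.reshape(N, g, C // g, H, W)    # split the C channels into g groups
    >>> y = y.transpose(0, 2, 1, 3, 4)       # swap the group axis and the per-group axis
    >>> y = y.reshape(N, C, H, W)            # restore the original (*, C, H, W) shape
    >>> print(y[0, :, 0, 0])                 # channel order after shuffling
    [ 0  8 16  4 12 20]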
@@ -9475,93 +9240,6 @@ class WKV(Primitive):
                                 outputs=["output", "out_sp", "out_sq", "out_sm"])
 
 
-class PromptFlashAttention(Primitive):
-    r"""
-    The interface for fully inference.
-    B -- Batch size
-    S -- Sequence length
-    H -- Hidden size
-
-    Note:
-        experiment ops
-
-    .. warning::
-        This is an experimental API that is subject to change or deletion.
-
-    Args:
-        num_heads (int): The number of heads.
-        scale_value (float): The scale value indicating the scale coefficient, which is used as the scalar of
-            Muls in the calculation. Default: 1.0.
-        pre_tokens (int): Previous tokens. Default: 2147483547.
-        next_tokens (int): next tokens. Default: 0.
-            indicate the upper triangle, Indicate the number of data blocks involved in the calculation. The value 0
-            indicates that the data blocks in the upper triangle are not involved in the calculation
-        input_layout (str): the data layout of the input qkv, support `(BSH)` and `(BNSD)`, Default `BSH`.
-        num_key_value_heads (int): head numbers of key/value which are used in GQA algorithm.
-            The value o indicates if the key and value have the same head nums, use numHeads. Default: 0.
-        sparse_mode (int): Default: 0
-        inner_precise (int): 0, float16 high precision. 1, high performance. default 1
-
-    Inputs:
-        - **query** (Tensor) - The query tensor with data type of float16 or float32.
-          Input tensor of shape :math:`(B, S, H)` / `(B, N, S, D)`.
-        - **key** (Tensor) - The key tensor with data type of float16 or float32.
-          Input tensor of shape :math:`(B, S, H)` / `(B, N, S, D)`.
-        - **value** (Tensor) - The value tensor with data type of float16 or float32.
-          Input tensor of shape :math:`(B, S, H)` / `(B, N, S, D)`.
-        - **attn_mask** (Tensor) - The attention mask tensor with data type of float16 or float32.
-          For each element, 0 indicates retention and 1 indicates discard. Input tensor of shape :math:`(B, 1, S, S)`.
-        - **actual_seq_lengths** (Tensor): Describe actual sequence length of each input with data type of int64.
-        - **actual_seq_lengths_kv** (Tensor): Describe actual sequence length of each input with data type of int64.
-        - **pse_shift** (Tensor) - The position encoding tensor with data type of float16 or float32.
-        - **dep_scale1** (Tensor)
-        - **quant_scale1** (Tensor)
-        - **deq_scale2** (Tensor)
-        - **quant_scale2** (Tensor)
-        - **quant_offset2** (Tensor)
-
-    Outputs:
-        - **attention_out** (Tensor) - Input tensor of shape :math:`(B, S, H)` / `(B, N, S, D)`.
-
-    Supported Platforms:
-        ``Ascend``
-
-    Examples:
-        >>> import mindspore.ops.operations.nn_ops as P
-        >>> from mindspore import Tensor
-        >>> import numpy as np
-        >>> B = 1
-        >>> N = 16
-        >>> S = 256
-        >>> D = 16
-        >>> query = Tensor(np.ones((B, N, S, D), dtype=np.float16))
-        >>> key = Tensor(np.ones((B, N, S, D), dtype=np.float16))
-        >>> value = Tensor(np.ones((B, N, S, D), dtype=np.float16))
-        >>> attn_mask = Tensor(np.ones((B, 1, S, S), dtype=np.float16))
-        >>> pfa = P.PromptFlashAttention(N, input_layout='BNSD')
-        >>> out = pfa(query, key, value, attn_mask, None, None, None, None, None, None, None, None)
-        >>> print(out.shape)
-        (1, 16, 256, 16)
-    """
-
-    @prim_attr_register
-    def __init__(self, num_heads, scale_value=1.0, pre_tokens=214748647, next_tokens=0, input_layout='BSH',
-                 num_key_value_heads=0, sparse_mode=0, inner_precise=1):
-        """Initialize PromptFlashAttention."""
-        validator.check_value_type('num_heads', num_heads, [int], self.name)
-        validator.check_value_type('scale_value', scale_value, [float], self.name)
-        validator.check_value_type('pre_tokens', pre_tokens, [int], self.name)
-        validator.check_value_type('next_tokens', next_tokens, [int], self.name)
-        validator.check_value_type('input_layout', input_layout, [str], self.name)
-        validator.check_value_type('num_key_value_heads', num_key_value_heads, [int], self.name)
-        validator.check_value_type('sparse_mode', sparse_mode, [int], self.name)
-        validator.check_value_type('inner_precise', inner_precise, [int], self.name)
-        self.init_prim_io_names(inputs=["query", "key", "value", "attn_mask", "actual_seq_lengths",
-                                        "actual_seq_lengths_kv", "pse_shift", "deq_scale1", "quant_scale1",
-                                        "deq_scale2", "quant_scale2", "quant_offset2"],
-                                outputs=["attention_out"])
-
-
 class AllFinite(Primitive):
     r"""
     Check all gradients is finite.
@@ -9578,3 +9256,6 @@ class AllFinite(Primitive):
             raise RuntimeError(
                 "The version of Ascend AI software package installed "
                 "in the current environment does not support AllFinite.")
+
+    def __call__(self, *args):
+        return _convert_stub(pyboost_all_finite(self, args))
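The added `__call__` routes `AllFinite` through the pyboost execution path, so the primitive can be invoked directly on a tuple of tensors. A hypothetical usage sketch, not taken from the diff: it assumes the import path shown in the removed `PromptFlashAttention` example above, requires an Ascend software package that supports AllFinite, and assumes the result is a scalar bool Tensor that is ``True`` when no element is inf or NaN:

    >>> # Hypothetical sketch; import path and output semantics are assumptions.
    >>> import numpy as np
    >>> from mindspore import Tensor
    >>> from mindspore.ops.operations.nn_ops import AllFinite
    >>> grads = (Tensor(np.ones((2, 2), np.float32)), Tensor(np.zeros((8,), np.float32)))
    >>> all_finite = AllFinite()
    >>> print(all_finite(grads))    # assumption: True when every gradient element is finite

In user code this overflow check is typically reached through the loss-scaling helpers (for example `mindspore.amp.all_finite`) rather than by constructing the primitive directly.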