mindspore 2.7.0rc1__cp310-cp310-win_amd64.whl → 2.7.1__cp310-cp310-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of mindspore might be problematic.
Files changed (370)
  1. mindspore/.commit_id +1 -1
  2. mindspore/__init__.py +5 -2
  3. mindspore/_c_dataengine.cp310-win_amd64.pyd +0 -0
  4. mindspore/_c_expression.cp310-win_amd64.pyd +0 -0
  5. mindspore/_c_mindrecord.cp310-win_amd64.pyd +0 -0
  6. mindspore/_checkparam.py +2 -2
  7. mindspore/_extends/builtin_operations.py +3 -3
  8. mindspore/_extends/parallel_compile/akg_compiler/custom.py +1109 -0
  9. mindspore/_extends/parallel_compile/akg_compiler/gen_custom_op_files.py +1 -1
  10. mindspore/_extends/parse/__init__.py +3 -3
  11. mindspore/_extends/parse/compile_config.py +24 -1
  12. mindspore/_extends/parse/deprecated/deprecated_tensor_method.py +6 -3
  13. mindspore/_extends/parse/parser.py +28 -22
  14. mindspore/_extends/parse/resources.py +1 -1
  15. mindspore/_extends/parse/standard_method.py +23 -2
  16. mindspore/_extends/parse/trope.py +2 -1
  17. mindspore/_extends/pijit/pijit_func_white_list.py +9 -27
  18. mindspore/amp.py +0 -18
  19. mindspore/avcodec-59.dll +0 -0
  20. mindspore/avdevice-59.dll +0 -0
  21. mindspore/avfilter-8.dll +0 -0
  22. mindspore/avformat-59.dll +0 -0
  23. mindspore/avutil-57.dll +0 -0
  24. mindspore/boost/base.py +29 -2
  25. mindspore/common/__init__.py +18 -12
  26. mindspore/common/_decorator.py +3 -2
  27. mindspore/common/_grad_function.py +3 -1
  28. mindspore/common/_tensor_cpp_method.py +1 -1
  29. mindspore/common/_tensor_docs.py +371 -96
  30. mindspore/common/_utils.py +7 -43
  31. mindspore/common/api.py +434 -135
  32. mindspore/common/dtype.py +98 -57
  33. mindspore/common/dump.py +7 -108
  34. mindspore/common/dynamic_shape/__init__.py +0 -0
  35. mindspore/common/{auto_dynamic_shape.py → dynamic_shape/auto_dynamic_shape.py} +15 -23
  36. mindspore/common/dynamic_shape/enable_dynamic.py +197 -0
  37. mindspore/common/file_system.py +59 -9
  38. mindspore/common/hook_handle.py +82 -3
  39. mindspore/common/jit_config.py +5 -1
  40. mindspore/common/jit_trace.py +27 -12
  41. mindspore/common/lazy_inline.py +5 -3
  42. mindspore/common/np_dtype.py +3 -3
  43. mindspore/common/parameter.py +17 -127
  44. mindspore/common/recompute.py +4 -13
  45. mindspore/common/tensor.py +50 -217
  46. mindspore/communication/_comm_helper.py +11 -1
  47. mindspore/communication/comm_func.py +138 -4
  48. mindspore/communication/management.py +85 -1
  49. mindspore/config/op_info.config +0 -15
  50. mindspore/context.py +20 -106
  51. mindspore/dataset/__init__.py +1 -1
  52. mindspore/dataset/audio/transforms.py +1 -1
  53. mindspore/dataset/core/config.py +35 -1
  54. mindspore/dataset/engine/datasets.py +338 -319
  55. mindspore/dataset/engine/datasets_user_defined.py +38 -22
  56. mindspore/dataset/engine/datasets_vision.py +1 -1
  57. mindspore/dataset/engine/validators.py +1 -15
  58. mindspore/dataset/transforms/c_transforms.py +2 -2
  59. mindspore/dataset/transforms/transforms.py +3 -3
  60. mindspore/dataset/vision/__init__.py +1 -1
  61. mindspore/dataset/vision/py_transforms.py +8 -8
  62. mindspore/dataset/vision/transforms.py +17 -5
  63. mindspore/dataset/vision/utils.py +632 -21
  64. mindspore/device_context/ascend/op_tuning.py +35 -1
  65. mindspore/dnnl.dll +0 -0
  66. mindspore/{profiler/common/validator → graph}/__init__.py +9 -1
  67. mindspore/graph/custom_pass.py +55 -0
  68. mindspore/include/api/cell.h +28 -4
  69. mindspore/include/api/cfg.h +24 -7
  70. mindspore/include/api/context.h +1 -0
  71. mindspore/include/api/delegate.h +0 -2
  72. mindspore/include/api/dual_abi_helper.h +100 -19
  73. mindspore/include/api/graph.h +14 -1
  74. mindspore/include/api/kernel.h +16 -3
  75. mindspore/include/api/kernel_api.h +9 -1
  76. mindspore/include/api/metrics/accuracy.h +9 -0
  77. mindspore/include/api/model.h +5 -1
  78. mindspore/include/api/model_group.h +4 -0
  79. mindspore/include/api/model_parallel_runner.h +2 -0
  80. mindspore/include/api/status.h +48 -10
  81. mindspore/include/api/types.h +6 -1
  82. mindspore/include/dataset/constants.h +9 -0
  83. mindspore/include/dataset/execute.h +2 -2
  84. mindspore/jpeg62.dll +0 -0
  85. mindspore/mindrecord/__init__.py +3 -3
  86. mindspore/mindrecord/common/exceptions.py +1 -0
  87. mindspore/mindrecord/config.py +1 -1
  88. mindspore/{parallel/mpi → mindrecord/core}/__init__.py +4 -1
  89. mindspore/mindrecord/{shardheader.py → core/shardheader.py} +2 -1
  90. mindspore/mindrecord/{shardindexgenerator.py → core/shardindexgenerator.py} +1 -1
  91. mindspore/mindrecord/{shardreader.py → core/shardreader.py} +2 -1
  92. mindspore/mindrecord/{shardsegment.py → core/shardsegment.py} +2 -2
  93. mindspore/mindrecord/{shardutils.py → core/shardutils.py} +1 -1
  94. mindspore/mindrecord/{shardwriter.py → core/shardwriter.py} +1 -1
  95. mindspore/mindrecord/filereader.py +4 -4
  96. mindspore/mindrecord/filewriter.py +5 -5
  97. mindspore/mindrecord/mindpage.py +2 -2
  98. mindspore/mindrecord/tools/cifar10.py +4 -3
  99. mindspore/mindrecord/tools/cifar100.py +1 -1
  100. mindspore/mindrecord/tools/cifar100_to_mr.py +1 -1
  101. mindspore/mindrecord/tools/cifar10_to_mr.py +6 -6
  102. mindspore/mindrecord/tools/csv_to_mr.py +1 -1
  103. mindspore/mindrecord/tools/imagenet_to_mr.py +1 -1
  104. mindspore/mindrecord/tools/mnist_to_mr.py +1 -1
  105. mindspore/mindrecord/tools/tfrecord_to_mr.py +1 -1
  106. mindspore/mindspore_backend_common.dll +0 -0
  107. mindspore/mindspore_backend_manager.dll +0 -0
  108. mindspore/mindspore_cluster.dll +0 -0
  109. mindspore/mindspore_common.dll +0 -0
  110. mindspore/mindspore_core.dll +0 -0
  111. mindspore/mindspore_cpu.dll +0 -0
  112. mindspore/mindspore_dump.dll +0 -0
  113. mindspore/mindspore_frontend.dll +0 -0
  114. mindspore/mindspore_glog.dll +0 -0
  115. mindspore/mindspore_hardware_abstract.dll +0 -0
  116. mindspore/mindspore_memory_pool.dll +0 -0
  117. mindspore/mindspore_ms_backend.dll +0 -0
  118. mindspore/mindspore_ops.dll +0 -0
  119. mindspore/{mindspore_ops_host.dll → mindspore_ops_cpu.dll} +0 -0
  120. mindspore/mindspore_profiler.dll +0 -0
  121. mindspore/mindspore_pyboost.dll +0 -0
  122. mindspore/mindspore_pynative.dll +0 -0
  123. mindspore/mindspore_runtime_pipeline.dll +0 -0
  124. mindspore/mindspore_runtime_utils.dll +0 -0
  125. mindspore/mindspore_tools.dll +0 -0
  126. mindspore/mint/__init__.py +15 -10
  127. mindspore/mint/distributed/__init__.py +4 -0
  128. mindspore/mint/distributed/distributed.py +392 -69
  129. mindspore/mint/nn/__init__.py +2 -16
  130. mindspore/mint/nn/functional.py +4 -110
  131. mindspore/mint/nn/layer/__init__.py +0 -2
  132. mindspore/mint/nn/layer/_functions.py +1 -2
  133. mindspore/mint/nn/layer/activation.py +0 -6
  134. mindspore/mint/nn/layer/basic.py +0 -47
  135. mindspore/mint/nn/layer/conv.py +10 -10
  136. mindspore/mint/nn/layer/normalization.py +11 -16
  137. mindspore/mint/nn/layer/pooling.py +0 -4
  138. mindspore/nn/__init__.py +1 -3
  139. mindspore/nn/cell.py +231 -239
  140. mindspore/nn/layer/activation.py +4 -2
  141. mindspore/nn/layer/basic.py +56 -14
  142. mindspore/nn/layer/container.py +16 -0
  143. mindspore/nn/layer/embedding.py +4 -169
  144. mindspore/nn/layer/image.py +1 -1
  145. mindspore/nn/layer/normalization.py +2 -1
  146. mindspore/nn/layer/thor_layer.py +4 -85
  147. mindspore/nn/optim/ada_grad.py +0 -1
  148. mindspore/nn/optim/adafactor.py +0 -1
  149. mindspore/nn/optim/adam.py +32 -127
  150. mindspore/nn/optim/adamax.py +0 -1
  151. mindspore/nn/optim/asgd.py +0 -1
  152. mindspore/nn/optim/ftrl.py +8 -102
  153. mindspore/nn/optim/lamb.py +1 -4
  154. mindspore/nn/optim/lars.py +0 -3
  155. mindspore/nn/optim/lazyadam.py +25 -218
  156. mindspore/nn/optim/momentum.py +5 -43
  157. mindspore/nn/optim/optimizer.py +6 -55
  158. mindspore/nn/optim/proximal_ada_grad.py +0 -1
  159. mindspore/nn/optim/rmsprop.py +0 -1
  160. mindspore/nn/optim/rprop.py +0 -1
  161. mindspore/nn/optim/sgd.py +0 -1
  162. mindspore/nn/optim/tft_wrapper.py +2 -4
  163. mindspore/nn/optim/thor.py +0 -2
  164. mindspore/nn/probability/bijector/bijector.py +7 -8
  165. mindspore/nn/probability/bijector/gumbel_cdf.py +2 -2
  166. mindspore/nn/probability/bijector/power_transform.py +20 -21
  167. mindspore/nn/probability/bijector/scalar_affine.py +5 -5
  168. mindspore/nn/probability/bijector/softplus.py +13 -14
  169. mindspore/nn/probability/distribution/_utils/utils.py +2 -2
  170. mindspore/nn/wrap/cell_wrapper.py +39 -5
  171. mindspore/nn/wrap/grad_reducer.py +4 -89
  172. mindspore/numpy/array_creations.py +4 -4
  173. mindspore/numpy/fft.py +9 -9
  174. mindspore/numpy/utils_const.py +1 -1
  175. mindspore/{nn/reinforcement → onnx}/__init__.py +5 -8
  176. mindspore/onnx/onnx_export.py +137 -0
  177. mindspore/opencv_core4110.dll +0 -0
  178. mindspore/opencv_imgcodecs4110.dll +0 -0
  179. mindspore/{opencv_imgproc452.dll → opencv_imgproc4110.dll} +0 -0
  180. mindspore/ops/__init__.py +2 -0
  181. mindspore/ops/_grad_experimental/grad_comm_ops.py +38 -2
  182. mindspore/ops/_grad_experimental/grad_inner_ops.py +0 -9
  183. mindspore/ops/_op_impl/aicpu/__init__.py +0 -10
  184. mindspore/ops/_op_impl/cpu/__init__.py +1 -5
  185. mindspore/ops/_op_impl/cpu/{buffer_append.py → joinedstr_op.py} +8 -8
  186. mindspore/ops/auto_generate/cpp_create_prim_instance_helper.py +28 -24
  187. mindspore/ops/auto_generate/gen_extend_func.py +6 -11
  188. mindspore/ops/auto_generate/gen_ops_def.py +385 -154
  189. mindspore/ops/auto_generate/gen_ops_prim.py +5676 -5167
  190. mindspore/ops/communication.py +97 -0
  191. mindspore/ops/composite/__init__.py +5 -2
  192. mindspore/ops/composite/base.py +16 -2
  193. mindspore/ops/composite/multitype_ops/__init__.py +3 -1
  194. mindspore/ops/composite/multitype_ops/_compile_utils.py +150 -8
  195. mindspore/ops/composite/multitype_ops/_constexpr_utils.py +1 -1
  196. mindspore/ops/composite/multitype_ops/add_impl.py +7 -0
  197. mindspore/ops/composite/multitype_ops/mod_impl.py +27 -0
  198. mindspore/ops/function/__init__.py +2 -0
  199. mindspore/ops/function/array_func.py +24 -18
  200. mindspore/ops/function/comm_func.py +3883 -0
  201. mindspore/ops/function/debug_func.py +7 -6
  202. mindspore/ops/function/grad/grad_func.py +4 -12
  203. mindspore/ops/function/math_func.py +89 -86
  204. mindspore/ops/function/nn_func.py +92 -313
  205. mindspore/ops/function/random_func.py +9 -18
  206. mindspore/ops/functional.py +4 -1
  207. mindspore/ops/functional_overload.py +377 -30
  208. mindspore/ops/operations/__init__.py +2 -5
  209. mindspore/ops/operations/_custom_ops_utils.py +7 -9
  210. mindspore/ops/operations/_inner_ops.py +12 -50
  211. mindspore/ops/operations/_rl_inner_ops.py +0 -933
  212. mindspore/ops/operations/array_ops.py +5 -50
  213. mindspore/ops/operations/comm_ops.py +95 -17
  214. mindspore/ops/operations/custom_ops.py +237 -22
  215. mindspore/ops/operations/debug_ops.py +33 -35
  216. mindspore/ops/operations/manually_defined/ops_def.py +39 -318
  217. mindspore/ops/operations/math_ops.py +5 -5
  218. mindspore/ops/operations/nn_ops.py +3 -3
  219. mindspore/ops/operations/sparse_ops.py +0 -83
  220. mindspore/ops/primitive.py +4 -27
  221. mindspore/ops/tensor_method.py +88 -10
  222. mindspore/ops_generate/aclnn/aclnn_kernel_register_auto_cc_generator.py +5 -5
  223. mindspore/ops_generate/aclnn/gen_aclnn_implement.py +8 -8
  224. mindspore/ops_generate/api/functions_cc_generator.py +53 -4
  225. mindspore/ops_generate/api/tensor_func_reg_cpp_generator.py +25 -11
  226. mindspore/ops_generate/common/gen_constants.py +11 -10
  227. mindspore/ops_generate/common/op_proto.py +18 -1
  228. mindspore/ops_generate/common/template.py +102 -245
  229. mindspore/ops_generate/common/template_utils.py +212 -0
  230. mindspore/ops_generate/gen_custom_ops.py +69 -0
  231. mindspore/ops_generate/op_def/ops_def_cc_generator.py +78 -7
  232. mindspore/ops_generate/op_def_py/base_op_prim_py_generator.py +360 -0
  233. mindspore/ops_generate/op_def_py/custom_op_prim_py_generator.py +140 -0
  234. mindspore/ops_generate/op_def_py/op_def_py_generator.py +54 -7
  235. mindspore/ops_generate/op_def_py/op_prim_py_generator.py +5 -312
  236. mindspore/ops_generate/pyboost/auto_grad_impl_cc_generator.py +74 -17
  237. mindspore/ops_generate/pyboost/auto_grad_reg_cc_generator.py +22 -5
  238. mindspore/ops_generate/pyboost/gen_pyboost_func.py +0 -16
  239. mindspore/ops_generate/pyboost/op_template_parser.py +3 -2
  240. mindspore/ops_generate/pyboost/pyboost_functions_cpp_generator.py +21 -5
  241. mindspore/ops_generate/pyboost/pyboost_functions_h_generator.py +2 -2
  242. mindspore/ops_generate/pyboost/pyboost_functions_impl_cpp_generator.py +30 -10
  243. mindspore/ops_generate/pyboost/pyboost_grad_function_cpp_generator.py +10 -3
  244. mindspore/ops_generate/pyboost/pyboost_internal_kernel_info_adapter_generator.py +1 -1
  245. mindspore/ops_generate/pyboost/pyboost_native_grad_functions_generator.py +19 -9
  246. mindspore/ops_generate/pyboost/pyboost_op_cpp_code_generator.py +71 -28
  247. mindspore/ops_generate/pyboost/pyboost_overload_functions_cpp_generator.py +10 -9
  248. mindspore/ops_generate/pyboost/pyboost_utils.py +27 -16
  249. mindspore/ops_generate/resources/yaml_loader.py +13 -0
  250. mindspore/ops_generate/tensor_py_cc_generator.py +2 -2
  251. mindspore/parallel/_auto_parallel_context.py +5 -15
  252. mindspore/parallel/_cell_wrapper.py +1 -1
  253. mindspore/parallel/_parallel_serialization.py +4 -6
  254. mindspore/parallel/_ps_context.py +2 -2
  255. mindspore/parallel/_utils.py +34 -17
  256. mindspore/parallel/auto_parallel.py +23 -9
  257. mindspore/parallel/checkpoint_transform.py +20 -2
  258. mindspore/parallel/cluster/process_entity/_api.py +28 -33
  259. mindspore/parallel/cluster/process_entity/_utils.py +9 -5
  260. mindspore/parallel/cluster/run.py +5 -3
  261. mindspore/{experimental/llm_boost/ascend_native → parallel/distributed}/__init__.py +21 -22
  262. mindspore/parallel/distributed/distributed_data_parallel.py +393 -0
  263. mindspore/parallel/distributed/flatten_grad_buffer.py +295 -0
  264. mindspore/parallel/function/reshard_func.py +6 -5
  265. mindspore/parallel/nn/parallel_cell_wrapper.py +40 -3
  266. mindspore/parallel/nn/parallel_grad_reducer.py +0 -8
  267. mindspore/parallel/shard.py +7 -21
  268. mindspore/parallel/strategy.py +336 -0
  269. mindspore/parallel/transform_safetensors.py +127 -20
  270. mindspore/profiler/analysis/viewer/ascend_kernel_details_viewer.py +13 -9
  271. mindspore/profiler/analysis/viewer/ascend_op_memory_viewer.py +1 -1
  272. mindspore/profiler/analysis/viewer/ms_minddata_viewer.py +1 -1
  273. mindspore/profiler/common/constant.py +5 -0
  274. mindspore/profiler/common/file_manager.py +9 -0
  275. mindspore/profiler/common/msprof_cmd_tool.py +40 -4
  276. mindspore/profiler/common/path_manager.py +65 -24
  277. mindspore/profiler/common/profiler_context.py +27 -14
  278. mindspore/profiler/common/profiler_info.py +3 -3
  279. mindspore/profiler/common/profiler_meta_data.py +1 -0
  280. mindspore/profiler/common/profiler_op_analyse.py +10 -6
  281. mindspore/profiler/common/profiler_path_manager.py +13 -0
  282. mindspore/profiler/common/util.py +30 -3
  283. mindspore/profiler/dynamic_profiler.py +91 -46
  284. mindspore/profiler/envprofiler.py +30 -5
  285. mindspore/profiler/experimental_config.py +18 -2
  286. mindspore/profiler/platform/cpu_profiler.py +10 -4
  287. mindspore/profiler/platform/npu_profiler.py +34 -7
  288. mindspore/profiler/profiler.py +193 -145
  289. mindspore/profiler/profiler_action_controller.py +1 -1
  290. mindspore/profiler/profiler_interface.py +2 -2
  291. mindspore/rewrite/symbol_tree/symbol_tree.py +1 -1
  292. mindspore/run_check/_check_version.py +108 -24
  293. mindspore/runtime/__init__.py +9 -6
  294. mindspore/runtime/executor.py +35 -0
  295. mindspore/runtime/memory.py +113 -0
  296. mindspore/runtime/thread_bind_core.py +1 -1
  297. mindspore/swresample-4.dll +0 -0
  298. mindspore/swscale-6.dll +0 -0
  299. mindspore/tinyxml2.dll +0 -0
  300. mindspore/{experimental/llm_boost → tools}/__init__.py +5 -5
  301. mindspore/tools/data_dump.py +130 -0
  302. mindspore/tools/sdc_detect.py +91 -0
  303. mindspore/tools/stress_detect.py +63 -0
  304. mindspore/train/__init__.py +6 -6
  305. mindspore/train/_utils.py +8 -21
  306. mindspore/train/amp.py +6 -7
  307. mindspore/train/callback/_callback.py +2 -1
  308. mindspore/train/callback/_checkpoint.py +1 -17
  309. mindspore/train/callback/_flops_collector.py +10 -6
  310. mindspore/train/callback/_train_fault_tolerance.py +72 -25
  311. mindspore/train/data_sink.py +5 -9
  312. mindspore/train/dataset_helper.py +5 -5
  313. mindspore/train/model.py +41 -230
  314. mindspore/train/serialization.py +160 -401
  315. mindspore/train/train_thor/model_thor.py +2 -2
  316. mindspore/turbojpeg.dll +0 -0
  317. mindspore/utils/__init__.py +6 -3
  318. mindspore/utils/dlpack.py +92 -0
  319. mindspore/utils/dryrun.py +1 -1
  320. mindspore/utils/runtime_execution_order_check.py +10 -0
  321. mindspore/utils/sdc_detect.py +14 -12
  322. mindspore/utils/stress_detect.py +43 -0
  323. mindspore/utils/utils.py +152 -16
  324. mindspore/version.py +1 -1
  325. {mindspore-2.7.0rc1.dist-info → mindspore-2.7.1.dist-info}/METADATA +3 -2
  326. {mindspore-2.7.0rc1.dist-info → mindspore-2.7.1.dist-info}/RECORD +330 -344
  327. mindspore/_extends/remote/kernel_build_server_ascend.py +0 -75
  328. mindspore/communication/_hccl_management.py +0 -297
  329. mindspore/experimental/llm_boost/ascend_native/llama_boost_ascend_native.py +0 -207
  330. mindspore/experimental/llm_boost/ascend_native/llm_boost.py +0 -52
  331. mindspore/experimental/llm_boost/atb/__init__.py +0 -23
  332. mindspore/experimental/llm_boost/atb/boost_base.py +0 -385
  333. mindspore/experimental/llm_boost/atb/llama_boost.py +0 -137
  334. mindspore/experimental/llm_boost/atb/qwen_boost.py +0 -124
  335. mindspore/experimental/llm_boost/register.py +0 -130
  336. mindspore/experimental/llm_boost/utils.py +0 -31
  337. mindspore/include/OWNERS +0 -7
  338. mindspore/mindspore_cpu_res_manager.dll +0 -0
  339. mindspore/mindspore_ops_kernel_common.dll +0 -0
  340. mindspore/mindspore_res_manager.dll +0 -0
  341. mindspore/nn/optim/_dist_optimizer_registry.py +0 -111
  342. mindspore/nn/reinforcement/_batch_read_write.py +0 -142
  343. mindspore/nn/reinforcement/_tensors_queue.py +0 -152
  344. mindspore/nn/reinforcement/tensor_array.py +0 -145
  345. mindspore/opencv_core452.dll +0 -0
  346. mindspore/opencv_imgcodecs452.dll +0 -0
  347. mindspore/ops/_op_impl/aicpu/priority_replay_buffer.py +0 -113
  348. mindspore/ops/_op_impl/aicpu/reservoir_replay_buffer.py +0 -96
  349. mindspore/ops/_op_impl/aicpu/sparse_cross.py +0 -42
  350. mindspore/ops/_op_impl/cpu/buffer_get.py +0 -28
  351. mindspore/ops/_op_impl/cpu/buffer_sample.py +0 -28
  352. mindspore/ops/_op_impl/cpu/priority_replay_buffer.py +0 -42
  353. mindspore/ops/operations/_tensor_array.py +0 -359
  354. mindspore/ops/operations/rl_ops.py +0 -288
  355. mindspore/parallel/_offload_context.py +0 -275
  356. mindspore/parallel/_recovery_context.py +0 -115
  357. mindspore/parallel/_transformer/__init__.py +0 -35
  358. mindspore/parallel/_transformer/layers.py +0 -765
  359. mindspore/parallel/_transformer/loss.py +0 -251
  360. mindspore/parallel/_transformer/moe.py +0 -693
  361. mindspore/parallel/_transformer/op_parallel_config.py +0 -222
  362. mindspore/parallel/_transformer/transformer.py +0 -3124
  363. mindspore/parallel/mpi/_mpi_config.py +0 -116
  364. mindspore/profiler/common/validator/validate_path.py +0 -84
  365. mindspore/train/memory_profiling_pb2.py +0 -298
  366. mindspore/utils/hooks.py +0 -81
  367. /mindspore/common/{_auto_dynamic.py → dynamic_shape/_auto_dynamic.py} +0 -0
  368. {mindspore-2.7.0rc1.dist-info → mindspore-2.7.1.dist-info}/WHEEL +0 -0
  369. {mindspore-2.7.0rc1.dist-info → mindspore-2.7.1.dist-info}/entry_points.txt +0 -0
  370. {mindspore-2.7.0rc1.dist-info → mindspore-2.7.1.dist-info}/top_level.txt +0 -0
@@ -18,13 +18,16 @@ import hashlib
  import builtins
  import io
  import pickle
+ from datetime import timedelta
  import numpy as np
  from mindspore import log as logger
  from mindspore.common import dtype as mstype
+ from mindspore._checkparam import args_type_check
  from mindspore.ops import ReduceOp, cat
  from mindspore.common.tensor import Tensor
  from mindspore._c_expression import TensorPy as Tensor_
  from mindspore.ops.primitive import _primexpr
+ from mindspore.common.api import _pynative_executor
  from mindspore.communication._comm_helper import (
  _destroy_group_helper,
  _get_rank_helper,
@@ -33,10 +36,11 @@ from mindspore.communication._comm_helper import (
  _get_group_ranks,
  _is_available,
  _is_initialized,
+ _ExistingGroup,
  )
+ from mindspore.communication.management import _init_without_sched
  from mindspore.communication import (
  init,
- release,
  get_group_size,
  get_world_rank_from_group_rank,
  create_group,
@@ -58,9 +62,11 @@ from mindspore.ops.auto_generate.gen_ops_prim import (
  dist_comm_isend_op,
  dist_comm_all_to_all_v_op,
  dist_comm_reduce_scatter_tensor_op,
+ dist_comm_reduce_scatter_tensor_uneven_op,
  dist_comm_all_to_all_v_single_op,
  dist_comm_broadcast_op,
  dist_comm_all_gather_into_tensor_op,
+ dist_comm_all_gather_into_tensor_uneven_op,
  dist_comm_irecv_op,
  dist_comm_scatter_tensor_op,
  dist_comm_gather_into_tensor_op,
@@ -70,7 +76,7 @@ from mindspore.ops.auto_generate.gen_ops_prim import (
  dist_comm_barrier_op,
  dist_comm_batch_isend_irecv_op,
  )
- from mindspore._c_expression import TCPStoreClient, GroupOptions
+ from mindspore._c_expression import TCPStoreClient, GroupOptions, _finalize_collective

  _pickler = pickle.Pickler
  _unpickler = pickle.Unpickler
@@ -144,28 +150,26 @@ class TCPStore:

  Note:
  - The function is implemented by CPU and does not involve any hardware operations related to Ascend.
- - Currently, all parameters provided by the TCPStore class constructor are not supported.
- The master node and port number are uniformly specified by the MindSpore framework.
- The following parameters are provided, currently not supported and settings are invalid.
- - The current TcpStore function is limited and only supports scenarios where the key is
+ - Currently, all parameters provided by the TCPStore class constructor are not supported
+ except for `host_name`, `port`, `world_size`, `is_master`, `timeout` and `wait_for_workers`,
+ which are reserved parameters and invalid settings.
+ - The current TCPStore function is limited and only supports scenarios where the key is
  less than 4k and the value is less than 1G. Complex scenarios are to be supported.
- - The timeout interval for message sending and receiving in the TcpStore function is controlled by
- the `MS_RECEIVE_MSG_TIMEOUT` environment variable, in seconds, with a default value of ``15``.
- If a timeout occurs, the user needs to increase the configuration value.

  Args:
- host_name (str, invalid, optional): The hostname or IP Address the server store should run on.
- Default is ``None``.
- port (int, invalid, optional): The port on which the server store should listen for incoming requests.
- Default is ``None``.
- world_size (int, invalid, optional): The total number of store users (number of clients + 1 for the server).
- Default is ``None`` (``None`` indicates a non-fixed number of store users).
- is_master (bool, invalid, optional): True when initializing the server store and False for client stores.
+ host_name (str): The hostname or IP Address the server store should run on.
+ Currently only supports user input IP addresses.
+ port (int): The port on which the server store should listen for incoming requests.
+ world_size (int, optional): The total number of store users (number of clients + 1 for the server).
+ Default is ``None``, indicates a non-fixed number of store users. This parameter is
+ only valid for the server.
+ is_master (bool, optional): True when initializing the server store and False for client stores.
  Default is ``False``.
- timeout (timedelta, invalid, optional): Timeout used by the store during initialization, Unit: seconds.
- Default is ``300``.
- wait_for_workers (bool, invalid, optional): Whether to wait for all the workers to connect with the server
- store. This is only applicable when `world_size` is a fixed value. Default is ``True``.
+ timeout (timedelta, optional): Timeout used by the store during initialization. Default is
+ ``timedelta(seconds=300)``.
+ wait_for_workers (bool, optional): Whether to wait for all the workers to connect with the server
+ store. This is only applicable when `world_size` is a fixed value. Default is ``True``. This
+ parameter is only valid for the server.
  multi_tenant (bool, invalid, optional): If ``True``, all ``TCPStore`` instances in the current process with
  the same host/port will use the same underlying ``TCPServer``. Default is ``False``.
  master_listen_fd (int, invalid, optional): If specified, the underlying ``TCPServer`` will listen on this file
@@ -191,12 +195,106 @@ class TCPStore:
  for more details.

  >>> from mindspore.mint.distributed import TCPStore
- >>> store = TCPStore()
+ >>> store = TCPStore("127.0.0.1", 1234)
  """

- def __init__(self, host_name=None, port=None, world_size=None, is_master=False, timeout=300,
+ def __init__(self, host_name, port, world_size=None, is_master=False, timeout=timedelta(seconds=300),
  wait_for_workers=True, multi_tenant=False, master_listen_fd=None, use_libuv=True):
- self.instance = TCPStoreClient.get_instance()
+ if not isinstance(host_name, str):
+ raise TypeError(
+ "For 'TCPStore', the argument 'host_name' must be type of string, "
+ "but got 'host_name' type : {}.".format(type(host_name))
+ )
+ if not isinstance(port, int):
+ raise TypeError(
+ "For 'TCPStore', the argument 'port' must be type of int, "
+ "but got 'port' type : {}.".format(type(port))
+ )
+ if not isinstance(is_master, bool):
+ raise TypeError(
+ "For 'TCPStore', the argument 'is_master' must be type of bool, "
+ "but got 'is_master' type : {}.".format(type(is_master))
+ )
+ if not isinstance(timeout, timedelta):
+ raise TypeError(
+ "For 'TCPStore', the argument 'timeout' must be type of timedelta, "
+ "but got 'timeout' type : {}.".format(type(timeout))
+ )
+ if not isinstance(wait_for_workers, bool):
+ raise TypeError(
+ "For 'TCPStore', the argument 'wait_for_workers' must be type of bool, "
+ "but got 'wait_for_workers' type : {}.".format(type(wait_for_workers))
+ )
+ if world_size is None:
+ world_size = 1
+ if not isinstance(world_size, int):
+ raise TypeError(
+ "For 'TCPStore', the argument 'world_size' must be type of int, "
+ "but got 'world_size' type : {}.".format(type(world_size))
+ )
+ if port < 0 or port > 65535:
+ raise ValueError(
+ "For 'TCPStore', the argument 'port' must be legal, "
+ f"but got {port}."
+ )
+ if world_size <= 0:
+ raise ValueError(
+ "For 'TCPStore', the argument 'world_size' must be legal, "
+ f"but got {world_size}."
+ )
+ timeout_ms = int(timeout.total_seconds() * 1000)
+ self.instance = TCPStoreClient(host_name, port, is_master, timeout_ms, world_size, wait_for_workers)
+ self.host = host_name
+ self.port = port
+
+
+ def add(self, key, amount):
+ """
+ When the `add` function is called for the first time with a given key, it creates a counter in
+ the storage corresponding to that key, with the initial value set to `amount`. Subsequent calls
+ to `add` with the same key increment the counter by amount.
+
+ Args:
+ key (str): The key whose counter value will be incremented.
+ amount (int): The amount by which the counter will be incremented.
+
+ Returns:
+ int, value of counter with `key`.
+
+ Raises:
+ TypeError: If `key` is not string.
+ TypeError: If `amount` is not int.
+ RuntimeError: If the `add` and `set` pass the same `key` and the `value` passed by `set` cannot
+ be correctly converted to a numerical value, calling `add` will result in an error.
+
+ Supported Platforms:
+ ``Ascend``
+
+ Examples:
+ .. note::
+ Before running the following examples, you need to configure the communication environment variables.
+
+ For Ascend devices, it is recommended to use the msrun startup method
+ without any third-party or configuration file dependencies.
+ Please see the `msrun start up
+ <https://www.mindspore.cn/tutorials/en/master/parallel/msrun_launcher.html>`_
+ for more details.
+
+ >>> from mindspore.mint.distributed import TCPStore
+ >>> store = TCPStore("127.0.0.1", 1234)
+ >>> store.add("first_key", 1)
+ """
+ if not isinstance(key, str):
+ raise TypeError(
+ "For 'TCPStore.add', the argument 'key' must be type of string, "
+ "but got 'key' type : {}.".format(type(key))
+ )
+ if not isinstance(amount, int):
+ raise TypeError(
+ "For 'TCPStore.add', the argument 'amount' must be type of string or int, "
+ "but got 'amount' type : {}.".format(type(amount))
+ )
+ return self.instance.add(key, amount)


  def set(self, key, value):
@@ -227,7 +325,7 @@ class TCPStore:
  for more details.

  >>> from mindspore.mint.distributed import TCPStore
- >>> store = TCPStore()
+ >>> store = TCPStore("127.0.0.1", 1234)
  >>> store.set("first_key", "first_value")
  """
  if not isinstance(key, str):
@@ -245,8 +343,9 @@ class TCPStore:

  def get(self, key):
  """
- Retrieves the value associated with the given `key` in the store. If `key` is not
- present in the store, the function will return "".
+ Retrieves the value associated with the given `key` in the store. If the `key` does not exist
+ in the storage, this function will wait for the `timeout` set by the class initialization and then
+ throw an exception.

  Args:
  key (str): The function will return the value associated with this key.
@@ -256,6 +355,7 @@ class TCPStore:

  Raises:
  TypeError: If `key` is not string.
+ RuntimeError: If `get` runs out of time.

  Supported Platforms:
  ``Ascend``
@@ -271,7 +371,7 @@ class TCPStore:
  for more details.

  >>> from mindspore.mint.distributed import TCPStore
- >>> store = TCPStore()
+ >>> store = TCPStore("127.0.0.1", 1234)
  >>> store.set("first_key", "first_value")
  >>> data = store.get("first_key")
  >>> print(data)
@@ -299,7 +399,7 @@ class TCPStore:
  TypeError: If `key` is not string.

  Supported Platforms:
- ``CPU``
+ ``Ascend``

  Examples:
  .. note::
@@ -312,7 +412,7 @@ class TCPStore:
  for more details.

  >>> from mindspore.mint.distributed import TCPStore
- >>> store = TCPStore()
+ >>> store = TCPStore("127.0.0.1", 1234)
  >>> store.set("first_key", "first_value")
  >>> # This should return true
  >>> store.delete_key("first_key")
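Taken together, the TCPStore hunks above replace the old no-argument placeholder with a real client: `host_name` and `port` are now required, the arguments are validated, and an `add()` counter method is introduced. The following is a minimal sketch combining the docstring examples shown above; it assumes the communication environment described in the notes is already configured and uses a placeholder address.

    from mindspore.mint.distributed import TCPStore

    # Placeholder address for illustration; in practice this is the master node
    # reachable by every process in the job.
    store = TCPStore("127.0.0.1", 1234)

    store.set("first_key", "first_value")   # write a key/value pair
    value = store.get("first_key")          # blocks up to the constructor's timeout if the key is missing
    counter = store.add("steps", 1)         # first call creates the counter at 1, later calls increment it
    print(value, counter)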
@@ -387,6 +487,7 @@ def is_initialized():
  return _is_initialized()


+ @args_type_check(init_method=str, timeout=timedelta, world_size=int, rank=int, store=TCPStore)
  def init_process_group(backend="hccl",
  init_method=None,
  timeout=None,
@@ -404,26 +505,29 @@ def init_process_group(backend="hccl",
  and the instantiation and execution of any operation and net.

  Args:
- backend (str, optional): The backend to ues. default is hccl and now only support hccl.
- init_method (str, invalid): URL specifying how to init collective communication group. Provides parameters
- consistent with pytorch, but is not currently support, setting is invalid.
- timeout (timedelta, invalid): Timeout for API executed. Provides parameters consistent with pytorch, but is not
- currently support, setting is invalid.
- world_size (int, optional): Number of the processes participating in the job.
- rank (int, invalid): Rank of the current process. Provides parameters consistent with pytorch, but is not
- currently support, setting is invalid.
- store (Store, invalid): Key/Value store accessible to all workers, used to exchange connection/address
- information. Provides parameters consistent with pytorch, but is not currently support,
- setting is invalid.
+ backend (str, optional): The backend to ues. Default is ``"hccl"`` and now only support hccl.
+ init_method (str, optional): URL specifying how to init collective communication group. Default is ``None``.
+ timeout (timedelta, optional): Timeout for API executed. Default is ``None``. Currently, this parameter is
+ only supported for host-side cluster network configuration using `init_method` or `store`.
+ world_size (int, optional): Number of the processes participating in the job. Default is ``-1``.
+ rank (int, optional): Rank of the current process. Default is ``-1``.
+ store (Store, optional): An object that stores key/value data, facilitating the exchange of inter-process
+ communication addresses and connection information. Default is ``None``. Currently, only the
+ ``TCPStore`` type is supported.
  pg_options (ProcessGroupOptions, invalid): process group options specifying what additional options need to be
- passed in during the construction of specific process group. Provides parameters consistent with pytorch,
- but is not currently support, setting is invalid.
- device_id (int, invalid): the device id to exeute. Provides parameters consistent with pytorch, but is not
- currently support, setting is invalid.
+ passed in during the construction of specific process group. The provided parameter is a reserved
+ parameter, and the current setting does not take effect.
+ device_id (int, invalid): the device id to exeute. The provided parameter is a reserved parameter,
+ and the current setting does not take effect.

  Raises:
  ValueError: If `backend` is not hccl.
  ValueError: If `world_size` is not equal to -1 or process group number.
+ ValueError: If both `init_method` and `store` are set.
+ ValueError: `world_size` is not correctly set as a positive integer value, when using the initialization
+ method `init_method` or `store`.
+ ValueError: `rank` is not correctly set as a non-negative integer, when using the initialization method
+ `init_method` or `store`.
  RuntimeError: If device target is invalid, or backend is invalid, or distributed initialization fails,
  or the environment variables RANK_ID/MINDSPORE_HCCL_CONFIG_PATH
  have not been exported when backend is HCCL.
@@ -447,25 +551,34 @@ def init_process_group(backend="hccl",
  >>> init_process_group()
  >>> destroy_process_group()
  """
- if init_method is not None:
- logger.warning("init_method is ignored, setting is invalid")
- if timeout is not None:
- logger.warning("timeout is ignored, setting is invalid")
- if store is not None:
- logger.warning("store is ignored, setting is invalid")
  if pg_options is not None:
  logger.warning("pg_options is ignored, setting is invalid")
  if device_id is not None:
  logger.warning("device_id is ignored, setting is invalid")
- if rank != -1:
- logger.warning("rank is ignored, setting is invalid")
  if backend != "hccl":
  raise ValueError(
  "Only support hccl now, please setting backend to hccl or using default value"
  )

- # init hccl & create world group
- init(backend)
+ if init_method is not None and store is not None:
+ raise ValueError(
+ "Only one of init_method and store is supported."
+ )
+ if init_method is not None or store is not None:
+ if world_size <= 0:
+ raise ValueError(
+ "Specified world_size must be a positive integer."
+ )
+ if rank < 0:
+ raise ValueError(
+ "Specified rank must be a non-negative integer."
+ )
+ if timeout is None:
+ timeout = timedelta(seconds=300)
+ timeout_ms = int(timeout.total_seconds() * 1000)
+ _init_without_sched(backend, init_method, timeout_ms, world_size, rank, store)
+ else:
+ init(backend)

  if world_size != -1 and world_size != get_group_size():
  raise ValueError(
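The new initialization path rejects passing `init_method` and `store` together, requires a positive `world_size` and a non-negative `rank` when either is used, converts the `timeout` to milliseconds, and then calls `_init_without_sched` instead of `init(backend)`. Below is a hedged sketch of the store-based variant; the address, the environment-variable names and the two-process layout are illustrative assumptions, not part of the API.

    import os
    from datetime import timedelta
    import mindspore as ms
    from mindspore.mint.distributed import TCPStore, init_process_group, destroy_process_group

    ms.set_device(device_target="Ascend")

    # Illustrative only: obtain this process's rank and the total process count from your launcher.
    rank = int(os.environ.get("RANK", "0"))
    world_size = int(os.environ.get("WORLD_SIZE", "2"))

    # Rank 0 hosts the store; the others connect to it. init_method and store must not be mixed.
    store = TCPStore("127.0.0.1", 1234, world_size=world_size, is_master=(rank == 0))
    init_process_group(backend="hccl", store=store, world_size=world_size, rank=rank,
                       timeout=timedelta(seconds=300))

    # ... run collectives here ...
    destroy_process_group()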
@@ -513,7 +626,10 @@ def destroy_process_group(group=None):
  """

  if group == GlobalComm.WORLD_COMM_GROUP or group is None:
- release()
+ _pynative_executor.sync()
+ _finalize_collective()
+ _ExistingGroup.ITEMS.clear()
+ _ExistingGroup.GROUP_RANKS.clear()
  elif not isinstance(group, str):
  raise TypeError(
  "For 'destroy_group', the argument 'group' must be type of string or None, "
@@ -671,6 +787,12 @@ def new_group(ranks=None,
  hccl_config(dict)
  }

+ `hccl_config` currently only supports "hccl_buffer_size" or "hccl_comm".
+
+ - hccl_buffer_size (uint32): specifies the size of the HCCL communication buffer.
+ - hccl_comm (int64): specifies an existing HcclComm pointer. If "hccl_comm" is set,
+ "hccl_buffer_size" will be ignored.
+
  use_local_synchronization (bool, invalid): Currently it is a reserved parameter.
  group_desc (str, invalid): Currently it is a reserved parameter.

@@ -989,6 +1111,22 @@ def _check_all_tensor_same_dtype_and_shape(*tensor_lists):
  )


+ @_primexpr
+ def _check_output_shape(output, expected_shape, op_name):
+ if output.shape != expected_shape:
+ raise TypeError(
+ f"For {op_name}, the output shape should be {expected_shape}, "
+ f"but got {output.shape}.")
+
+
+ @_primexpr
+ def _check_output_dtype(output, expected_dtype, op_name):
+ if output.dtype != expected_dtype:
+ raise TypeError(
+ f"For {op_name}, the output dtype should be {expected_dtype}, "
+ f"but got {output.dtype}.")
+
+
  def all_reduce(tensor, op=ReduceOp.SUM, group=None, async_op=False):
  """
  Reduce tensors across all devices in such a way that all deviceswill get the same final result,
@@ -1153,6 +1291,91 @@ def all_gather_into_tensor(output_tensor, input_tensor, group=None, async_op=Fal
  return handle


+ def all_gather_into_tensor_uneven(output, input, output_split_sizes=None, group=None, async_op=False):
+ r"""
+ Gathers and concatenates tensors across devices with uneven first dimensions.
+
+ Note:
+ - Input tensors must have identical shapes except for the first dimension.
+ - Output tensor's first dimension should equal to the sum of all devices' input first dimensions.
+
+ Args:
+ output (Tensor): Concatenated output tensor with shape :math:`(\sum_{i=0}^{N-1} x_{i1}, x_2, ..., x_R)`,
+ where N is the number of devices in the group.
+ input (Tensor): Local input tensor with shape :math:`(x_{k1}, x_2, ..., x_R)`, where k is current device's rank.
+ output_split_sizes (list[int], optional): Specifies first dimension sizes from each device.
+ Must match actual input dimensions when provided.
+ If ``None``, assumes equal split sizes across devices. Default: ``None``.
+ group (str, optional): The communication group to work on. If ``None``,
+ which means ``"hccl_world_group"`` in Ascend. Default: ``None``.
+ async_op (bool, optional): Whether this operator should be an async operator. Default: ``False``.
+
+ Returns:
+ CommHandle, CommHandle is an async work handle, if `async_op` is set to True.
+ CommHandle will be None, when `async_op` is False.
+
+ Raises:
+ ValueError: If the shape of `input` does not match the constraints of `output_split_sizes`.
+ RuntimeError: If device target is invalid, or backend is invalid, or distributed initialization fails.
+
+ Supported Platforms:
+ ``Ascend``
+
+ Examples:
+ .. note::
+ Before running the following examples, you need to configure the communication environment variables.
+
+ For Ascend devices, it is recommended to use the msrun startup method
+ without any third-party or configuration file dependencies.
+ Please see the `msrun start up
+ <https://www.mindspore.cn/tutorials/en/master/parallel/msrun_launcher.html>`_
+ for more details.
+
+ This example should be run with 2 devices.
+
+ >>> import numpy as np
+ >>> import mindspore as ms
+ >>> from mindspore import ops
+ >>> from mindspore.mint.distributed import init_process_group, get_rank
+ >>> from mindspore.mint.distributed import all_gather_into_tensor_uneven
+ >>> from mindspore import Tensor
+ >>>
+ >>> ms.set_device(device_target="Ascend")
+ >>> init_process_group()
+ >>> if get_rank() == 0:
+ ... input_tensor = Tensor(np.ones([3, 4]).astype(np.float32))
+ ... else:
+ ... input_tensor = Tensor(np.ones([2, 4]).astype(np.float32))
+ >>> out_tensor = Tensor(np.zeros([5, 4]).astype(np.float32))
+ >>> output_split_sizes = [3, 2]
+ >>> output = all_gather_into_tensor_uneven(out_tensor, input_tensor, output_split_sizes)
+ >>> print(out_tensor)
+ [[1. 1. 1. 1.]
+ [1. 1. 1. 1.]
+ [1. 1. 1. 1.]
+ [1. 1. 1. 1.]
+ [1. 1. 1. 1.]]
+ """
+ if group is None:
+ group = GlobalComm.WORLD_COMM_GROUP
+ if not isinstance(group, str):
+ raise TypeError(
+ "The argument 'group' must be type of string, "
+ "but got 'group' type : {}.".format(type(group))
+ )
+ if not isinstance(async_op, bool):
+ raise TypeError(
+ f"The argument 'async_op' must be a bool, but got {type(async_op)}."
+ )
+ group_size = get_cache_group_size(group)
+ output_split_sizes = [] if output_split_sizes is None else output_split_sizes
+ result = dist_comm_all_gather_into_tensor_uneven_op(
+ output, input, output_split_sizes, group_size, group
+ )
+ _, handle = _deal_comm_outputs(result, async_op)
+ return handle
+
+
  def reduce_scatter_tensor(output, input, op=ReduceOp.SUM, group=None, async_op=False):
  r"""
  Reduces and scatters tensors from the specified communication group and
@@ -1243,6 +1466,101 @@ def reduce_scatter_tensor(output, input, op=ReduceOp.SUM, group=None, async_op=F
  return handle


+ def reduce_scatter_tensor_uneven(output, input, input_split_sizes=None, op=ReduceOp.SUM, group=None, async_op=False):
+ r"""
+ Reduce tensors from the specified communication group and scatter to the output tensor
+ according to `input_split_sizes`.
+
+ Note:
+ - The input tensor must have identical shape and format across all processes.
+ - The first dimension of input tensor should equal to the sum of `input_split_sizes`.
+
+ Args:
+ output(Tensor): the output tensor has the same dtype as `input` with a shape of
+ :math:`(input\_split\_sizes[rank], *)`, where rank is the local rank id of the device.
+ input(Tensor): The input tensor to be reduced and scattered, Expected shape :math:`(N, *)`, where `*`
+ means any number of additional dimensions. N must equal the sum of `input_split_sizes` across ranks.
+ input_split_sizes (list[int], optional): List specifying how to split the first dimension of input tensor.
+ If ``None``, splits evenly according to group size. Default: ``None``.
+ op (str, optional): Specifies an operation used for element-wise reductions,
+ One of ReduceOp: 'SUM', 'MIN', 'MAX'. Default: ``ReduceOp.SUM``.
+ group (str, optional): The communication group to work on. If ``None``, which means ``"hccl_world_group"`` in
+ Ascend. Default: ``None``.
+ async_op (bool, optional): Whether this operator should be an async operator. Default: ``False``.
+
+ Returns:
+ CommHandle, CommHandle is an async work handle, if `async_op` is set to True.
+ CommHandle will be None, when `async_op` is False.
+
+ Raises:
+ ValueError: If the shape of `output` does not match the constraints of `input_split_sizes`.
+ RuntimeError: If device target is invalid, or backend is invalid, or distributed initialization fails.
+
+ Supported Platforms:
+ ``Ascend``
+
+ Examples:
+ .. note::
+ Before running the following examples, you need to configure the communication environment variables.
+
+ For Ascend devices, it is recommended to use the msrun startup method
+ without any third-party or configuration file dependencies.
+ Please see the `msrun start up
+ <https://www.mindspore.cn/tutorials/en/master/parallel/msrun_launcher.html>`_
+ for more details.
+
+ This example should be run with 2 devices.
+
+ >>> import mindspore as ms
+ >>> from mindspore import Tensor
+ >>> from mindspore.mint.distributed import init_process_group, get_rank
+ >>> from mindspore.mint.distributed import reduce_scatter_tensor_uneven
+ >>> import numpy as np
+ >>>
+ >>> ms.set_device(device_target="Ascend")
+ >>> init_process_group()
+ >>> input_tensor = Tensor(np.ones([5, 8]).astype(np.float32))
+ >>> if get_rank() == 0:
+ ... output_tensor = Tensor(np.ones([2, 8]).astype(np.float32))
+ ... else:
+ ... output_tensor = Tensor(np.ones([3, 8]).astype(np.float32))
+ >>> input_split_sizes = [2, 3]
+ >>> output = reduce_scatter_tensor_uneven(output_tensor, input_tensor, input_split_sizes)
+ >>> print(output_tensor)
+ rank 0:
+ [[2. 2. 2. 2. 2. 2. 2. 2.]
+ [2. 2. 2. 2. 2. 2. 2. 2.]]
+ rank 1:
+ [[2. 2. 2. 2. 2. 2. 2. 2.]
+ [2. 2. 2. 2. 2. 2. 2. 2.]
+ [2. 2. 2. 2. 2. 2. 2. 2.]]
+ """
+ if not isinstance(op, str):
+ raise TypeError("For reduce_scatter_tensor_uneven, the input op type must be str")
+ if op not in ("sum", "min", "max"):
+ raise TypeError(
+ "For reduce_scatter_tensor_uneven, the input op value must be one of sum, prod, min, max"
+ )
+ if group is None:
+ group = GlobalComm.WORLD_COMM_GROUP
+ if not isinstance(group, str):
+ raise TypeError(
+ "The argument 'group' must be type of string, "
+ "but got 'group' type : {}.".format(type(group))
+ )
+ if not isinstance(async_op, bool):
+ raise TypeError(
+ f"The argument 'async_op' must be a bool, but got {type(async_op)}."
+ )
+ input_split_sizes = [] if input_split_sizes is None else input_split_sizes
+ rank_size = get_cache_group_size(group)
+ result = dist_comm_reduce_scatter_tensor_uneven_op(
+ output, input, input_split_sizes, rank_size, op, group
+ )
+ _, handle = _deal_comm_outputs(result, async_op)
+ return handle
+
+
  def reduce(tensor, dst, op=ReduceOp.SUM, group=None, async_op=False):
  """
  Reduces tensors across the processes in the specified communication group, sends the result
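Both new uneven collectives tie tensor shapes to their split-size lists: `all_gather_into_tensor_uneven` expects the output's first dimension to be the sum of `output_split_sizes`, while `reduce_scatter_tensor_uneven` expects the input's first dimension to be the sum of `input_split_sizes` and gives rank `k` an output with `input_split_sizes[k]` rows. The following shape-only sketch of that arithmetic runs without any device; the helper names are illustrative, not part of the API.

    def uneven_all_gather_output_shape(input_shape, output_split_sizes):
        # The gathered output keeps the trailing dims and concatenates along dim 0.
        return (sum(output_split_sizes),) + tuple(input_shape[1:])

    def uneven_reduce_scatter_output_shape(input_shape, input_split_sizes, rank):
        # Input dim 0 must equal the sum of the splits; each rank keeps its own slice.
        assert input_shape[0] == sum(input_split_sizes)
        return (input_split_sizes[rank],) + tuple(input_shape[1:])

    # Matches the two-device docstring examples above:
    assert uneven_all_gather_output_shape((3, 4), [3, 2]) == (5, 4)
    assert uneven_reduce_scatter_output_shape((5, 8), [2, 3], rank=0) == (2, 8)
    assert uneven_reduce_scatter_output_shape((5, 8), [2, 3], rank=1) == (3, 8)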
@@ -2386,10 +2704,7 @@ def all_to_all_single(output,

  def _check_tensor_list(tensor_list, tensor, group_size):
  """check all elements in tensor_list are type of Tensor or tuple or list"""
- if not tensor_list or len(tensor_list) != group_size:
- raise TypeError(
- f"The argument list tensor len must be equal to group rank size, but got {len(tensor_list)}."
- )
+ _check_group_tensor_list(tensor_list, group_size)
  if tensor.dtype != tensor_list[0].dtype:
  raise TypeError(
  f"The argument list tensor type must be equal to tensor type, but got {tensor_list[0].dtype}."
@@ -2400,13 +2715,17 @@ def _check_tensor_list(tensor_list, tensor, group_size):
  )


+ def _check_group_tensor_list(tensor_list, group_size):
+ if not tensor_list or len(tensor_list) != group_size:
+ raise TypeError(
+ f"The argument list tensor len must be equal to group rank size, but got {len(tensor_list)}."
+ )
+
+
  def all_gather(tensor_list, tensor, group=None, async_op=False):
  """
  Gathers tensors from the specified communication group and returns the tensor list which is all gathered.

- Note:
- The tensors must have the same shape and format in all processes of the collection.
-
  Args:
  tensor_list (list[Tensor]): Output list.
  tensor (Tensor): The input tensor to be all gathered into tensor.
@@ -2461,7 +2780,7 @@ def all_gather(tensor_list, tensor, group=None, async_op=False):

  """
  _check_all_tensors(tensor_list)
- _check_all_tensor_same_dtype_and_shape(tensor_list)
+ _check_all_tensor_same_dtype(tensor_list)
  if not isinstance(tensor, (Tensor, Tensor_)):
  raise TypeError("For all_gather_into_tensor, the input tensor must be tensor")
  if group is None:
@@ -2476,7 +2795,10 @@ def all_gather(tensor_list, tensor, group=None, async_op=False):
  f"The argument 'async_op' must be a bool, but got {type(async_op)}."
  )
  group_size = get_cache_group_size(group)
- _check_tensor_list(tensor_list, tensor, group_size)
+ _check_group_tensor_list(tensor_list, group_size)
+ rank_id = get_group_rank_from_world_rank(get_rank(), group)
+ _check_output_shape(tensor, tensor_list[rank_id].shape, "all_gather")
+ _check_output_dtype(tensor, tensor_list[0].dtype, "all_gather")
  result = dist_comm_all_gather_op(tensor_list, tensor, group_size, group)
  _, handle = _deal_comm_outputs(result, async_op)
  return handle
@@ -2487,9 +2809,6 @@ def reduce_scatter(output, input_list, op=ReduceOp.SUM, group=None, async_op=Fal
  Reduces and scatters tensors from the specified communication group and
  returns the tensor which is reduced and scattered.

- Note:
- The tensors must have the same shape and format in all processes of the collection.
-
  Args:
  output (Tensor): the output tensor.
  input_list (list[Tensor]): List of tensors to reduce and scatter.
@@ -2543,7 +2862,7 @@ def reduce_scatter(output, input_list, op=ReduceOp.SUM, group=None, async_op=Fal
  """

  _check_all_tensors(input_list)
- _check_all_tensor_same_dtype_and_shape(input_list)
+ _check_all_tensor_same_dtype(input_list)
  if not isinstance(output, (Tensor, Tensor_)):
  raise TypeError("For reduce_scatter, the output tensor must be tensor")
  if group is None:
@@ -2564,7 +2883,11 @@ def reduce_scatter(output, input_list, op=ReduceOp.SUM, group=None, async_op=Fal
  "For reduce_scatter, the input op value must be one of sum, prod, min, max"
  )
  rank_size = get_cache_group_size(group)
- _check_tensor_list(input_list, output, rank_size)
+ _check_group_tensor_list(input_list, rank_size)
+
+ rank_id = get_group_rank_from_world_rank(get_rank(), group)
+ _check_output_shape(output, input_list[rank_id].shape, "reduce_scatter")
+ _check_output_dtype(output, input_list[0].dtype, "reduce_scatter")
  result = dist_comm_reduce_scatter_op(output, input_list, rank_size, op, group)
  _, handle = _deal_comm_outputs(result, async_op)
  return handle
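The last hunks split the old `_check_tensor_list` call into a length check (`_check_group_tensor_list`) plus per-rank `_check_output_shape` and `_check_output_dtype` checks, and relax the list-wide same-shape requirement to a same-dtype requirement for `all_gather` and `reduce_scatter`. The following shape-only sketch mirrors that relaxed Python-side validation under those assumptions; it involves no device or communication, and the helper name is illustrative rather than part of the API.

    def check_all_gather_args(tensor_shape, tensor_dtype, list_shapes, list_dtypes, rank, group_size):
        # Mirrors the new checks: list length == group size, uniform dtype across the list,
        # and the local input must match the rank's own slot in the output list.
        if len(list_shapes) != group_size:
            raise TypeError("tensor_list length must equal the group size")
        if any(dt != list_dtypes[0] for dt in list_dtypes):
            raise TypeError("all tensors in tensor_list must share one dtype")
        if tensor_shape != list_shapes[rank] or tensor_dtype != list_dtypes[0]:
            raise TypeError("input tensor must match tensor_list[rank] in shape and dtype")

    # As far as this validation goes, per-rank shapes may now differ across the list,
    # e.g. [(3, 4), (2, 4)] on 2 devices:
    check_all_gather_args((3, 4), "float32", [(3, 4), (2, 4)], ["float32", "float32"],
                          rank=0, group_size=2)
    print("validation passed")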