mindspore 2.4.10__cp311-cp311-win_amd64.whl → 2.6.0rc1__cp311-cp311-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of mindspore might be problematic.

Files changed (602)
  1. mindspore/.commit_id +1 -1
  2. mindspore/Microsoft.VisualStudio.Telemetry.dll +0 -0
  3. mindspore/Newtonsoft.Json.dll +0 -0
  4. mindspore/__init__.py +13 -6
  5. mindspore/_c_dataengine.cp311-win_amd64.pyd +0 -0
  6. mindspore/_c_expression.cp311-win_amd64.pyd +0 -0
  7. mindspore/_c_mindrecord.cp311-win_amd64.pyd +0 -0
  8. mindspore/_check_jit_forbidden_api.py +3 -0
  9. mindspore/_checkparam.py +3 -38
  10. mindspore/_deprecated/__init__.py +17 -0
  11. mindspore/_deprecated/jit.py +198 -0
  12. mindspore/_extends/builtin_operations.py +1 -1
  13. mindspore/_extends/parallel_compile/akg_compiler/gen_custom_op_files.py +1 -1
  14. mindspore/_extends/parse/__init__.py +6 -7
  15. mindspore/_extends/parse/compile_config.py +83 -0
  16. mindspore/_extends/parse/deprecated/__init__.py +0 -0
  17. mindspore/_extends/parse/deprecated/deprecated_tensor_method.py +394 -0
  18. mindspore/_extends/parse/jit_fallback_modules/__init__.py +0 -0
  19. mindspore/_extends/parse/jit_fallback_modules/check_utils.py +123 -0
  20. mindspore/_extends/parse/jit_fallback_modules/third_party_modules.py +50 -0
  21. mindspore/_extends/parse/parser.py +46 -197
  22. mindspore/_extends/parse/resources.py +1 -5
  23. mindspore/_extends/parse/standard_method.py +217 -98
  24. mindspore/_extends/pijit/__init__.py +2 -2
  25. mindspore/_extends/pijit/pijit_func_white_list.py +17 -12
  26. mindspore/_extends/pijit/tensor_func_list.py +27 -0
  27. mindspore/_extends/utils.py +1 -1
  28. mindspore/amp.py +11 -5
  29. mindspore/atlprov.dll +0 -0
  30. mindspore/avcodec-59.dll +0 -0
  31. mindspore/avdevice-59.dll +0 -0
  32. mindspore/avfilter-8.dll +0 -0
  33. mindspore/avformat-59.dll +0 -0
  34. mindspore/avutil-57.dll +0 -0
  35. mindspore/boost/__init__.py +2 -2
  36. mindspore/boost/base.py +3 -7
  37. mindspore/boost/boost_cell_wrapper.py +138 -43
  38. mindspore/c1.dll +0 -0
  39. mindspore/c1xx.dll +0 -0
  40. mindspore/c2.dll +0 -0
  41. mindspore/common/__init__.py +6 -3
  42. mindspore/common/_grad_function.py +56 -0
  43. mindspore/common/_pijit_context.py +14 -5
  44. mindspore/common/_register_for_tensor.py +1 -2
  45. mindspore/common/_stub_tensor.py +30 -14
  46. mindspore/common/_tensor_cpp_method.py +17 -0
  47. mindspore/common/_tensor_docs.py +4760 -0
  48. mindspore/common/api.py +435 -371
  49. mindspore/common/auto_dynamic_shape.py +41 -44
  50. mindspore/common/dtype.py +39 -36
  51. mindspore/common/dump.py +9 -6
  52. mindspore/common/file_system.py +9 -1
  53. mindspore/common/generator.py +2 -0
  54. mindspore/common/hook_handle.py +6 -2
  55. mindspore/common/initializer.py +13 -10
  56. mindspore/common/jit_begin_end.py +94 -0
  57. mindspore/common/jit_config.py +6 -1
  58. mindspore/common/jit_context.py +76 -0
  59. mindspore/common/jit_trace.py +378 -0
  60. mindspore/common/lazy_inline.py +9 -3
  61. mindspore/common/mindir_util.py +10 -2
  62. mindspore/common/mutable.py +5 -4
  63. mindspore/common/parameter.py +135 -52
  64. mindspore/common/seed.py +2 -2
  65. mindspore/common/sparse_tensor.py +23 -17
  66. mindspore/common/tensor.py +951 -1992
  67. mindspore/communication/__init__.py +7 -5
  68. mindspore/communication/_comm_helper.py +52 -2
  69. mindspore/communication/comm_func.py +240 -181
  70. mindspore/communication/management.py +95 -26
  71. mindspore/context.py +314 -566
  72. mindspore/dataset/__init__.py +65 -37
  73. mindspore/dataset/audio/__init__.py +2 -8
  74. mindspore/dataset/audio/transforms.py +3 -17
  75. mindspore/dataset/callback/ds_callback.py +2 -1
  76. mindspore/dataset/core/config.py +87 -6
  77. mindspore/dataset/engine/cache_admin.py +3 -3
  78. mindspore/dataset/engine/cache_client.py +6 -5
  79. mindspore/dataset/engine/datasets.py +292 -267
  80. mindspore/dataset/engine/datasets_audio.py +22 -8
  81. mindspore/dataset/engine/datasets_standard_format.py +46 -27
  82. mindspore/dataset/engine/datasets_text.py +78 -48
  83. mindspore/dataset/engine/datasets_user_defined.py +182 -116
  84. mindspore/dataset/engine/datasets_vision.py +120 -44
  85. mindspore/dataset/engine/iterators.py +283 -63
  86. mindspore/dataset/engine/obs/obs_mindrecord_dataset.py +1 -1
  87. mindspore/dataset/engine/obs/util.py +8 -0
  88. mindspore/dataset/engine/queue.py +40 -0
  89. mindspore/dataset/engine/samplers.py +289 -43
  90. mindspore/dataset/engine/serializer_deserializer.py +3 -2
  91. mindspore/dataset/engine/validators.py +53 -11
  92. mindspore/dataset/text/__init__.py +7 -6
  93. mindspore/dataset/text/transforms.py +6 -5
  94. mindspore/dataset/text/utils.py +3 -3
  95. mindspore/dataset/transforms/__init__.py +0 -9
  96. mindspore/dataset/transforms/py_transforms_util.py +17 -0
  97. mindspore/dataset/transforms/transforms.py +31 -14
  98. mindspore/dataset/utils/browse_dataset.py +1 -1
  99. mindspore/dataset/vision/__init__.py +2 -9
  100. mindspore/dataset/vision/transforms.py +202 -158
  101. mindspore/dataset/vision/utils.py +7 -5
  102. mindspore/dataset/vision/validators.py +1 -2
  103. mindspore/device_context/__init__.py +21 -0
  104. mindspore/device_context/ascend/__init__.py +25 -0
  105. mindspore/device_context/ascend/device.py +72 -0
  106. mindspore/device_context/ascend/op_debug.py +153 -0
  107. mindspore/device_context/ascend/op_precision.py +193 -0
  108. mindspore/device_context/ascend/op_tuning.py +123 -0
  109. mindspore/{ops_generate/gen_constants.py → device_context/cpu/__init__.py} +6 -17
  110. mindspore/device_context/cpu/device.py +62 -0
  111. mindspore/device_context/cpu/op_tuning.py +43 -0
  112. mindspore/device_context/gpu/__init__.py +21 -0
  113. mindspore/device_context/gpu/device.py +70 -0
  114. mindspore/device_context/gpu/op_precision.py +67 -0
  115. mindspore/device_context/gpu/op_tuning.py +175 -0
  116. mindspore/device_manager.py +170 -0
  117. mindspore/dnnl.dll +0 -0
  118. mindspore/dpcmi.dll +0 -0
  119. mindspore/experimental/es/embedding_service.py +35 -27
  120. mindspore/experimental/llm_boost/__init__.py +1 -0
  121. mindspore/experimental/llm_boost/ascend_native/__init__.py +22 -0
  122. mindspore/experimental/llm_boost/ascend_native/llama_boost_ascend_native.py +211 -0
  123. mindspore/experimental/llm_boost/ascend_native/llm_boost.py +52 -0
  124. mindspore/experimental/llm_boost/atb/boost_base.py +2 -3
  125. mindspore/experimental/llm_boost/atb/llama_boost.py +6 -1
  126. mindspore/experimental/llm_boost/register.py +1 -0
  127. mindspore/experimental/map_parameter.py +4 -4
  128. mindspore/experimental/optim/adadelta.py +6 -6
  129. mindspore/experimental/optim/adagrad.py +4 -4
  130. mindspore/experimental/optim/adam.py +7 -0
  131. mindspore/experimental/optim/adamax.py +4 -4
  132. mindspore/experimental/optim/adamw.py +4 -0
  133. mindspore/experimental/optim/asgd.py +1 -1
  134. mindspore/experimental/optim/lr_scheduler.py +73 -46
  135. mindspore/experimental/optim/radam.py +34 -31
  136. mindspore/experimental/optim/rprop.py +1 -1
  137. mindspore/experimental/optim/sgd.py +1 -1
  138. mindspore/hal/contiguous_tensors_handle.py +6 -10
  139. mindspore/hal/device.py +55 -53
  140. mindspore/hal/event.py +52 -52
  141. mindspore/hal/memory.py +157 -117
  142. mindspore/hal/stream.py +150 -109
  143. mindspore/include/api/context.h +0 -1
  144. mindspore/include/dataset/constants.h +7 -4
  145. mindspore/include/dataset/execute.h +2 -2
  146. mindspore/jpeg62.dll +0 -0
  147. mindspore/log.py +50 -0
  148. mindspore/mindrecord/__init__.py +21 -8
  149. mindspore/mindrecord/config.py +17 -316
  150. mindspore/mindrecord/filereader.py +1 -9
  151. mindspore/mindrecord/filewriter.py +5 -15
  152. mindspore/mindrecord/mindpage.py +1 -9
  153. mindspore/mindspore_backend_common.dll +0 -0
  154. mindspore/mindspore_backend_manager.dll +0 -0
  155. mindspore/mindspore_common.dll +0 -0
  156. mindspore/mindspore_core.dll +0 -0
  157. mindspore/mindspore_dump.dll +0 -0
  158. mindspore/mindspore_frontend.dll +0 -0
  159. mindspore/mindspore_glog.dll +0 -0
  160. mindspore/mindspore_memory_pool.dll +0 -0
  161. mindspore/mindspore_ms_backend.dll +0 -0
  162. mindspore/mindspore_ops.dll +0 -0
  163. mindspore/{mindspore_backend.dll → mindspore_ops_host.dll} +0 -0
  164. mindspore/mindspore_ops_kernel_common.dll +0 -0
  165. mindspore/mindspore_profiler.dll +0 -0
  166. mindspore/mindspore_pyboost.dll +0 -0
  167. mindspore/mindspore_pynative.dll +0 -0
  168. mindspore/mindspore_res_manager.dll +0 -0
  169. mindspore/mindspore_runtime_pipeline.dll +0 -0
  170. mindspore/mint/__init__.py +796 -759
  171. mindspore/mint/distributed/__init__.py +70 -4
  172. mindspore/mint/distributed/distributed.py +2679 -44
  173. mindspore/mint/linalg/__init__.py +8 -0
  174. mindspore/mint/nn/__init__.py +743 -22
  175. mindspore/mint/nn/functional.py +716 -23
  176. mindspore/mint/nn/layer/__init__.py +21 -4
  177. mindspore/mint/nn/layer/_functions.py +334 -0
  178. mindspore/mint/nn/layer/activation.py +276 -1
  179. mindspore/mint/nn/layer/basic.py +123 -0
  180. mindspore/mint/nn/layer/conv.py +921 -0
  181. mindspore/mint/nn/layer/normalization.py +223 -28
  182. mindspore/mint/nn/layer/padding.py +797 -0
  183. mindspore/mint/nn/layer/pooling.py +235 -0
  184. mindspore/mint/optim/__init__.py +3 -1
  185. mindspore/mint/optim/adam.py +223 -0
  186. mindspore/mint/optim/adamw.py +26 -19
  187. mindspore/mint/optim/sgd.py +171 -0
  188. mindspore/mint/special/__init__.py +2 -1
  189. mindspore/msobj140.dll +0 -0
  190. mindspore/mspdb140.dll +0 -0
  191. mindspore/mspdbcore.dll +0 -0
  192. mindspore/mspdbst.dll +0 -0
  193. mindspore/mspft140.dll +0 -0
  194. mindspore/msvcdis140.dll +0 -0
  195. mindspore/msvcp140_1.dll +0 -0
  196. mindspore/msvcp140_2.dll +0 -0
  197. mindspore/msvcp140_atomic_wait.dll +0 -0
  198. mindspore/msvcp140_codecvt_ids.dll +0 -0
  199. mindspore/multiprocessing/__init__.py +5 -0
  200. mindspore/nn/__init__.py +4 -1
  201. mindspore/nn/cell.py +1370 -189
  202. mindspore/nn/dynamic_lr.py +2 -1
  203. mindspore/nn/layer/activation.py +29 -27
  204. mindspore/nn/layer/basic.py +51 -35
  205. mindspore/nn/layer/channel_shuffle.py +3 -3
  206. mindspore/nn/layer/container.py +1 -1
  207. mindspore/nn/layer/conv.py +22 -17
  208. mindspore/nn/layer/embedding.py +12 -11
  209. mindspore/nn/layer/normalization.py +56 -49
  210. mindspore/nn/layer/padding.py +4 -3
  211. mindspore/nn/layer/pooling.py +120 -42
  212. mindspore/nn/layer/rnn_cells.py +1 -1
  213. mindspore/nn/layer/rnns.py +2 -1
  214. mindspore/nn/layer/timedistributed.py +5 -5
  215. mindspore/nn/layer/transformer.py +59 -36
  216. mindspore/nn/learning_rate_schedule.py +8 -4
  217. mindspore/nn/loss/loss.py +58 -55
  218. mindspore/nn/optim/ada_grad.py +7 -5
  219. mindspore/nn/optim/adadelta.py +11 -9
  220. mindspore/nn/optim/adafactor.py +1 -1
  221. mindspore/nn/optim/adam.py +17 -13
  222. mindspore/nn/optim/adamax.py +8 -7
  223. mindspore/nn/optim/adasum.py +5 -5
  224. mindspore/nn/optim/asgd.py +1 -1
  225. mindspore/nn/optim/ftrl.py +11 -9
  226. mindspore/nn/optim/lamb.py +1 -1
  227. mindspore/nn/optim/lars.py +1 -4
  228. mindspore/nn/optim/lazyadam.py +12 -10
  229. mindspore/nn/optim/momentum.py +7 -6
  230. mindspore/nn/optim/optimizer.py +3 -3
  231. mindspore/nn/optim/proximal_ada_grad.py +12 -10
  232. mindspore/nn/optim/rmsprop.py +13 -12
  233. mindspore/nn/optim/rprop.py +11 -9
  234. mindspore/nn/optim/sgd.py +9 -6
  235. mindspore/nn/optim/tft_wrapper.py +5 -2
  236. mindspore/nn/optim/thor.py +2 -1
  237. mindspore/nn/probability/bijector/bijector.py +17 -11
  238. mindspore/nn/probability/bijector/gumbel_cdf.py +5 -5
  239. mindspore/nn/probability/bijector/invert.py +2 -2
  240. mindspore/nn/probability/bijector/scalar_affine.py +3 -3
  241. mindspore/nn/probability/bijector/softplus.py +3 -2
  242. mindspore/nn/probability/distribution/beta.py +3 -3
  243. mindspore/nn/probability/distribution/categorical.py +1 -1
  244. mindspore/nn/probability/distribution/cauchy.py +4 -2
  245. mindspore/nn/probability/distribution/exponential.py +6 -7
  246. mindspore/nn/probability/distribution/gamma.py +2 -2
  247. mindspore/nn/probability/distribution/gumbel.py +2 -2
  248. mindspore/nn/probability/distribution/half_normal.py +5 -3
  249. mindspore/nn/probability/distribution/logistic.py +5 -3
  250. mindspore/nn/probability/distribution/poisson.py +1 -1
  251. mindspore/nn/probability/distribution/uniform.py +5 -3
  252. mindspore/nn/reinforcement/_tensors_queue.py +1 -1
  253. mindspore/nn/reinforcement/tensor_array.py +1 -1
  254. mindspore/nn/utils/init.py +13 -11
  255. mindspore/nn/wrap/__init__.py +6 -6
  256. mindspore/nn/wrap/cell_wrapper.py +181 -122
  257. mindspore/nn/wrap/grad_reducer.py +45 -36
  258. mindspore/nn/wrap/loss_scale.py +6 -7
  259. mindspore/numpy/array_creations.py +63 -65
  260. mindspore/numpy/array_ops.py +149 -144
  261. mindspore/numpy/logic_ops.py +41 -42
  262. mindspore/numpy/math_ops.py +365 -363
  263. mindspore/numpy/utils.py +17 -18
  264. mindspore/numpy/utils_const.py +5 -6
  265. mindspore/opencv_core452.dll +0 -0
  266. mindspore/opencv_imgcodecs452.dll +0 -0
  267. mindspore/opencv_imgproc452.dll +0 -0
  268. mindspore/ops/__init__.py +5 -3
  269. mindspore/ops/_grad_experimental/grad_comm_ops.py +112 -16
  270. mindspore/ops/_grad_experimental/grad_debug_ops.py +14 -2
  271. mindspore/ops/_grad_experimental/grad_inner_ops.py +9 -0
  272. mindspore/ops/_grad_experimental/grad_math_ops.py +2 -1
  273. mindspore/ops/_grad_experimental/taylor_rule.py +29 -0
  274. mindspore/ops/_op_impl/cpu/__init__.py +1 -0
  275. mindspore/ops/_op_impl/cpu/raise_op.py +28 -0
  276. mindspore/ops/_register_for_op.py +0 -11
  277. mindspore/{ops_generate → ops/_utils}/arg_dtype_cast.py +123 -4
  278. mindspore/{ops_generate → ops/_utils}/arg_handler.py +3 -65
  279. mindspore/ops/_vmap/vmap_array_ops.py +27 -25
  280. mindspore/ops/_vmap/vmap_base.py +0 -2
  281. mindspore/ops/_vmap/vmap_grad_nn_ops.py +21 -14
  282. mindspore/ops/_vmap/vmap_math_ops.py +15 -16
  283. mindspore/ops/_vmap/vmap_nn_ops.py +29 -42
  284. mindspore/ops/auto_generate/__init__.py +4 -3
  285. mindspore/ops/auto_generate/cpp_create_prim_instance_helper.py +236 -46
  286. mindspore/ops/auto_generate/gen_extend_func.py +764 -124
  287. mindspore/ops/auto_generate/gen_ops_def.py +4018 -2264
  288. mindspore/ops/auto_generate/gen_ops_prim.py +15463 -5037
  289. mindspore/ops/auto_generate/pyboost_inner_prim.py +221 -87
  290. mindspore/ops/composite/__init__.py +2 -1
  291. mindspore/ops/composite/base.py +20 -25
  292. mindspore/ops/composite/math_ops.py +6 -16
  293. mindspore/ops/composite/multitype_ops/__init__.py +5 -2
  294. mindspore/ops/composite/multitype_ops/_compile_utils.py +228 -30
  295. mindspore/ops/composite/multitype_ops/_constexpr_utils.py +1 -2
  296. mindspore/ops/composite/multitype_ops/add_impl.py +2 -1
  297. mindspore/ops/composite/multitype_ops/bitwise_and_impl.py +2 -1
  298. mindspore/ops/composite/multitype_ops/bitwise_or_impl.py +2 -1
  299. mindspore/ops/composite/multitype_ops/bitwise_xor_impl.py +2 -1
  300. mindspore/ops/composite/multitype_ops/div_impl.py +6 -4
  301. mindspore/ops/composite/multitype_ops/equal_impl.py +4 -3
  302. mindspore/ops/composite/multitype_ops/floordiv_impl.py +2 -1
  303. mindspore/ops/composite/multitype_ops/getitem_impl.py +3 -2
  304. mindspore/ops/composite/multitype_ops/greater_equal_impl.py +4 -3
  305. mindspore/ops/composite/multitype_ops/greater_impl.py +4 -3
  306. mindspore/ops/composite/multitype_ops/in_impl.py +2 -1
  307. mindspore/ops/composite/multitype_ops/invert_impl.py +50 -0
  308. mindspore/ops/composite/multitype_ops/left_shift_impl.py +2 -1
  309. mindspore/ops/composite/multitype_ops/less_equal_impl.py +4 -3
  310. mindspore/ops/composite/multitype_ops/less_impl.py +4 -3
  311. mindspore/ops/composite/multitype_ops/logic_not_impl.py +3 -2
  312. mindspore/ops/composite/multitype_ops/logical_and_impl.py +2 -1
  313. mindspore/ops/composite/multitype_ops/logical_or_impl.py +2 -1
  314. mindspore/ops/composite/multitype_ops/mod_impl.py +2 -1
  315. mindspore/ops/composite/multitype_ops/mul_impl.py +3 -2
  316. mindspore/ops/composite/multitype_ops/negative_impl.py +2 -1
  317. mindspore/ops/composite/multitype_ops/not_equal_impl.py +2 -1
  318. mindspore/ops/composite/multitype_ops/not_in_impl.py +2 -1
  319. mindspore/ops/composite/multitype_ops/ones_like_impl.py +18 -0
  320. mindspore/ops/composite/multitype_ops/pow_impl.py +2 -30
  321. mindspore/ops/composite/multitype_ops/right_shift_impl.py +2 -1
  322. mindspore/ops/composite/multitype_ops/setitem_impl.py +2 -1
  323. mindspore/ops/composite/multitype_ops/sub_impl.py +2 -1
  324. mindspore/ops/function/__init__.py +40 -2
  325. mindspore/ops/function/_add_attr_func.py +58 -0
  326. mindspore/ops/function/array_func.py +2089 -2403
  327. mindspore/ops/function/clip_func.py +80 -23
  328. mindspore/ops/function/debug_func.py +57 -57
  329. mindspore/ops/function/grad/__init__.py +1 -0
  330. mindspore/ops/function/grad/grad_func.py +104 -71
  331. mindspore/ops/function/image_func.py +2 -2
  332. mindspore/ops/function/linalg_func.py +47 -78
  333. mindspore/ops/function/math_func.py +4501 -3802
  334. mindspore/ops/function/nn_func.py +1726 -620
  335. mindspore/ops/function/other_func.py +159 -1
  336. mindspore/ops/function/parameter_func.py +18 -84
  337. mindspore/ops/function/random_func.py +440 -387
  338. mindspore/ops/function/reshard_func.py +4 -70
  339. mindspore/ops/function/sparse_func.py +3 -3
  340. mindspore/ops/function/sparse_unary_func.py +6 -6
  341. mindspore/ops/function/spectral_func.py +25 -58
  342. mindspore/ops/function/vmap_func.py +24 -17
  343. mindspore/ops/functional.py +22 -7
  344. mindspore/ops/functional_overload.py +1440 -0
  345. mindspore/ops/op_info_register.py +32 -244
  346. mindspore/ops/operations/__init__.py +13 -7
  347. mindspore/ops/operations/_custom_ops_utils.py +247 -0
  348. mindspore/ops/operations/_embedding_cache_ops.py +4 -4
  349. mindspore/ops/operations/_grad_ops.py +2 -43
  350. mindspore/ops/operations/_infer_ops.py +2 -1
  351. mindspore/ops/operations/_inner_ops.py +43 -84
  352. mindspore/ops/operations/_ms_kernel.py +4 -10
  353. mindspore/ops/operations/_rl_inner_ops.py +1 -1
  354. mindspore/ops/operations/_scalar_ops.py +3 -2
  355. mindspore/ops/operations/_sequence_ops.py +1 -1
  356. mindspore/ops/operations/_tensor_array.py +1 -1
  357. mindspore/ops/operations/array_ops.py +81 -324
  358. mindspore/ops/operations/comm_ops.py +154 -108
  359. mindspore/ops/operations/custom_ops.py +232 -78
  360. mindspore/ops/operations/debug_ops.py +153 -59
  361. mindspore/ops/operations/inner_ops.py +7 -5
  362. mindspore/ops/operations/linalg_ops.py +1 -57
  363. mindspore/ops/operations/manually_defined/_inner.py +1 -1
  364. mindspore/ops/operations/manually_defined/ops_def.py +928 -180
  365. mindspore/ops/operations/math_ops.py +32 -234
  366. mindspore/ops/operations/nn_ops.py +210 -498
  367. mindspore/ops/operations/other_ops.py +62 -9
  368. mindspore/ops/operations/random_ops.py +13 -7
  369. mindspore/ops/operations/reshard_ops.py +1 -1
  370. mindspore/ops/operations/sparse_ops.py +2 -2
  371. mindspore/ops/primitive.py +66 -53
  372. mindspore/ops/tensor_method.py +1888 -0
  373. mindspore/ops_generate/__init__.py +0 -5
  374. mindspore/ops_generate/aclnn/__init__.py +0 -0
  375. mindspore/ops_generate/aclnn/aclnn_kernel_register_auto_cc_generator.py +135 -0
  376. mindspore/ops_generate/aclnn/gen_aclnn_implement.py +257 -0
  377. mindspore/ops_generate/api/__init__.py +0 -0
  378. mindspore/ops_generate/api/add_tensor_docs_generator.py +56 -0
  379. mindspore/ops_generate/api/cpp_create_prim_instance_helper_generator.py +105 -0
  380. mindspore/ops_generate/api/functional_map_cpp_generator.py +504 -0
  381. mindspore/ops_generate/api/functional_overload_py_generator.py +112 -0
  382. mindspore/ops_generate/api/functions_cc_generator.py +237 -0
  383. mindspore/ops_generate/api/gen_api.py +103 -0
  384. mindspore/ops_generate/api/op_api_proto.py +235 -0
  385. mindspore/ops_generate/api/tensor_func_reg_cpp_generator.py +461 -0
  386. mindspore/ops_generate/common/__init__.py +0 -0
  387. mindspore/ops_generate/common/base_generator.py +11 -0
  388. mindspore/ops_generate/common/gen_constants.py +91 -0
  389. mindspore/ops_generate/common/gen_utils.py +348 -0
  390. mindspore/ops_generate/common/op_proto.py +473 -0
  391. mindspore/ops_generate/common/template.py +523 -0
  392. mindspore/ops_generate/gen_ops.py +22 -1069
  393. mindspore/ops_generate/op_def/__init__.py +0 -0
  394. mindspore/ops_generate/op_def/gen_op_def.py +90 -0
  395. mindspore/ops_generate/op_def/lite_ops_cpp_generator.py +191 -0
  396. mindspore/ops_generate/op_def/ops_def_cc_generator.py +299 -0
  397. mindspore/ops_generate/op_def/ops_def_h_generator.py +74 -0
  398. mindspore/ops_generate/op_def/ops_name_h_generator.py +83 -0
  399. mindspore/ops_generate/op_def/ops_primitive_h_generator.py +125 -0
  400. mindspore/ops_generate/op_def_py/__init__.py +0 -0
  401. mindspore/ops_generate/op_def_py/gen_op_def_py.py +47 -0
  402. mindspore/ops_generate/op_def_py/op_def_py_generator.py +132 -0
  403. mindspore/ops_generate/op_def_py/op_prim_py_generator.py +489 -0
  404. mindspore/ops_generate/pyboost/__init__.py +0 -0
  405. mindspore/ops_generate/pyboost/auto_grad_impl_cc_generator.py +139 -0
  406. mindspore/ops_generate/pyboost/auto_grad_reg_cc_generator.py +93 -0
  407. mindspore/ops_generate/pyboost/gen_pyboost_func.py +175 -0
  408. mindspore/ops_generate/pyboost/op_template_parser.py +517 -0
  409. mindspore/ops_generate/pyboost/pyboost_functions_cpp_generator.py +407 -0
  410. mindspore/ops_generate/pyboost/pyboost_functions_h_generator.py +100 -0
  411. mindspore/ops_generate/pyboost/pyboost_functions_py_generator.py +148 -0
  412. mindspore/ops_generate/pyboost/pyboost_grad_function_cpp_generator.py +155 -0
  413. mindspore/ops_generate/pyboost/pyboost_inner_prim_generator.py +132 -0
  414. mindspore/ops_generate/pyboost/pyboost_native_grad_functions_generator.py +272 -0
  415. mindspore/ops_generate/pyboost/pyboost_op_cpp_code_generator.py +938 -0
  416. mindspore/ops_generate/pyboost/pyboost_overload_functions_cpp_generator.py +357 -0
  417. mindspore/ops_generate/{pyboost_utils.py → pyboost/pyboost_utils.py} +179 -36
  418. mindspore/ops_generate/resources/__init__.py +0 -0
  419. mindspore/ops_generate/resources/resource_list.py +30 -0
  420. mindspore/ops_generate/resources/resource_loader.py +36 -0
  421. mindspore/ops_generate/resources/resource_manager.py +64 -0
  422. mindspore/ops_generate/resources/yaml_loader.py +88 -0
  423. mindspore/ops_generate/tensor_py_cc_generator.py +122 -0
  424. mindspore/parallel/__init__.py +7 -3
  425. mindspore/parallel/_auto_parallel_context.py +152 -34
  426. mindspore/parallel/_cell_wrapper.py +130 -15
  427. mindspore/parallel/_parallel_serialization.py +107 -5
  428. mindspore/parallel/_ps_context.py +1 -1
  429. mindspore/parallel/_recovery_context.py +7 -2
  430. mindspore/parallel/_tensor.py +142 -18
  431. mindspore/parallel/_utils.py +199 -23
  432. mindspore/parallel/algo_parameter_config.py +4 -4
  433. mindspore/parallel/auto_parallel.py +732 -0
  434. mindspore/parallel/checkpoint_convert.py +159 -0
  435. mindspore/parallel/checkpoint_transform.py +698 -35
  436. mindspore/parallel/cluster/process_entity/_api.py +276 -50
  437. mindspore/parallel/cluster/process_entity/_utils.py +41 -6
  438. mindspore/parallel/cluster/run.py +21 -4
  439. mindspore/parallel/function/__init__.py +24 -0
  440. mindspore/parallel/function/reshard_func.py +259 -0
  441. mindspore/parallel/nn/__init__.py +25 -0
  442. mindspore/parallel/nn/parallel_cell_wrapper.py +263 -0
  443. mindspore/parallel/nn/parallel_grad_reducer.py +169 -0
  444. mindspore/parallel/parameter_broadcast.py +25 -14
  445. mindspore/parallel/shard.py +137 -58
  446. mindspore/parallel/transform_safetensors.py +363 -305
  447. mindspore/pgodb140.dll +0 -0
  448. mindspore/pgort140.dll +0 -0
  449. mindspore/profiler/__init__.py +22 -5
  450. mindspore/profiler/analysis/__init__.py +0 -0
  451. mindspore/profiler/analysis/parser/__init__.py +0 -0
  452. mindspore/profiler/analysis/parser/ascend_cann_parser.py +170 -0
  453. mindspore/profiler/analysis/parser/base_parser.py +158 -0
  454. mindspore/profiler/analysis/parser/framework_cann_relation_parser.py +45 -0
  455. mindspore/profiler/analysis/parser/ms_framework_parser.py +142 -0
  456. mindspore/profiler/analysis/parser/ms_minddata_parser.py +145 -0
  457. mindspore/profiler/analysis/parser/timeline_assembly_factory/__init__.py +0 -0
  458. mindspore/profiler/analysis/parser/timeline_assembly_factory/ascend_timeline_assembler.py +264 -0
  459. mindspore/profiler/analysis/parser/timeline_assembly_factory/base_timeline_assembler.py +40 -0
  460. mindspore/profiler/analysis/parser/timeline_assembly_factory/trace_view_container.py +106 -0
  461. mindspore/profiler/analysis/parser/timeline_creator/__init__.py +0 -0
  462. mindspore/profiler/analysis/parser/timeline_creator/base_timeline_creator.py +44 -0
  463. mindspore/profiler/analysis/parser/timeline_creator/cpu_op_timeline_creator.py +90 -0
  464. mindspore/profiler/analysis/parser/timeline_creator/fwk_timeline_creator.py +76 -0
  465. mindspore/profiler/analysis/parser/timeline_creator/msprof_timeline_creator.py +103 -0
  466. mindspore/profiler/analysis/parser/timeline_creator/scope_layer_timeline_creator.py +134 -0
  467. mindspore/profiler/analysis/parser/timeline_event/__init__.py +0 -0
  468. mindspore/profiler/analysis/parser/timeline_event/base_event.py +233 -0
  469. mindspore/profiler/analysis/parser/timeline_event/cpu_op_event.py +47 -0
  470. mindspore/profiler/analysis/parser/timeline_event/flow_event.py +36 -0
  471. mindspore/profiler/analysis/parser/timeline_event/fwk_event.py +415 -0
  472. mindspore/profiler/analysis/parser/timeline_event/msprof_event.py +73 -0
  473. mindspore/profiler/analysis/parser/timeline_event/scope_layer_event.py +53 -0
  474. mindspore/profiler/analysis/parser/timeline_event/timeline_event_pool.py +146 -0
  475. mindspore/profiler/analysis/task_manager.py +131 -0
  476. mindspore/profiler/analysis/time_converter.py +84 -0
  477. mindspore/profiler/analysis/viewer/__init__.py +0 -0
  478. mindspore/profiler/analysis/viewer/ascend_communication_viewer.py +372 -0
  479. mindspore/profiler/analysis/viewer/ascend_integrate_viewer.py +87 -0
  480. mindspore/profiler/analysis/viewer/ascend_kernel_details_viewer.py +250 -0
  481. mindspore/profiler/analysis/viewer/ascend_memory_viewer.py +320 -0
  482. mindspore/profiler/analysis/viewer/ascend_op_memory_viewer.py +327 -0
  483. mindspore/profiler/analysis/viewer/ascend_step_trace_time_viewer.py +376 -0
  484. mindspore/profiler/analysis/viewer/ascend_timeline_viewer.py +58 -0
  485. mindspore/profiler/analysis/viewer/base_viewer.py +26 -0
  486. mindspore/profiler/analysis/viewer/ms_dataset_viewer.py +96 -0
  487. mindspore/profiler/analysis/viewer/ms_minddata_viewer.py +581 -0
  488. mindspore/profiler/analysis/work_flow.py +73 -0
  489. mindspore/profiler/common/ascend_msprof_exporter.py +139 -0
  490. mindspore/profiler/common/command_executor.py +90 -0
  491. mindspore/profiler/common/constant.py +186 -3
  492. mindspore/profiler/common/file_manager.py +208 -0
  493. mindspore/profiler/common/log.py +130 -0
  494. mindspore/profiler/common/msprof_cmd_tool.py +221 -0
  495. mindspore/profiler/common/path_manager.py +395 -0
  496. mindspore/profiler/common/process_bar.py +168 -0
  497. mindspore/profiler/common/process_pool.py +9 -3
  498. mindspore/profiler/common/profiler_context.py +500 -0
  499. mindspore/profiler/common/profiler_info.py +304 -0
  500. mindspore/profiler/common/profiler_meta_data.py +74 -0
  501. mindspore/profiler/common/profiler_output_path.py +284 -0
  502. mindspore/profiler/common/profiler_parameters.py +251 -0
  503. mindspore/profiler/common/profiler_path_manager.py +179 -0
  504. mindspore/profiler/common/record_function.py +76 -0
  505. mindspore/profiler/common/tlv_decoder.py +76 -0
  506. mindspore/profiler/common/util.py +75 -2
  507. mindspore/profiler/dynamic_profiler.py +341 -75
  508. mindspore/profiler/envprofiler.py +163 -0
  509. mindspore/profiler/experimental_config.py +197 -0
  510. mindspore/profiler/mstx.py +242 -0
  511. mindspore/profiler/platform/__init__.py +21 -0
  512. mindspore/profiler/platform/base_profiler.py +40 -0
  513. mindspore/profiler/platform/cpu_profiler.py +124 -0
  514. mindspore/profiler/platform/gpu_profiler.py +74 -0
  515. mindspore/profiler/platform/npu_profiler.py +335 -0
  516. mindspore/profiler/profiler.py +1073 -90
  517. mindspore/profiler/profiler_action_controller.py +187 -0
  518. mindspore/profiler/profiler_interface.py +118 -0
  519. mindspore/profiler/schedule.py +243 -0
  520. mindspore/rewrite/api/node.py +15 -13
  521. mindspore/rewrite/api/symbol_tree.py +2 -3
  522. mindspore/run_check/_check_version.py +27 -20
  523. mindspore/run_check/run_check.py +1 -1
  524. mindspore/runtime/__init__.py +37 -0
  525. mindspore/runtime/device.py +27 -0
  526. mindspore/runtime/event.py +209 -0
  527. mindspore/runtime/executor.py +177 -0
  528. mindspore/runtime/memory.py +409 -0
  529. mindspore/runtime/stream.py +460 -0
  530. mindspore/runtime/thread_bind_core.py +401 -0
  531. mindspore/safeguard/rewrite_obfuscation.py +12 -9
  532. mindspore/swresample-4.dll +0 -0
  533. mindspore/swscale-6.dll +0 -0
  534. mindspore/tbbmalloc.dll +0 -0
  535. mindspore/tinyxml2.dll +0 -0
  536. mindspore/train/__init__.py +8 -8
  537. mindspore/train/_utils.py +88 -25
  538. mindspore/train/amp.py +9 -5
  539. mindspore/train/callback/__init__.py +2 -2
  540. mindspore/train/callback/_callback.py +2 -16
  541. mindspore/train/callback/_checkpoint.py +53 -55
  542. mindspore/train/callback/_cluster_monitor.py +14 -18
  543. mindspore/train/callback/_early_stop.py +1 -1
  544. mindspore/train/callback/_flops_collector.py +103 -68
  545. mindspore/train/callback/_history.py +8 -5
  546. mindspore/train/callback/_lambda_callback.py +2 -2
  547. mindspore/train/callback/_landscape.py +0 -3
  548. mindspore/train/callback/_loss_monitor.py +2 -1
  549. mindspore/train/callback/_on_request_exit.py +6 -5
  550. mindspore/train/callback/_reduce_lr_on_plateau.py +11 -6
  551. mindspore/train/callback/_summary_collector.py +52 -19
  552. mindspore/train/callback/_time_monitor.py +2 -1
  553. mindspore/train/callback/{_tft_register.py → _train_fault_tolerance.py} +204 -107
  554. mindspore/train/data_sink.py +25 -2
  555. mindspore/train/dataset_helper.py +15 -16
  556. mindspore/train/loss_scale_manager.py +8 -7
  557. mindspore/train/metrics/accuracy.py +3 -3
  558. mindspore/train/metrics/confusion_matrix.py +9 -9
  559. mindspore/train/metrics/error.py +3 -3
  560. mindspore/train/metrics/hausdorff_distance.py +4 -4
  561. mindspore/train/metrics/mean_surface_distance.py +3 -3
  562. mindspore/train/metrics/metric.py +0 -12
  563. mindspore/train/metrics/occlusion_sensitivity.py +4 -2
  564. mindspore/train/metrics/precision.py +11 -10
  565. mindspore/train/metrics/recall.py +9 -9
  566. mindspore/train/metrics/root_mean_square_surface_distance.py +2 -2
  567. mindspore/train/mind_ir_pb2.py +174 -46
  568. mindspore/train/model.py +184 -113
  569. mindspore/train/serialization.py +622 -978
  570. mindspore/train/summary/_summary_adapter.py +2 -2
  571. mindspore/train/summary/summary_record.py +2 -3
  572. mindspore/train/train_thor/model_thor.py +1 -1
  573. mindspore/turbojpeg.dll +0 -0
  574. mindspore/utils/__init__.py +6 -3
  575. mindspore/utils/dryrun.py +140 -0
  576. mindspore/utils/hooks.py +81 -0
  577. mindspore/utils/runtime_execution_order_check.py +550 -0
  578. mindspore/utils/utils.py +138 -4
  579. mindspore/vcmeta.dll +0 -0
  580. mindspore/vcruntime140.dll +0 -0
  581. mindspore/vcruntime140_1.dll +0 -0
  582. mindspore/version.py +1 -1
  583. {mindspore-2.4.10.dist-info → mindspore-2.6.0rc1.dist-info}/METADATA +3 -3
  584. {mindspore-2.4.10.dist-info → mindspore-2.6.0rc1.dist-info}/RECORD +587 -418
  585. {mindspore-2.4.10.dist-info → mindspore-2.6.0rc1.dist-info}/entry_points.txt +1 -1
  586. mindspore/_install_custom.py +0 -43
  587. mindspore/common/_register_for_adapter.py +0 -74
  588. mindspore/common/_tensor_overload.py +0 -139
  589. mindspore/mindspore_np_dtype.dll +0 -0
  590. mindspore/ops/auto_generate/gen_arg_dtype_cast.py +0 -252
  591. mindspore/ops/auto_generate/gen_arg_handler.py +0 -197
  592. mindspore/ops/operations/_opaque_predicate_registry.py +0 -41
  593. mindspore/ops_generate/gen_aclnn_implement.py +0 -263
  594. mindspore/ops_generate/gen_ops_inner_prim.py +0 -131
  595. mindspore/ops_generate/gen_pyboost_func.py +0 -1052
  596. mindspore/ops_generate/gen_utils.py +0 -209
  597. mindspore/ops_generate/op_proto.py +0 -145
  598. mindspore/ops_generate/template.py +0 -261
  599. mindspore/profiler/envprofiling.py +0 -254
  600. mindspore/profiler/profiling.py +0 -1926
  601. {mindspore-2.4.10.dist-info → mindspore-2.6.0rc1.dist-info}/WHEEL +0 -0
  602. {mindspore-2.4.10.dist-info → mindspore-2.6.0rc1.dist-info}/top_level.txt +0 -0
@@ -16,19 +16,26 @@
  from __future__ import absolute_import

  import os
- import time
+ import sys
  import glob
- import re
  import math
  import json
+ import re
  from collections import defaultdict

+ import time
  import multiprocessing as mp
+ import psutil
  import numpy as np
+ from safetensors.numpy import save_file, load_file
+ from safetensors import safe_open
+
  import mindspore as ms
+ from mindspore import log as logger
+ from mindspore.log import vlog_print
  from mindspore.parallel._parallel_serialization import _get_device_num_from_strategy, _make_dir, \
      _extract_layout_map, _extract_src_dst_layout_map, _parameter_not_in_local_stage, _extract_pipeline_stage_num, \
-     _insert_opt_shard_reshape, _extract_src_dst_layout_map_by_src
+     _insert_opt_shard_reshape, _extract_src_dst_layout_map_by_src, _insert_expand_layout_reshape
  from mindspore.parallel._tensor import _get_tensor_strategy, _construct_from_to_tensor_layout, \
      _get_needed_rank_transform_operator_map_by_layouts, \
      _generate_transform_operator_stack, _apply_tensor_transform_operators, _construct_tensor_layout_for_opt_shard, \
@@ -36,70 +43,6 @@ from mindspore.parallel._tensor import _get_tensor_strategy, _construct_from_to_
  from mindspore.parallel._parallel_serialization import _build_searched_strategy, _load_protobuf_strategy, \
      _convert_to_list

- from safetensors.numpy import save_file, load_file
- from safetensors import safe_open
-
-
- def _load_and_transform(path, name_map, load_func, transform_func):
-     if load_func is not None:
-         param_dict = load_func(path)
-     else:
-         param_dict = path
-     transform_dict = {}
-     for k, v in param_dict.items():
-         new_name = name_map.get(k, k) if name_map is not None else k
-         transform_dict[new_name] = transform_func(v, new_name)
-     return transform_dict
-
-
- def _transform_tensor_to_numpy(path, name_map=None):
-     return _load_and_transform(path, name_map, ms.load_checkpoint, lambda v, new_name: v.asnumpy())
-
-
- def _transform_numpy_to_tensor(path, name_map=None):
-     return _load_and_transform(path, name_map, load_file, lambda v, new_name: ms.Parameter(v, name=new_name))
-
-
- def _process_file(file_info):
-     cur_ckpt_path, name_map, save_path, file = file_info
-     param_dict_numpy = _transform_tensor_to_numpy(cur_ckpt_path, name_map)
-     safetensors_filename = file.replace(".ckpt", ".safetensors")
-     dst_file = os.path.join(save_path, safetensors_filename)
-     save_file(param_dict_numpy, dst_file)
-
-
- def _process_file_safetensors(file_info):
-     cur_safe_path, name_map, save_path, file = file_info
-     param_dict_tensor = _transform_numpy_to_tensor(cur_safe_path, name_map)
-     ckpt_filename = file.replace(".safetensors", ".ckpt")
-     dst_file = os.path.join(save_path, ckpt_filename)
-     ms.save_checkpoint(param_dict_tensor, dst_file)
-
-
- def _gather_tasks(file_path, save_path, file_name_regex, name_map):
-     """gather transform rank together"""
-     tasks = []
-     for root, dirs, _ in os.walk(file_path):
-         if root != file_path:
-             continue
-
-         rank_dirs = [d for d in dirs if d.startswith('rank')]
-         if not rank_dirs:
-             raise ValueError(
-                 f"For 'ckpt_to_safetensors', no directories starting with 'rank' found in {file_path}")
-
-         for rank_dir in rank_dirs:
-             rank_dir_path = os.path.join(root, rank_dir)
-             dst_root = os.path.join(save_path,
-                                     os.path.relpath(rank_dir_path, file_path)) if save_path else rank_dir_path
-             os.makedirs(dst_root, exist_ok=True)
-             tasks.extend(
-                 (os.path.join(rank_dir_path, file), name_map, dst_root, file)
-                 for file in os.listdir(rank_dir_path)
-                 if file.endswith(".ckpt") and (file_name_regex is None or re.findall(file_name_regex, file))
-             )
-     return tasks
-

  def _progress_bar(iterable, total=None):
      """
@@ -125,6 +68,7 @@ def _progress_bar(iterable, total=None):
          elapsed_time_str = time.strftime("%H:%M:%S", time.gmtime(elapsed_time))
          remaining_time_str = time.strftime("%H:%M:%S", time.gmtime(remaining_time))

+         sys.stdout.reconfigure(encoding="utf-8")
          print(f'\r{percent}%|{bar}|[{elapsed_time_str}<{remaining_time_str}]', end='')
          if iteration == total:
              print()
@@ -134,155 +78,16 @@ def _progress_bar(iterable, total=None):
          print_progress_bar(i)


- def ckpt_to_safetensors(file_path, save_path=None, name_map=None, file_name_regex=None, processes_num=1):
-     """
-     Converts MindSpore checkpoint files into safetensors format and saves them to `save_path`.
-     Safetensors is a reliable and portable machine learning model storage format introduced by Huggingface,
-     used for securely storing Tensors with fast speed (zero copy).
-
-     Note:
-         The number of multiprocess settings is related to the size of the host, and it is not recommended to set it
-         too large, otherwise it may cause freezing.
-         The safetensors format does not support the enc verification function. If ckpt is enabled to save enc
-         verification, an error will be generated when performing the conversion.
-         The safetensors format currently does not support crc verification function. If ckpt contains crc verification
-         information, the crc verification information will be lost after conversion to safetensors.
-
-     Args:
-         file_path (str): Path to the directory containing checkpoint files or a single checkpoint file (.ckpt).
-         save_path (str, optional): Directory path where safetensors files will be saved. Defaults: ``None``.
-         name_map (dict, optional): Dictionary mapping original parameter names to new names. Defaults: ``None``.
-         file_name_regex (str, optional): Regular expression used to match the file that needs to be converted.
-             Defaults: ``None``.
-         processes_num (int, optional): Number of processes to use for parallel processing. Defaults: 1.
-     Raises:
-         ValueError: If the input path is invalid or the save_path is not a directory,
-             or the file_path does not end with '.ckpt'.
-
-     Supported Platforms:
-         ``Ascend`` ``GPU`` ``CPU``
-
-     Examples:
-         >>> import mindspore as ms
-         >>> ms.ckpt_to_safetensors("./ckpt_save_path")
-         >>> ms.ckpt_to_safetensors("./ckpt_save_path/rank0/checkpoint_0.ckpt")
-         >>> ms.ckpt_to_safetensors(file_path="./ckpt_save_path/rank0/checkpoint_0.ckpt", save_path="./new_path/")
-         >>> namemap = {"lin.weight":"new_name"}
-         >>> ms.ckpt_to_safetensors("./ckpt_save_path/rank0/checkpoint_0.ckpt", "./new_path/", namemap)
-     """
-     is_dir = os.path.isdir(file_path)
-     is_file = os.path.isfile(file_path)
-     if not is_dir and not is_file:
-         raise ValueError(f"For 'ckpt_to_safetensors', the input path must be a valid path or file, but got {file_path}")
-     if save_path and os.path.splitext(save_path)[1]:
-         raise ValueError(f"For 'ckpt_to_safetensors', the save_path must be a directory, but got '{save_path}'")
-     if name_map is not None and not isinstance(name_map, dict):
-         raise ValueError(
-             f"For 'ckpt_to_safetensors', the type of 'name_map' must be a directory, but got '{type(name_map)}'")
-
-     if is_dir:
-         tasks = _gather_tasks(file_path, save_path, file_name_regex, name_map)
-         with mp.Pool(processes=processes_num) as pool:
-             list(_progress_bar(pool.imap(_process_file, tasks), total=len(tasks)))
-     elif is_file:
-         if not file_path.endswith(".ckpt"):
-             raise ValueError(f"For 'ckpt_to_safetensors', the input file must be a .ckpt file, but got {file_path}")
-         if file_name_regex is not None and not re.findall(file_name_regex, file_path):
-             raise ValueError(f"For 'ckpt_to_safetensors', the input file does not match the regular expression.")
-         if save_path and not os.path.exists(save_path):
-             os.makedirs(save_path, exist_ok=True)
-
-         param_dict_numpy = _transform_tensor_to_numpy(file_path, name_map)
-         safetensors_filename = os.path.basename(file_path).replace(".ckpt", ".safetensors")
-         dst_file = os.path.join(save_path if save_path else os.path.dirname(file_path), safetensors_filename)
-         save_file(param_dict_numpy, dst_file)
-
-
- def _gather_safetensors_tasks(file_path, save_path, file_name_regex, name_map):
-     """gather transform rank together"""
-     tasks = []
-     for root, dirs, _ in os.walk(file_path):
-         if root != file_path:
-             continue
-
-         rank_dirs = [d for d in dirs if d.startswith('rank')]
-         if not rank_dirs:
-             raise ValueError(
-                 f"For 'safetensors_to_ckpt', no directories starting with 'rank' found in {file_path}")
-
-         for rank_dir in rank_dirs:
-             rank_dir_path = os.path.join(root, rank_dir)
-             dst_root = os.path.join(save_path,
-                                     os.path.relpath(rank_dir_path, file_path)) if save_path else rank_dir_path
-             os.makedirs(dst_root, exist_ok=True)
-             tasks.extend(
-                 (os.path.join(rank_dir_path, file), name_map, dst_root, file)
-                 for file in os.listdir(rank_dir_path)
-                 if file.endswith(".safetensors") and (file_name_regex is None or re.findall(file_name_regex, file))
-             )
-     return tasks
-
-
- def safetensors_to_ckpt(file_path, save_path=None, name_map=None, file_name_regex=None, processes_num=1):
-     """
-     Converts safetensors files into MindSpore checkpoint format and saves them to `save_path`.
-     Safetensors is a reliable and portable machine learning model storage format introduced by Huggingface,
-     used for securely storing Tensors with fast speed (zero copy).
-
-     Note:
-         The number of multiprocess settings is related to the size of the host, and it is not recommended to set it
-         too large, otherwise it may cause freezing.
-
-     Args:
-         file_path (str): Path to the directory containing safetensors files or a single safetensors file (.safetensors).
-         save_path (str, optional): Directory path where checkpoint files will be saved. Defaults: ``None``.
-         name_map (dict, optional): Dictionary mapping original parameter names to new names. Defaults: ``None``.
-         file_name_regex (str, optional): Regular expression used to match the file that needs to be converted.
-             Defaults: ``None``.
-         processes_num (int, optional): Number of processes to use for parallel processing. Defaults: 1.
-
-     Raises:
-         ValueError: If the input path is invalid, the save_path is not a directory,
-             or the file_path does not end with '.safetensors'.
-
-     Supported Platforms:
-         ``Ascend`` ``GPU`` ``CPU``
-
-     Examples:
-         >>> import mindspore as ms
-         >>> ms.safetensors_to_ckpt("./safetensors_save_path")
-         >>> ms.safetensors_to_ckpt("./safetensors_save_path/rank0/checkpoint_0.safetensors")
-         >>> ms.safetensors_to_ckpt("./safetensors_save_path/rank0/checkpoint_0.safetensors", "./new_path/")
-         >>> namemap = {"lin.weight":"new_name"}
-         >>> ms.safetensors_to_ckpt("./safetensors_save_path/rank0/checkpoint_0.safetensors", "./new_path/", namemap)
-     """
-     is_dir = os.path.isdir(file_path)
-     is_file = os.path.isfile(file_path)
-     if not is_dir and not is_file:
-         raise ValueError(f"For 'safetensors_to_ckpt', the input path must be a valid path or file, but got {file_path}")
-     if save_path and os.path.splitext(save_path)[1]:
-         raise ValueError(f"For 'safetensors_to_ckpt', the save_path must be a directory, but got '{save_path}'")
-     if name_map is not None and not isinstance(name_map, dict):
-         raise ValueError(
-             f"For 'safetensors_to_ckpt', the type of 'name_map' must be a directory, but got '{type(name_map)}'")
-
-     if is_dir:
-         tasks = _gather_safetensors_tasks(file_path, save_path, file_name_regex, name_map)
-         with mp.Pool(processes=processes_num) as pool:
-             list(_progress_bar(pool.imap(_process_file_safetensors, tasks), total=len(tasks)))
-     elif is_file:
-         if not file_path.endswith(".safetensors"):
-             raise ValueError(
-                 f"For 'safetensors_to_ckpt', the input file must be a .safetensors file, but got {file_path}")
-         if file_name_regex is not None and not re.findall(file_name_regex, file_path):
-             raise ValueError(f"For 'safetensors_to_ckpt', the input file does not match the regular expression.")
-         if save_path and not os.path.exists(save_path):
-             os.makedirs(save_path, exist_ok=True)
-
-         param_dict_tensor = _transform_numpy_to_tensor(file_path, name_map)
-         ckpt_filename = os.path.basename(file_path).replace(".safetensors", ".ckpt")
-         dst_file = os.path.join(save_path if save_path else os.path.dirname(file_path), ckpt_filename)
-         ms.save_checkpoint(param_dict_tensor, dst_file)
+ def _load_and_transform(path, name_map, load_func, transform_func):
+     if load_func is not None:
+         param_dict = load_func(path)
+     else:
+         param_dict = path
+     transform_dict = {}
+     for k, v in param_dict.items():
+         new_name = name_map.get(k, k) if name_map is not None else k
+         transform_dict[new_name] = transform_func(v, new_name)
+     return transform_dict


  def _check_transform_safetensors(src_safetensors_dir, ckpt_prefix, src_strategy_file, dst_strategy_file):
@@ -460,7 +265,6 @@ def _transform_safetensors_with_parallel(needed_rank_list_map, all_safetensor_fi

      for name, layout in layout_map.items():
          pipe_param_list[layout[6][0]].append(name)
-
      part_list_dict = _distribute_files_by_size(all_safetensor_files_map, needed_rank_list_map, process_num)
      processes = []
      for i in range(process_num):
@@ -485,8 +289,9 @@ def _count_redundancy_list(rank_num, param_name, redundancy_dict, device_num):


  def _find_remove_redundancy_rank_id(pipe_param_list, single_param_dict, file_dict, saftensor_dict, redundancy_dict,
-                                     needed_rank, device_num):
+                                     needed_rank, device_num, choice_func):
      """Find the rank_id under redundant groups."""
+     io_time = 0
      for param_name in pipe_param_list:
          rank_num = int(needed_rank)
          redundancy_ranks = _count_redundancy_list(rank_num, param_name, redundancy_dict, device_num)
@@ -499,11 +304,23 @@ def _find_remove_redundancy_rank_id(pipe_param_list, single_param_dict, file_dic
                  open_file_id = real_rank
                  break
          if open_file_id is not None:
-             output = file_dict[open_file_id].get_tensor(param_name)
+             start_time = time.time()
+             output = file_dict[open_file_id].get_slice(param_name)
+             end_time = time.time()
+             cost_time = end_time - start_time
+             io_time += cost_time
+             if choice_func is not None:
+                 choice_out = choice_func(param_name)
+                 if isinstance(choice_out, bool) and not choice_out:
+                     continue
+                 if not isinstance(choice_out, (bool, str)):
+                     raise ValueError("For 'unified_safetensors', the return value type of the function "
+                                      f"'choice_func' must be bool or str, but got {type(choice_out)}.")
              saftensor_dict[param_name] = output
          else:
              raise ValueError(f"For _transform_safetensors_single, {param_name} should be in "
                               f"{redundancy_ranks}, but in {single_param_dict[param_name]}.")
+     return io_time


  def _transform_safetensors_single(needed_rank_list_map, all_safetensor_files_map, src_stage_device_num,
@@ -512,13 +329,14 @@ def _transform_safetensors_single(needed_rank_list_map, all_safetensor_files_map
                                    origin_dst_strategy_list,
                                    ckpt_prefix, dst_safetensors_dir, output_format,
                                    _transform_param_list, pipe_param_list=None, file_index=None, unified_flag=False,
-                                   src_strategy_file=None):
+                                   src_strategy_file=None, choice_func=None):
      """
      Transforms safetensors files to a specified format without using parallel processing.
      """
+     io_cost_time = 0
      if src_strategy_file is not None:
          from mindspore.train._utils import get_parameter_redundancy
-         redundancy_dict_tmp = get_parameter_redundancy(src_strategy_file)
+         redundancy_dict_tmp = get_parameter_redundancy(src_strategy_file, initial_rank=0)
          redundancy_dict = {}
          device_num = 0
          for param_name, redundancy in redundancy_dict_tmp.items():
@@ -552,8 +370,10 @@ def _transform_safetensors_single(needed_rank_list_map, all_safetensor_files_map
          if pipe_param_list:
              saftensor_dict = dict()
              if src_strategy_file is not None:
-                 _find_remove_redundancy_rank_id(pipe_param_list, single_param_dict, file_dict, saftensor_dict,
-                                                 redundancy_dict, needed_rank, device_num)
+                 io_time = _find_remove_redundancy_rank_id(pipe_param_list, single_param_dict, file_dict,
+                                                           saftensor_dict, redundancy_dict, needed_rank,
+                                                           device_num, choice_func)
+                 io_cost_time += io_time
              else:
                  with safe_open(all_safetensor_files_map.get(int(needed_rank)), framework="np") as f:
                      if not unified_flag:
@@ -562,14 +382,32 @@ def _transform_safetensors_single(needed_rank_list_map, all_safetensor_files_map
                          dst_param_name_set = set(dst_strategy_list_keys)
                          hyper_param_set = all_param_name_set - (src_param_name_set & dst_param_name_set)
                          pipe_param_list.extend(list(hyper_param_set))
+                     io_time = 0
                      for param_name in pipe_param_list:
                          if param_name not in f.keys():
                              # param not in ckpt file, check reason
                              continue
-                         output = f.get_tensor(param_name)
+                         start_time = time.time()
+                         output = f.get_slice(param_name)
+                         end_time = time.time()
+                         cost_time = end_time - start_time
+                         io_time += cost_time
+                         io_cost_time += io_time
+                         if choice_func is not None:
+                             choice_out = choice_func(param_name)
+                             if isinstance(choice_out, bool) and not choice_out:
+                                 continue
+                             if not isinstance(choice_out, (bool, str)):
+                                 raise ValueError("For 'unified_safetensors', the return value type of the function "
+                                                  f"'choice_func' must be bool or str, but got {type(choice_out)}.")
                          saftensor_dict[param_name] = output
          else:
+             start_time = time.time()
              saftensor_dict = load_file(all_safetensor_files_map.get(int(needed_rank)))
+             end_time = time.time()
+             cost_time = end_time - start_time
+             io_cost_time += cost_time
+
          for param_name, param in saftensor_dict.items():
              src_rank = int(needed_rank) % src_stage_device_num
              param_total_dict[param_name][src_rank] = param
@@ -588,7 +426,7 @@ def _transform_safetensors_single(needed_rank_list_map, all_safetensor_files_map
          local_rank_id = transform_rank % dst_stage_device_num
          transform_param_dict = _transform_parallel_safetensor(local_rank_id, param_total_dict,
                                                                param_attr_dict, src_strategy_list, dst_strategy_list,
-                                                               param_total_dict_keys, src_strategy_file)
+                                                               param_total_dict_keys, src_strategy_file, choice_func)
          if file_index is not None:
              save_safetensor_file = f"part{file_index}.{output_format}"
              save_safetensor_file_dir = dst_safetensors_dir
@@ -602,15 +440,17 @@ def _transform_safetensors_single(needed_rank_list_map, all_safetensor_files_map
          if _transform_param_list is not None:
              _transform_param_list.append({save_file_name: transform_param_dict})
          else:
-             if output_format == "safetensors":
-                 save_file(transform_param_dict, save_file_name)
-             else:
-                 transform_param_dict = _load_and_transform(transform_param_dict, None, None,
-                                                            transform_func=lambda v, name: ms.Parameter(v,
-                                                                                                         name=name))
-                 ms.save_checkpoint(transform_param_dict, save_file_name)
+             if transform_param_dict:
+                 if output_format == "safetensors":
+                     save_file(transform_param_dict, save_file_name)
+                 else:
+                     transform_param_dict = _load_and_transform(transform_param_dict,
+                                                                None, None, transform_func=
+                                                                lambda v, name: ms.Parameter(v, name=name))
+                     ms.save_checkpoint(transform_param_dict, save_file_name)
      del param_total_dict_keys
      del param_total_dict
+     return io_cost_time


  def _save_final_safetensors(_transform_param_list, output_format):
@@ -735,6 +575,13 @@ def transform_safetensors_by_rank(rank_id, safetensor_files_map, save_safetensor
      save_file(transform_param_dict, save_safetensor_file_name)


+ def _extrace_number(file_name):
+     """get file last two number"""
+     number_ls = re.findall(r'\d+', file_name)
+     number_ls = [int(i) for i in number_ls]
+     return number_ls[-2:]
+
+
  def _collect_safetensor_files(src_safetensors_dir, format='safetensors', file_suffix=None):
      """
      Collects all safetensors files from the specified directory and its subdirectories.
@@ -758,12 +605,9 @@ def _collect_safetensor_files(src_safetensors_dir, format='safetensors', file_su
          else:
              safetensor_file_name = os.path.join(safetensor_dir, f"*{file_suffix}.{format}")
          rank_ckpts = glob.glob(safetensor_file_name)
-         rank_ckpts.sort()
-         for safetensor_file in rank_ckpts:
-             if not os.path.isfile(safetensor_file):
-                 ms.log.warning("{} is not a safetensor file.".format(safetensor_file))
-                 continue
-             all_safetensor_files_map[rank_id] = safetensor_file
+         rank_ckpts.sort(key=_extrace_number)
+         if rank_ckpts:
+             all_safetensor_files_map[rank_id] = rank_ckpts[-1]
      return all_safetensor_files_map


@@ -775,7 +619,7 @@ def _find_needed_ranks(src_strategy_dict, dst_strategy_dict):
      dst_stage_device_num = _get_device_num_from_strategy(dst_strategy_dict)
      dst_stage_num = _extract_pipeline_stage_num(dst_strategy_dict)
      dst_device_num = dst_stage_device_num * dst_stage_num
-     for rank in _progress_bar(range(dst_device_num)):
+     for rank in range(dst_device_num):
          needed_rank_list = ms.rank_list_for_transform(rank, src_strategy_dict, dst_strategy_dict)
          needed_rank_list_key = "-".join([str(r) for r in needed_rank_list])
          needed_rank_list_map[needed_rank_list_key].append(rank)
@@ -791,7 +635,8 @@ def load_file_by_param_name(filename, parme_name_list):


  def _transform_parallel_safetensor(rank_id, param_total_dict, param_attr_dict, src_strategy_list,
-                                    dst_strategy_list, param_total_dict_keys=None, src_strategy_file=None):
+                                    dst_strategy_list, param_total_dict_keys=None, src_strategy_file=None,
+                                    choice_func=None):
      """
      Transform model parallel dimension for distributed safetensor files.
      """
@@ -799,7 +644,10 @@ def _transform_parallel_safetensor(rank_id, param_total_dict, param_attr_dict, s
  device_num = -1
  param_total_dict_keys = list(param_total_dict.keys()) if param_total_dict_keys is None else param_total_dict_keys
  for param_name in param_total_dict_keys:
- tensor_shape = list(param_total_dict[param_name].values())[0].shape
+ if str(type(list(param_total_dict[param_name].values())[0])) == "<class 'builtins.PySafeSlice'>":
+ tensor_shape = list(param_total_dict[param_name].values())[0].get_shape()
+ else:
+ tensor_shape = list(param_total_dict[param_name].values())[0].shape
  from_dev_matrix = [1]
  from_tensor_map = [-1] * len(tensor_shape)
  from_opt_shard_step = 0
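The `str(type(...))` check above distinguishes a lazily opened safetensors slice from an in-memory array: `safe_open(...).get_slice(name)` returns a `PySafeSlice`, whose shape comes from `get_shape()` and whose data is only read when it is indexed. A small self-contained sketch of the two access paths (file name and key are hypothetical), assuming the standard safetensors numpy API:

    import numpy as np
    from safetensors import safe_open
    from safetensors.numpy import save_file

    save_file({"w": np.zeros((4, 8), dtype=np.float32)}, "demo.safetensors")
    with safe_open("demo.safetensors", framework="np") as f:
        lazy = f.get_slice("w")      # PySafeSlice: header only, no data read yet
        print(lazy.get_shape())      # [4, 8], obtained without loading the tensor
        eager = f.get_tensor("w")    # np.ndarray, fully materialized
        print(eager.shape)           # (4, 8), plain .shape attribute
        data = lazy[:]               # slicing the PySafeSlice reads the bytes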
@@ -832,6 +680,9 @@ def _transform_parallel_safetensor(rank_id, param_total_dict, param_attr_dict, s
  continue
  origin_tensor_shape += (item * param_strategy[i],)

+ has_layout_from = any(isinstance(i, (list, tuple)) for i in from_tensor_map)
+ has_layout_to = any(isinstance(i, (list, tuple)) for i in to_tensor_map_origin)
+
  from_dev_matrix, from_tensor_map, from_full_tensor_shape = _construct_tensor_layout_for_opt_shard(
  from_dev_matrix, from_tensor_map, from_opt_shard_step, from_opt_shard_size, origin_tensor_shape)
  to_dev_matrix, to_tensor_map, to_full_tensor_shape = _construct_tensor_layout_for_opt_shard(
@@ -851,21 +702,132 @@ def _transform_parallel_safetensor(rank_id, param_total_dict, param_attr_dict, s
  from_info_tuple = (from_opt_shard_size, from_dev_matrix, from_tensor_map, from_full_tensor_shape)
  to_info_tuple = (to_opt_shard_size, to_dev_matrix_origin, to_tensor_map_origin, origin_tensor_shape)
  _insert_opt_shard_reshape(param_rank_map, from_info_tuple, to_info_tuple)
+ _insert_expand_layout_reshape(param_rank_map, from_info_tuple, to_info_tuple, has_layout_from, has_layout_to)
  transform_operator_stack = _generate_transform_operator_stack(param_rank_map, rank_id)
  param_total_dict_copy = param_total_dict[param_name].copy()
  _apply_tensor_transform_operators(transform_operator_stack, param_total_dict_copy, device_num)
-
+ if choice_func is not None:
+ choice_out = choice_func(param_name)
+ if isinstance(choice_out, str):
+ param_name = choice_out
  transform_param_dict[param_name] = param_total_dict_copy[rank_id % device_num]
+ if str(type(transform_param_dict[param_name])) == "<class 'builtins.PySafeSlice'>":
+ transform_param_dict[param_name] = transform_param_dict[param_name][:]

  # Handle those parameter like learning_rate, global_step which not in strategy_file.
  for param_name in param_total_dict_keys:
+ if choice_func is not None:
+ choice_out = choice_func(param_name)
+ if isinstance(choice_out, str):
+ continue
  if param_name not in transform_param_dict:
  transform_para = param_total_dict[param_name][rank_id % device_num]
+ if str(type(transform_para)) == "<class 'builtins.PySafeSlice'>":
+ transform_para = transform_para[:]
  transform_param_dict[param_name] = transform_para
  return transform_param_dict


- def unified_safetensors(src_dir, src_strategy_file, dst_dir, merge_with_redundancy=True, file_suffix=None):
+ def _cal_param_size(shape, dtype):
+ """cal param size by dtype and shape"""
+ dtype_size = {
+ "BOOL": 1,
+ "U8": 1,
+ "I8": 1,
+ "F8_E5M2": 1,
+ "F8_E4M3": 1,
+ "I16": 2,
+ "U16": 2,
+ "I32": 4,
+ "U32": 4,
+ "I64": 8,
+ "U64": 8,
+ "F16": 2,
+ "BF16": 2,
+ "F32": 4,
+ "F64": 8,
+ }
+ num_elements = math.prod(shape)
+ element_size = dtype_size.get(dtype, 4)
+ total_bytes = num_elements * element_size
+ return total_bytes
+
+
+ def _split_weight_dict(weights, num_groups):
+ """split weights by num"""
+ sorted_items = sorted(weights.items(), key=lambda x: -x[1])
+ groups = [[] for _ in range(num_groups)]
+ total_bytes = [0] * num_groups
+ for weight_name, byte_size in sorted_items:
+ min_index = total_bytes.index(min(total_bytes))
+ groups[min_index].append(weight_name)
+ total_bytes[min_index] += byte_size
+
+ return groups
+
+
+ def _save_hyper_param(split_dst_file, all_safetensor_files_map, name_list, dst_dir):
+ """save hyper param"""
+ if not split_dst_file or (split_dst_file and split_dst_file[0] == 1):
+ with safe_open(all_safetensor_files_map.get(0), framework="np") as f:
+ all_key = f.keys()
+ hyper_parameter = set(all_key) - set(name_list)
+ if hyper_parameter:
+ hyper_dict = {}
+ for key in hyper_parameter:
+ hyper_dict[key] = f.get_tensor(key)
+ save_file(hyper_dict, os.path.join(dst_dir, "hyper_param.safetensors"))
+
+
+ def _save_parameter_map_json(split_list, choice_func, split_dst_file, dst_dir, param_total_size):
+ """save parameter map json file"""
+ param_name_dict = dict()
+ for index, part_list in enumerate(split_list):
+ for name in part_list:
+ save_param_name = name
+ if choice_func is not None:
+ choice_out = choice_func(name)
+ if isinstance(choice_out, str):
+ save_param_name = choice_out
+ if save_param_name == -1:
+ break
+ param_name_dict[save_param_name] = f"part{index}.safetensors"
+ output_dict = {"metadata": {"total_size": param_total_size}, "weight_map": param_name_dict}
+ if not split_dst_file or (split_dst_file and split_dst_file[0] == 1):
+ json_str = json.dumps(output_dict, indent=4)
+ map_file = os.path.join(dst_dir, "param_name_map.json")
+ with open(map_file, 'w') as f:
+ f.write(json_str)
+
+
+ def _get_dst_shape(param_name, param_shape, src_strategy_list):
+ """get dst shape by strategy"""
+ from_dev_matrix = [1]
+ from_tensor_map = [-1] * len(param_shape)
+ from_opt_shard_size = 0
+ if src_strategy_list is not None:
+ from_dev_matrix, from_tensor_map, _, from_opt_shard_size = _extract_layout_item(
+ src_strategy_list.get(param_name))
+ to_dev_matrix_origin = [1]
+ to_tensor_map_origin = [-1] * len(param_shape)
+ to_opt_shard_step = 0
+ to_opt_shard_size = 0
+
+ param_strategy = _get_tensor_strategy(from_dev_matrix, from_tensor_map)
+ origin_tensor_shape = ()
+ for i, item in enumerate(param_shape):
+ if i == 0 and from_opt_shard_size > 0:
+ origin_tensor_shape += (item * param_strategy[i] * from_opt_shard_size,)
+ continue
+ origin_tensor_shape += (item * param_strategy[i],)
+
+ _, _, to_full_tensor_shape = _construct_tensor_layout_for_opt_shard(
+ to_dev_matrix_origin, to_tensor_map_origin, to_opt_shard_step, to_opt_shard_size, origin_tensor_shape)
+ return to_full_tensor_shape
+
+
+ def unified_safetensors(src_dir, src_strategy_file, dst_dir, merge_with_redundancy=True, file_suffix=None,
+ max_process_num=64, choice_func=None, split_dst_file=()):
  """
  Merge multiple safetensor files into a unified safetensor file.

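The new `_split_weight_dict` helper balances the merged output across `part{index}.safetensors` files: parameters are taken largest first (sizes computed by `_cal_param_size` on the destination shapes) and each is appended to whichever group currently holds the fewest bytes. A small worked example with hypothetical sizes:

    weights = {"a": 6, "b": 5, "c": 4, "d": 3}          # name -> size in bytes
    # largest first: a -> group 0 (totals 6, 0); b -> group 1 (6, 5)
    # c -> group 1 (6, 9); d -> group 0 (9, 9)
    print(_split_weight_dict(weights, num_groups=2))    # [['a', 'd'], ['b', 'c']]

`_save_parameter_map_json` then records the resulting placement in param_name_map.json as {"metadata": {"total_size": ...}, "weight_map": {"a": "part0.safetensors", ...}}.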
@@ -877,6 +839,14 @@ def unified_safetensors(src_dir, src_strategy_file, dst_dir, merge_with_redundan
  saved safetensors files. Default: ``True``, indicating that the merged source weight files are complete.
  file_suffix (str, optional): Specify the filename suffix for merging safetensors files. Default: ``None``,
  meaning all safetensors files in the source weight directory will be merged.
+ max_process_num (int, optional): Maximum number of processes. Default: ``64``.
+ choice_func (callable, optional): A callable function used to filter parameters or modify parameter names.
+ The return value of the function must be of type str (string) or bool (boolean). Default: ``None``.
+ split_dst_file (tuple, optional) - A parameter used to manually split a task into multiple subtasks for
+ execution, represented as a tuple containing two elements. The first element indicates the number of
+ the current subtask, and the second element indicates the total number of tasks. This parameter supports
+ splitting and executing tasks multiple times on a single machine, and also supports executing different
+ subtasks on multiple machines respectively. Default: ``()``.

  Raises:
  ValueError: If the safetensors file of rank is missing.
@@ -889,8 +859,12 @@ def unified_safetensors(src_dir, src_strategy_file, dst_dir, merge_with_redundan
  >>> src_dir = "/usr/safetensors/llama31B/4p_safetensors/"
  >>> src_strategy_file = "/usr/safetensors/llama31B/strategy_4p.ckpt"
  >>> dst_dir = "/usr/safetensors/llama31B/merge_llama31B_4p/"
- >>> ms.unified_safetensors(src_dir, src_strategy_file, dst_dir)
+ >>> ms.parallel.unified_safetensors(src_dir, src_strategy_file, dst_dir)
  """
+ pid = os.getpid()
+ total_cores = os.cpu_count()
+ all_cores = set(range(total_cores))
+ os.sched_setaffinity(pid, all_cores)
  _check_transform_safetensors(src_dir, "", src_strategy_file, None)
  _make_dir(dst_dir, "path")
  if os.path.isfile(src_dir):
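For reference, a hypothetical call exercising the new keyword arguments (the paths reuse the docstring example above; the filter itself is illustrative): `choice_func` may return False to drop a parameter or a string to rename it, and `split_dst_file=(1, 2)` runs the first of two subtasks:

    import mindspore as ms

    def choice_func(name):
        if name.startswith("adam_"):              # bool return: drop this parameter
            return False
        return name.replace("network.", "")       # str return: save under a new name

    src_dir = "/usr/safetensors/llama31B/4p_safetensors/"
    src_strategy_file = "/usr/safetensors/llama31B/strategy_4p.ckpt"
    dst_dir = "/usr/safetensors/llama31B/merge_llama31B_4p/"
    ms.parallel.unified_safetensors(src_dir, src_strategy_file, dst_dir,
                                    max_process_num=32, choice_func=choice_func,
                                    split_dst_file=(1, 2))   # subtask 1 of 2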
@@ -914,13 +888,11 @@ def unified_safetensors(src_dir, src_strategy_file, dst_dir, merge_with_redundan
  "but it is missing.".format(needed_rank, rank))
  layout_map = _convert_to_list(src_strategy_dict)

- total_size = 0
  actual_params = set()
  for _, file_name in all_safetensor_files_map.items():
- total_size += os.path.getsize(file_name) / 1024 / 1024 / 1024
  with safe_open(file_name, framework="np") as f:
  actual_params.update(f.keys())
- split_num = math.ceil(total_size / 3)
+
  params_to_store = actual_params & set(layout_map.keys())

  name_list = []
@@ -928,29 +900,55 @@ def unified_safetensors(src_dir, src_strategy_file, dst_dir, merge_with_redundan
  if name.startswith("accu_grads"):
  continue
  name_list.append(name)
- split_list = _split_list(name_list, split_num)
-
- with safe_open(all_safetensor_files_map.get(0), framework="np") as f:
- all_key = f.keys()
- hyper_parameter = set(all_key) - set(name_list)
- if hyper_parameter:
- hyper_dict = {}
- for key in hyper_parameter:
- hyper_dict[key] = f.get_tensor(key)
- save_file(hyper_dict, os.path.join(dst_dir, "hyper_param.safetensors"))
-
- # save parameter map json
- param_name_dict = dict()
- for index, part_list in enumerate(split_list):
- for name in part_list:
- param_name_dict[name] = f"part{index}.safetensors"
- json_str = json.dumps(param_name_dict, indent=4)
- map_file = os.path.join(dst_dir, "param_name_map.json")
- with open(map_file, 'w') as f:
- f.write(json_str)
-
- max_process = min(split_num, 100)
- res = [i for i in range(split_num)]
+
+ param_size_dict = {}
+ param_total_size = 0
+ for _, file_name in all_safetensor_files_map.items():
+ with safe_open(file_name, framework="np") as f:
+ for k in f.keys():
+ if k in name_list:
+ py_slice = f.get_slice(k)
+ param_total_size += _cal_param_size(py_slice.get_shape(), py_slice.get_dtype())
+ param_dst_shape = _get_dst_shape(k, py_slice.get_shape(), origin_src_strategy_list)
+ # Convert the shape of np.int32 type to int type to prevent overflow in subsequent calculations.
+ param_dst_shape = [int(item) for item in param_dst_shape]
+ if choice_func is not None:
+ choice_out = choice_func(k)
+ if isinstance(choice_out, bool):
+ if not choice_out:
+ continue
+ if k not in param_size_dict:
+ param_size_dict[k] = _cal_param_size(param_dst_shape, py_slice.get_dtype())
+ split_num = math.ceil(sum(param_size_dict.values()) / 1024 / 1024 / 1024 / 3)
+ split_num = min(split_num, len(name_list))
+ split_list = _split_weight_dict(param_size_dict, split_num)
+
+ if split_dst_file:
+ current_machine_num = split_dst_file[0]
+ total_machine_num = split_dst_file[1]
+ n = len(split_list)
+ avg_length = n // total_machine_num
+ remainder = n % total_machine_num
+ start_index = (avg_length * (current_machine_num - 1)) + min(current_machine_num - 1, remainder)
+ end_index = start_index + avg_length + (1 if current_machine_num <= remainder else 0)
+ sub_list = []
+ for i in range(len(split_list)):
+ if start_index <= i < end_index:
+ sub_list.append(split_list[i])
+ else:
+ sub_list.append([-1])
+ else:
+ sub_list = split_list
+
+ _save_hyper_param(split_dst_file, all_safetensor_files_map, name_list, dst_dir)
+ _save_parameter_map_json(split_list, choice_func, split_dst_file, dst_dir, param_total_size)
+
+ if split_dst_file:
+ split_num = end_index - start_index
+ res = list(range(start_index, end_index))
+ else:
+ res = [i for i in range(split_num)]
+ max_process = min(split_num, max_process_num)
  res = _split_list(res, max_process)
  processes = []
  src_strategy_name = None
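The `split_dst_file` arithmetic above gives each subtask a contiguous, near-equal share of the `split_list` parts, with the first `remainder` subtasks taking one extra part; parts outside the share are masked with `[-1]` so they are skipped when the map file is written. A worked example (hypothetical: 7 parts over 3 subtasks):

    n, total = 7, 3
    avg_length, remainder = n // total, n % total             # 2, 1
    for cur in (1, 2, 3):
        start = avg_length * (cur - 1) + min(cur - 1, remainder)
        end = start + avg_length + (1 if cur <= remainder else 0)
        print(cur, list(range(start, end)))
    # 1 [0, 1, 2]
    # 2 [3, 4]
    # 3 [5, 6]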
@@ -960,7 +958,7 @@ def unified_safetensors(src_dir, src_strategy_file, dst_dir, merge_with_redundan
  p = mp.Process(target=_transform_safetensors_single_semaphore, args=(
  needed_rank_list_map, all_safetensor_files_map, src_stage_device_num, dst_stage_device_num,
  src_strategy_dict, None, origin_src_strategy_list, origin_dst_strategy_list,
- "", dst_dir, "safetensors", None, split_list, res[i], True, src_strategy_name))
+ "", dst_dir, "safetensors", None, sub_list, res[i], True, src_strategy_name, choice_func))
  p.start()
  processes.append(p)
  for p in processes:
@@ -974,13 +972,21 @@ def _transform_safetensors_single_semaphore(needed_rank_list_map, all_safetensor
  origin_dst_strategy_list,
  ckpt_prefix, dst_safetensors_dir, output_format,
  _transform_param_list, pipe_param_list=None, file_index=None,
- unified_flag=False, src_strategy_file=None):
+ unified_flag=False, src_strategy_file=None, choice_func=None):
+ """transform safetensors single semaphore"""
+ total_io_cost_time = 0
  for i in file_index:
- _transform_safetensors_single(needed_rank_list_map, all_safetensor_files_map, src_stage_device_num,
- dst_stage_device_num, src_strategy_dict, dst_strategy_dict,
- origin_src_strategy_list,
- origin_dst_strategy_list, ckpt_prefix, dst_safetensors_dir, output_format,
- _transform_param_list, pipe_param_list[i], i, unified_flag, src_strategy_file)
+ io_cost_time = _transform_safetensors_single(needed_rank_list_map, all_safetensor_files_map,
+ src_stage_device_num, dst_stage_device_num, src_strategy_dict,
+ dst_strategy_dict, origin_src_strategy_list,
+ origin_dst_strategy_list, ckpt_prefix, dst_safetensors_dir,
+ output_format, _transform_param_list, pipe_param_list[i], i,
+ unified_flag, src_strategy_file, choice_func)
+ while psutil.virtual_memory().percent > 50:
+ time.sleep(1)
+ total_io_cost_time += io_cost_time
+ vlog_print("1", "ME", __file__, sys._getframe().f_lineno,
+ f"Unified safetensors io cost time:{total_io_cost_time}.")


  def _split_list(split_list, split_num):
@@ -1027,36 +1033,76 @@ def _apply_sf_obj_transform_operators(transform_operator_stack, sf_obj, device_n
  return sf_obj


- def _load_parallel_checkpoint(total_safetensors_dir, dst_strategy_file, net=None, dst_safetensors_dir=None,
- rank_id=None):
- """load parallel safetensors by merged file."""
- file_list = os.listdir(total_safetensors_dir)
- json_files = [file for file in file_list if file.endswith('.json')]
- if len(json_files) != 1:
- raise ValueError(f"For 'load_parallel_checkpoint', the number of json files in 'total_safetensors_dir' "
- f"must be 1, but got {len(json_files)}.")
- param_name_json = os.path.join(total_safetensors_dir, json_files[0])
- with open(param_name_json, 'r') as f:
- param_name_map = json.load(f)
+ def _process_hyper_params(file_list, total_safetensors_dir, total_param):
+ """process hyper params"""
+ if 'hyper_param.safetensors' in file_list:
+ hyper_parameter_file_name = os.path.join(total_safetensors_dir, "hyper_param.safetensors")
+ with safe_open(hyper_parameter_file_name, framework="np") as f:
+ for key in f.keys():
+ total_param[key] = ms.Parameter(ms.Tensor.from_numpy(f.get_tensor(key)))
+ return total_param
+
+
+ def _cal_param_name_map_and_param_list(file_list, total_safetensors_dir, json_files, dst_strategy_file, rank_id):
+ """calculate param_name_map and param_list"""
+ if len(file_list) == 1:
+ logger.info("There is only one weight file in the directory, which will be automatically mapped.")
+ file_name = os.path.join(total_safetensors_dir, file_list[0])
+ is_file = os.path.isfile(file_name)
+ if not is_file:
+ raise ValueError(f"For 'load_parallel_checkpoint', weight files must be included "
+ f"in the `unified_safetensors_dir`.")
+ with safe_open(file_name, framework="np") as f:
+ keys = f.keys()
+ values = len(keys) * [file_list[0]]
+ param_name_map = dict(zip(keys, values))
+ else:
+ if not json_files:
+ raise ValueError(
+ f"For 'load_parallel_checkpoint', there must be a JSON file named 'param_name_map.json' in "
+ f"the 'total_safetensors_dir'.")
+ param_name_json = os.path.join(total_safetensors_dir, json_files[0])
+ with open(param_name_json, 'r') as f:
+ param_name_map = json.load(f)
+ if "weight_map" in param_name_map:
+ param_name_map = param_name_map["weight_map"]
+
  if dst_strategy_file is not None:
  _, dst_strategy_list = _extract_src_dst_layout_map(rank_id, None, dst_strategy_file)
  param_list = dst_strategy_list.keys()
  else:
  dst_strategy_list = None
  param_list = param_name_map.keys()
+ return param_name_map, param_list, dst_strategy_list
+

+ def _load_parallel_checkpoint(file_info):
+ """load parallel safetensors by merged file."""
+ total_safetensors_dir, dst_strategy_file, net, dst_safetensors_dir, \
+ rank_id, output_format, name_map, return_param_dict = file_info
+ pid = os.getpid()
+ total_cores = os.cpu_count()
+ all_cores = set(range(total_cores))
+ os.sched_setaffinity(pid, all_cores)
+ file_list = os.listdir(total_safetensors_dir)
+ json_files = [file for file in file_list if file == "param_name_map.json"]
+ param_name_map, param_list, dst_strategy_list = _cal_param_name_map_and_param_list(file_list, total_safetensors_dir,
+ json_files, dst_strategy_file,
+ rank_id)
  total_param = dict()
  dst_stage_device_num = np.prod(dst_strategy_list.get(list(dst_strategy_list.keys())[0])[0]) if dst_strategy_list \
  is not None else 1
  local_rank_id = rank_id % dst_stage_device_num
- for param_name in param_list:
+ total_io_cost_time = 0
+ for param_name in _progress_bar(param_list):
  if param_name not in param_name_map:
  continue
  file_name = os.path.join(total_safetensors_dir, param_name_map[param_name])
  with safe_open(file_name, framework="np") as f:
- if param_name not in f.keys():
+ cur_param_name = name_map.get(param_name) if name_map is not None and param_name in name_map else param_name
+ if cur_param_name not in f.keys():
  continue
- sf_obj = f.get_slice(param_name)
+ sf_obj = f.get_slice(cur_param_name)

  tensor_shape = sf_obj.get_shape()
  from_dev_matrix = [1]
@@ -1078,6 +1124,9 @@ def _load_parallel_checkpoint(total_safetensors_dir, dst_strategy_file, net=None
  continue
  origin_tensor_shape += (item * param_strategy[i],)

+ has_layout_from = any(isinstance(i, (list, tuple)) for i in from_tensor_map)
+ has_layout_to = any(isinstance(i, (list, tuple)) for i in to_tensor_map_origin)
+
  from_dev_matrix, from_tensor_map, from_full_tensor_shape = _construct_tensor_layout_for_opt_shard(
  from_dev_matrix, from_tensor_map, from_opt_shard_step, from_opt_shard_size, origin_tensor_shape)
  to_dev_matrix, to_tensor_map, to_full_tensor_shape = _construct_tensor_layout_for_opt_shard(
@@ -1097,25 +1146,34 @@ def _load_parallel_checkpoint(total_safetensors_dir, dst_strategy_file, net=None
  from_info_tuple = (from_opt_shard_size, from_dev_matrix, from_tensor_map, from_full_tensor_shape)
  to_info_tuple = (to_opt_shard_size, to_dev_matrix_origin, to_tensor_map_origin, origin_tensor_shape)
  _insert_opt_shard_reshape(param_rank_map, from_info_tuple, to_info_tuple)
+ _insert_expand_layout_reshape(param_rank_map, from_info_tuple, to_info_tuple,
+ has_layout_from, has_layout_to)
  transform_operator_stack = _generate_transform_operator_stack(param_rank_map, local_rank_id)
-
+ start_time = time.time()
  slice_param = _apply_sf_obj_transform_operators(transform_operator_stack, sf_obj, device_num)
+ end_time = time.time()
+ cost_time = end_time - start_time
+ total_io_cost_time += cost_time
  else:
+ start_time = time.time()
  slice_param = sf_obj[:]
-
- total_param[param_name] = ms.Parameter(slice_param)
-
- if 'hyper_param.safetensors' in file_list:
- hyper_parameter_file_name = os.path.join(total_safetensors_dir, "hyper_param.safetensors")
- with safe_open(hyper_parameter_file_name, framework="np") as f:
- for key in f.keys():
- total_param[key] = ms.Parameter(f.get_tensor(key))
+ end_time = time.time()
+ cost_time = end_time - start_time
+ total_io_cost_time += cost_time
+ total_param[param_name] = ms.Parameter(ms.Tensor.from_numpy(slice_param))
+ vlog_print("1", "ME", __file__, sys._getframe().f_lineno,
+ f"load distributed safetensors io cost time:{total_io_cost_time}.")
+ total_param = _process_hyper_params(file_list, total_safetensors_dir, total_param)
  if net is not None:
- param_not_load, ckpt_not_load = ms.load_param_into_net(net, total_param)
- return param_not_load, ckpt_not_load
+ if not return_param_dict:
+ logger.info("start load param into net...")
+ param_not_load, ckpt_not_load = ms.load_param_into_net(net, total_param)
+ logger.info("load param into net is end...")
+ return param_not_load, ckpt_not_load
+ return total_param
  _make_dir(os.path.join(dst_safetensors_dir, f"rank_{rank_id}"), "path")
- ms.save_checkpoint(total_param, os.path.join(dst_safetensors_dir, f"rank_{rank_id}", f"net.safetensors"),
- format='safetensors')
+ ms.save_checkpoint(total_param, os.path.join(dst_safetensors_dir, f"rank_{rank_id}", f"net.{output_format}"),
+ format=output_format)
  return None


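Note that `_load_parallel_checkpoint` now receives one packed `file_info` tuple instead of separate keyword arguments. A minimal call sketch (variable values are illustrative) matching the unpacking order shown above:

    file_info = (unified_dir,        # total_safetensors_dir holding part*.safetensors
                 dst_strategy_file,  # destination strategy file, or None
                 net,                # network to load into, or None to save per-rank files
                 None,               # dst_safetensors_dir, used only when net is None
                 rank_id,            # global rank of this process
                 "safetensors",      # output_format for the per-rank save path
                 None,               # name_map: optional {param name: name stored in file}
                 False)              # return_param_dict
    param_not_load, ckpt_not_load = _load_parallel_checkpoint(file_info)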
@@ -1143,4 +1201,4 @@ def _get_slice(rank_id, sf_obj, param_name, dst_strategy_list):


  __all__ = ["_transform_safetensors", "transform_safetensors_by_stage",
- "transform_safetensors_by_rank", "ckpt_to_safetensors", "safetensors_to_ckpt", "unified_safetensors"]
+ "transform_safetensors_by_rank", "unified_safetensors"]