mindspore 2.7.0rc1-cp311-cp311-win_amd64.whl → 2.7.1-cp311-cp311-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release. This version of mindspore might be problematic.
Files changed (370)
  1. mindspore/.commit_id +1 -1
  2. mindspore/__init__.py +5 -2
  3. mindspore/_c_dataengine.cp311-win_amd64.pyd +0 -0
  4. mindspore/_c_expression.cp311-win_amd64.pyd +0 -0
  5. mindspore/_c_mindrecord.cp311-win_amd64.pyd +0 -0
  6. mindspore/_checkparam.py +2 -2
  7. mindspore/_extends/builtin_operations.py +3 -3
  8. mindspore/_extends/parallel_compile/akg_compiler/custom.py +1109 -0
  9. mindspore/_extends/parallel_compile/akg_compiler/gen_custom_op_files.py +1 -1
  10. mindspore/_extends/parse/__init__.py +3 -3
  11. mindspore/_extends/parse/compile_config.py +24 -1
  12. mindspore/_extends/parse/deprecated/deprecated_tensor_method.py +6 -3
  13. mindspore/_extends/parse/parser.py +28 -22
  14. mindspore/_extends/parse/resources.py +1 -1
  15. mindspore/_extends/parse/standard_method.py +23 -2
  16. mindspore/_extends/parse/trope.py +2 -1
  17. mindspore/_extends/pijit/pijit_func_white_list.py +9 -27
  18. mindspore/amp.py +0 -18
  19. mindspore/avcodec-59.dll +0 -0
  20. mindspore/avdevice-59.dll +0 -0
  21. mindspore/avfilter-8.dll +0 -0
  22. mindspore/avformat-59.dll +0 -0
  23. mindspore/avutil-57.dll +0 -0
  24. mindspore/boost/base.py +29 -2
  25. mindspore/common/__init__.py +18 -12
  26. mindspore/common/_decorator.py +3 -2
  27. mindspore/common/_grad_function.py +3 -1
  28. mindspore/common/_tensor_cpp_method.py +1 -1
  29. mindspore/common/_tensor_docs.py +371 -96
  30. mindspore/common/_utils.py +7 -43
  31. mindspore/common/api.py +434 -135
  32. mindspore/common/dtype.py +98 -57
  33. mindspore/common/dump.py +7 -108
  34. mindspore/common/dynamic_shape/__init__.py +0 -0
  35. mindspore/common/{auto_dynamic_shape.py → dynamic_shape/auto_dynamic_shape.py} +15 -23
  36. mindspore/common/dynamic_shape/enable_dynamic.py +197 -0
  37. mindspore/common/file_system.py +59 -9
  38. mindspore/common/hook_handle.py +82 -3
  39. mindspore/common/jit_config.py +5 -1
  40. mindspore/common/jit_trace.py +27 -12
  41. mindspore/common/lazy_inline.py +5 -3
  42. mindspore/common/np_dtype.py +3 -3
  43. mindspore/common/parameter.py +17 -127
  44. mindspore/common/recompute.py +4 -13
  45. mindspore/common/tensor.py +50 -217
  46. mindspore/communication/_comm_helper.py +11 -1
  47. mindspore/communication/comm_func.py +138 -4
  48. mindspore/communication/management.py +85 -1
  49. mindspore/config/op_info.config +0 -15
  50. mindspore/context.py +20 -106
  51. mindspore/dataset/__init__.py +1 -1
  52. mindspore/dataset/audio/transforms.py +1 -1
  53. mindspore/dataset/core/config.py +35 -1
  54. mindspore/dataset/engine/datasets.py +338 -319
  55. mindspore/dataset/engine/datasets_user_defined.py +38 -22
  56. mindspore/dataset/engine/datasets_vision.py +1 -1
  57. mindspore/dataset/engine/validators.py +1 -15
  58. mindspore/dataset/transforms/c_transforms.py +2 -2
  59. mindspore/dataset/transforms/transforms.py +3 -3
  60. mindspore/dataset/vision/__init__.py +1 -1
  61. mindspore/dataset/vision/py_transforms.py +8 -8
  62. mindspore/dataset/vision/transforms.py +17 -5
  63. mindspore/dataset/vision/utils.py +632 -21
  64. mindspore/device_context/ascend/op_tuning.py +35 -1
  65. mindspore/dnnl.dll +0 -0
  66. mindspore/{profiler/common/validator → graph}/__init__.py +9 -1
  67. mindspore/graph/custom_pass.py +55 -0
  68. mindspore/include/api/cell.h +28 -4
  69. mindspore/include/api/cfg.h +24 -7
  70. mindspore/include/api/context.h +1 -0
  71. mindspore/include/api/delegate.h +0 -2
  72. mindspore/include/api/dual_abi_helper.h +100 -19
  73. mindspore/include/api/graph.h +14 -1
  74. mindspore/include/api/kernel.h +16 -3
  75. mindspore/include/api/kernel_api.h +9 -1
  76. mindspore/include/api/metrics/accuracy.h +9 -0
  77. mindspore/include/api/model.h +5 -1
  78. mindspore/include/api/model_group.h +4 -0
  79. mindspore/include/api/model_parallel_runner.h +2 -0
  80. mindspore/include/api/status.h +48 -10
  81. mindspore/include/api/types.h +6 -1
  82. mindspore/include/dataset/constants.h +9 -0
  83. mindspore/include/dataset/execute.h +2 -2
  84. mindspore/jpeg62.dll +0 -0
  85. mindspore/mindrecord/__init__.py +3 -3
  86. mindspore/mindrecord/common/exceptions.py +1 -0
  87. mindspore/mindrecord/config.py +1 -1
  88. mindspore/{parallel/mpi → mindrecord/core}/__init__.py +4 -1
  89. mindspore/mindrecord/{shardheader.py → core/shardheader.py} +2 -1
  90. mindspore/mindrecord/{shardindexgenerator.py → core/shardindexgenerator.py} +1 -1
  91. mindspore/mindrecord/{shardreader.py → core/shardreader.py} +2 -1
  92. mindspore/mindrecord/{shardsegment.py → core/shardsegment.py} +2 -2
  93. mindspore/mindrecord/{shardutils.py → core/shardutils.py} +1 -1
  94. mindspore/mindrecord/{shardwriter.py → core/shardwriter.py} +1 -1
  95. mindspore/mindrecord/filereader.py +4 -4
  96. mindspore/mindrecord/filewriter.py +5 -5
  97. mindspore/mindrecord/mindpage.py +2 -2
  98. mindspore/mindrecord/tools/cifar10.py +4 -3
  99. mindspore/mindrecord/tools/cifar100.py +1 -1
  100. mindspore/mindrecord/tools/cifar100_to_mr.py +1 -1
  101. mindspore/mindrecord/tools/cifar10_to_mr.py +6 -6
  102. mindspore/mindrecord/tools/csv_to_mr.py +1 -1
  103. mindspore/mindrecord/tools/imagenet_to_mr.py +1 -1
  104. mindspore/mindrecord/tools/mnist_to_mr.py +1 -1
  105. mindspore/mindrecord/tools/tfrecord_to_mr.py +1 -1
  106. mindspore/mindspore_backend_common.dll +0 -0
  107. mindspore/mindspore_backend_manager.dll +0 -0
  108. mindspore/mindspore_cluster.dll +0 -0
  109. mindspore/mindspore_common.dll +0 -0
  110. mindspore/mindspore_core.dll +0 -0
  111. mindspore/mindspore_cpu.dll +0 -0
  112. mindspore/mindspore_dump.dll +0 -0
  113. mindspore/mindspore_frontend.dll +0 -0
  114. mindspore/mindspore_glog.dll +0 -0
  115. mindspore/mindspore_hardware_abstract.dll +0 -0
  116. mindspore/mindspore_memory_pool.dll +0 -0
  117. mindspore/mindspore_ms_backend.dll +0 -0
  118. mindspore/mindspore_ops.dll +0 -0
  119. mindspore/{mindspore_ops_host.dll → mindspore_ops_cpu.dll} +0 -0
  120. mindspore/mindspore_profiler.dll +0 -0
  121. mindspore/mindspore_pyboost.dll +0 -0
  122. mindspore/mindspore_pynative.dll +0 -0
  123. mindspore/mindspore_runtime_pipeline.dll +0 -0
  124. mindspore/mindspore_runtime_utils.dll +0 -0
  125. mindspore/mindspore_tools.dll +0 -0
  126. mindspore/mint/__init__.py +15 -10
  127. mindspore/mint/distributed/__init__.py +4 -0
  128. mindspore/mint/distributed/distributed.py +392 -69
  129. mindspore/mint/nn/__init__.py +2 -16
  130. mindspore/mint/nn/functional.py +4 -110
  131. mindspore/mint/nn/layer/__init__.py +0 -2
  132. mindspore/mint/nn/layer/_functions.py +1 -2
  133. mindspore/mint/nn/layer/activation.py +0 -6
  134. mindspore/mint/nn/layer/basic.py +0 -47
  135. mindspore/mint/nn/layer/conv.py +10 -10
  136. mindspore/mint/nn/layer/normalization.py +11 -16
  137. mindspore/mint/nn/layer/pooling.py +0 -4
  138. mindspore/nn/__init__.py +1 -3
  139. mindspore/nn/cell.py +231 -239
  140. mindspore/nn/layer/activation.py +4 -2
  141. mindspore/nn/layer/basic.py +56 -14
  142. mindspore/nn/layer/container.py +16 -0
  143. mindspore/nn/layer/embedding.py +4 -169
  144. mindspore/nn/layer/image.py +1 -1
  145. mindspore/nn/layer/normalization.py +2 -1
  146. mindspore/nn/layer/thor_layer.py +4 -85
  147. mindspore/nn/optim/ada_grad.py +0 -1
  148. mindspore/nn/optim/adafactor.py +0 -1
  149. mindspore/nn/optim/adam.py +32 -127
  150. mindspore/nn/optim/adamax.py +0 -1
  151. mindspore/nn/optim/asgd.py +0 -1
  152. mindspore/nn/optim/ftrl.py +8 -102
  153. mindspore/nn/optim/lamb.py +1 -4
  154. mindspore/nn/optim/lars.py +0 -3
  155. mindspore/nn/optim/lazyadam.py +25 -218
  156. mindspore/nn/optim/momentum.py +5 -43
  157. mindspore/nn/optim/optimizer.py +6 -55
  158. mindspore/nn/optim/proximal_ada_grad.py +0 -1
  159. mindspore/nn/optim/rmsprop.py +0 -1
  160. mindspore/nn/optim/rprop.py +0 -1
  161. mindspore/nn/optim/sgd.py +0 -1
  162. mindspore/nn/optim/tft_wrapper.py +2 -4
  163. mindspore/nn/optim/thor.py +0 -2
  164. mindspore/nn/probability/bijector/bijector.py +7 -8
  165. mindspore/nn/probability/bijector/gumbel_cdf.py +2 -2
  166. mindspore/nn/probability/bijector/power_transform.py +20 -21
  167. mindspore/nn/probability/bijector/scalar_affine.py +5 -5
  168. mindspore/nn/probability/bijector/softplus.py +13 -14
  169. mindspore/nn/probability/distribution/_utils/utils.py +2 -2
  170. mindspore/nn/wrap/cell_wrapper.py +39 -5
  171. mindspore/nn/wrap/grad_reducer.py +4 -89
  172. mindspore/numpy/array_creations.py +4 -4
  173. mindspore/numpy/fft.py +9 -9
  174. mindspore/numpy/utils_const.py +1 -1
  175. mindspore/{nn/reinforcement → onnx}/__init__.py +5 -8
  176. mindspore/onnx/onnx_export.py +137 -0
  177. mindspore/opencv_core4110.dll +0 -0
  178. mindspore/opencv_imgcodecs4110.dll +0 -0
  179. mindspore/{opencv_imgproc452.dll → opencv_imgproc4110.dll} +0 -0
  180. mindspore/ops/__init__.py +2 -0
  181. mindspore/ops/_grad_experimental/grad_comm_ops.py +38 -2
  182. mindspore/ops/_grad_experimental/grad_inner_ops.py +0 -9
  183. mindspore/ops/_op_impl/aicpu/__init__.py +0 -10
  184. mindspore/ops/_op_impl/cpu/__init__.py +1 -5
  185. mindspore/ops/_op_impl/cpu/{buffer_append.py → joinedstr_op.py} +8 -8
  186. mindspore/ops/auto_generate/cpp_create_prim_instance_helper.py +28 -24
  187. mindspore/ops/auto_generate/gen_extend_func.py +6 -11
  188. mindspore/ops/auto_generate/gen_ops_def.py +385 -154
  189. mindspore/ops/auto_generate/gen_ops_prim.py +5676 -5167
  190. mindspore/ops/communication.py +97 -0
  191. mindspore/ops/composite/__init__.py +5 -2
  192. mindspore/ops/composite/base.py +16 -2
  193. mindspore/ops/composite/multitype_ops/__init__.py +3 -1
  194. mindspore/ops/composite/multitype_ops/_compile_utils.py +150 -8
  195. mindspore/ops/composite/multitype_ops/_constexpr_utils.py +1 -1
  196. mindspore/ops/composite/multitype_ops/add_impl.py +7 -0
  197. mindspore/ops/composite/multitype_ops/mod_impl.py +27 -0
  198. mindspore/ops/function/__init__.py +2 -0
  199. mindspore/ops/function/array_func.py +24 -18
  200. mindspore/ops/function/comm_func.py +3883 -0
  201. mindspore/ops/function/debug_func.py +7 -6
  202. mindspore/ops/function/grad/grad_func.py +4 -12
  203. mindspore/ops/function/math_func.py +89 -86
  204. mindspore/ops/function/nn_func.py +92 -313
  205. mindspore/ops/function/random_func.py +9 -18
  206. mindspore/ops/functional.py +4 -1
  207. mindspore/ops/functional_overload.py +377 -30
  208. mindspore/ops/operations/__init__.py +2 -5
  209. mindspore/ops/operations/_custom_ops_utils.py +7 -9
  210. mindspore/ops/operations/_inner_ops.py +12 -50
  211. mindspore/ops/operations/_rl_inner_ops.py +0 -933
  212. mindspore/ops/operations/array_ops.py +5 -50
  213. mindspore/ops/operations/comm_ops.py +95 -17
  214. mindspore/ops/operations/custom_ops.py +237 -22
  215. mindspore/ops/operations/debug_ops.py +33 -35
  216. mindspore/ops/operations/manually_defined/ops_def.py +39 -318
  217. mindspore/ops/operations/math_ops.py +5 -5
  218. mindspore/ops/operations/nn_ops.py +3 -3
  219. mindspore/ops/operations/sparse_ops.py +0 -83
  220. mindspore/ops/primitive.py +4 -27
  221. mindspore/ops/tensor_method.py +88 -10
  222. mindspore/ops_generate/aclnn/aclnn_kernel_register_auto_cc_generator.py +5 -5
  223. mindspore/ops_generate/aclnn/gen_aclnn_implement.py +8 -8
  224. mindspore/ops_generate/api/functions_cc_generator.py +53 -4
  225. mindspore/ops_generate/api/tensor_func_reg_cpp_generator.py +25 -11
  226. mindspore/ops_generate/common/gen_constants.py +11 -10
  227. mindspore/ops_generate/common/op_proto.py +18 -1
  228. mindspore/ops_generate/common/template.py +102 -245
  229. mindspore/ops_generate/common/template_utils.py +212 -0
  230. mindspore/ops_generate/gen_custom_ops.py +69 -0
  231. mindspore/ops_generate/op_def/ops_def_cc_generator.py +78 -7
  232. mindspore/ops_generate/op_def_py/base_op_prim_py_generator.py +360 -0
  233. mindspore/ops_generate/op_def_py/custom_op_prim_py_generator.py +140 -0
  234. mindspore/ops_generate/op_def_py/op_def_py_generator.py +54 -7
  235. mindspore/ops_generate/op_def_py/op_prim_py_generator.py +5 -312
  236. mindspore/ops_generate/pyboost/auto_grad_impl_cc_generator.py +74 -17
  237. mindspore/ops_generate/pyboost/auto_grad_reg_cc_generator.py +22 -5
  238. mindspore/ops_generate/pyboost/gen_pyboost_func.py +0 -16
  239. mindspore/ops_generate/pyboost/op_template_parser.py +3 -2
  240. mindspore/ops_generate/pyboost/pyboost_functions_cpp_generator.py +21 -5
  241. mindspore/ops_generate/pyboost/pyboost_functions_h_generator.py +2 -2
  242. mindspore/ops_generate/pyboost/pyboost_functions_impl_cpp_generator.py +30 -10
  243. mindspore/ops_generate/pyboost/pyboost_grad_function_cpp_generator.py +10 -3
  244. mindspore/ops_generate/pyboost/pyboost_internal_kernel_info_adapter_generator.py +1 -1
  245. mindspore/ops_generate/pyboost/pyboost_native_grad_functions_generator.py +19 -9
  246. mindspore/ops_generate/pyboost/pyboost_op_cpp_code_generator.py +71 -28
  247. mindspore/ops_generate/pyboost/pyboost_overload_functions_cpp_generator.py +10 -9
  248. mindspore/ops_generate/pyboost/pyboost_utils.py +27 -16
  249. mindspore/ops_generate/resources/yaml_loader.py +13 -0
  250. mindspore/ops_generate/tensor_py_cc_generator.py +2 -2
  251. mindspore/parallel/_auto_parallel_context.py +5 -15
  252. mindspore/parallel/_cell_wrapper.py +1 -1
  253. mindspore/parallel/_parallel_serialization.py +4 -6
  254. mindspore/parallel/_ps_context.py +2 -2
  255. mindspore/parallel/_utils.py +34 -17
  256. mindspore/parallel/auto_parallel.py +23 -9
  257. mindspore/parallel/checkpoint_transform.py +20 -2
  258. mindspore/parallel/cluster/process_entity/_api.py +28 -33
  259. mindspore/parallel/cluster/process_entity/_utils.py +9 -5
  260. mindspore/parallel/cluster/run.py +5 -3
  261. mindspore/{experimental/llm_boost/ascend_native → parallel/distributed}/__init__.py +21 -22
  262. mindspore/parallel/distributed/distributed_data_parallel.py +393 -0
  263. mindspore/parallel/distributed/flatten_grad_buffer.py +295 -0
  264. mindspore/parallel/function/reshard_func.py +6 -5
  265. mindspore/parallel/nn/parallel_cell_wrapper.py +40 -3
  266. mindspore/parallel/nn/parallel_grad_reducer.py +0 -8
  267. mindspore/parallel/shard.py +7 -21
  268. mindspore/parallel/strategy.py +336 -0
  269. mindspore/parallel/transform_safetensors.py +127 -20
  270. mindspore/profiler/analysis/viewer/ascend_kernel_details_viewer.py +13 -9
  271. mindspore/profiler/analysis/viewer/ascend_op_memory_viewer.py +1 -1
  272. mindspore/profiler/analysis/viewer/ms_minddata_viewer.py +1 -1
  273. mindspore/profiler/common/constant.py +5 -0
  274. mindspore/profiler/common/file_manager.py +9 -0
  275. mindspore/profiler/common/msprof_cmd_tool.py +40 -4
  276. mindspore/profiler/common/path_manager.py +65 -24
  277. mindspore/profiler/common/profiler_context.py +27 -14
  278. mindspore/profiler/common/profiler_info.py +3 -3
  279. mindspore/profiler/common/profiler_meta_data.py +1 -0
  280. mindspore/profiler/common/profiler_op_analyse.py +10 -6
  281. mindspore/profiler/common/profiler_path_manager.py +13 -0
  282. mindspore/profiler/common/util.py +30 -3
  283. mindspore/profiler/dynamic_profiler.py +91 -46
  284. mindspore/profiler/envprofiler.py +30 -5
  285. mindspore/profiler/experimental_config.py +18 -2
  286. mindspore/profiler/platform/cpu_profiler.py +10 -4
  287. mindspore/profiler/platform/npu_profiler.py +34 -7
  288. mindspore/profiler/profiler.py +193 -145
  289. mindspore/profiler/profiler_action_controller.py +1 -1
  290. mindspore/profiler/profiler_interface.py +2 -2
  291. mindspore/rewrite/symbol_tree/symbol_tree.py +1 -1
  292. mindspore/run_check/_check_version.py +108 -24
  293. mindspore/runtime/__init__.py +9 -6
  294. mindspore/runtime/executor.py +35 -0
  295. mindspore/runtime/memory.py +113 -0
  296. mindspore/runtime/thread_bind_core.py +1 -1
  297. mindspore/swresample-4.dll +0 -0
  298. mindspore/swscale-6.dll +0 -0
  299. mindspore/tinyxml2.dll +0 -0
  300. mindspore/{experimental/llm_boost → tools}/__init__.py +5 -5
  301. mindspore/tools/data_dump.py +130 -0
  302. mindspore/tools/sdc_detect.py +91 -0
  303. mindspore/tools/stress_detect.py +63 -0
  304. mindspore/train/__init__.py +6 -6
  305. mindspore/train/_utils.py +8 -21
  306. mindspore/train/amp.py +6 -7
  307. mindspore/train/callback/_callback.py +2 -1
  308. mindspore/train/callback/_checkpoint.py +1 -17
  309. mindspore/train/callback/_flops_collector.py +10 -6
  310. mindspore/train/callback/_train_fault_tolerance.py +72 -25
  311. mindspore/train/data_sink.py +5 -9
  312. mindspore/train/dataset_helper.py +5 -5
  313. mindspore/train/model.py +41 -230
  314. mindspore/train/serialization.py +160 -401
  315. mindspore/train/train_thor/model_thor.py +2 -2
  316. mindspore/turbojpeg.dll +0 -0
  317. mindspore/utils/__init__.py +6 -3
  318. mindspore/utils/dlpack.py +92 -0
  319. mindspore/utils/dryrun.py +1 -1
  320. mindspore/utils/runtime_execution_order_check.py +10 -0
  321. mindspore/utils/sdc_detect.py +14 -12
  322. mindspore/utils/stress_detect.py +43 -0
  323. mindspore/utils/utils.py +152 -16
  324. mindspore/version.py +1 -1
  325. {mindspore-2.7.0rc1.dist-info → mindspore-2.7.1.dist-info}/METADATA +3 -2
  326. {mindspore-2.7.0rc1.dist-info → mindspore-2.7.1.dist-info}/RECORD +330 -344
  327. mindspore/_extends/remote/kernel_build_server_ascend.py +0 -75
  328. mindspore/communication/_hccl_management.py +0 -297
  329. mindspore/experimental/llm_boost/ascend_native/llama_boost_ascend_native.py +0 -207
  330. mindspore/experimental/llm_boost/ascend_native/llm_boost.py +0 -52
  331. mindspore/experimental/llm_boost/atb/__init__.py +0 -23
  332. mindspore/experimental/llm_boost/atb/boost_base.py +0 -385
  333. mindspore/experimental/llm_boost/atb/llama_boost.py +0 -137
  334. mindspore/experimental/llm_boost/atb/qwen_boost.py +0 -124
  335. mindspore/experimental/llm_boost/register.py +0 -130
  336. mindspore/experimental/llm_boost/utils.py +0 -31
  337. mindspore/include/OWNERS +0 -7
  338. mindspore/mindspore_cpu_res_manager.dll +0 -0
  339. mindspore/mindspore_ops_kernel_common.dll +0 -0
  340. mindspore/mindspore_res_manager.dll +0 -0
  341. mindspore/nn/optim/_dist_optimizer_registry.py +0 -111
  342. mindspore/nn/reinforcement/_batch_read_write.py +0 -142
  343. mindspore/nn/reinforcement/_tensors_queue.py +0 -152
  344. mindspore/nn/reinforcement/tensor_array.py +0 -145
  345. mindspore/opencv_core452.dll +0 -0
  346. mindspore/opencv_imgcodecs452.dll +0 -0
  347. mindspore/ops/_op_impl/aicpu/priority_replay_buffer.py +0 -113
  348. mindspore/ops/_op_impl/aicpu/reservoir_replay_buffer.py +0 -96
  349. mindspore/ops/_op_impl/aicpu/sparse_cross.py +0 -42
  350. mindspore/ops/_op_impl/cpu/buffer_get.py +0 -28
  351. mindspore/ops/_op_impl/cpu/buffer_sample.py +0 -28
  352. mindspore/ops/_op_impl/cpu/priority_replay_buffer.py +0 -42
  353. mindspore/ops/operations/_tensor_array.py +0 -359
  354. mindspore/ops/operations/rl_ops.py +0 -288
  355. mindspore/parallel/_offload_context.py +0 -275
  356. mindspore/parallel/_recovery_context.py +0 -115
  357. mindspore/parallel/_transformer/__init__.py +0 -35
  358. mindspore/parallel/_transformer/layers.py +0 -765
  359. mindspore/parallel/_transformer/loss.py +0 -251
  360. mindspore/parallel/_transformer/moe.py +0 -693
  361. mindspore/parallel/_transformer/op_parallel_config.py +0 -222
  362. mindspore/parallel/_transformer/transformer.py +0 -3124
  363. mindspore/parallel/mpi/_mpi_config.py +0 -116
  364. mindspore/profiler/common/validator/validate_path.py +0 -84
  365. mindspore/train/memory_profiling_pb2.py +0 -298
  366. mindspore/utils/hooks.py +0 -81
  367. /mindspore/common/{_auto_dynamic.py → dynamic_shape/_auto_dynamic.py} +0 -0
  368. {mindspore-2.7.0rc1.dist-info → mindspore-2.7.1.dist-info}/WHEEL +0 -0
  369. {mindspore-2.7.0rc1.dist-info → mindspore-2.7.1.dist-info}/entry_points.txt +0 -0
  370. {mindspore-2.7.0rc1.dist-info → mindspore-2.7.1.dist-info}/top_level.txt +0 -0
@@ -78,14 +78,14 @@ def get_convert_type_str(dtype: str, optional, use_basic_type=False):
  'float': 'ToFloat',
  'bool': 'ToBool',
  'number': 'ToScalar',
- 'tuple[int]': 'ToIntList<py::tuple>',
- 'tuple[float]': 'ToFloatList<py::tuple>',
- 'tuple[bool]': 'ToBoolList<py::tuple>',
- 'tuple[tensor]': 'ToTensorList<py::tuple>',
- 'list[int]': 'ToIntList<py::list>',
- 'list[float]': 'ToFloatList<py::list>',
- 'list[bool]': 'ToBoolList<py::list>',
- 'list[tensor]': 'ToTensorList<py::list>',
+ 'tuple[int]': 'ToIntList<CPythonTuple>',
+ 'tuple[float]': 'ToFloatList<CPythonTuple>',
+ 'tuple[bool]': 'ToBoolList<CPythonTuple>',
+ 'tuple[tensor]': 'ToTensorList<CPythonTuple>',
+ 'list[int]': 'ToIntList<CPythonList>',
+ 'list[float]': 'ToFloatList<CPythonList>',
+ 'list[bool]': 'ToBoolList<CPythonList>',
+ 'list[tensor]': 'ToTensorList<CPythonList>',
  'tensor': 'ToTensor',
  'str': 'ToString',
  'type': 'ToDtype',
@@ -97,14 +97,14 @@ def get_convert_type_str(dtype: str, optional, use_basic_type=False):
  'tensor': 'ToTensorOptional',
  'type': 'ToDtypeOptional',
  'str': 'ToStringOptional',
- 'tuple[int]': 'ToIntListOptional<py::tuple>',
- 'tuple[float]': 'ToFloatListOptional<py::tuple>',
- 'tuple[bool]': 'ToBoolListOptional<py::tuple>',
- 'tuple[tensor]': 'ToTensorListOptional<py::tuple>',
- 'list[int]': 'ToIntListOptional<py::list>',
- 'list[float]': 'ToFloatListOptional<py::list>',
- 'list[bool]': 'ToBoolListOptional<py::list>',
- 'list[tensor]': 'ToTensorListOptional<py::list>',
+ 'tuple[int]': 'ToIntListOptional<CPythonTuple>',
+ 'tuple[float]': 'ToFloatListOptional<CPythonTuple>',
+ 'tuple[bool]': 'ToBoolListOptional<CPythonTuple>',
+ 'tuple[tensor]': 'ToTensorListOptional<CPythonTuple>',
+ 'list[int]': 'ToIntListOptional<CPythonList>',
+ 'list[float]': 'ToFloatListOptional<CPythonList>',
+ 'list[bool]': 'ToBoolListOptional<CPythonList>',
+ 'list[tensor]': 'ToTensorListOptional<CPythonList>',
  }
  basic_optional_type_convert = {
  'tuple[int]': "ToBasicIntVectorOptional",
@@ -385,6 +385,17 @@ def get_input_dtype(dtype: str, optional, use_basic_type=False):
  raise TypeError(f"""Unsupported convert type {dtype} for args.""")


+ def get_output_dtype(dtype: str):
+ type_convert = {
+ 'tensor': "mindspore::tensor::TensorPtr",
+ 'tuple[tensor]': "std::vector<mindspore::tensor::TensorPtr>",
+ 'list[tensor]': "std::vector<mindspore::tensor::TensorPtr>",
+ }
+ if dtype in type_convert:
+ return type_convert[dtype]
+ raise TypeError(f"""Unsupported convert type {dtype} for args.""")
+
+
  def is_cube(class_name):
  cube_set = {'Bmm', 'Baddbmm', 'MatMulExt', 'Mv'}
  if class_name in cube_set:
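The new get_output_dtype helper is pure dictionary dispatch over the three supported output kinds; its behavior can be exercised in isolation (entries and the error text are exactly those in the hunk):

print(get_output_dtype('tensor'))         # mindspore::tensor::TensorPtr
print(get_output_dtype('tuple[tensor]'))  # std::vector<mindspore::tensor::TensorPtr>
get_output_dtype('list[int]')             # TypeError: Unsupported convert type list[int] for args.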
@@ -29,6 +29,7 @@ class YamlLoader(ResourceLoader):
  """
  YamlLoader is a utility class for loading yaml files.
  """
+
  def __init__(self, resouce_type: ResourceType, yaml_path: Union[Sequence[str], str]):
  """
  Initialize YamlLoader.
@@ -65,15 +66,26 @@ class OpDocYamlLoader(YamlLoader):
  """
  OpDocYamlLoader is a class for loading op primitive doc yaml files.
  """
+
  def __init__(self):
  op_doc_yaml_path = os.path.join(K.WORK_DIR, K.MS_OP_DEF_YAML_PATH, "doc")
  super().__init__(ResourceType.OP_DOC_YAML, op_doc_yaml_path)


+ class CustomOpDocYamlLoader(YamlLoader):
+ """
+ CustomOpDocYamlLoader is a class for loading op primitive doc yaml files.
+ """
+
+ def __init__(self, doc_yaml_path):
+ super().__init__(ResourceType.OP_DOC_YAML, doc_yaml_path)
+
+
  class TensorMethodDocYamlLoader(YamlLoader):
  """
  TensorMethodDocYamlLoader is a class for loading tensor method doc yaml files.
  """
+
  def __init__(self):
  tensor_method_doc_yaml_path = os.path.join(K.WORK_DIR, K.MS_TENSOR_METHOD_DOC_YAML_PATH)
  super().__init__(ResourceType.TENSOR_METHOD_DOC_YAML, tensor_method_doc_yaml_path)
@@ -83,6 +95,7 @@ class MintFuncDocYamlLoader(YamlLoader):
  """
  MintFuncDocYamlLoader is a class for loading mint func doc yaml files.
  """
+
  def __init__(self):
  mint_func_doc_yaml_path = os.path.join(K.WORK_DIR, K.MS_MINT_FUNC_DOC_YAML_PATH)
  super().__init__(ResourceType.MINT_FUNC_DOC_YAML, mint_func_doc_yaml_path)
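The added CustomOpDocYamlLoader mirrors OpDocYamlLoader but takes the doc directory as a constructor argument instead of deriving it from K.WORK_DIR, so custom-operator packages can point it at their own doc tree. A hypothetical call site (the path is illustrative):

builtin_docs = OpDocYamlLoader()                                # path derived from K.WORK_DIR
custom_docs = CustomOpDocYamlLoader("/path/to/custom_ops/doc")  # caller-supplied, hypothetical path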
@@ -13,7 +13,7 @@
  # limitations under the License.
  # ============================================================================
  """
- Generates mindspore/ccsrc/pybind_api/ir/tensor_py.cc which includes the CPython Tensor APIs.
+ Generates mindspore/ccsrc/pybind_api/ir/tensor/tensor_py.cc which includes the CPython Tensor APIs.
  """

  import os
@@ -26,7 +26,7 @@ from pyboost import pyboost_utils

  class TensorPyCppGenerator(BaseGenerator):
  """
- This class is responsible for generating mindspore/ccsrc/pybind_api/ir/tensor_register/
+ This class is responsible for generating mindspore/ccsrc/pybind_api/ir/tensor/tensor_register/
  auto_generate/tensor_py_gen.cc
  """
  def __init__(self):
@@ -81,6 +81,7 @@ class _PipelineScheduler:
  PIPELINE_SEQPIPE = "seqpipe"
  PIPELINE_SEQVPP = "seqvpp"
  PIPELINE_SEQSMARTVPP = "seqsmartvpp"
+ PIPELINE_ZBV = "zero_bubble_v"


  class _AutoParallelContext:
@@ -434,13 +435,6 @@ class _AutoParallelContext:
  """
  self.check_context_handle()
  run_mode = context.get_context("mode")
- if run_mode == context.PYNATIVE_MODE and parallel_mode not in (
- context.ParallelMode.DATA_PARALLEL, context.ParallelMode.STAND_ALONE,
- context.ParallelMode.AUTO_PARALLEL):
- raise ValueError(f"Pynative only supports STAND_ALONE, DATA_PARALLEL and AUTO_PARALLEL using"
- f" sharding_propagation under shard function"
- f" for ParallelMode, "
- f"but got {parallel_mode.upper()}.")
  ret = self._context_handle.set_parallel_mode(parallel_mode)
  if ret is False:
  raise ValueError("The context configuration parameter 'parallel_mode' only support 'stand_alone', "
@@ -604,9 +598,6 @@ class _AutoParallelContext:
  if not isinstance(dim, int):
  raise TypeError("For 'set_auto_parallel_context', the element of argument "
  "'dataset_strategy' must be int type, but got the type : {} .".format(type(dim)))
- if context.get_context('mode') == context.PYNATIVE_MODE:
- raise ValueError("In PyNative mode, the setting value of 'dataset_strategy' must be either 'full_batch' "
- f"or 'data_parallel', but got {dataset_strategy}.")
  self._dataset_strategy_using_str = False
  self._context_handle.set_dataset_strategy(dataset_strategy)

@@ -646,9 +637,6 @@ class _AutoParallelContext:
  return "full_batch"
  return "data_parallel"
  dataset_strategy = self._context_handle.get_dataset_strategy()
- if context.get_context('mode') == context.PYNATIVE_MODE:
- raise ValueError("In PyNative mode, the value of 'dataset_strategy' must be either 'full_batch' "
- f"or 'data_parallel', but got the setting value is {dataset_strategy}.")
  return dataset_strategy

  def set_grad_accumulation_step(self, grad_accumulation_step):
@@ -662,7 +650,7 @@ class _AutoParallelContext:
  raise ValueError("The interface is deprecated. To use gradient accumulation, "
  "please use GradAccumulationCell in mindspore.nn.wrap.cell_wrapper.")
  self.check_context_handle()
- Validator.check_positive_int(grad_accumulation_step)
+ Validator.check_positive_int(grad_accumulation_step, prim_name='grad_accumulation_step')
  self._context_handle.set_grad_accumulation_step(grad_accumulation_step)

  def get_grad_accumulation_step(self):
@@ -998,6 +986,8 @@ class _AutoParallelContext:
  _PipelineScheduler.PIPELINE_GPIPE,
  _PipelineScheduler.PIPELINE_SEQPIPE,
  _PipelineScheduler.PIPELINE_SEQVPP,
+ _PipelineScheduler.PIPELINE_SEQSMARTVPP,
+ _PipelineScheduler.PIPELINE_ZBV,
  _PipelineScheduler.PIPELINE_SEQSMARTVPP])
  scheduler_val = pipeline_config[pp_scheduler]
  if not pipeline_config[pp_interleave] and scheduler_val != _PipelineScheduler.PIPELINE_1F1B:
@@ -1072,7 +1062,7 @@ class _AutoParallelContext:

  if threshold_name in parallel_optimizer_config:
  Validator.check_non_negative_int(
- parallel_optimizer_config[threshold_name])
+ parallel_optimizer_config[threshold_name], prim_name=threshold_name)
  self._context_handle.set_parallel_optimizer_threshold(
  parallel_optimizer_config[threshold_name])
@@ -263,7 +263,7 @@ def _single_parameter_broadcast(net, layout, param_not_load=None, param_loaded=N
  if not single_params:
  return
  param_redundancy_reversed = _get_param_redundancy_reversed(param_redundancy, cur_rank)
- if not param_redundancy_reversed or cur_rank not in single_params:
+ if not param_redundancy_reversed:
  return
  net_param_dict = net.parameters_dict()
  _chang_parallel_context(origin_dataset_strategy)
@@ -144,8 +144,7 @@ def _build_protobuf_strategy(strategy_filename):
  parallel_strategy_map = _load_protobuf_strategy(strategy_filename)
  layout_items = parallel_strategy_map.parallel_layout_item
  if not layout_items:
- raise ValueError(f"For 'build_searched_strategy', the strategy file {strategy_filename} has no sliced "
- f"parameter, please check whether the 'strategy_filename' is correct.")
+ return {}

  strategy = {}
  for layout_item in layout_items:
@@ -159,6 +158,8 @@ def _build_json_strategy(strategy_filename):
  """build strategy from json file"""
  with open(strategy_filename, 'r') as f:
  json_content = json.load(f)
+ if "parallel_layout_item" not in json_content:
+ return {}
  layout_items = json_content.get("parallel_layout_item")
  strategy = {}
  for parameter_name, layout_item in layout_items.items():
@@ -525,10 +526,7 @@ def _make_dir(path, arg_name):
  else:
  ms.log.debug("The directory(%s) doesn't exist, will create it", path)
  try:
- permissions = os.R_OK | os.W_OK | os.X_OK
- os.umask(permissions << 3 | permissions)
- mode = permissions << 6
- os.makedirs(path, mode=mode, exist_ok=True)
+ os.makedirs(path, mode=0o700, exist_ok=True)
  real_path = path
  except PermissionError as e:
  ms.log.critical("No write permission on the directory(%r), error = %r", path, e)
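The removed _make_dir arithmetic and the new literal produce the same owner-only directory mode; the difference is that the old code also reset the process-wide umask as a side effect. A quick check of the arithmetic (pure Python):

import os

permissions = os.R_OK | os.W_OK | os.X_OK       # 4 | 2 | 1 == 0o7
assert permissions << 3 | permissions == 0o077  # the umask the old code installed
assert permissions << 6 == 0o700                # the mode it passed to os.makedirs
# The replacement passes mode=0o700 directly and leaves the umask untouched.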
@@ -114,8 +114,8 @@ def _set_ps_context(**kwargs):
  Default: ``False``.
  config_file_path (str): Configuration file path used by recovery. Default: ''.
  enable_ssl (bool): Set PS SSL mode enabled or disabled. Default: ``False``.
- There might be risk when this is set to False.
- It is user's responsibility to ensure the network environment is safe.
+ When set to False, users need to review and confirm the security of network environment
+ where the distributed job is located.
  client_password (str): Password to decrypt the secret key stored in the client certificate. Default: ''.
  server_password (str): Password to decrypt the secret key stored in the server certificate. Default: ''.
@@ -14,14 +14,15 @@
  # ============================================================================
  """Utils of auto parallel"""
  import os
+ import re
  from time import perf_counter
  from importlib import import_module
  import numpy as np
  import mindspore as ms
  from mindspore import context, log as logger
- from mindspore._c_expression import reset_op_id, reset_op_id_with_offset
+ from mindspore._c_expression import reset_op_id
  from mindspore.common.tensor import Tensor
- from mindspore.common.dtype import dtype_to_nptype
+ from mindspore.common.dtype import _dtype_to_nptype
  from mindspore.common import dtype as mstype
  from mindspore.communication.management import get_group_size, get_rank
  from mindspore.communication._comm_helper import _is_initialized
@@ -156,7 +157,7 @@ def _is_in_auto_parallel_mode():


  def _is_parallel_mode():
- if not _is_initialized() or context.get_context('mode') == context.PYNATIVE_MODE:
+ if not _is_initialized():
  return False
  if os.getenv("RUN_MODE") != "predict":
  return False
@@ -173,12 +174,6 @@ def _is_in_hybrid_parallel_mode():
  return _get_parallel_mode() == ms.ParallelMode.HYBRID_PARALLEL


- def _is_pynative_parallel():
- parallel_mode = context.get_auto_parallel_context('parallel_mode')
- return context.get_context('mode') == context.PYNATIVE_MODE and parallel_mode in (
- context.ParallelMode.SEMI_AUTO_PARALLEL, context.ParallelMode.AUTO_PARALLEL)
-
-
  def _get_full_batch():
  """Get whether to use full_batch."""
  return auto_parallel_context().get_full_batch()
@@ -452,7 +447,7 @@ def _to_full_tensor(elem, global_device_num, global_rank, scaling_sens=None):
  batchsize_per_device = item
  else:
  new_shape += (item,)
- new_tensor_numpy = np.zeros(new_shape, dtype_to_nptype(type_))
+ new_tensor_numpy = np.zeros(new_shape, _dtype_to_nptype(type_))  # pylint:disable=protected-access
  start = stage_rank * batchsize_per_device
  new_tensor_numpy[start: start + batchsize_per_device] = data.asnumpy()
  else:
@@ -466,7 +461,7 @@ def _to_full_tensor(elem, global_device_num, global_rank, scaling_sens=None):
  end = (stage_rank % dataset_strategy[index][i] + 1) * item
  s = slice(start, end, 1)
  slice_index += (s,)
- new_tensor_numpy = np.zeros(new_shape, dtype_to_nptype(type_))
+ new_tensor_numpy = np.zeros(new_shape, _dtype_to_nptype(type_))  # pylint:disable=protected-access
  new_tensor_numpy[slice_index] = data.asnumpy()
  new_tensor = Tensor(new_tensor_numpy, dtype=type_)
  lst.append(new_tensor)
@@ -590,11 +585,6 @@ def _reset_op_id():
  reset_op_id()


- def _reset_op_id_with_offset():
- """Reset op id with offset."""
- reset_op_id_with_offset()
-
-
  def _parallel_predict_check():
  """validate parallel model prediction"""
  if _is_in_auto_parallel_mode():
@@ -773,7 +763,7 @@ def _grads_divided_by_device_num_if_recomputation(grads):
  """
  If in pynative parallel and full_batch is True, divide grads by device num to ensure that the gradients is correct.
  """
- if not _is_pynative_parallel() or not _get_full_batch():
+ if not _get_full_batch():
  return grads

  device_num = _get_device_num()
@@ -804,3 +794,30 @@ def _check_rank(cur_rank, initial_rank, pipeline_stages):
  raise ValueError(f"For parameter broadcast, the cur_rank: {cur_rank} is wrong.")
  if initial_rank % (get_group_size() / pipeline_stages) != 0:
  raise ValueError(f"For parameter broadcast, the initial_rank: {initial_rank} is wrong.")
+
+
+ def _check_path_safe(path, arg_name):
+ """
+ Check input path string is safe.
+ """
+ illegal_patterns = [
+ r"\.\.",
+ r"//+",
+ r"~",
+ r"^\s*$",
+ r"\./\."
+ ]
+ for pattern in illegal_patterns:
+ if re.search(pattern, path):
+ pattern_info = pattern.replace('\\', '')
+ raise ValueError(f"{arg_name} contains '{pattern_info}' is not safe, please use a safe one.")
+
+
+ def _check_path_writable(path):
+ """
+ Check the write permission of the input path.
+ """
+ if not os.path.exists(path):
+ raise RuntimeError(f"{path} Path does not exist.")
+ if not os.access(path, os.W_OK):
+ raise PermissionError(f"Don't have the write permission on the directory {path}.")
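To make the new validators concrete, their behavior on a few inputs follows directly from the patterns and checks above (the first matching pattern raises):

_check_path_safe("/data/ckpt/rank_0", "dst_checkpoints_dir")  # passes: no illegal pattern
_check_path_safe("../ckpt", "dst_checkpoints_dir")            # ValueError: contains '..'
_check_path_safe("out//ckpt", "dst_checkpoints_dir")          # ValueError: contains '//+'
_check_path_safe("~/ckpt", "dst_checkpoints_dir")             # ValueError: contains '~'
_check_path_safe("   ", "dst_checkpoints_dir")                # ValueError: whitespace-only path

_check_path_writable("/data/ckpt")  # RuntimeError if missing, PermissionError if not writable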
@@ -14,6 +14,7 @@
  # ============================================================================
  """Cell of auto parallel"""
  import os
+ from mindspore import jit
  from mindspore.nn.cell import Cell
  from mindspore.parallel.shard import Layout
  from mindspore.communication.management import get_rank, get_group_size
@@ -281,7 +282,8 @@ class AutoParallel(Cell):
  Note:
  - It only works when `parallel_mode=sharding_propagation`.
  - When performing distributed training, users can first save the strategy using dryrun on a single device
- and then load strategy to perform distributed training.
+ and then load strategy to perform distributed training. Note that only the first device of each node will
+ save the strategy file, so the simulated rank id specified by Dryrun must be divisible by 8.

  Args:
  file_path (str): Path to save parallel strategy json, must be an absolute path.
@@ -511,17 +513,17 @@ class AutoParallel(Cell):
  raise ValueError("For 'AutoParallel.pipeline', the argument 'stages' "
  "must be larger than zero, but got value: {}.".format(stages))
  if not isinstance(output_broadcast, bool):
- raise TypeError("For 'AutoParallel.pipeline', the argument 'stages' "
+ raise TypeError("For 'AutoParallel.pipeline', the argument 'output_broadcast' "
  "must be bool type, but got the type : {}.".format(type(output_broadcast)))
  if not isinstance(interleave, bool):
- raise TypeError("For 'AutoParallel.pipeline', the argument 'stages' "
+ raise TypeError("For 'AutoParallel.pipeline', the argument 'interleave' "
  "must be bool type, but got the type : {}.".format(type(interleave)))
  if not isinstance(scheduler, str):
- raise TypeError("For 'AutoParallel.pipeline', the argument 'stages' "
+ raise TypeError("For 'AutoParallel.pipeline', the argument 'scheduler' "
  "must be str type, but got the type : {}.".format(type(scheduler)))
- if scheduler not in ("1f1b", "gpipe", "seqpipe", "seqvpp", "seqsmartvpp"):
+ if scheduler not in ("1f1b", "gpipe", "seqpipe", "seqvpp", "seqsmartvpp", "zero_bubble_v"):
  raise ValueError("For 'AutoParallel.pipeline', the argument "
- "'scheduler' must be '1f1b'/'gpipe'/'seqpipe'/'seqvpp'/'seqsmartvpp' ," \
+ "'scheduler' must be '1f1b'/'gpipe'/'seqpipe'/'seqvpp'/'seqsmartvpp'/'zero_bubble_v' ," \
  " but got the value : {}."
  .format(scheduler))
  self._pipeline_stages = stages
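With "zero_bubble_v" now accepted by this validation, selecting the new scheduler looks like the following sketch. AutoParallel's constructor arguments are elided, and interleave=True is used because the context check in the @@ -998 hunk above suggests schedulers other than "1f1b" require interleaving:

from mindspore.parallel.auto_parallel import AutoParallel  # import path assumed from this wheel's layout

parallel_net = AutoParallel(net)  # 'net' is the training Cell; other constructor args elided
parallel_net.pipeline(stages=4, output_broadcast=False, interleave=True,
                      scheduler="zero_bubble_v")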
@@ -665,8 +667,11 @@ class AutoParallel(Cell):
  - recomputation_communication_overlap (bool): Enable overlap between recompute ops and communication ops
  if True.
  Default: False.
- grad_matmul_communication_overlap (bool): Enable overlap between dw matmul and
- tensor parallel communication ops if True. Default: False.
+ grad_matmul_communication_overlap (bool, str): When set to ``True``, it indicates that overlap
+ between dw matmul and tensor parallel communication is enabled. When set to ``False``, it indicates
+ that this feature is disabled. When set to str, it only optimizes the specified communication
+ operator types, with operators separated by ``,``. For example, "AlltoAll,AlltoAllV" indicates that
+ only ``AlltoAll`` and ``AlltoAllV`` are optimized. Default: ``False``.
  - grad_fa_allgather_overlap (bool): Enable overlap between duplicated allgather by recomputing
  in sequence parallel and flashattentionscoregrad ops if True. Default: False.
  - enable_communication_fusion (bool): Enable communication fusion to optimize the number of
@@ -681,7 +686,9 @@ class AutoParallel(Cell):
  and optimizer parallel allgather communication if True. Currently, do not support
  `O2 <https://www.mindspore.cn/docs/en/master/api_python/mindspore/mindspore.JitConfig.html>`_
  Default: False.
- - computation_communication_fusion_level (int): Enable the fusion between compute and communicate.
+ - computation_communication_fusion_level (int): Enable the fusion between compute and communicate,
+ which fuses communication tasks and computing tasks, allows for partial pipelining and parallel
+ execution of these tasks during operation, thereby enhancing performance.
  Default: ``0``. Note: This function must be used with Ascend Training Solution 24.0.RC2 or later.
  This is an experimental configuration, may be changed or canceled in the future.

@@ -692,6 +699,12 @@ class AutoParallel(Cell):
  - 2: Apply fusion to backward nodes.

  - 3: Apply fusion to all nodes.
+
+ .. warning::
+ After setting ``export MS_ENABLE_LCCL=on``, the fusion operator based on memory semantics will be
+ used. Please note that this operator is still in an experimental stage and may be changed or
+ removed in the future.
+
  - dataset_broadcast_opt_level (int): Optimize the scenario that the dataset repeated reading. Only
  support O0/O1 jit level. It doesn't work in O2 mode. Default: ``0``.
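As an illustration of the options documented above, a parallel speed-up configuration file might contain the following. Key names are copied from the docstring, but the exact JSON schema is owned by MindSpore's parallel_speed_up_json support (the file path is handed to ascend_config['parallel_speed_up_json_path'] in the next hunk), so treat this as a hedged sketch:

import json

# Hypothetical parallel_speed_up.json contents; keys mirror the documented options.
speed_up_config = {
    "recomputation_communication_overlap": True,
    "grad_matmul_communication_overlap": "AlltoAll,AlltoAllV",  # str form: only these op types
    "computation_communication_fusion_level": 2,                # 2 = fuse backward nodes
    "dataset_broadcast_opt_level": 0,
}
with open("parallel_speed_up.json", "w") as f:
    json.dump(speed_up_config, f, indent=4)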
@@ -735,5 +748,6 @@
  self._transformer_opt_config = file_path
  ctx.ascend_config['parallel_speed_up_json_path'] = file_path

+ @jit
  def construct(self, *args, **kwargs):
  return self.network(*args, **kwargs)
@@ -31,7 +31,7 @@ from mindspore.communication.management import get_rank, get_group_size
  from mindspore.parallel._tensor import _load_tensor, _reshape_param_data, _reshape_param_data_with_weight, \
  _get_tensor_slice_index, _get_tensor_strategy
  from mindspore.parallel._utils import _is_in_auto_parallel_mode, _get_pipeline_stages, _infer_rank_list, \
- _remove_repeated_slices, _get_auto_parallel_net
+ _remove_repeated_slices, _get_auto_parallel_net, _check_path_safe, _check_path_writable
  from mindspore.parallel._parallel_serialization import _rank_list_for_transform_parallel_checkpoint, \
  _transform_parallel_checkpoint, _get_device_num_from_strategy, _make_dir, _build_searched_strategy, \
  _extract_layout_map, _extract_src_dst_layout_map, _parameter_not_in_local_stage, _extract_pipeline_stage_num, \
@@ -69,7 +69,9 @@ def merge_pipeline_strategys(src_strategy_dirs, dst_strategy_file):
  >>> ms.parallel.merge_pipeline_strategys("./src_strategy_dir", "./dst_strategy.ckpt")

  """
- dst_strategy_dir, _ = os.path.split(dst_strategy_file)
+ dst_strategy_file = os.path.normpath(dst_strategy_file)
+ dst_strategy_file = os.path.abspath(dst_strategy_file)
+ dst_strategy_dir = os.path.dirname(dst_strategy_file)
  if not os.path.exists(dst_strategy_dir):
  _make_dir(dst_strategy_dir, "path")
  if not os.path.isdir(src_strategy_dirs):
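The three-step normalization replaces a bare os.path.split so that relative or dot-laden inputs resolve to one canonical directory before it is created; standard-library behavior, for example:

import os

dst = "out/../out/./dst_strategy.ckpt"
dst = os.path.abspath(os.path.normpath(dst))  # e.g. /work/out/dst_strategy.ckpt (depends on cwd)
print(os.path.dirname(dst))                   # /work/out, the directory that gets created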
@@ -495,6 +497,9 @@ def _transform_checkpoint_by_stage(src_checkpoints_dir, dst_checkpoints_dir, ckp
  def _transform_checkpoints(src_checkpoints_dir, dst_checkpoints_dir, ckpt_prefix, src_strategy_file=None,
  dst_strategy_file=None):
  """Transform checkpoints for all stages in src_strategy_file"""
+ _check_path_safe(dst_checkpoints_dir, "dst_checkpoints_dir")
+ dst_checkpoints_dir = os.path.realpath(dst_checkpoints_dir)
+ _check_path_safe(ckpt_prefix, "ckpt_prefix")
  checkpoints_rank_dir_list = os.path.join(src_checkpoints_dir, "rank_[0-9]*")
  all_checkpoint_files_map = {}
  for checkpoint_dir in glob.glob(checkpoints_rank_dir_list):
@@ -563,6 +568,7 @@ def _transform_checkpoints(src_checkpoints_dir, dst_checkpoints_dir, ckpt_prefix
  save_checkpoint_file_dir = os.path.join(dst_checkpoints_dir, "rank_{}".format(transform_rank))
  if not os.path.exists(save_checkpoint_file_dir):
  _make_dir(save_checkpoint_file_dir, "path")
+ _check_path_writable(save_checkpoint_file_dir)
  save_checkpoint_file_name = os.path.join(save_checkpoint_file_dir, save_checkpoint_file)
  ms.save_checkpoint(transform_param_list, save_checkpoint_file_name)
  del param_total_dict_copy
@@ -913,6 +919,15 @@ def set_op_strategy_config(mode="SAVE", path=""):
  if file_type != ".json":
  raise KeyError("File type must be .json")
  dir_path = os.path.dirname(path)
+
+ normalized_path = os.path.abspath(os.path.realpath(path))
+ dangerous_paths = ['/etc', '/usr', '/bin', '/sbin', '/boot', '/proc', '/sys']
+ for dangerous_path in dangerous_paths:
+ if normalized_path.startswith(dangerous_path):
+ raise PermissionError(
+ f"Writing to system directory '{dangerous_path}' is not allowed"
+ )
+
  if dir_path and not os.path.exists(dir_path):
  os.makedirs(dir_path, mode=0o700, exist_ok=True)
  check_mode_type = ["SAVE", "LOAD"]
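With the new guard, a strategy path that resolves into a system directory now fails before anything is written; a sketch (assuming set_op_strategy_config is importable from mindspore.parallel, which this wheel's file layout suggests but the diff does not show):

from mindspore.parallel import set_op_strategy_config  # assumed import path

try:
    set_op_strategy_config(mode="SAVE", path="/etc/strategy.json")
except PermissionError as err:
    print(err)  # Writing to system directory '/etc' is not allowed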
@@ -1165,6 +1180,8 @@ def load_distributed_checkpoint(network, checkpoint_filenames=None, predict_stra
  train_strategy_filename = ms.context.get_auto_parallel_context("strategy_ckpt_load_file")

  _train_strategy = build_searched_strategy(train_strategy_filename)
+ if not _train_strategy:
+ return True
  train_strategy = _convert_to_list(_train_strategy)

  train_dev_count = 1
@@ -1180,6 +1197,7 @@ def load_distributed_checkpoint(network, checkpoint_filenames=None, predict_stra

  param_total_dict = defaultdict(dict)
  for file_index, file_name in enumerate(checkpoint_filenames):
+ file_name = os.path.abspath(file_name)
  ckpt_dict = ms.load_checkpoint(file_name, dec_key=dec_key, dec_mode=dec_mode)
  for param_name, param in ckpt_dict.items():
  param_total_dict[param_name][file_index] = param
@@ -21,6 +21,7 @@ import subprocess
  import socket
  import psutil
  import mindspore.log as logger
+ from mindspore.utils import RSCPluginHandle
  from ._utils import _generate_cmd_args_list, _generate_cmd_args_list_with_core, _generate_url, \
  _is_local_ip, _convert_addr_to_ip, _send_scale_num, _get_local_ip, _generate_auto_bind_core_strategy, \
  _generate_bind_core_strategy
@@ -179,9 +180,12 @@ class _ProcessManager:
  self.is_simulation = self.sim_level != -1
  if self.is_simulation:
  os.environ["MS_SIMULATION_LEVEL"] = str(self.sim_level)
+ if self.sim_rank_id == -1:
+ self.sim_rank_id = int(os.getenv("RANK_ID", "-1"))
  elif os.getenv("MS_SIMULATION_LEVEL"):
  self.is_simulation = True
- self.sim_rank_id = int(os.getenv("RANK_ID", "-1"))
+ if self.sim_rank_id == -1:
+ self.sim_rank_id = int(os.getenv("RANK_ID", "-1"))
  if os.getenv("RANK_SIZE"):
  self.exported_rank_size = os.getenv("RANK_SIZE")
  # If sim_rank_id is set, single worker can be started.
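The net effect of this hunk is that an explicitly passed sim_rank_id now wins over the RANK_ID environment variable, which previously overwrote it in the MS_SIMULATION_LEVEL branch. The fallback logic in isolation (resolve_sim_rank_id is a hypothetical name for illustration):

import os

def resolve_sim_rank_id(sim_rank_id: int) -> int:
    # -1 means "not set by the user"; only then consult RANK_ID.
    if sim_rank_id == -1:
        sim_rank_id = int(os.getenv("RANK_ID", "-1"))
    return sim_rank_id

os.environ["RANK_ID"] = "3"
assert resolve_sim_rank_id(-1) == 3  # unset: falls back to the env var
assert resolve_sim_rank_id(7) == 7   # explicit value is preserved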
@@ -218,23 +222,28 @@ class _ProcessManager:

  self.proc_rank_map = {}
  self.enable_mindx = False
+ self.handler = None
  self._check_taskd()

  def _check_taskd(self):
  """check if enable taskd."""
- tft_env = os.getenv("MS_ENABLE_TFT", "")
- if any(v in tft_env for v in ('TTP:1', 'UCE:1', 'ARF:1', 'TSP:1', 'RSC:1', 'HCCE:1')):
- try:
- from taskd.python.framework.agent.ms_mgr.msrun_plugin import MSRunPlugin
- self.msmgr = MSRunPlugin()
- self.msmgr.register_callbacks("KILL_WORKER", self.kill_workers)
- self.msmgr.register_callbacks("START_ALL_WORKER", self.start_all_workers)
- self.msmgr.register_callbacks("START_WORKER_LIST", self.start_worker_list)
- self.msmgr.register_callbacks("MONITOR", self.monitor_rank_status)
- self.enable_mindx = True
- os.environ["MS_ENABLE_RECOVERY"] = str(1)
- except Exception as e: # pylint: disable=broad-except
- logger.warning(f"mindx is not installed, using original mindspore recovery strategy.: {str(e)}")
+ self.handler = RSCPluginHandle()
+ self.enable_mindx = self.handler.check_enable()
+ if self.enable_mindx is False:
+ self.handler = None
+ return
+ ret = self.handler.register_callback({"KILL_WORKER": self.kill_workers,
+ "START_ALL_WORKER": self.start_all_workers,
+ "START_WORKER_LIST": self.start_worker_list,
+ "MONITOR": self.monitor_rank_status
+ })
+ if not ret:
+ logger.warning(f"Register callback to mindx failed, process controlled by msrun.")
+ self.enable_mindx = False
+ self.handler = None
+ return
+ logger.warning(f"Mindx enabled, process controlled by mindx.")
+ os.environ["MS_ENABLE_RECOVERY"] = str(1)

  def run(self):
  """
@@ -257,7 +266,7 @@ class _ProcessManager:
  if self.is_master and not self.is_simulation:
  self.start_scheduler()
  if self.enable_mindx:
- self.msmgr.start()
+ self.handler.start()
  else:
  self.start_workers()
  if self.join:
@@ -379,8 +388,7 @@ class _ProcessManager:
  logger.error(f"Scheduler process {self.msn_process.pid} exit with exception.")

  if has_exception:
- logger.info("Analyzing exception log...")
- self._analyze_log()
+ self._analyze_sched_log()
  raise RuntimeError("Distributed job exited with exception. Please check logs in "
  f"directory: {self.log_dir}.")
@@ -580,26 +588,13 @@ class _ProcessManager:
  log_name = os.path.join(self.log_dir, formatted_log_name + "_" + str(index) + ".log")
  return node_id, log_name

- def _analyze_log(self):
+ def _analyze_sched_log(self):
  """
- Analyze exception logs.
+ Analyze scheduler log.
  """
  scheduler_log_path = os.path.join(self.log_dir, "scheduler.log")
- time_out_node_ids = []
  if os.path.exists(scheduler_log_path):
- with open(scheduler_log_path, "r") as log:
- scheduler_log = log.read()
- # Filter out abnormal logs.
- time_out_node_log = re.findall(r"node: .* is timed out", scheduler_log)
-
- # Filter out node ids of the processes which exit abnormally.
- def node_id_splitter(node_id):
- return re.split(" is timed out", re.split("node: ", node_id)[1])[0]
- for node_id in time_out_node_log:
- time_out_node_ids.append(node_id_splitter(node_id))
- logger.error(f"Time out nodes are {time_out_node_ids}")
-
- os.system(f"grep -rn -E 'ERROR|CRITICAL|Traceback|Error' -C 5 {self.log_dir}")
+ os.system(f"cat {scheduler_log_path} | grep -E 'ERROR|CRITICAL|Traceback|Error' -C 5")

  def format_worker_log_name(self):
  """