mindspore 2.7.0rc1__cp311-cp311-win_amd64.whl → 2.7.1__cp311-cp311-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mindspore might be problematic. Click here for more details.
- mindspore/.commit_id +1 -1
- mindspore/__init__.py +5 -2
- mindspore/_c_dataengine.cp311-win_amd64.pyd +0 -0
- mindspore/_c_expression.cp311-win_amd64.pyd +0 -0
- mindspore/_c_mindrecord.cp311-win_amd64.pyd +0 -0
- mindspore/_checkparam.py +2 -2
- mindspore/_extends/builtin_operations.py +3 -3
- mindspore/_extends/parallel_compile/akg_compiler/custom.py +1109 -0
- mindspore/_extends/parallel_compile/akg_compiler/gen_custom_op_files.py +1 -1
- mindspore/_extends/parse/__init__.py +3 -3
- mindspore/_extends/parse/compile_config.py +24 -1
- mindspore/_extends/parse/deprecated/deprecated_tensor_method.py +6 -3
- mindspore/_extends/parse/parser.py +28 -22
- mindspore/_extends/parse/resources.py +1 -1
- mindspore/_extends/parse/standard_method.py +23 -2
- mindspore/_extends/parse/trope.py +2 -1
- mindspore/_extends/pijit/pijit_func_white_list.py +9 -27
- mindspore/amp.py +0 -18
- mindspore/avcodec-59.dll +0 -0
- mindspore/avdevice-59.dll +0 -0
- mindspore/avfilter-8.dll +0 -0
- mindspore/avformat-59.dll +0 -0
- mindspore/avutil-57.dll +0 -0
- mindspore/boost/base.py +29 -2
- mindspore/common/__init__.py +18 -12
- mindspore/common/_decorator.py +3 -2
- mindspore/common/_grad_function.py +3 -1
- mindspore/common/_tensor_cpp_method.py +1 -1
- mindspore/common/_tensor_docs.py +371 -96
- mindspore/common/_utils.py +7 -43
- mindspore/common/api.py +434 -135
- mindspore/common/dtype.py +98 -57
- mindspore/common/dump.py +7 -108
- mindspore/common/dynamic_shape/__init__.py +0 -0
- mindspore/common/{auto_dynamic_shape.py → dynamic_shape/auto_dynamic_shape.py} +15 -23
- mindspore/common/dynamic_shape/enable_dynamic.py +197 -0
- mindspore/common/file_system.py +59 -9
- mindspore/common/hook_handle.py +82 -3
- mindspore/common/jit_config.py +5 -1
- mindspore/common/jit_trace.py +27 -12
- mindspore/common/lazy_inline.py +5 -3
- mindspore/common/np_dtype.py +3 -3
- mindspore/common/parameter.py +17 -127
- mindspore/common/recompute.py +4 -13
- mindspore/common/tensor.py +50 -217
- mindspore/communication/_comm_helper.py +11 -1
- mindspore/communication/comm_func.py +138 -4
- mindspore/communication/management.py +85 -1
- mindspore/config/op_info.config +0 -15
- mindspore/context.py +20 -106
- mindspore/dataset/__init__.py +1 -1
- mindspore/dataset/audio/transforms.py +1 -1
- mindspore/dataset/core/config.py +35 -1
- mindspore/dataset/engine/datasets.py +338 -319
- mindspore/dataset/engine/datasets_user_defined.py +38 -22
- mindspore/dataset/engine/datasets_vision.py +1 -1
- mindspore/dataset/engine/validators.py +1 -15
- mindspore/dataset/transforms/c_transforms.py +2 -2
- mindspore/dataset/transforms/transforms.py +3 -3
- mindspore/dataset/vision/__init__.py +1 -1
- mindspore/dataset/vision/py_transforms.py +8 -8
- mindspore/dataset/vision/transforms.py +17 -5
- mindspore/dataset/vision/utils.py +632 -21
- mindspore/device_context/ascend/op_tuning.py +35 -1
- mindspore/dnnl.dll +0 -0
- mindspore/{profiler/common/validator → graph}/__init__.py +9 -1
- mindspore/graph/custom_pass.py +55 -0
- mindspore/include/api/cell.h +28 -4
- mindspore/include/api/cfg.h +24 -7
- mindspore/include/api/context.h +1 -0
- mindspore/include/api/delegate.h +0 -2
- mindspore/include/api/dual_abi_helper.h +100 -19
- mindspore/include/api/graph.h +14 -1
- mindspore/include/api/kernel.h +16 -3
- mindspore/include/api/kernel_api.h +9 -1
- mindspore/include/api/metrics/accuracy.h +9 -0
- mindspore/include/api/model.h +5 -1
- mindspore/include/api/model_group.h +4 -0
- mindspore/include/api/model_parallel_runner.h +2 -0
- mindspore/include/api/status.h +48 -10
- mindspore/include/api/types.h +6 -1
- mindspore/include/dataset/constants.h +9 -0
- mindspore/include/dataset/execute.h +2 -2
- mindspore/jpeg62.dll +0 -0
- mindspore/mindrecord/__init__.py +3 -3
- mindspore/mindrecord/common/exceptions.py +1 -0
- mindspore/mindrecord/config.py +1 -1
- mindspore/{parallel/mpi → mindrecord/core}/__init__.py +4 -1
- mindspore/mindrecord/{shardheader.py → core/shardheader.py} +2 -1
- mindspore/mindrecord/{shardindexgenerator.py → core/shardindexgenerator.py} +1 -1
- mindspore/mindrecord/{shardreader.py → core/shardreader.py} +2 -1
- mindspore/mindrecord/{shardsegment.py → core/shardsegment.py} +2 -2
- mindspore/mindrecord/{shardutils.py → core/shardutils.py} +1 -1
- mindspore/mindrecord/{shardwriter.py → core/shardwriter.py} +1 -1
- mindspore/mindrecord/filereader.py +4 -4
- mindspore/mindrecord/filewriter.py +5 -5
- mindspore/mindrecord/mindpage.py +2 -2
- mindspore/mindrecord/tools/cifar10.py +4 -3
- mindspore/mindrecord/tools/cifar100.py +1 -1
- mindspore/mindrecord/tools/cifar100_to_mr.py +1 -1
- mindspore/mindrecord/tools/cifar10_to_mr.py +6 -6
- mindspore/mindrecord/tools/csv_to_mr.py +1 -1
- mindspore/mindrecord/tools/imagenet_to_mr.py +1 -1
- mindspore/mindrecord/tools/mnist_to_mr.py +1 -1
- mindspore/mindrecord/tools/tfrecord_to_mr.py +1 -1
- mindspore/mindspore_backend_common.dll +0 -0
- mindspore/mindspore_backend_manager.dll +0 -0
- mindspore/mindspore_cluster.dll +0 -0
- mindspore/mindspore_common.dll +0 -0
- mindspore/mindspore_core.dll +0 -0
- mindspore/mindspore_cpu.dll +0 -0
- mindspore/mindspore_dump.dll +0 -0
- mindspore/mindspore_frontend.dll +0 -0
- mindspore/mindspore_glog.dll +0 -0
- mindspore/mindspore_hardware_abstract.dll +0 -0
- mindspore/mindspore_memory_pool.dll +0 -0
- mindspore/mindspore_ms_backend.dll +0 -0
- mindspore/mindspore_ops.dll +0 -0
- mindspore/{mindspore_ops_host.dll → mindspore_ops_cpu.dll} +0 -0
- mindspore/mindspore_profiler.dll +0 -0
- mindspore/mindspore_pyboost.dll +0 -0
- mindspore/mindspore_pynative.dll +0 -0
- mindspore/mindspore_runtime_pipeline.dll +0 -0
- mindspore/mindspore_runtime_utils.dll +0 -0
- mindspore/mindspore_tools.dll +0 -0
- mindspore/mint/__init__.py +15 -10
- mindspore/mint/distributed/__init__.py +4 -0
- mindspore/mint/distributed/distributed.py +392 -69
- mindspore/mint/nn/__init__.py +2 -16
- mindspore/mint/nn/functional.py +4 -110
- mindspore/mint/nn/layer/__init__.py +0 -2
- mindspore/mint/nn/layer/_functions.py +1 -2
- mindspore/mint/nn/layer/activation.py +0 -6
- mindspore/mint/nn/layer/basic.py +0 -47
- mindspore/mint/nn/layer/conv.py +10 -10
- mindspore/mint/nn/layer/normalization.py +11 -16
- mindspore/mint/nn/layer/pooling.py +0 -4
- mindspore/nn/__init__.py +1 -3
- mindspore/nn/cell.py +231 -239
- mindspore/nn/layer/activation.py +4 -2
- mindspore/nn/layer/basic.py +56 -14
- mindspore/nn/layer/container.py +16 -0
- mindspore/nn/layer/embedding.py +4 -169
- mindspore/nn/layer/image.py +1 -1
- mindspore/nn/layer/normalization.py +2 -1
- mindspore/nn/layer/thor_layer.py +4 -85
- mindspore/nn/optim/ada_grad.py +0 -1
- mindspore/nn/optim/adafactor.py +0 -1
- mindspore/nn/optim/adam.py +32 -127
- mindspore/nn/optim/adamax.py +0 -1
- mindspore/nn/optim/asgd.py +0 -1
- mindspore/nn/optim/ftrl.py +8 -102
- mindspore/nn/optim/lamb.py +1 -4
- mindspore/nn/optim/lars.py +0 -3
- mindspore/nn/optim/lazyadam.py +25 -218
- mindspore/nn/optim/momentum.py +5 -43
- mindspore/nn/optim/optimizer.py +6 -55
- mindspore/nn/optim/proximal_ada_grad.py +0 -1
- mindspore/nn/optim/rmsprop.py +0 -1
- mindspore/nn/optim/rprop.py +0 -1
- mindspore/nn/optim/sgd.py +0 -1
- mindspore/nn/optim/tft_wrapper.py +2 -4
- mindspore/nn/optim/thor.py +0 -2
- mindspore/nn/probability/bijector/bijector.py +7 -8
- mindspore/nn/probability/bijector/gumbel_cdf.py +2 -2
- mindspore/nn/probability/bijector/power_transform.py +20 -21
- mindspore/nn/probability/bijector/scalar_affine.py +5 -5
- mindspore/nn/probability/bijector/softplus.py +13 -14
- mindspore/nn/probability/distribution/_utils/utils.py +2 -2
- mindspore/nn/wrap/cell_wrapper.py +39 -5
- mindspore/nn/wrap/grad_reducer.py +4 -89
- mindspore/numpy/array_creations.py +4 -4
- mindspore/numpy/fft.py +9 -9
- mindspore/numpy/utils_const.py +1 -1
- mindspore/{nn/reinforcement → onnx}/__init__.py +5 -8
- mindspore/onnx/onnx_export.py +137 -0
- mindspore/opencv_core4110.dll +0 -0
- mindspore/opencv_imgcodecs4110.dll +0 -0
- mindspore/{opencv_imgproc452.dll → opencv_imgproc4110.dll} +0 -0
- mindspore/ops/__init__.py +2 -0
- mindspore/ops/_grad_experimental/grad_comm_ops.py +38 -2
- mindspore/ops/_grad_experimental/grad_inner_ops.py +0 -9
- mindspore/ops/_op_impl/aicpu/__init__.py +0 -10
- mindspore/ops/_op_impl/cpu/__init__.py +1 -5
- mindspore/ops/_op_impl/cpu/{buffer_append.py → joinedstr_op.py} +8 -8
- mindspore/ops/auto_generate/cpp_create_prim_instance_helper.py +28 -24
- mindspore/ops/auto_generate/gen_extend_func.py +6 -11
- mindspore/ops/auto_generate/gen_ops_def.py +385 -154
- mindspore/ops/auto_generate/gen_ops_prim.py +5676 -5167
- mindspore/ops/communication.py +97 -0
- mindspore/ops/composite/__init__.py +5 -2
- mindspore/ops/composite/base.py +16 -2
- mindspore/ops/composite/multitype_ops/__init__.py +3 -1
- mindspore/ops/composite/multitype_ops/_compile_utils.py +150 -8
- mindspore/ops/composite/multitype_ops/_constexpr_utils.py +1 -1
- mindspore/ops/composite/multitype_ops/add_impl.py +7 -0
- mindspore/ops/composite/multitype_ops/mod_impl.py +27 -0
- mindspore/ops/function/__init__.py +2 -0
- mindspore/ops/function/array_func.py +24 -18
- mindspore/ops/function/comm_func.py +3883 -0
- mindspore/ops/function/debug_func.py +7 -6
- mindspore/ops/function/grad/grad_func.py +4 -12
- mindspore/ops/function/math_func.py +89 -86
- mindspore/ops/function/nn_func.py +92 -313
- mindspore/ops/function/random_func.py +9 -18
- mindspore/ops/functional.py +4 -1
- mindspore/ops/functional_overload.py +377 -30
- mindspore/ops/operations/__init__.py +2 -5
- mindspore/ops/operations/_custom_ops_utils.py +7 -9
- mindspore/ops/operations/_inner_ops.py +12 -50
- mindspore/ops/operations/_rl_inner_ops.py +0 -933
- mindspore/ops/operations/array_ops.py +5 -50
- mindspore/ops/operations/comm_ops.py +95 -17
- mindspore/ops/operations/custom_ops.py +237 -22
- mindspore/ops/operations/debug_ops.py +33 -35
- mindspore/ops/operations/manually_defined/ops_def.py +39 -318
- mindspore/ops/operations/math_ops.py +5 -5
- mindspore/ops/operations/nn_ops.py +3 -3
- mindspore/ops/operations/sparse_ops.py +0 -83
- mindspore/ops/primitive.py +4 -27
- mindspore/ops/tensor_method.py +88 -10
- mindspore/ops_generate/aclnn/aclnn_kernel_register_auto_cc_generator.py +5 -5
- mindspore/ops_generate/aclnn/gen_aclnn_implement.py +8 -8
- mindspore/ops_generate/api/functions_cc_generator.py +53 -4
- mindspore/ops_generate/api/tensor_func_reg_cpp_generator.py +25 -11
- mindspore/ops_generate/common/gen_constants.py +11 -10
- mindspore/ops_generate/common/op_proto.py +18 -1
- mindspore/ops_generate/common/template.py +102 -245
- mindspore/ops_generate/common/template_utils.py +212 -0
- mindspore/ops_generate/gen_custom_ops.py +69 -0
- mindspore/ops_generate/op_def/ops_def_cc_generator.py +78 -7
- mindspore/ops_generate/op_def_py/base_op_prim_py_generator.py +360 -0
- mindspore/ops_generate/op_def_py/custom_op_prim_py_generator.py +140 -0
- mindspore/ops_generate/op_def_py/op_def_py_generator.py +54 -7
- mindspore/ops_generate/op_def_py/op_prim_py_generator.py +5 -312
- mindspore/ops_generate/pyboost/auto_grad_impl_cc_generator.py +74 -17
- mindspore/ops_generate/pyboost/auto_grad_reg_cc_generator.py +22 -5
- mindspore/ops_generate/pyboost/gen_pyboost_func.py +0 -16
- mindspore/ops_generate/pyboost/op_template_parser.py +3 -2
- mindspore/ops_generate/pyboost/pyboost_functions_cpp_generator.py +21 -5
- mindspore/ops_generate/pyboost/pyboost_functions_h_generator.py +2 -2
- mindspore/ops_generate/pyboost/pyboost_functions_impl_cpp_generator.py +30 -10
- mindspore/ops_generate/pyboost/pyboost_grad_function_cpp_generator.py +10 -3
- mindspore/ops_generate/pyboost/pyboost_internal_kernel_info_adapter_generator.py +1 -1
- mindspore/ops_generate/pyboost/pyboost_native_grad_functions_generator.py +19 -9
- mindspore/ops_generate/pyboost/pyboost_op_cpp_code_generator.py +71 -28
- mindspore/ops_generate/pyboost/pyboost_overload_functions_cpp_generator.py +10 -9
- mindspore/ops_generate/pyboost/pyboost_utils.py +27 -16
- mindspore/ops_generate/resources/yaml_loader.py +13 -0
- mindspore/ops_generate/tensor_py_cc_generator.py +2 -2
- mindspore/parallel/_auto_parallel_context.py +5 -15
- mindspore/parallel/_cell_wrapper.py +1 -1
- mindspore/parallel/_parallel_serialization.py +4 -6
- mindspore/parallel/_ps_context.py +2 -2
- mindspore/parallel/_utils.py +34 -17
- mindspore/parallel/auto_parallel.py +23 -9
- mindspore/parallel/checkpoint_transform.py +20 -2
- mindspore/parallel/cluster/process_entity/_api.py +28 -33
- mindspore/parallel/cluster/process_entity/_utils.py +9 -5
- mindspore/parallel/cluster/run.py +5 -3
- mindspore/{experimental/llm_boost/ascend_native → parallel/distributed}/__init__.py +21 -22
- mindspore/parallel/distributed/distributed_data_parallel.py +393 -0
- mindspore/parallel/distributed/flatten_grad_buffer.py +295 -0
- mindspore/parallel/function/reshard_func.py +6 -5
- mindspore/parallel/nn/parallel_cell_wrapper.py +40 -3
- mindspore/parallel/nn/parallel_grad_reducer.py +0 -8
- mindspore/parallel/shard.py +7 -21
- mindspore/parallel/strategy.py +336 -0
- mindspore/parallel/transform_safetensors.py +127 -20
- mindspore/profiler/analysis/viewer/ascend_kernel_details_viewer.py +13 -9
- mindspore/profiler/analysis/viewer/ascend_op_memory_viewer.py +1 -1
- mindspore/profiler/analysis/viewer/ms_minddata_viewer.py +1 -1
- mindspore/profiler/common/constant.py +5 -0
- mindspore/profiler/common/file_manager.py +9 -0
- mindspore/profiler/common/msprof_cmd_tool.py +40 -4
- mindspore/profiler/common/path_manager.py +65 -24
- mindspore/profiler/common/profiler_context.py +27 -14
- mindspore/profiler/common/profiler_info.py +3 -3
- mindspore/profiler/common/profiler_meta_data.py +1 -0
- mindspore/profiler/common/profiler_op_analyse.py +10 -6
- mindspore/profiler/common/profiler_path_manager.py +13 -0
- mindspore/profiler/common/util.py +30 -3
- mindspore/profiler/dynamic_profiler.py +91 -46
- mindspore/profiler/envprofiler.py +30 -5
- mindspore/profiler/experimental_config.py +18 -2
- mindspore/profiler/platform/cpu_profiler.py +10 -4
- mindspore/profiler/platform/npu_profiler.py +34 -7
- mindspore/profiler/profiler.py +193 -145
- mindspore/profiler/profiler_action_controller.py +1 -1
- mindspore/profiler/profiler_interface.py +2 -2
- mindspore/rewrite/symbol_tree/symbol_tree.py +1 -1
- mindspore/run_check/_check_version.py +108 -24
- mindspore/runtime/__init__.py +9 -6
- mindspore/runtime/executor.py +35 -0
- mindspore/runtime/memory.py +113 -0
- mindspore/runtime/thread_bind_core.py +1 -1
- mindspore/swresample-4.dll +0 -0
- mindspore/swscale-6.dll +0 -0
- mindspore/tinyxml2.dll +0 -0
- mindspore/{experimental/llm_boost → tools}/__init__.py +5 -5
- mindspore/tools/data_dump.py +130 -0
- mindspore/tools/sdc_detect.py +91 -0
- mindspore/tools/stress_detect.py +63 -0
- mindspore/train/__init__.py +6 -6
- mindspore/train/_utils.py +8 -21
- mindspore/train/amp.py +6 -7
- mindspore/train/callback/_callback.py +2 -1
- mindspore/train/callback/_checkpoint.py +1 -17
- mindspore/train/callback/_flops_collector.py +10 -6
- mindspore/train/callback/_train_fault_tolerance.py +72 -25
- mindspore/train/data_sink.py +5 -9
- mindspore/train/dataset_helper.py +5 -5
- mindspore/train/model.py +41 -230
- mindspore/train/serialization.py +160 -401
- mindspore/train/train_thor/model_thor.py +2 -2
- mindspore/turbojpeg.dll +0 -0
- mindspore/utils/__init__.py +6 -3
- mindspore/utils/dlpack.py +92 -0
- mindspore/utils/dryrun.py +1 -1
- mindspore/utils/runtime_execution_order_check.py +10 -0
- mindspore/utils/sdc_detect.py +14 -12
- mindspore/utils/stress_detect.py +43 -0
- mindspore/utils/utils.py +152 -16
- mindspore/version.py +1 -1
- {mindspore-2.7.0rc1.dist-info → mindspore-2.7.1.dist-info}/METADATA +3 -2
- {mindspore-2.7.0rc1.dist-info → mindspore-2.7.1.dist-info}/RECORD +330 -344
- mindspore/_extends/remote/kernel_build_server_ascend.py +0 -75
- mindspore/communication/_hccl_management.py +0 -297
- mindspore/experimental/llm_boost/ascend_native/llama_boost_ascend_native.py +0 -207
- mindspore/experimental/llm_boost/ascend_native/llm_boost.py +0 -52
- mindspore/experimental/llm_boost/atb/__init__.py +0 -23
- mindspore/experimental/llm_boost/atb/boost_base.py +0 -385
- mindspore/experimental/llm_boost/atb/llama_boost.py +0 -137
- mindspore/experimental/llm_boost/atb/qwen_boost.py +0 -124
- mindspore/experimental/llm_boost/register.py +0 -130
- mindspore/experimental/llm_boost/utils.py +0 -31
- mindspore/include/OWNERS +0 -7
- mindspore/mindspore_cpu_res_manager.dll +0 -0
- mindspore/mindspore_ops_kernel_common.dll +0 -0
- mindspore/mindspore_res_manager.dll +0 -0
- mindspore/nn/optim/_dist_optimizer_registry.py +0 -111
- mindspore/nn/reinforcement/_batch_read_write.py +0 -142
- mindspore/nn/reinforcement/_tensors_queue.py +0 -152
- mindspore/nn/reinforcement/tensor_array.py +0 -145
- mindspore/opencv_core452.dll +0 -0
- mindspore/opencv_imgcodecs452.dll +0 -0
- mindspore/ops/_op_impl/aicpu/priority_replay_buffer.py +0 -113
- mindspore/ops/_op_impl/aicpu/reservoir_replay_buffer.py +0 -96
- mindspore/ops/_op_impl/aicpu/sparse_cross.py +0 -42
- mindspore/ops/_op_impl/cpu/buffer_get.py +0 -28
- mindspore/ops/_op_impl/cpu/buffer_sample.py +0 -28
- mindspore/ops/_op_impl/cpu/priority_replay_buffer.py +0 -42
- mindspore/ops/operations/_tensor_array.py +0 -359
- mindspore/ops/operations/rl_ops.py +0 -288
- mindspore/parallel/_offload_context.py +0 -275
- mindspore/parallel/_recovery_context.py +0 -115
- mindspore/parallel/_transformer/__init__.py +0 -35
- mindspore/parallel/_transformer/layers.py +0 -765
- mindspore/parallel/_transformer/loss.py +0 -251
- mindspore/parallel/_transformer/moe.py +0 -693
- mindspore/parallel/_transformer/op_parallel_config.py +0 -222
- mindspore/parallel/_transformer/transformer.py +0 -3124
- mindspore/parallel/mpi/_mpi_config.py +0 -116
- mindspore/profiler/common/validator/validate_path.py +0 -84
- mindspore/train/memory_profiling_pb2.py +0 -298
- mindspore/utils/hooks.py +0 -81
- mindspore/common/{_auto_dynamic.py → dynamic_shape/_auto_dynamic.py} +0 -0
- {mindspore-2.7.0rc1.dist-info → mindspore-2.7.1.dist-info}/WHEEL +0 -0
- {mindspore-2.7.0rc1.dist-info → mindspore-2.7.1.dist-info}/entry_points.txt +0 -0
- {mindspore-2.7.0rc1.dist-info → mindspore-2.7.1.dist-info}/top_level.txt +0 -0
|
@@ -29,7 +29,7 @@ from mindspore import nn
|
|
|
29
29
|
from mindspore.train.model import Model
|
|
30
30
|
from mindspore.train.dataset_helper import connect_network_with_dataset
|
|
31
31
|
from mindspore.parallel._utils import _need_to_full, _to_full_tensor
|
|
32
|
-
from mindspore.common.dtype import
|
|
32
|
+
from mindspore.common.dtype import _pytype_to_dtype
|
|
33
33
|
from mindspore._c_expression import init_exec_dataset
|
|
34
34
|
from mindspore.train.train_thor.dataset_helper import DatasetHelper
|
|
35
35
|
|
|
@@ -46,7 +46,7 @@ def _convert_to_ms_type(types):
|
|
|
46
46
|
"""
|
|
47
47
|
ms_types = []
|
|
48
48
|
for numpy_type in types:
|
|
49
|
-
ms_type =
|
|
49
|
+
ms_type = _pytype_to_dtype(numpy_type) # pylint:disable=protected-access
|
|
50
50
|
ms_types.append(ms_type)
|
|
51
51
|
return ms_types
|
|
52
52
|
|
mindspore/turbojpeg.dll
CHANGED
|
Binary file
|
mindspore/utils/__init__.py
CHANGED
|
@@ -14,12 +14,15 @@
|
|
|
14
14
|
# ============================================================================
|
|
15
15
|
"""Utils module."""
|
|
16
16
|
from __future__ import absolute_import
|
|
17
|
-
from mindspore._c_expression import
|
|
18
|
-
from .
|
|
17
|
+
from mindspore._c_expression import _reuse_data_ptr
|
|
18
|
+
from .stress_detect import stress_detect
|
|
19
|
+
from .utils import ExitByRequest, RSCPluginHandle, TFTCommValue, _tft_handler
|
|
19
20
|
from .runtime_execution_order_check import runtime_execution_order_check, comm_exec_order_check
|
|
20
21
|
from .sdc_detect import sdc_detect_start, sdc_detect_stop, get_sdc_detect_result
|
|
21
22
|
from . import dryrun
|
|
23
|
+
from .dlpack import from_dlpack, to_dlpack
|
|
22
24
|
|
|
23
25
|
# Symbols from utils module.
|
|
24
26
|
__all__ = ["stress_detect", "ExitByRequest", "runtime_execution_order_check", "dryrun", "_reuse_data_ptr",
|
|
25
|
-
"_tft_handler", "comm_exec_order_check", "sdc_detect_start", "sdc_detect_stop", "get_sdc_detect_result"
|
|
27
|
+
"_tft_handler", "comm_exec_order_check", "sdc_detect_start", "sdc_detect_stop", "get_sdc_detect_result",
|
|
28
|
+
"RSCPluginHandle", "TFTCommValue", "from_dlpack", "to_dlpack"]
|
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
|
|
2
|
+
# Copyright 2025 Huawei Technologies Co., Ltd
|
|
3
|
+
#
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
# you may not use this file except in compliance with the License.
|
|
6
|
+
# You may obtain a copy of the License at
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
# ============================================================================
|
|
15
|
+
"""dlpack for tensor."""
|
|
16
|
+
from mindspore._c_expression import TensorPy as TensorPy_
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def from_dlpack(dlpack):
|
|
20
|
+
r"""
|
|
21
|
+
Converts a DLPack object to a MindSpore Tensor.
|
|
22
|
+
|
|
23
|
+
This function allows for the sharing of tensor data from other deep learning frameworks that support DLPack.
|
|
24
|
+
The data is not copied and the returned MindSpore Tensor shares the memory with the source tensor.
|
|
25
|
+
|
|
26
|
+
.. warning::
|
|
27
|
+
This is an experimental API that is subject to change or deletion.
|
|
28
|
+
|
|
29
|
+
Args:
|
|
30
|
+
dlpack (PyCapsule): The DLPack object to be converted, which is a capsule containing a pointer to a
|
|
31
|
+
`DLManagedTensor`.
|
|
32
|
+
|
|
33
|
+
Returns:
|
|
34
|
+
Tensor, the MindSpore Tensor that shares memory with the DLPack object.
|
|
35
|
+
|
|
36
|
+
Supported Platforms:
|
|
37
|
+
``Ascend``
|
|
38
|
+
|
|
39
|
+
Examples:
|
|
40
|
+
>>> import mindspore as ms
|
|
41
|
+
>>> from mindspore.utils.dlpack import to_dlpack, from_dlpack
|
|
42
|
+
>>> import numpy as np
|
|
43
|
+
>>> # Create a MindSpore Tensor and convert it to DLPack
|
|
44
|
+
>>> x = ms.Tensor(np.random.rand(2, 3), ms.float32)
|
|
45
|
+
>>> dlpack_obj = to_dlpack(x)
|
|
46
|
+
>>>
|
|
47
|
+
>>> # Convert the DLPack object back to a MindSpore Tensor
|
|
48
|
+
>>> y = from_dlpack(dlpack_obj)
|
|
49
|
+
>>> print(x.shape == y.shape)
|
|
50
|
+
True
|
|
51
|
+
"""
|
|
52
|
+
return TensorPy_.from_dlpack(dlpack)
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def to_dlpack(tensor):
|
|
56
|
+
r"""
|
|
57
|
+
Converts a MindSpore Tensor to a DLPack object.
|
|
58
|
+
|
|
59
|
+
The DLPack format is a standard for sharing tensor data between different deep learning frameworks.
|
|
60
|
+
The returned DLPack object is a Python capsule that can be consumed by other libraries that support DLPack.
|
|
61
|
+
The capsule contains a pointer to a `DLManagedTensor` structure. The consumer of the DLPack object is responsible
|
|
62
|
+
for releasing the memory.
|
|
63
|
+
|
|
64
|
+
.. warning::
|
|
65
|
+
This is an experimental API that is subject to change or deletion.
|
|
66
|
+
|
|
67
|
+
Args:
|
|
68
|
+
tensor (Tensor): The MindSpore Tensor to be converted.
|
|
69
|
+
|
|
70
|
+
Returns:
|
|
71
|
+
PyCapsule, a DLPack object that can be consumed by other libraries.
|
|
72
|
+
|
|
73
|
+
Supported Platforms:
|
|
74
|
+
``Ascend``
|
|
75
|
+
|
|
76
|
+
Examples:
|
|
77
|
+
>>> import mindspore as ms
|
|
78
|
+
>>> from mindspore.utils.dlpack import to_dlpack, from_dlpack
|
|
79
|
+
>>> import numpy as np
|
|
80
|
+
>>> # Convert a MindSpore Tensor to DLPack
|
|
81
|
+
>>> x = ms.Tensor(np.random.rand(2, 3), ms.float32)
|
|
82
|
+
>>> dlpack_obj = to_dlpack(x)
|
|
83
|
+
>>>
|
|
84
|
+
>>> # At this point, dlpack_obj can be used by other frameworks that support DLPack.
|
|
85
|
+
>>> # For demonstration, we convert it back to a MindSpore Tensor.
|
|
86
|
+
>>> y = from_dlpack(dlpack_obj)
|
|
87
|
+
>>> print(x.shape == y.shape)
|
|
88
|
+
True
|
|
89
|
+
"""
|
|
90
|
+
if tensor.has_init:
|
|
91
|
+
tensor.init_data()
|
|
92
|
+
return TensorPy_.to_dlpack(tensor)
|
mindspore/utils/dryrun.py
CHANGED
|
@@ -74,7 +74,7 @@ def set_simulation():
|
|
|
74
74
|
os.environ["MS_SIMULATION_LEVEL"] = "1"
|
|
75
75
|
obj = TraceBack()
|
|
76
76
|
Tensor.asnumpy = obj.inject(Tensor.asnumpy)
|
|
77
|
-
Tensor.
|
|
77
|
+
Tensor.__getitem__ = obj.inject(Tensor.__getitem__)
|
|
78
78
|
Tensor.is_contiguous = obj.inject(Tensor.is_contiguous)
|
|
79
79
|
Tensor.flush_from_cache = obj.inject(Tensor.flush_from_cache)
|
|
80
80
|
Tensor.__str__ = no_inject_traceback_for_print
|
|
@@ -623,6 +623,16 @@ def runtime_execution_order_check(folders_, all_rank=None):
|
|
|
623
623
|
parser = RankFolderParser(folders_)
|
|
624
624
|
result_map = parser.parse()
|
|
625
625
|
|
|
626
|
+
if not result_map:
|
|
627
|
+
logger.error("No valid rank data found. Execution order check aborted.")
|
|
628
|
+
return
|
|
629
|
+
|
|
630
|
+
# Check for any rank with empty execution orders
|
|
631
|
+
for rank, orders in result_map.items():
|
|
632
|
+
if not orders:
|
|
633
|
+
logger.error(f"Rank {rank} has no valid execution orders. Please check the csv file.")
|
|
634
|
+
return
|
|
635
|
+
|
|
626
636
|
# Modify execution orders
|
|
627
637
|
modified_orders = modify_execute_orders(result_map)
|
|
628
638
|
|
mindspore/utils/sdc_detect.py
CHANGED
|
@@ -13,17 +13,15 @@
|
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
# ============================================================================
|
|
15
15
|
"""SDC detect."""
|
|
16
|
-
|
|
16
|
+
import mindspore.tools
|
|
17
|
+
from mindspore.common._decorator import deprecated
|
|
17
18
|
|
|
18
19
|
|
|
20
|
+
@deprecated("2.7.1", "mindspore.tools.sdc_detect_start", module_prefix="mindspore.utils.")
|
|
19
21
|
def sdc_detect_start():
|
|
20
22
|
"""
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
check time decreases as the matrix shapes increase. Starting sdc detection results in approximately 100%
|
|
24
|
-
performance degradation for a single 4096-sized MatMul computation, and approximately 90% degradation on the
|
|
25
|
-
Llama2-7B model (model parallel is 4, pipeline parallel is 2, and using qkv concatenation and ffn concatenation in
|
|
26
|
-
decoder layers).
|
|
23
|
+
This api will be deprecated and removed in future versions, please use the api
|
|
24
|
+
:func:`mindspore.tools.sdc_detect_start` instead.
|
|
27
25
|
|
|
28
26
|
Supported Platforms:
|
|
29
27
|
``Ascend``
|
|
@@ -32,12 +30,14 @@ def sdc_detect_start():
|
|
|
32
30
|
>>> from mindspore.utils import sdc_detect_start
|
|
33
31
|
>>> sdc_detect_start()
|
|
34
32
|
"""
|
|
35
|
-
|
|
33
|
+
return mindspore.tools.sdc_detect_start()
|
|
36
34
|
|
|
37
35
|
|
|
36
|
+
@deprecated("2.7.1", "mindspore.tools.sdc_detect_stop", module_prefix="mindspore.utils.")
|
|
38
37
|
def sdc_detect_stop():
|
|
39
38
|
"""
|
|
40
|
-
|
|
39
|
+
This api will be deprecated and removed in future versions, please use the api
|
|
40
|
+
:func:`mindspore.tools.sdc_detect_stop` instead.
|
|
41
41
|
|
|
42
42
|
Supported Platforms:
|
|
43
43
|
``Ascend``
|
|
@@ -46,12 +46,14 @@ def sdc_detect_stop():
|
|
|
46
46
|
>>> from mindspore.utils import sdc_detect_stop
|
|
47
47
|
>>> sdc_detect_stop()
|
|
48
48
|
"""
|
|
49
|
-
|
|
49
|
+
return mindspore.tools.sdc_detect_stop()
|
|
50
50
|
|
|
51
51
|
|
|
52
|
+
@deprecated("2.7.1", "mindspore.tools.get_sdc_detect_result", module_prefix="mindspore.utils.")
|
|
52
53
|
def get_sdc_detect_result():
|
|
53
54
|
"""
|
|
54
|
-
|
|
55
|
+
This api will be deprecated and removed in future versions, please use the api
|
|
56
|
+
:func:`mindspore.tools.get_sdc_detect_result` instead.
|
|
55
57
|
|
|
56
58
|
Returns:
|
|
57
59
|
bool, indicating whether silent data corruption has occurred after detection start.
|
|
@@ -65,4 +67,4 @@ def get_sdc_detect_result():
|
|
|
65
67
|
>>> print(result)
|
|
66
68
|
False
|
|
67
69
|
"""
|
|
68
|
-
return
|
|
70
|
+
return mindspore.tools.get_sdc_detect_result()
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
# Copyright 2025 Huawei Technologies Co., Ltd
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
# ============================================================================
|
|
15
|
+
"""Stress detect."""
|
|
16
|
+
import mindspore.tools
|
|
17
|
+
from mindspore.common._decorator import deprecated
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
@deprecated("2.7.1", "mindspore.tools.stress_detect", module_prefix="mindspore.utils.")
|
|
21
|
+
def stress_detect(detect_type="aic"):
|
|
22
|
+
"""
|
|
23
|
+
This api will be deprecated and removed in future versions, please use the api
|
|
24
|
+
:func:`mindspore.tools.stress_detect` instead.
|
|
25
|
+
|
|
26
|
+
Args:
|
|
27
|
+
detect_type (str, optional): The type of stress test to perform. There are two options available: ``'aic'`` and
|
|
28
|
+
``'hccs'``, which perform AiCore and HCCS link stress tests on the device, respectively. Default: "aic".
|
|
29
|
+
|
|
30
|
+
Returns:
|
|
31
|
+
int, the return value represents the error type. 0 indicates normal. 1 indicates failure to start some or
|
|
32
|
+
all test cases. 2 indicates a hardware failure, and it is recommended to replace the device.
|
|
33
|
+
|
|
34
|
+
Supported Platforms:
|
|
35
|
+
``Ascend``
|
|
36
|
+
|
|
37
|
+
Examples:
|
|
38
|
+
>>> from mindspore.utils import stress_detect
|
|
39
|
+
>>> ret = stress_detect()
|
|
40
|
+
>>> print(ret)
|
|
41
|
+
0
|
|
42
|
+
"""
|
|
43
|
+
return mindspore.tools.stress_detect(detect_type)
|
mindspore/utils/utils.py
CHANGED
|
@@ -16,6 +16,7 @@
|
|
|
16
16
|
from __future__ import absolute_import
|
|
17
17
|
|
|
18
18
|
import os
|
|
19
|
+
import json
|
|
19
20
|
from mindspore import log as logger
|
|
20
21
|
from mindspore import context
|
|
21
22
|
from mindspore import _checkparam as Validator
|
|
@@ -23,9 +24,9 @@ from mindspore.common import dtype as mstype
|
|
|
23
24
|
from mindspore.common.tensor import Tensor
|
|
24
25
|
from mindspore.ops import functional as F
|
|
25
26
|
from mindspore.ops import operations as P
|
|
26
|
-
from mindspore.parallel._recovery_context import _set_recovery_context
|
|
27
27
|
from mindspore.common.api import jit_class
|
|
28
28
|
from mindspore._c_expression import _tft_start_record_threads, _tft_finish_record_threads
|
|
29
|
+
from mindspore._c_expression import set_is_reboot_node, tft_register_config
|
|
29
30
|
|
|
30
31
|
|
|
31
32
|
@jit_class
|
|
@@ -64,11 +65,149 @@ class ExitByRequest:
|
|
|
64
65
|
return grad
|
|
65
66
|
|
|
66
67
|
|
|
68
|
+
class TFTCommValue:
    """Config values: groups of ``NAME:1`` switch tokens looked up in ``MS_ENABLE_TFT``."""

    # support mindx to schedule
    ENABLE_MINDX = ["TTP:1", "UCE:1", "ARF:1", "TSP:1", "HCCE:1", "RSC:1"]

    # need mindio-ttp pkg
    NEED_MINDIO = ["TTP:1", "UCE:1", "ARF:1", "TSP:1", "HCCE:1"]

    # close watchdog
    DISABLE_WATCHDOG = ["ARF:1", "TSP:1", "HCCE:1"]
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
def _getenv():
    """Read the TFT and THM switches from the process environment.

    Returns:
        tuple[str, str], the values of ``MS_ENABLE_TFT`` and ``MS_ENABLE_THM`` (in that order) with
        surrounding whitespace removed. A variable that is not set yields an empty string.
    """
    return tuple(os.getenv(name, "").strip() for name in ("MS_ENABLE_TFT", "MS_ENABLE_THM"))
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
def _split_env_pairs(raw_env):
    """Yield stripped ``(key, value)`` pairs from a ``{K1:V1,K2:V2}`` style environment string."""
    for item in raw_env.replace("{", "").replace("}", "").split(","):
        item = item.strip()
        if not item:
            continue
        # Split on the first colon only, so a value may itself contain ':' (for example a drive-letter path).
        key, sep, value = item.partition(":")
        if not sep:
            logger.warning(f"Ignore malformed config item '{item}', expected the form 'KEY:VALUE'.")
            continue
        yield key.strip(), value.strip()


def _parser_tft_and_thm_env():
    """Parse ``MS_ENABLE_TFT`` and ``MS_ENABLE_THM`` and register the merged config.

    Both variables are read through ``_getenv`` and split into ``KEY:VALUE`` items. Items of ``MS_ENABLE_THM``
    are applied after those of ``MS_ENABLE_TFT``, so they win on a duplicated key. The THM item
    ``HCCL_STATUS_SAVE_CONFIG`` names a JSON file; every entry of that file is stored under the key
    ``"CCAE_" + <json key>``. A non-absolute ``HCCL_STATUS_SAVE_PATH`` in that file is replaced by ``/tmp``.

    When ``ARF`` is ``"1"``, ``HCCL_WATCHDOG`` is forced to ``"0"`` and ``TTP`` to ``"1"``. When
    ``HCCL_STATUS_SAVE`` is ``"1"``, the ``HCCL_STATUS_SAVE*`` environment variables are exported. Finally the
    merged dict is passed to ``tft_register_config``.

    Raises:
        OSError: If the file named by ``HCCL_STATUS_SAVE_CONFIG`` cannot be opened.
        json.JSONDecodeError: If that file does not contain valid JSON.
    """
    tft_env, thm_env = _getenv()
    all_config = dict(_split_env_pairs(tft_env))

    for key, value in _split_env_pairs(thm_env):
        if key != "HCCL_STATUS_SAVE_CONFIG":
            all_config[key] = value
            continue
        # The value is a path to a JSON file and may be wrapped in quotes.
        with open(value.strip("'\""), 'r', encoding='utf-8') as config_file:
            json_values = json.load(config_file)
        for json_key, json_val in json_values.items():
            if json_key == "HCCL_STATUS_SAVE_PATH" and not os.path.isabs(str(json_val)):
                logger.warning(
                    f"HCCL_STATUS_SAVE_PATH should be absolute path, but get: {json_val}, Using default path:'/tmp'")
                json_val = "/tmp"
            all_config["CCAE_" + json_key] = json_val

    if all_config.get("ARF") == "1":
        logger.warning("Disable hccl_watchdog and turn on TTP when using ARF.")
        all_config["HCCL_WATCHDOG"] = "0"
        all_config["TTP"] = "1"

    if all_config.get("HCCL_STATUS_SAVE") == "1":
        os.environ["HCCL_STATUS_SAVE"] = "1"
        for env_name in ("HCCL_STATUS_SAVE_PATH", "HCCL_STATUS_SAVE_INTERVAL"):
            env_value = all_config.get("CCAE_" + env_name)
            if env_value is None:
                # os.environ only accepts str; exporting a missing setting would raise or publish "None".
                logger.warning(f"{env_name} is not configured, the environment variable is left unchanged.")
                continue
            os.environ[env_name] = str(env_value)

    tft_register_config(all_config)
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
class RSCPluginHandle:
    """Third party controller handler.

    Registers callback functions with an external controller and starts it. The taskd agent api is tried
    first; when it cannot be imported, initialised or used, the mindx ``MSRunPlugin`` is tried instead.
    """

    def __init__(self):
        self.enable = False
        self.tft_env, _ = _getenv()
        self._check_env()
        self.msmgr = None  # MSRunPlugin instance, created by _register_by_plugin
        # Entry points of taskd.api.taskd_agent_api, filled in by _register_by_agent.
        self.init_taskd_agent = None
        self.start_taskd_agent = None
        self.register_func = None
        self.using_agent = False  # True once every callback was registered through the taskd agent

    def _check_env(self):
        """Set ``self.enable`` when ``MS_ENABLE_TFT`` contains any token of ``TFTCommValue.ENABLE_MINDX``."""
        self.enable = any(v in self.tft_env for v in TFTCommValue.ENABLE_MINDX)

    def check_enable(self):
        """Return the flag computed by ``_check_env``."""
        return self.enable

    def _register_by_agent(self, func_map):
        """Register the callbacks through the taskd agent.

        Args:
            func_map (dict): Maps a callback name to the function to register.

        Returns:
            bool, ``True`` when the agent was imported and initialised and every callback was registered,
            ``False`` otherwise. Failures are logged as warnings and are not raised.
        """
        try:
            from taskd.api.taskd_agent_api import init_taskd_agent, start_taskd_agent, register_func
            self.init_taskd_agent = init_taskd_agent
            self.start_taskd_agent = start_taskd_agent
            self.register_func = register_func
        except ImportError as e:
            logger.warning(f"Import task agent: {str(e)}, try to using mindx plugin.")
            return False
        try:
            logger.warning("register callbacks to taskd agent")
            if not self.init_taskd_agent({"Framework": "MindSpore"}):
                logger.warning("Init taskd agent failed, try to using mindx plugin.")
                return False
            for name, func in func_map.items():
                self.register_func(name, func)
        except Exception as e:  # pylint: disable=broad-except
            logger.warning(f"Register callback func failed: {str(e)}, try to using mindx plugin.")
            return False
        self.using_agent = True
        return True

    def _register_by_plugin(self, func_map):
        """Register the callbacks through the mindx msrun_plugin.

        Args:
            func_map (dict): Maps a callback name to the function to register.

        Returns:
            bool, ``True`` when ``MSRunPlugin`` was created and every callback was registered, ``False``
            otherwise. Failures are logged as warnings and are not raised.
        """
        # will delete in the future
        self.using_agent = False
        try:
            from taskd.python.framework.agent.ms_mgr.msrun_plugin import MSRunPlugin
            self.msmgr = MSRunPlugin()
        except Exception as e:  # pylint: disable=broad-except
            logger.warning(f"Import mindx failed: {str(e)}, process controlled by msrun.")
            return False
        try:
            for name, func in func_map.items():
                self.msmgr.register_callbacks(name, func)
        except Exception as e:  # pylint: disable=broad-except
            logger.warning(f"Register callback func failed: {str(e)}, process controlled by msrun")
            return False
        return True

    def register_callback(self, func_map: dict):
        """Register the callbacks, preferring the taskd agent over the mindx plugin.

        Args:
            func_map (dict): Maps a callback name to the function to register.

        Returns:
            bool, ``True`` when one of the two registration paths succeeded, ``False`` when both failed.

        Raises:
            ValueError: If `func_map` is not a dict.
        """
        if not isinstance(func_map, dict):
            raise ValueError(f"The value of 'func_map' should be a dict, but got:{func_map}.")
        if self._register_by_agent(func_map):
            return True
        if self._register_by_plugin(func_map):
            return True
        return False

    def start(self):
        """Start the controller that the callbacks were registered with.

        Raises:
            RuntimeError: If the taskd agent is not in use and no ``MSRunPlugin`` instance is available.
        """
        if self.using_agent:
            logger.warning("start by taskd agent")
            self.start_taskd_agent()
        else:
            logger.warning("start by mindx")
            if self.msmgr is None:
                raise RuntimeError("Mindx unavailable, can not start training.")
            self.msmgr.start()
|
|
203
|
+
|
|
204
|
+
|
|
67
205
|
class TftHandle:
|
|
68
206
|
"""TftHandle class"""
|
|
69
207
|
|
|
70
208
|
def __init__(self):
|
|
71
209
|
super(TftHandle, self).__init__()
|
|
210
|
+
_parser_tft_and_thm_env()
|
|
72
211
|
self._controller_ip = None
|
|
73
212
|
self._controller_rank_id = None
|
|
74
213
|
self._controller_port = None
|
|
@@ -124,24 +263,21 @@ class TftHandle:
|
|
|
124
263
|
Args:
|
|
125
264
|
**kwargs: Reserved parameters.
|
|
126
265
|
"""
|
|
127
|
-
tft_env =
|
|
128
|
-
|
|
129
|
-
tft_enabled = any([opt in tft_env for opt in tft_opts])
|
|
266
|
+
tft_env, _ = _getenv()
|
|
267
|
+
tft_enabled = any([opt in tft_env for opt in TFTCommValue.NEED_MINDIO])
|
|
130
268
|
if not tft_enabled:
|
|
131
|
-
raise ValueError("MindIO TFT
|
|
269
|
+
raise ValueError(F"MindIO TFT register need custom switch on one of:{TFTCommValue.NEED_MINDIO}")
|
|
132
270
|
if "ARF:1" in tft_env:
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
os.environ["MS_ENABLE_TFT"] = "{
|
|
271
|
+
if "TTP:1" not in tft_env:
|
|
272
|
+
logger.warning(f"Turn on TTP config when using ARF.")
|
|
273
|
+
tft_env = tft_env.replace("{", "").replace("}", "")
|
|
274
|
+
all_opts = [part.strip() for part in tft_env.split(",")] + ["TTP:1"]
|
|
275
|
+
os.environ["MS_ENABLE_TFT"] = "{" + ",".join(all_opts) + "}"
|
|
138
276
|
os.environ["MS_ENABLE_RECOVERY"] = "1"
|
|
139
277
|
|
|
140
|
-
mode = context.get_context("mode")
|
|
141
278
|
device_target = context.get_context("device_target")
|
|
142
|
-
if device_target != "Ascend"
|
|
143
|
-
logger.warning(f"MindIO adataper only support on Ascend device
|
|
144
|
-
f"device:{device_target}, run mode: {mode}")
|
|
279
|
+
if device_target != "Ascend":
|
|
280
|
+
logger.warning(f"MindIO adataper only support on Ascend device but got device {device_target}!")
|
|
145
281
|
return
|
|
146
282
|
|
|
147
283
|
ctrl_port = int(os.getenv("MS_TFT_PORT"))
|
|
@@ -154,7 +290,7 @@ class TftHandle:
|
|
|
154
290
|
from mindio_ttp import framework_ttp as tft
|
|
155
291
|
self.tft = tft
|
|
156
292
|
except BaseException as e:
|
|
157
|
-
raise ModuleNotFoundError(f"Module
|
|
293
|
+
raise ModuleNotFoundError(f"Module not found. Detail info {str(e)}")
|
|
158
294
|
world_size = int(os.getenv("MS_WORKER_NUM")) # from msrun
|
|
159
295
|
cur_rank = int(os.getenv("MS_NODE_ID")) # from msrun
|
|
160
296
|
enable_local_copy = False
|
|
@@ -181,7 +317,7 @@ class TftHandle:
|
|
|
181
317
|
if self.tft.tft_is_reboot_node():
|
|
182
318
|
logger.warning("tft report reboot init finish ")
|
|
183
319
|
tft.tft_report_error(tft.ReportState.RS_INIT_FINISH.value)
|
|
184
|
-
|
|
320
|
+
set_is_reboot_node(True)
|
|
185
321
|
ret = tft.tft_wait_next_action()
|
|
186
322
|
if ret != tft.Action.RETRY.value:
|
|
187
323
|
raise RuntimeError(f"ARF init failed!")
|
mindspore/version.py
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
__version__ = '2.7.
|
|
1
|
+
__version__ = '2.7.1'
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: mindspore
|
|
3
|
-
Version: 2.7.
|
|
3
|
+
Version: 2.7.1
|
|
4
4
|
Summary: MindSpore is a new open source deep learning training/inference framework that could be used for mobile, edge and cloud scenarios.
|
|
5
5
|
Home-page: https://www.mindspore.cn
|
|
6
6
|
Download-URL: https://github.com/mindspore-ai/mindspore/tags
|
|
@@ -319,8 +319,9 @@ Project stable branches will be in one of the following states:
|
|
|
319
319
|
|
|
320
320
|
## Maintenance status
|
|
321
321
|
|
|
322
|
-
| **Version**| **Status**
|
|
322
|
+
| **Version**| **Status** | **Initial Release Date** | **Next Phase** | **EOL Date**|
|
|
323
323
|
|------------|--------------|--------------------------|----------------------------------------|-------------|
|
|
324
|
+
| **r2.7** | Maintained | 2025-08-08 | Unmaintained <br> 2026-08-08 estimated | 2026-08-08 |
|
|
324
325
|
| **r2.6** | Maintained | 2025-05-19 | Unmaintained <br> 2026-05-19 estimated | 2026-05-19 |
|
|
325
326
|
| **r2.5** | Maintained | 2025-02-08 | Unmaintained <br> 2026-02-08 estimated | 2026-02-08 |
|
|
326
327
|
| **r2.4** | Maintained | 2024-10-30 | Unmaintained <br> 2025-10-30 estimated | 2025-10-30 |
|