mindspore 2.6.0rc1__cp311-cp311-win_amd64.whl → 2.7.0__cp311-cp311-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mindspore might be problematic.
- mindspore/.commit_id +1 -1
- mindspore/Microsoft.VisualStudio.Telemetry.dll +0 -0
- mindspore/Newtonsoft.Json.dll +0 -0
- mindspore/__init__.py +2 -2
- mindspore/_c_dataengine.cp311-win_amd64.pyd +0 -0
- mindspore/_c_expression.cp311-win_amd64.pyd +0 -0
- mindspore/_c_mindrecord.cp311-win_amd64.pyd +0 -0
- mindspore/_checkparam.py +42 -11
- mindspore/_extends/builtin_operations.py +3 -3
- mindspore/{_deprecated → _extends/optimize}/__init__.py +9 -3
- mindspore/_extends/optimize/cell_utils.py +96 -0
- mindspore/_extends/parallel_compile/akg_compiler/custom.py +1109 -0
- mindspore/_extends/parallel_compile/akg_compiler/gen_custom_op_files.py +1 -1
- mindspore/_extends/parse/__init__.py +3 -3
- mindspore/_extends/parse/compile_config.py +44 -22
- mindspore/_extends/parse/deprecated/deprecated_tensor_method.py +1 -2
- mindspore/_extends/parse/parser.py +65 -84
- mindspore/_extends/parse/resources.py +39 -0
- mindspore/_extends/parse/standard_method.py +58 -14
- mindspore/_extends/parse/trope.py +8 -1
- mindspore/_extends/pijit/__init__.py +1 -2
- mindspore/_extends/pijit/pijit_func_white_list.py +2 -5
- mindspore/amp.py +4 -22
- mindspore/atlprov.dll +0 -0
- mindspore/avcodec-59.dll +0 -0
- mindspore/avdevice-59.dll +0 -0
- mindspore/avfilter-8.dll +0 -0
- mindspore/avformat-59.dll +0 -0
- mindspore/avutil-57.dll +0 -0
- mindspore/boost/adasum.py +1 -1
- mindspore/boost/boost_cell_wrapper.py +4 -4
- mindspore/c1.dll +0 -0
- mindspore/c1xx.dll +0 -0
- mindspore/c2.dll +0 -0
- mindspore/common/__init__.py +43 -12
- mindspore/common/_grad_function.py +2 -1
- mindspore/common/_pijit_context.py +28 -7
- mindspore/common/_stub_tensor.py +1 -209
- mindspore/common/_tensor_cpp_method.py +1 -1
- mindspore/common/_tensor_docs.py +178 -53
- mindspore/common/_utils.py +9 -1
- mindspore/common/api.py +377 -203
- mindspore/common/dtype.py +108 -57
- mindspore/common/dump.py +11 -16
- mindspore/common/dynamic_shape/__init__.py +0 -0
- mindspore/common/{auto_dynamic_shape.py → dynamic_shape/auto_dynamic_shape.py} +17 -23
- mindspore/common/dynamic_shape/enable_dynamic.py +197 -0
- mindspore/common/file_system.py +59 -9
- mindspore/common/generator.py +5 -3
- mindspore/common/hook_handle.py +33 -5
- mindspore/common/jit_config.py +1 -1
- mindspore/common/jit_trace.py +84 -105
- mindspore/common/np_dtype.py +3 -3
- mindspore/common/parameter.py +27 -29
- mindspore/common/recompute.py +5 -7
- mindspore/common/sparse_tensor.py +0 -3
- mindspore/common/symbol.py +0 -1
- mindspore/common/tensor.py +117 -131
- mindspore/communication/_comm_helper.py +46 -4
- mindspore/communication/management.py +79 -7
- mindspore/context.py +67 -55
- mindspore/dataset/__init__.py +1 -1
- mindspore/dataset/audio/transforms.py +1 -1
- mindspore/dataset/core/config.py +38 -4
- mindspore/dataset/engine/datasets.py +350 -322
- mindspore/dataset/engine/datasets_user_defined.py +70 -24
- mindspore/dataset/engine/iterators.py +2 -2
- mindspore/dataset/engine/obs/config_loader.py +2 -2
- mindspore/dataset/engine/obs/obs_mindrecord_dataset.py +8 -0
- mindspore/dataset/transforms/c_transforms.py +2 -2
- mindspore/dataset/transforms/py_transforms.py +7 -3
- mindspore/dataset/transforms/transforms.py +10 -6
- mindspore/dataset/vision/__init__.py +1 -1
- mindspore/dataset/vision/py_transforms.py +8 -8
- mindspore/dataset/vision/transforms.py +17 -5
- mindspore/dataset/vision/utils.py +632 -21
- mindspore/dataset/vision/validators.py +1 -0
- mindspore/device_context/ascend/device.py +1 -1
- mindspore/device_context/ascend/op_tuning.py +35 -1
- mindspore/device_context/gpu/__init__.py +2 -2
- mindspore/device_context/gpu/device.py +1 -1
- mindspore/device_context/gpu/op_precision.py +4 -2
- mindspore/device_context/gpu/op_tuning.py +6 -3
- mindspore/device_manager.py +16 -9
- mindspore/dnnl.dll +0 -0
- mindspore/dpcmi.dll +0 -0
- mindspore/experimental/llm_boost/ascend_native/llama_boost_ascend_native.py +3 -4
- mindspore/experimental/llm_boost/atb/boost_base.py +2 -3
- mindspore/experimental/optim/adadelta.py +13 -20
- mindspore/experimental/optim/adagrad.py +15 -22
- mindspore/experimental/optim/adam.py +17 -24
- mindspore/experimental/optim/adamax.py +14 -22
- mindspore/experimental/optim/adamw.py +28 -34
- mindspore/experimental/optim/asgd.py +15 -25
- mindspore/experimental/optim/lr_scheduler.py +27 -45
- mindspore/experimental/optim/nadam.py +14 -24
- mindspore/experimental/optim/optimizer.py +13 -23
- mindspore/experimental/optim/radam.py +18 -24
- mindspore/experimental/optim/rmsprop.py +14 -25
- mindspore/experimental/optim/rprop.py +15 -26
- mindspore/experimental/optim/sgd.py +9 -19
- mindspore/hal/__init__.py +4 -4
- mindspore/hal/contiguous_tensors_handle.py +2 -2
- mindspore/hal/memory.py +27 -7
- mindspore/include/api/cell.h +65 -5
- mindspore/include/api/cfg.h +24 -7
- mindspore/include/api/context.h +1 -0
- mindspore/include/api/delegate.h +10 -2
- mindspore/include/api/dual_abi_helper.h +100 -19
- mindspore/include/api/graph.h +14 -1
- mindspore/include/api/kernel.h +16 -3
- mindspore/include/api/kernel_api.h +9 -1
- mindspore/include/api/metrics/accuracy.h +9 -0
- mindspore/include/api/model.h +8 -1
- mindspore/include/api/model_group.h +4 -0
- mindspore/include/api/model_parallel_runner.h +2 -0
- mindspore/include/api/status.h +48 -10
- mindspore/include/api/types.h +8 -3
- mindspore/include/c_api/model_c.h +0 -58
- mindspore/include/c_api/tensor_c.h +0 -26
- mindspore/include/dataset/constants.h +9 -0
- mindspore/include/dataset/vision_ascend.h +1 -1
- mindspore/jpeg62.dll +0 -0
- mindspore/mindrecord/tools/cifar10.py +61 -11
- mindspore/mindrecord/tools/cifar10_to_mr.py +5 -0
- mindspore/mindspore_backend_common.dll +0 -0
- mindspore/mindspore_backend_manager.dll +0 -0
- mindspore/mindspore_common.dll +0 -0
- mindspore/mindspore_core.dll +0 -0
- mindspore/mindspore_cpu_res_manager.dll +0 -0
- mindspore/mindspore_dump.dll +0 -0
- mindspore/mindspore_frontend.dll +0 -0
- mindspore/mindspore_glog.dll +0 -0
- mindspore/mindspore_memory_pool.dll +0 -0
- mindspore/mindspore_ms_backend.dll +0 -0
- mindspore/mindspore_ops.dll +0 -0
- mindspore/mindspore_ops_host.dll +0 -0
- mindspore/mindspore_ops_kernel_common.dll +0 -0
- mindspore/mindspore_profiler.dll +0 -0
- mindspore/mindspore_pyboost.dll +0 -0
- mindspore/mindspore_pynative.dll +0 -0
- mindspore/mindspore_res_manager.dll +0 -0
- mindspore/mindspore_runtime_pipeline.dll +0 -0
- mindspore/mint/__init__.py +6 -46
- mindspore/mint/distributed/__init__.py +5 -0
- mindspore/mint/distributed/distributed.py +429 -23
- mindspore/mint/nn/__init__.py +1 -1
- mindspore/mint/nn/functional.py +53 -6
- mindspore/mint/nn/layer/_functions.py +163 -294
- mindspore/mint/nn/layer/activation.py +8 -6
- mindspore/mint/nn/layer/conv.py +140 -104
- mindspore/mint/nn/layer/normalization.py +11 -25
- mindspore/mint/optim/adam.py +19 -18
- mindspore/mint/optim/adamw.py +14 -8
- mindspore/mint/optim/sgd.py +5 -5
- mindspore/msobj140.dll +0 -0
- mindspore/mspdb140.dll +0 -0
- mindspore/mspdbcore.dll +0 -0
- mindspore/mspdbst.dll +0 -0
- mindspore/mspft140.dll +0 -0
- mindspore/msvcdis140.dll +0 -0
- mindspore/msvcp140_1.dll +0 -0
- mindspore/msvcp140_2.dll +0 -0
- mindspore/msvcp140_atomic_wait.dll +0 -0
- mindspore/msvcp140_codecvt_ids.dll +0 -0
- mindspore/nn/cell.py +491 -623
- mindspore/nn/grad/cell_grad.py +11 -12
- mindspore/nn/layer/activation.py +36 -36
- mindspore/nn/layer/basic.py +74 -77
- mindspore/nn/layer/channel_shuffle.py +4 -4
- mindspore/nn/layer/combined.py +4 -2
- mindspore/nn/layer/conv.py +117 -110
- mindspore/nn/layer/dense.py +9 -7
- mindspore/nn/layer/embedding.py +50 -52
- mindspore/nn/layer/image.py +38 -40
- mindspore/nn/layer/math.py +111 -112
- mindspore/nn/layer/normalization.py +56 -44
- mindspore/nn/layer/pooling.py +58 -63
- mindspore/nn/layer/rnn_cells.py +33 -33
- mindspore/nn/layer/rnns.py +56 -56
- mindspore/nn/layer/thor_layer.py +74 -73
- mindspore/nn/layer/transformer.py +11 -1
- mindspore/nn/learning_rate_schedule.py +20 -20
- mindspore/nn/loss/loss.py +79 -81
- mindspore/nn/optim/adam.py +4 -6
- mindspore/nn/optim/adasum.py +2 -2
- mindspore/nn/optim/asgd.py +2 -0
- mindspore/nn/optim/lamb.py +1 -3
- mindspore/nn/optim/optimizer.py +1 -1
- mindspore/nn/optim/tft_wrapper.py +2 -3
- mindspore/nn/optim/thor.py +2 -2
- mindspore/nn/probability/distribution/_utils/utils.py +2 -2
- mindspore/nn/probability/distribution/exponential.py +2 -1
- mindspore/nn/probability/distribution/poisson.py +2 -1
- mindspore/nn/sparse/sparse.py +3 -3
- mindspore/nn/wrap/cell_wrapper.py +73 -42
- mindspore/nn/wrap/grad_reducer.py +37 -52
- mindspore/nn/wrap/loss_scale.py +72 -74
- mindspore/numpy/array_creations.py +7 -7
- mindspore/numpy/fft.py +1 -1
- mindspore/numpy/math_ops.py +5 -5
- mindspore/numpy/utils_const.py +1 -1
- mindspore/opencv_core452.dll +0 -0
- mindspore/opencv_imgcodecs452.dll +0 -0
- mindspore/opencv_imgproc452.dll +0 -0
- mindspore/ops/_grad_experimental/grad_comm_ops.py +51 -13
- mindspore/ops/_grad_experimental/grad_debug_ops.py +14 -0
- mindspore/ops/_grad_experimental/grad_inner_ops.py +0 -9
- mindspore/ops/_op_impl/cpu/__init__.py +1 -0
- mindspore/{experimental/es/__init__.py → ops/_op_impl/cpu/joinedstr_op.py} +12 -6
- mindspore/ops/_vmap/vmap_array_ops.py +31 -13
- mindspore/ops/_vmap/vmap_nn_ops.py +8 -16
- mindspore/ops/auto_generate/cpp_create_prim_instance_helper.py +54 -13
- mindspore/ops/auto_generate/gen_extend_func.py +27 -145
- mindspore/ops/auto_generate/gen_ops_def.py +1027 -347
- mindspore/ops/auto_generate/gen_ops_prim.py +2341 -1117
- mindspore/ops/auto_generate/pyboost_inner_prim.py +31 -1
- mindspore/ops/composite/__init__.py +10 -0
- mindspore/ops/composite/base.py +9 -5
- mindspore/ops/composite/multitype_ops/__init__.py +12 -1
- mindspore/ops/composite/multitype_ops/_compile_utils.py +133 -109
- mindspore/ops/composite/multitype_ops/_constexpr_utils.py +1 -1
- mindspore/ops/composite/multitype_ops/add_impl.py +70 -2
- mindspore/ops/composite/multitype_ops/div_impl.py +49 -0
- mindspore/ops/composite/multitype_ops/floordiv_impl.py +29 -0
- mindspore/ops/composite/multitype_ops/getitem_impl.py +11 -0
- mindspore/ops/composite/multitype_ops/mod_impl.py +5 -3
- mindspore/ops/composite/multitype_ops/mul_impl.py +49 -0
- mindspore/ops/composite/multitype_ops/setitem_impl.py +57 -0
- mindspore/ops/composite/multitype_ops/sub_impl.py +34 -0
- mindspore/ops/composite/multitype_ops/zeros_like_impl.py +14 -0
- mindspore/ops/function/__init__.py +4 -1
- mindspore/ops/function/_add_attr_func.py +11 -6
- mindspore/ops/function/array_func.py +19 -102
- mindspore/ops/function/debug_func.py +8 -5
- mindspore/ops/function/grad/grad_func.py +5 -13
- mindspore/ops/function/math_func.py +77 -572
- mindspore/ops/function/nn_func.py +46 -94
- mindspore/ops/function/other_func.py +4 -1
- mindspore/ops/function/random_func.py +44 -5
- mindspore/ops/function/vmap_func.py +2 -1
- mindspore/ops/functional.py +4 -4
- mindspore/ops/functional_overload.py +594 -18
- mindspore/ops/op_info_register.py +21 -0
- mindspore/ops/operations/__init__.py +16 -11
- mindspore/ops/operations/_custom_ops_utils.py +689 -34
- mindspore/ops/operations/_inner_ops.py +14 -18
- mindspore/ops/operations/_sequence_ops.py +1 -1
- mindspore/ops/operations/array_ops.py +5 -51
- mindspore/ops/operations/comm_ops.py +186 -41
- mindspore/ops/operations/custom_ops.py +303 -177
- mindspore/ops/operations/debug_ops.py +59 -4
- mindspore/ops/operations/image_ops.py +13 -13
- mindspore/ops/operations/manually_defined/ops_def.py +27 -28
- mindspore/ops/operations/math_ops.py +8 -9
- mindspore/ops/operations/nn_ops.py +8 -40
- mindspore/ops/primitive.py +9 -20
- mindspore/ops/tensor_method.py +63 -15
- mindspore/ops_generate/api/cpp_create_prim_instance_helper_generator.py +1 -1
- mindspore/ops_generate/api/functional_map_cpp_generator.py +10 -9
- mindspore/ops_generate/api/functions_cc_generator.py +58 -10
- mindspore/ops_generate/api/tensor_func_reg_cpp_generator.py +1 -1
- mindspore/ops_generate/common/base_generator.py +14 -0
- mindspore/ops_generate/common/gen_constants.py +8 -3
- mindspore/ops_generate/common/gen_utils.py +0 -19
- mindspore/ops_generate/common/op_proto.py +11 -4
- mindspore/ops_generate/common/template.py +88 -11
- mindspore/ops_generate/gen_ops.py +1 -1
- mindspore/ops_generate/op_def/lite_ops_cpp_generator.py +4 -4
- mindspore/ops_generate/op_def/ops_def_cc_generator.py +0 -3
- mindspore/ops_generate/op_def/ops_name_h_generator.py +0 -3
- mindspore/ops_generate/op_def/ops_primitive_h_generator.py +0 -4
- mindspore/ops_generate/op_def_py/op_prim_py_generator.py +5 -2
- mindspore/ops_generate/pyboost/auto_grad_impl_cc_generator.py +49 -8
- mindspore/ops_generate/pyboost/auto_grad_reg_cc_generator.py +2 -2
- mindspore/ops_generate/pyboost/gen_pyboost_func.py +31 -16
- mindspore/ops_generate/pyboost/op_template_parser.py +98 -72
- mindspore/ops_generate/pyboost/pyboost_functions_cpp_generator.py +70 -273
- mindspore/ops_generate/pyboost/pyboost_functions_h_generator.py +14 -6
- mindspore/ops_generate/pyboost/pyboost_functions_impl_cpp_generator.py +316 -0
- mindspore/ops_generate/pyboost/pyboost_functions_py_generator.py +1 -1
- mindspore/ops_generate/pyboost/pyboost_grad_function_cpp_generator.py +5 -3
- mindspore/ops_generate/pyboost/pyboost_inner_prim_generator.py +1 -1
- mindspore/ops_generate/pyboost/pyboost_internal_functions_cpp_generator.py +76 -0
- mindspore/ops_generate/pyboost/pyboost_internal_functions_h_generator.py +76 -0
- mindspore/ops_generate/pyboost/pyboost_internal_kernel_info_adapter_generator.py +125 -0
- mindspore/ops_generate/pyboost/pyboost_native_grad_functions_generator.py +4 -3
- mindspore/ops_generate/pyboost/pyboost_op_cpp_code_generator.py +348 -61
- mindspore/ops_generate/pyboost/pyboost_overload_functions_cpp_generator.py +1 -1
- mindspore/ops_generate/pyboost/pyboost_utils.py +118 -9
- mindspore/ops_generate/tensor_py_cc_generator.py +1 -24
- mindspore/parallel/_auto_parallel_context.py +16 -23
- mindspore/parallel/_cell_wrapper.py +113 -45
- mindspore/parallel/_parallel_serialization.py +4 -3
- mindspore/parallel/_ps_context.py +4 -6
- mindspore/parallel/_tensor.py +167 -12
- mindspore/parallel/_transformer/moe.py +1 -1
- mindspore/parallel/_transformer/transformer.py +17 -12
- mindspore/parallel/_utils.py +5 -11
- mindspore/parallel/auto_parallel.py +35 -14
- mindspore/parallel/checkpoint_convert.py +3 -3
- mindspore/parallel/checkpoint_transform.py +13 -7
- mindspore/parallel/cluster/process_entity/_api.py +88 -49
- mindspore/parallel/cluster/process_entity/_utils.py +95 -7
- mindspore/parallel/cluster/run.py +48 -7
- mindspore/parallel/function/__init__.py +8 -1
- mindspore/parallel/function/reshard_func.py +12 -12
- mindspore/parallel/nn/__init__.py +15 -2
- mindspore/parallel/nn/parallel_cell_wrapper.py +50 -14
- mindspore/parallel/nn/parallel_grad_reducer.py +7 -14
- mindspore/parallel/shard.py +10 -25
- mindspore/parallel/transform_safetensors.py +469 -174
- mindspore/pgodb140.dll +0 -0
- mindspore/pgort140.dll +0 -0
- mindspore/profiler/__init__.py +2 -1
- mindspore/profiler/analysis/parser/timeline_assembly_factory/ascend_timeline_assembler.py +7 -7
- mindspore/profiler/analysis/parser/timeline_assembly_factory/base_timeline_assembler.py +3 -0
- mindspore/profiler/analysis/parser/timeline_assembly_factory/trace_view_container.py +12 -6
- mindspore/profiler/analysis/parser/timeline_creator/cpu_op_timeline_creator.py +3 -3
- mindspore/profiler/analysis/parser/timeline_creator/fwk_timeline_creator.py +3 -3
- mindspore/profiler/analysis/parser/timeline_creator/msprof_timeline_creator.py +4 -4
- mindspore/profiler/analysis/parser/timeline_creator/scope_layer_timeline_creator.py +3 -3
- mindspore/profiler/analysis/parser/timeline_event/fwk_event.py +4 -1
- mindspore/profiler/analysis/parser/timeline_event/timeline_event_pool.py +2 -1
- mindspore/profiler/analysis/task_manager.py +1 -1
- mindspore/profiler/analysis/viewer/ascend_communication_viewer.py +5 -1
- mindspore/profiler/analysis/viewer/ascend_integrate_viewer.py +2 -1
- mindspore/profiler/analysis/viewer/ascend_kernel_details_viewer.py +10 -9
- mindspore/profiler/analysis/viewer/ascend_op_memory_viewer.py +43 -23
- mindspore/profiler/analysis/viewer/ascend_step_trace_time_viewer.py +3 -2
- mindspore/profiler/analysis/viewer/ms_minddata_viewer.py +9 -5
- mindspore/profiler/analysis/viewer/ms_operator_details_viewer.py +132 -0
- mindspore/profiler/common/constant.py +16 -0
- mindspore/profiler/common/msprof_cmd_tool.py +2 -2
- mindspore/profiler/common/path_manager.py +9 -0
- mindspore/profiler/common/profiler_context.py +50 -29
- mindspore/profiler/common/profiler_info.py +0 -16
- mindspore/profiler/common/profiler_meta_data.py +1 -0
- mindspore/profiler/common/profiler_op_analyse.py +239 -0
- mindspore/profiler/common/profiler_output_path.py +23 -8
- mindspore/profiler/common/profiler_parameters.py +128 -35
- mindspore/profiler/dynamic_profile/__init__.py +0 -0
- mindspore/profiler/dynamic_profile/dynamic_monitor_proxy.py +39 -0
- mindspore/profiler/dynamic_profile/dynamic_profiler_config_context.py +666 -0
- mindspore/profiler/dynamic_profile/dynamic_profiler_utils.py +62 -0
- mindspore/profiler/dynamic_profiler.py +374 -338
- mindspore/profiler/envprofiler.py +42 -12
- mindspore/profiler/experimental_config.py +112 -7
- mindspore/profiler/mstx.py +33 -12
- mindspore/profiler/platform/__init__.py +2 -3
- mindspore/profiler/platform/cpu_profiler.py +10 -4
- mindspore/profiler/platform/npu_profiler.py +30 -20
- mindspore/profiler/profiler.py +218 -154
- mindspore/profiler/profiler_action_controller.py +65 -77
- mindspore/profiler/profiler_interface.py +2 -2
- mindspore/profiler/schedule.py +10 -4
- mindspore/rewrite/common/config.py +1 -0
- mindspore/rewrite/common/namer.py +1 -0
- mindspore/rewrite/common/namespace.py +1 -0
- mindspore/rewrite/node/node.py +31 -11
- mindspore/rewrite/parsers/assign_parser.py +1 -1
- mindspore/rewrite/symbol_tree/symbol_tree.py +2 -2
- mindspore/run_check/_check_version.py +7 -10
- mindspore/runtime/__init__.py +8 -6
- mindspore/runtime/event.py +10 -4
- mindspore/runtime/executor.py +87 -45
- mindspore/runtime/memory.py +31 -32
- mindspore/runtime/thread_bind_core.py +299 -165
- mindspore/safeguard/rewrite_obfuscation.py +12 -13
- mindspore/swresample-4.dll +0 -0
- mindspore/swscale-6.dll +0 -0
- mindspore/tbbmalloc.dll +0 -0
- mindspore/tinyxml2.dll +0 -0
- mindspore/train/_utils.py +17 -7
- mindspore/train/amp.py +43 -23
- mindspore/train/callback/__init__.py +5 -5
- mindspore/train/callback/_callback.py +2 -1
- mindspore/train/callback/_checkpoint.py +4 -14
- mindspore/train/callback/_flops_collector.py +11 -7
- mindspore/train/callback/_landscape.py +0 -1
- mindspore/train/callback/_train_fault_tolerance.py +98 -21
- mindspore/train/data_sink.py +15 -6
- mindspore/train/dataset_helper.py +14 -5
- mindspore/train/model.py +133 -69
- mindspore/train/serialization.py +168 -126
- mindspore/train/summary/summary_record.py +13 -2
- mindspore/train/train_thor/model_thor.py +2 -2
- mindspore/turbojpeg.dll +0 -0
- mindspore/utils/__init__.py +3 -2
- mindspore/utils/dryrun.py +0 -6
- mindspore/utils/runtime_execution_order_check.py +163 -77
- mindspore/utils/sdc_detect.py +68 -0
- mindspore/utils/utils.py +14 -17
- mindspore/vcmeta.dll +0 -0
- mindspore/vcruntime140.dll +0 -0
- mindspore/vcruntime140_1.dll +0 -0
- mindspore/version.py +1 -1
- {mindspore-2.6.0rc1.dist-info → mindspore-2.7.0.dist-info}/METADATA +5 -4
- {mindspore-2.6.0rc1.dist-info → mindspore-2.7.0.dist-info}/RECORD +403 -442
- mindspore/_deprecated/jit.py +0 -198
- mindspore/_extends/remote/kernel_build_server_ascend.py +0 -75
- mindspore/communication/_hccl_management.py +0 -297
- mindspore/experimental/es/embedding_service.py +0 -891
- mindspore/experimental/es/embedding_service_layer.py +0 -581
- mindspore/profiler/common/validator/__init__.py +0 -14
- mindspore/profiler/common/validator/validate_path.py +0 -84
- mindspore/profiler/parser/__init__.py +0 -14
- mindspore/profiler/parser/aicpu_data_parser.py +0 -272
- mindspore/profiler/parser/ascend_analysis/__init__.py +0 -14
- mindspore/profiler/parser/ascend_analysis/constant.py +0 -71
- mindspore/profiler/parser/ascend_analysis/file_manager.py +0 -180
- mindspore/profiler/parser/ascend_analysis/function_event.py +0 -185
- mindspore/profiler/parser/ascend_analysis/fwk_cann_parser.py +0 -136
- mindspore/profiler/parser/ascend_analysis/fwk_file_parser.py +0 -131
- mindspore/profiler/parser/ascend_analysis/msprof_timeline_parser.py +0 -104
- mindspore/profiler/parser/ascend_analysis/path_manager.py +0 -313
- mindspore/profiler/parser/ascend_analysis/profiler_info_parser.py +0 -123
- mindspore/profiler/parser/ascend_analysis/tlv_decoder.py +0 -86
- mindspore/profiler/parser/ascend_analysis/trace_event_manager.py +0 -75
- mindspore/profiler/parser/ascend_cluster_generator.py +0 -116
- mindspore/profiler/parser/ascend_communicate_generator.py +0 -314
- mindspore/profiler/parser/ascend_flops_generator.py +0 -116
- mindspore/profiler/parser/ascend_fpbp_generator.py +0 -82
- mindspore/profiler/parser/ascend_hccl_generator.py +0 -271
- mindspore/profiler/parser/ascend_integrate_generator.py +0 -42
- mindspore/profiler/parser/ascend_memory_generator.py +0 -185
- mindspore/profiler/parser/ascend_msprof_exporter.py +0 -282
- mindspore/profiler/parser/ascend_msprof_generator.py +0 -187
- mindspore/profiler/parser/ascend_op_generator.py +0 -334
- mindspore/profiler/parser/ascend_steptrace_generator.py +0 -94
- mindspore/profiler/parser/ascend_timeline_generator.py +0 -545
- mindspore/profiler/parser/base_timeline_generator.py +0 -483
- mindspore/profiler/parser/container.py +0 -229
- mindspore/profiler/parser/cpu_gpu_timeline_generator.py +0 -697
- mindspore/profiler/parser/flops_parser.py +0 -531
- mindspore/profiler/parser/framework_enum.py +0 -111
- mindspore/profiler/parser/framework_parser.py +0 -464
- mindspore/profiler/parser/framework_struct.py +0 -61
- mindspore/profiler/parser/gpu_analysis/__init__.py +0 -14
- mindspore/profiler/parser/gpu_analysis/function_event.py +0 -44
- mindspore/profiler/parser/gpu_analysis/fwk_file_parser.py +0 -89
- mindspore/profiler/parser/gpu_analysis/profiler_info_parser.py +0 -72
- mindspore/profiler/parser/hccl_parser.py +0 -573
- mindspore/profiler/parser/hwts_log_parser.py +0 -122
- mindspore/profiler/parser/integrator.py +0 -526
- mindspore/profiler/parser/memory_usage_parser.py +0 -277
- mindspore/profiler/parser/minddata_analyzer.py +0 -800
- mindspore/profiler/parser/minddata_parser.py +0 -186
- mindspore/profiler/parser/minddata_pipeline_parser.py +0 -299
- mindspore/profiler/parser/op_intermediate_parser.py +0 -149
- mindspore/profiler/parser/optime_parser.py +0 -250
- mindspore/profiler/parser/profiler_info.py +0 -213
- mindspore/profiler/parser/step_trace_parser.py +0 -666
- mindspore/utils/hooks.py +0 -81
- /mindspore/common/{_auto_dynamic.py → dynamic_shape/_auto_dynamic.py} +0 -0
- {mindspore-2.6.0rc1.dist-info → mindspore-2.7.0.dist-info}/WHEEL +0 -0
- {mindspore-2.6.0rc1.dist-info → mindspore-2.7.0.dist-info}/entry_points.txt +0 -0
- {mindspore-2.6.0rc1.dist-info → mindspore-2.7.0.dist-info}/top_level.txt +0 -0
mindspore/train/summary/summary_record.py
CHANGED

@@ -369,7 +369,19 @@ class SummaryRecord:
         global SUMMARY_TENSOR_CACHE
         for tag in tags:
             item_name = name + tag
+            time_out = 30
+            start_time = time.time()
+            last_size = len(SUMMARY_TENSOR_CACHE)
             while item_name not in SUMMARY_TENSOR_CACHE:
+                current_size = len(SUMMARY_TENSOR_CACHE)
+                if current_size != last_size:
+                    start_time = time.time()
+                    last_size = current_size
+                if time.time() - start_time > time_out:
+                    raise RuntimeError(
+                        f"For '{self.__class__.__name__}', {tag} summary op sync tag "
+                        f"was not received within {time_out} seconds, indicating potential mbuf issues."
+                    )
                 time.sleep(0.004)

         with _summary_lock:

@@ -416,8 +428,7 @@
         if graph_proto is None and train_network is not None:
             graph_proto = _cell_graph_executor.get_optimize_graph_proto(train_network)
         if graph_proto is None:
-
-            logger.error("Failed to get proto for graph.")
+            logger.warning("Failed to get proto for graph.")
         else:
            self._event_writer.write({'graph': [{'step': step, 'value': graph_proto}]})
            self._status['has_graph'] = True
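The timeout added above is progress-aware: the 30-second window restarts whenever SUMMARY_TENSOR_CACHE grows, so a slow but still advancing summary sync is not aborted. Below is a minimal standalone sketch of the same polling pattern (illustrative only; wait_for_key and cache are hypothetical names, not MindSpore API):

import time

def wait_for_key(cache, key, time_out=30, poll=0.004):
    """Block until `key` appears in `cache`; restart the timer whenever the cache grows."""
    start_time = time.time()
    last_size = len(cache)
    while key not in cache:
        current_size = len(cache)
        if current_size != last_size:  # progress was made, reset the timeout window
            start_time = time.time()
            last_size = current_size
        if time.time() - start_time > time_out:
            raise RuntimeError(f"'{key}' was not received within {time_out} seconds.")
        time.sleep(poll)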
mindspore/train/train_thor/model_thor.py
CHANGED

@@ -29,7 +29,7 @@ from mindspore import nn
 from mindspore.train.model import Model
 from mindspore.train.dataset_helper import connect_network_with_dataset
 from mindspore.parallel._utils import _need_to_full, _to_full_tensor
-from mindspore.common.dtype import
+from mindspore.common.dtype import _pytype_to_dtype
 from mindspore._c_expression import init_exec_dataset
 from mindspore.train.train_thor.dataset_helper import DatasetHelper

@@ -46,7 +46,7 @@ def _convert_to_ms_type(types):
     """
     ms_types = []
     for numpy_type in types:
-        ms_type =
+        ms_type = _pytype_to_dtype(numpy_type)  # pylint:disable=protected-access
         ms_types.append(ms_type)
     return ms_types

mindspore/turbojpeg.dll
CHANGED
Binary file
mindspore/utils/__init__.py
CHANGED

@@ -16,9 +16,10 @@
 from __future__ import absolute_import
 from mindspore._c_expression import stress_detect, _reuse_data_ptr
 from .utils import ExitByRequest, _tft_handler
-from .runtime_execution_order_check import runtime_execution_order_check
+from .runtime_execution_order_check import runtime_execution_order_check, comm_exec_order_check
+from .sdc_detect import sdc_detect_start, sdc_detect_stop, get_sdc_detect_result
 from . import dryrun

 # Symbols from utils module.
 __all__ = ["stress_detect", "ExitByRequest", "runtime_execution_order_check", "dryrun", "_reuse_data_ptr",
-           "_tft_handler"]
+           "_tft_handler", "comm_exec_order_check", "sdc_detect_start", "sdc_detect_stop", "get_sdc_detect_result"]
mindspore/utils/dryrun.py
CHANGED

@@ -17,7 +17,6 @@ import traceback
 import os
 from mindspore.common import Tensor
 from mindspore import log as logger
-from mindspore.common._stub_tensor import StubTensor
 from mindspore.common import dtype as mstype
 from mindspore._checkparam import is_stub_tensor

@@ -78,12 +77,7 @@ def set_simulation():
     Tensor._getitem = obj.inject(Tensor._getitem)
     Tensor.is_contiguous = obj.inject(Tensor.is_contiguous)
     Tensor.flush_from_cache = obj.inject(Tensor.flush_from_cache)
-    StubTensor.asnumpy = obj.inject(StubTensor.asnumpy)
-    StubTensor._getitem = obj.inject(StubTensor._getitem)
-    StubTensor.is_contiguous = obj.inject(StubTensor.is_contiguous)
-    StubTensor.flush_from_cache = obj.inject(StubTensor.flush_from_cache)
     Tensor.__str__ = no_inject_traceback_for_print
-    StubTensor.__str__ = no_inject_traceback_for_print
     Tensor.tolist = obj.inject(Tensor.tolist)
     Tensor.__int__ = obj.inject(Tensor.__int__)
     Tensor.__float__ = obj.inject(Tensor.__float__)
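The removed lines track the deletion of mindspore/common/_stub_tensor.py elsewhere in this release, so set_simulation() now only patches Tensor methods. For readers unfamiliar with the pattern, here is a generic sketch of method injection of this kind (this is not the MindSpore implementation, which substitutes simulated values; Injector and FakeTensor are made-up names):

import functools

class Injector:
    """Wrap a method so each call is reported before delegating to the original."""
    def inject(self, func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            print(f"[dryrun] value-dependent call: {func.__qualname__}")
            return func(*args, **kwargs)
        return wrapper

class FakeTensor:
    def tolist(self):
        return [0]

obj = Injector()
FakeTensor.tolist = obj.inject(FakeTensor.tolist)
print(FakeTensor().tolist())  # prints the notice, then [0]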
mindspore/utils/runtime_execution_order_check.py
CHANGED

@@ -22,9 +22,86 @@ from multiprocessing import cpu_count
 from typing import List, Dict, Union, Optional
 import sys
 import mindspore.log as logger
+from mindspore._c_expression import CommExecOrderChecker

 # Set Recursion Depth Limit
 sys.setrecursionlimit(10000)
+# support hccl group 150000 card
+csv.field_size_limit(1024 * 1024 * 10)
+
+
+def comm_exec_order_check(action):
+    """
+    Call the CommExecOrderCheck class to start the collection of communication operator execution sequences
+    or stop the collection and validate the execution order.
+
+    Args:
+        action (str): Control command - 'start' to begin collection, 'end' to stop and validate.
+
+    Supported Platforms:
+        ``Ascend``
+
+    Examples:
+        >>> import mindspore as ms
+        >>> from mindspore.utils import comm_exec_order_check
+        >>> comm_exec_order_check("start")
+        >>> model.train(1, train_dataset)
+        >>> comm_exec_order_check("end")
+    """
+    if not isinstance(action, str):
+        raise TypeError("The 'action' parameter must be a string.")
+    checker = CommExecOrderCheck()
+    checker(action)
+
+
+class CommExecOrderCheck:
+    """Controller for communication execution order verification.
+
+    Provides interface for starting/stopping the collection of communication
+    operator execution sequences. Integrates with C++ backend for actual
+    order tracking.
+    """
+    _instance = None
+
+    def __new__(cls, *args, **kwargs):
+        if cls._instance is None:
+            cls._instance = super().__new__(cls)
+        return cls._instance
+
+    def __init__(self):
+        if not hasattr(self, 'initialized'):
+            self.action = None
+            self.order_checker = CommExecOrderChecker.get_instance()
+            self.is_collecting = False
+            self.initialized = True
+
+    def __call__(self, action):
+        """
+        Args:
+            action (str): Control command - 'start' to begin collection,
+                'end' to stop and validate
+        """
+        self.action = action
+        if action == "start":
+            self.start_function()
+        elif action == "end":
+            self.end_function()
+        else:
+            raise ValueError("Invalid action. Please use 'start' or 'end'.")
+
+    def start_function(self):
+        if self.is_collecting:
+            logger.error("The 'start' action cannot be called twice.")
+            return
+        self.is_collecting = True
+        self.order_checker.start_collect_exec_order()
+
+    def end_function(self):
+        if not self.is_collecting:
+            logger.error("The 'end' action cannot be called before the 'start' action.")
+            return
+        self.is_collecting = False
+        self.order_checker.stop_collect_exec_order()


 class ExecuteOrder:

@@ -52,12 +129,12 @@ class ExecuteOrder:
             comm_str = ",".join(self.comm_rank)
             return f"{self.primitive}_{self.group}_({comm_str})"

-        if self.primitive
+        if self.primitive in ["Send", "DistCommIsend", "InnerCommIsend"]:
             # Unique base key of the Send operation.
             return f"Send_Receive_{self.group}_({rank})->({self.dest_rank})_{self.input_shape}"

-        if self.primitive
-            # Unique base key of the
+        if self.primitive in ["Receive", "DistCommIrecv", "InnerCommIrecv"]:
+            # Unique base key of the Recv operation
             return f"Send_Receive_{self.group}_({self.src_rank})->({rank})_{self.output_shape}"

         # Other operations, such as broadCast

@@ -200,10 +277,14 @@ class RankFolderParser:
             rank_id = os.path.basename(path).split("_")[1]
             # Adding one more layer to access the "execute_order" folder
             execute_order_path = os.path.join(path, "execute_order")
-            if os.path.exists(execute_order_path):
-
-
-
+            if not os.path.exists(execute_order_path):
+                raise FileNotFoundError(
+                    f"Execute order folder does not exist: {execute_order_path} "
+                    f"for rank_{rank_id} folder."
+                )
+            rank_result = self.parse_rank_folder(execute_order_path, rank_id)
+            if rank_result:
+                result[rank_id] = rank_result[1]  # Extract execute orders
             return result

         # If the path is a directory containing rank_{x} folders, parse all

@@ -215,8 +296,12 @@ class RankFolderParser:
                 rank_folder_path = os.path.join(path, d)
                 execute_order_path = os.path.join(rank_folder_path, "execute_order")

-                if os.path.exists(execute_order_path):
-
+                if not os.path.exists(execute_order_path):
+                    raise FileNotFoundError(
+                        f"Execute order folder does not exist: {execute_order_path} "
+                        f"for rank_{rank_id} folder."
+                    )
+                futures.append(thread_executor.submit(self.parse_rank_folder, execute_order_path, rank_id))

             for future in as_completed(futures):
                 try:

@@ -283,23 +368,22 @@

 def modify_execute_orders(execute_orders_map: dict) -> dict:
     """
-
+    Modify and generate unique execution order keys for each rank.

-
-
-
-
+    This function processes a mapping of execution orders grouped by ranks. For each order,
+    it generates a unique key by combining a base key and a counter, ensuring all orders
+    are uniquely identifiable. The result is a dictionary where the keys are rank identifiers
+    and the values are lists of unique execution order keys.

-
-
-
-
-
-    Returns:
-        dict: A dictionary where keys are rank identifiers and values are lists of unique string
-        keys representing the modified execution orders for each rank.
+    Args:
+        execute_orders_map (dict): A dictionary where keys are rank identifiers (e.g., "rank_0")
+                                   and values are lists of ExecuteOrder objects. If a rank has no
+                                   orders, its value may be `None`.

-
+    Returns:
+        dict: A dictionary where keys are rank identifiers and values are lists of unique string
+        keys representing the modified execution orders for each rank.
+    """
     result = {}

     for rank, execute_orders in execute_orders_map.items():

@@ -329,26 +413,25 @@ def modify_execute_orders(execute_orders_map: dict) -> dict:

 def parse_and_validate(data: dict, all_rank: bool = True):
     """
-
+    Parse and validate execution orders in a directed graph structure.

-
-
-
+    This function checks the integrity and consistency of a given dataset, ensuring all required
+    keys are present and correctly referenced. It also validates the structure of the input data
+    and parses string values to extract meaningful components.

-
-
-
-
-
-
-    Returns:
-        None: Log error messages to the console if validation fails, otherwise completes silently.
+    Args:
+        data (dict): A dictionary where keys are string identifiers and values are lists of strings.
+                     Each value represents a dependency or reference to other keys.
+        all_rank (bool): If True, checks that all elements referenced in the data are present as keys
+                         in the dictionary. If False, only checks intersections.

-
-
-        TypeError: Raised indirectly if data contains unexpected types.
+    Returns:
+        None: Log error messages to the console if validation fails, otherwise completes silently.

-
+    Raises:
+        ValueError: Raised indirectly if `parse_elements` encounters malformed input strings.
+        TypeError: Raised indirectly if data contains unexpected types.
+    """
     def parse_elements(value: str, max_groups: int = 2) -> set:
         """Extract unique elements inside the first one or two parentheses from a string."""
         groups = re.findall(r'\((.*?)\)', value)

@@ -396,9 +479,8 @@ def detect_cycle_in_graph(ranks_map):

     Returns:
         - tuple: (cycle_path, cycle_ranks) where cycle_path is a list of nodes forming the cycle and cycle_ranks
-
+          is a list of rank transitions corresponding to the cycle path.
     """
-    # Step 1: Build the directed graph and track edges with ranks
     graph = defaultdict(list)
     rank_edges = {}

@@ -408,46 +490,50 @@
         graph[u].append(v)
         rank_edges[(u, v)] = rank

-    # Step 2: Detect cycle using DFS with path and rank tracking
     visited = set()
-    recursion_stack = set()
     path = []
+    node_indices = {}
     cycle_path = []
    cycle_ranks = []

-
-
-
-
-
-        for i in range(cycle_index, len(path) - 1):
-            u, v = path[i], path[i + 1]
-            cycle_ranks.append(f"{rank_edges[(u, v)]} {u} -> {v}")
-        # Add the closing edge for the cycle
-        cycle_ranks.append(f"{rank_edges[(path[-1], node)]} {path[-1]} -> {node}")
-        return True
+    # Use a stack to simulate recursion
+    stack = []
+    for node in list(graph.keys()):
+        if node not in visited:
+            stack.append((node, False))  # (node, is_processed)

-
-
+    while stack:
+        current_node, is_processed = stack.pop()

-
-
-
+        if is_processed:
+            # Post-processing: remove node from path and indices
+            path.pop()
+            del node_indices[current_node]
+            continue
+
+        if current_node in node_indices:
+            # Found a cycle
+            cycle_start = node_indices[current_node]
+            cycle_path = path[cycle_start:] + [current_node]
+            for i in range(cycle_start, len(path)):
+                u = path[i]
+                v = path[i + 1] if i + 1 < len(path) else current_node
+                cycle_ranks.append(f"{rank_edges[(u, v)]} {u} -> {v}")
+            return cycle_path, cycle_ranks
+
+        if current_node in visited:
+            continue

-
-
-
+        visited.add(current_node)
+        node_indices[current_node] = len(path)
+        path.append(current_node)

-
-
-        path.pop()
-        return False
+        # Mark current node as processed
+        stack.append((current_node, True))

-
-
-
-        if dfs(node):
-            return cycle_path, cycle_ranks
+        # Add neighbors to stack
+        for neighbor in reversed(graph[current_node]):
+            stack.append((neighbor, False))

     return None, None

@@ -485,13 +571,13 @@ def output_cycle_results(cycle_path, cycle_ranks):
         None: Outputs results to the console.
     """
     if cycle_path:
-        logger.
-        logger.
-        logger.
+        logger.error("Cycle detected:")
+        logger.error(" -> ".join(cycle_path) + f" -> {cycle_path[0]}")  # Close the cycle
+        logger.error("Involving ranks:")
         for rank in cycle_ranks:
-            logger.
+            logger.error(rank)
     else:
-        logger.warning("Check success.")
+        logger.warning("Cycle Check success. There is no cycle in the graph.")


 def runtime_execution_order_check(folders_, all_rank=None):

@@ -529,7 +615,7 @@ def runtime_execution_order_check(folders_, all_rank=None):
     if all_rank is None:
         all_rank = determine_all_rank(folders_)

-    if
+    if folders_ is None:  # Input validation failed
         logger.error("Invalid input. `folders_` must be a non-empty string or a list with at least one string element.")
         return

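The largest change above swaps the recursive DFS in detect_cycle_in_graph for an explicit stack, so cycle detection over very large rank graphs no longer depends on Python's recursion limit. Below is a self-contained sketch of the same iterative cycle-detection idea on a toy graph (simplified: no rank_edges bookkeeping, and find_cycle is an illustrative name, not MindSpore API):

from collections import defaultdict

def find_cycle(edges):
    """Return a list of nodes forming a cycle, or None, using an iterative DFS."""
    graph = defaultdict(list)
    for u, v in edges:
        graph[u].append(v)

    visited, path, node_indices = set(), [], {}
    for start in list(graph.keys()):
        if start in visited:
            continue
        stack = [(start, False)]                      # (node, is_processed)
        while stack:
            node, is_processed = stack.pop()
            if is_processed:                          # post-order: leave the current path
                path.pop()
                del node_indices[node]
                continue
            if node in node_indices:                  # back edge to the current path => cycle
                return path[node_indices[node]:] + [node]
            if node in visited:
                continue
            visited.add(node)
            node_indices[node] = len(path)
            path.append(node)
            stack.append((node, True))                # marker to unwind this node later
            for neighbor in reversed(graph[node]):
                stack.append((neighbor, False))
    return None

print(find_cycle([("a", "b"), ("b", "c"), ("c", "a")]))   # ['a', 'b', 'c', 'a']
print(find_cycle([("a", "b"), ("b", "c")]))               # None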
mindspore/utils/sdc_detect.py
ADDED

@@ -0,0 +1,68 @@
+# Copyright 2025 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+"""SDC detect."""
+from mindspore import _c_expression
+
+
+def sdc_detect_start():
+    """
+    Start silent data corruption detection. It will check the inputs and outputs of MatMul operations during the
+    forward and backward computations on the current device, which may increase execution time. The overhead of the
+    check time decreases as the matrix shapes increase. Starting sdc detection results in approximately 100%
+    performance degradation for a single 4096-sized MatMul computation, and approximately 90% degradation on the
+    Llama2-7B model (model parallel is 4, pipeline parallel is 2, and using qkv concatenation and ffn concatenation in
+    decoder layers).
+
+    Supported Platforms:
+        ``Ascend``
+
+    Examples:
+        >>> from mindspore.utils import sdc_detect_start
+        >>> sdc_detect_start()
+    """
+    _c_expression.sdc_detect_start()
+
+
+def sdc_detect_stop():
+    """
+    Stop silent data corruption detection.
+
+    Supported Platforms:
+        ``Ascend``
+
+    Examples:
+        >>> from mindspore.utils import sdc_detect_stop
+        >>> sdc_detect_stop()
+    """
+    _c_expression.sdc_detect_stop()
+
+
+def get_sdc_detect_result():
+    """
+    Get the result of silent data corruption detection.
+
+    Returns:
+        bool, indicating whether silent data corruption has occurred after detection start.
+
+    Supported Platforms:
+        ``Ascend``
+
+    Examples:
+        >>> from mindspore.utils import get_sdc_detect_result
+        >>> result = get_sdc_detect_result()
+        >>> print(result)
+        False
+    """
+    return _c_expression.get_sdc_detect_result()
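Based on the docstrings above, the three helpers compose naturally into a guard around a training step. A hedged sketch (requires an Ascend device; train_one_step is a placeholder callable supplied by the caller, not a MindSpore API):

from mindspore.utils import sdc_detect_start, sdc_detect_stop, get_sdc_detect_result

def guarded_step(train_one_step):
    """Run one training step with MatMul silent-data-corruption checks enabled."""
    sdc_detect_start()
    try:
        train_one_step()
    finally:
        sdc_detect_stop()
    if get_sdc_detect_result():
        raise RuntimeError("Silent data corruption detected during this step.")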
mindspore/utils/utils.py
CHANGED

@@ -117,30 +117,31 @@ class TftHandle:
         tft_register_mindx_callback('report_result', report_result)
         logger.warning('Stub register mindx func success.')

-    def init(self,
+    def init(self, **kwargs):
         """
         TFT handle init fun. Mainly used to initialize the mindio component.

         Args:
-            config (Dict): configs or the training model running. If ARF is not used, this parameter can be None.
             **kwargs: Reserved parameters.
         """
         tft_env = os.getenv("MS_ENABLE_TFT", "")
-
-
+        tft_opts = ["TTP:1", "UCE:1", "HCCE:1", "ARF:1", "TSP:1"]
+        tft_enabled = any([opt in tft_env for opt in tft_opts])
+        if not tft_enabled:
+            raise ValueError("MindIO TFT regitster need custom switch on[MS_ENABLE_TFT='{%s}']!" % ",".join(tft_opts))
         if "ARF:1" in tft_env:
             logger.warning(f"Disable hccl watchdog when using ARF.")
             context.set_context(ascend_config={"hccl_watchdog": False})
-
-
-
+            if "TTP:1" not in tft_env:
+                logger.warning(f"Turn on TTP config when using ARF.")
+                tft_env = tft_env.replace("{", "").replace("}", "")
+                all_opts = [part.strip() for part in tft_env.split(",")] + ["TTP:1"]
+                os.environ["MS_ENABLE_TFT"] = "{" + ",".join(all_opts) + "}"
             os.environ["MS_ENABLE_RECOVERY"] = "1"

-        mode = context.get_context("mode")
         device_target = context.get_context("device_target")
-        if device_target != "Ascend"
-            logger.warning(f"MindIO adataper only support on Ascend device
-                           f"device:{device_target}, run mode: {mode}")
+        if device_target != "Ascend":
+            logger.warning(f"MindIO adataper only support on Ascend device but got device {device_target}!")
             return

         ctrl_port = int(os.getenv("MS_TFT_PORT"))

@@ -153,7 +154,7 @@ class TftHandle:
             from mindio_ttp import framework_ttp as tft
             self.tft = tft
         except BaseException as e:
-            raise ModuleNotFoundError(f"Module
+            raise ModuleNotFoundError(f"Module not found. Detail info {str(e)}")
         world_size = int(os.getenv("MS_WORKER_NUM"))  # from msrun
         cur_rank = int(os.getenv("MS_NODE_ID"))  # from msrun
         enable_local_copy = False

@@ -181,14 +182,10 @@ class TftHandle:
             logger.warning("tft report reboot init finish ")
             tft.tft_report_error(tft.ReportState.RS_INIT_FINISH.value)
             _set_recovery_context(is_reboot_node=True)
-            if config is None:
-                raise ValueError(
-                    f"Param 'config' can not be None when using ARF, and 'config' should be a dict type.")
-            logger.warning("tft reboot node no need load checkpoint when using ARF.")
-            config["arf_skip_load"] = True
             ret = tft.tft_wait_next_action()
             if ret != tft.Action.RETRY.value:
                 raise RuntimeError(f"ARF init failed!")
+            logger.warning("tft reboot success.")


 _tft_handler = TftHandle()
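The reworked init() above now validates MS_ENABLE_TFT itself and force-enables TTP whenever ARF is requested. Below is a standalone sketch of that normalization logic (normalize_tft_env is a hypothetical helper that mirrors the diff, not the full MindSpore behaviour):

import os

TFT_OPTS = ["TTP:1", "UCE:1", "HCCE:1", "ARF:1", "TSP:1"]

def normalize_tft_env(tft_env):
    """Validate MS_ENABLE_TFT and make sure TTP is enabled whenever ARF is requested."""
    if not any(opt in tft_env for opt in TFT_OPTS):
        raise ValueError(f"MindIO TFT register needs one of {TFT_OPTS} in MS_ENABLE_TFT!")
    if "ARF:1" in tft_env and "TTP:1" not in tft_env:
        opts = [p.strip() for p in tft_env.replace("{", "").replace("}", "").split(",")]
        tft_env = "{" + ",".join(opts + ["TTP:1"]) + "}"
    return tft_env

os.environ["MS_ENABLE_TFT"] = normalize_tft_env("{ARF:1}")
print(os.environ["MS_ENABLE_TFT"])  # {ARF:1,TTP:1}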
mindspore/vcmeta.dll
CHANGED
Binary file

mindspore/vcruntime140.dll
CHANGED
Binary file

mindspore/vcruntime140_1.dll
CHANGED
Binary file
mindspore/version.py
CHANGED

@@ -1 +1 @@
-__version__ = '2.
+__version__ = '2.7.0'
{mindspore-2.6.0rc1.dist-info → mindspore-2.7.0.dist-info}/METADATA
CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: mindspore
-Version: 2.
+Version: 2.7.0
 Summary: MindSpore is a new open source deep learning training/inference framework that could be used for mobile, edge and cloud scenarios.
 Home-page: https://www.mindspore.cn
 Download-URL: https://github.com/mindspore-ai/mindspore/tags

@@ -319,11 +319,12 @@ Project stable branches will be in one of the following states:

 ## Maintenance status

-| **Version
+| **Version**| **Status** | **Initial Release Date**| **Next Phase** | **EOL Date**|
 |------------|--------------|--------------------------|----------------------------------------|-------------|
-| **r2.
+| **r2.6** | Maintained | 2025-05-19 | Unmaintained <br> 2026-05-19 estimated | 2026-05-19 |
+| **r2.5** | Maintained | 2025-02-08 | Unmaintained <br> 2026-02-08 estimated | 2026-02-08 |
 | **r2.4** | Maintained | 2024-10-30 | Unmaintained <br> 2025-10-30 estimated | 2025-10-30 |
-| **r2.3** |
+| **r2.3** | End Of Life | 2024-07-15 | | 2025-07-15 |
 | **r2.2** | End Of Life | 2023-10-18 | | 2024-10-18 |
 | **r2.1** | End Of Life | 2023-07-29 | | 2024-07-29 |
 | **r2.0** | End Of Life | 2023-06-15 | | 2024-06-15 |