mindspore 2.6.0__cp310-cp310-win_amd64.whl → 2.7.0__cp310-cp310-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mindspore might be problematic. Click here for more details.
- mindspore/.commit_id +1 -1
- mindspore/Microsoft.VisualStudio.Telemetry.dll +0 -0
- mindspore/Newtonsoft.Json.dll +0 -0
- mindspore/__init__.py +2 -2
- mindspore/_c_dataengine.cp310-win_amd64.pyd +0 -0
- mindspore/_c_expression.cp310-win_amd64.pyd +0 -0
- mindspore/_c_mindrecord.cp310-win_amd64.pyd +0 -0
- mindspore/_checkparam.py +42 -11
- mindspore/_extends/builtin_operations.py +3 -3
- mindspore/{_deprecated → _extends/optimize}/__init__.py +9 -3
- mindspore/_extends/optimize/cell_utils.py +96 -0
- mindspore/_extends/parallel_compile/akg_compiler/custom.py +1109 -0
- mindspore/_extends/parallel_compile/akg_compiler/gen_custom_op_files.py +1 -1
- mindspore/_extends/parse/__init__.py +3 -3
- mindspore/_extends/parse/compile_config.py +44 -22
- mindspore/_extends/parse/deprecated/deprecated_tensor_method.py +1 -2
- mindspore/_extends/parse/parser.py +64 -83
- mindspore/_extends/parse/resources.py +39 -0
- mindspore/_extends/parse/standard_method.py +47 -14
- mindspore/_extends/parse/trope.py +8 -1
- mindspore/_extends/pijit/__init__.py +1 -2
- mindspore/_extends/pijit/pijit_func_white_list.py +2 -5
- mindspore/amp.py +4 -22
- mindspore/atlprov.dll +0 -0
- mindspore/avcodec-59.dll +0 -0
- mindspore/avdevice-59.dll +0 -0
- mindspore/avfilter-8.dll +0 -0
- mindspore/avformat-59.dll +0 -0
- mindspore/avutil-57.dll +0 -0
- mindspore/boost/adasum.py +1 -1
- mindspore/boost/boost_cell_wrapper.py +4 -4
- mindspore/c1.dll +0 -0
- mindspore/c1xx.dll +0 -0
- mindspore/c2.dll +0 -0
- mindspore/common/__init__.py +43 -12
- mindspore/common/_grad_function.py +2 -1
- mindspore/common/_pijit_context.py +28 -7
- mindspore/common/_stub_tensor.py +1 -209
- mindspore/common/_tensor_cpp_method.py +1 -1
- mindspore/common/_tensor_docs.py +177 -52
- mindspore/common/_utils.py +9 -1
- mindspore/common/api.py +338 -208
- mindspore/common/dtype.py +108 -57
- mindspore/common/dump.py +11 -16
- mindspore/common/dynamic_shape/__init__.py +0 -0
- mindspore/common/{auto_dynamic_shape.py → dynamic_shape/auto_dynamic_shape.py} +17 -23
- mindspore/common/dynamic_shape/enable_dynamic.py +197 -0
- mindspore/common/file_system.py +59 -9
- mindspore/common/generator.py +2 -3
- mindspore/common/hook_handle.py +33 -5
- mindspore/common/jit_config.py +1 -1
- mindspore/common/jit_trace.py +84 -105
- mindspore/common/np_dtype.py +3 -3
- mindspore/common/parameter.py +27 -29
- mindspore/common/recompute.py +5 -7
- mindspore/common/sparse_tensor.py +0 -3
- mindspore/common/symbol.py +0 -1
- mindspore/common/tensor.py +84 -133
- mindspore/communication/_comm_helper.py +46 -4
- mindspore/communication/management.py +79 -7
- mindspore/context.py +47 -38
- mindspore/dataset/__init__.py +1 -1
- mindspore/dataset/audio/transforms.py +1 -1
- mindspore/dataset/core/config.py +38 -4
- mindspore/dataset/engine/datasets.py +350 -322
- mindspore/dataset/engine/datasets_user_defined.py +69 -23
- mindspore/dataset/engine/iterators.py +2 -2
- mindspore/dataset/engine/obs/config_loader.py +2 -2
- mindspore/dataset/engine/obs/obs_mindrecord_dataset.py +8 -0
- mindspore/dataset/transforms/c_transforms.py +2 -2
- mindspore/dataset/transforms/py_transforms.py +7 -3
- mindspore/dataset/transforms/transforms.py +10 -6
- mindspore/dataset/vision/__init__.py +1 -1
- mindspore/dataset/vision/py_transforms.py +8 -8
- mindspore/dataset/vision/transforms.py +17 -5
- mindspore/dataset/vision/utils.py +632 -21
- mindspore/dataset/vision/validators.py +1 -0
- mindspore/device_context/ascend/device.py +1 -1
- mindspore/device_context/ascend/op_tuning.py +35 -1
- mindspore/device_context/gpu/__init__.py +2 -2
- mindspore/device_context/gpu/device.py +1 -1
- mindspore/device_context/gpu/op_precision.py +4 -2
- mindspore/device_context/gpu/op_tuning.py +6 -3
- mindspore/device_manager.py +16 -9
- mindspore/dnnl.dll +0 -0
- mindspore/dpcmi.dll +0 -0
- mindspore/experimental/llm_boost/ascend_native/llama_boost_ascend_native.py +5 -4
- mindspore/experimental/llm_boost/atb/boost_base.py +2 -3
- mindspore/experimental/optim/adadelta.py +13 -20
- mindspore/experimental/optim/adagrad.py +15 -22
- mindspore/experimental/optim/adam.py +17 -24
- mindspore/experimental/optim/adamax.py +14 -22
- mindspore/experimental/optim/adamw.py +28 -34
- mindspore/experimental/optim/asgd.py +15 -25
- mindspore/experimental/optim/lr_scheduler.py +27 -45
- mindspore/experimental/optim/nadam.py +14 -24
- mindspore/experimental/optim/optimizer.py +13 -23
- mindspore/experimental/optim/radam.py +18 -24
- mindspore/experimental/optim/rmsprop.py +14 -25
- mindspore/experimental/optim/rprop.py +15 -26
- mindspore/experimental/optim/sgd.py +9 -19
- mindspore/hal/__init__.py +4 -4
- mindspore/hal/contiguous_tensors_handle.py +2 -2
- mindspore/hal/memory.py +1 -0
- mindspore/include/api/cell.h +65 -5
- mindspore/include/api/cfg.h +24 -7
- mindspore/include/api/context.h +1 -0
- mindspore/include/api/delegate.h +10 -2
- mindspore/include/api/dual_abi_helper.h +100 -19
- mindspore/include/api/graph.h +14 -1
- mindspore/include/api/kernel.h +16 -3
- mindspore/include/api/kernel_api.h +9 -1
- mindspore/include/api/metrics/accuracy.h +9 -0
- mindspore/include/api/model.h +8 -1
- mindspore/include/api/model_group.h +4 -0
- mindspore/include/api/model_parallel_runner.h +2 -0
- mindspore/include/api/status.h +48 -10
- mindspore/include/api/types.h +8 -3
- mindspore/include/c_api/model_c.h +0 -58
- mindspore/include/c_api/tensor_c.h +0 -26
- mindspore/include/dataset/constants.h +9 -0
- mindspore/include/dataset/vision_ascend.h +1 -1
- mindspore/jpeg62.dll +0 -0
- mindspore/mindrecord/tools/cifar10.py +61 -11
- mindspore/mindrecord/tools/cifar10_to_mr.py +5 -0
- mindspore/mindspore_backend_common.dll +0 -0
- mindspore/mindspore_backend_manager.dll +0 -0
- mindspore/mindspore_common.dll +0 -0
- mindspore/mindspore_core.dll +0 -0
- mindspore/mindspore_cpu_res_manager.dll +0 -0
- mindspore/mindspore_dump.dll +0 -0
- mindspore/mindspore_frontend.dll +0 -0
- mindspore/mindspore_glog.dll +0 -0
- mindspore/mindspore_memory_pool.dll +0 -0
- mindspore/mindspore_ms_backend.dll +0 -0
- mindspore/mindspore_ops.dll +0 -0
- mindspore/mindspore_ops_host.dll +0 -0
- mindspore/mindspore_ops_kernel_common.dll +0 -0
- mindspore/mindspore_profiler.dll +0 -0
- mindspore/mindspore_pyboost.dll +0 -0
- mindspore/mindspore_pynative.dll +0 -0
- mindspore/mindspore_res_manager.dll +0 -0
- mindspore/mindspore_runtime_pipeline.dll +0 -0
- mindspore/mint/__init__.py +4 -44
- mindspore/mint/distributed/__init__.py +5 -0
- mindspore/mint/distributed/distributed.py +425 -19
- mindspore/mint/nn/__init__.py +1 -1
- mindspore/mint/nn/functional.py +53 -6
- mindspore/mint/nn/layer/_functions.py +163 -294
- mindspore/mint/nn/layer/activation.py +8 -6
- mindspore/mint/nn/layer/conv.py +125 -101
- mindspore/mint/nn/layer/normalization.py +11 -25
- mindspore/mint/optim/adam.py +19 -18
- mindspore/mint/optim/adamw.py +14 -8
- mindspore/mint/optim/sgd.py +5 -5
- mindspore/msobj140.dll +0 -0
- mindspore/mspdb140.dll +0 -0
- mindspore/mspdbcore.dll +0 -0
- mindspore/mspdbst.dll +0 -0
- mindspore/mspft140.dll +0 -0
- mindspore/msvcdis140.dll +0 -0
- mindspore/msvcp140_1.dll +0 -0
- mindspore/msvcp140_2.dll +0 -0
- mindspore/msvcp140_atomic_wait.dll +0 -0
- mindspore/msvcp140_codecvt_ids.dll +0 -0
- mindspore/nn/cell.py +488 -620
- mindspore/nn/grad/cell_grad.py +11 -12
- mindspore/nn/layer/activation.py +36 -36
- mindspore/nn/layer/basic.py +74 -77
- mindspore/nn/layer/channel_shuffle.py +4 -4
- mindspore/nn/layer/combined.py +4 -2
- mindspore/nn/layer/conv.py +86 -85
- mindspore/nn/layer/dense.py +9 -7
- mindspore/nn/layer/embedding.py +50 -52
- mindspore/nn/layer/image.py +38 -40
- mindspore/nn/layer/math.py +111 -112
- mindspore/nn/layer/normalization.py +56 -44
- mindspore/nn/layer/pooling.py +58 -63
- mindspore/nn/layer/rnn_cells.py +33 -33
- mindspore/nn/layer/rnns.py +56 -56
- mindspore/nn/layer/thor_layer.py +74 -73
- mindspore/nn/layer/transformer.py +11 -1
- mindspore/nn/learning_rate_schedule.py +20 -20
- mindspore/nn/loss/loss.py +79 -81
- mindspore/nn/optim/adam.py +2 -4
- mindspore/nn/optim/adasum.py +2 -2
- mindspore/nn/optim/lamb.py +1 -3
- mindspore/nn/optim/optimizer.py +1 -1
- mindspore/nn/optim/tft_wrapper.py +2 -3
- mindspore/nn/optim/thor.py +2 -2
- mindspore/nn/probability/distribution/_utils/utils.py +2 -2
- mindspore/nn/probability/distribution/exponential.py +2 -1
- mindspore/nn/probability/distribution/poisson.py +2 -1
- mindspore/nn/sparse/sparse.py +3 -3
- mindspore/nn/wrap/cell_wrapper.py +73 -42
- mindspore/nn/wrap/grad_reducer.py +37 -52
- mindspore/nn/wrap/loss_scale.py +72 -74
- mindspore/numpy/array_creations.py +7 -7
- mindspore/numpy/fft.py +1 -1
- mindspore/numpy/math_ops.py +1 -1
- mindspore/numpy/utils_const.py +1 -1
- mindspore/opencv_core452.dll +0 -0
- mindspore/opencv_imgcodecs452.dll +0 -0
- mindspore/opencv_imgproc452.dll +0 -0
- mindspore/ops/_grad_experimental/grad_comm_ops.py +51 -13
- mindspore/ops/_grad_experimental/grad_debug_ops.py +14 -0
- mindspore/ops/_grad_experimental/grad_inner_ops.py +0 -9
- mindspore/ops/_op_impl/cpu/__init__.py +1 -0
- mindspore/{experimental/es/__init__.py → ops/_op_impl/cpu/joinedstr_op.py} +12 -6
- mindspore/ops/_vmap/vmap_array_ops.py +6 -13
- mindspore/ops/_vmap/vmap_nn_ops.py +8 -16
- mindspore/ops/auto_generate/cpp_create_prim_instance_helper.py +29 -10
- mindspore/ops/auto_generate/gen_extend_func.py +5 -55
- mindspore/ops/auto_generate/gen_ops_def.py +753 -273
- mindspore/ops/auto_generate/gen_ops_prim.py +1687 -958
- mindspore/ops/auto_generate/pyboost_inner_prim.py +31 -1
- mindspore/ops/composite/__init__.py +10 -0
- mindspore/ops/composite/base.py +9 -5
- mindspore/ops/composite/multitype_ops/__init__.py +12 -1
- mindspore/ops/composite/multitype_ops/_compile_utils.py +132 -108
- mindspore/ops/composite/multitype_ops/_constexpr_utils.py +1 -1
- mindspore/ops/composite/multitype_ops/add_impl.py +70 -2
- mindspore/ops/composite/multitype_ops/div_impl.py +49 -0
- mindspore/ops/composite/multitype_ops/floordiv_impl.py +29 -0
- mindspore/ops/composite/multitype_ops/getitem_impl.py +11 -0
- mindspore/ops/composite/multitype_ops/mod_impl.py +5 -3
- mindspore/ops/composite/multitype_ops/mul_impl.py +49 -0
- mindspore/ops/composite/multitype_ops/setitem_impl.py +57 -0
- mindspore/ops/composite/multitype_ops/sub_impl.py +34 -0
- mindspore/ops/composite/multitype_ops/zeros_like_impl.py +14 -0
- mindspore/ops/function/__init__.py +4 -1
- mindspore/ops/function/_add_attr_func.py +11 -6
- mindspore/ops/function/array_func.py +17 -100
- mindspore/ops/function/debug_func.py +8 -5
- mindspore/ops/function/grad/grad_func.py +5 -13
- mindspore/ops/function/math_func.py +65 -399
- mindspore/ops/function/nn_func.py +44 -61
- mindspore/ops/function/other_func.py +4 -1
- mindspore/ops/function/random_func.py +31 -4
- mindspore/ops/functional.py +2 -3
- mindspore/ops/functional_overload.py +486 -18
- mindspore/ops/op_info_register.py +21 -0
- mindspore/ops/operations/__init__.py +5 -2
- mindspore/ops/operations/_custom_ops_utils.py +675 -8
- mindspore/ops/operations/_inner_ops.py +14 -18
- mindspore/ops/operations/_sequence_ops.py +1 -1
- mindspore/ops/operations/array_ops.py +4 -50
- mindspore/ops/operations/comm_ops.py +186 -41
- mindspore/ops/operations/custom_ops.py +244 -175
- mindspore/ops/operations/debug_ops.py +55 -4
- mindspore/ops/operations/image_ops.py +13 -13
- mindspore/ops/operations/manually_defined/ops_def.py +27 -28
- mindspore/ops/operations/math_ops.py +8 -9
- mindspore/ops/operations/nn_ops.py +6 -7
- mindspore/ops/primitive.py +9 -20
- mindspore/ops/tensor_method.py +52 -11
- mindspore/ops_generate/api/cpp_create_prim_instance_helper_generator.py +1 -1
- mindspore/ops_generate/api/functional_map_cpp_generator.py +10 -9
- mindspore/ops_generate/api/functions_cc_generator.py +58 -10
- mindspore/ops_generate/api/tensor_func_reg_cpp_generator.py +1 -1
- mindspore/ops_generate/common/base_generator.py +14 -0
- mindspore/ops_generate/common/gen_constants.py +7 -2
- mindspore/ops_generate/common/gen_utils.py +0 -19
- mindspore/ops_generate/common/op_proto.py +11 -4
- mindspore/ops_generate/common/template.py +88 -11
- mindspore/ops_generate/gen_ops.py +1 -1
- mindspore/ops_generate/op_def/lite_ops_cpp_generator.py +4 -4
- mindspore/ops_generate/op_def/ops_name_h_generator.py +0 -3
- mindspore/ops_generate/op_def/ops_primitive_h_generator.py +0 -4
- mindspore/ops_generate/op_def_py/op_prim_py_generator.py +5 -2
- mindspore/ops_generate/pyboost/auto_grad_impl_cc_generator.py +49 -8
- mindspore/ops_generate/pyboost/auto_grad_reg_cc_generator.py +2 -2
- mindspore/ops_generate/pyboost/gen_pyboost_func.py +31 -16
- mindspore/ops_generate/pyboost/op_template_parser.py +98 -72
- mindspore/ops_generate/pyboost/pyboost_functions_cpp_generator.py +70 -273
- mindspore/ops_generate/pyboost/pyboost_functions_h_generator.py +14 -6
- mindspore/ops_generate/pyboost/pyboost_functions_impl_cpp_generator.py +316 -0
- mindspore/ops_generate/pyboost/pyboost_functions_py_generator.py +1 -1
- mindspore/ops_generate/pyboost/pyboost_grad_function_cpp_generator.py +5 -3
- mindspore/ops_generate/pyboost/pyboost_inner_prim_generator.py +1 -1
- mindspore/ops_generate/pyboost/pyboost_internal_functions_cpp_generator.py +76 -0
- mindspore/ops_generate/pyboost/pyboost_internal_functions_h_generator.py +76 -0
- mindspore/ops_generate/pyboost/pyboost_internal_kernel_info_adapter_generator.py +125 -0
- mindspore/ops_generate/pyboost/pyboost_native_grad_functions_generator.py +4 -3
- mindspore/ops_generate/pyboost/pyboost_op_cpp_code_generator.py +348 -61
- mindspore/ops_generate/pyboost/pyboost_overload_functions_cpp_generator.py +1 -1
- mindspore/ops_generate/pyboost/pyboost_utils.py +118 -9
- mindspore/ops_generate/tensor_py_cc_generator.py +1 -24
- mindspore/parallel/_auto_parallel_context.py +9 -17
- mindspore/parallel/_cell_wrapper.py +106 -40
- mindspore/parallel/_parallel_serialization.py +4 -3
- mindspore/parallel/_ps_context.py +4 -6
- mindspore/parallel/_tensor.py +167 -12
- mindspore/parallel/_transformer/moe.py +1 -1
- mindspore/parallel/_transformer/transformer.py +17 -12
- mindspore/parallel/_utils.py +5 -11
- mindspore/parallel/auto_parallel.py +33 -12
- mindspore/parallel/checkpoint_convert.py +3 -3
- mindspore/parallel/checkpoint_transform.py +5 -1
- mindspore/parallel/cluster/process_entity/_api.py +88 -49
- mindspore/parallel/cluster/process_entity/_utils.py +95 -7
- mindspore/parallel/cluster/run.py +48 -7
- mindspore/parallel/function/__init__.py +8 -1
- mindspore/parallel/function/reshard_func.py +7 -6
- mindspore/parallel/nn/__init__.py +15 -2
- mindspore/parallel/nn/parallel_cell_wrapper.py +50 -14
- mindspore/parallel/nn/parallel_grad_reducer.py +7 -14
- mindspore/parallel/shard.py +9 -23
- mindspore/parallel/transform_safetensors.py +468 -174
- mindspore/pgodb140.dll +0 -0
- mindspore/pgort140.dll +0 -0
- mindspore/profiler/__init__.py +2 -1
- mindspore/profiler/analysis/parser/timeline_assembly_factory/ascend_timeline_assembler.py +7 -7
- mindspore/profiler/analysis/parser/timeline_assembly_factory/base_timeline_assembler.py +3 -0
- mindspore/profiler/analysis/parser/timeline_assembly_factory/trace_view_container.py +3 -0
- mindspore/profiler/analysis/parser/timeline_creator/cpu_op_timeline_creator.py +3 -3
- mindspore/profiler/analysis/parser/timeline_creator/fwk_timeline_creator.py +3 -3
- mindspore/profiler/analysis/parser/timeline_creator/msprof_timeline_creator.py +4 -4
- mindspore/profiler/analysis/parser/timeline_creator/scope_layer_timeline_creator.py +3 -3
- mindspore/profiler/analysis/parser/timeline_event/fwk_event.py +4 -1
- mindspore/profiler/analysis/parser/timeline_event/timeline_event_pool.py +2 -1
- mindspore/profiler/analysis/task_manager.py +1 -1
- mindspore/profiler/analysis/viewer/ascend_communication_viewer.py +5 -1
- mindspore/profiler/analysis/viewer/ascend_integrate_viewer.py +2 -1
- mindspore/profiler/analysis/viewer/ascend_kernel_details_viewer.py +10 -9
- mindspore/profiler/analysis/viewer/ascend_op_memory_viewer.py +43 -23
- mindspore/profiler/analysis/viewer/ascend_step_trace_time_viewer.py +3 -2
- mindspore/profiler/analysis/viewer/ms_minddata_viewer.py +9 -5
- mindspore/profiler/analysis/viewer/ms_operator_details_viewer.py +132 -0
- mindspore/profiler/common/constant.py +16 -0
- mindspore/profiler/common/msprof_cmd_tool.py +2 -2
- mindspore/profiler/common/path_manager.py +9 -0
- mindspore/profiler/common/profiler_context.py +50 -29
- mindspore/profiler/common/profiler_info.py +0 -16
- mindspore/profiler/common/profiler_meta_data.py +1 -0
- mindspore/profiler/common/profiler_op_analyse.py +239 -0
- mindspore/profiler/common/profiler_output_path.py +23 -8
- mindspore/profiler/common/profiler_parameters.py +128 -35
- mindspore/profiler/dynamic_profile/__init__.py +0 -0
- mindspore/profiler/dynamic_profile/dynamic_monitor_proxy.py +39 -0
- mindspore/profiler/dynamic_profile/dynamic_profiler_config_context.py +666 -0
- mindspore/profiler/dynamic_profile/dynamic_profiler_utils.py +62 -0
- mindspore/profiler/dynamic_profiler.py +374 -338
- mindspore/profiler/envprofiler.py +42 -12
- mindspore/profiler/experimental_config.py +112 -7
- mindspore/profiler/mstx.py +33 -12
- mindspore/profiler/platform/__init__.py +2 -3
- mindspore/profiler/platform/cpu_profiler.py +10 -4
- mindspore/profiler/platform/npu_profiler.py +30 -20
- mindspore/profiler/profiler.py +218 -154
- mindspore/profiler/profiler_action_controller.py +65 -77
- mindspore/profiler/profiler_interface.py +2 -2
- mindspore/profiler/schedule.py +10 -4
- mindspore/rewrite/common/config.py +1 -0
- mindspore/rewrite/common/namer.py +1 -0
- mindspore/rewrite/common/namespace.py +1 -0
- mindspore/rewrite/node/node.py +31 -11
- mindspore/rewrite/parsers/assign_parser.py +1 -1
- mindspore/rewrite/symbol_tree/symbol_tree.py +2 -2
- mindspore/run_check/_check_version.py +7 -10
- mindspore/runtime/__init__.py +8 -6
- mindspore/runtime/event.py +10 -4
- mindspore/runtime/executor.py +87 -45
- mindspore/runtime/memory.py +22 -30
- mindspore/runtime/thread_bind_core.py +299 -165
- mindspore/safeguard/rewrite_obfuscation.py +12 -13
- mindspore/swresample-4.dll +0 -0
- mindspore/swscale-6.dll +0 -0
- mindspore/tbbmalloc.dll +0 -0
- mindspore/tinyxml2.dll +0 -0
- mindspore/train/_utils.py +9 -5
- mindspore/train/amp.py +43 -23
- mindspore/train/callback/__init__.py +5 -5
- mindspore/train/callback/_callback.py +2 -1
- mindspore/train/callback/_checkpoint.py +4 -14
- mindspore/train/callback/_flops_collector.py +11 -7
- mindspore/train/callback/_landscape.py +0 -1
- mindspore/train/callback/_train_fault_tolerance.py +72 -18
- mindspore/train/data_sink.py +15 -6
- mindspore/train/dataset_helper.py +14 -5
- mindspore/train/model.py +49 -47
- mindspore/train/serialization.py +168 -126
- mindspore/train/summary/summary_record.py +13 -2
- mindspore/train/train_thor/model_thor.py +2 -2
- mindspore/turbojpeg.dll +0 -0
- mindspore/utils/__init__.py +3 -2
- mindspore/utils/dryrun.py +0 -6
- mindspore/utils/runtime_execution_order_check.py +162 -78
- mindspore/utils/sdc_detect.py +68 -0
- mindspore/utils/utils.py +14 -17
- mindspore/vcmeta.dll +0 -0
- mindspore/vcruntime140.dll +0 -0
- mindspore/vcruntime140_1.dll +0 -0
- mindspore/version.py +1 -1
- {mindspore-2.6.0.dist-info → mindspore-2.7.0.dist-info}/METADATA +5 -4
- {mindspore-2.6.0.dist-info → mindspore-2.7.0.dist-info}/RECORD +400 -439
- mindspore/_deprecated/jit.py +0 -198
- mindspore/_extends/remote/kernel_build_server_ascend.py +0 -75
- mindspore/communication/_hccl_management.py +0 -297
- mindspore/experimental/es/embedding_service.py +0 -891
- mindspore/experimental/es/embedding_service_layer.py +0 -581
- mindspore/profiler/common/validator/__init__.py +0 -14
- mindspore/profiler/common/validator/validate_path.py +0 -84
- mindspore/profiler/parser/__init__.py +0 -14
- mindspore/profiler/parser/aicpu_data_parser.py +0 -272
- mindspore/profiler/parser/ascend_analysis/__init__.py +0 -14
- mindspore/profiler/parser/ascend_analysis/constant.py +0 -71
- mindspore/profiler/parser/ascend_analysis/file_manager.py +0 -180
- mindspore/profiler/parser/ascend_analysis/function_event.py +0 -185
- mindspore/profiler/parser/ascend_analysis/fwk_cann_parser.py +0 -136
- mindspore/profiler/parser/ascend_analysis/fwk_file_parser.py +0 -131
- mindspore/profiler/parser/ascend_analysis/msprof_timeline_parser.py +0 -104
- mindspore/profiler/parser/ascend_analysis/path_manager.py +0 -313
- mindspore/profiler/parser/ascend_analysis/profiler_info_parser.py +0 -123
- mindspore/profiler/parser/ascend_analysis/tlv_decoder.py +0 -86
- mindspore/profiler/parser/ascend_analysis/trace_event_manager.py +0 -75
- mindspore/profiler/parser/ascend_cluster_generator.py +0 -116
- mindspore/profiler/parser/ascend_communicate_generator.py +0 -314
- mindspore/profiler/parser/ascend_flops_generator.py +0 -116
- mindspore/profiler/parser/ascend_fpbp_generator.py +0 -82
- mindspore/profiler/parser/ascend_hccl_generator.py +0 -271
- mindspore/profiler/parser/ascend_integrate_generator.py +0 -42
- mindspore/profiler/parser/ascend_memory_generator.py +0 -185
- mindspore/profiler/parser/ascend_msprof_exporter.py +0 -282
- mindspore/profiler/parser/ascend_msprof_generator.py +0 -187
- mindspore/profiler/parser/ascend_op_generator.py +0 -334
- mindspore/profiler/parser/ascend_steptrace_generator.py +0 -94
- mindspore/profiler/parser/ascend_timeline_generator.py +0 -545
- mindspore/profiler/parser/base_timeline_generator.py +0 -483
- mindspore/profiler/parser/container.py +0 -229
- mindspore/profiler/parser/cpu_gpu_timeline_generator.py +0 -697
- mindspore/profiler/parser/flops_parser.py +0 -531
- mindspore/profiler/parser/framework_enum.py +0 -111
- mindspore/profiler/parser/framework_parser.py +0 -464
- mindspore/profiler/parser/framework_struct.py +0 -61
- mindspore/profiler/parser/gpu_analysis/__init__.py +0 -14
- mindspore/profiler/parser/gpu_analysis/function_event.py +0 -44
- mindspore/profiler/parser/gpu_analysis/fwk_file_parser.py +0 -89
- mindspore/profiler/parser/gpu_analysis/profiler_info_parser.py +0 -72
- mindspore/profiler/parser/hccl_parser.py +0 -573
- mindspore/profiler/parser/hwts_log_parser.py +0 -122
- mindspore/profiler/parser/integrator.py +0 -526
- mindspore/profiler/parser/memory_usage_parser.py +0 -277
- mindspore/profiler/parser/minddata_analyzer.py +0 -800
- mindspore/profiler/parser/minddata_parser.py +0 -186
- mindspore/profiler/parser/minddata_pipeline_parser.py +0 -299
- mindspore/profiler/parser/op_intermediate_parser.py +0 -149
- mindspore/profiler/parser/optime_parser.py +0 -250
- mindspore/profiler/parser/profiler_info.py +0 -213
- mindspore/profiler/parser/step_trace_parser.py +0 -666
- mindspore/utils/hooks.py +0 -81
- /mindspore/common/{_auto_dynamic.py → dynamic_shape/_auto_dynamic.py} +0 -0
- {mindspore-2.6.0.dist-info → mindspore-2.7.0.dist-info}/WHEEL +0 -0
- {mindspore-2.6.0.dist-info → mindspore-2.7.0.dist-info}/entry_points.txt +0 -0
- {mindspore-2.6.0.dist-info → mindspore-2.7.0.dist-info}/top_level.txt +0 -0
|
@@ -369,7 +369,19 @@ class SummaryRecord:
|
|
|
369
369
|
global SUMMARY_TENSOR_CACHE
|
|
370
370
|
for tag in tags:
|
|
371
371
|
item_name = name + tag
|
|
372
|
+
time_out = 30
|
|
373
|
+
start_time = time.time()
|
|
374
|
+
last_size = len(SUMMARY_TENSOR_CACHE)
|
|
372
375
|
while item_name not in SUMMARY_TENSOR_CACHE:
|
|
376
|
+
current_size = len(SUMMARY_TENSOR_CACHE)
|
|
377
|
+
if current_size != last_size:
|
|
378
|
+
start_time = time.time()
|
|
379
|
+
last_size = current_size
|
|
380
|
+
if time.time() - start_time > time_out:
|
|
381
|
+
raise RuntimeError(
|
|
382
|
+
f"For '{self.__class__.__name__}', {tag} summary op sync tag "
|
|
383
|
+
f"was not received within {time_out} seconds, indicating potential mbuf issues."
|
|
384
|
+
)
|
|
373
385
|
time.sleep(0.004)
|
|
374
386
|
|
|
375
387
|
with _summary_lock:
|
|
@@ -416,8 +428,7 @@ class SummaryRecord:
|
|
|
416
428
|
if graph_proto is None and train_network is not None:
|
|
417
429
|
graph_proto = _cell_graph_executor.get_optimize_graph_proto(train_network)
|
|
418
430
|
if graph_proto is None:
|
|
419
|
-
|
|
420
|
-
logger.error("Failed to get proto for graph.")
|
|
431
|
+
logger.warning("Failed to get proto for graph.")
|
|
421
432
|
else:
|
|
422
433
|
self._event_writer.write({'graph': [{'step': step, 'value': graph_proto}]})
|
|
423
434
|
self._status['has_graph'] = True
|
|
@@ -29,7 +29,7 @@ from mindspore import nn
|
|
|
29
29
|
from mindspore.train.model import Model
|
|
30
30
|
from mindspore.train.dataset_helper import connect_network_with_dataset
|
|
31
31
|
from mindspore.parallel._utils import _need_to_full, _to_full_tensor
|
|
32
|
-
from mindspore.common.dtype import
|
|
32
|
+
from mindspore.common.dtype import _pytype_to_dtype
|
|
33
33
|
from mindspore._c_expression import init_exec_dataset
|
|
34
34
|
from mindspore.train.train_thor.dataset_helper import DatasetHelper
|
|
35
35
|
|
|
@@ -46,7 +46,7 @@ def _convert_to_ms_type(types):
|
|
|
46
46
|
"""
|
|
47
47
|
ms_types = []
|
|
48
48
|
for numpy_type in types:
|
|
49
|
-
ms_type =
|
|
49
|
+
ms_type = _pytype_to_dtype(numpy_type) # pylint:disable=protected-access
|
|
50
50
|
ms_types.append(ms_type)
|
|
51
51
|
return ms_types
|
|
52
52
|
|
mindspore/turbojpeg.dll
CHANGED
|
Binary file
|
mindspore/utils/__init__.py
CHANGED
|
@@ -16,9 +16,10 @@
|
|
|
16
16
|
from __future__ import absolute_import
|
|
17
17
|
from mindspore._c_expression import stress_detect, _reuse_data_ptr
|
|
18
18
|
from .utils import ExitByRequest, _tft_handler
|
|
19
|
-
from .runtime_execution_order_check import runtime_execution_order_check
|
|
19
|
+
from .runtime_execution_order_check import runtime_execution_order_check, comm_exec_order_check
|
|
20
|
+
from .sdc_detect import sdc_detect_start, sdc_detect_stop, get_sdc_detect_result
|
|
20
21
|
from . import dryrun
|
|
21
22
|
|
|
22
23
|
# Symbols from utils module.
|
|
23
24
|
__all__ = ["stress_detect", "ExitByRequest", "runtime_execution_order_check", "dryrun", "_reuse_data_ptr",
|
|
24
|
-
"_tft_handler"]
|
|
25
|
+
"_tft_handler", "comm_exec_order_check", "sdc_detect_start", "sdc_detect_stop", "get_sdc_detect_result"]
|
mindspore/utils/dryrun.py
CHANGED
|
@@ -17,7 +17,6 @@ import traceback
|
|
|
17
17
|
import os
|
|
18
18
|
from mindspore.common import Tensor
|
|
19
19
|
from mindspore import log as logger
|
|
20
|
-
from mindspore.common._stub_tensor import StubTensor
|
|
21
20
|
from mindspore.common import dtype as mstype
|
|
22
21
|
from mindspore._checkparam import is_stub_tensor
|
|
23
22
|
|
|
@@ -78,12 +77,7 @@ def set_simulation():
|
|
|
78
77
|
Tensor._getitem = obj.inject(Tensor._getitem)
|
|
79
78
|
Tensor.is_contiguous = obj.inject(Tensor.is_contiguous)
|
|
80
79
|
Tensor.flush_from_cache = obj.inject(Tensor.flush_from_cache)
|
|
81
|
-
StubTensor.asnumpy = obj.inject(StubTensor.asnumpy)
|
|
82
|
-
StubTensor._getitem = obj.inject(StubTensor._getitem)
|
|
83
|
-
StubTensor.is_contiguous = obj.inject(StubTensor.is_contiguous)
|
|
84
|
-
StubTensor.flush_from_cache = obj.inject(StubTensor.flush_from_cache)
|
|
85
80
|
Tensor.__str__ = no_inject_traceback_for_print
|
|
86
|
-
StubTensor.__str__ = no_inject_traceback_for_print
|
|
87
81
|
Tensor.tolist = obj.inject(Tensor.tolist)
|
|
88
82
|
Tensor.__int__ = obj.inject(Tensor.__int__)
|
|
89
83
|
Tensor.__float__ = obj.inject(Tensor.__float__)
|
|
@@ -22,11 +22,86 @@ from multiprocessing import cpu_count
|
|
|
22
22
|
from typing import List, Dict, Union, Optional
|
|
23
23
|
import sys
|
|
24
24
|
import mindspore.log as logger
|
|
25
|
+
from mindspore._c_expression import CommExecOrderChecker
|
|
25
26
|
|
|
26
27
|
# Set Recursion Depth Limit
|
|
27
28
|
sys.setrecursionlimit(10000)
|
|
28
29
|
# support hccl group 150000 card
|
|
29
|
-
csv.field_size_limit(1024 * 1024)
|
|
30
|
+
csv.field_size_limit(1024 * 1024 * 10)
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def comm_exec_order_check(action):
|
|
34
|
+
"""
|
|
35
|
+
Call the CommExecOrderCheck class to start the collection of communication operator execution sequences
|
|
36
|
+
or stop the collection and validate the execution order.
|
|
37
|
+
|
|
38
|
+
Args:
|
|
39
|
+
action (str): Control command - 'start' to begin collection, 'end' to stop and validate.
|
|
40
|
+
|
|
41
|
+
Supported Platforms:
|
|
42
|
+
``Ascend``
|
|
43
|
+
|
|
44
|
+
Examples:
|
|
45
|
+
>>> import mindspore as ms
|
|
46
|
+
>>> from mindspore.utils import comm_exec_order_check
|
|
47
|
+
>>> comm_exec_order_check("start")
|
|
48
|
+
>>> model.train(1, train_dataset)
|
|
49
|
+
>>> comm_exec_order_check("end")
|
|
50
|
+
"""
|
|
51
|
+
if not isinstance(action, str):
|
|
52
|
+
raise TypeError("The 'action' parameter must be a string.")
|
|
53
|
+
checker = CommExecOrderCheck()
|
|
54
|
+
checker(action)
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
class CommExecOrderCheck:
|
|
58
|
+
"""Controller for communication execution order verification.
|
|
59
|
+
|
|
60
|
+
Provides interface for starting/stopping the collection of communication
|
|
61
|
+
operator execution sequences. Integrates with C++ backend for actual
|
|
62
|
+
order tracking.
|
|
63
|
+
"""
|
|
64
|
+
_instance = None
|
|
65
|
+
|
|
66
|
+
def __new__(cls, *args, **kwargs):
|
|
67
|
+
if cls._instance is None:
|
|
68
|
+
cls._instance = super().__new__(cls)
|
|
69
|
+
return cls._instance
|
|
70
|
+
|
|
71
|
+
def __init__(self):
|
|
72
|
+
if not hasattr(self, 'initialized'):
|
|
73
|
+
self.action = None
|
|
74
|
+
self.order_checker = CommExecOrderChecker.get_instance()
|
|
75
|
+
self.is_collecting = False
|
|
76
|
+
self.initialized = True
|
|
77
|
+
|
|
78
|
+
def __call__(self, action):
|
|
79
|
+
"""
|
|
80
|
+
Args:
|
|
81
|
+
action (str): Control command - 'start' to begin collection,
|
|
82
|
+
'end' to stop and validate
|
|
83
|
+
"""
|
|
84
|
+
self.action = action
|
|
85
|
+
if action == "start":
|
|
86
|
+
self.start_function()
|
|
87
|
+
elif action == "end":
|
|
88
|
+
self.end_function()
|
|
89
|
+
else:
|
|
90
|
+
raise ValueError("Invalid action. Please use 'start' or 'end'.")
|
|
91
|
+
|
|
92
|
+
def start_function(self):
|
|
93
|
+
if self.is_collecting:
|
|
94
|
+
logger.error("The 'start' action cannot be called twice.")
|
|
95
|
+
return
|
|
96
|
+
self.is_collecting = True
|
|
97
|
+
self.order_checker.start_collect_exec_order()
|
|
98
|
+
|
|
99
|
+
def end_function(self):
|
|
100
|
+
if not self.is_collecting:
|
|
101
|
+
logger.error("The 'end' action cannot be called before the 'start' action.")
|
|
102
|
+
return
|
|
103
|
+
self.is_collecting = False
|
|
104
|
+
self.order_checker.stop_collect_exec_order()
|
|
30
105
|
|
|
31
106
|
|
|
32
107
|
class ExecuteOrder:
|
|
@@ -54,12 +129,12 @@ class ExecuteOrder:
|
|
|
54
129
|
comm_str = ",".join(self.comm_rank)
|
|
55
130
|
return f"{self.primitive}_{self.group}_({comm_str})"
|
|
56
131
|
|
|
57
|
-
if self.primitive
|
|
132
|
+
if self.primitive in ["Send", "DistCommIsend", "InnerCommIsend"]:
|
|
58
133
|
# Unique base key of the Send operation.
|
|
59
134
|
return f"Send_Receive_{self.group}_({rank})->({self.dest_rank})_{self.input_shape}"
|
|
60
135
|
|
|
61
|
-
if self.primitive
|
|
62
|
-
# Unique base key of the
|
|
136
|
+
if self.primitive in ["Receive", "DistCommIrecv", "InnerCommIrecv"]:
|
|
137
|
+
# Unique base key of the Recv operation
|
|
63
138
|
return f"Send_Receive_{self.group}_({self.src_rank})->({rank})_{self.output_shape}"
|
|
64
139
|
|
|
65
140
|
# Other operations, such as broadCast
|
|
@@ -202,10 +277,14 @@ class RankFolderParser:
|
|
|
202
277
|
rank_id = os.path.basename(path).split("_")[1]
|
|
203
278
|
# Adding one more layer to access the "execute_order" folder
|
|
204
279
|
execute_order_path = os.path.join(path, "execute_order")
|
|
205
|
-
if os.path.exists(execute_order_path):
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
280
|
+
if not os.path.exists(execute_order_path):
|
|
281
|
+
raise FileNotFoundError(
|
|
282
|
+
f"Execute order folder does not exist: {execute_order_path} "
|
|
283
|
+
f"for rank_{rank_id} folder."
|
|
284
|
+
)
|
|
285
|
+
rank_result = self.parse_rank_folder(execute_order_path, rank_id)
|
|
286
|
+
if rank_result:
|
|
287
|
+
result[rank_id] = rank_result[1] # Extract execute orders
|
|
209
288
|
return result
|
|
210
289
|
|
|
211
290
|
# If the path is a directory containing rank_{x} folders, parse all
|
|
@@ -217,8 +296,12 @@ class RankFolderParser:
|
|
|
217
296
|
rank_folder_path = os.path.join(path, d)
|
|
218
297
|
execute_order_path = os.path.join(rank_folder_path, "execute_order")
|
|
219
298
|
|
|
220
|
-
if os.path.exists(execute_order_path):
|
|
221
|
-
|
|
299
|
+
if not os.path.exists(execute_order_path):
|
|
300
|
+
raise FileNotFoundError(
|
|
301
|
+
f"Execute order folder does not exist: {execute_order_path} "
|
|
302
|
+
f"for rank_{rank_id} folder."
|
|
303
|
+
)
|
|
304
|
+
futures.append(thread_executor.submit(self.parse_rank_folder, execute_order_path, rank_id))
|
|
222
305
|
|
|
223
306
|
for future in as_completed(futures):
|
|
224
307
|
try:
|
|
@@ -285,23 +368,22 @@ class RankFolderParser:
|
|
|
285
368
|
|
|
286
369
|
def modify_execute_orders(execute_orders_map: dict) -> dict:
|
|
287
370
|
"""
|
|
288
|
-
|
|
371
|
+
Modify and generate unique execution order keys for each rank.
|
|
289
372
|
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
373
|
+
This function processes a mapping of execution orders grouped by ranks. For each order,
|
|
374
|
+
it generates a unique key by combining a base key and a counter, ensuring all orders
|
|
375
|
+
are uniquely identifiable. The result is a dictionary where the keys are rank identifiers
|
|
376
|
+
and the values are lists of unique execution order keys.
|
|
294
377
|
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
Returns:
|
|
301
|
-
dict: A dictionary where keys are rank identifiers and values are lists of unique string
|
|
302
|
-
keys representing the modified execution orders for each rank.
|
|
378
|
+
Args:
|
|
379
|
+
execute_orders_map (dict): A dictionary where keys are rank identifiers (e.g., "rank_0")
|
|
380
|
+
and values are lists of ExecuteOrder objects. If a rank has no
|
|
381
|
+
orders, its value may be `None`.
|
|
303
382
|
|
|
304
|
-
|
|
383
|
+
Returns:
|
|
384
|
+
dict: A dictionary where keys are rank identifiers and values are lists of unique string
|
|
385
|
+
keys representing the modified execution orders for each rank.
|
|
386
|
+
"""
|
|
305
387
|
result = {}
|
|
306
388
|
|
|
307
389
|
for rank, execute_orders in execute_orders_map.items():
|
|
@@ -331,26 +413,25 @@ def modify_execute_orders(execute_orders_map: dict) -> dict:
|
|
|
331
413
|
|
|
332
414
|
def parse_and_validate(data: dict, all_rank: bool = True):
|
|
333
415
|
"""
|
|
334
|
-
|
|
416
|
+
Parse and validate execution orders in a directed graph structure.
|
|
335
417
|
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
418
|
+
This function checks the integrity and consistency of a given dataset, ensuring all required
|
|
419
|
+
keys are present and correctly referenced. It also validates the structure of the input data
|
|
420
|
+
and parses string values to extract meaningful components.
|
|
339
421
|
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
Returns:
|
|
347
|
-
None: Log error messages to the console if validation fails, otherwise completes silently.
|
|
422
|
+
Args:
|
|
423
|
+
data (dict): A dictionary where keys are string identifiers and values are lists of strings.
|
|
424
|
+
Each value represents a dependency or reference to other keys.
|
|
425
|
+
all_rank (bool): If True, checks that all elements referenced in the data are present as keys
|
|
426
|
+
in the dictionary. If False, only checks intersections.
|
|
348
427
|
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
TypeError: Raised indirectly if data contains unexpected types.
|
|
428
|
+
Returns:
|
|
429
|
+
None: Log error messages to the console if validation fails, otherwise completes silently.
|
|
352
430
|
|
|
353
|
-
|
|
431
|
+
Raises:
|
|
432
|
+
ValueError: Raised indirectly if `parse_elements` encounters malformed input strings.
|
|
433
|
+
TypeError: Raised indirectly if data contains unexpected types.
|
|
434
|
+
"""
|
|
354
435
|
def parse_elements(value: str, max_groups: int = 2) -> set:
|
|
355
436
|
"""Extract unique elements inside the first one or two parentheses from a string."""
|
|
356
437
|
groups = re.findall(r'\((.*?)\)', value)
|
|
@@ -398,9 +479,8 @@ def detect_cycle_in_graph(ranks_map):
|
|
|
398
479
|
|
|
399
480
|
Returns:
|
|
400
481
|
- tuple: (cycle_path, cycle_ranks) where cycle_path is a list of nodes forming the cycle and cycle_ranks
|
|
401
|
-
|
|
482
|
+
is a list of rank transitions corresponding to the cycle path.
|
|
402
483
|
"""
|
|
403
|
-
# Step 1: Build the directed graph and track edges with ranks
|
|
404
484
|
graph = defaultdict(list)
|
|
405
485
|
rank_edges = {}
|
|
406
486
|
|
|
@@ -410,46 +490,50 @@ def detect_cycle_in_graph(ranks_map):
|
|
|
410
490
|
graph[u].append(v)
|
|
411
491
|
rank_edges[(u, v)] = rank
|
|
412
492
|
|
|
413
|
-
# Step 2: Detect cycle using DFS with path and rank tracking
|
|
414
493
|
visited = set()
|
|
415
|
-
recursion_stack = set()
|
|
416
494
|
path = []
|
|
495
|
+
node_indices = {}
|
|
417
496
|
cycle_path = []
|
|
418
497
|
cycle_ranks = []
|
|
419
498
|
|
|
420
|
-
|
|
421
|
-
|
|
422
|
-
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
for i in range(cycle_index, len(path) - 1):
|
|
426
|
-
u, v = path[i], path[i + 1]
|
|
427
|
-
cycle_ranks.append(f"{rank_edges[(u, v)]} {u} -> {v}")
|
|
428
|
-
# Add the closing edge for the cycle
|
|
429
|
-
cycle_ranks.append(f"{rank_edges[(path[-1], node)]} {path[-1]} -> {node}")
|
|
430
|
-
return True
|
|
499
|
+
# Use a stack to simulate recursion
|
|
500
|
+
stack = []
|
|
501
|
+
for node in list(graph.keys()):
|
|
502
|
+
if node not in visited:
|
|
503
|
+
stack.append((node, False)) # (node, is_processed)
|
|
431
504
|
|
|
432
|
-
|
|
433
|
-
|
|
505
|
+
while stack:
|
|
506
|
+
current_node, is_processed = stack.pop()
|
|
434
507
|
|
|
435
|
-
|
|
436
|
-
|
|
437
|
-
|
|
508
|
+
if is_processed:
|
|
509
|
+
# Post-processing: remove node from path and indices
|
|
510
|
+
path.pop()
|
|
511
|
+
del node_indices[current_node]
|
|
512
|
+
continue
|
|
513
|
+
|
|
514
|
+
if current_node in node_indices:
|
|
515
|
+
# Found a cycle
|
|
516
|
+
cycle_start = node_indices[current_node]
|
|
517
|
+
cycle_path = path[cycle_start:] + [current_node]
|
|
518
|
+
for i in range(cycle_start, len(path)):
|
|
519
|
+
u = path[i]
|
|
520
|
+
v = path[i + 1] if i + 1 < len(path) else current_node
|
|
521
|
+
cycle_ranks.append(f"{rank_edges[(u, v)]} {u} -> {v}")
|
|
522
|
+
return cycle_path, cycle_ranks
|
|
523
|
+
|
|
524
|
+
if current_node in visited:
|
|
525
|
+
continue
|
|
438
526
|
|
|
439
|
-
|
|
440
|
-
|
|
441
|
-
|
|
527
|
+
visited.add(current_node)
|
|
528
|
+
node_indices[current_node] = len(path)
|
|
529
|
+
path.append(current_node)
|
|
442
530
|
|
|
443
|
-
|
|
444
|
-
|
|
445
|
-
path.pop()
|
|
446
|
-
return False
|
|
531
|
+
# Mark current node as processed
|
|
532
|
+
stack.append((current_node, True))
|
|
447
533
|
|
|
448
|
-
|
|
449
|
-
|
|
450
|
-
|
|
451
|
-
if dfs(node):
|
|
452
|
-
return cycle_path, cycle_ranks
|
|
534
|
+
# Add neighbors to stack
|
|
535
|
+
for neighbor in reversed(graph[current_node]):
|
|
536
|
+
stack.append((neighbor, False))
|
|
453
537
|
|
|
454
538
|
return None, None
|
|
455
539
|
|
|
@@ -487,13 +571,13 @@ def output_cycle_results(cycle_path, cycle_ranks):
|
|
|
487
571
|
None: Outputs results to the console.
|
|
488
572
|
"""
|
|
489
573
|
if cycle_path:
|
|
490
|
-
logger.
|
|
491
|
-
logger.
|
|
492
|
-
logger.
|
|
574
|
+
logger.error("Cycle detected:")
|
|
575
|
+
logger.error(" -> ".join(cycle_path) + f" -> {cycle_path[0]}") # Close the cycle
|
|
576
|
+
logger.error("Involving ranks:")
|
|
493
577
|
for rank in cycle_ranks:
|
|
494
|
-
logger.
|
|
578
|
+
logger.error(rank)
|
|
495
579
|
else:
|
|
496
|
-
logger.warning("Check success.")
|
|
580
|
+
logger.warning("Cycle Check success. There is no cycle in the graph.")
|
|
497
581
|
|
|
498
582
|
|
|
499
583
|
def runtime_execution_order_check(folders_, all_rank=None):
|
|
@@ -531,7 +615,7 @@ def runtime_execution_order_check(folders_, all_rank=None):
|
|
|
531
615
|
if all_rank is None:
|
|
532
616
|
all_rank = determine_all_rank(folders_)
|
|
533
617
|
|
|
534
|
-
if
|
|
618
|
+
if folders_ is None: # Input validation failed
|
|
535
619
|
logger.error("Invalid input. `folders_` must be a non-empty string or a list with at least one string element.")
|
|
536
620
|
return
|
|
537
621
|
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
# Copyright 2025 Huawei Technologies Co., Ltd
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
# ============================================================================
|
|
15
|
+
"""SDC detect."""
|
|
16
|
+
from mindspore import _c_expression
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def sdc_detect_start():
|
|
20
|
+
"""
|
|
21
|
+
Start silent data corruption detection. It will check the inputs and outputs of MatMul operations during the
|
|
22
|
+
forward and backward computations on the current device, which may increase execution time. The overhead of the
|
|
23
|
+
check time decreases as the matrix shapes increase. Starting sdc detection results in approximately 100%
|
|
24
|
+
performance degradation for a single 4096-sized MatMul computation, and approximately 90% degradation on the
|
|
25
|
+
Llama2-7B model (model parallel is 4, pipeline parallel is 2, and using qkv concatenation and ffn concatenation in
|
|
26
|
+
decoder layers).
|
|
27
|
+
|
|
28
|
+
Supported Platforms:
|
|
29
|
+
``Ascend``
|
|
30
|
+
|
|
31
|
+
Examples:
|
|
32
|
+
>>> from mindspore.utils import sdc_detect_start
|
|
33
|
+
>>> sdc_detect_start()
|
|
34
|
+
"""
|
|
35
|
+
_c_expression.sdc_detect_start()
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def sdc_detect_stop():
|
|
39
|
+
"""
|
|
40
|
+
Stop silent data corruption detection.
|
|
41
|
+
|
|
42
|
+
Supported Platforms:
|
|
43
|
+
``Ascend``
|
|
44
|
+
|
|
45
|
+
Examples:
|
|
46
|
+
>>> from mindspore.utils import sdc_detect_stop
|
|
47
|
+
>>> sdc_detect_stop()
|
|
48
|
+
"""
|
|
49
|
+
_c_expression.sdc_detect_stop()
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def get_sdc_detect_result():
|
|
53
|
+
"""
|
|
54
|
+
Get the result of silent data corruption detection.
|
|
55
|
+
|
|
56
|
+
Returns:
|
|
57
|
+
bool, indicating whether silent data corruption has occurred after detection start.
|
|
58
|
+
|
|
59
|
+
Supported Platforms:
|
|
60
|
+
``Ascend``
|
|
61
|
+
|
|
62
|
+
Examples:
|
|
63
|
+
>>> from mindspore.utils import get_sdc_detect_result
|
|
64
|
+
>>> result = get_sdc_detect_result()
|
|
65
|
+
>>> print(result)
|
|
66
|
+
False
|
|
67
|
+
"""
|
|
68
|
+
return _c_expression.get_sdc_detect_result()
|
mindspore/utils/utils.py
CHANGED
|
@@ -117,30 +117,31 @@ class TftHandle:
|
|
|
117
117
|
tft_register_mindx_callback('report_result', report_result)
|
|
118
118
|
logger.warning('Stub register mindx func success.')
|
|
119
119
|
|
|
120
|
-
def init(self,
|
|
120
|
+
def init(self, **kwargs):
|
|
121
121
|
"""
|
|
122
122
|
TFT handle init fun. Mainly used to initialize the mindio component.
|
|
123
123
|
|
|
124
124
|
Args:
|
|
125
|
-
config (Dict): configs or the training model running. If ARF is not used, this parameter can be None.
|
|
126
125
|
**kwargs: Reserved parameters.
|
|
127
126
|
"""
|
|
128
127
|
tft_env = os.getenv("MS_ENABLE_TFT", "")
|
|
129
|
-
|
|
130
|
-
|
|
128
|
+
tft_opts = ["TTP:1", "UCE:1", "HCCE:1", "ARF:1", "TSP:1"]
|
|
129
|
+
tft_enabled = any([opt in tft_env for opt in tft_opts])
|
|
130
|
+
if not tft_enabled:
|
|
131
|
+
raise ValueError("MindIO TFT regitster need custom switch on[MS_ENABLE_TFT='{%s}']!" % ",".join(tft_opts))
|
|
131
132
|
if "ARF:1" in tft_env:
|
|
132
133
|
logger.warning(f"Disable hccl watchdog when using ARF.")
|
|
133
134
|
context.set_context(ascend_config={"hccl_watchdog": False})
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
135
|
+
if "TTP:1" not in tft_env:
|
|
136
|
+
logger.warning(f"Turn on TTP config when using ARF.")
|
|
137
|
+
tft_env = tft_env.replace("{", "").replace("}", "")
|
|
138
|
+
all_opts = [part.strip() for part in tft_env.split(",")] + ["TTP:1"]
|
|
139
|
+
os.environ["MS_ENABLE_TFT"] = "{" + ",".join(all_opts) + "}"
|
|
137
140
|
os.environ["MS_ENABLE_RECOVERY"] = "1"
|
|
138
141
|
|
|
139
|
-
mode = context.get_context("mode")
|
|
140
142
|
device_target = context.get_context("device_target")
|
|
141
|
-
if device_target != "Ascend"
|
|
142
|
-
logger.warning(f"MindIO adataper only support on Ascend device
|
|
143
|
-
f"device:{device_target}, run mode: {mode}")
|
|
143
|
+
if device_target != "Ascend":
|
|
144
|
+
logger.warning(f"MindIO adataper only support on Ascend device but got device {device_target}!")
|
|
144
145
|
return
|
|
145
146
|
|
|
146
147
|
ctrl_port = int(os.getenv("MS_TFT_PORT"))
|
|
@@ -153,7 +154,7 @@ class TftHandle:
|
|
|
153
154
|
from mindio_ttp import framework_ttp as tft
|
|
154
155
|
self.tft = tft
|
|
155
156
|
except BaseException as e:
|
|
156
|
-
raise ModuleNotFoundError(f"Module
|
|
157
|
+
raise ModuleNotFoundError(f"Module not found. Detail info {str(e)}")
|
|
157
158
|
world_size = int(os.getenv("MS_WORKER_NUM")) # from msrun
|
|
158
159
|
cur_rank = int(os.getenv("MS_NODE_ID")) # from msrun
|
|
159
160
|
enable_local_copy = False
|
|
@@ -181,14 +182,10 @@ class TftHandle:
|
|
|
181
182
|
logger.warning("tft report reboot init finish ")
|
|
182
183
|
tft.tft_report_error(tft.ReportState.RS_INIT_FINISH.value)
|
|
183
184
|
_set_recovery_context(is_reboot_node=True)
|
|
184
|
-
if config is None:
|
|
185
|
-
raise ValueError(
|
|
186
|
-
f"Param 'config' can not be None when using ARF, and 'config' should be a dict type.")
|
|
187
|
-
logger.warning("tft reboot node no need load checkpoint when using ARF.")
|
|
188
|
-
config["arf_skip_load"] = True
|
|
189
185
|
ret = tft.tft_wait_next_action()
|
|
190
186
|
if ret != tft.Action.RETRY.value:
|
|
191
187
|
raise RuntimeError(f"ARF init failed!")
|
|
188
|
+
logger.warning("tft reboot success.")
|
|
192
189
|
|
|
193
190
|
|
|
194
191
|
_tft_handler = TftHandle()
|
mindspore/vcmeta.dll
CHANGED
|
Binary file
|
mindspore/vcruntime140.dll
CHANGED
|
Binary file
|
mindspore/vcruntime140_1.dll
CHANGED
|
Binary file
|
mindspore/version.py
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
__version__ = '2.
|
|
1
|
+
__version__ = '2.7.0'
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: mindspore
|
|
3
|
-
Version: 2.
|
|
3
|
+
Version: 2.7.0
|
|
4
4
|
Summary: MindSpore is a new open source deep learning training/inference framework that could be used for mobile, edge and cloud scenarios.
|
|
5
5
|
Home-page: https://www.mindspore.cn
|
|
6
6
|
Author: The MindSpore Authors
|
|
@@ -320,11 +320,12 @@ Project stable branches will be in one of the following states:
|
|
|
320
320
|
|
|
321
321
|
## Maintenance status
|
|
322
322
|
|
|
323
|
-
| **Version
|
|
323
|
+
| **Version**| **Status** | **Initial Release Date**| **Next Phase** | **EOL Date**|
|
|
324
324
|
|------------|--------------|--------------------------|----------------------------------------|-------------|
|
|
325
|
-
| **r2.
|
|
325
|
+
| **r2.6** | Maintained | 2025-05-19 | Unmaintained <br> 2026-05-19 estimated | 2026-05-19 |
|
|
326
|
+
| **r2.5** | Maintained | 2025-02-08 | Unmaintained <br> 2026-02-08 estimated | 2026-02-08 |
|
|
326
327
|
| **r2.4** | Maintained | 2024-10-30 | Unmaintained <br> 2025-10-30 estimated | 2025-10-30 |
|
|
327
|
-
| **r2.3** |
|
|
328
|
+
| **r2.3** | End Of Life | 2024-07-15 | | 2025-07-15 |
|
|
328
329
|
| **r2.2** | End Of Life | 2023-10-18 | | 2024-10-18 |
|
|
329
330
|
| **r2.1** | End Of Life | 2023-07-29 | | 2024-07-29 |
|
|
330
331
|
| **r2.0** | End Of Life | 2023-06-15 | | 2024-06-15 |
|