mindspore 2.6.0rc1__cp310-cp310-win_amd64.whl → 2.7.0rc1__cp310-cp310-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mindspore might be problematic. Click here for more details.
- mindspore/.commit_id +1 -1
- mindspore/Microsoft.VisualStudio.Telemetry.dll +0 -0
- mindspore/Newtonsoft.Json.dll +0 -0
- mindspore/__init__.py +1 -1
- mindspore/_c_dataengine.cp310-win_amd64.pyd +0 -0
- mindspore/_c_expression.cp310-win_amd64.pyd +0 -0
- mindspore/_c_mindrecord.cp310-win_amd64.pyd +0 -0
- mindspore/_checkparam.py +40 -9
- mindspore/{_deprecated → _extends/optimize}/__init__.py +9 -3
- mindspore/_extends/optimize/cell_utils.py +96 -0
- mindspore/_extends/parse/__init__.py +2 -2
- mindspore/_extends/parse/compile_config.py +44 -22
- mindspore/_extends/parse/deprecated/deprecated_tensor_method.py +1 -1
- mindspore/_extends/parse/parser.py +37 -62
- mindspore/_extends/parse/resources.py +39 -0
- mindspore/_extends/parse/standard_method.py +43 -13
- mindspore/_extends/parse/trope.py +8 -1
- mindspore/_extends/pijit/__init__.py +1 -2
- mindspore/amp.py +4 -4
- mindspore/atlprov.dll +0 -0
- mindspore/avcodec-59.dll +0 -0
- mindspore/avdevice-59.dll +0 -0
- mindspore/avfilter-8.dll +0 -0
- mindspore/avformat-59.dll +0 -0
- mindspore/avutil-57.dll +0 -0
- mindspore/boost/adasum.py +1 -1
- mindspore/boost/boost_cell_wrapper.py +4 -4
- mindspore/c1.dll +0 -0
- mindspore/c1xx.dll +0 -0
- mindspore/c2.dll +0 -0
- mindspore/common/__init__.py +27 -2
- mindspore/common/_grad_function.py +2 -1
- mindspore/common/_pijit_context.py +28 -7
- mindspore/common/_stub_tensor.py +1 -209
- mindspore/common/_tensor_cpp_method.py +1 -1
- mindspore/common/_tensor_docs.py +77 -16
- mindspore/common/api.py +238 -113
- mindspore/common/dtype.py +21 -11
- mindspore/common/dump.py +10 -15
- mindspore/common/generator.py +5 -3
- mindspore/common/hook_handle.py +11 -2
- mindspore/common/jit_config.py +1 -1
- mindspore/common/jit_trace.py +84 -105
- mindspore/common/parameter.py +26 -12
- mindspore/common/recompute.py +3 -3
- mindspore/common/sparse_tensor.py +0 -3
- mindspore/common/symbol.py +0 -1
- mindspore/common/tensor.py +81 -81
- mindspore/communication/_comm_helper.py +46 -4
- mindspore/communication/management.py +79 -7
- mindspore/context.py +58 -40
- mindspore/dataset/core/config.py +3 -3
- mindspore/dataset/engine/datasets.py +20 -7
- mindspore/dataset/engine/datasets_user_defined.py +33 -3
- mindspore/dataset/engine/iterators.py +2 -2
- mindspore/dataset/engine/obs/config_loader.py +2 -2
- mindspore/dataset/engine/obs/obs_mindrecord_dataset.py +8 -0
- mindspore/dataset/transforms/py_transforms.py +7 -3
- mindspore/dataset/transforms/transforms.py +7 -3
- mindspore/dataset/vision/validators.py +1 -0
- mindspore/device_context/ascend/device.py +1 -1
- mindspore/device_context/gpu/__init__.py +2 -2
- mindspore/device_context/gpu/device.py +1 -1
- mindspore/device_context/gpu/op_precision.py +4 -2
- mindspore/device_context/gpu/op_tuning.py +6 -3
- mindspore/device_manager.py +16 -9
- mindspore/dnnl.dll +0 -0
- mindspore/dpcmi.dll +0 -0
- mindspore/experimental/llm_boost/ascend_native/llama_boost_ascend_native.py +3 -7
- mindspore/experimental/llm_boost/atb/boost_base.py +2 -3
- mindspore/experimental/optim/adadelta.py +13 -20
- mindspore/experimental/optim/adagrad.py +15 -22
- mindspore/experimental/optim/adam.py +17 -24
- mindspore/experimental/optim/adamax.py +14 -22
- mindspore/experimental/optim/adamw.py +28 -34
- mindspore/experimental/optim/asgd.py +15 -25
- mindspore/experimental/optim/lr_scheduler.py +27 -45
- mindspore/experimental/optim/nadam.py +14 -24
- mindspore/experimental/optim/optimizer.py +13 -23
- mindspore/experimental/optim/radam.py +18 -24
- mindspore/experimental/optim/rmsprop.py +14 -25
- mindspore/experimental/optim/rprop.py +15 -26
- mindspore/experimental/optim/sgd.py +9 -19
- mindspore/hal/__init__.py +4 -4
- mindspore/hal/contiguous_tensors_handle.py +2 -2
- mindspore/hal/memory.py +27 -7
- mindspore/include/api/cell.h +37 -1
- mindspore/include/api/delegate.h +10 -0
- mindspore/include/api/model.h +3 -0
- mindspore/include/api/types.h +2 -2
- mindspore/include/c_api/model_c.h +0 -58
- mindspore/include/c_api/tensor_c.h +0 -26
- mindspore/include/dataset/vision_ascend.h +1 -1
- mindspore/jpeg62.dll +0 -0
- mindspore/mindrecord/tools/cifar10.py +60 -11
- mindspore/mindrecord/tools/cifar10_to_mr.py +5 -0
- mindspore/mindspore_backend_common.dll +0 -0
- mindspore/mindspore_backend_manager.dll +0 -0
- mindspore/mindspore_common.dll +0 -0
- mindspore/mindspore_core.dll +0 -0
- mindspore/mindspore_cpu_res_manager.dll +0 -0
- mindspore/mindspore_dump.dll +0 -0
- mindspore/mindspore_frontend.dll +0 -0
- mindspore/mindspore_glog.dll +0 -0
- mindspore/mindspore_memory_pool.dll +0 -0
- mindspore/mindspore_ms_backend.dll +0 -0
- mindspore/mindspore_ops.dll +0 -0
- mindspore/mindspore_ops_host.dll +0 -0
- mindspore/mindspore_ops_kernel_common.dll +0 -0
- mindspore/mindspore_profiler.dll +0 -0
- mindspore/mindspore_pyboost.dll +0 -0
- mindspore/mindspore_pynative.dll +0 -0
- mindspore/mindspore_res_manager.dll +0 -0
- mindspore/mindspore_runtime_pipeline.dll +0 -0
- mindspore/mint/__init__.py +6 -46
- mindspore/mint/distributed/__init__.py +1 -0
- mindspore/mint/distributed/distributed.py +212 -9
- mindspore/mint/nn/__init__.py +1 -1
- mindspore/mint/nn/functional.py +53 -6
- mindspore/mint/nn/layer/_functions.py +164 -294
- mindspore/mint/nn/layer/activation.py +8 -6
- mindspore/mint/nn/layer/conv.py +137 -101
- mindspore/mint/nn/layer/normalization.py +8 -22
- mindspore/mint/optim/adam.py +19 -18
- mindspore/mint/optim/adamw.py +14 -8
- mindspore/mint/optim/sgd.py +5 -5
- mindspore/msobj140.dll +0 -0
- mindspore/mspdb140.dll +0 -0
- mindspore/mspdbcore.dll +0 -0
- mindspore/mspdbst.dll +0 -0
- mindspore/mspft140.dll +0 -0
- mindspore/msvcdis140.dll +0 -0
- mindspore/msvcp140_1.dll +0 -0
- mindspore/msvcp140_2.dll +0 -0
- mindspore/msvcp140_atomic_wait.dll +0 -0
- mindspore/msvcp140_codecvt_ids.dll +0 -0
- mindspore/nn/cell.py +328 -502
- mindspore/nn/grad/cell_grad.py +11 -12
- mindspore/nn/layer/activation.py +32 -34
- mindspore/nn/layer/basic.py +67 -64
- mindspore/nn/layer/channel_shuffle.py +4 -4
- mindspore/nn/layer/combined.py +4 -2
- mindspore/nn/layer/conv.py +117 -110
- mindspore/nn/layer/dense.py +9 -7
- mindspore/nn/layer/embedding.py +50 -52
- mindspore/nn/layer/image.py +37 -39
- mindspore/nn/layer/math.py +111 -112
- mindspore/nn/layer/normalization.py +56 -44
- mindspore/nn/layer/pooling.py +58 -63
- mindspore/nn/layer/rnn_cells.py +33 -33
- mindspore/nn/layer/rnns.py +56 -56
- mindspore/nn/layer/thor_layer.py +74 -73
- mindspore/nn/layer/transformer.py +11 -1
- mindspore/nn/learning_rate_schedule.py +20 -20
- mindspore/nn/loss/loss.py +79 -81
- mindspore/nn/optim/adam.py +3 -3
- mindspore/nn/optim/adasum.py +2 -2
- mindspore/nn/optim/asgd.py +2 -0
- mindspore/nn/optim/optimizer.py +1 -1
- mindspore/nn/optim/thor.py +2 -2
- mindspore/nn/probability/distribution/exponential.py +2 -1
- mindspore/nn/probability/distribution/poisson.py +2 -1
- mindspore/nn/sparse/sparse.py +3 -3
- mindspore/nn/wrap/cell_wrapper.py +34 -37
- mindspore/nn/wrap/grad_reducer.py +37 -37
- mindspore/nn/wrap/loss_scale.py +72 -74
- mindspore/numpy/array_creations.py +5 -5
- mindspore/numpy/fft.py +1 -1
- mindspore/numpy/math_ops.py +5 -5
- mindspore/opencv_core452.dll +0 -0
- mindspore/opencv_imgcodecs452.dll +0 -0
- mindspore/opencv_imgproc452.dll +0 -0
- mindspore/ops/_grad_experimental/grad_comm_ops.py +51 -13
- mindspore/ops/_grad_experimental/grad_debug_ops.py +14 -0
- mindspore/ops/_vmap/vmap_array_ops.py +31 -13
- mindspore/ops/_vmap/vmap_nn_ops.py +8 -16
- mindspore/ops/auto_generate/cpp_create_prim_instance_helper.py +42 -11
- mindspore/ops/auto_generate/gen_extend_func.py +23 -141
- mindspore/ops/auto_generate/gen_ops_def.py +727 -321
- mindspore/ops/auto_generate/gen_ops_prim.py +1721 -984
- mindspore/ops/auto_generate/pyboost_inner_prim.py +31 -1
- mindspore/ops/composite/__init__.py +10 -0
- mindspore/ops/composite/base.py +8 -4
- mindspore/ops/composite/multitype_ops/__init__.py +12 -1
- mindspore/ops/composite/multitype_ops/_compile_utils.py +133 -109
- mindspore/ops/composite/multitype_ops/add_impl.py +70 -2
- mindspore/ops/composite/multitype_ops/div_impl.py +49 -0
- mindspore/ops/composite/multitype_ops/floordiv_impl.py +29 -0
- mindspore/ops/composite/multitype_ops/getitem_impl.py +11 -0
- mindspore/ops/composite/multitype_ops/mod_impl.py +5 -3
- mindspore/ops/composite/multitype_ops/mul_impl.py +49 -0
- mindspore/ops/composite/multitype_ops/setitem_impl.py +57 -0
- mindspore/ops/composite/multitype_ops/sub_impl.py +34 -0
- mindspore/ops/composite/multitype_ops/zeros_like_impl.py +14 -0
- mindspore/ops/function/__init__.py +3 -1
- mindspore/ops/function/_add_attr_func.py +11 -6
- mindspore/ops/function/array_func.py +9 -96
- mindspore/ops/function/debug_func.py +4 -3
- mindspore/ops/function/grad/grad_func.py +1 -1
- mindspore/ops/function/math_func.py +33 -540
- mindspore/ops/function/nn_func.py +28 -74
- mindspore/ops/function/other_func.py +4 -1
- mindspore/ops/function/random_func.py +44 -5
- mindspore/ops/function/vmap_func.py +2 -1
- mindspore/ops/functional.py +2 -3
- mindspore/ops/functional_overload.py +571 -6
- mindspore/ops/op_info_register.py +21 -0
- mindspore/ops/operations/__init__.py +16 -11
- mindspore/ops/operations/_custom_ops_utils.py +689 -34
- mindspore/ops/operations/_inner_ops.py +3 -6
- mindspore/ops/operations/_sequence_ops.py +1 -1
- mindspore/ops/operations/array_ops.py +2 -2
- mindspore/ops/operations/comm_ops.py +185 -26
- mindspore/ops/operations/custom_ops.py +294 -174
- mindspore/ops/operations/debug_ops.py +59 -4
- mindspore/ops/operations/image_ops.py +13 -13
- mindspore/ops/operations/manually_defined/ops_def.py +15 -16
- mindspore/ops/operations/math_ops.py +3 -4
- mindspore/ops/operations/nn_ops.py +7 -39
- mindspore/ops/primitive.py +6 -10
- mindspore/ops/tensor_method.py +47 -8
- mindspore/ops_generate/api/cpp_create_prim_instance_helper_generator.py +1 -1
- mindspore/ops_generate/api/functional_map_cpp_generator.py +10 -9
- mindspore/ops_generate/api/functions_cc_generator.py +58 -10
- mindspore/ops_generate/api/tensor_func_reg_cpp_generator.py +1 -1
- mindspore/ops_generate/common/base_generator.py +14 -0
- mindspore/ops_generate/common/gen_constants.py +8 -3
- mindspore/ops_generate/common/gen_utils.py +0 -19
- mindspore/ops_generate/common/op_proto.py +11 -4
- mindspore/ops_generate/common/template.py +88 -11
- mindspore/ops_generate/gen_ops.py +1 -1
- mindspore/ops_generate/op_def/lite_ops_cpp_generator.py +4 -4
- mindspore/ops_generate/op_def/ops_def_cc_generator.py +0 -3
- mindspore/ops_generate/op_def/ops_name_h_generator.py +0 -3
- mindspore/ops_generate/op_def/ops_primitive_h_generator.py +0 -4
- mindspore/ops_generate/op_def_py/op_prim_py_generator.py +5 -2
- mindspore/ops_generate/pyboost/auto_grad_impl_cc_generator.py +49 -8
- mindspore/ops_generate/pyboost/auto_grad_reg_cc_generator.py +2 -2
- mindspore/ops_generate/pyboost/gen_pyboost_func.py +31 -0
- mindspore/ops_generate/pyboost/op_template_parser.py +98 -72
- mindspore/ops_generate/pyboost/pyboost_functions_cpp_generator.py +70 -273
- mindspore/ops_generate/pyboost/pyboost_functions_h_generator.py +14 -6
- mindspore/ops_generate/pyboost/pyboost_functions_impl_cpp_generator.py +316 -0
- mindspore/ops_generate/pyboost/pyboost_functions_py_generator.py +1 -1
- mindspore/ops_generate/pyboost/pyboost_grad_function_cpp_generator.py +5 -3
- mindspore/ops_generate/pyboost/pyboost_inner_prim_generator.py +1 -1
- mindspore/ops_generate/pyboost/pyboost_internal_functions_cpp_generator.py +76 -0
- mindspore/ops_generate/pyboost/pyboost_internal_functions_h_generator.py +76 -0
- mindspore/ops_generate/pyboost/pyboost_internal_kernel_info_adapter_generator.py +125 -0
- mindspore/ops_generate/pyboost/pyboost_native_grad_functions_generator.py +4 -3
- mindspore/ops_generate/pyboost/pyboost_op_cpp_code_generator.py +348 -61
- mindspore/ops_generate/pyboost/pyboost_overload_functions_cpp_generator.py +1 -1
- mindspore/ops_generate/pyboost/pyboost_utils.py +118 -9
- mindspore/ops_generate/tensor_py_cc_generator.py +1 -24
- mindspore/parallel/_auto_parallel_context.py +11 -8
- mindspore/parallel/_cell_wrapper.py +113 -45
- mindspore/parallel/_parallel_serialization.py +1 -1
- mindspore/parallel/_ps_context.py +4 -6
- mindspore/parallel/_tensor.py +167 -12
- mindspore/parallel/_transformer/moe.py +1 -1
- mindspore/parallel/_transformer/transformer.py +13 -8
- mindspore/parallel/auto_parallel.py +14 -7
- mindspore/parallel/checkpoint_convert.py +3 -3
- mindspore/parallel/checkpoint_transform.py +11 -7
- mindspore/parallel/cluster/process_entity/_api.py +84 -48
- mindspore/parallel/cluster/process_entity/_utils.py +95 -7
- mindspore/parallel/cluster/run.py +43 -4
- mindspore/parallel/function/__init__.py +8 -1
- mindspore/parallel/function/reshard_func.py +6 -7
- mindspore/parallel/nn/__init__.py +15 -2
- mindspore/parallel/nn/parallel_cell_wrapper.py +9 -10
- mindspore/parallel/nn/parallel_grad_reducer.py +7 -6
- mindspore/parallel/shard.py +3 -4
- mindspore/parallel/transform_safetensors.py +463 -174
- mindspore/pgodb140.dll +0 -0
- mindspore/pgort140.dll +0 -0
- mindspore/profiler/__init__.py +2 -1
- mindspore/profiler/analysis/parser/timeline_assembly_factory/ascend_timeline_assembler.py +7 -7
- mindspore/profiler/analysis/parser/timeline_assembly_factory/base_timeline_assembler.py +3 -0
- mindspore/profiler/analysis/parser/timeline_assembly_factory/trace_view_container.py +12 -6
- mindspore/profiler/analysis/parser/timeline_creator/cpu_op_timeline_creator.py +3 -3
- mindspore/profiler/analysis/parser/timeline_creator/fwk_timeline_creator.py +3 -3
- mindspore/profiler/analysis/parser/timeline_creator/msprof_timeline_creator.py +4 -4
- mindspore/profiler/analysis/parser/timeline_creator/scope_layer_timeline_creator.py +3 -3
- mindspore/profiler/analysis/parser/timeline_event/fwk_event.py +4 -1
- mindspore/profiler/analysis/parser/timeline_event/timeline_event_pool.py +2 -1
- mindspore/profiler/analysis/task_manager.py +1 -1
- mindspore/profiler/analysis/viewer/ascend_communication_viewer.py +5 -1
- mindspore/profiler/analysis/viewer/ascend_integrate_viewer.py +2 -1
- mindspore/profiler/analysis/viewer/ascend_op_memory_viewer.py +42 -22
- mindspore/profiler/analysis/viewer/ascend_step_trace_time_viewer.py +3 -2
- mindspore/profiler/analysis/viewer/ms_minddata_viewer.py +9 -5
- mindspore/profiler/analysis/viewer/ms_operator_details_viewer.py +132 -0
- mindspore/profiler/common/constant.py +16 -0
- mindspore/profiler/common/profiler_context.py +25 -27
- mindspore/profiler/common/profiler_info.py +0 -16
- mindspore/profiler/common/profiler_op_analyse.py +235 -0
- mindspore/profiler/common/profiler_output_path.py +23 -8
- mindspore/profiler/common/profiler_parameters.py +128 -35
- mindspore/profiler/dynamic_profile/__init__.py +0 -0
- mindspore/profiler/dynamic_profile/dynamic_monitor_proxy.py +39 -0
- mindspore/profiler/dynamic_profile/dynamic_profiler_config_context.py +666 -0
- mindspore/profiler/dynamic_profile/dynamic_profiler_utils.py +62 -0
- mindspore/profiler/dynamic_profiler.py +305 -314
- mindspore/profiler/envprofiler.py +12 -7
- mindspore/profiler/experimental_config.py +96 -6
- mindspore/profiler/mstx.py +33 -12
- mindspore/profiler/platform/__init__.py +2 -3
- mindspore/profiler/platform/npu_profiler.py +29 -19
- mindspore/profiler/profiler.py +35 -19
- mindspore/profiler/profiler_action_controller.py +64 -76
- mindspore/profiler/schedule.py +10 -4
- mindspore/rewrite/common/config.py +1 -0
- mindspore/rewrite/common/namer.py +1 -0
- mindspore/rewrite/common/namespace.py +1 -0
- mindspore/rewrite/node/node.py +31 -11
- mindspore/rewrite/parsers/assign_parser.py +1 -1
- mindspore/rewrite/symbol_tree/symbol_tree.py +1 -1
- mindspore/run_check/_check_version.py +7 -10
- mindspore/runtime/__init__.py +5 -5
- mindspore/runtime/event.py +10 -4
- mindspore/runtime/executor.py +60 -45
- mindspore/runtime/memory.py +30 -32
- mindspore/runtime/thread_bind_core.py +298 -164
- mindspore/safeguard/rewrite_obfuscation.py +12 -13
- mindspore/swresample-4.dll +0 -0
- mindspore/swscale-6.dll +0 -0
- mindspore/tbbmalloc.dll +0 -0
- mindspore/tinyxml2.dll +0 -0
- mindspore/train/_utils.py +14 -4
- mindspore/train/amp.py +43 -20
- mindspore/train/callback/__init__.py +5 -5
- mindspore/train/callback/_checkpoint.py +3 -6
- mindspore/train/callback/_flops_collector.py +1 -1
- mindspore/train/callback/_landscape.py +0 -1
- mindspore/train/callback/_train_fault_tolerance.py +97 -16
- mindspore/train/data_sink.py +11 -2
- mindspore/train/dataset_helper.py +9 -0
- mindspore/train/model.py +135 -55
- mindspore/train/serialization.py +133 -111
- mindspore/train/summary/summary_record.py +13 -2
- mindspore/turbojpeg.dll +0 -0
- mindspore/utils/__init__.py +3 -2
- mindspore/utils/dryrun.py +0 -6
- mindspore/utils/runtime_execution_order_check.py +163 -77
- mindspore/utils/sdc_detect.py +68 -0
- mindspore/utils/utils.py +6 -9
- mindspore/vcmeta.dll +0 -0
- mindspore/vcruntime140.dll +0 -0
- mindspore/vcruntime140_1.dll +0 -0
- mindspore/version.py +1 -1
- {mindspore-2.6.0rc1.dist-info → mindspore-2.7.0rc1.dist-info}/METADATA +5 -4
- {mindspore-2.6.0rc1.dist-info → mindspore-2.7.0rc1.dist-info}/RECORD +356 -394
- mindspore/_deprecated/jit.py +0 -198
- mindspore/experimental/es/__init__.py +0 -22
- mindspore/experimental/es/embedding_service.py +0 -891
- mindspore/experimental/es/embedding_service_layer.py +0 -581
- mindspore/profiler/parser/__init__.py +0 -14
- mindspore/profiler/parser/aicpu_data_parser.py +0 -272
- mindspore/profiler/parser/ascend_analysis/__init__.py +0 -14
- mindspore/profiler/parser/ascend_analysis/constant.py +0 -71
- mindspore/profiler/parser/ascend_analysis/file_manager.py +0 -180
- mindspore/profiler/parser/ascend_analysis/function_event.py +0 -185
- mindspore/profiler/parser/ascend_analysis/fwk_cann_parser.py +0 -136
- mindspore/profiler/parser/ascend_analysis/fwk_file_parser.py +0 -131
- mindspore/profiler/parser/ascend_analysis/msprof_timeline_parser.py +0 -104
- mindspore/profiler/parser/ascend_analysis/path_manager.py +0 -313
- mindspore/profiler/parser/ascend_analysis/profiler_info_parser.py +0 -123
- mindspore/profiler/parser/ascend_analysis/tlv_decoder.py +0 -86
- mindspore/profiler/parser/ascend_analysis/trace_event_manager.py +0 -75
- mindspore/profiler/parser/ascend_cluster_generator.py +0 -116
- mindspore/profiler/parser/ascend_communicate_generator.py +0 -314
- mindspore/profiler/parser/ascend_flops_generator.py +0 -116
- mindspore/profiler/parser/ascend_fpbp_generator.py +0 -82
- mindspore/profiler/parser/ascend_hccl_generator.py +0 -271
- mindspore/profiler/parser/ascend_integrate_generator.py +0 -42
- mindspore/profiler/parser/ascend_memory_generator.py +0 -185
- mindspore/profiler/parser/ascend_msprof_exporter.py +0 -282
- mindspore/profiler/parser/ascend_msprof_generator.py +0 -187
- mindspore/profiler/parser/ascend_op_generator.py +0 -334
- mindspore/profiler/parser/ascend_steptrace_generator.py +0 -94
- mindspore/profiler/parser/ascend_timeline_generator.py +0 -545
- mindspore/profiler/parser/base_timeline_generator.py +0 -483
- mindspore/profiler/parser/container.py +0 -229
- mindspore/profiler/parser/cpu_gpu_timeline_generator.py +0 -697
- mindspore/profiler/parser/flops_parser.py +0 -531
- mindspore/profiler/parser/framework_enum.py +0 -111
- mindspore/profiler/parser/framework_parser.py +0 -464
- mindspore/profiler/parser/framework_struct.py +0 -61
- mindspore/profiler/parser/gpu_analysis/__init__.py +0 -14
- mindspore/profiler/parser/gpu_analysis/function_event.py +0 -44
- mindspore/profiler/parser/gpu_analysis/fwk_file_parser.py +0 -89
- mindspore/profiler/parser/gpu_analysis/profiler_info_parser.py +0 -72
- mindspore/profiler/parser/hccl_parser.py +0 -573
- mindspore/profiler/parser/hwts_log_parser.py +0 -122
- mindspore/profiler/parser/integrator.py +0 -526
- mindspore/profiler/parser/memory_usage_parser.py +0 -277
- mindspore/profiler/parser/minddata_analyzer.py +0 -800
- mindspore/profiler/parser/minddata_parser.py +0 -186
- mindspore/profiler/parser/minddata_pipeline_parser.py +0 -299
- mindspore/profiler/parser/op_intermediate_parser.py +0 -149
- mindspore/profiler/parser/optime_parser.py +0 -250
- mindspore/profiler/parser/profiler_info.py +0 -213
- mindspore/profiler/parser/step_trace_parser.py +0 -666
- {mindspore-2.6.0rc1.dist-info → mindspore-2.7.0rc1.dist-info}/WHEEL +0 -0
- {mindspore-2.6.0rc1.dist-info → mindspore-2.7.0rc1.dist-info}/entry_points.txt +0 -0
- {mindspore-2.6.0rc1.dist-info → mindspore-2.7.0rc1.dist-info}/top_level.txt +0 -0
|
@@ -15,21 +15,45 @@
|
|
|
15
15
|
|
|
16
16
|
"""Executor manager interfaces."""
|
|
17
17
|
import subprocess
|
|
18
|
-
from dataclasses import dataclass
|
|
19
|
-
from typing import Union
|
|
20
18
|
import re
|
|
19
|
+
import os
|
|
20
|
+
import ast
|
|
21
21
|
from mindspore import log as logger
|
|
22
22
|
from mindspore import context
|
|
23
|
+
from mindspore.communication import get_local_rank_size
|
|
23
24
|
|
|
24
25
|
|
|
25
26
|
def execute_command(cmd_list):
|
|
27
|
+
"""
|
|
28
|
+
Execute a system command and return its output.
|
|
29
|
+
|
|
30
|
+
Args:
|
|
31
|
+
cmd_list (list): A list of strings representing the command and its arguments.
|
|
32
|
+
|
|
33
|
+
Returns:
|
|
34
|
+
str: The decoded standard output from the command execution.
|
|
35
|
+
"""
|
|
26
36
|
try:
|
|
27
37
|
with subprocess.Popen(cmd_list, shell=False, stdout=subprocess.PIPE, stderr=subprocess.PIPE) as p:
|
|
28
38
|
out, _ = p.communicate(timeout=1000)
|
|
29
39
|
res = out.decode()
|
|
30
40
|
return res
|
|
31
41
|
except FileNotFoundError as e:
|
|
32
|
-
raise RuntimeError(
|
|
42
|
+
raise RuntimeError("Failed to execute command") from e
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def _adapt_to_dict(affinity_cpu_list):
|
|
46
|
+
"""
|
|
47
|
+
Adapt to dict type affinity_cpu_list.
|
|
48
|
+
"""
|
|
49
|
+
if not isinstance(affinity_cpu_list, dict):
|
|
50
|
+
return affinity_cpu_list
|
|
51
|
+
|
|
52
|
+
logical_device_id = context.get_context("device_id")
|
|
53
|
+
simulation_level = os.getenv("MS_SIMULATION_LEVEL", "").strip()
|
|
54
|
+
physical_device_id = _get_physical_device_id(logical_device_id, simulation_level)
|
|
55
|
+
device_key = f"device{physical_device_id}"
|
|
56
|
+
return affinity_cpu_list.get(device_key, False)
|
|
33
57
|
|
|
34
58
|
|
|
35
59
|
def _validate_affinity_cpu_list(affinity_cpu_list):
|
|
@@ -37,26 +61,49 @@ def _validate_affinity_cpu_list(affinity_cpu_list):
|
|
|
37
61
|
Validate the user-configured affinity_cpu_list.
|
|
38
62
|
|
|
39
63
|
Args:
|
|
40
|
-
affinity_cpu_list (
|
|
64
|
+
affinity_cpu_list (list): Customized bind-core strategy to be validated.
|
|
41
65
|
|
|
42
66
|
Returns:
|
|
43
67
|
None.
|
|
44
68
|
"""
|
|
45
|
-
|
|
69
|
+
if affinity_cpu_list is None:
|
|
70
|
+
return
|
|
71
|
+
|
|
72
|
+
if not isinstance(affinity_cpu_list, list):
|
|
73
|
+
raise TypeError(f"The parameter '{affinity_cpu_list}' must be list, but got {type(affinity_cpu_list)}")
|
|
74
|
+
|
|
46
75
|
range_pattern = re.compile(r'^\d+-\d+$')
|
|
47
76
|
|
|
48
|
-
for
|
|
49
|
-
if not isinstance(
|
|
50
|
-
raise ValueError(f"
|
|
51
|
-
if not
|
|
52
|
-
raise ValueError(f"
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
77
|
+
for cpu_range in affinity_cpu_list:
|
|
78
|
+
if not isinstance(cpu_range, str):
|
|
79
|
+
raise ValueError(f"CPU range '{cpu_range}' in '{affinity_cpu_list}' should be a string.")
|
|
80
|
+
if not range_pattern.match(cpu_range):
|
|
81
|
+
raise ValueError(f"CPU range '{cpu_range}' in '{affinity_cpu_list}' should be in format 'cpuidX-cpuidY'.")
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
def _validate_module_cpu_index(module_to_cpu_dict):
|
|
85
|
+
"""
|
|
86
|
+
Validate the user-configured module_to_cpu_dict.
|
|
87
|
+
|
|
88
|
+
Args:
|
|
89
|
+
module_to_cpu_dict (dict): Customized module-to-CPU mapping to be validated.
|
|
90
|
+
|
|
91
|
+
Returns:
|
|
92
|
+
None.
|
|
93
|
+
"""
|
|
94
|
+
if module_to_cpu_dict is None:
|
|
95
|
+
return
|
|
96
|
+
|
|
97
|
+
if not isinstance(module_to_cpu_dict, dict):
|
|
98
|
+
raise TypeError(f"The parameter '{module_to_cpu_dict}' must be dict, but got {type(module_to_cpu_dict)}")
|
|
99
|
+
|
|
100
|
+
for module_name, cpu_indices in module_to_cpu_dict.items():
|
|
101
|
+
if not isinstance(cpu_indices, list):
|
|
102
|
+
raise ValueError(f"The value of module_to_cpu_dict: {cpu_indices} should be a list.")
|
|
103
|
+
for cpu_id in cpu_indices:
|
|
104
|
+
if not isinstance(cpu_id, int) or cpu_id < 0:
|
|
105
|
+
raise ValueError(f"CPU index '{cpu_id}' for module '{module_name}' in '{cpu_indices}' "
|
|
106
|
+
"should be a non-negative integer.")
|
|
60
107
|
|
|
61
108
|
|
|
62
109
|
def _get_cpu_available():
|
|
@@ -66,40 +113,40 @@ def _get_cpu_available():
|
|
|
66
113
|
Returns:
|
|
67
114
|
list: List of available CPUs on the environment.
|
|
68
115
|
"""
|
|
69
|
-
available_cpu_str = execute_command(["cat", "/sys/fs/cgroup/cpuset/cpuset.cpus"]).strip().split(",")
|
|
70
116
|
available_cpus = list()
|
|
117
|
+
|
|
118
|
+
available_cpu_str = execute_command(["cat", "/sys/fs/cgroup/cpuset/cpuset.cpus"]).strip().split(",")
|
|
71
119
|
for range_str in available_cpu_str:
|
|
72
120
|
endpoints = range_str.split("-")
|
|
73
121
|
if len(endpoints) != 2:
|
|
74
|
-
raise RuntimeError("'cat /sys/fs/cgroup/cpuset/cpuset.cpus'
|
|
75
|
-
available_cpus
|
|
122
|
+
raise RuntimeError("Failed to parse the result of executing 'cat /sys/fs/cgroup/cpuset/cpuset.cpus'.")
|
|
123
|
+
available_cpus.extend(range(int(endpoints[0]), int(endpoints[1]) + 1))
|
|
124
|
+
|
|
76
125
|
return available_cpus
|
|
77
126
|
|
|
78
127
|
|
|
79
|
-
@dataclass
|
|
80
128
|
class DeviceInfo:
|
|
81
129
|
"""
|
|
82
130
|
A class to represent information about an Ascend device.
|
|
83
131
|
|
|
84
132
|
Attributes:
|
|
85
|
-
_info_line (str): A raw string containing device information.
|
|
86
133
|
npu_id (int): The ID of the NPU.
|
|
87
134
|
chip_id (int): The ID of the chip.
|
|
88
|
-
chip_logic_id (
|
|
135
|
+
chip_logic_id (int, str): The logical ID of the chip, which can be an integer or a string.
|
|
89
136
|
chip_name (str): The name of the chip.
|
|
90
137
|
|
|
91
138
|
Methods:
|
|
92
|
-
|
|
139
|
+
_parse_info_line(info_line): Initializes the attributes by parsing input info_line.
|
|
93
140
|
"""
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
141
|
+
def __init__(self, info_line):
|
|
142
|
+
self.npu_id = 0
|
|
143
|
+
self.chip_id = 0
|
|
144
|
+
self.chip_logic_id = 0
|
|
145
|
+
self.chip_name = ""
|
|
146
|
+
self._parse_info_line(info_line)
|
|
147
|
+
|
|
148
|
+
def _parse_info_line(self, info_line):
|
|
149
|
+
self.npu_id, self.chip_id, self.chip_logic_id, self.chip_name = info_line.strip().split(None, 3)
|
|
103
150
|
self.npu_id = int(self.npu_id)
|
|
104
151
|
self.chip_id = int(self.chip_id)
|
|
105
152
|
if self.chip_logic_id.isnumeric():
|
|
@@ -114,15 +161,16 @@ def _get_device_map_info():
|
|
|
114
161
|
dict: Mapping of NPU logical ID to its details.
|
|
115
162
|
set: Contains all available NPU logical ids on the environment.
|
|
116
163
|
"""
|
|
117
|
-
device_map_info =
|
|
164
|
+
device_map_info = dict()
|
|
118
165
|
available_devices = set()
|
|
119
|
-
|
|
120
|
-
|
|
166
|
+
|
|
167
|
+
device_map = execute_command(["npu-smi", "info", "-m"]).strip().split("\n")[1:]
|
|
121
168
|
for line in device_map:
|
|
122
169
|
device_info = DeviceInfo(line.strip())
|
|
123
170
|
if isinstance(device_info.chip_logic_id, int):
|
|
124
171
|
device_map_info[device_info.chip_logic_id] = device_info
|
|
125
172
|
available_devices.add(device_info.chip_logic_id)
|
|
173
|
+
|
|
126
174
|
return device_map_info, available_devices
|
|
127
175
|
|
|
128
176
|
|
|
@@ -137,37 +185,38 @@ def _get_pcie_info(device_map_info, available_devices, keyword="PCIeBusInfo"):
|
|
|
137
185
|
Returns:
|
|
138
186
|
dict: Mapping of NPU logical ID to its PCIe number.
|
|
139
187
|
"""
|
|
140
|
-
|
|
188
|
+
device_to_pcie_map = dict()
|
|
189
|
+
|
|
141
190
|
for device in available_devices:
|
|
142
191
|
device_info = device_map_info.get(device)
|
|
143
192
|
if not device_info:
|
|
144
|
-
raise RuntimeError("
|
|
145
|
-
pcie_info =
|
|
146
|
-
|
|
147
|
-
"-c", f"{device_info.chip_id}"]).strip().split("\n")
|
|
193
|
+
raise RuntimeError("Failed to get device pcie info.")
|
|
194
|
+
pcie_info = execute_command(["npu-smi", "info", "-t", "board", "-i", f"{device_info.npu_id}",
|
|
195
|
+
"-c", f"{device_info.chip_id}"]).strip().split("\n")
|
|
148
196
|
for _ in pcie_info:
|
|
149
197
|
line = ''.join(_.split())
|
|
150
198
|
if line.startswith(keyword):
|
|
151
|
-
|
|
199
|
+
device_to_pcie_map[device] = line[len(keyword) + 1:]
|
|
152
200
|
break
|
|
153
|
-
|
|
201
|
+
|
|
202
|
+
return device_to_pcie_map
|
|
154
203
|
|
|
155
204
|
|
|
156
|
-
def _get_numa_info(
|
|
205
|
+
def _get_numa_info(device_to_pcie_map, keyword="NUMAnode"):
|
|
157
206
|
"""
|
|
158
207
|
Get NUMA node affinity for device based on PCIe.
|
|
159
208
|
|
|
160
209
|
Args:
|
|
161
|
-
|
|
210
|
+
device_to_pcie_map (dict): A map of NPU logical ID to its PCIe number.
|
|
162
211
|
|
|
163
212
|
Returns:
|
|
164
213
|
dict: Mapping of device ID to its affinity NUMA nodes.
|
|
165
214
|
dict: Mapping of NUMA node to its affinity device IDs.
|
|
166
215
|
"""
|
|
167
|
-
device_to_numa_map =
|
|
168
|
-
numa_to_device_map =
|
|
216
|
+
device_to_numa_map = dict()
|
|
217
|
+
numa_to_device_map = dict()
|
|
169
218
|
|
|
170
|
-
for device, pcie_no in
|
|
219
|
+
for device, pcie_no in device_to_pcie_map.items():
|
|
171
220
|
numa_info = execute_command(["lspci", "-s", f"{pcie_no}", "-vvv"]).strip().split("\n")
|
|
172
221
|
for _ in numa_info:
|
|
173
222
|
line = ''.join(_.split())
|
|
@@ -180,7 +229,8 @@ def _get_numa_info(device_pcie_map, keyword="NUMAnode"):
|
|
|
180
229
|
numa_to_device_map[numa_id] = list()
|
|
181
230
|
numa_to_device_map[numa_id].append(device)
|
|
182
231
|
break
|
|
183
|
-
numa_to_device_map[-1] = list(
|
|
232
|
+
numa_to_device_map[-1] = list(device_to_pcie_map.keys())
|
|
233
|
+
|
|
184
234
|
return device_to_numa_map, numa_to_device_map
|
|
185
235
|
|
|
186
236
|
|
|
@@ -211,191 +261,275 @@ def _get_cpu_info(numa_ids, available_cpus, keyword1="NUMAnode", keyword2="CPU(s
|
|
|
211
261
|
for range_str in cpu_id_ranges:
|
|
212
262
|
endpoints = range_str.split("-")
|
|
213
263
|
if len(endpoints) != 2:
|
|
214
|
-
raise RuntimeError("
|
|
264
|
+
raise RuntimeError("Failed to parse the result of executing 'lscpu'.")
|
|
215
265
|
ranges += [cid for cid in range(int(endpoints[0]), int(endpoints[1]) + 1) if cid in available_cpus]
|
|
216
266
|
if numa_id not in numa_ids:
|
|
217
267
|
numa_id = int(-1)
|
|
218
268
|
if numa_id not in numa_to_cpu_map:
|
|
219
269
|
numa_to_cpu_map[numa_id] = list()
|
|
220
270
|
numa_to_cpu_map[numa_id].extend(ranges)
|
|
271
|
+
|
|
221
272
|
return numa_to_cpu_map
|
|
222
273
|
|
|
223
274
|
|
|
224
|
-
def
|
|
275
|
+
def _get_physical_device_id(logical_device_id, simulation_level):
|
|
276
|
+
"""
|
|
277
|
+
Get physical device id from logical device id.
|
|
278
|
+
|
|
279
|
+
Args:
|
|
280
|
+
logical_device_id (int): The logical device id for this process in the task.
|
|
281
|
+
simulation_level (string): Dryrun's simulation level.
|
|
282
|
+
|
|
283
|
+
Returns:
|
|
284
|
+
int: The physical device id for this process in the host.
|
|
285
|
+
"""
|
|
286
|
+
env_visible_device = os.getenv("ASCEND_RT_VISIBLE_DEVICES", "").strip()
|
|
287
|
+
if context.get_context("device_target") == "Ascend" and env_visible_device and not simulation_level:
|
|
288
|
+
list_visible_device = list()
|
|
289
|
+
for item in env_visible_device.split(','):
|
|
290
|
+
list_visible_device.append(int(item))
|
|
291
|
+
list_visible_device.sort()
|
|
292
|
+
if logical_device_id >= len(list_visible_device):
|
|
293
|
+
raise RuntimeError("Device id exceeds the number of available devices.")
|
|
294
|
+
physical_device_id = list_visible_device[logical_device_id]
|
|
295
|
+
else:
|
|
296
|
+
physical_device_id = logical_device_id
|
|
297
|
+
|
|
298
|
+
return physical_device_id
|
|
299
|
+
|
|
300
|
+
|
|
301
|
+
def _equal_distribution_strategy(device_count, available_cpus):
|
|
302
|
+
"""
|
|
303
|
+
Generate global bind core strategy by equally distributing available cpus.
|
|
304
|
+
|
|
305
|
+
Args:
|
|
306
|
+
device_count(int): The total number of device in the task.
|
|
307
|
+
available_cpus(list): A list of cpus in the environment.
|
|
308
|
+
|
|
309
|
+
Returns:
|
|
310
|
+
dict: Mapping of device to its affinity CPUs.
|
|
311
|
+
"""
|
|
312
|
+
device_to_cpu_map = dict()
|
|
313
|
+
|
|
314
|
+
total_cpus = len(available_cpus)
|
|
315
|
+
cpu_num_per_device = total_cpus // device_count
|
|
316
|
+
if cpu_num_per_device < 1:
|
|
317
|
+
logger.warning(f"Available CPUs is less than 1. Will not enable bind core feature.")
|
|
318
|
+
return {}
|
|
319
|
+
|
|
320
|
+
for i in range(device_count):
|
|
321
|
+
cpu_start = i * cpu_num_per_device
|
|
322
|
+
cpu_end = (i + 1) * cpu_num_per_device if i != device_count - 1 else total_cpus
|
|
323
|
+
device_to_cpu_map[i] = available_cpus[cpu_start:cpu_end]
|
|
324
|
+
|
|
325
|
+
return device_to_cpu_map
|
|
326
|
+
|
|
327
|
+
|
|
328
|
+
def _assemble_env_info(available_devices, available_cpus, affinity_flag, numa_to_cpu_map, device_to_numa_map):
|
|
225
329
|
"""
|
|
226
|
-
|
|
330
|
+
Assemble all results of commands based on the hardware on the environment.
|
|
227
331
|
|
|
228
332
|
Args:
|
|
229
333
|
available_devices (list): All available NPU logical ids on the environment.
|
|
230
334
|
available_cpus (list): A list of available CPUs on the environment.
|
|
231
|
-
affinity_flag (bool): Whether or not it satisfies generating CPU affinity bind-core
|
|
232
|
-
resources on the environment.
|
|
335
|
+
affinity_flag (bool): Whether or not it satisfies generating CPU affinity bind-core
|
|
336
|
+
strategy based on the resources on the environment.
|
|
233
337
|
numa_to_cpu_map (dict): A map of NUMA node to its affinity CPUs.
|
|
234
338
|
device_to_numa_map (dict): A map of device ID to its affinity NUMA nodes.
|
|
235
339
|
|
|
236
340
|
Returns:
|
|
237
341
|
dict: Mapping of device to its affinity CPUs.
|
|
238
342
|
"""
|
|
239
|
-
device_to_cpu_map = {}
|
|
240
|
-
|
|
241
|
-
device_to_cpu_map[device_id] = list()
|
|
343
|
+
device_to_cpu_map = {device_id: [] for device_id in available_devices}
|
|
344
|
+
cpu_num_per_device = len(available_cpus) // len(available_devices)
|
|
242
345
|
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
if cpu_num_per_device < 7:
|
|
247
|
-
raise RuntimeError(f"Cpu num available for each device is {cpu_num_per_device}, "
|
|
248
|
-
"which is less than the minimum cpu num need. Will not enable bind core feature.")
|
|
346
|
+
if cpu_num_per_device < 1:
|
|
347
|
+
logger.warning("Available CPUs is less than 1. Will not enable bind core feature.")
|
|
348
|
+
return {}
|
|
249
349
|
|
|
250
350
|
if affinity_flag:
|
|
251
|
-
device_to_cpu_idx = {}
|
|
252
|
-
for numa_id in numa_to_cpu_map:
|
|
253
|
-
device_to_cpu_idx[numa_id] = 0
|
|
351
|
+
device_to_cpu_idx = {numa_id: 0 for numa_id in numa_to_cpu_map}
|
|
254
352
|
for device_id in available_devices:
|
|
255
|
-
numa_id = device_to_numa_map.get(device_id)
|
|
256
|
-
affinity_cpu_num = 0
|
|
257
353
|
# Prioritize the use of affinity cpu resources.
|
|
354
|
+
numa_id = device_to_numa_map.get(device_id)
|
|
258
355
|
affinity_cpu_start_idx = device_to_cpu_idx[numa_id]
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
affinity_cpu_start_idx:(affinity_cpu_start_idx + cpu_num_per_device)]
|
|
262
|
-
else:
|
|
263
|
-
affinity_cpu = numa_to_cpu_map[numa_id][affinity_cpu_start_idx:]
|
|
264
|
-
affinity_cpu_num = len(affinity_cpu)
|
|
356
|
+
affinity_cpu = numa_to_cpu_map[numa_id][
|
|
357
|
+
affinity_cpu_start_idx: affinity_cpu_start_idx + cpu_num_per_device]
|
|
265
358
|
device_to_cpu_map[device_id].extend(affinity_cpu)
|
|
266
|
-
device_to_cpu_idx[numa_id] = affinity_cpu_start_idx +
|
|
359
|
+
device_to_cpu_idx[numa_id] = affinity_cpu_start_idx + len(affinity_cpu)
|
|
360
|
+
|
|
267
361
|
# If the affinity cpu resources are insufficient then use resources from the non-affinity cpu pool.
|
|
268
|
-
if -1 in device_to_cpu_idx:
|
|
362
|
+
if -1 in device_to_cpu_idx and len(affinity_cpu) < cpu_num_per_device:
|
|
363
|
+
unaffinity_cpu_num = cpu_num_per_device - len(affinity_cpu)
|
|
269
364
|
unaffinity_cpu_start_idx = device_to_cpu_idx[-1]
|
|
270
|
-
unaffinity_cpu_num = cpu_num_per_device - affinity_cpu_num
|
|
271
365
|
unaffinity_cpu = numa_to_cpu_map[-1][
|
|
272
|
-
unaffinity_cpu_start_idx:
|
|
366
|
+
unaffinity_cpu_start_idx: unaffinity_cpu_start_idx + unaffinity_cpu_num]
|
|
273
367
|
device_to_cpu_map[device_id].extend(unaffinity_cpu)
|
|
274
368
|
device_to_cpu_idx[-1] = unaffinity_cpu_start_idx + unaffinity_cpu_num
|
|
275
369
|
else:
|
|
276
|
-
device_rank
|
|
277
|
-
for device_id in available_devices:
|
|
370
|
+
for device_rank, device_id in enumerate(available_devices):
|
|
278
371
|
cpu_start = device_rank * cpu_num_per_device
|
|
279
|
-
device_to_cpu_map[device_id] = available_cpus[cpu_start:
|
|
280
|
-
|
|
372
|
+
device_to_cpu_map[device_id] = available_cpus[cpu_start: cpu_start + cpu_num_per_device]
|
|
373
|
+
|
|
281
374
|
return device_to_cpu_map
|
|
282
375
|
|
|
283
376
|
|
|
284
|
-
def
|
|
377
|
+
def _auto_generate_strategy(device_count, available_cpus):
    """
    Automatically generate bind-core strategy based on CPU affinity.

    The device map is queried first; if that fails, the CPUs are distributed equally over
    `device_count` devices. Otherwise PCIe, NUMA and CPU information is collected, and the
    affinity-based assignment is used only when all of that information is available.

    Args:
        device_count (int): The total number of device in the task.
        available_cpus (list): A list of available CPUs on the environment.

    Returns:
        dict: Mapping of device to its affinity CPUs. Empty if the strategy could not be generated.
    """
    # Get the hardware resources in the environment. If this fails, will bind core not based on device.
    try:
        device_map_info, available_devices = _get_device_map_info()
    except RuntimeError as e:
        fallback_map = _equal_distribution_strategy(device_count, available_cpus)
        logger.warning(f"Failed to acquire device to numa affinity info, from {e} "
                       "Will not bind core based on affinity.")
        return fallback_map

    # Get the affinity resources in the environment. If this fails, will bind core not based on affinity.
    device_to_pcie_map, device_to_numa_map, numa_to_device_map, numa_to_cpu_map = {}, {}, {}, {}
    try:
        device_to_pcie_map = _get_pcie_info(device_map_info, available_devices)
        device_to_numa_map, numa_to_device_map = _get_numa_info(device_to_pcie_map)
        numa_to_cpu_map = _get_cpu_info(list(numa_to_device_map.keys()), available_cpus)
    except RuntimeError as e:
        logger.warning(f"Failed to acquire device to numa affinity info, from {e} "
                       "Will not bind core based on affinity.")

    # Affinity binding needs every one of the collected maps to be non-empty.
    affinity_flag = bool(device_to_pcie_map and device_to_numa_map and numa_to_device_map and numa_to_cpu_map)

    # Auto-generation of bind core strategy for Ascend.
    try:
        return _assemble_env_info(available_devices, available_cpus, affinity_flag,
                                  numa_to_cpu_map, device_to_numa_map)
    except (RuntimeError, ZeroDivisionError) as e:
        logger.warning(f"Failed to auto generate bind core strategy, from {e} "
                       "Will not enable bind core feature.")
        return {}
|
|
425
|
+
|
|
426
|
+
|
|
427
|
+
def _customize_generate_strategy(affinity_cpu_list, available_cpus):
|
|
428
|
+
"""
|
|
429
|
+
Generate customized bind-core strategy based on user-configured inputs.
|
|
430
|
+
|
|
431
|
+
Args:
|
|
432
|
+
affinity_cpu_list (list): User-configured inputs to generate customized bind-core strategy.
|
|
433
|
+
available_cpus (list): A list of available CPUs on the environment.
|
|
434
|
+
|
|
435
|
+
Returns:
|
|
436
|
+
dict: Mapping of device to its affinity CPUs.
|
|
437
|
+
"""
|
|
438
|
+
cpu_list_for_device = list()
|
|
439
|
+
|
|
440
|
+
for cpu_range_str in affinity_cpu_list:
|
|
441
|
+
endpoints = cpu_range_str.split("-")
|
|
442
|
+
for cid in range(int(endpoints[0]), int(endpoints[1]) + 1):
|
|
443
|
+
if cid not in available_cpus:
|
|
444
|
+
raise RuntimeError(f"CPU id:{cid} set in affinity_cpu_list:{affinity_cpu_list} is not available.")
|
|
445
|
+
cpu_list_for_device.append(cid)
|
|
446
|
+
|
|
447
|
+
if not cpu_list_for_device:
|
|
448
|
+
logger.warning(f"Available CPUs is less than 1. Will not enable bind core feature.")
|
|
449
|
+
|
|
450
|
+
return cpu_list_for_device
|
|
451
|
+
|
|
452
|
+
|
|
453
|
+
def _assign_cpu_to_module(cpu_list_for_device, module_to_cpu_dict):
|
|
313
454
|
"""
|
|
314
455
|
Assign specific CPUs to modules.
|
|
315
456
|
|
|
316
457
|
Args:
|
|
317
|
-
|
|
458
|
+
cpu_list_for_device (list): A map of device to its affinity CPUs.
|
|
459
|
+
module_to_cpu_dict (dict): A map of module to its affinity CPU index in cpu_list_for_device.
|
|
318
460
|
|
|
319
461
|
Returns:
|
|
320
462
|
dict: Mapping of device to its affinity CPUs based on module segmentation.
|
|
321
463
|
"""
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
464
|
+
module_bind_core_strategy = dict()
|
|
465
|
+
|
|
466
|
+
valid_module_names = {"main", "runtime", "pynative", "minddata"}
|
|
467
|
+
|
|
468
|
+
if module_to_cpu_dict is not None:
|
|
469
|
+
module_bind_core_strategy = {
|
|
470
|
+
module: [cpu_list_for_device[i] for i in indices if 0 <= i < len(cpu_list_for_device)]
|
|
471
|
+
for module, indices in module_to_cpu_dict.items() if module in valid_module_names
|
|
472
|
+
}
|
|
473
|
+
else:
|
|
474
|
+
module_bind_core_strategy["main"] = cpu_list_for_device
|
|
475
|
+
|
|
476
|
+
return module_bind_core_strategy
|
|
331
477
|
|
|
332
478
|
|
|
333
|
-
def
|
|
479
|
+
def _get_cpu_affinity_strategy(affinity_cpu_list=None, module_to_cpu_dict=None):
    """
    The entry to get bind-core strategy.

    Args:
        affinity_cpu_list (list, optional): User-configured CPU range to generate customized bind-core strategy.
            Default: ``None``.
        module_to_cpu_dict (dict, optional): User-configured module to CPU index to generate customized
            bind-core strategy. Default: ``None``.

    Returns:
        dict: Mapping of module to its CPUs for this process. Empty if the bind-core feature
        is not enabled.
    """
    device_target = context.get_context("device_target")
    simulation_level = os.getenv("MS_SIMULATION_LEVEL", "").strip()

    # Get the CPU resources in the environment. If this fails, the binding core feature will not be enabled.
    try:
        available_cpus = _get_cpu_available()
    except RuntimeError as e:
        logger.warning(f"Failed to acquire available cpu info, from {e} Will not enable bind core feature.")
        return {}

    if affinity_cpu_list:
        # User configured bind-core strategy.
        cpu_list_for_device = _customize_generate_strategy(affinity_cpu_list, available_cpus)
    else:
        # Automatic generation of bind-core strategy based on resources on the environment.
        env_msrun_cpu_list = os.getenv("MSRUN_CPU_LIST")
        if env_msrun_cpu_list:
            # A CPU list provided through MSRUN_CPU_LIST is used directly for this process.
            msrun_strategy = _assign_cpu_to_module(ast.literal_eval(env_msrun_cpu_list), module_to_cpu_dict)
            logger.warning(f"Module bind core policy from msrun: {msrun_strategy}.")
            return msrun_strategy

        try:
            logical_device_id = context.get_context("device_id")
            device_count = get_local_rank_size()
            physical_device_id = _get_physical_device_id(logical_device_id, simulation_level)
        except RuntimeError as e:
            logger.warning(f"Fail to get device_id or device_count, from {e} Will not enable bind core feature.")
            return {}

        # If the device target is Ascend, the affinity between the device and NUMA node is taken into account
        # to generate the binding core strategy.
        use_affinity = device_target == "Ascend" and not simulation_level
        if use_affinity:
            device_to_cpu_map = _auto_generate_strategy(device_count, available_cpus)
        else:
            device_to_cpu_map = _equal_distribution_strategy(device_count, available_cpus)

        # Get cpu_list for this process according to global device_to_cpu_map.
        cpu_list_for_device = device_to_cpu_map.get(physical_device_id, [])

    # cpu_list_for_device is empty, indicating that the basic conditions have not been met
    # to enable the thread bind core feature.
    if not cpu_list_for_device:
        return {}

    generated_strategy = _assign_cpu_to_module(cpu_list_for_device, module_to_cpu_dict)
    logger.warning(f"Module bind core policy generated: {generated_strategy}.")

    return generated_strategy
|