mindspore-2.6.0-cp39-cp39-win_amd64.whl → mindspore-2.7.0rc1-cp39-cp39-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mindspore/.commit_id +1 -1
- mindspore/__init__.py +1 -1
- mindspore/_c_dataengine.cp39-win_amd64.pyd +0 -0
- mindspore/_c_expression.cp39-win_amd64.pyd +0 -0
- mindspore/_c_mindrecord.cp39-win_amd64.pyd +0 -0
- mindspore/_checkparam.py +40 -9
- mindspore/{_deprecated → _extends/optimize}/__init__.py +9 -3
- mindspore/_extends/optimize/cell_utils.py +96 -0
- mindspore/_extends/parse/__init__.py +2 -2
- mindspore/_extends/parse/compile_config.py +44 -22
- mindspore/_extends/parse/deprecated/deprecated_tensor_method.py +1 -1
- mindspore/_extends/parse/parser.py +36 -61
- mindspore/_extends/parse/resources.py +39 -0
- mindspore/_extends/parse/standard_method.py +32 -13
- mindspore/_extends/parse/trope.py +8 -1
- mindspore/_extends/pijit/__init__.py +1 -2
- mindspore/amp.py +4 -4
- mindspore/avcodec-59.dll +0 -0
- mindspore/avdevice-59.dll +0 -0
- mindspore/avfilter-8.dll +0 -0
- mindspore/avformat-59.dll +0 -0
- mindspore/avutil-57.dll +0 -0
- mindspore/boost/adasum.py +1 -1
- mindspore/boost/boost_cell_wrapper.py +4 -4
- mindspore/common/__init__.py +27 -2
- mindspore/common/_grad_function.py +2 -1
- mindspore/common/_pijit_context.py +28 -7
- mindspore/common/_stub_tensor.py +1 -209
- mindspore/common/_tensor_cpp_method.py +1 -1
- mindspore/common/_tensor_docs.py +76 -15
- mindspore/common/api.py +193 -112
- mindspore/common/dtype.py +21 -11
- mindspore/common/dump.py +10 -15
- mindspore/common/generator.py +2 -3
- mindspore/common/hook_handle.py +11 -2
- mindspore/common/jit_config.py +1 -1
- mindspore/common/jit_trace.py +84 -105
- mindspore/common/parameter.py +26 -12
- mindspore/common/recompute.py +3 -3
- mindspore/common/sparse_tensor.py +0 -3
- mindspore/common/symbol.py +0 -1
- mindspore/common/tensor.py +48 -83
- mindspore/communication/_comm_helper.py +46 -4
- mindspore/communication/management.py +79 -7
- mindspore/context.py +38 -23
- mindspore/dataset/core/config.py +3 -3
- mindspore/dataset/engine/datasets.py +20 -7
- mindspore/dataset/engine/datasets_user_defined.py +32 -2
- mindspore/dataset/engine/iterators.py +2 -2
- mindspore/dataset/engine/obs/config_loader.py +2 -2
- mindspore/dataset/engine/obs/obs_mindrecord_dataset.py +8 -0
- mindspore/dataset/transforms/py_transforms.py +7 -3
- mindspore/dataset/transforms/transforms.py +7 -3
- mindspore/dataset/vision/validators.py +1 -0
- mindspore/device_context/ascend/device.py +1 -1
- mindspore/device_context/gpu/__init__.py +2 -2
- mindspore/device_context/gpu/device.py +1 -1
- mindspore/device_context/gpu/op_precision.py +4 -2
- mindspore/device_context/gpu/op_tuning.py +6 -3
- mindspore/device_manager.py +16 -9
- mindspore/dnnl.dll +0 -0
- mindspore/experimental/llm_boost/ascend_native/llama_boost_ascend_native.py +3 -5
- mindspore/experimental/llm_boost/atb/boost_base.py +2 -3
- mindspore/experimental/optim/adadelta.py +13 -20
- mindspore/experimental/optim/adagrad.py +15 -22
- mindspore/experimental/optim/adam.py +17 -24
- mindspore/experimental/optim/adamax.py +14 -22
- mindspore/experimental/optim/adamw.py +28 -34
- mindspore/experimental/optim/asgd.py +15 -25
- mindspore/experimental/optim/lr_scheduler.py +27 -45
- mindspore/experimental/optim/nadam.py +14 -24
- mindspore/experimental/optim/optimizer.py +13 -23
- mindspore/experimental/optim/radam.py +18 -24
- mindspore/experimental/optim/rmsprop.py +14 -25
- mindspore/experimental/optim/rprop.py +15 -26
- mindspore/experimental/optim/sgd.py +9 -19
- mindspore/hal/__init__.py +4 -4
- mindspore/hal/contiguous_tensors_handle.py +2 -2
- mindspore/hal/memory.py +1 -0
- mindspore/include/api/cell.h +37 -1
- mindspore/include/api/delegate.h +10 -0
- mindspore/include/api/model.h +3 -0
- mindspore/include/api/types.h +2 -2
- mindspore/include/c_api/model_c.h +0 -58
- mindspore/include/c_api/tensor_c.h +0 -26
- mindspore/include/dataset/vision_ascend.h +1 -1
- mindspore/jpeg62.dll +0 -0
- mindspore/mindrecord/tools/cifar10.py +60 -11
- mindspore/mindrecord/tools/cifar10_to_mr.py +5 -0
- mindspore/mindspore_backend_common.dll +0 -0
- mindspore/mindspore_backend_manager.dll +0 -0
- mindspore/mindspore_common.dll +0 -0
- mindspore/mindspore_core.dll +0 -0
- mindspore/mindspore_cpu_res_manager.dll +0 -0
- mindspore/mindspore_dump.dll +0 -0
- mindspore/mindspore_frontend.dll +0 -0
- mindspore/mindspore_glog.dll +0 -0
- mindspore/mindspore_memory_pool.dll +0 -0
- mindspore/mindspore_ms_backend.dll +0 -0
- mindspore/mindspore_ops.dll +0 -0
- mindspore/mindspore_ops_host.dll +0 -0
- mindspore/mindspore_ops_kernel_common.dll +0 -0
- mindspore/mindspore_profiler.dll +0 -0
- mindspore/mindspore_pyboost.dll +0 -0
- mindspore/mindspore_pynative.dll +0 -0
- mindspore/mindspore_res_manager.dll +0 -0
- mindspore/mindspore_runtime_pipeline.dll +0 -0
- mindspore/mint/__init__.py +4 -44
- mindspore/mint/distributed/__init__.py +1 -0
- mindspore/mint/distributed/distributed.py +208 -5
- mindspore/mint/nn/__init__.py +1 -1
- mindspore/mint/nn/functional.py +53 -6
- mindspore/mint/nn/layer/_functions.py +164 -294
- mindspore/mint/nn/layer/activation.py +8 -6
- mindspore/mint/nn/layer/conv.py +122 -98
- mindspore/mint/nn/layer/normalization.py +8 -22
- mindspore/mint/optim/adam.py +19 -18
- mindspore/mint/optim/adamw.py +14 -8
- mindspore/mint/optim/sgd.py +5 -5
- mindspore/nn/cell.py +325 -499
- mindspore/nn/grad/cell_grad.py +11 -12
- mindspore/nn/layer/activation.py +32 -34
- mindspore/nn/layer/basic.py +67 -64
- mindspore/nn/layer/channel_shuffle.py +4 -4
- mindspore/nn/layer/combined.py +4 -2
- mindspore/nn/layer/conv.py +86 -85
- mindspore/nn/layer/dense.py +9 -7
- mindspore/nn/layer/embedding.py +50 -52
- mindspore/nn/layer/image.py +37 -39
- mindspore/nn/layer/math.py +111 -112
- mindspore/nn/layer/normalization.py +56 -44
- mindspore/nn/layer/pooling.py +58 -63
- mindspore/nn/layer/rnn_cells.py +33 -33
- mindspore/nn/layer/rnns.py +56 -56
- mindspore/nn/layer/thor_layer.py +74 -73
- mindspore/nn/layer/transformer.py +11 -1
- mindspore/nn/learning_rate_schedule.py +20 -20
- mindspore/nn/loss/loss.py +79 -81
- mindspore/nn/optim/adam.py +1 -1
- mindspore/nn/optim/adasum.py +2 -2
- mindspore/nn/optim/optimizer.py +1 -1
- mindspore/nn/optim/thor.py +2 -2
- mindspore/nn/probability/distribution/exponential.py +2 -1
- mindspore/nn/probability/distribution/poisson.py +2 -1
- mindspore/nn/sparse/sparse.py +3 -3
- mindspore/nn/wrap/cell_wrapper.py +34 -37
- mindspore/nn/wrap/grad_reducer.py +37 -37
- mindspore/nn/wrap/loss_scale.py +72 -74
- mindspore/numpy/array_creations.py +5 -5
- mindspore/numpy/fft.py +1 -1
- mindspore/numpy/math_ops.py +1 -1
- mindspore/opencv_core452.dll +0 -0
- mindspore/opencv_imgcodecs452.dll +0 -0
- mindspore/opencv_imgproc452.dll +0 -0
- mindspore/ops/_grad_experimental/grad_comm_ops.py +51 -13
- mindspore/ops/_grad_experimental/grad_debug_ops.py +14 -0
- mindspore/ops/_vmap/vmap_array_ops.py +6 -13
- mindspore/ops/_vmap/vmap_nn_ops.py +8 -16
- mindspore/ops/auto_generate/cpp_create_prim_instance_helper.py +17 -8
- mindspore/ops/auto_generate/gen_extend_func.py +1 -51
- mindspore/ops/auto_generate/gen_ops_def.py +463 -257
- mindspore/ops/auto_generate/gen_ops_prim.py +1127 -885
- mindspore/ops/auto_generate/pyboost_inner_prim.py +31 -1
- mindspore/ops/composite/__init__.py +10 -0
- mindspore/ops/composite/base.py +8 -4
- mindspore/ops/composite/multitype_ops/__init__.py +12 -1
- mindspore/ops/composite/multitype_ops/_compile_utils.py +132 -108
- mindspore/ops/composite/multitype_ops/add_impl.py +70 -2
- mindspore/ops/composite/multitype_ops/div_impl.py +49 -0
- mindspore/ops/composite/multitype_ops/floordiv_impl.py +29 -0
- mindspore/ops/composite/multitype_ops/getitem_impl.py +11 -0
- mindspore/ops/composite/multitype_ops/mod_impl.py +5 -3
- mindspore/ops/composite/multitype_ops/mul_impl.py +49 -0
- mindspore/ops/composite/multitype_ops/setitem_impl.py +57 -0
- mindspore/ops/composite/multitype_ops/sub_impl.py +34 -0
- mindspore/ops/composite/multitype_ops/zeros_like_impl.py +14 -0
- mindspore/ops/function/__init__.py +3 -1
- mindspore/ops/function/_add_attr_func.py +11 -6
- mindspore/ops/function/array_func.py +7 -94
- mindspore/ops/function/debug_func.py +4 -3
- mindspore/ops/function/grad/grad_func.py +1 -1
- mindspore/ops/function/math_func.py +21 -367
- mindspore/ops/function/nn_func.py +26 -41
- mindspore/ops/function/other_func.py +4 -1
- mindspore/ops/function/random_func.py +31 -4
- mindspore/ops/functional.py +0 -2
- mindspore/ops/functional_overload.py +463 -6
- mindspore/ops/op_info_register.py +21 -0
- mindspore/ops/operations/__init__.py +5 -2
- mindspore/ops/operations/_custom_ops_utils.py +675 -8
- mindspore/ops/operations/_inner_ops.py +3 -6
- mindspore/ops/operations/_sequence_ops.py +1 -1
- mindspore/ops/operations/comm_ops.py +185 -26
- mindspore/ops/operations/custom_ops.py +235 -172
- mindspore/ops/operations/debug_ops.py +55 -4
- mindspore/ops/operations/image_ops.py +13 -13
- mindspore/ops/operations/manually_defined/ops_def.py +15 -16
- mindspore/ops/operations/math_ops.py +3 -4
- mindspore/ops/operations/nn_ops.py +5 -6
- mindspore/ops/primitive.py +6 -10
- mindspore/ops/tensor_method.py +36 -4
- mindspore/ops_generate/api/cpp_create_prim_instance_helper_generator.py +1 -1
- mindspore/ops_generate/api/functional_map_cpp_generator.py +10 -9
- mindspore/ops_generate/api/functions_cc_generator.py +58 -10
- mindspore/ops_generate/api/tensor_func_reg_cpp_generator.py +1 -1
- mindspore/ops_generate/common/base_generator.py +14 -0
- mindspore/ops_generate/common/gen_constants.py +7 -2
- mindspore/ops_generate/common/gen_utils.py +0 -19
- mindspore/ops_generate/common/op_proto.py +11 -4
- mindspore/ops_generate/common/template.py +88 -11
- mindspore/ops_generate/gen_ops.py +1 -1
- mindspore/ops_generate/op_def/lite_ops_cpp_generator.py +4 -4
- mindspore/ops_generate/op_def/ops_name_h_generator.py +0 -3
- mindspore/ops_generate/op_def/ops_primitive_h_generator.py +0 -4
- mindspore/ops_generate/op_def_py/op_prim_py_generator.py +5 -2
- mindspore/ops_generate/pyboost/auto_grad_impl_cc_generator.py +49 -8
- mindspore/ops_generate/pyboost/auto_grad_reg_cc_generator.py +2 -2
- mindspore/ops_generate/pyboost/gen_pyboost_func.py +31 -0
- mindspore/ops_generate/pyboost/op_template_parser.py +98 -72
- mindspore/ops_generate/pyboost/pyboost_functions_cpp_generator.py +70 -273
- mindspore/ops_generate/pyboost/pyboost_functions_h_generator.py +14 -6
- mindspore/ops_generate/pyboost/pyboost_functions_impl_cpp_generator.py +316 -0
- mindspore/ops_generate/pyboost/pyboost_functions_py_generator.py +1 -1
- mindspore/ops_generate/pyboost/pyboost_grad_function_cpp_generator.py +5 -3
- mindspore/ops_generate/pyboost/pyboost_inner_prim_generator.py +1 -1
- mindspore/ops_generate/pyboost/pyboost_internal_functions_cpp_generator.py +76 -0
- mindspore/ops_generate/pyboost/pyboost_internal_functions_h_generator.py +76 -0
- mindspore/ops_generate/pyboost/pyboost_internal_kernel_info_adapter_generator.py +125 -0
- mindspore/ops_generate/pyboost/pyboost_native_grad_functions_generator.py +4 -3
- mindspore/ops_generate/pyboost/pyboost_op_cpp_code_generator.py +348 -61
- mindspore/ops_generate/pyboost/pyboost_overload_functions_cpp_generator.py +1 -1
- mindspore/ops_generate/pyboost/pyboost_utils.py +118 -9
- mindspore/ops_generate/tensor_py_cc_generator.py +1 -24
- mindspore/parallel/_auto_parallel_context.py +4 -2
- mindspore/parallel/_cell_wrapper.py +106 -40
- mindspore/parallel/_parallel_serialization.py +1 -1
- mindspore/parallel/_ps_context.py +4 -6
- mindspore/parallel/_tensor.py +167 -12
- mindspore/parallel/_transformer/moe.py +1 -1
- mindspore/parallel/_transformer/transformer.py +13 -8
- mindspore/parallel/auto_parallel.py +12 -5
- mindspore/parallel/checkpoint_convert.py +3 -3
- mindspore/parallel/checkpoint_transform.py +3 -1
- mindspore/parallel/cluster/process_entity/_api.py +84 -48
- mindspore/parallel/cluster/process_entity/_utils.py +95 -7
- mindspore/parallel/cluster/run.py +43 -4
- mindspore/parallel/function/__init__.py +8 -1
- mindspore/parallel/function/reshard_func.py +1 -1
- mindspore/parallel/nn/__init__.py +15 -2
- mindspore/parallel/nn/parallel_cell_wrapper.py +9 -10
- mindspore/parallel/nn/parallel_grad_reducer.py +7 -6
- mindspore/parallel/shard.py +2 -2
- mindspore/parallel/transform_safetensors.py +462 -174
- mindspore/profiler/__init__.py +2 -1
- mindspore/profiler/analysis/parser/timeline_assembly_factory/ascend_timeline_assembler.py +7 -7
- mindspore/profiler/analysis/parser/timeline_assembly_factory/base_timeline_assembler.py +3 -0
- mindspore/profiler/analysis/parser/timeline_assembly_factory/trace_view_container.py +3 -0
- mindspore/profiler/analysis/parser/timeline_creator/cpu_op_timeline_creator.py +3 -3
- mindspore/profiler/analysis/parser/timeline_creator/fwk_timeline_creator.py +3 -3
- mindspore/profiler/analysis/parser/timeline_creator/msprof_timeline_creator.py +4 -4
- mindspore/profiler/analysis/parser/timeline_creator/scope_layer_timeline_creator.py +3 -3
- mindspore/profiler/analysis/parser/timeline_event/fwk_event.py +4 -1
- mindspore/profiler/analysis/parser/timeline_event/timeline_event_pool.py +2 -1
- mindspore/profiler/analysis/task_manager.py +1 -1
- mindspore/profiler/analysis/viewer/ascend_communication_viewer.py +5 -1
- mindspore/profiler/analysis/viewer/ascend_integrate_viewer.py +2 -1
- mindspore/profiler/analysis/viewer/ascend_op_memory_viewer.py +42 -22
- mindspore/profiler/analysis/viewer/ascend_step_trace_time_viewer.py +3 -2
- mindspore/profiler/analysis/viewer/ms_minddata_viewer.py +9 -5
- mindspore/profiler/analysis/viewer/ms_operator_details_viewer.py +132 -0
- mindspore/profiler/common/constant.py +16 -0
- mindspore/profiler/common/profiler_context.py +25 -27
- mindspore/profiler/common/profiler_info.py +0 -16
- mindspore/profiler/common/profiler_op_analyse.py +235 -0
- mindspore/profiler/common/profiler_output_path.py +23 -8
- mindspore/profiler/common/profiler_parameters.py +128 -35
- mindspore/profiler/dynamic_profile/__init__.py +0 -0
- mindspore/profiler/dynamic_profile/dynamic_monitor_proxy.py +39 -0
- mindspore/profiler/dynamic_profile/dynamic_profiler_config_context.py +666 -0
- mindspore/profiler/dynamic_profile/dynamic_profiler_utils.py +62 -0
- mindspore/profiler/dynamic_profiler.py +305 -314
- mindspore/profiler/envprofiler.py +12 -7
- mindspore/profiler/experimental_config.py +96 -6
- mindspore/profiler/mstx.py +33 -12
- mindspore/profiler/platform/__init__.py +2 -3
- mindspore/profiler/platform/npu_profiler.py +29 -19
- mindspore/profiler/profiler.py +35 -19
- mindspore/profiler/profiler_action_controller.py +64 -76
- mindspore/profiler/schedule.py +10 -4
- mindspore/rewrite/common/config.py +1 -0
- mindspore/rewrite/common/namer.py +1 -0
- mindspore/rewrite/common/namespace.py +1 -0
- mindspore/rewrite/node/node.py +31 -11
- mindspore/rewrite/parsers/assign_parser.py +1 -1
- mindspore/rewrite/symbol_tree/symbol_tree.py +1 -1
- mindspore/run_check/_check_version.py +7 -10
- mindspore/runtime/__init__.py +5 -5
- mindspore/runtime/event.py +10 -4
- mindspore/runtime/executor.py +60 -45
- mindspore/runtime/memory.py +21 -30
- mindspore/runtime/thread_bind_core.py +298 -164
- mindspore/safeguard/rewrite_obfuscation.py +12 -13
- mindspore/swresample-4.dll +0 -0
- mindspore/swscale-6.dll +0 -0
- mindspore/tinyxml2.dll +0 -0
- mindspore/train/_utils.py +6 -2
- mindspore/train/amp.py +43 -20
- mindspore/train/callback/__init__.py +5 -5
- mindspore/train/callback/_checkpoint.py +3 -6
- mindspore/train/callback/_flops_collector.py +1 -1
- mindspore/train/callback/_landscape.py +0 -1
- mindspore/train/callback/_train_fault_tolerance.py +71 -13
- mindspore/train/data_sink.py +11 -2
- mindspore/train/dataset_helper.py +9 -0
- mindspore/train/model.py +51 -33
- mindspore/train/serialization.py +133 -111
- mindspore/train/summary/summary_record.py +13 -2
- mindspore/turbojpeg.dll +0 -0
- mindspore/utils/__init__.py +3 -2
- mindspore/utils/dryrun.py +0 -6
- mindspore/utils/runtime_execution_order_check.py +162 -78
- mindspore/utils/sdc_detect.py +68 -0
- mindspore/utils/utils.py +6 -9
- mindspore/version.py +1 -1
- {mindspore-2.6.0.dist-info → mindspore-2.7.0rc1.dist-info}/METADATA +5 -4
- {mindspore-2.6.0.dist-info → mindspore-2.7.0rc1.dist-info}/RECORD +329 -367
- mindspore/_deprecated/jit.py +0 -198
- mindspore/experimental/es/__init__.py +0 -22
- mindspore/experimental/es/embedding_service.py +0 -891
- mindspore/experimental/es/embedding_service_layer.py +0 -581
- mindspore/profiler/parser/__init__.py +0 -14
- mindspore/profiler/parser/aicpu_data_parser.py +0 -272
- mindspore/profiler/parser/ascend_analysis/__init__.py +0 -14
- mindspore/profiler/parser/ascend_analysis/constant.py +0 -71
- mindspore/profiler/parser/ascend_analysis/file_manager.py +0 -180
- mindspore/profiler/parser/ascend_analysis/function_event.py +0 -185
- mindspore/profiler/parser/ascend_analysis/fwk_cann_parser.py +0 -136
- mindspore/profiler/parser/ascend_analysis/fwk_file_parser.py +0 -131
- mindspore/profiler/parser/ascend_analysis/msprof_timeline_parser.py +0 -104
- mindspore/profiler/parser/ascend_analysis/path_manager.py +0 -313
- mindspore/profiler/parser/ascend_analysis/profiler_info_parser.py +0 -123
- mindspore/profiler/parser/ascend_analysis/tlv_decoder.py +0 -86
- mindspore/profiler/parser/ascend_analysis/trace_event_manager.py +0 -75
- mindspore/profiler/parser/ascend_cluster_generator.py +0 -116
- mindspore/profiler/parser/ascend_communicate_generator.py +0 -314
- mindspore/profiler/parser/ascend_flops_generator.py +0 -116
- mindspore/profiler/parser/ascend_fpbp_generator.py +0 -82
- mindspore/profiler/parser/ascend_hccl_generator.py +0 -271
- mindspore/profiler/parser/ascend_integrate_generator.py +0 -42
- mindspore/profiler/parser/ascend_memory_generator.py +0 -185
- mindspore/profiler/parser/ascend_msprof_exporter.py +0 -282
- mindspore/profiler/parser/ascend_msprof_generator.py +0 -187
- mindspore/profiler/parser/ascend_op_generator.py +0 -334
- mindspore/profiler/parser/ascend_steptrace_generator.py +0 -94
- mindspore/profiler/parser/ascend_timeline_generator.py +0 -545
- mindspore/profiler/parser/base_timeline_generator.py +0 -483
- mindspore/profiler/parser/container.py +0 -229
- mindspore/profiler/parser/cpu_gpu_timeline_generator.py +0 -697
- mindspore/profiler/parser/flops_parser.py +0 -531
- mindspore/profiler/parser/framework_enum.py +0 -111
- mindspore/profiler/parser/framework_parser.py +0 -464
- mindspore/profiler/parser/framework_struct.py +0 -61
- mindspore/profiler/parser/gpu_analysis/__init__.py +0 -14
- mindspore/profiler/parser/gpu_analysis/function_event.py +0 -44
- mindspore/profiler/parser/gpu_analysis/fwk_file_parser.py +0 -89
- mindspore/profiler/parser/gpu_analysis/profiler_info_parser.py +0 -72
- mindspore/profiler/parser/hccl_parser.py +0 -573
- mindspore/profiler/parser/hwts_log_parser.py +0 -122
- mindspore/profiler/parser/integrator.py +0 -526
- mindspore/profiler/parser/memory_usage_parser.py +0 -277
- mindspore/profiler/parser/minddata_analyzer.py +0 -800
- mindspore/profiler/parser/minddata_parser.py +0 -186
- mindspore/profiler/parser/minddata_pipeline_parser.py +0 -299
- mindspore/profiler/parser/op_intermediate_parser.py +0 -149
- mindspore/profiler/parser/optime_parser.py +0 -250
- mindspore/profiler/parser/profiler_info.py +0 -213
- mindspore/profiler/parser/step_trace_parser.py +0 -666
- {mindspore-2.6.0.dist-info → mindspore-2.7.0rc1.dist-info}/WHEEL +0 -0
- {mindspore-2.6.0.dist-info → mindspore-2.7.0rc1.dist-info}/entry_points.txt +0 -0
- {mindspore-2.6.0.dist-info → mindspore-2.7.0rc1.dist-info}/top_level.txt +0 -0
mindspore/parallel/_tensor.py
CHANGED
@@ -21,9 +21,11 @@ import numpy as np
 from mindspore.common.tensor import Tensor
 from mindspore.communication.management import get_rank, get_group_size
 from mindspore._c_expression import TensorTransform
+from mindspore import log as logger

 _tensor_transform = TensorTransform.get_instance()
-
+COMM_TENSOR_CELL_CACHE = {}
+RESHARD_OP_MAP_CACHE = {}

 def _get_tensor_strategy(dev_mat, tensor_map):
     """
@@ -348,7 +350,7 @@ def _extract_layout_item(layout_item):
     return dev_matrix, tensor_map, opt_shard_step, opt_shard_size


-def _transform_tensor_by_layout(from_layout, to_layout, device_list, rank_id):
+def _transform_tensor_by_layout(from_layout, to_layout, device_list, rank_id, enable_redist_opt=False):
     """
     Transform tensor from source layout to the destination layout.

@@ -362,7 +364,7 @@ def _transform_tensor_by_layout(from_layout, to_layout, device_list, rank_id):
     """
     if not isinstance(from_layout, tuple) or not isinstance(to_layout, tuple):
         raise TypeError("The layout should be tuple! layout is {} and {}".format(from_layout, to_layout))
-    return _tensor_transform.transform_tensor_sharding(from_layout, to_layout, device_list, rank_id)
+    return _tensor_transform.transform_tensor_sharding(from_layout, to_layout, device_list, enable_redist_opt, rank_id)


 def _construct_from_to_tensor_layout(from_full_tensor_shape, from_dev_matrix,
@@ -587,13 +589,15 @@ def _get_needed_rank_list_by_layouts(from_tensor_layout, to_tensor_layout, devic
     return result_list


-def _get_needed_rank_transform_operator_map_by_layouts(from_tensor_layout, to_tensor_layout, device_list, self_rank):
+def _get_needed_rank_transform_operator_map_by_layouts(from_tensor_layout, to_tensor_layout, device_list, self_rank,
+                                                       enable_redist_opt=False):
     """
     AllGather op: {op_name, group_ranks + axis}
     """
     stack = []
     index = 0
-    transform_operators = _transform_tensor_by_layout(from_tensor_layout, to_tensor_layout, device_list, self_rank)
+    transform_operators = _transform_tensor_by_layout(from_tensor_layout, to_tensor_layout, device_list, self_rank,
+                                                      enable_redist_opt)
     result_map = {self_rank: transform_operators}
     for operators in transform_operators:
         op_name = operators[0]
@@ -606,7 +610,7 @@ def _get_needed_rank_transform_operator_map_by_layouts(from_tensor_layout, to_te
             for rank in group_info[1]:
                 if rank not in result_map:
                     new_transform_operators = _transform_tensor_by_layout(from_tensor_layout, to_tensor_layout,
-                                                                          device_list, rank)
+                                                                          device_list, rank, enable_redist_opt)
                     result_map[rank] = new_transform_operators
                     index = 0
                     for operators in new_transform_operators:
@@ -710,8 +714,6 @@ def _apply_operator(operator_name):
     Returns:
         The data of tensor after apply operator.
     """
-    if str(type(numpy_data)) == "<class 'builtins.PySafeSlice'>":
-        numpy_data = numpy_data[:]
     if not isinstance(numpy_data, np.ndarray):
         raise TypeError("The data should be a numpy.ndarray.")
     _check_operator(reshape_op)
@@ -732,10 +734,7 @@ def _apply_operator(operator_name):
         raise TypeError("The data_list should be a list.")
     new_numpy_data_list = []
     for numpy_data in numpy_data_list:
-        if str(type(numpy_data)) == "<class 'builtins.PySafeSlice'>":
-            new_numpy_data_list.append(numpy_data[:])
-        else:
-            new_numpy_data_list.append(numpy_data)
+        new_numpy_data_list.append(numpy_data)
     numpy_data_list = new_numpy_data_list
     _check_operator(allgather_op)
     concat_group = allgather_op[1][:-1]
@@ -896,3 +895,159 @@ def _chunk_shape(np_tensor, strategy, depth):
         output.extend(
             _chunk_shape(ret_, strategy[len(strategy) - depth + 1:len(strategy)], depth - 1))
     return output
+
+
+def _infer_pp_op_map(from_layout, to_layout, self_rank):
+    """
+    get the ops map for merging pp stages
+    """
+    from_rank_list = from_layout[3]
+    to_rank_list = to_layout[3]
+    from_dev_num_in_stage = len(from_rank_list)
+    current_rank_stage_id = self_rank // from_dev_num_in_stage
+    diff_rank_id = [
+        rank_id for rank_id in to_rank_list if rank_id not in from_rank_list]
+    end_stage = from_dev_num_in_stage * (current_rank_stage_id + 1)
+    start_stage = from_dev_num_in_stage * current_rank_stage_id
+    rank_pos_in_stage = list(range(start_stage, end_stage)).index(self_rank)
+    root_idx = from_rank_list[rank_pos_in_stage]
+    broadcast_rank_list = [root_idx]
+    while rank_pos_in_stage < len(diff_rank_id):
+        broadcast_rank_list.append(diff_rank_id[rank_pos_in_stage])
+        rank_pos_in_stage += from_dev_num_in_stage
+    broadcast_rank_list.sort()
+    broadcast_map = {rank_id: [('Broadcast', root_idx, broadcast_rank_list)] for rank_id in broadcast_rank_list}
+    return broadcast_map
+
+
+def _get_pipeline_operator_map(from_layout, to_layout, self_rank):
+    """
+    If src_pp_stages is greater than dst_pp_stages, the weights of the corresponding cards need to
+    be communicated via broadcast to swap. Need to communicate src rank0's 01 to src rank2,
+    so that rank2 holds param0's data. Similarly, communicate rank1's 02 to rank3
+    rank0 01    01 11
+    rank1 02    02 12
+      pp2 -------> pp1
+    rank2 11    03 13
+    rank3 12    04 14
+
+    Args:
+        from_layout (tuple): Use tuple to present layout
+            (device_matrix(list), tensor_map(list), global_shape(list), rank_list(list))
+        to_layout (tuple): Use tuple to present layout
+            (device_matrix(list), tensor_map(list), global_shape(list), rank_list(list))
+        self_rank (int): rank_id
+    """
+    if len(from_layout[3]) < len(to_layout[3]):
+        logger.debug(f"from {from_layout} to {to_layout} need to broadcast data across pp stages")
+        comm_tensor_cache_key = (
+            f"{from_layout[0]}, {from_layout[1]}, {from_layout[2]}, {from_layout[3]}"
+            f" -> "
+            f"{to_layout[0]}, {to_layout[1]}, {from_layout[2]}, {to_layout[3]}")
+        global COMM_TENSOR_CELL_CACHE
+        if comm_tensor_cache_key not in COMM_TENSOR_CELL_CACHE:
+            logger.debug(f"comm_tensor_cache_key is {comm_tensor_cache_key}, not match cache")
+            broadcast_map = _infer_pp_op_map(from_layout, to_layout, self_rank)
+            broadcast_op_map_dict = {rank_id: broadcast_map for rank_id in broadcast_map.keys()}
+            COMM_TENSOR_CELL_CACHE[comm_tensor_cache_key] = broadcast_op_map_dict
+        else:
+            comm_tensor_cache_key_rank_list = COMM_TENSOR_CELL_CACHE[comm_tensor_cache_key]
+            if self_rank in comm_tensor_cache_key_rank_list:
+                logger.debug(f"comm_tensor_cache_key is {comm_tensor_cache_key}, match cache")
+                broadcast_map = comm_tensor_cache_key_rank_list[self_rank]
+            else:
+                logger.debug(f"comm_tensor_cache_key is {comm_tensor_cache_key}, but rank {self_rank} not match cache")
+                broadcast_map = _infer_pp_op_map(from_layout, to_layout, self_rank)
+                for rank_id in broadcast_map.keys():
+                    COMM_TENSOR_CELL_CACHE[comm_tensor_cache_key][rank_id] = broadcast_map
+        return broadcast_map
+    logger.debug(f"from {from_layout} to {to_layout} no need to broadcast data across pp stages")
+    return {}
+
+
+def _is_multi_shard(in_tensor_map):
+    """
+    whether the input tensor map is in multi shard
+    """
+    for tensor_map in in_tensor_map:
+        if isinstance(tensor_map, (list, tuple)) and len(tensor_map) > 1:
+            return True
+    return False
+
+
+def _insert_expand_layout_reshape(param_rank_map, from_info_tuple, to_info_tuple,
+                                  insert_from_reshape, insert_to_reshape):
+    """ insert layout expand op reshape """
+    from_dev_matrix = from_info_tuple[0]
+    from_tensor_map = from_info_tuple[1]
+    from_full_tensor_shape = from_info_tuple[2]
+    to_dev_matrix_origin = to_info_tuple[0]
+    to_tensor_map_origin = to_info_tuple[1]
+    origin_tensor_shape = to_info_tuple[2]
+    for param_rank, _ in param_rank_map.items():
+        if insert_from_reshape:
+            from_slice_tensor_shape = ()
+            from_tensor_strategy = _get_tensor_strategy(from_dev_matrix, from_tensor_map)
+            for i, item in enumerate(from_full_tensor_shape):
+                from_slice_tensor_shape += (item // from_tensor_strategy[i],)
+            param_rank_map.get(param_rank).insert(0, ('Reshape', list(from_slice_tensor_shape)))
+        if insert_to_reshape:
+            to_tensor_strategy = _get_tensor_strategy(to_dev_matrix_origin, to_tensor_map_origin)
+            to_slice_tensor_shape = ()
+            for i, item in enumerate(origin_tensor_shape):
+                to_slice_tensor_shape += (item // to_tensor_strategy[i],)
+            param_rank_map.get(param_rank).append(('Reshape', list(to_slice_tensor_shape)))
+
+
+def _infer_reshard_op_map(from_layout, to_layout, self_rank):
+    """infer reshard op map"""
+    from_layout_without_rank_list = from_layout[:-1]
+    to_layout_without_rank_list = to_layout[:-1]
+    if _is_multi_shard(from_layout[1]):
+        # ((2, 1), 1) --> (2, 1, 1) expand tensormap
+        new_layout = _expand_layout(from_layout[0], from_layout[1], from_layout[2])
+        from_layout_without_rank_list = (new_layout[0], new_layout[1], new_layout[2])
+    if _is_multi_shard(to_layout[1]):
+        new_layout = _expand_layout(to_layout[0], to_layout[1], to_layout[2])
+        to_layout_without_rank_list = (new_layout[0], new_layout[1], new_layout[2])
+    operator_map = _get_needed_rank_transform_operator_map_by_layouts(from_layout_without_rank_list,
+                                                                      to_layout_without_rank_list,
+                                                                      from_layout[3], self_rank,
+                                                                      True)
+    new_to_layout_info = to_layout[:-1]
+    _insert_expand_layout_reshape(operator_map, from_layout_without_rank_list, new_to_layout_info,
+                                  _is_multi_shard(from_layout[1]), _is_multi_shard(to_layout[1]))
+    return operator_map
+
+
+def _get_resharding_operator_map(from_layout, to_layout, self_rank):
+    """
+    Args:
+        from_layout (tuple): Use tuple to present layout
+            (device_matrix(list), tensor_map(list), global_shape(list), rank_list(list))
+        to_layout (tuple): Use tuple to present layout
+            (device_matrix(list), tensor_map(list), global_shape(list), rank_list(list))
+        self_rank (int): rank_id
+    """
+    reshard_op_cache_key = (
+        f"{from_layout[0]}, {from_layout[1]}, {from_layout[2]}, {from_layout[3]}"
+        f" -> "
+        f"{to_layout[0]}, {to_layout[1]}, {from_layout[2]}, {to_layout[3]}")
+    global RESHARD_OP_MAP_CACHE
+    if reshard_op_cache_key not in RESHARD_OP_MAP_CACHE:
+        operator_map = _infer_reshard_op_map(from_layout, to_layout, self_rank)
+        op_map_dict = {rank_id: operator_map for rank_id in operator_map}
+        RESHARD_OP_MAP_CACHE[reshard_op_cache_key] = op_map_dict
+        logger.debug(f"reshard_op_cache_key is {reshard_op_cache_key}, not match cache")
+    else:
+        cache_rank_list_dict = RESHARD_OP_MAP_CACHE[reshard_op_cache_key]
+        if self_rank in cache_rank_list_dict:
+            operator_map = cache_rank_list_dict[self_rank]
+            logger.debug(f"reshard_op_cache_key is {reshard_op_cache_key}, match cache")
+        else:
+            logger.debug(f"reshard_op_cache_key is {reshard_op_cache_key}, "
+                         f"but rank {self_rank} is not match cache")
+            operator_map = _infer_reshard_op_map(from_layout, to_layout, self_rank)
+            for rank_id in operator_map:
+                RESHARD_OP_MAP_CACHE[reshard_op_cache_key][rank_id] = operator_map
+    return operator_map
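To make the new pipeline-stage broadcast logic concrete, here is a hand-traced sketch of what _infer_pp_op_map returns for the pp2 -> pp1 picture in its docstring. The device-matrix, tensor-map and shape entries below are placeholders; only the rank lists (index 3 of each layout tuple) drive the result, and the expected outputs were traced by hand from the added code above rather than taken from official documentation.

    # Illustrative only: layout values other than the rank lists are made up.
    from mindspore.parallel._tensor import _infer_pp_op_map  # private helper added in 2.7.0rc1

    from_layout = ([2, 1], [1, -1], [8, 8], [0, 1])        # parameter owned by source stage 0 (ranks 0 and 1)
    to_layout = ([4, 1], [1, -1], [8, 8], [0, 1, 2, 3])    # destination layout spans all 4 ranks

    print(_infer_pp_op_map(from_layout, to_layout, 0))
    # {0: [('Broadcast', 0, [0, 2])], 2: [('Broadcast', 0, [0, 2])]}  -> rank 0 broadcasts its slice to rank 2
    print(_infer_pp_op_map(from_layout, to_layout, 1))
    # {1: [('Broadcast', 1, [1, 3])], 3: [('Broadcast', 1, [1, 3])]}  -> rank 1 broadcasts its slice to rank 3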
mindspore/parallel/_transformer/moe.py
CHANGED
@@ -111,7 +111,7 @@ def _check_moe_config(moe_config=None, parallel_config=None):
     """
     if not isinstance(moe_config, MoEConfig):
         raise TypeError(f"'moe_config' must be an instance of MoEConfig, but got {type(moe_config).__name__}.")
-    use_moe =
+    use_moe = moe_config.expert_num > 1
     if use_moe is False:
         return
     if moe_config.expert_num % parallel_config.expert_parallel != 0:
mindspore/parallel/_transformer/transformer.py
CHANGED
@@ -1365,7 +1365,12 @@ class MultiHeadAttention(Cell):
 class TransformerEncoderLayer(Cell):
     r"""
     Transformer Encoder Layer. This is an implementation of the single layer of the transformer
-    encoder layer, including
+    encoder layer, mainly including Multi-Head Attention, Feed Forward, Add and LayerNorm layer.
+
+    The TransformerEncoderLayer structure is shown in the following figure:
+
+    .. image:: ../images/TransformerEncoderLayer.png
+        :align: center

     Args:
         batch_size(int): The batch size of the input tensor when do increnmental prediction. Should be a positive
@@ -1532,7 +1537,7 @@ class TransformerEncoderLayer(Cell):
                              "and parallel_config. model_parallel is {}."
                              .format(ffn_hidden_size, parallel_config.model_parallel))
         _check_moe_config(moe_config, parallel_config)
-        self.use_moe =
+        self.use_moe = moe_config.expert_num > 1
         self.use_past = use_past
         self.seq_length = seq_length
         self.hidden_size = hidden_size
@@ -1607,7 +1612,7 @@ class TransformerEncoderLayer(Cell):
                              "and parallel_config. model_parallel is {}."
                              .format(ffn_hidden_size, parallel_config.model_parallel))
         _check_moe_config(moe_config, parallel_config)
-        self.use_moe =
+        self.use_moe = moe_config.expert_num > 1
         self.use_past = use_past
         self.seq_length = seq_length
         self.hidden_size = hidden_size
@@ -1902,7 +1907,7 @@ class TransformerDecoderLayer(Cell):
                  parallel_config=default_dpmp_config):
         super(TransformerDecoderLayer, self).__init__()
         _check_moe_config(moe_config, parallel_config)
-        self.use_moe =
+        self.use_moe = moe_config.expert_num > 1
         config_to_attention = parallel_config.dpmp if self.use_moe else parallel_config
         if batch_size or use_past:
             Validator.check_positive_int(batch_size)
@@ -2440,7 +2445,7 @@ class TransformerEncoder(Cell):
         super(TransformerEncoder, self).__init__()
         _check_config(parallel_config)
         _check_moe_config(moe_config, parallel_config)
-        self.use_moe =
+        self.use_moe = moe_config.expert_num > 1
         config_to_layer = parallel_config.moe_parallel_config if self.use_moe else parallel_config.dp_mp_config
         if _get_parallel_mode() in (ParallelMode.AUTO_PARALLEL,):
             self.add = P.Add()
@@ -2677,7 +2682,7 @@ class TransformerDecoder(Cell):
         super(TransformerDecoder, self).__init__()
         _check_moe_config(moe_config, parallel_config)
         _check_config(parallel_config)
-        self.use_moe =
+        self.use_moe = moe_config.expert_num > 1
         config_to_layer = parallel_config.moe_parallel_config if self.use_moe else parallel_config.dp_mp_config
         if _get_parallel_mode() in (ParallelMode.AUTO_PARALLEL,):
             self.add = P.Add()
@@ -2959,7 +2964,7 @@ class Transformer(Cell):
         if not lambda_func:
             lambda_func = _get_lambda_func(total_layer=encoder_layers + decoder_layers)
         _check_moe_config(moe_config, parallel_config)
-        self.use_moe =
+        self.use_moe = moe_config.expert_num > 1
         self.add = P.Add()
         self.aux_loss = Tensor(0.0, mstype.float32)
         if encoder_layers > 0:
@@ -3026,7 +3031,7 @@ class Transformer(Cell):
         if not lambda_func:
             lambda_func = _get_lambda_func(total_layer=encoder_layers + decoder_layers)
         _check_moe_config(moe_config, parallel_config)
-        self.use_moe =
+        self.use_moe = moe_config.expert_num > 1
         self.add = P.Add().shard(((), ()))
         self.aux_loss = Tensor(0.0, mstype.float32)
         if encoder_layers > 0:
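All of the use_moe hunks above reduce to the same predicate: MoE is considered enabled only when more than one expert is configured. A tiny sketch of the effect (assuming MoEConfig is importable from the module shown in the previous hunk and accepts an expert_num keyword, which is not verified here):

    from mindspore.parallel._transformer.moe import MoEConfig   # assumed import location

    print(MoEConfig(expert_num=1).expert_num > 1)   # False -> the dense (non-MoE) parallel config is used
    print(MoEConfig(expert_num=8).expert_num > 1)   # True  -> the MoE parallel config path is selected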
mindspore/parallel/auto_parallel.py
CHANGED
@@ -162,6 +162,10 @@ class AutoParallel(Cell):
         super(AutoParallel, self).__init__(auto_prefix=False)
         self.network = network

+        if parallel_mode not in ["semi_auto", "sharding_propagation", "recursive_programming"]:
+            raise ValueError("the argument 'parallel_mode' must be one of ['semi_auto', 'sharding_propagation'," \
+                             " 'recursive_programming'], but got the value : {} .".format(parallel_mode))
+
         self._parallel_mode = parallel_mode

         self._global_rank = get_rank()
@@ -260,8 +264,10 @@ class AutoParallel(Cell):
         self._save_strategy_file_path = file_path

     def disable_strategy_file_only_for_trainable_params(self):
-        """
-
+        """
+        By default, MindSpore only loads and saves trainable parameters. This API enables the loading and saving of
+        non-trainable parameters as well.
+        """
         self._only_trainable_params = False

     def save_operator_strategy_file(self, file_path):
@@ -410,7 +416,7 @@ class AutoParallel(Cell):
                 raise ValueError("For 'AutoParallel.dataset_strategy', the argument "
                                  "'config' must be 'full_batch' or 'data_parallel', but got the value : {}."
                                  .format(config))
-            self._full_batch =
+            self._full_batch = config == "full_batch"
             self._dataset_strategy_config = config
             return
         if not isinstance(config, tuple):
@@ -513,9 +519,10 @@ class AutoParallel(Cell):
         if not isinstance(scheduler, str):
             raise TypeError("For 'AutoParallel.pipeline', the argument 'stages' "
                             "must be str type, but got the type : {}.".format(type(scheduler)))
-        if scheduler not in ("1f1b", "gpipe"):
+        if scheduler not in ("1f1b", "gpipe", "seqpipe", "seqvpp", "seqsmartvpp"):
             raise ValueError("For 'AutoParallel.pipeline', the argument "
-                             "'scheduler' must be '1f1b'
+                             "'scheduler' must be '1f1b'/'gpipe'/'seqpipe'/'seqvpp'/'seqsmartvpp' ," \
+                             " but got the value : {}."
                              .format(scheduler))
         self._pipeline_stages = stages
         self._pipeline_result_broadcast = output_broadcast
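Taken together, the AutoParallel hunks tighten input validation and widen the pipeline scheduler whitelist. A hedged usage sketch follows; keyword names such as stages and scheduler are inferred from the attributes assigned in the hunk above, not from a verified 2.7.0rc1 API reference.

    import mindspore.nn as nn
    from mindspore.parallel.auto_parallel import AutoParallel

    class TinyNet(nn.Cell):
        def __init__(self):
            super().__init__()
            self.dense = nn.Dense(16, 16)

        def construct(self, x):
            return self.dense(x)

    # "semi_auto", "sharding_propagation" and "recursive_programming" pass the new check;
    # any other string now fails fast with ValueError instead of breaking later.
    parallel_net = AutoParallel(TinyNet(), parallel_mode="semi_auto")

    # The scheduler whitelist grows from ("1f1b", "gpipe") to also accept
    # "seqpipe", "seqvpp" and "seqsmartvpp".
    parallel_net.pipeline(stages=2, scheduler="seqpipe")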
mindspore/parallel/checkpoint_convert.py
CHANGED
@@ -15,10 +15,10 @@
 """Convert distributed checkpoint"""
 from __future__ import absolute_import

-
+__all__ = ["rank_list_for_convert", "convert_checkpoint_by_rank", "convert_checkpoints"]

-
-
+from mindspore.parallel.checkpoint_transform import rank_list_for_transform, transform_checkpoint_by_rank, \
+    transform_checkpoints


 def rank_list_for_convert(rank_id, src_strategy_file=None, dst_strategy_file=None):
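The convert_* names in the new __all__ are thin wrappers over the transform_* helpers imported above. A sketch using the only signature visible in this hunk; the strategy file paths are placeholders.

    from mindspore.parallel.checkpoint_convert import rank_list_for_convert

    # Which source ranks must be read to rebuild rank 0's checkpoint under the new strategy.
    needed_ranks = rank_list_for_convert(0,
                                         src_strategy_file="./src_strategy.ckpt",   # placeholder path
                                         dst_strategy_file="./dst_strategy.ckpt")   # placeholder path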
mindspore/parallel/checkpoint_transform.py
CHANGED
@@ -582,6 +582,8 @@ def transform_checkpoints(src_checkpoints_dir, dst_checkpoints_dir, ckpt_prefix,
         The number of multiprocess settings is related to the size of the host, and it is not recommended to set it
         too large, otherwise it may cause freezing.

+        This function does not support converting remove_redundancy's checkpoint file.
+
     Args:
         src_checkpoints_dir (str): The source checkpoints directory.
         dst_checkpoints_dir (str): The destination checkpoints directory to save the converted checkpoints.
@@ -1187,7 +1189,7 @@ def load_distributed_checkpoint(network, checkpoint_filenames=None, predict_stra
     param_not_in_ckpt = []
     for _, param in network.parameters_and_names():
         sliced_params = []
-        if param.name not in rank_list
+        if param.name not in rank_list:
             param_not_in_strategy.append(param.name)
             continue
         if param.name not in param_total_dict:
mindspore/parallel/cluster/process_entity/_api.py
CHANGED
@@ -22,7 +22,8 @@ import socket
 import psutil
 import mindspore.log as logger
 from ._utils import _generate_cmd_args_list, _generate_cmd_args_list_with_core, _generate_url, \
-    _is_local_ip, _convert_addr_to_ip, _send_scale_num, _get_local_ip
+    _is_local_ip, _convert_addr_to_ip, _send_scale_num, _get_local_ip, _generate_auto_bind_core_strategy, \
+    _generate_bind_core_strategy


 class _Node:
@@ -79,11 +80,12 @@ class _ComputeGraphNode(_Node):
     Worker node for dynamic networking. Inherits from the Node class.
     """

-    def __init__(self, worker_num, sched_host, sched_port, timeout, node_id, args_list, output_file,
+    def __init__(self, worker_num, sched_host, sched_port, timeout, node_id, node_rank, args_list, output_file,
                  tail_worker_log, join, is_simulation):
         super().__init__(worker_num, sched_host, sched_port, timeout, args_list, output_file,
                          tail_worker_log, join, is_simulation)
         self.node_id = node_id
+        self.node_rank = node_rank

     def run(self):
         """
@@ -95,6 +97,8 @@ class _ComputeGraphNode(_Node):
         super().run()
         if self.node_id is not None:
             os.environ["MS_NODE_ID"] = str(self.node_id)
+        if self.node_rank is not None:
+            os.environ["MS_NODE_RANK"] = str(self.node_rank)
         # If simulation level is set, environment variable 'MS_ROLE' will not be set.
         if not self.is_simulation:
             os.environ["MS_ROLE"] = "MS_WORKER"
@@ -119,6 +123,9 @@ class _ComputeGraphNode(_Node):
         return subprocess.Popen(['/usr/bin/tail', '-f', self.output_file])

     def enable_tail_worker_log(self):
+        """
+        Get valid rank ID for tailing the corresponding worker log.
+        """
         tail_worker_log_list = []
         if self.tail_worker_log != "-1":
             tail_worker_log_list.extend([int(num) for num in self.tail_worker_log.split(',')])
@@ -169,7 +176,7 @@ class _ProcessManager:

         self.sim_level = args.sim_level
         self.sim_rank_id = args.sim_rank_id
-        self.is_simulation =
+        self.is_simulation = self.sim_level != -1
         if self.is_simulation:
             os.environ["MS_SIMULATION_LEVEL"] = str(self.sim_level)
         elif os.getenv("MS_SIMULATION_LEVEL"):
@@ -205,15 +212,24 @@ class _ProcessManager:
         finally:
             os.umask(origin_mask)

+        self.device_to_cpu_map = {}
+        if self.bind_core is True:
+            self.device_to_cpu_map = _generate_auto_bind_core_strategy(self.local_worker_num)
+
         self.proc_rank_map = {}
         self.enable_mindx = False
+        self._check_taskd()
+
+    def _check_taskd(self):
+        """check if enable taskd."""
         tft_env = os.getenv("MS_ENABLE_TFT", "")
-        if (
+        if any(v in tft_env for v in ('TTP:1', 'UCE:1', 'ARF:1', 'TSP:1', 'RSC:1', 'HCCE:1')):
             try:
                 from taskd.python.framework.agent.ms_mgr.msrun_plugin import MSRunPlugin
                 self.msmgr = MSRunPlugin()
                 self.msmgr.register_callbacks("KILL_WORKER", self.kill_workers)
                 self.msmgr.register_callbacks("START_ALL_WORKER", self.start_all_workers)
+                self.msmgr.register_callbacks("START_WORKER_LIST", self.start_worker_list)
                 self.msmgr.register_callbacks("MONITOR", self.monitor_rank_status)
                 self.enable_mindx = True
                 os.environ["MS_ENABLE_RECOVERY"] = str(1)
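The old truncated condition is replaced by an explicit membership test over the fault-tolerance switches. A standalone illustration of the gate; the exact value format of MS_ENABLE_TFT is an assumption.

    import os

    os.environ["MS_ENABLE_TFT"] = "{TTP:1,UCE:1}"   # assumed format; any listed switch enables the taskd path
    tft_env = os.getenv("MS_ENABLE_TFT", "")
    enable_taskd = any(v in tft_env for v in ('TTP:1', 'UCE:1', 'ARF:1', 'TSP:1', 'RSC:1', 'HCCE:1'))
    print(enable_taskd)   # True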
@@ -261,6 +277,45 @@ class _ProcessManager:
                               self.is_simulation)
         self.msn_process = msn.run()

+    def _start_single_worker(self, local_rank):
+        """
+        Start worker processor
+
+        Args:
+            local_rank: local rank id.
+        """
+        os.environ["DEVICE_ID"] = str(local_rank)
+        node_id, log_name = self._get_node_id_and_log_path(local_rank)
+        if node_id is None:
+            logger.warning(f"Rank ids will be assigned automatically, "
+                           "please use 'grep -rn 'rank id:' command to check each worker log's rank id.")
+        else:
+            # If node_id is generated in '_get_node_id_and_log_path' method, export 'RANK_ID' environment variable.
+            # This is for rank_table method's compatibility consideration.
+            os.environ["RANK_ID"] = str(node_id)
+            print(f"Start worker process with rank id:{node_id}, log file:{log_name}. "
+                  f"Environment variable [RANK_ID={node_id}] is exported.", flush=True)
+            if self.is_simulation and (self.sim_rank_id != -1):
+                # Reset RANK_ID env to sim_rank_id if sim_rank_id is set.
+                os.environ["RANK_ID"] = str(self.sim_rank_id)
+                logger.warning(f"In dryrun case, RANK_ID is assigned to {self.sim_rank_id}.")
+
+        if self.bind_core:
+            affinity_cpu_str = _generate_bind_core_strategy(local_rank, self.device_to_cpu_map, self.bind_core)
+            if affinity_cpu_str is not None:
+                cmd = _generate_cmd_args_list_with_core(self.cmd, self.cmd_args, affinity_cpu_str)
+            else:
+                cmd = _generate_cmd_args_list(self.cmd, self.cmd_args)
+        else:
+            cmd = _generate_cmd_args_list(self.cmd, self.cmd_args)
+        cgn = _ComputeGraphNode(self.worker_num, self.master_addr, self.master_port, self.cluster_time_out,
+                                node_id, self.node_rank, cmd, log_name, self.tail_worker_log, self.join,
+                                self.is_simulation)
+        process, tail_process = cgn.run()
+        self.cgn_processes.append(process)
+        self.tail_cgn_processes.append(tail_process)
+        self.proc_rank_map[local_rank] = process
+
     def start_workers(self):
         """
         Starts the worker nodes.
@@ -275,40 +330,8 @@ class _ProcessManager:
                            "'rank_id' of each process will be assigned after cluster is successfully built.\n"
                            "You can access 'RANK_ID' environment variable after calling "
                            "'mindspore.communication.init()'")
-
         for i in range(self.local_worker_num):
-
-            node_id, log_name = self._get_node_id_and_log_path(i)
-            if node_id is None:
-                logger.warning(f"Rank ids will be assigned automatically, "
-                               "please use 'grep -rn 'rank id:' command to check each worker log's rank id.")
-            else:
-                # If node_id is generated in '_get_node_id_and_log_path' method, export 'RANK_ID' environment variable.
-                # This is for rank_table method's compatibility consideration.
-                os.environ["RANK_ID"] = str(node_id)
-                print(f"Start worker process with rank id:{node_id}, log file:{log_name}. "
-                      f"Environment variable [RANK_ID={node_id}] is exported.", flush=True)
-                if self.is_simulation and (self.sim_rank_id != -1):
-                    # Reset RANK_ID env to sim_rank_id if sim_rank_id is set.
-                    os.environ["RANK_ID"] = str(self.sim_rank_id)
-                    logger.warning(f"In dryrun case, RANK_ID is assigned to {self.sim_rank_id}.")
-
-            if self.bind_core:
-                cpu_num = subprocess.getoutput("cat /proc/cpuinfo|grep processor|wc -l")
-                if not cpu_num.isdigit():
-                    raise RuntimeError(f"Got cpu number from '/proc/cpuinfo' is {cpu_num}, failed to bind core.")
-                avg = int(cpu_num) // self.local_worker_num
-                cpu_start = avg * i
-                cpu_end = cpu_start + avg - 1
-                cmd = _generate_cmd_args_list_with_core(self.cmd, self.cmd_args, cpu_start, cpu_end)
-            else:
-                cmd = _generate_cmd_args_list(self.cmd, self.cmd_args)
-            cgn = _ComputeGraphNode(self.worker_num, self.master_addr, self.master_port, self.cluster_time_out,
-                                    node_id, cmd, log_name, self.tail_worker_log, self.join, self.is_simulation)
-            process, tail_process = cgn.run()
-            self.cgn_processes.append(process)
-            self.tail_cgn_processes.append(tail_process)
-            self.proc_rank_map[i] = process
+            self._start_single_worker(i)

     def join_processes(self):
         """
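For contrast with the new _generate_auto_bind_core_strategy / _generate_bind_core_strategy helpers, the removed inline logic simply split all host CPUs evenly across local workers. It is reproduced here as a standalone, Linux-only sketch (it reads /proc/cpuinfo exactly as the deleted 2.6.0 code did).

    import subprocess

    def legacy_cpu_range(local_rank, local_worker_num):
        """Even split of host cores per worker, as the removed 2.6.0 bind-core code did."""
        cpu_num = subprocess.getoutput("cat /proc/cpuinfo|grep processor|wc -l")
        if not cpu_num.isdigit():
            raise RuntimeError(f"Got cpu number from '/proc/cpuinfo' is {cpu_num}, failed to bind core.")
        avg = int(cpu_num) // local_worker_num
        cpu_start = avg * local_rank
        cpu_end = cpu_start + avg - 1
        return cpu_start, cpu_end   # e.g. 8 workers on a 192-core host: rank 0 -> cores 0-23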
@@ -334,7 +357,7 @@ class _ProcessManager:
                 continue
             elif ret_code != 0:
                 has_exception = True
-                logger.error(f"Worker process {p.pid} exit with exception.")
+                logger.error(f"Worker process {p.pid} exit with exception. Error code: {ret_code}.")
                 break
             else:
                 success_cgn_processes.add(p)
@@ -420,14 +443,9 @@ class _ProcessManager:
         Args:
             NA.
         """
-
-
-            p.kill()
+        self.kill_worker_processes()
+        self.kill_tail_log_processes()
         self.cgn_processes.clear()
-
-        for p in self.tail_cgn_processes:
-            if p is not None:
-                p.kill()
         self.tail_cgn_processes.clear()

     def kill_single_worker(self, pid):
@@ -441,7 +459,7 @@ class _ProcessManager:
         for i in range(len(self.cgn_processes)):
             p = self.cgn_processes[i]
             if p.pid == pid and p.poll() is None:
-                p.
+                os.killpg(os.getpgid(p.pid), signal.SIGKILL)
                 del self.cgn_processes[i]
                 tail_p = self.tail_cgn_processes[i]
                 if tail_p is not None:
@@ -499,7 +517,8 @@ class _ProcessManager:
             p_status = p.poll()
             if (not psutil.pid_exists(p.pid)) and (p_status != 0):
                 p_status = 300
-            return {"pid": p.pid, "status": p_status, "global_rank": global_rank_id
+            return {"pid": p.pid, "status": p_status, "global_rank": global_rank_id, "local_rank": rank_id,
+                    "node_id": self.node_rank}
         except KeyError:
             logger.info(f"Process rank {rank_id} has not been initialized.")
             return {"pid": None, "status": 200, "global_rank": global_rank_id}
@@ -519,7 +538,24 @@ class _ProcessManager:
         self.start_workers()
         worker_status = self.monitor_rank_status([-1])
         for i in range(self.local_worker_num):
-            if worker_status[i]["status"]
+            if worker_status[i]["status"] is not None:
+                return 1
+        return 0
+
+    def start_worker_list(self, rank_ids):
+        """
+        Start worker processor by rank list.
+
+        Args:
+            rank_ids: worker process's local rank list, which is also device_id.
+        """
+        if not isinstance(rank_ids, list):
+            raise TypeError(f"The type of 'rank_ids' must be a list, but got:{rank_ids}")
+        for idx in rank_ids:
+            self._start_single_worker(idx)
+        worker_status = self.monitor_rank_status(rank_ids)
+        for i in rank_ids:
+            if worker_status[i]["status"] is not None:
                 return 1
         return 0
