mindspore-2.6.0rc1-cp39-cp39-win_amd64.whl → mindspore-2.7.0rc1-cp39-cp39-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mindspore might be problematic.
- mindspore/.commit_id +1 -1
- mindspore/__init__.py +1 -1
- mindspore/_c_dataengine.cp39-win_amd64.pyd +0 -0
- mindspore/_c_expression.cp39-win_amd64.pyd +0 -0
- mindspore/_c_mindrecord.cp39-win_amd64.pyd +0 -0
- mindspore/_checkparam.py +40 -9
- mindspore/{_deprecated → _extends/optimize}/__init__.py +9 -3
- mindspore/_extends/optimize/cell_utils.py +96 -0
- mindspore/_extends/parse/__init__.py +2 -2
- mindspore/_extends/parse/compile_config.py +44 -22
- mindspore/_extends/parse/deprecated/deprecated_tensor_method.py +1 -1
- mindspore/_extends/parse/parser.py +37 -62
- mindspore/_extends/parse/resources.py +39 -0
- mindspore/_extends/parse/standard_method.py +43 -13
- mindspore/_extends/parse/trope.py +8 -1
- mindspore/_extends/pijit/__init__.py +1 -2
- mindspore/amp.py +4 -4
- mindspore/avcodec-59.dll +0 -0
- mindspore/avdevice-59.dll +0 -0
- mindspore/avfilter-8.dll +0 -0
- mindspore/avformat-59.dll +0 -0
- mindspore/avutil-57.dll +0 -0
- mindspore/boost/adasum.py +1 -1
- mindspore/boost/boost_cell_wrapper.py +4 -4
- mindspore/common/__init__.py +27 -2
- mindspore/common/_grad_function.py +2 -1
- mindspore/common/_pijit_context.py +28 -7
- mindspore/common/_stub_tensor.py +1 -209
- mindspore/common/_tensor_cpp_method.py +1 -1
- mindspore/common/_tensor_docs.py +77 -16
- mindspore/common/api.py +238 -113
- mindspore/common/dtype.py +21 -11
- mindspore/common/dump.py +10 -15
- mindspore/common/generator.py +5 -3
- mindspore/common/hook_handle.py +11 -2
- mindspore/common/jit_config.py +1 -1
- mindspore/common/jit_trace.py +84 -105
- mindspore/common/parameter.py +26 -12
- mindspore/common/recompute.py +3 -3
- mindspore/common/sparse_tensor.py +0 -3
- mindspore/common/symbol.py +0 -1
- mindspore/common/tensor.py +81 -81
- mindspore/communication/_comm_helper.py +46 -4
- mindspore/communication/management.py +79 -7
- mindspore/context.py +58 -40
- mindspore/dataset/core/config.py +3 -3
- mindspore/dataset/engine/datasets.py +20 -7
- mindspore/dataset/engine/datasets_user_defined.py +33 -3
- mindspore/dataset/engine/iterators.py +2 -2
- mindspore/dataset/engine/obs/config_loader.py +2 -2
- mindspore/dataset/engine/obs/obs_mindrecord_dataset.py +8 -0
- mindspore/dataset/transforms/py_transforms.py +7 -3
- mindspore/dataset/transforms/transforms.py +7 -3
- mindspore/dataset/vision/validators.py +1 -0
- mindspore/device_context/ascend/device.py +1 -1
- mindspore/device_context/gpu/__init__.py +2 -2
- mindspore/device_context/gpu/device.py +1 -1
- mindspore/device_context/gpu/op_precision.py +4 -2
- mindspore/device_context/gpu/op_tuning.py +6 -3
- mindspore/device_manager.py +16 -9
- mindspore/dnnl.dll +0 -0
- mindspore/experimental/llm_boost/ascend_native/llama_boost_ascend_native.py +3 -7
- mindspore/experimental/llm_boost/atb/boost_base.py +2 -3
- mindspore/experimental/optim/adadelta.py +13 -20
- mindspore/experimental/optim/adagrad.py +15 -22
- mindspore/experimental/optim/adam.py +17 -24
- mindspore/experimental/optim/adamax.py +14 -22
- mindspore/experimental/optim/adamw.py +28 -34
- mindspore/experimental/optim/asgd.py +15 -25
- mindspore/experimental/optim/lr_scheduler.py +27 -45
- mindspore/experimental/optim/nadam.py +14 -24
- mindspore/experimental/optim/optimizer.py +13 -23
- mindspore/experimental/optim/radam.py +18 -24
- mindspore/experimental/optim/rmsprop.py +14 -25
- mindspore/experimental/optim/rprop.py +15 -26
- mindspore/experimental/optim/sgd.py +9 -19
- mindspore/hal/__init__.py +4 -4
- mindspore/hal/contiguous_tensors_handle.py +2 -2
- mindspore/hal/memory.py +27 -7
- mindspore/include/api/cell.h +37 -1
- mindspore/include/api/delegate.h +10 -0
- mindspore/include/api/model.h +3 -0
- mindspore/include/api/types.h +2 -2
- mindspore/include/c_api/model_c.h +0 -58
- mindspore/include/c_api/tensor_c.h +0 -26
- mindspore/include/dataset/vision_ascend.h +1 -1
- mindspore/jpeg62.dll +0 -0
- mindspore/mindrecord/tools/cifar10.py +60 -11
- mindspore/mindrecord/tools/cifar10_to_mr.py +5 -0
- mindspore/mindspore_backend_common.dll +0 -0
- mindspore/mindspore_backend_manager.dll +0 -0
- mindspore/mindspore_common.dll +0 -0
- mindspore/mindspore_core.dll +0 -0
- mindspore/mindspore_cpu_res_manager.dll +0 -0
- mindspore/mindspore_dump.dll +0 -0
- mindspore/mindspore_frontend.dll +0 -0
- mindspore/mindspore_glog.dll +0 -0
- mindspore/mindspore_memory_pool.dll +0 -0
- mindspore/mindspore_ms_backend.dll +0 -0
- mindspore/mindspore_ops.dll +0 -0
- mindspore/mindspore_ops_host.dll +0 -0
- mindspore/mindspore_ops_kernel_common.dll +0 -0
- mindspore/mindspore_profiler.dll +0 -0
- mindspore/mindspore_pyboost.dll +0 -0
- mindspore/mindspore_pynative.dll +0 -0
- mindspore/mindspore_res_manager.dll +0 -0
- mindspore/mindspore_runtime_pipeline.dll +0 -0
- mindspore/mint/__init__.py +6 -46
- mindspore/mint/distributed/__init__.py +1 -0
- mindspore/mint/distributed/distributed.py +212 -9
- mindspore/mint/nn/__init__.py +1 -1
- mindspore/mint/nn/functional.py +53 -6
- mindspore/mint/nn/layer/_functions.py +164 -294
- mindspore/mint/nn/layer/activation.py +8 -6
- mindspore/mint/nn/layer/conv.py +137 -101
- mindspore/mint/nn/layer/normalization.py +8 -22
- mindspore/mint/optim/adam.py +19 -18
- mindspore/mint/optim/adamw.py +14 -8
- mindspore/mint/optim/sgd.py +5 -5
- mindspore/nn/cell.py +328 -502
- mindspore/nn/grad/cell_grad.py +11 -12
- mindspore/nn/layer/activation.py +32 -34
- mindspore/nn/layer/basic.py +67 -64
- mindspore/nn/layer/channel_shuffle.py +4 -4
- mindspore/nn/layer/combined.py +4 -2
- mindspore/nn/layer/conv.py +117 -110
- mindspore/nn/layer/dense.py +9 -7
- mindspore/nn/layer/embedding.py +50 -52
- mindspore/nn/layer/image.py +37 -39
- mindspore/nn/layer/math.py +111 -112
- mindspore/nn/layer/normalization.py +56 -44
- mindspore/nn/layer/pooling.py +58 -63
- mindspore/nn/layer/rnn_cells.py +33 -33
- mindspore/nn/layer/rnns.py +56 -56
- mindspore/nn/layer/thor_layer.py +74 -73
- mindspore/nn/layer/transformer.py +11 -1
- mindspore/nn/learning_rate_schedule.py +20 -20
- mindspore/nn/loss/loss.py +79 -81
- mindspore/nn/optim/adam.py +3 -3
- mindspore/nn/optim/adasum.py +2 -2
- mindspore/nn/optim/asgd.py +2 -0
- mindspore/nn/optim/optimizer.py +1 -1
- mindspore/nn/optim/thor.py +2 -2
- mindspore/nn/probability/distribution/exponential.py +2 -1
- mindspore/nn/probability/distribution/poisson.py +2 -1
- mindspore/nn/sparse/sparse.py +3 -3
- mindspore/nn/wrap/cell_wrapper.py +34 -37
- mindspore/nn/wrap/grad_reducer.py +37 -37
- mindspore/nn/wrap/loss_scale.py +72 -74
- mindspore/numpy/array_creations.py +5 -5
- mindspore/numpy/fft.py +1 -1
- mindspore/numpy/math_ops.py +5 -5
- mindspore/opencv_core452.dll +0 -0
- mindspore/opencv_imgcodecs452.dll +0 -0
- mindspore/opencv_imgproc452.dll +0 -0
- mindspore/ops/_grad_experimental/grad_comm_ops.py +51 -13
- mindspore/ops/_grad_experimental/grad_debug_ops.py +14 -0
- mindspore/ops/_vmap/vmap_array_ops.py +31 -13
- mindspore/ops/_vmap/vmap_nn_ops.py +8 -16
- mindspore/ops/auto_generate/cpp_create_prim_instance_helper.py +42 -11
- mindspore/ops/auto_generate/gen_extend_func.py +23 -141
- mindspore/ops/auto_generate/gen_ops_def.py +727 -321
- mindspore/ops/auto_generate/gen_ops_prim.py +1721 -984
- mindspore/ops/auto_generate/pyboost_inner_prim.py +31 -1
- mindspore/ops/composite/__init__.py +10 -0
- mindspore/ops/composite/base.py +8 -4
- mindspore/ops/composite/multitype_ops/__init__.py +12 -1
- mindspore/ops/composite/multitype_ops/_compile_utils.py +133 -109
- mindspore/ops/composite/multitype_ops/add_impl.py +70 -2
- mindspore/ops/composite/multitype_ops/div_impl.py +49 -0
- mindspore/ops/composite/multitype_ops/floordiv_impl.py +29 -0
- mindspore/ops/composite/multitype_ops/getitem_impl.py +11 -0
- mindspore/ops/composite/multitype_ops/mod_impl.py +5 -3
- mindspore/ops/composite/multitype_ops/mul_impl.py +49 -0
- mindspore/ops/composite/multitype_ops/setitem_impl.py +57 -0
- mindspore/ops/composite/multitype_ops/sub_impl.py +34 -0
- mindspore/ops/composite/multitype_ops/zeros_like_impl.py +14 -0
- mindspore/ops/function/__init__.py +3 -1
- mindspore/ops/function/_add_attr_func.py +11 -6
- mindspore/ops/function/array_func.py +9 -96
- mindspore/ops/function/debug_func.py +4 -3
- mindspore/ops/function/grad/grad_func.py +1 -1
- mindspore/ops/function/math_func.py +33 -540
- mindspore/ops/function/nn_func.py +28 -74
- mindspore/ops/function/other_func.py +4 -1
- mindspore/ops/function/random_func.py +44 -5
- mindspore/ops/function/vmap_func.py +2 -1
- mindspore/ops/functional.py +2 -3
- mindspore/ops/functional_overload.py +571 -6
- mindspore/ops/op_info_register.py +21 -0
- mindspore/ops/operations/__init__.py +16 -11
- mindspore/ops/operations/_custom_ops_utils.py +689 -34
- mindspore/ops/operations/_inner_ops.py +3 -6
- mindspore/ops/operations/_sequence_ops.py +1 -1
- mindspore/ops/operations/array_ops.py +2 -2
- mindspore/ops/operations/comm_ops.py +185 -26
- mindspore/ops/operations/custom_ops.py +294 -174
- mindspore/ops/operations/debug_ops.py +59 -4
- mindspore/ops/operations/image_ops.py +13 -13
- mindspore/ops/operations/manually_defined/ops_def.py +15 -16
- mindspore/ops/operations/math_ops.py +3 -4
- mindspore/ops/operations/nn_ops.py +7 -39
- mindspore/ops/primitive.py +6 -10
- mindspore/ops/tensor_method.py +47 -8
- mindspore/ops_generate/api/cpp_create_prim_instance_helper_generator.py +1 -1
- mindspore/ops_generate/api/functional_map_cpp_generator.py +10 -9
- mindspore/ops_generate/api/functions_cc_generator.py +58 -10
- mindspore/ops_generate/api/tensor_func_reg_cpp_generator.py +1 -1
- mindspore/ops_generate/common/base_generator.py +14 -0
- mindspore/ops_generate/common/gen_constants.py +8 -3
- mindspore/ops_generate/common/gen_utils.py +0 -19
- mindspore/ops_generate/common/op_proto.py +11 -4
- mindspore/ops_generate/common/template.py +88 -11
- mindspore/ops_generate/gen_ops.py +1 -1
- mindspore/ops_generate/op_def/lite_ops_cpp_generator.py +4 -4
- mindspore/ops_generate/op_def/ops_def_cc_generator.py +0 -3
- mindspore/ops_generate/op_def/ops_name_h_generator.py +0 -3
- mindspore/ops_generate/op_def/ops_primitive_h_generator.py +0 -4
- mindspore/ops_generate/op_def_py/op_prim_py_generator.py +5 -2
- mindspore/ops_generate/pyboost/auto_grad_impl_cc_generator.py +49 -8
- mindspore/ops_generate/pyboost/auto_grad_reg_cc_generator.py +2 -2
- mindspore/ops_generate/pyboost/gen_pyboost_func.py +31 -0
- mindspore/ops_generate/pyboost/op_template_parser.py +98 -72
- mindspore/ops_generate/pyboost/pyboost_functions_cpp_generator.py +70 -273
- mindspore/ops_generate/pyboost/pyboost_functions_h_generator.py +14 -6
- mindspore/ops_generate/pyboost/pyboost_functions_impl_cpp_generator.py +316 -0
- mindspore/ops_generate/pyboost/pyboost_functions_py_generator.py +1 -1
- mindspore/ops_generate/pyboost/pyboost_grad_function_cpp_generator.py +5 -3
- mindspore/ops_generate/pyboost/pyboost_inner_prim_generator.py +1 -1
- mindspore/ops_generate/pyboost/pyboost_internal_functions_cpp_generator.py +76 -0
- mindspore/ops_generate/pyboost/pyboost_internal_functions_h_generator.py +76 -0
- mindspore/ops_generate/pyboost/pyboost_internal_kernel_info_adapter_generator.py +125 -0
- mindspore/ops_generate/pyboost/pyboost_native_grad_functions_generator.py +4 -3
- mindspore/ops_generate/pyboost/pyboost_op_cpp_code_generator.py +348 -61
- mindspore/ops_generate/pyboost/pyboost_overload_functions_cpp_generator.py +1 -1
- mindspore/ops_generate/pyboost/pyboost_utils.py +118 -9
- mindspore/ops_generate/tensor_py_cc_generator.py +1 -24
- mindspore/parallel/_auto_parallel_context.py +11 -8
- mindspore/parallel/_cell_wrapper.py +113 -45
- mindspore/parallel/_parallel_serialization.py +1 -1
- mindspore/parallel/_ps_context.py +4 -6
- mindspore/parallel/_tensor.py +167 -12
- mindspore/parallel/_transformer/moe.py +1 -1
- mindspore/parallel/_transformer/transformer.py +13 -8
- mindspore/parallel/auto_parallel.py +14 -7
- mindspore/parallel/checkpoint_convert.py +3 -3
- mindspore/parallel/checkpoint_transform.py +11 -7
- mindspore/parallel/cluster/process_entity/_api.py +84 -48
- mindspore/parallel/cluster/process_entity/_utils.py +95 -7
- mindspore/parallel/cluster/run.py +43 -4
- mindspore/parallel/function/__init__.py +8 -1
- mindspore/parallel/function/reshard_func.py +6 -7
- mindspore/parallel/nn/__init__.py +15 -2
- mindspore/parallel/nn/parallel_cell_wrapper.py +9 -10
- mindspore/parallel/nn/parallel_grad_reducer.py +7 -6
- mindspore/parallel/shard.py +3 -4
- mindspore/parallel/transform_safetensors.py +463 -174
- mindspore/profiler/__init__.py +2 -1
- mindspore/profiler/analysis/parser/timeline_assembly_factory/ascend_timeline_assembler.py +7 -7
- mindspore/profiler/analysis/parser/timeline_assembly_factory/base_timeline_assembler.py +3 -0
- mindspore/profiler/analysis/parser/timeline_assembly_factory/trace_view_container.py +12 -6
- mindspore/profiler/analysis/parser/timeline_creator/cpu_op_timeline_creator.py +3 -3
- mindspore/profiler/analysis/parser/timeline_creator/fwk_timeline_creator.py +3 -3
- mindspore/profiler/analysis/parser/timeline_creator/msprof_timeline_creator.py +4 -4
- mindspore/profiler/analysis/parser/timeline_creator/scope_layer_timeline_creator.py +3 -3
- mindspore/profiler/analysis/parser/timeline_event/fwk_event.py +4 -1
- mindspore/profiler/analysis/parser/timeline_event/timeline_event_pool.py +2 -1
- mindspore/profiler/analysis/task_manager.py +1 -1
- mindspore/profiler/analysis/viewer/ascend_communication_viewer.py +5 -1
- mindspore/profiler/analysis/viewer/ascend_integrate_viewer.py +2 -1
- mindspore/profiler/analysis/viewer/ascend_op_memory_viewer.py +42 -22
- mindspore/profiler/analysis/viewer/ascend_step_trace_time_viewer.py +3 -2
- mindspore/profiler/analysis/viewer/ms_minddata_viewer.py +9 -5
- mindspore/profiler/analysis/viewer/ms_operator_details_viewer.py +132 -0
- mindspore/profiler/common/constant.py +16 -0
- mindspore/profiler/common/profiler_context.py +25 -27
- mindspore/profiler/common/profiler_info.py +0 -16
- mindspore/profiler/common/profiler_op_analyse.py +235 -0
- mindspore/profiler/common/profiler_output_path.py +23 -8
- mindspore/profiler/common/profiler_parameters.py +128 -35
- mindspore/profiler/dynamic_profile/__init__.py +0 -0
- mindspore/profiler/dynamic_profile/dynamic_monitor_proxy.py +39 -0
- mindspore/profiler/dynamic_profile/dynamic_profiler_config_context.py +666 -0
- mindspore/profiler/dynamic_profile/dynamic_profiler_utils.py +62 -0
- mindspore/profiler/dynamic_profiler.py +305 -314
- mindspore/profiler/envprofiler.py +12 -7
- mindspore/profiler/experimental_config.py +96 -6
- mindspore/profiler/mstx.py +33 -12
- mindspore/profiler/platform/__init__.py +2 -3
- mindspore/profiler/platform/npu_profiler.py +29 -19
- mindspore/profiler/profiler.py +35 -19
- mindspore/profiler/profiler_action_controller.py +64 -76
- mindspore/profiler/schedule.py +10 -4
- mindspore/rewrite/common/config.py +1 -0
- mindspore/rewrite/common/namer.py +1 -0
- mindspore/rewrite/common/namespace.py +1 -0
- mindspore/rewrite/node/node.py +31 -11
- mindspore/rewrite/parsers/assign_parser.py +1 -1
- mindspore/rewrite/symbol_tree/symbol_tree.py +1 -1
- mindspore/run_check/_check_version.py +7 -10
- mindspore/runtime/__init__.py +5 -5
- mindspore/runtime/event.py +10 -4
- mindspore/runtime/executor.py +60 -45
- mindspore/runtime/memory.py +30 -32
- mindspore/runtime/thread_bind_core.py +298 -164
- mindspore/safeguard/rewrite_obfuscation.py +12 -13
- mindspore/swresample-4.dll +0 -0
- mindspore/swscale-6.dll +0 -0
- mindspore/tinyxml2.dll +0 -0
- mindspore/train/_utils.py +14 -4
- mindspore/train/amp.py +43 -20
- mindspore/train/callback/__init__.py +5 -5
- mindspore/train/callback/_checkpoint.py +3 -6
- mindspore/train/callback/_flops_collector.py +1 -1
- mindspore/train/callback/_landscape.py +0 -1
- mindspore/train/callback/_train_fault_tolerance.py +97 -16
- mindspore/train/data_sink.py +11 -2
- mindspore/train/dataset_helper.py +9 -0
- mindspore/train/model.py +135 -55
- mindspore/train/serialization.py +133 -111
- mindspore/train/summary/summary_record.py +13 -2
- mindspore/turbojpeg.dll +0 -0
- mindspore/utils/__init__.py +3 -2
- mindspore/utils/dryrun.py +0 -6
- mindspore/utils/runtime_execution_order_check.py +163 -77
- mindspore/utils/sdc_detect.py +68 -0
- mindspore/utils/utils.py +6 -9
- mindspore/version.py +1 -1
- {mindspore-2.6.0rc1.dist-info → mindspore-2.7.0rc1.dist-info}/METADATA +5 -4
- {mindspore-2.6.0rc1.dist-info → mindspore-2.7.0rc1.dist-info}/RECORD +333 -371
- mindspore/_deprecated/jit.py +0 -198
- mindspore/experimental/es/__init__.py +0 -22
- mindspore/experimental/es/embedding_service.py +0 -891
- mindspore/experimental/es/embedding_service_layer.py +0 -581
- mindspore/profiler/parser/__init__.py +0 -14
- mindspore/profiler/parser/aicpu_data_parser.py +0 -272
- mindspore/profiler/parser/ascend_analysis/__init__.py +0 -14
- mindspore/profiler/parser/ascend_analysis/constant.py +0 -71
- mindspore/profiler/parser/ascend_analysis/file_manager.py +0 -180
- mindspore/profiler/parser/ascend_analysis/function_event.py +0 -185
- mindspore/profiler/parser/ascend_analysis/fwk_cann_parser.py +0 -136
- mindspore/profiler/parser/ascend_analysis/fwk_file_parser.py +0 -131
- mindspore/profiler/parser/ascend_analysis/msprof_timeline_parser.py +0 -104
- mindspore/profiler/parser/ascend_analysis/path_manager.py +0 -313
- mindspore/profiler/parser/ascend_analysis/profiler_info_parser.py +0 -123
- mindspore/profiler/parser/ascend_analysis/tlv_decoder.py +0 -86
- mindspore/profiler/parser/ascend_analysis/trace_event_manager.py +0 -75
- mindspore/profiler/parser/ascend_cluster_generator.py +0 -116
- mindspore/profiler/parser/ascend_communicate_generator.py +0 -314
- mindspore/profiler/parser/ascend_flops_generator.py +0 -116
- mindspore/profiler/parser/ascend_fpbp_generator.py +0 -82
- mindspore/profiler/parser/ascend_hccl_generator.py +0 -271
- mindspore/profiler/parser/ascend_integrate_generator.py +0 -42
- mindspore/profiler/parser/ascend_memory_generator.py +0 -185
- mindspore/profiler/parser/ascend_msprof_exporter.py +0 -282
- mindspore/profiler/parser/ascend_msprof_generator.py +0 -187
- mindspore/profiler/parser/ascend_op_generator.py +0 -334
- mindspore/profiler/parser/ascend_steptrace_generator.py +0 -94
- mindspore/profiler/parser/ascend_timeline_generator.py +0 -545
- mindspore/profiler/parser/base_timeline_generator.py +0 -483
- mindspore/profiler/parser/container.py +0 -229
- mindspore/profiler/parser/cpu_gpu_timeline_generator.py +0 -697
- mindspore/profiler/parser/flops_parser.py +0 -531
- mindspore/profiler/parser/framework_enum.py +0 -111
- mindspore/profiler/parser/framework_parser.py +0 -464
- mindspore/profiler/parser/framework_struct.py +0 -61
- mindspore/profiler/parser/gpu_analysis/__init__.py +0 -14
- mindspore/profiler/parser/gpu_analysis/function_event.py +0 -44
- mindspore/profiler/parser/gpu_analysis/fwk_file_parser.py +0 -89
- mindspore/profiler/parser/gpu_analysis/profiler_info_parser.py +0 -72
- mindspore/profiler/parser/hccl_parser.py +0 -573
- mindspore/profiler/parser/hwts_log_parser.py +0 -122
- mindspore/profiler/parser/integrator.py +0 -526
- mindspore/profiler/parser/memory_usage_parser.py +0 -277
- mindspore/profiler/parser/minddata_analyzer.py +0 -800
- mindspore/profiler/parser/minddata_parser.py +0 -186
- mindspore/profiler/parser/minddata_pipeline_parser.py +0 -299
- mindspore/profiler/parser/op_intermediate_parser.py +0 -149
- mindspore/profiler/parser/optime_parser.py +0 -250
- mindspore/profiler/parser/profiler_info.py +0 -213
- mindspore/profiler/parser/step_trace_parser.py +0 -666
- {mindspore-2.6.0rc1.dist-info → mindspore-2.7.0rc1.dist-info}/WHEEL +0 -0
- {mindspore-2.6.0rc1.dist-info → mindspore-2.7.0rc1.dist-info}/entry_points.txt +0 -0
- {mindspore-2.6.0rc1.dist-info → mindspore-2.7.0rc1.dist-info}/top_level.txt +0 -0
mindspore/parallel/_tensor.py
CHANGED
@@ -21,9 +21,11 @@ import numpy as np
 from mindspore.common.tensor import Tensor
 from mindspore.communication.management import get_rank, get_group_size
 from mindspore._c_expression import TensorTransform
+from mindspore import log as logger
 
 _tensor_transform = TensorTransform.get_instance()
-
+COMM_TENSOR_CELL_CACHE = {}
+RESHARD_OP_MAP_CACHE = {}
 
 def _get_tensor_strategy(dev_mat, tensor_map):
     """
@@ -348,7 +350,7 @@ def _extract_layout_item(layout_item):
     return dev_matrix, tensor_map, opt_shard_step, opt_shard_size
 
 
-def _transform_tensor_by_layout(from_layout, to_layout, device_list, rank_id):
+def _transform_tensor_by_layout(from_layout, to_layout, device_list, rank_id, enable_redist_opt=False):
     """
     Transform tensor from source layout to the destination layout.
 
@@ -362,7 +364,7 @@ def _transform_tensor_by_layout(from_layout, to_layout, device_list, rank_id):
     """
     if not isinstance(from_layout, tuple) or not isinstance(to_layout, tuple):
         raise TypeError("The layout should be tuple! layout is {} and {}".format(from_layout, to_layout))
-    return _tensor_transform.transform_tensor_sharding(from_layout, to_layout, device_list, rank_id)
+    return _tensor_transform.transform_tensor_sharding(from_layout, to_layout, device_list, enable_redist_opt, rank_id)
 
 
 def _construct_from_to_tensor_layout(from_full_tensor_shape, from_dev_matrix,
@@ -587,13 +589,15 @@ def _get_needed_rank_list_by_layouts(from_tensor_layout, to_tensor_layout, devic
     return result_list
 
 
-def _get_needed_rank_transform_operator_map_by_layouts(from_tensor_layout, to_tensor_layout, device_list, self_rank
+def _get_needed_rank_transform_operator_map_by_layouts(from_tensor_layout, to_tensor_layout, device_list, self_rank,
+                                                       enable_redist_opt=False):
     """
     AllGather op: {op_name, group_ranks + axis}
     """
     stack = []
     index = 0
-    transform_operators = _transform_tensor_by_layout(from_tensor_layout, to_tensor_layout, device_list, self_rank
+    transform_operators = _transform_tensor_by_layout(from_tensor_layout, to_tensor_layout, device_list, self_rank,
+                                                      enable_redist_opt)
     result_map = {self_rank: transform_operators}
     for operators in transform_operators:
         op_name = operators[0]
@@ -606,7 +610,7 @@ def _get_needed_rank_transform_operator_map_by_layouts(from_tensor_layout, to_te
         for rank in group_info[1]:
             if rank not in result_map:
                 new_transform_operators = _transform_tensor_by_layout(from_tensor_layout, to_tensor_layout,
-                                                                      device_list, rank)
+                                                                      device_list, rank, enable_redist_opt)
                 result_map[rank] = new_transform_operators
                 index = 0
                 for operators in new_transform_operators:
@@ -710,8 +714,6 @@ def _apply_operator(operator_name):
         Returns:
             The data of tensor after apply operator.
         """
-        if str(type(numpy_data)) == "<class 'builtins.PySafeSlice'>":
-            numpy_data = numpy_data[:]
         if not isinstance(numpy_data, np.ndarray):
             raise TypeError("The data should be a numpy.ndarray.")
         _check_operator(reshape_op)
@@ -732,10 +734,7 @@ def _apply_operator(operator_name):
             raise TypeError("The data_list should be a list.")
         new_numpy_data_list = []
         for numpy_data in numpy_data_list:
-
-                new_numpy_data_list.append(numpy_data[:])
-            else:
-                new_numpy_data_list.append(numpy_data)
+            new_numpy_data_list.append(numpy_data)
         numpy_data_list = new_numpy_data_list
         _check_operator(allgather_op)
         concat_group = allgather_op[1][:-1]
@@ -896,3 +895,159 @@ def _chunk_shape(np_tensor, strategy, depth):
         output.extend(
             _chunk_shape(ret_, strategy[len(strategy) - depth + 1:len(strategy)], depth - 1))
     return output
+
+
+def _infer_pp_op_map(from_layout, to_layout, self_rank):
+    """
+    get the ops map for merging pp stages
+    """
+    from_rank_list = from_layout[3]
+    to_rank_list = to_layout[3]
+    from_dev_num_in_stage = len(from_rank_list)
+    current_rank_stage_id = self_rank // from_dev_num_in_stage
+    diff_rank_id = [
+        rank_id for rank_id in to_rank_list if rank_id not in from_rank_list]
+    end_stage = from_dev_num_in_stage * (current_rank_stage_id + 1)
+    start_stage = from_dev_num_in_stage * current_rank_stage_id
+    rank_pos_in_stage = list(range(start_stage, end_stage)).index(self_rank)
+    root_idx = from_rank_list[rank_pos_in_stage]
+    broadcast_rank_list = [root_idx]
+    while rank_pos_in_stage < len(diff_rank_id):
+        broadcast_rank_list.append(diff_rank_id[rank_pos_in_stage])
+        rank_pos_in_stage += from_dev_num_in_stage
+    broadcast_rank_list.sort()
+    broadcast_map = {rank_id: [('Broadcast', root_idx, broadcast_rank_list)] for rank_id in broadcast_rank_list}
+    return broadcast_map
+
+
+def _get_pipeline_operator_map(from_layout, to_layout, self_rank):
+    """
+    If src_pp_stages is greater than dst_pp_stages, the weights of the corresponding cards need to
+    be communicated via broadcast to swap. Need to communicate src rank0's 01 to src rank2,
+    so that rank2 holds param0's data. Similarly, communicate rank1's 02 to rank3
+    rank0 01        01 11
+    rank1 02        02 12
+        pp2 -------> pp1
+    rank2 11        03 13
+    rank3 12        04 14
+
+    Args:
+        from_layout (tuple): Use tuple to present layout
+            (device_matrix(list), tensor_map(list), global_shape(list), rank_list(list))
+        to_layout (tuple): Use tuple to present layout
+            (device_matrix(list), tensor_map(list), global_shape(list), rank_list(list))
+        self_rank (int): rank_id
+    """
+    if len(from_layout[3]) < len(to_layout[3]):
+        logger.debug(f"from {from_layout} to {to_layout} need to broadcast data across pp stages")
+        comm_tensor_cache_key = (
+            f"{from_layout[0]}, {from_layout[1]}, {from_layout[2]}, {from_layout[3]}"
+            f" -> "
+            f"{to_layout[0]}, {to_layout[1]}, {from_layout[2]}, {to_layout[3]}")
+        global COMM_TENSOR_CELL_CACHE
+        if comm_tensor_cache_key not in COMM_TENSOR_CELL_CACHE:
+            logger.debug(f"comm_tensor_cache_key is {comm_tensor_cache_key}, not match cache")
+            broadcast_map = _infer_pp_op_map(from_layout, to_layout, self_rank)
+            broadcast_op_map_dict = {rank_id: broadcast_map for rank_id in broadcast_map.keys()}
+            COMM_TENSOR_CELL_CACHE[comm_tensor_cache_key] = broadcast_op_map_dict
+        else:
+            comm_tensor_cache_key_rank_list = COMM_TENSOR_CELL_CACHE[comm_tensor_cache_key]
+            if self_rank in comm_tensor_cache_key_rank_list:
+                logger.debug(f"comm_tensor_cache_key is {comm_tensor_cache_key}, match cache")
+                broadcast_map = comm_tensor_cache_key_rank_list[self_rank]
+            else:
+                logger.debug(f"comm_tensor_cache_key is {comm_tensor_cache_key}, but rank {self_rank} not match cache")
+                broadcast_map = _infer_pp_op_map(from_layout, to_layout, self_rank)
+                for rank_id in broadcast_map.keys():
+                    COMM_TENSOR_CELL_CACHE[comm_tensor_cache_key][rank_id] = broadcast_map
+        return broadcast_map
+    logger.debug(f"from {from_layout} to {to_layout} no need to broadcast data across pp stages")
+    return {}
+
+
+def _is_multi_shard(in_tensor_map):
+    """
+    whether the input tensor map is in multi shard
+    """
+    for tensor_map in in_tensor_map:
+        if isinstance(tensor_map, (list, tuple)) and len(tensor_map) > 1:
+            return True
+    return False
+
+
+def _insert_expand_layout_reshape(param_rank_map, from_info_tuple, to_info_tuple,
+                                  insert_from_reshape, insert_to_reshape):
+    """ insert layout expand op reshape """
+    from_dev_matrix = from_info_tuple[0]
+    from_tensor_map = from_info_tuple[1]
+    from_full_tensor_shape = from_info_tuple[2]
+    to_dev_matrix_origin = to_info_tuple[0]
+    to_tensor_map_origin = to_info_tuple[1]
+    origin_tensor_shape = to_info_tuple[2]
+    for param_rank, _ in param_rank_map.items():
+        if insert_from_reshape:
+            from_slice_tensor_shape = ()
+            from_tensor_strategy = _get_tensor_strategy(from_dev_matrix, from_tensor_map)
+            for i, item in enumerate(from_full_tensor_shape):
+                from_slice_tensor_shape += (item // from_tensor_strategy[i],)
+            param_rank_map.get(param_rank).insert(0, ('Reshape', list(from_slice_tensor_shape)))
+        if insert_to_reshape:
+            to_tensor_strategy = _get_tensor_strategy(to_dev_matrix_origin, to_tensor_map_origin)
+            to_slice_tensor_shape = ()
+            for i, item in enumerate(origin_tensor_shape):
+                to_slice_tensor_shape += (item // to_tensor_strategy[i],)
+            param_rank_map.get(param_rank).append(('Reshape', list(to_slice_tensor_shape)))
+
+
+def _infer_reshard_op_map(from_layout, to_layout, self_rank):
+    """infer reshard op map"""
+    from_layout_without_rank_list = from_layout[:-1]
+    to_layout_without_rank_list = to_layout[:-1]
+    if _is_multi_shard(from_layout[1]):
+        # ((2, 1), 1) --> (2, 1, 1) expand tensormap
+        new_layout = _expand_layout(from_layout[0], from_layout[1], from_layout[2])
+        from_layout_without_rank_list = (new_layout[0], new_layout[1], new_layout[2])
+    if _is_multi_shard(to_layout[1]):
+        new_layout = _expand_layout(to_layout[0], to_layout[1], to_layout[2])
+        to_layout_without_rank_list = (new_layout[0], new_layout[1], new_layout[2])
+    operator_map = _get_needed_rank_transform_operator_map_by_layouts(from_layout_without_rank_list,
+                                                                      to_layout_without_rank_list,
+                                                                      from_layout[3], self_rank,
+                                                                      True)
+    new_to_layout_info = to_layout[:-1]
+    _insert_expand_layout_reshape(operator_map, from_layout_without_rank_list, new_to_layout_info,
+                                  _is_multi_shard(from_layout[1]), _is_multi_shard(to_layout[1]))
+    return operator_map
+
+
+def _get_resharding_operator_map(from_layout, to_layout, self_rank):
+    """
+    Args:
+        from_layout (tuple): Use tuple to present layout
+            (device_matrix(list), tensor_map(list), global_shape(list), rank_list(list))
+        to_layout (tuple): Use tuple to present layout
+            (device_matrix(list), tensor_map(list), global_shape(list), rank_list(list))
+        self_rank (int): rank_id
+    """
+    reshard_op_cache_key = (
+        f"{from_layout[0]}, {from_layout[1]}, {from_layout[2]}, {from_layout[3]}"
+        f" -> "
+        f"{to_layout[0]}, {to_layout[1]}, {from_layout[2]}, {to_layout[3]}")
+    global RESHARD_OP_MAP_CACHE
+    if reshard_op_cache_key not in RESHARD_OP_MAP_CACHE:
+        operator_map = _infer_reshard_op_map(from_layout, to_layout, self_rank)
+        op_map_dict = {rank_id: operator_map for rank_id in operator_map}
+        RESHARD_OP_MAP_CACHE[reshard_op_cache_key] = op_map_dict
+        logger.debug(f"reshard_op_cache_key is {reshard_op_cache_key}, not match cache")
+    else:
+        cache_rank_list_dict = RESHARD_OP_MAP_CACHE[reshard_op_cache_key]
+        if self_rank in cache_rank_list_dict:
+            operator_map = cache_rank_list_dict[self_rank]
+            logger.debug(f"reshard_op_cache_key is {reshard_op_cache_key}, match cache")
+        else:
+            logger.debug(f"reshard_op_cache_key is {reshard_op_cache_key}, "
+                         f"but rank {self_rank} is not match cache")
+            operator_map = _infer_reshard_op_map(from_layout, to_layout, self_rank)
+            for rank_id in operator_map:
+                RESHARD_OP_MAP_CACHE[reshard_op_cache_key][rank_id] = operator_map
    return operator_map
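Note: the sketch below is not part of the wheel; it only illustrates how the pipeline-stage merge helper introduced in the hunk above behaves. It assumes the 2.7.0rc1 wheel is installed, that the private helper keeps the signature shown in the diff, and that the layout tuples follow (device_matrix, tensor_map, global_shape, rank_list); every value other than the rank lists is a hypothetical placeholder.

# Illustrative sketch only: exercises the private helper added in 2.7.0rc1 (see the hunk above).
from mindspore.parallel._tensor import _get_pipeline_operator_map

# Hypothetical layouts: (device_matrix, tensor_map, global_shape, rank_list).
from_layout = ([2], [0, -1], [8, 8], [0, 1])        # pp = 2: this stage owns ranks 0 and 1
to_layout = ([4], [0, -1], [8, 8], [0, 1, 2, 3])    # pp = 1: all four ranks need the parameter

# Rank 0 broadcasts its slice to rank 2 so that rank 2 also holds the data.
print(_get_pipeline_operator_map(from_layout, to_layout, 0))
# Per the _infer_pp_op_map logic above, this yields:
# {0: [('Broadcast', 0, [0, 2])], 2: [('Broadcast', 0, [0, 2])]}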

mindspore/parallel/_transformer/moe.py
CHANGED

@@ -111,7 +111,7 @@ def _check_moe_config(moe_config=None, parallel_config=None):
     """
     if not isinstance(moe_config, MoEConfig):
         raise TypeError(f"'moe_config' must be an instance of MoEConfig, but got {type(moe_config).__name__}.")
-    use_moe =
+    use_moe = moe_config.expert_num > 1
     if use_moe is False:
         return
     if moe_config.expert_num % parallel_config.expert_parallel != 0:

mindspore/parallel/_transformer/transformer.py
CHANGED

@@ -1365,7 +1365,12 @@ class MultiHeadAttention(Cell):
 class TransformerEncoderLayer(Cell):
     r"""
     Transformer Encoder Layer. This is an implementation of the single layer of the transformer
-    encoder layer, including
+    encoder layer, mainly including Multi-Head Attention, Feed Forward, Add and LayerNorm layer.
+
+    The TransformerEncoderLayer structure is shown in the following figure:
+
+    .. image:: ../images/TransformerEncoderLayer.png
+        :align: center
 
     Args:
         batch_size(int): The batch size of the input tensor when do increnmental prediction. Should be a positive
@@ -1532,7 +1537,7 @@ class TransformerEncoderLayer(Cell):
                              "and parallel_config. model_parallel is {}."
                              .format(ffn_hidden_size, parallel_config.model_parallel))
         _check_moe_config(moe_config, parallel_config)
-        self.use_moe =
+        self.use_moe = moe_config.expert_num > 1
         self.use_past = use_past
         self.seq_length = seq_length
         self.hidden_size = hidden_size
@@ -1607,7 +1612,7 @@ class TransformerEncoderLayer(Cell):
                              "and parallel_config. model_parallel is {}."
                              .format(ffn_hidden_size, parallel_config.model_parallel))
         _check_moe_config(moe_config, parallel_config)
-        self.use_moe =
+        self.use_moe = moe_config.expert_num > 1
         self.use_past = use_past
         self.seq_length = seq_length
         self.hidden_size = hidden_size
@@ -1902,7 +1907,7 @@ class TransformerDecoderLayer(Cell):
                  parallel_config=default_dpmp_config):
         super(TransformerDecoderLayer, self).__init__()
         _check_moe_config(moe_config, parallel_config)
-        self.use_moe =
+        self.use_moe = moe_config.expert_num > 1
         config_to_attention = parallel_config.dpmp if self.use_moe else parallel_config
         if batch_size or use_past:
             Validator.check_positive_int(batch_size)
@@ -2440,7 +2445,7 @@ class TransformerEncoder(Cell):
         super(TransformerEncoder, self).__init__()
         _check_config(parallel_config)
         _check_moe_config(moe_config, parallel_config)
-        self.use_moe =
+        self.use_moe = moe_config.expert_num > 1
         config_to_layer = parallel_config.moe_parallel_config if self.use_moe else parallel_config.dp_mp_config
         if _get_parallel_mode() in (ParallelMode.AUTO_PARALLEL,):
             self.add = P.Add()
@@ -2677,7 +2682,7 @@ class TransformerDecoder(Cell):
         super(TransformerDecoder, self).__init__()
         _check_moe_config(moe_config, parallel_config)
         _check_config(parallel_config)
-        self.use_moe =
+        self.use_moe = moe_config.expert_num > 1
         config_to_layer = parallel_config.moe_parallel_config if self.use_moe else parallel_config.dp_mp_config
         if _get_parallel_mode() in (ParallelMode.AUTO_PARALLEL,):
             self.add = P.Add()
@@ -2959,7 +2964,7 @@ class Transformer(Cell):
         if not lambda_func:
             lambda_func = _get_lambda_func(total_layer=encoder_layers + decoder_layers)
         _check_moe_config(moe_config, parallel_config)
-        self.use_moe =
+        self.use_moe = moe_config.expert_num > 1
         self.add = P.Add()
         self.aux_loss = Tensor(0.0, mstype.float32)
         if encoder_layers > 0:
@@ -3026,7 +3031,7 @@ class Transformer(Cell):
         if not lambda_func:
             lambda_func = _get_lambda_func(total_layer=encoder_layers + decoder_layers)
         _check_moe_config(moe_config, parallel_config)
-        self.use_moe =
+        self.use_moe = moe_config.expert_num > 1
         self.add = P.Add().shard(((), ()))
         self.aux_loss = Tensor(0.0, mstype.float32)
         if encoder_layers > 0:

mindspore/parallel/auto_parallel.py
CHANGED

@@ -162,6 +162,10 @@ class AutoParallel(Cell):
         super(AutoParallel, self).__init__(auto_prefix=False)
         self.network = network
 
+        if parallel_mode not in ["semi_auto", "sharding_propagation", "recursive_programming"]:
+            raise ValueError("the argument 'parallel_mode' must be one of ['semi_auto', 'sharding_propagation'," \
+                             " 'recursive_programming'], but got the value : {} .".format(parallel_mode))
+
         self._parallel_mode = parallel_mode
 
         self._global_rank = get_rank()
@@ -260,8 +264,10 @@ class AutoParallel(Cell):
         self._save_strategy_file_path = file_path
 
     def disable_strategy_file_only_for_trainable_params(self):
-        """
-
+        """
+        By default, MindSpore only loads and saves trainable parameters. This API enables the loading and saving of
+        non-trainable parameters as well.
+        """
         self._only_trainable_params = False
 
     def save_operator_strategy_file(self, file_path):
@@ -410,7 +416,7 @@ class AutoParallel(Cell):
                 raise ValueError("For 'AutoParallel.dataset_strategy', the argument "
                                  "'config' must be 'full_batch' or 'data_parallel', but got the value : {}."
                                  .format(config))
-            self._full_batch =
+            self._full_batch = config == "full_batch"
             self._dataset_strategy_config = config
             return
         if not isinstance(config, tuple):
@@ -436,9 +442,9 @@ class AutoParallel(Cell):
         Args:
             shard_size (int, optional): Set the optimizer weight shard group size if you want to specific the
                                         maximum group size across devices when the parallel optimizer is
-                                        enabled. The numerical range can be (0, device_num]. Default value
+                                        enabled. The numerical range can be (0, device_num] or -1. Default value
                                         is -1, which means the optimizer weight shard group size will
-                                        the data parallel group of each parameter.
+                                        the data parallel group of each parameter.
             threshold (int, optional): Set the threshold of parallel optimizer. When parallel optimizer is
                                        enabled, parameters with size smaller than this threshold will not be
                                        sharded across the devices. Parameter size = shape[0] \* ... \*
@@ -513,9 +519,10 @@ class AutoParallel(Cell):
         if not isinstance(scheduler, str):
             raise TypeError("For 'AutoParallel.pipeline', the argument 'stages' "
                             "must be str type, but got the type : {}.".format(type(scheduler)))
-        if scheduler not in ("1f1b", "gpipe"):
+        if scheduler not in ("1f1b", "gpipe", "seqpipe", "seqvpp", "seqsmartvpp"):
             raise ValueError("For 'AutoParallel.pipeline', the argument "
-                             "'scheduler' must be '1f1b'
+                             "'scheduler' must be '1f1b'/'gpipe'/'seqpipe'/'seqvpp'/'seqsmartvpp' ," \
+                             " but got the value : {}."
                             .format(scheduler))
         self._pipeline_stages = stages
         self._pipeline_result_broadcast = output_broadcast
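Note: a hedged usage sketch (not taken from the package docs) of the checks widened above. It assumes a distributed job has already been launched (for example with msrun) so that get_rank() succeeds inside AutoParallel, and that the keyword names stages/scheduler match the 2.7.0rc1 signature of AutoParallel.pipeline; treat both as assumptions.

# Illustrative sketch only; requires an initialized communication environment (e.g. launched via msrun).
from mindspore import nn
from mindspore.communication import init
from mindspore.parallel.auto_parallel import AutoParallel

init()
# Any value other than "semi_auto", "sharding_propagation" or "recursive_programming" now raises ValueError.
net = AutoParallel(nn.Dense(16, 16), parallel_mode="semi_auto")
# "seqpipe", "seqvpp" and "seqsmartvpp" are accepted schedulers in addition to "1f1b" and "gpipe".
net.pipeline(stages=2, scheduler="seqpipe")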

mindspore/parallel/checkpoint_convert.py
CHANGED

@@ -15,10 +15,10 @@
 """Convert distributed checkpoint"""
 from __future__ import absolute_import
 
-
+__all__ = ["rank_list_for_convert", "convert_checkpoint_by_rank", "convert_checkpoints"]
 
-
-
+from mindspore.parallel.checkpoint_transform import rank_list_for_transform, transform_checkpoint_by_rank, \
+    transform_checkpoints
 
 
 def rank_list_for_convert(rank_id, src_strategy_file=None, dst_strategy_file=None):
|
|
|
582
582
|
The number of multiprocess settings is related to the size of the host, and it is not recommended to set it
|
|
583
583
|
too large, otherwise it may cause freezing.
|
|
584
584
|
|
|
585
|
+
This function does not support converting remove_redundancy's checkpoint file.
|
|
586
|
+
|
|
585
587
|
Args:
|
|
586
588
|
src_checkpoints_dir (str): The source checkpoints directory.
|
|
587
589
|
dst_checkpoints_dir (str): The destination checkpoints directory to save the converted checkpoints.
|
|
@@ -924,8 +926,8 @@ def set_op_strategy_config(mode="SAVE", path=""):
|
|
|
924
926
|
|
|
925
927
|
def build_searched_strategy(strategy_filename):
|
|
926
928
|
"""
|
|
927
|
-
Extract the sharding strategy for each parameter in the network
|
|
928
|
-
|
|
929
|
+
Extract the sharding strategy for each parameter in the network from the strategy file
|
|
930
|
+
for distributed inference scenarios.
|
|
929
931
|
|
|
930
932
|
Args:
|
|
931
933
|
strategy_filename (str): Name of strategy file.
|
|
@@ -1025,8 +1027,10 @@ def load_distributed_checkpoint(network, checkpoint_filenames=None, predict_stra
|
|
|
1025
1027
|
>>> from mindspore.parallel.auto_parallel import AutoParallel
|
|
1026
1028
|
>>> from mindspore.nn.utils import no_init_parameters
|
|
1027
1029
|
>>> from mindspore.common.initializer import initializer, One
|
|
1030
|
+
>>> from mindspore.communication.management import get_group_size
|
|
1028
1031
|
>>>
|
|
1029
1032
|
>>> step_per_epoch = 4
|
|
1033
|
+
>>> device_num = get_group_size()
|
|
1030
1034
|
>>>
|
|
1031
1035
|
>>> # Define the network structure.
|
|
1032
1036
|
>>> class Net(nn.Cell):
|
|
@@ -1070,7 +1074,7 @@ def load_distributed_checkpoint(network, checkpoint_filenames=None, predict_stra
|
|
|
1070
1074
|
... network = AutoParallel(network, parallel_mode="semi_auto")
|
|
1071
1075
|
... network.save_param_strategy_file(file_path="./train_strategy.ckpt")
|
|
1072
1076
|
... model = ms.Model(network=network, loss_fn=net_loss, optimizer=net_opt)
|
|
1073
|
-
... ckpt_config = train.CheckpointConfig(keep_checkpoint_max=1, integrated_save=
|
|
1077
|
+
... ckpt_config = train.CheckpointConfig(keep_checkpoint_max=1, integrated_save=True)
|
|
1074
1078
|
... global_rank_id = int(os.getenv("RANK_ID"))
|
|
1075
1079
|
... ckpt_path = "./rank_{}_ckpt".format(global_rank_id)
|
|
1076
1080
|
... ckpt_callback = train.ModelCheckpoint(prefix="parallel", directory=ckpt_path, config=ckpt_config)
|
|
@@ -1096,10 +1100,10 @@ def load_distributed_checkpoint(network, checkpoint_filenames=None, predict_stra
|
|
|
1096
1100
|
>>>
|
|
1097
1101
|
>>> train_net()
|
|
1098
1102
|
>>> load_model()
|
|
1099
|
-
[[-
|
|
1100
|
-
[
|
|
1103
|
+
[[-9.62929535e+00, -9.76258755e+00, -9.70192051e+00 ... -9.67151260e+00, -9.71998310e+00, -9.64571190e+00],
|
|
1104
|
+
[-4.63218540e-01, -4.07317460e-01, -3.78161550e-01 ... -3.95918339e-01, -2.87363172e-01, -3.48693460e-01],
|
|
1101
1105
|
...
|
|
1102
|
-
[
|
|
1106
|
+
[-4.28075647e+00, -4.36630344e+00, -4.25664043e+00 ... -4.32012939e+00, -4.30337954e+00, -4.27571440e+00]]
|
|
1103
1107
|
"""
|
|
1104
1108
|
if format not in ['safetensors', 'ckpt'] or output_format not in ['safetensors', 'ckpt']:
|
|
1105
1109
|
raise ValueError(
|
|
@@ -1185,7 +1189,7 @@ def load_distributed_checkpoint(network, checkpoint_filenames=None, predict_stra
|
|
|
1185
1189
|
param_not_in_ckpt = []
|
|
1186
1190
|
for _, param in network.parameters_and_names():
|
|
1187
1191
|
sliced_params = []
|
|
1188
|
-
if param.name not in rank_list
|
|
1192
|
+
if param.name not in rank_list:
|
|
1189
1193
|
param_not_in_strategy.append(param.name)
|
|
1190
1194
|
continue
|
|
1191
1195
|
if param.name not in param_total_dict:
|