mindspore 2.6.0-cp311-cp311-win_amd64.whl → 2.7.0-cp311-cp311-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mindspore might be problematic.
- mindspore/.commit_id +1 -1
- mindspore/Microsoft.VisualStudio.Telemetry.dll +0 -0
- mindspore/Newtonsoft.Json.dll +0 -0
- mindspore/__init__.py +2 -2
- mindspore/_c_dataengine.cp311-win_amd64.pyd +0 -0
- mindspore/_c_expression.cp311-win_amd64.pyd +0 -0
- mindspore/_c_mindrecord.cp311-win_amd64.pyd +0 -0
- mindspore/_checkparam.py +42 -11
- mindspore/_extends/builtin_operations.py +3 -3
- mindspore/{_deprecated → _extends/optimize}/__init__.py +9 -3
- mindspore/_extends/optimize/cell_utils.py +96 -0
- mindspore/_extends/parallel_compile/akg_compiler/custom.py +1109 -0
- mindspore/_extends/parallel_compile/akg_compiler/gen_custom_op_files.py +1 -1
- mindspore/_extends/parse/__init__.py +3 -3
- mindspore/_extends/parse/compile_config.py +44 -22
- mindspore/_extends/parse/deprecated/deprecated_tensor_method.py +1 -2
- mindspore/_extends/parse/parser.py +64 -83
- mindspore/_extends/parse/resources.py +39 -0
- mindspore/_extends/parse/standard_method.py +47 -14
- mindspore/_extends/parse/trope.py +8 -1
- mindspore/_extends/pijit/__init__.py +1 -2
- mindspore/_extends/pijit/pijit_func_white_list.py +2 -5
- mindspore/amp.py +4 -22
- mindspore/atlprov.dll +0 -0
- mindspore/avcodec-59.dll +0 -0
- mindspore/avdevice-59.dll +0 -0
- mindspore/avfilter-8.dll +0 -0
- mindspore/avformat-59.dll +0 -0
- mindspore/avutil-57.dll +0 -0
- mindspore/boost/adasum.py +1 -1
- mindspore/boost/boost_cell_wrapper.py +4 -4
- mindspore/c1.dll +0 -0
- mindspore/c1xx.dll +0 -0
- mindspore/c2.dll +0 -0
- mindspore/common/__init__.py +43 -12
- mindspore/common/_grad_function.py +2 -1
- mindspore/common/_pijit_context.py +28 -7
- mindspore/common/_stub_tensor.py +1 -209
- mindspore/common/_tensor_cpp_method.py +1 -1
- mindspore/common/_tensor_docs.py +177 -52
- mindspore/common/_utils.py +9 -1
- mindspore/common/api.py +338 -208
- mindspore/common/dtype.py +108 -57
- mindspore/common/dump.py +11 -16
- mindspore/common/dynamic_shape/__init__.py +0 -0
- mindspore/common/{auto_dynamic_shape.py → dynamic_shape/auto_dynamic_shape.py} +17 -23
- mindspore/common/dynamic_shape/enable_dynamic.py +197 -0
- mindspore/common/file_system.py +59 -9
- mindspore/common/generator.py +2 -3
- mindspore/common/hook_handle.py +33 -5
- mindspore/common/jit_config.py +1 -1
- mindspore/common/jit_trace.py +84 -105
- mindspore/common/np_dtype.py +3 -3
- mindspore/common/parameter.py +27 -29
- mindspore/common/recompute.py +5 -7
- mindspore/common/sparse_tensor.py +0 -3
- mindspore/common/symbol.py +0 -1
- mindspore/common/tensor.py +84 -133
- mindspore/communication/_comm_helper.py +46 -4
- mindspore/communication/management.py +79 -7
- mindspore/context.py +47 -38
- mindspore/dataset/__init__.py +1 -1
- mindspore/dataset/audio/transforms.py +1 -1
- mindspore/dataset/core/config.py +38 -4
- mindspore/dataset/engine/datasets.py +350 -322
- mindspore/dataset/engine/datasets_user_defined.py +69 -23
- mindspore/dataset/engine/iterators.py +2 -2
- mindspore/dataset/engine/obs/config_loader.py +2 -2
- mindspore/dataset/engine/obs/obs_mindrecord_dataset.py +8 -0
- mindspore/dataset/transforms/c_transforms.py +2 -2
- mindspore/dataset/transforms/py_transforms.py +7 -3
- mindspore/dataset/transforms/transforms.py +10 -6
- mindspore/dataset/vision/__init__.py +1 -1
- mindspore/dataset/vision/py_transforms.py +8 -8
- mindspore/dataset/vision/transforms.py +17 -5
- mindspore/dataset/vision/utils.py +632 -21
- mindspore/dataset/vision/validators.py +1 -0
- mindspore/device_context/ascend/device.py +1 -1
- mindspore/device_context/ascend/op_tuning.py +35 -1
- mindspore/device_context/gpu/__init__.py +2 -2
- mindspore/device_context/gpu/device.py +1 -1
- mindspore/device_context/gpu/op_precision.py +4 -2
- mindspore/device_context/gpu/op_tuning.py +6 -3
- mindspore/device_manager.py +16 -9
- mindspore/dnnl.dll +0 -0
- mindspore/dpcmi.dll +0 -0
- mindspore/experimental/llm_boost/ascend_native/llama_boost_ascend_native.py +5 -4
- mindspore/experimental/llm_boost/atb/boost_base.py +2 -3
- mindspore/experimental/optim/adadelta.py +13 -20
- mindspore/experimental/optim/adagrad.py +15 -22
- mindspore/experimental/optim/adam.py +17 -24
- mindspore/experimental/optim/adamax.py +14 -22
- mindspore/experimental/optim/adamw.py +28 -34
- mindspore/experimental/optim/asgd.py +15 -25
- mindspore/experimental/optim/lr_scheduler.py +27 -45
- mindspore/experimental/optim/nadam.py +14 -24
- mindspore/experimental/optim/optimizer.py +13 -23
- mindspore/experimental/optim/radam.py +18 -24
- mindspore/experimental/optim/rmsprop.py +14 -25
- mindspore/experimental/optim/rprop.py +15 -26
- mindspore/experimental/optim/sgd.py +9 -19
- mindspore/hal/__init__.py +4 -4
- mindspore/hal/contiguous_tensors_handle.py +2 -2
- mindspore/hal/memory.py +1 -0
- mindspore/include/api/cell.h +65 -5
- mindspore/include/api/cfg.h +24 -7
- mindspore/include/api/context.h +1 -0
- mindspore/include/api/delegate.h +10 -2
- mindspore/include/api/dual_abi_helper.h +100 -19
- mindspore/include/api/graph.h +14 -1
- mindspore/include/api/kernel.h +16 -3
- mindspore/include/api/kernel_api.h +9 -1
- mindspore/include/api/metrics/accuracy.h +9 -0
- mindspore/include/api/model.h +8 -1
- mindspore/include/api/model_group.h +4 -0
- mindspore/include/api/model_parallel_runner.h +2 -0
- mindspore/include/api/status.h +48 -10
- mindspore/include/api/types.h +8 -3
- mindspore/include/c_api/model_c.h +0 -58
- mindspore/include/c_api/tensor_c.h +0 -26
- mindspore/include/dataset/constants.h +9 -0
- mindspore/include/dataset/vision_ascend.h +1 -1
- mindspore/jpeg62.dll +0 -0
- mindspore/mindrecord/tools/cifar10.py +61 -11
- mindspore/mindrecord/tools/cifar10_to_mr.py +5 -0
- mindspore/mindspore_backend_common.dll +0 -0
- mindspore/mindspore_backend_manager.dll +0 -0
- mindspore/mindspore_common.dll +0 -0
- mindspore/mindspore_core.dll +0 -0
- mindspore/mindspore_cpu_res_manager.dll +0 -0
- mindspore/mindspore_dump.dll +0 -0
- mindspore/mindspore_frontend.dll +0 -0
- mindspore/mindspore_glog.dll +0 -0
- mindspore/mindspore_memory_pool.dll +0 -0
- mindspore/mindspore_ms_backend.dll +0 -0
- mindspore/mindspore_ops.dll +0 -0
- mindspore/mindspore_ops_host.dll +0 -0
- mindspore/mindspore_ops_kernel_common.dll +0 -0
- mindspore/mindspore_profiler.dll +0 -0
- mindspore/mindspore_pyboost.dll +0 -0
- mindspore/mindspore_pynative.dll +0 -0
- mindspore/mindspore_res_manager.dll +0 -0
- mindspore/mindspore_runtime_pipeline.dll +0 -0
- mindspore/mint/__init__.py +4 -44
- mindspore/mint/distributed/__init__.py +5 -0
- mindspore/mint/distributed/distributed.py +425 -19
- mindspore/mint/nn/__init__.py +1 -1
- mindspore/mint/nn/functional.py +53 -6
- mindspore/mint/nn/layer/_functions.py +163 -294
- mindspore/mint/nn/layer/activation.py +8 -6
- mindspore/mint/nn/layer/conv.py +125 -101
- mindspore/mint/nn/layer/normalization.py +11 -25
- mindspore/mint/optim/adam.py +19 -18
- mindspore/mint/optim/adamw.py +14 -8
- mindspore/mint/optim/sgd.py +5 -5
- mindspore/msobj140.dll +0 -0
- mindspore/mspdb140.dll +0 -0
- mindspore/mspdbcore.dll +0 -0
- mindspore/mspdbst.dll +0 -0
- mindspore/mspft140.dll +0 -0
- mindspore/msvcdis140.dll +0 -0
- mindspore/msvcp140_1.dll +0 -0
- mindspore/msvcp140_2.dll +0 -0
- mindspore/msvcp140_atomic_wait.dll +0 -0
- mindspore/msvcp140_codecvt_ids.dll +0 -0
- mindspore/nn/cell.py +488 -620
- mindspore/nn/grad/cell_grad.py +11 -12
- mindspore/nn/layer/activation.py +36 -36
- mindspore/nn/layer/basic.py +74 -77
- mindspore/nn/layer/channel_shuffle.py +4 -4
- mindspore/nn/layer/combined.py +4 -2
- mindspore/nn/layer/conv.py +86 -85
- mindspore/nn/layer/dense.py +9 -7
- mindspore/nn/layer/embedding.py +50 -52
- mindspore/nn/layer/image.py +38 -40
- mindspore/nn/layer/math.py +111 -112
- mindspore/nn/layer/normalization.py +56 -44
- mindspore/nn/layer/pooling.py +58 -63
- mindspore/nn/layer/rnn_cells.py +33 -33
- mindspore/nn/layer/rnns.py +56 -56
- mindspore/nn/layer/thor_layer.py +74 -73
- mindspore/nn/layer/transformer.py +11 -1
- mindspore/nn/learning_rate_schedule.py +20 -20
- mindspore/nn/loss/loss.py +79 -81
- mindspore/nn/optim/adam.py +2 -4
- mindspore/nn/optim/adasum.py +2 -2
- mindspore/nn/optim/lamb.py +1 -3
- mindspore/nn/optim/optimizer.py +1 -1
- mindspore/nn/optim/tft_wrapper.py +2 -3
- mindspore/nn/optim/thor.py +2 -2
- mindspore/nn/probability/distribution/_utils/utils.py +2 -2
- mindspore/nn/probability/distribution/exponential.py +2 -1
- mindspore/nn/probability/distribution/poisson.py +2 -1
- mindspore/nn/sparse/sparse.py +3 -3
- mindspore/nn/wrap/cell_wrapper.py +73 -42
- mindspore/nn/wrap/grad_reducer.py +37 -52
- mindspore/nn/wrap/loss_scale.py +72 -74
- mindspore/numpy/array_creations.py +7 -7
- mindspore/numpy/fft.py +1 -1
- mindspore/numpy/math_ops.py +1 -1
- mindspore/numpy/utils_const.py +1 -1
- mindspore/opencv_core452.dll +0 -0
- mindspore/opencv_imgcodecs452.dll +0 -0
- mindspore/opencv_imgproc452.dll +0 -0
- mindspore/ops/_grad_experimental/grad_comm_ops.py +51 -13
- mindspore/ops/_grad_experimental/grad_debug_ops.py +14 -0
- mindspore/ops/_grad_experimental/grad_inner_ops.py +0 -9
- mindspore/ops/_op_impl/cpu/__init__.py +1 -0
- mindspore/{experimental/es/__init__.py → ops/_op_impl/cpu/joinedstr_op.py} +12 -6
- mindspore/ops/_vmap/vmap_array_ops.py +6 -13
- mindspore/ops/_vmap/vmap_nn_ops.py +8 -16
- mindspore/ops/auto_generate/cpp_create_prim_instance_helper.py +29 -10
- mindspore/ops/auto_generate/gen_extend_func.py +5 -55
- mindspore/ops/auto_generate/gen_ops_def.py +753 -273
- mindspore/ops/auto_generate/gen_ops_prim.py +1687 -958
- mindspore/ops/auto_generate/pyboost_inner_prim.py +31 -1
- mindspore/ops/composite/__init__.py +10 -0
- mindspore/ops/composite/base.py +9 -5
- mindspore/ops/composite/multitype_ops/__init__.py +12 -1
- mindspore/ops/composite/multitype_ops/_compile_utils.py +132 -108
- mindspore/ops/composite/multitype_ops/_constexpr_utils.py +1 -1
- mindspore/ops/composite/multitype_ops/add_impl.py +70 -2
- mindspore/ops/composite/multitype_ops/div_impl.py +49 -0
- mindspore/ops/composite/multitype_ops/floordiv_impl.py +29 -0
- mindspore/ops/composite/multitype_ops/getitem_impl.py +11 -0
- mindspore/ops/composite/multitype_ops/mod_impl.py +5 -3
- mindspore/ops/composite/multitype_ops/mul_impl.py +49 -0
- mindspore/ops/composite/multitype_ops/setitem_impl.py +57 -0
- mindspore/ops/composite/multitype_ops/sub_impl.py +34 -0
- mindspore/ops/composite/multitype_ops/zeros_like_impl.py +14 -0
- mindspore/ops/function/__init__.py +4 -1
- mindspore/ops/function/_add_attr_func.py +11 -6
- mindspore/ops/function/array_func.py +17 -100
- mindspore/ops/function/debug_func.py +8 -5
- mindspore/ops/function/grad/grad_func.py +5 -13
- mindspore/ops/function/math_func.py +65 -399
- mindspore/ops/function/nn_func.py +44 -61
- mindspore/ops/function/other_func.py +4 -1
- mindspore/ops/function/random_func.py +31 -4
- mindspore/ops/functional.py +2 -3
- mindspore/ops/functional_overload.py +486 -18
- mindspore/ops/op_info_register.py +21 -0
- mindspore/ops/operations/__init__.py +5 -2
- mindspore/ops/operations/_custom_ops_utils.py +675 -8
- mindspore/ops/operations/_inner_ops.py +14 -18
- mindspore/ops/operations/_sequence_ops.py +1 -1
- mindspore/ops/operations/array_ops.py +4 -50
- mindspore/ops/operations/comm_ops.py +186 -41
- mindspore/ops/operations/custom_ops.py +244 -175
- mindspore/ops/operations/debug_ops.py +55 -4
- mindspore/ops/operations/image_ops.py +13 -13
- mindspore/ops/operations/manually_defined/ops_def.py +27 -28
- mindspore/ops/operations/math_ops.py +8 -9
- mindspore/ops/operations/nn_ops.py +6 -7
- mindspore/ops/primitive.py +9 -20
- mindspore/ops/tensor_method.py +52 -11
- mindspore/ops_generate/api/cpp_create_prim_instance_helper_generator.py +1 -1
- mindspore/ops_generate/api/functional_map_cpp_generator.py +10 -9
- mindspore/ops_generate/api/functions_cc_generator.py +58 -10
- mindspore/ops_generate/api/tensor_func_reg_cpp_generator.py +1 -1
- mindspore/ops_generate/common/base_generator.py +14 -0
- mindspore/ops_generate/common/gen_constants.py +7 -2
- mindspore/ops_generate/common/gen_utils.py +0 -19
- mindspore/ops_generate/common/op_proto.py +11 -4
- mindspore/ops_generate/common/template.py +88 -11
- mindspore/ops_generate/gen_ops.py +1 -1
- mindspore/ops_generate/op_def/lite_ops_cpp_generator.py +4 -4
- mindspore/ops_generate/op_def/ops_name_h_generator.py +0 -3
- mindspore/ops_generate/op_def/ops_primitive_h_generator.py +0 -4
- mindspore/ops_generate/op_def_py/op_prim_py_generator.py +5 -2
- mindspore/ops_generate/pyboost/auto_grad_impl_cc_generator.py +49 -8
- mindspore/ops_generate/pyboost/auto_grad_reg_cc_generator.py +2 -2
- mindspore/ops_generate/pyboost/gen_pyboost_func.py +31 -16
- mindspore/ops_generate/pyboost/op_template_parser.py +98 -72
- mindspore/ops_generate/pyboost/pyboost_functions_cpp_generator.py +70 -273
- mindspore/ops_generate/pyboost/pyboost_functions_h_generator.py +14 -6
- mindspore/ops_generate/pyboost/pyboost_functions_impl_cpp_generator.py +316 -0
- mindspore/ops_generate/pyboost/pyboost_functions_py_generator.py +1 -1
- mindspore/ops_generate/pyboost/pyboost_grad_function_cpp_generator.py +5 -3
- mindspore/ops_generate/pyboost/pyboost_inner_prim_generator.py +1 -1
- mindspore/ops_generate/pyboost/pyboost_internal_functions_cpp_generator.py +76 -0
- mindspore/ops_generate/pyboost/pyboost_internal_functions_h_generator.py +76 -0
- mindspore/ops_generate/pyboost/pyboost_internal_kernel_info_adapter_generator.py +125 -0
- mindspore/ops_generate/pyboost/pyboost_native_grad_functions_generator.py +4 -3
- mindspore/ops_generate/pyboost/pyboost_op_cpp_code_generator.py +348 -61
- mindspore/ops_generate/pyboost/pyboost_overload_functions_cpp_generator.py +1 -1
- mindspore/ops_generate/pyboost/pyboost_utils.py +118 -9
- mindspore/ops_generate/tensor_py_cc_generator.py +1 -24
- mindspore/parallel/_auto_parallel_context.py +9 -17
- mindspore/parallel/_cell_wrapper.py +106 -40
- mindspore/parallel/_parallel_serialization.py +4 -3
- mindspore/parallel/_ps_context.py +4 -6
- mindspore/parallel/_tensor.py +167 -12
- mindspore/parallel/_transformer/moe.py +1 -1
- mindspore/parallel/_transformer/transformer.py +17 -12
- mindspore/parallel/_utils.py +5 -11
- mindspore/parallel/auto_parallel.py +33 -12
- mindspore/parallel/checkpoint_convert.py +3 -3
- mindspore/parallel/checkpoint_transform.py +5 -1
- mindspore/parallel/cluster/process_entity/_api.py +88 -49
- mindspore/parallel/cluster/process_entity/_utils.py +95 -7
- mindspore/parallel/cluster/run.py +48 -7
- mindspore/parallel/function/__init__.py +8 -1
- mindspore/parallel/function/reshard_func.py +7 -6
- mindspore/parallel/nn/__init__.py +15 -2
- mindspore/parallel/nn/parallel_cell_wrapper.py +50 -14
- mindspore/parallel/nn/parallel_grad_reducer.py +7 -14
- mindspore/parallel/shard.py +9 -23
- mindspore/parallel/transform_safetensors.py +468 -174
- mindspore/pgodb140.dll +0 -0
- mindspore/pgort140.dll +0 -0
- mindspore/profiler/__init__.py +2 -1
- mindspore/profiler/analysis/parser/timeline_assembly_factory/ascend_timeline_assembler.py +7 -7
- mindspore/profiler/analysis/parser/timeline_assembly_factory/base_timeline_assembler.py +3 -0
- mindspore/profiler/analysis/parser/timeline_assembly_factory/trace_view_container.py +3 -0
- mindspore/profiler/analysis/parser/timeline_creator/cpu_op_timeline_creator.py +3 -3
- mindspore/profiler/analysis/parser/timeline_creator/fwk_timeline_creator.py +3 -3
- mindspore/profiler/analysis/parser/timeline_creator/msprof_timeline_creator.py +4 -4
- mindspore/profiler/analysis/parser/timeline_creator/scope_layer_timeline_creator.py +3 -3
- mindspore/profiler/analysis/parser/timeline_event/fwk_event.py +4 -1
- mindspore/profiler/analysis/parser/timeline_event/timeline_event_pool.py +2 -1
- mindspore/profiler/analysis/task_manager.py +1 -1
- mindspore/profiler/analysis/viewer/ascend_communication_viewer.py +5 -1
- mindspore/profiler/analysis/viewer/ascend_integrate_viewer.py +2 -1
- mindspore/profiler/analysis/viewer/ascend_kernel_details_viewer.py +10 -9
- mindspore/profiler/analysis/viewer/ascend_op_memory_viewer.py +43 -23
- mindspore/profiler/analysis/viewer/ascend_step_trace_time_viewer.py +3 -2
- mindspore/profiler/analysis/viewer/ms_minddata_viewer.py +9 -5
- mindspore/profiler/analysis/viewer/ms_operator_details_viewer.py +132 -0
- mindspore/profiler/common/constant.py +16 -0
- mindspore/profiler/common/msprof_cmd_tool.py +2 -2
- mindspore/profiler/common/path_manager.py +9 -0
- mindspore/profiler/common/profiler_context.py +50 -29
- mindspore/profiler/common/profiler_info.py +0 -16
- mindspore/profiler/common/profiler_meta_data.py +1 -0
- mindspore/profiler/common/profiler_op_analyse.py +239 -0
- mindspore/profiler/common/profiler_output_path.py +23 -8
- mindspore/profiler/common/profiler_parameters.py +128 -35
- mindspore/profiler/dynamic_profile/__init__.py +0 -0
- mindspore/profiler/dynamic_profile/dynamic_monitor_proxy.py +39 -0
- mindspore/profiler/dynamic_profile/dynamic_profiler_config_context.py +666 -0
- mindspore/profiler/dynamic_profile/dynamic_profiler_utils.py +62 -0
- mindspore/profiler/dynamic_profiler.py +374 -338
- mindspore/profiler/envprofiler.py +42 -12
- mindspore/profiler/experimental_config.py +112 -7
- mindspore/profiler/mstx.py +33 -12
- mindspore/profiler/platform/__init__.py +2 -3
- mindspore/profiler/platform/cpu_profiler.py +10 -4
- mindspore/profiler/platform/npu_profiler.py +30 -20
- mindspore/profiler/profiler.py +218 -154
- mindspore/profiler/profiler_action_controller.py +65 -77
- mindspore/profiler/profiler_interface.py +2 -2
- mindspore/profiler/schedule.py +10 -4
- mindspore/rewrite/common/config.py +1 -0
- mindspore/rewrite/common/namer.py +1 -0
- mindspore/rewrite/common/namespace.py +1 -0
- mindspore/rewrite/node/node.py +31 -11
- mindspore/rewrite/parsers/assign_parser.py +1 -1
- mindspore/rewrite/symbol_tree/symbol_tree.py +2 -2
- mindspore/run_check/_check_version.py +7 -10
- mindspore/runtime/__init__.py +8 -6
- mindspore/runtime/event.py +10 -4
- mindspore/runtime/executor.py +87 -45
- mindspore/runtime/memory.py +22 -30
- mindspore/runtime/thread_bind_core.py +299 -165
- mindspore/safeguard/rewrite_obfuscation.py +12 -13
- mindspore/swresample-4.dll +0 -0
- mindspore/swscale-6.dll +0 -0
- mindspore/tbbmalloc.dll +0 -0
- mindspore/tinyxml2.dll +0 -0
- mindspore/train/_utils.py +9 -5
- mindspore/train/amp.py +43 -23
- mindspore/train/callback/__init__.py +5 -5
- mindspore/train/callback/_callback.py +2 -1
- mindspore/train/callback/_checkpoint.py +4 -14
- mindspore/train/callback/_flops_collector.py +11 -7
- mindspore/train/callback/_landscape.py +0 -1
- mindspore/train/callback/_train_fault_tolerance.py +72 -18
- mindspore/train/data_sink.py +15 -6
- mindspore/train/dataset_helper.py +14 -5
- mindspore/train/model.py +49 -47
- mindspore/train/serialization.py +168 -126
- mindspore/train/summary/summary_record.py +13 -2
- mindspore/train/train_thor/model_thor.py +2 -2
- mindspore/turbojpeg.dll +0 -0
- mindspore/utils/__init__.py +3 -2
- mindspore/utils/dryrun.py +0 -6
- mindspore/utils/runtime_execution_order_check.py +162 -78
- mindspore/utils/sdc_detect.py +68 -0
- mindspore/utils/utils.py +14 -17
- mindspore/vcmeta.dll +0 -0
- mindspore/vcruntime140.dll +0 -0
- mindspore/vcruntime140_1.dll +0 -0
- mindspore/version.py +1 -1
- {mindspore-2.6.0.dist-info → mindspore-2.7.0.dist-info}/METADATA +5 -4
- {mindspore-2.6.0.dist-info → mindspore-2.7.0.dist-info}/RECORD +400 -439
- mindspore/_deprecated/jit.py +0 -198
- mindspore/_extends/remote/kernel_build_server_ascend.py +0 -75
- mindspore/communication/_hccl_management.py +0 -297
- mindspore/experimental/es/embedding_service.py +0 -891
- mindspore/experimental/es/embedding_service_layer.py +0 -581
- mindspore/profiler/common/validator/__init__.py +0 -14
- mindspore/profiler/common/validator/validate_path.py +0 -84
- mindspore/profiler/parser/__init__.py +0 -14
- mindspore/profiler/parser/aicpu_data_parser.py +0 -272
- mindspore/profiler/parser/ascend_analysis/__init__.py +0 -14
- mindspore/profiler/parser/ascend_analysis/constant.py +0 -71
- mindspore/profiler/parser/ascend_analysis/file_manager.py +0 -180
- mindspore/profiler/parser/ascend_analysis/function_event.py +0 -185
- mindspore/profiler/parser/ascend_analysis/fwk_cann_parser.py +0 -136
- mindspore/profiler/parser/ascend_analysis/fwk_file_parser.py +0 -131
- mindspore/profiler/parser/ascend_analysis/msprof_timeline_parser.py +0 -104
- mindspore/profiler/parser/ascend_analysis/path_manager.py +0 -313
- mindspore/profiler/parser/ascend_analysis/profiler_info_parser.py +0 -123
- mindspore/profiler/parser/ascend_analysis/tlv_decoder.py +0 -86
- mindspore/profiler/parser/ascend_analysis/trace_event_manager.py +0 -75
- mindspore/profiler/parser/ascend_cluster_generator.py +0 -116
- mindspore/profiler/parser/ascend_communicate_generator.py +0 -314
- mindspore/profiler/parser/ascend_flops_generator.py +0 -116
- mindspore/profiler/parser/ascend_fpbp_generator.py +0 -82
- mindspore/profiler/parser/ascend_hccl_generator.py +0 -271
- mindspore/profiler/parser/ascend_integrate_generator.py +0 -42
- mindspore/profiler/parser/ascend_memory_generator.py +0 -185
- mindspore/profiler/parser/ascend_msprof_exporter.py +0 -282
- mindspore/profiler/parser/ascend_msprof_generator.py +0 -187
- mindspore/profiler/parser/ascend_op_generator.py +0 -334
- mindspore/profiler/parser/ascend_steptrace_generator.py +0 -94
- mindspore/profiler/parser/ascend_timeline_generator.py +0 -545
- mindspore/profiler/parser/base_timeline_generator.py +0 -483
- mindspore/profiler/parser/container.py +0 -229
- mindspore/profiler/parser/cpu_gpu_timeline_generator.py +0 -697
- mindspore/profiler/parser/flops_parser.py +0 -531
- mindspore/profiler/parser/framework_enum.py +0 -111
- mindspore/profiler/parser/framework_parser.py +0 -464
- mindspore/profiler/parser/framework_struct.py +0 -61
- mindspore/profiler/parser/gpu_analysis/__init__.py +0 -14
- mindspore/profiler/parser/gpu_analysis/function_event.py +0 -44
- mindspore/profiler/parser/gpu_analysis/fwk_file_parser.py +0 -89
- mindspore/profiler/parser/gpu_analysis/profiler_info_parser.py +0 -72
- mindspore/profiler/parser/hccl_parser.py +0 -573
- mindspore/profiler/parser/hwts_log_parser.py +0 -122
- mindspore/profiler/parser/integrator.py +0 -526
- mindspore/profiler/parser/memory_usage_parser.py +0 -277
- mindspore/profiler/parser/minddata_analyzer.py +0 -800
- mindspore/profiler/parser/minddata_parser.py +0 -186
- mindspore/profiler/parser/minddata_pipeline_parser.py +0 -299
- mindspore/profiler/parser/op_intermediate_parser.py +0 -149
- mindspore/profiler/parser/optime_parser.py +0 -250
- mindspore/profiler/parser/profiler_info.py +0 -213
- mindspore/profiler/parser/step_trace_parser.py +0 -666
- mindspore/utils/hooks.py +0 -81
- /mindspore/common/{_auto_dynamic.py → dynamic_shape/_auto_dynamic.py} +0 -0
- {mindspore-2.6.0.dist-info → mindspore-2.7.0.dist-info}/WHEEL +0 -0
- {mindspore-2.6.0.dist-info → mindspore-2.7.0.dist-info}/entry_points.txt +0 -0
- {mindspore-2.6.0.dist-info → mindspore-2.7.0.dist-info}/top_level.txt +0 -0
mindspore/parallel/_tensor.py
CHANGED
@@ -21,9 +21,11 @@ import numpy as np
 from mindspore.common.tensor import Tensor
 from mindspore.communication.management import get_rank, get_group_size
 from mindspore._c_expression import TensorTransform
+from mindspore import log as logger
 
 _tensor_transform = TensorTransform.get_instance()
-
+COMM_TENSOR_CELL_CACHE = {}
+RESHARD_OP_MAP_CACHE = {}
 
 def _get_tensor_strategy(dev_mat, tensor_map):
     """
@@ -348,7 +350,7 @@ def _extract_layout_item(layout_item):
     return dev_matrix, tensor_map, opt_shard_step, opt_shard_size
 
 
-def _transform_tensor_by_layout(from_layout, to_layout, device_list, rank_id):
+def _transform_tensor_by_layout(from_layout, to_layout, device_list, rank_id, enable_redist_opt=False):
     """
     Transform tensor from source layout to the destination layout.
 
@@ -362,7 +364,7 @@ def _transform_tensor_by_layout(from_layout, to_layout, device_list, rank_id):
     """
     if not isinstance(from_layout, tuple) or not isinstance(to_layout, tuple):
         raise TypeError("The layout should be tuple! layout is {} and {}".format(from_layout, to_layout))
-    return _tensor_transform.transform_tensor_sharding(from_layout, to_layout, device_list, rank_id)
+    return _tensor_transform.transform_tensor_sharding(from_layout, to_layout, device_list, enable_redist_opt, rank_id)
 
 
 def _construct_from_to_tensor_layout(from_full_tensor_shape, from_dev_matrix,
@@ -587,13 +589,15 @@ def _get_needed_rank_list_by_layouts(from_tensor_layout, to_tensor_layout, devic
     return result_list
 
 
-def _get_needed_rank_transform_operator_map_by_layouts(from_tensor_layout, to_tensor_layout, device_list, self_rank
+def _get_needed_rank_transform_operator_map_by_layouts(from_tensor_layout, to_tensor_layout, device_list, self_rank,
+                                                       enable_redist_opt=False):
     """
     AllGather op: {op_name, group_ranks + axis}
     """
     stack = []
     index = 0
-    transform_operators = _transform_tensor_by_layout(from_tensor_layout, to_tensor_layout, device_list, self_rank
+    transform_operators = _transform_tensor_by_layout(from_tensor_layout, to_tensor_layout, device_list, self_rank,
+                                                      enable_redist_opt)
     result_map = {self_rank: transform_operators}
     for operators in transform_operators:
         op_name = operators[0]
@@ -606,7 +610,7 @@ def _get_needed_rank_transform_operator_map_by_layouts(from_tensor_layout, to_te
             for rank in group_info[1]:
                 if rank not in result_map:
                     new_transform_operators = _transform_tensor_by_layout(from_tensor_layout, to_tensor_layout,
-                                                                          device_list, rank)
+                                                                          device_list, rank, enable_redist_opt)
                     result_map[rank] = new_transform_operators
                     index = 0
                     for operators in new_transform_operators:
@@ -710,8 +714,6 @@ def _apply_operator(operator_name):
     Returns:
         The data of tensor after apply operator.
     """
-    if str(type(numpy_data)) == "<class 'builtins.PySafeSlice'>":
-        numpy_data = numpy_data[:]
     if not isinstance(numpy_data, np.ndarray):
         raise TypeError("The data should be a numpy.ndarray.")
     _check_operator(reshape_op)
@@ -732,10 +734,7 @@ def _apply_operator(operator_name):
         raise TypeError("The data_list should be a list.")
     new_numpy_data_list = []
     for numpy_data in numpy_data_list:
-
-            new_numpy_data_list.append(numpy_data[:])
-        else:
-            new_numpy_data_list.append(numpy_data)
+        new_numpy_data_list.append(numpy_data)
     numpy_data_list = new_numpy_data_list
     _check_operator(allgather_op)
     concat_group = allgather_op[1][:-1]
@@ -896,3 +895,159 @@ def _chunk_shape(np_tensor, strategy, depth):
         output.extend(
             _chunk_shape(ret_, strategy[len(strategy) - depth + 1:len(strategy)], depth - 1))
     return output
+
+
+def _infer_pp_op_map(from_layout, to_layout, self_rank):
+    """
+    get the ops map for merging pp stages
+    """
+    from_rank_list = from_layout[3]
+    to_rank_list = to_layout[3]
+    from_dev_num_in_stage = len(from_rank_list)
+    current_rank_stage_id = self_rank // from_dev_num_in_stage
+    diff_rank_id = [
+        rank_id for rank_id in to_rank_list if rank_id not in from_rank_list]
+    end_stage = from_dev_num_in_stage * (current_rank_stage_id + 1)
+    start_stage = from_dev_num_in_stage * current_rank_stage_id
+    rank_pos_in_stage = list(range(start_stage, end_stage)).index(self_rank)
+    root_idx = from_rank_list[rank_pos_in_stage]
+    broadcast_rank_list = [root_idx]
+    while rank_pos_in_stage < len(diff_rank_id):
+        broadcast_rank_list.append(diff_rank_id[rank_pos_in_stage])
+        rank_pos_in_stage += from_dev_num_in_stage
+    broadcast_rank_list.sort()
+    broadcast_map = {rank_id: [('Broadcast', root_idx, broadcast_rank_list)] for rank_id in broadcast_rank_list}
+    return broadcast_map
+
+
+def _get_pipeline_operator_map(from_layout, to_layout, self_rank):
+    """
+    If src_pp_stages is greater than dst_pp_stages, the weights of the corresponding cards need to
+    be communicated via broadcast to swap. Need to communicate src rank0's 01 to src rank2,
+    so that rank2 holds param0's data. Similarly, communicate rank1's 02 to rank3
+    rank0 01        01 11
+    rank1 02        02 12
+          pp2 -------> pp1
+    rank2 11        03 13
+    rank3 12        04 14
+
+    Args:
+        from_layout (tuple): Use tuple to present layout
+                             (device_matrix(list), tensor_map(list), global_shape(list), rank_list(list))
+        to_layout (tuple): Use tuple to present layout
+                           (device_matrix(list), tensor_map(list), global_shape(list), rank_list(list))
+        self_rank (int): rank_id
+    """
+    if len(from_layout[3]) < len(to_layout[3]):
+        logger.debug(f"from {from_layout} to {to_layout} need to broadcast data across pp stages")
+        comm_tensor_cache_key = (
+            f"{from_layout[0]}, {from_layout[1]}, {from_layout[2]}, {from_layout[3]}"
+            f" -> "
+            f"{to_layout[0]}, {to_layout[1]}, {from_layout[2]}, {to_layout[3]}")
+        global COMM_TENSOR_CELL_CACHE
+        if comm_tensor_cache_key not in COMM_TENSOR_CELL_CACHE:
+            logger.debug(f"comm_tensor_cache_key is {comm_tensor_cache_key}, not match cache")
+            broadcast_map = _infer_pp_op_map(from_layout, to_layout, self_rank)
+            broadcast_op_map_dict = {rank_id: broadcast_map for rank_id in broadcast_map.keys()}
+            COMM_TENSOR_CELL_CACHE[comm_tensor_cache_key] = broadcast_op_map_dict
+        else:
+            comm_tensor_cache_key_rank_list = COMM_TENSOR_CELL_CACHE[comm_tensor_cache_key]
+            if self_rank in comm_tensor_cache_key_rank_list:
+                logger.debug(f"comm_tensor_cache_key is {comm_tensor_cache_key}, match cache")
+                broadcast_map = comm_tensor_cache_key_rank_list[self_rank]
+            else:
+                logger.debug(f"comm_tensor_cache_key is {comm_tensor_cache_key}, but rank {self_rank} not match cache")
+                broadcast_map = _infer_pp_op_map(from_layout, to_layout, self_rank)
+                for rank_id in broadcast_map.keys():
+                    COMM_TENSOR_CELL_CACHE[comm_tensor_cache_key][rank_id] = broadcast_map
+        return broadcast_map
+    logger.debug(f"from {from_layout} to {to_layout} no need to broadcast data across pp stages")
+    return {}
+
+
+def _is_multi_shard(in_tensor_map):
+    """
+    whether the input tensor map is in multi shard
+    """
+    for tensor_map in in_tensor_map:
+        if isinstance(tensor_map, (list, tuple)) and len(tensor_map) > 1:
+            return True
+    return False
+
+
+def _insert_expand_layout_reshape(param_rank_map, from_info_tuple, to_info_tuple,
+                                  insert_from_reshape, insert_to_reshape):
+    """ insert layout expand op reshape """
+    from_dev_matrix = from_info_tuple[0]
+    from_tensor_map = from_info_tuple[1]
+    from_full_tensor_shape = from_info_tuple[2]
+    to_dev_matrix_origin = to_info_tuple[0]
+    to_tensor_map_origin = to_info_tuple[1]
+    origin_tensor_shape = to_info_tuple[2]
+    for param_rank, _ in param_rank_map.items():
+        if insert_from_reshape:
+            from_slice_tensor_shape = ()
+            from_tensor_strategy = _get_tensor_strategy(from_dev_matrix, from_tensor_map)
+            for i, item in enumerate(from_full_tensor_shape):
+                from_slice_tensor_shape += (item // from_tensor_strategy[i],)
+            param_rank_map.get(param_rank).insert(0, ('Reshape', list(from_slice_tensor_shape)))
+        if insert_to_reshape:
+            to_tensor_strategy = _get_tensor_strategy(to_dev_matrix_origin, to_tensor_map_origin)
+            to_slice_tensor_shape = ()
+            for i, item in enumerate(origin_tensor_shape):
+                to_slice_tensor_shape += (item // to_tensor_strategy[i],)
+            param_rank_map.get(param_rank).append(('Reshape', list(to_slice_tensor_shape)))
+
+
+def _infer_reshard_op_map(from_layout, to_layout, self_rank):
+    """infer reshard op map"""
+    from_layout_without_rank_list = from_layout[:-1]
+    to_layout_without_rank_list = to_layout[:-1]
+    if _is_multi_shard(from_layout[1]):
+        # ((2, 1), 1) --> (2, 1, 1) expand tensormap
+        new_layout = _expand_layout(from_layout[0], from_layout[1], from_layout[2])
+        from_layout_without_rank_list = (new_layout[0], new_layout[1], new_layout[2])
+    if _is_multi_shard(to_layout[1]):
+        new_layout = _expand_layout(to_layout[0], to_layout[1], to_layout[2])
+        to_layout_without_rank_list = (new_layout[0], new_layout[1], new_layout[2])
+    operator_map = _get_needed_rank_transform_operator_map_by_layouts(from_layout_without_rank_list,
+                                                                      to_layout_without_rank_list,
+                                                                      from_layout[3], self_rank,
+                                                                      True)
+    new_to_layout_info = to_layout[:-1]
+    _insert_expand_layout_reshape(operator_map, from_layout_without_rank_list, new_to_layout_info,
+                                  _is_multi_shard(from_layout[1]), _is_multi_shard(to_layout[1]))
+    return operator_map
+
+
+def _get_resharding_operator_map(from_layout, to_layout, self_rank):
+    """
+    Args:
+        from_layout (tuple): Use tuple to present layout
+                             (device_matrix(list), tensor_map(list), global_shape(list), rank_list(list))
+        to_layout (tuple): Use tuple to present layout
+                           (device_matrix(list), tensor_map(list), global_shape(list), rank_list(list))
+        self_rank (int): rank_id
+    """
+    reshard_op_cache_key = (
+        f"{from_layout[0]}, {from_layout[1]}, {from_layout[2]}, {from_layout[3]}"
+        f" -> "
+        f"{to_layout[0]}, {to_layout[1]}, {from_layout[2]}, {to_layout[3]}")
+    global RESHARD_OP_MAP_CACHE
+    if reshard_op_cache_key not in RESHARD_OP_MAP_CACHE:
+        operator_map = _infer_reshard_op_map(from_layout, to_layout, self_rank)
+        op_map_dict = {rank_id: operator_map for rank_id in operator_map}
+        RESHARD_OP_MAP_CACHE[reshard_op_cache_key] = op_map_dict
+        logger.debug(f"reshard_op_cache_key is {reshard_op_cache_key}, not match cache")
+    else:
+        cache_rank_list_dict = RESHARD_OP_MAP_CACHE[reshard_op_cache_key]
+        if self_rank in cache_rank_list_dict:
+            operator_map = cache_rank_list_dict[self_rank]
+            logger.debug(f"reshard_op_cache_key is {reshard_op_cache_key}, match cache")
+        else:
+            logger.debug(f"reshard_op_cache_key is {reshard_op_cache_key}, "
+                         f"but rank {self_rank} is not match cache")
+            operator_map = _infer_reshard_op_map(from_layout, to_layout, self_rank)
+            for rank_id in operator_map:
+                RESHARD_OP_MAP_CACHE[reshard_op_cache_key][rank_id] = operator_map
    return operator_map
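The new `_infer_pp_op_map` helper above reads only the rank lists (`layout[3]`) of the two layout tuples. A minimal sketch of the pp2 -> pp1 case drawn in the `_get_pipeline_operator_map` docstring, with hypothetical layout values (the device matrix, tensor map and global shape fields are placeholders):

    # Only the rank lists (index 3) are consulted by _infer_pp_op_map; other fields are illustrative.
    from mindspore.parallel._tensor import _infer_pp_op_map

    from_layout = ([2, 1], [1, -1], [8, 4], [0, 1])      # source stage 0 owns ranks 0 and 1 (pp2)
    to_layout = ([4, 1], [1, -1], [8, 4], [0, 1, 2, 3])  # destination holds the parameter on all 4 ranks (pp1)

    print(_infer_pp_op_map(from_layout, to_layout, self_rank=0))
    # {0: [('Broadcast', 0, [0, 2])], 2: [('Broadcast', 0, [0, 2])]}
    # rank 0 broadcasts its slice to rank 2, matching the docstring figure.

`_get_resharding_operator_map` wraps the analogous inference with the new `RESHARD_OP_MAP_CACHE`, keyed on the from/to layout strings, so repeated transformations of parameters with the same layouts skip re-inference.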
mindspore/parallel/_transformer/transformer.py
CHANGED
@@ -111,7 +111,7 @@ def _check_moe_config(moe_config=None, parallel_config=None):
     """
     if not isinstance(moe_config, MoEConfig):
         raise TypeError(f"'moe_config' must be an instance of MoEConfig, but got {type(moe_config).__name__}.")
-    use_moe =
+    use_moe = moe_config.expert_num > 1
     if use_moe is False:
         return
     if moe_config.expert_num % parallel_config.expert_parallel != 0:
@@ -1365,7 +1365,12 @@ class MultiHeadAttention(Cell):
 class TransformerEncoderLayer(Cell):
     r"""
     Transformer Encoder Layer. This is an implementation of the single layer of the transformer
-    encoder layer, including
+    encoder layer, mainly including Multi-Head Attention, Feed Forward, Add and LayerNorm layer.
+
+    The TransformerEncoderLayer structure is shown in the following figure:
+
+    .. image:: ../images/TransformerEncoderLayer.png
+        :align: center
 
     Args:
         batch_size(int): The batch size of the input tensor when do increnmental prediction. Should be a positive
@@ -1448,7 +1453,7 @@ class TransformerEncoderLayer(Cell):
         >>> # When use use_past=True, it includes two steps to implement the incremental prediction.
         >>> # Step 1: set is_first_iteration=True, and input the full sequence length's state.
         >>> batch_valid_length = Tensor(np.ones((2,)), mstype.int32)
-        >>> init_reset = Tensor([True], mstype.
+        >>> init_reset = Tensor([True], mstype.bool)
         >>> # Set is_first_iteration=True to generate the full memory states
         >>> model = TransformerEncoderLayer(batch_size=2, hidden_size=8, ffn_hidden_size=64, seq_length=16,
         ...                                 num_heads=2, use_past=True)
@@ -1462,7 +1467,7 @@ class TransformerEncoderLayer(Cell):
         (2, 2, 16, 4)
         >>> encoder_input_value = Tensor(np.ones((2, 1, 8)), mstype.float32)
         >>> encoder_input_mask = Tensor(np.ones((2, 1, 16)), mstype.float16)
-        >>> init_reset = Tensor([False], mstype.
+        >>> init_reset = Tensor([False], mstype.bool)
         >>> # Step 2: set is_first_iteration=False, and pass the single word to run the prediction rather than
         >>> # the full sequence.
         >>> model.add_flags_recursive(is_first_iteration=False)
@@ -1532,7 +1537,7 @@ class TransformerEncoderLayer(Cell):
                              "and parallel_config. model_parallel is {}."
                              .format(ffn_hidden_size, parallel_config.model_parallel))
         _check_moe_config(moe_config, parallel_config)
-        self.use_moe =
+        self.use_moe = moe_config.expert_num > 1
         self.use_past = use_past
         self.seq_length = seq_length
         self.hidden_size = hidden_size
@@ -1607,7 +1612,7 @@ class TransformerEncoderLayer(Cell):
                              "and parallel_config. model_parallel is {}."
                              .format(ffn_hidden_size, parallel_config.model_parallel))
         _check_moe_config(moe_config, parallel_config)
-        self.use_moe =
+        self.use_moe = moe_config.expert_num > 1
         self.use_past = use_past
         self.seq_length = seq_length
         self.hidden_size = hidden_size
@@ -1902,7 +1907,7 @@ class TransformerDecoderLayer(Cell):
                  parallel_config=default_dpmp_config):
         super(TransformerDecoderLayer, self).__init__()
         _check_moe_config(moe_config, parallel_config)
-        self.use_moe =
+        self.use_moe = moe_config.expert_num > 1
         config_to_attention = parallel_config.dpmp if self.use_moe else parallel_config
         if batch_size or use_past:
             Validator.check_positive_int(batch_size)
@@ -2370,7 +2375,7 @@ class TransformerEncoder(Cell):
         >>> # When use use_past=True, it includes two steps to implement the incremental prediction.
         >>> # Step 1: set is_first_iteration=True, and input the full sequence length's state.
         >>> batch_valid_length = Tensor(np.ones((2,)), mstype.int32)
-        >>> init_reset = Tensor([True], mstype.
+        >>> init_reset = Tensor([True], mstype.bool)
         >>> # Set is_first_iteration=True to generate the full memory states
         >>> model = TransformerEncoder(batch_size=2, hidden_size=8, ffn_hidden_size=64, seq_length=16,
         ...                            num_heads=2, num_layers=2, use_past=True)
@@ -2384,7 +2389,7 @@ class TransformerEncoder(Cell):
         (2, 2, 16, 4)
         >>> encoder_input_value = Tensor(np.ones((2, 1, 8)), mstype.float32)
         >>> encoder_input_mask = Tensor(np.ones((2, 1, 16)), mstype.float16)
-        >>> init_reset = Tensor([False], mstype.
+        >>> init_reset = Tensor([False], mstype.bool)
         >>> # Step 2: set is_first_iteration=False, and pass the single word to run the prediction rather than
         >>> # the full sequence.
         >>> model.add_flags_recursive(is_first_iteration=False)
@@ -2440,7 +2445,7 @@ class TransformerEncoder(Cell):
         super(TransformerEncoder, self).__init__()
         _check_config(parallel_config)
         _check_moe_config(moe_config, parallel_config)
-        self.use_moe =
+        self.use_moe = moe_config.expert_num > 1
         config_to_layer = parallel_config.moe_parallel_config if self.use_moe else parallel_config.dp_mp_config
         if _get_parallel_mode() in (ParallelMode.AUTO_PARALLEL,):
             self.add = P.Add()
@@ -2677,7 +2682,7 @@ class TransformerDecoder(Cell):
         super(TransformerDecoder, self).__init__()
         _check_moe_config(moe_config, parallel_config)
         _check_config(parallel_config)
-        self.use_moe =
+        self.use_moe = moe_config.expert_num > 1
         config_to_layer = parallel_config.moe_parallel_config if self.use_moe else parallel_config.dp_mp_config
         if _get_parallel_mode() in (ParallelMode.AUTO_PARALLEL,):
             self.add = P.Add()
@@ -2959,7 +2964,7 @@ class Transformer(Cell):
         if not lambda_func:
             lambda_func = _get_lambda_func(total_layer=encoder_layers + decoder_layers)
         _check_moe_config(moe_config, parallel_config)
-        self.use_moe =
+        self.use_moe = moe_config.expert_num > 1
         self.add = P.Add()
         self.aux_loss = Tensor(0.0, mstype.float32)
         if encoder_layers > 0:
@@ -3026,7 +3031,7 @@ class Transformer(Cell):
         if not lambda_func:
             lambda_func = _get_lambda_func(total_layer=encoder_layers + decoder_layers)
         _check_moe_config(moe_config, parallel_config)
-        self.use_moe =
+        self.use_moe = moe_config.expert_num > 1
         self.add = P.Add().shard(((), ()))
         self.aux_loss = Tensor(0.0, mstype.float32)
         if encoder_layers > 0:
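Every constructor touched in this file now derives the MoE switch the same way, `use_moe = moe_config.expert_num > 1`, and `_check_moe_config` additionally requires `expert_num` to divide evenly by `expert_parallel`. A plain-Python sketch of that rule with illustrative values:

    expert_num = 4          # stands in for moe_config.expert_num
    expert_parallel = 2     # stands in for parallel_config.expert_parallel

    use_moe = expert_num > 1                      # MoE only when more than one expert is configured
    if use_moe and expert_num % expert_parallel != 0:
        raise ValueError("expert_num should be divisible by expert_parallel")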
mindspore/parallel/_utils.py
CHANGED
@@ -21,7 +21,7 @@ import mindspore as ms
 from mindspore import context, log as logger
 from mindspore._c_expression import reset_op_id, reset_op_id_with_offset
 from mindspore.common.tensor import Tensor
-from mindspore.common.dtype import
+from mindspore.common.dtype import _dtype_to_nptype
 from mindspore.common import dtype as mstype
 from mindspore.communication.management import get_group_size, get_rank
 from mindspore.communication._comm_helper import _is_initialized
@@ -156,7 +156,7 @@ def _is_in_auto_parallel_mode():
 
 
 def _is_parallel_mode():
-    if not _is_initialized()
+    if not _is_initialized():
         return False
     if os.getenv("RUN_MODE") != "predict":
         return False
@@ -173,12 +173,6 @@ def _is_in_hybrid_parallel_mode():
     return _get_parallel_mode() == ms.ParallelMode.HYBRID_PARALLEL
 
 
-def _is_pynative_parallel():
-    parallel_mode = context.get_auto_parallel_context('parallel_mode')
-    return context.get_context('mode') == context.PYNATIVE_MODE and parallel_mode in (
-        context.ParallelMode.SEMI_AUTO_PARALLEL, context.ParallelMode.AUTO_PARALLEL)
-
-
 def _get_full_batch():
     """Get whether to use full_batch."""
     return auto_parallel_context().get_full_batch()
@@ -452,7 +446,7 @@ def _to_full_tensor(elem, global_device_num, global_rank, scaling_sens=None):
                 batchsize_per_device = item
             else:
                 new_shape += (item,)
-        new_tensor_numpy = np.zeros(new_shape,
+        new_tensor_numpy = np.zeros(new_shape, _dtype_to_nptype(type_))  # pylint:disable=protected-access
         start = stage_rank * batchsize_per_device
         new_tensor_numpy[start: start + batchsize_per_device] = data.asnumpy()
     else:
@@ -466,7 +460,7 @@ def _to_full_tensor(elem, global_device_num, global_rank, scaling_sens=None):
                 end = (stage_rank % dataset_strategy[index][i] + 1) * item
                 s = slice(start, end, 1)
                 slice_index += (s,)
-        new_tensor_numpy = np.zeros(new_shape,
+        new_tensor_numpy = np.zeros(new_shape, _dtype_to_nptype(type_))  # pylint:disable=protected-access
         new_tensor_numpy[slice_index] = data.asnumpy()
         new_tensor = Tensor(new_tensor_numpy, dtype=type_)
         lst.append(new_tensor)
@@ -773,7 +767,7 @@ def _grads_divided_by_device_num_if_recomputation(grads):
     """
    If in pynative parallel and full_batch is True, divide grads by device num to ensure that the gradients is correct.
     """
-    if not
+    if not _get_full_batch():
         return grads
 
     device_num = _get_device_num()
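Both `_to_full_tensor` hunks now spell out the dtype argument through `_dtype_to_nptype`, the private helper this diff imports from `mindspore.common.dtype`. A short sketch of the same allocation pattern, assuming that helper maps a MindSpore dtype to the corresponding NumPy dtype as its name suggests:

    import numpy as np
    import mindspore.common.dtype as mstype
    from mindspore.common.dtype import _dtype_to_nptype  # pylint: disable=protected-access

    full_shape = (8, 4)
    # Allocate the full-batch buffer with the matching NumPy dtype, then fill the slice this
    # stage owns, as _to_full_tensor does above with data.asnumpy().
    buffer = np.zeros(full_shape, _dtype_to_nptype(mstype.float16))
    print(buffer.dtype)  # float16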
mindspore/parallel/auto_parallel.py
CHANGED
@@ -14,6 +14,7 @@
 # ============================================================================
 """Cell of auto parallel"""
 import os
+from mindspore import jit
 from mindspore.nn.cell import Cell
 from mindspore.parallel.shard import Layout
 from mindspore.communication.management import get_rank, get_group_size
@@ -162,6 +163,10 @@ class AutoParallel(Cell):
         super(AutoParallel, self).__init__(auto_prefix=False)
         self.network = network
 
+        if parallel_mode not in ["semi_auto", "sharding_propagation", "recursive_programming"]:
+            raise ValueError("the argument 'parallel_mode' must be one of ['semi_auto', 'sharding_propagation'," \
+                             " 'recursive_programming'], but got the value : {} .".format(parallel_mode))
+
         self._parallel_mode = parallel_mode
 
         self._global_rank = get_rank()
@@ -260,8 +265,10 @@ class AutoParallel(Cell):
         self._save_strategy_file_path = file_path
 
     def disable_strategy_file_only_for_trainable_params(self):
-        """
-
+        """
+        By default, MindSpore only loads and saves trainable parameters. This API enables the loading and saving of
+        non-trainable parameters as well.
+        """
         self._only_trainable_params = False
 
     def save_operator_strategy_file(self, file_path):
@@ -275,7 +282,8 @@ class AutoParallel(Cell):
         Note:
             - It only works when `parallel_mode=sharding_propagation`.
            - When performing distributed training, users can first save the strategy using dryrun on a single device
-              and then load strategy to perform distributed training.
+              and then load strategy to perform distributed training. Note that only the first device of each node will
+              save the strategy file, so the simulated rank id specified by Dryrun must be divisible by 8.
 
         Args:
             file_path (str): Path to save parallel strategy json, must be an absolute path.
@@ -410,7 +418,7 @@ class AutoParallel(Cell):
                 raise ValueError("For 'AutoParallel.dataset_strategy', the argument "
                                  "'config' must be 'full_batch' or 'data_parallel', but got the value : {}."
                                  .format(config))
-            self._full_batch =
+            self._full_batch = config == "full_batch"
             self._dataset_strategy_config = config
             return
         if not isinstance(config, tuple):
@@ -505,17 +513,18 @@ class AutoParallel(Cell):
             raise ValueError("For 'AutoParallel.pipeline', the argument 'stages' "
                              "must be larger than zero, but got value: {}.".format(stages))
         if not isinstance(output_broadcast, bool):
-            raise TypeError("For 'AutoParallel.pipeline', the argument '
+            raise TypeError("For 'AutoParallel.pipeline', the argument 'output_broadcast' "
                             "must be bool type, but got the type : {}.".format(type(output_broadcast)))
         if not isinstance(interleave, bool):
-            raise TypeError("For 'AutoParallel.pipeline', the argument '
+            raise TypeError("For 'AutoParallel.pipeline', the argument 'interleave' "
                             "must be bool type, but got the type : {}.".format(type(interleave)))
         if not isinstance(scheduler, str):
-            raise TypeError("For 'AutoParallel.pipeline', the argument '
+            raise TypeError("For 'AutoParallel.pipeline', the argument 'scheduler' "
                             "must be str type, but got the type : {}.".format(type(scheduler)))
-        if scheduler not in ("1f1b", "gpipe"):
+        if scheduler not in ("1f1b", "gpipe", "seqpipe", "seqvpp", "seqsmartvpp", "zero_bubble_v"):
             raise ValueError("For 'AutoParallel.pipeline', the argument "
-                             "'scheduler' must be '1f1b'
+                             "'scheduler' must be '1f1b'/'gpipe'/'seqpipe'/'seqvpp'/'seqsmartvpp'/'zero_bubble_v' ," \
+                             " but got the value : {}."
                              .format(scheduler))
         self._pipeline_stages = stages
         self._pipeline_result_broadcast = output_broadcast
@@ -658,8 +667,11 @@ class AutoParallel(Cell):
             - recomputation_communication_overlap (bool): Enable overlap between recompute ops and communication ops
              if True.
              Default: False.
-            - grad_matmul_communication_overlap (bool):
-              tensor parallel communication
+            - grad_matmul_communication_overlap (bool, str): When set to ``True``, it indicates that overlap
+              between dw matmul and tensor parallel communication is enabled. When set to ``False``, it indicates
+              that this feature is disabled. When set to str, it only optimizes the specified communication
+              operator types, with operators separated by ``,``. For example, "AlltoAll,AlltoAllV" indicates that
+              only ``AlltoAll`` and ``AlltoAllV`` are optimized. Default: ``False``.
            - grad_fa_allgather_overlap (bool): Enable overlap between duplicated allgather by recomputing
              in sequence parallel and flashattentionscoregrad ops if True. Default: False.
            - enable_communication_fusion (bool): Enable communication fusion to optimize the number of
@@ -674,7 +686,9 @@ class AutoParallel(Cell):
              and optimizer parallel allgather communication if True. Currently, do not support
              `O2 <https://www.mindspore.cn/docs/en/master/api_python/mindspore/mindspore.JitConfig.html>`_
              Default: False.
-            - computation_communication_fusion_level (int): Enable the fusion between compute and communicate
+            - computation_communication_fusion_level (int): Enable the fusion between compute and communicate,
+              which fuses communication tasks and computing tasks, allows for partial pipelining and parallel
+              execution of these tasks during operation, thereby enhancing performance.
              Default: ``0``. Note: This function must be used with Ascend Training Solution 24.0.RC2 or later.
              This is an experimental configuration, may be changed or canceled in the future.
 
@@ -685,6 +699,12 @@ class AutoParallel(Cell):
              - 2: Apply fusion to backward nodes.
 
              - 3: Apply fusion to all nodes.
+
+              .. warning::
+                  After setting ``export MS_ENABLE_LCCL=on``, the fusion operator based on memory semantics will be
+                  used. Please note that this operator is still in an experimental stage and may be changed or
+                  removed in the future.
+
            - dataset_broadcast_opt_level (int): Optimize the scenario that the dataset repeated reading. Only
              support O0/O1 jit level. It doesn't work in O2 mode. Default: ``0``.
 
@@ -728,5 +748,6 @@ class AutoParallel(Cell):
         self._transformer_opt_config = file_path
         ctx.ascend_config['parallel_speed_up_json_path'] = file_path
 
+    @jit
     def construct(self, *args, **kwargs):
         return self.network(*args, **kwargs)
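A hedged usage sketch of the `AutoParallel` changes above: `parallel_mode` is validated against 'semi_auto', 'sharding_propagation' and 'recursive_programming'; `pipeline()` now also accepts the 'seqpipe', 'seqvpp', 'seqsmartvpp' and 'zero_bubble_v' schedulers; and `construct` is wrapped with `@jit`. The import path follows the file location in this diff, and the tiny network is a stand-in:

    from mindspore import nn
    from mindspore.parallel.auto_parallel import AutoParallel

    class TinyNet(nn.Cell):
        def __init__(self):
            super().__init__()
            self.dense = nn.Dense(8, 8)

        def construct(self, x):
            return self.dense(x)

    parallel_net = AutoParallel(TinyNet(), parallel_mode="semi_auto")
    parallel_net.pipeline(stages=2, output_broadcast=False, interleave=False, scheduler="zero_bubble_v")
    parallel_net.disable_strategy_file_only_for_trainable_params()  # also save/load non-trainable params
    # Calling parallel_net(x) inside a launched distributed job runs the @jit-wrapped construct.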
mindspore/parallel/checkpoint_convert.py
CHANGED
@@ -15,10 +15,10 @@
 """Convert distributed checkpoint"""
 from __future__ import absolute_import
 
-
+__all__ = ["rank_list_for_convert", "convert_checkpoint_by_rank", "convert_checkpoints"]
 
-
-
+from mindspore.parallel.checkpoint_transform import rank_list_for_transform, transform_checkpoint_by_rank, \
+    transform_checkpoints
 
 
 def rank_list_for_convert(rank_id, src_strategy_file=None, dst_strategy_file=None):
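The rewritten module re-exports the convert_* API on top of the transform_* implementations imported from `mindspore.parallel.checkpoint_transform`. A sketch of calling the wrappers, assuming they keep the signatures of their transform_* counterparts; the strategy files and directories are placeholders:

    from mindspore.parallel.checkpoint_convert import rank_list_for_convert, convert_checkpoints

    # Source rank files that rank 0 needs in order to build its destination checkpoint.
    needed_ranks = rank_list_for_convert(0, src_strategy_file="./src_strategy.ckpt",
                                         dst_strategy_file="./dst_strategy.ckpt")

    # Convert a directory of per-rank checkpoints from the source sharding to the destination sharding.
    convert_checkpoints("./src_checkpoints", "./dst_checkpoints", "converted",
                        src_strategy_file="./src_strategy.ckpt",
                        dst_strategy_file="./dst_strategy.ckpt")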
mindspore/parallel/checkpoint_transform.py
CHANGED
@@ -582,6 +582,8 @@ def transform_checkpoints(src_checkpoints_dir, dst_checkpoints_dir, ckpt_prefix,
         The number of multiprocess settings is related to the size of the host, and it is not recommended to set it
         too large, otherwise it may cause freezing.
 
+        This function does not support converting remove_redundancy's checkpoint file.
+
     Args:
         src_checkpoints_dir (str): The source checkpoints directory.
         dst_checkpoints_dir (str): The destination checkpoints directory to save the converted checkpoints.
@@ -1163,6 +1165,8 @@ def load_distributed_checkpoint(network, checkpoint_filenames=None, predict_stra
         train_strategy_filename = ms.context.get_auto_parallel_context("strategy_ckpt_load_file")
 
     _train_strategy = build_searched_strategy(train_strategy_filename)
+    if not _train_strategy:
+        return True
     train_strategy = _convert_to_list(_train_strategy)
 
     train_dev_count = 1
@@ -1187,7 +1191,7 @@ def load_distributed_checkpoint(network, checkpoint_filenames=None, predict_stra
     param_not_in_ckpt = []
     for _, param in network.parameters_and_names():
         sliced_params = []
-        if param.name not in rank_list
+        if param.name not in rank_list:
            param_not_in_strategy.append(param.name)
            continue
        if param.name not in param_total_dict: