mindspore 2.3.0__cp39-none-any.whl → 2.3.0rc2__cp39-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of mindspore might be problematic.
- mindspore/.commit_id +1 -1
- mindspore/Third_Party_Open_Source_Software_Notice +0 -1512
- mindspore/__init__.py +1 -2
- mindspore/_c_dataengine.cpython-39-aarch64-linux-gnu.so +0 -0
- mindspore/_c_expression.cpython-39-aarch64-linux-gnu.so +0 -0
- mindspore/_c_mindrecord.cpython-39-aarch64-linux-gnu.so +0 -0
- mindspore/_checkparam.py +25 -5
- mindspore/_extends/graph_kernel/model/graph_parallel.py +1 -1
- mindspore/_extends/parse/__init__.py +2 -2
- mindspore/_extends/parse/compile_config.py +0 -29
- mindspore/_extends/parse/namespace.py +2 -2
- mindspore/_extends/parse/parser.py +5 -21
- mindspore/_extends/parse/resources.py +7 -5
- mindspore/_extends/parse/standard_method.py +59 -40
- mindspore/_mindspore_offline_debug.cpython-39-aarch64-linux-gnu.so +0 -0
- mindspore/amp.py +5 -26
- mindspore/bin/cache_admin +0 -0
- mindspore/bin/cache_server +0 -0
- mindspore/boost/adasum.py +1 -1
- mindspore/boost/base.py +1 -1
- mindspore/boost/boost_cell_wrapper.py +1 -1
- mindspore/boost/grad_freeze.py +2 -2
- mindspore/boost/less_batch_normalization.py +6 -9
- mindspore/common/__init__.py +1 -8
- mindspore/common/_register_for_tensor.py +9 -8
- mindspore/common/api.py +65 -275
- mindspore/common/dtype.py +4 -8
- mindspore/common/dump.py +5 -2
- mindspore/common/jit_config.py +1 -1
- mindspore/common/lazy_inline.py +2 -14
- mindspore/common/parameter.py +15 -14
- mindspore/common/recompute.py +5 -20
- mindspore/common/sparse_tensor.py +6 -21
- mindspore/common/tensor.py +52 -100
- mindspore/communication/__init__.py +11 -6
- mindspore/communication/management.py +94 -92
- mindspore/context.py +18 -180
- mindspore/dataset/engine/datasets.py +46 -69
- mindspore/dataset/engine/datasets_user_defined.py +53 -72
- mindspore/dataset/engine/datasets_vision.py +2 -2
- mindspore/dataset/engine/queue.py +38 -56
- mindspore/dataset/engine/validators.py +5 -11
- mindspore/dataset/vision/__init__.py +5 -5
- mindspore/dataset/vision/c_transforms.py +5 -5
- mindspore/dataset/vision/py_transforms_util.py +1 -1
- mindspore/dataset/vision/transforms.py +46 -591
- mindspore/dataset/vision/utils.py +1 -121
- mindspore/dataset/vision/validators.py +3 -9
- mindspore/hal/__init__.py +1 -7
- mindspore/hal/device.py +1 -1
- mindspore/include/api/model.h +0 -3
- mindspore/include/dataset/vision.h +2 -54
- mindspore/include/mindapi/base/types.h +0 -1
- mindspore/lib/libdnnl.so.2 +0 -0
- mindspore/lib/libmindspore.so +0 -0
- mindspore/lib/libmindspore_backend.so +0 -0
- mindspore/lib/libmindspore_common.so +0 -0
- mindspore/lib/libmindspore_core.so +0 -0
- mindspore/lib/libmindspore_glog.so.0 +0 -0
- mindspore/lib/libmindspore_gpr.so.15 +0 -0
- mindspore/lib/libmindspore_grpc++.so.1 +0 -0
- mindspore/lib/libmindspore_grpc.so.15 +0 -0
- mindspore/lib/libmindspore_shared_lib.so +0 -0
- mindspore/lib/libmpi_adapter.so +0 -0
- mindspore/lib/libmpi_collective.so +0 -0
- mindspore/lib/libnnacl.so +0 -0
- mindspore/lib/libopencv_core.so.4.5 +0 -0
- mindspore/lib/libps_cache.so +0 -0
- mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/config/ascend310p/aic-ascend310p-ops-info.json +0 -35
- mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/custom_aicore_ops_impl/kv_cache_mgr.py +0 -2
- mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/vector_core/tbe/custom_aicore_ops_impl/kv_cache_mgr.py +0 -2
- mindspore/lib/plugin/ascend/custom_aicpu_ops/op_impl/cpu/aicpu_kernel/impl/libcust_cpu_kernels.so +0 -0
- mindspore/lib/plugin/ascend/custom_aicpu_ops/op_impl/cpu/config/cust_aicpu_kernel.json +0 -72
- mindspore/lib/plugin/ascend/custom_aicpu_ops/op_proto/libcust_op_proto.so +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_ops/op_api/include/{aclnn_all_finite.h → aclnn_add_custom.h} +11 -9
- mindspore/lib/plugin/ascend/custom_ascendc_ops/op_api/include/aclnn_decoder_kv_cache.h +1 -1
- mindspore/lib/plugin/ascend/custom_ascendc_ops/op_api/include/aclnn_prompt_kv_cache.h +1 -1
- mindspore/lib/plugin/ascend/custom_ascendc_ops/op_api/lib/libcust_opapi.so +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_ops/op_impl/ai_core/tbe/config/ascend310p/aic-ascend310p-ops-info.json +12 -184
- mindspore/lib/plugin/ascend/custom_ascendc_ops/op_impl/ai_core/tbe/config/ascend910/aic-ascend910-ops-info.json +15 -7
- mindspore/lib/plugin/ascend/custom_ascendc_ops/op_impl/ai_core/tbe/config/ascend910b/aic-ascend910b-ops-info.json +15 -7
- mindspore/lib/plugin/ascend/custom_ascendc_ops/op_impl/ai_core/tbe/custom_ascendc_ops_impl/dynamic/add_custom.cpp +81 -0
- mindspore/lib/plugin/ascend/custom_ascendc_ops/op_impl/ai_core/tbe/custom_ascendc_ops_impl/dynamic/add_custom.py +134 -0
- mindspore/lib/plugin/ascend/custom_ascendc_ops/op_impl/ai_core/tbe/custom_ascendc_ops_impl/dynamic/decoder_kv_cache.py +31 -77
- mindspore/lib/plugin/ascend/custom_ascendc_ops/op_impl/ai_core/tbe/custom_ascendc_ops_impl/dynamic/prompt_kv_cache.py +31 -77
- mindspore/lib/plugin/ascend/custom_ascendc_ops/op_impl/ai_core/tbe/op_tiling/lib/linux/aarch64/libcust_opmaster_rt2.0.so +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_ops/op_impl/ai_core/tbe/op_tiling/liboptiling.so +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_ops/op_proto/inc/op_proto.h +5 -4
- mindspore/lib/plugin/ascend/custom_ascendc_ops/op_proto/lib/linux/aarch64/libcust_opsproto_rt2.0.so +0 -0
- mindspore/lib/plugin/ascend/libascend_collective.so +0 -0
- mindspore/lib/plugin/ascend/libdvpp_utils.so +0 -0
- mindspore/lib/plugin/ascend/libhccl_plugin.so +0 -0
- mindspore/lib/plugin/ascend/liblowlatency_collective.so +0 -0
- mindspore/lib/plugin/ascend/libmindspore_cpu_kernels.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/bin/DeviceBin +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/bin/PkgInspect +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/bin/op_man +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/device/ascend910b/bin/ascend910b.bin +286 -275
- mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/host/libasdops_cann_host.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/host/libasdops_host.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/lib/libasdops.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/lib/libasdops_static.a +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/add/add_impl.h +0 -1
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/apply_rotary_pos_emb/apply_rotary_pos_emb_impl.h +0 -1
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/asdop/asd_op_impl.h +0 -3
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/backend_param.h +0 -5
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/cast/cast_tiling.h +45 -1
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/compare/compare_impl.h +0 -1
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/flash_attention_score/flash_attention_score_impl.h +4 -8
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/flash_attention_score/flash_attention_score_tiling.h +4 -11
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/flash_attention_score/kernel/flash_attention_score_mix_hwsync.h +0 -18
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/internal_kernel.h +0 -6
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/internal_rtbackend.h +75 -1
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/matmul/kernel/matmul.h +5 -5
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/matmul/matmul_impl.h +3 -18
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/matmul_common/pp_matmul_common_tiling.h +5 -5
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/matmul_common/pp_matmul_info.h +2 -2
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/matmul_common/tiling_data.h +3 -36
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/matmul_stridedslice/kernel/matmul_stridedslice_fusion.h +2 -2
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/matmul_stridedslice/matmul_stridedslice_fusion_impl.h +4 -22
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/op_param.h +2 -16
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/paged_attention/kernel/paged_attention_mix_hwsync.h +3 -1
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/paged_attention/paged_attention_impl.h +4 -5
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/paged_attention/paged_attention_tiling.h +4 -9
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/param/attention_param.h +2 -5
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/param/matmul_ext_param.h +0 -1
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/param/matmul_qkv_param.h +4 -10
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/param/sub_param.h +12 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/rms_norm/rms_norm_impl.h +0 -1
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/sub/sub_impl.h +0 -1
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/tune_repo/matmul_table.h +1 -1
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/utils/backend.h +2 -10
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/utils/elewise_utils.h +1 -5
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/utils/log/log.h +0 -1
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/utils/log/log_tiling.h +0 -17
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/utils/math.h +7 -2
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libAdd_impl.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libSub_impl.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libadd_layernorm_impl.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libadd_rms_norm_impl.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libapply_rotary_pos_emb_impl.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libcast_impl.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libgelu_impl.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libmatmul_impl.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libmatmul_stridedslice_fusion_impl.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libms_kernels_internal.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libnot_equal_impl.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libreshape_and_cache_impl.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/librms_norm_impl.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/BSAttention/flash_attention_score_bf16_bnsd_full_mix.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/BSAttention/flash_attention_score_bf16_bnsd_tri_mix.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/BSAttention/flash_attention_score_bf16_bsh_full_mix.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/BSAttention/flash_attention_score_bf16_bsh_tri_mix.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/BSAttention/flash_attention_score_fp16_bnsd_full_mix.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/BSAttention/flash_attention_score_fp16_bnsd_tri_mix.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/BSAttention/flash_attention_score_fp16_bsh_full_mix.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/BSAttention/flash_attention_score_fp16_bsh_tri_mix.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/BSAttention/paged_attention_bf16_bnsd_full_mix.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/BSAttention/paged_attention_bf16_bsh_full_mix.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/BSAttention/paged_attention_fp16_bnsd_full_mix.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/BSAttention/paged_attention_fp16_bsh_full_mix.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/lccl/lib/liblcal.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/lccl/lib/liblccl_wrapper.so +0 -0
- mindspore/lib/plugin/libmindspore_ascend.so.2 +0 -0
- mindspore/mindrecord/filewriter.py +2 -2
- mindspore/mint/__init__.py +40 -720
- mindspore/mint/nn/__init__.py +7 -89
- mindspore/mint/nn/functional.py +16 -165
- mindspore/mint/optim/adamw.py +16 -15
- mindspore/nn/__init__.py +2 -0
- mindspore/nn/cell.py +98 -97
- mindspore/nn/extend/basic.py +2 -2
- mindspore/nn/extend/embedding.py +1 -1
- mindspore/nn/extend/layer/normalization.py +5 -7
- mindspore/nn/generator.py +297 -0
- mindspore/nn/layer/activation.py +3 -4
- mindspore/nn/layer/basic.py +16 -79
- mindspore/nn/layer/conv.py +8 -17
- mindspore/nn/layer/embedding.py +4 -1
- mindspore/nn/layer/math.py +1 -1
- mindspore/nn/layer/normalization.py +1 -1
- mindspore/nn/layer/pooling.py +0 -5
- mindspore/nn/layer/rnn_cells.py +2 -2
- mindspore/nn/loss/loss.py +19 -19
- mindspore/nn/optim/adasum.py +1 -1
- mindspore/nn/optim/sgd.py +2 -3
- mindspore/nn/probability/distribution/exponential.py +1 -1
- mindspore/nn/probability/distribution/geometric.py +1 -1
- mindspore/nn/probability/distribution/logistic.py +1 -1
- mindspore/nn/wrap/cell_wrapper.py +1 -25
- mindspore/nn/wrap/loss_scale.py +1 -24
- mindspore/numpy/array_ops.py +1 -5
- mindspore/numpy/dtypes.py +3 -3
- mindspore/numpy/math_ops.py +8 -8
- mindspore/ops/__init__.py +1 -1
- mindspore/ops/_grad_experimental/grad_comm_ops.py +16 -75
- mindspore/ops/_vmap/vmap_array_ops.py +0 -27
- mindspore/ops/_vmap/vmap_math_ops.py +1 -29
- mindspore/ops/_vmap/vmap_nn_ops.py +18 -19
- mindspore/ops/auto_generate/cpp_create_prim_instance_helper.py +8 -34
- mindspore/ops/auto_generate/gen_arg_dtype_cast.py +9 -2
- mindspore/ops/auto_generate/gen_arg_handler.py +0 -26
- mindspore/ops/auto_generate/gen_extend_func.py +27 -603
- mindspore/ops/auto_generate/gen_ops_def.py +203 -993
- mindspore/ops/auto_generate/gen_ops_prim.py +402 -1946
- mindspore/ops/auto_generate/pyboost_inner_prim.py +20 -90
- mindspore/ops/composite/base.py +6 -3
- mindspore/ops/composite/math_ops.py +1 -1
- mindspore/ops/composite/multitype_ops/_compile_utils.py +17 -24
- mindspore/ops/composite/multitype_ops/_constexpr_utils.py +1 -1
- mindspore/ops/extend/__init__.py +3 -2
- mindspore/ops/extend/array_func.py +51 -10
- mindspore/ops/extend/nn_func.py +78 -2
- mindspore/ops/function/__init__.py +13 -8
- mindspore/ops/function/array_func.py +179 -455
- mindspore/ops/function/clip_func.py +1 -1
- mindspore/ops/function/grad/grad_func.py +3 -3
- mindspore/ops/function/math_func.py +103 -117
- mindspore/ops/function/nn_func.py +163 -275
- mindspore/ops/function/other_func.py +2 -2
- mindspore/ops/function/random_func.py +69 -202
- mindspore/ops/function/sparse_func.py +4 -4
- mindspore/ops/functional.py +327 -332
- mindspore/ops/operations/__init__.py +3 -13
- mindspore/ops/operations/_grad_ops.py +27 -3
- mindspore/ops/operations/_inner_ops.py +356 -53
- mindspore/ops/operations/_rl_inner_ops.py +2 -2
- mindspore/ops/operations/_tensor_array.py +8 -8
- mindspore/ops/operations/array_ops.py +65 -82
- mindspore/ops/operations/comm_ops.py +93 -784
- mindspore/ops/operations/custom_ops.py +28 -51
- mindspore/ops/operations/debug_ops.py +4 -4
- mindspore/ops/operations/inner_ops.py +2 -2
- mindspore/ops/operations/manually_defined/ops_def.py +4 -304
- mindspore/ops/operations/math_ops.py +50 -3
- mindspore/ops/operations/nn_ops.py +247 -14
- mindspore/ops/operations/other_ops.py +3 -3
- mindspore/ops/operations/random_ops.py +1 -1
- mindspore/ops/operations/sparse_ops.py +1 -1
- mindspore/ops/primitive.py +8 -9
- mindspore/ops/silent_check.py +5 -5
- mindspore/ops_generate/arg_dtype_cast.py +9 -2
- mindspore/ops_generate/arg_handler.py +0 -26
- mindspore/ops_generate/gen_aclnn_implement.py +4 -1
- mindspore/ops_generate/gen_ops.py +4 -26
- mindspore/ops_generate/gen_pyboost_func.py +12 -41
- mindspore/ops_generate/gen_utils.py +0 -21
- mindspore/ops_generate/pyboost_utils.py +2 -7
- mindspore/ops_generate/template.py +0 -1
- mindspore/parallel/_auto_parallel_context.py +1 -21
- mindspore/parallel/_tensor.py +5 -0
- mindspore/parallel/_transformer/transformer.py +1 -1
- mindspore/parallel/_utils.py +1 -15
- mindspore/parallel/algo_parameter_config.py +3 -1
- mindspore/parallel/checkpoint_transform.py +9 -12
- mindspore/parallel/cluster/process_entity/_api.py +29 -28
- mindspore/parallel/cluster/process_entity/_utils.py +3 -13
- mindspore/parallel/cluster/run.py +16 -13
- mindspore/parallel/parameter_broadcast.py +2 -2
- mindspore/parallel/shard.py +17 -31
- mindspore/profiler/__init__.py +2 -3
- mindspore/profiler/common/util.py +2 -107
- mindspore/profiler/envprofiling.py +1 -1
- mindspore/profiler/parser/ascend_analysis/constant.py +21 -8
- mindspore/profiler/parser/ascend_analysis/file_manager.py +0 -82
- mindspore/profiler/parser/ascend_analysis/function_event.py +28 -43
- mindspore/profiler/parser/ascend_analysis/fwk_cann_parser.py +27 -49
- mindspore/profiler/parser/ascend_analysis/fwk_file_parser.py +10 -15
- mindspore/profiler/parser/ascend_analysis/msprof_timeline_parser.py +20 -25
- mindspore/profiler/parser/ascend_analysis/profiler_info_parser.py +5 -5
- mindspore/profiler/parser/ascend_analysis/trace_event_manager.py +1 -10
- mindspore/profiler/parser/ascend_hccl_generator.py +1 -4
- mindspore/profiler/parser/ascend_msprof_exporter.py +22 -43
- mindspore/profiler/parser/ascend_timeline_generator.py +5 -7
- mindspore/profiler/parser/minddata_parser.py +3 -72
- mindspore/profiler/profiling.py +59 -176
- mindspore/rewrite/api/node.py +1 -1
- mindspore/rewrite/common/namespace.py +5 -5
- mindspore/rewrite/parsers/assign_parser.py +0 -2
- mindspore/rewrite/parsers/class_def_parser.py +4 -8
- mindspore/run_check/_check_version.py +1 -1
- mindspore/scipy/fft.py +3 -1
- mindspore/scipy/linalg.py +3 -2
- mindspore/scipy/ops.py +3 -5
- mindspore/scipy/optimize/__init__.py +2 -2
- mindspore/train/__init__.py +4 -4
- mindspore/train/anf_ir_pb2.py +2 -8
- mindspore/train/callback/__init__.py +2 -5
- mindspore/train/callback/_backup_and_restore.py +2 -2
- mindspore/train/callback/_checkpoint.py +16 -104
- mindspore/train/callback/_landscape.py +1 -1
- mindspore/train/callback/_time_monitor.py +1 -1
- mindspore/train/data_sink.py +4 -5
- mindspore/train/dataset_helper.py +20 -45
- mindspore/train/model.py +38 -266
- mindspore/train/serialization.py +105 -256
- mindspore/train/summary/_summary_adapter.py +1 -1
- mindspore/version.py +1 -1
- {mindspore-2.3.0.dist-info → mindspore-2.3.0rc2.dist-info}/METADATA +2 -2
- {mindspore-2.3.0.dist-info → mindspore-2.3.0rc2.dist-info}/RECORD +303 -420
- mindspore/_extends/pijit/__init__.py +0 -23
- mindspore/_extends/pijit/pijit_func_white_list.py +0 -343
- mindspore/common/file_system.py +0 -48
- mindspore/common/generator.py +0 -260
- mindspore/common/no_inline.py +0 -54
- mindspore/common/np_dtype.py +0 -25
- mindspore/communication/comm_func.py +0 -1140
- mindspore/hal/memory.py +0 -326
- mindspore/lib/libavcodec.so.59 +0 -0
- mindspore/lib/libavdevice.so.59 +0 -0
- mindspore/lib/libavfilter.so.8 +0 -0
- mindspore/lib/libavformat.so.59 +0 -0
- mindspore/lib/libavutil.so.57 +0 -0
- mindspore/lib/libmindspore_np_dtype.so +0 -0
- mindspore/lib/libswresample.so.4 +0 -0
- mindspore/lib/libswscale.so.6 +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_ops/op_impl/ai_core/tbe/custom_ascendc_ops_impl/dynamic/all_finite.cpp +0 -326
- mindspore/lib/plugin/ascend/custom_ascendc_ops/op_impl/ai_core/tbe/custom_ascendc_ops_impl/dynamic/all_finite.py +0 -180
- mindspore/lib/plugin/ascend/custom_ascendc_ops/op_impl/ai_core/tbe/kernel/ascend910b/all_finite/AllFinite_576ceaeef5870c451cab59af55ea46ad.json +0 -58
- mindspore/lib/plugin/ascend/custom_ascendc_ops/op_impl/ai_core/tbe/kernel/ascend910b/all_finite/AllFinite_576ceaeef5870c451cab59af55ea46ad.o +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_ops/op_impl/ai_core/tbe/kernel/ascend910b/all_finite/AllFinite_86a73ff6e28d734c96bb8d3054f7dd18.json +0 -58
- mindspore/lib/plugin/ascend/custom_ascendc_ops/op_impl/ai_core/tbe/kernel/ascend910b/all_finite/AllFinite_86a73ff6e28d734c96bb8d3054f7dd18.o +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_ops/op_impl/ai_core/tbe/kernel/ascend910b/all_finite/AllFinite_f55e0ebaad1f2f572e43677336992fa0.json +0 -58
- mindspore/lib/plugin/ascend/custom_ascendc_ops/op_impl/ai_core/tbe/kernel/ascend910b/all_finite/AllFinite_f55e0ebaad1f2f572e43677336992fa0.o +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_ops/op_impl/ai_core/tbe/kernel/config/ascend910b/all_finite.json +0 -109
- mindspore/lib/plugin/ascend/custom_ascendc_ops/op_impl/ai_core/tbe/kernel/config/ascend910b/binary_info_config.json +0 -38
- mindspore/lib/plugin/ascend/custom_compiler/OWNERS +0 -12
- mindspore/lib/plugin/ascend/custom_compiler/setup.py +0 -255
- mindspore/lib/plugin/ascend/custom_compiler/start.sh +0 -26
- mindspore/lib/plugin/ascend/custom_compiler/template.json +0 -40
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/include/acme.h +0 -24
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/include/acme_op.h +0 -69
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/include/base_type.h +0 -133
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/include/op_creator.h +0 -32
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/include/op_param.h +0 -35
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/include/tiling_info.h +0 -60
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/src/core/kernel_register.h +0 -37
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/src/core/platform/platform_configs.h +0 -89
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/src/core/platform/rt_funcs.h +0 -135
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/src/ops/host_src/add_op.h +0 -34
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/src/ops/host_src/asd_backoff_base.h +0 -62
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/src/ops/host_src/asd_elewise_op.h +0 -33
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/src/ops/host_src/asd_ops.h +0 -88
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/src/ops/host_src/asd_pa_op.h +0 -45
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/src/ops/host_src/cast_op.h +0 -52
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/src/ops/host_src/matmul_op.h +0 -95
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/src/utils/asd_utils.h +0 -84
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/src/utils/comm_utils.h +0 -61
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/apply_rotary_pos_emb/kernel/apply_rotary_pos_emb_fp32.h +0 -224
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_binary/and_impl.h +0 -29
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_binary/div_impl.h +0 -29
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_binary/elewise_binary_impl.h +0 -48
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_binary/elewise_binary_tiling.h +0 -25
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_binary/kernel/and_kernel.h +0 -46
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_binary/kernel/div_kernel.h +0 -46
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_binary/kernel/elewise_binary_base.h +0 -260
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_binary/kernel/elewise_binary_kernel.h +0 -35
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_binary/kernel/max_kernel.h +0 -66
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_binary/kernel/min_kernel.h +0 -66
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_binary/kernel/mul_kernel.h +0 -66
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_binary/kernel/or_kernel.h +0 -46
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_binary/max_impl.h +0 -29
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_binary/min_impl.h +0 -29
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_binary/mul_impl.h +0 -29
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_binary/or_impl.h +0 -29
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_unary/abs_impl.h +0 -29
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_unary/elewise_unary_impl.h +0 -47
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_unary/elewise_unary_tiling.h +0 -24
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_unary/exp_impl.h +0 -29
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_unary/kernel/abs_kernel.h +0 -45
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_unary/kernel/elewise_unary_base.h +0 -148
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_unary/kernel/elewise_unary_kernel.h +0 -31
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_unary/kernel/exp_kernel.h +0 -45
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_unary/kernel/ln_kernel.h +0 -45
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_unary/kernel/not_kernel.h +0 -45
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_unary/kernel/reciprocal_kernel.h +0 -45
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_unary/kernel/relu_kernel.h +0 -55
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_unary/kernel/rsqrt_kernel.h +0 -45
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_unary/kernel/sqrt_kernel.h +0 -45
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_unary/ln_impl.h +0 -29
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_unary/not_impl.h +0 -29
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_unary/reciprocal_impl.h +0 -29
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_unary/relu_impl.h +0 -29
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_unary/rsqrt_impl.h +0 -29
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_unary/sqrt_impl.h +0 -29
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/grouped_matmul/grouped_matmul_impl.h +0 -45
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/grouped_matmul/grouped_matmul_tiling.h +0 -187
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/grouped_matmul/kernel/grouped_matmul.h +0 -245
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/grouped_matmul/kernel/grouped_matmul_interface.h +0 -24
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/grouped_matmul/kernel/grouped_matmul_utils.h +0 -111
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/grouped_matmul/tiling_data.h +0 -54
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/param/compare_param.h +0 -31
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/param/elewise_param.h +0 -41
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/param/grouped_matmul_param.h +0 -40
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/profiling_util.h +0 -364
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/utils/log/log_utils.h +0 -69
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/utils/register/kernel_creator.h +0 -39
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/utils/register/kernel_registry.h +0 -114
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/utils/utils.h +0 -98
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/MatMulPostFusionMixTactic/matmul_postfusion_mix.json +0 -19
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/MatMulPostFusionMixTactic/matmul_postfusion_mix.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/MatMulPostFusionMixTactic/matmul_postfusion_mix_mix_aic_0.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/MatMulPostFusionMixTactic/matmul_postfusion_mix_mix_aiv_0.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/MultiMatMulPostFusionMixTactic/multi_matmul_postfusion_mix.json +0 -19
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/MultiMatMulPostFusionMixTactic/multi_matmul_postfusion_mix.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/MultiMatMulPostFusionMixTactic/multi_matmul_postfusion_mix_mix_aic_0.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/MultiMatMulPostFusionMixTactic/multi_matmul_postfusion_mix_mix_aiv_0.o +0 -0
- mindspore/mint/linalg/__init__.py +0 -22
- mindspore/nn/layer/embedding_service.py +0 -531
- mindspore/nn/layer/embedding_service_layer.py +0 -393
- mindspore/ops/function/reshard_func.py +0 -102
- mindspore/ops/operations/_infer_ops.py +0 -19
- mindspore/ops/operations/reshard_ops.py +0 -53
- mindspore/profiler/common/process_pool.py +0 -41
- mindspore/profiler/common/singleton.py +0 -28
- mindspore/profiler/parser/ascend_integrate_generator.py +0 -42
- mindspore/profiler/parser/ascend_memory_generator.py +0 -185
- mindspore/train/callback/_cluster_monitor.py +0 -201
- mindspore/train/callback/_flops_collector.py +0 -238
- mindspore/train/callback/_mindio_ttp.py +0 -443
- {mindspore-2.3.0.dist-info → mindspore-2.3.0rc2.dist-info}/WHEEL +0 -0
- {mindspore-2.3.0.dist-info → mindspore-2.3.0rc2.dist-info}/entry_points.txt +0 -0
- {mindspore-2.3.0.dist-info → mindspore-2.3.0rc2.dist-info}/top_level.txt +0 -0
mindspore/ops/operations/comm_ops.py

@@ -51,12 +51,17 @@ class ReduceOp:
 .. note::
 Before running the following examples, you need to configure the communication environment variables.
 
-For Ascend/GPU/CPU devices, it is recommended to use the msrun startup method without any third-party
-or configuration file dependencies.
-Please see the `msrun start up
-<https://www.mindspore.cn/tutorials/experts/zh-CN/master/parallel/msrun_launcher.html>`_
+For the Ascend devices, users need to prepare the rank table, set rank_id and device_id.
+Please see the `rank table Startup
+<https://www.mindspore.cn/tutorials/experts/en/master/parallel/rank_table.html>`_
 for more details.
 
+For the GPU devices, users need to prepare the host file and mpi, please see the `mpirun Startup
+<https://www.mindspore.cn/tutorials/experts/en/master/parallel/mpirun.html>`_ .
+
+For the CPU device, users need to write a dynamic cluster startup script, please see the `Dynamic Cluster
+Startup <https://www.mindspore.cn/tutorials/experts/en/master/parallel/dynamic_cluster.html>`_ .
+
 This example should be run with multiple devices.
 
 >>> import numpy as np
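Every example touched by these hunks shares the same initialization boilerplate once the environment above is configured. A minimal sketch, not part of the diff, assuming the process was already launched by one of the startup methods described in either version of the note:

    import mindspore as ms
    from mindspore.communication import init, get_rank, get_group_size

    ms.set_context(mode=ms.GRAPH_MODE)
    init()                               # join the default world communication group
    print(get_rank(), get_group_size())  # this process's rank and the total number of devices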
@@ -108,16 +113,15 @@ def check_hcom_group_valid(group, prim_name=None):
 
 class AllReduce(Primitive):
 """
-Reduces
-returns the tensor which is all reduced.
+Reduces the tensor data across all devices in such a way that all devices will get the same final result.
 
 Note:
 The tensors must have the same shape and format in all processes of the collection.
 
 Args:
-op (str
+op (str): Specifies an operation used for element-wise reductions, like sum, prod, max, and min.
 On the CPU, only 'sum' is supported. Default: ``ReduceOp.SUM`` .
-group (str
+group (str): The communication group to work on. Default: ``GlobalComm.WORLD_COMM_GROUP`` , which
 means ``"hccl_world_group"`` in Ascend, and ``"nccl_world_group"`` in GPU.
 
 Inputs:
@@ -128,8 +132,8 @@ class AllReduce(Primitive):
 The contents depend on the specified operation.
 
 Raises:
-TypeError: If any of `op` and `group` is not a str
-
+TypeError: If any of `op` and `group` is not a str,
+or fusion is not an integer, or the input's dtype is bool.
 
 Supported Platforms:
 ``Ascend`` ``GPU`` ``CPU``
@@ -138,12 +142,17 @@ class AllReduce(Primitive):
 .. note::
 Before running the following examples, you need to configure the communication environment variables.
 
-For Ascend/GPU/CPU devices, it is recommended to use the msrun startup method without any third-party
-or configuration file dependencies.
-Please see the `msrun start up
-<https://www.mindspore.cn/tutorials/experts/zh-CN/master/parallel/msrun_launcher.html>`_
+For the Ascend devices, users need to prepare the rank table, set rank_id and device_id.
+Please see the `rank table Startup
+<https://www.mindspore.cn/tutorials/experts/en/master/parallel/rank_table.html>`_
 for more details.
 
+For the GPU devices, users need to prepare the host file and mpi, please see the `mpirun Startup
+<https://www.mindspore.cn/tutorials/experts/en/master/parallel/mpirun.html>`_ .
+
+For the CPU device, users need to write a dynamic cluster startup script, please see the `Dynamic Cluster
+Startup <https://www.mindspore.cn/tutorials/experts/en/master/parallel/dynamic_cluster.html>`_ .
+
 This example should be run with 2 devices.
 
 >>> import numpy as np
@@ -151,7 +160,7 @@ class AllReduce(Primitive):
 >>> from mindspore import Tensor
 >>> from mindspore.ops import ReduceOp
 >>> import mindspore.nn as nn
->>>
+>>> import mindspore.ops as ops
 >>>
 >>> init()
 >>> class Net(nn.Cell):
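For orientation, the AllReduce pattern documented in this hunk boils down to the following minimal sketch (illustrative, not part of the diff; it assumes two devices launched with one of the startup methods in the note, so every element of the all-reduced sum equals 2):

    import numpy as np
    import mindspore.nn as nn
    import mindspore.ops as ops
    from mindspore import Tensor
    from mindspore.communication import init
    from mindspore.ops import ReduceOp

    init()

    class Net(nn.Cell):
        def __init__(self):
            super(Net, self).__init__()
            self.allreduce_sum = ops.AllReduce(ReduceOp.SUM)  # element-wise sum over the world group

        def construct(self, x):
            return self.allreduce_sum(x)

    x = Tensor(np.ones([2, 8]).astype(np.float32))
    print(Net()(x))  # with 2 devices: every element is 2.0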
@@ -191,104 +200,16 @@ class AllReduce(Primitive):
 self.add_prim_attr('no_eliminate', True)
 
 
-class Reduce(PrimitiveWithInfer):
-"""
-Reduces tensors across the processes in the specified communication group, sends the result
-to the target dest_rank(local rank), and returns the tensor which is sent to the target process.
-
-Note:
-Only process with destination rank receives the reduced output.
-Support PyNative mode and Graph mode, but Graph mode only supports scenes with a graph compilation level of O0.
-Other processes only get a tensor with shape [1], which has no mathematical meaning.
-
-Args:
-dest_rank (int): The target process(local rank) in the specific group that receives the reduced output.
-op (str, optional): Specifies an operation used for element-wise reductions, like sum, prod, max, and min.
-On the CPU, only 'sum' is supported. Default: ``ReduceOp.SUM`` .
-group (str, optional): The communication group to work on. Default: ``GlobalComm.WORLD_COMM_GROUP`` , which
-means ``"hccl_world_group"`` in Ascend, and ``"nccl_world_group"`` in GPU.
-
-Inputs:
-- **input_x** (Tensor) - The shape of tensor is :math:`(x_1, x_2, ..., x_R)`.
-
-Outputs:
-Tensor. Return the tensor in the specific rank of the process after reduction.
-The shape of tensor is :math:`(x_1, x_2, ..., x_R)`.
-
-Raises:
-TypeError: If the type of the first input parameter is not Tensor,
-or any of `op` and `group` is not a str.
-RuntimeError: If device target is invalid, or backend is invalid, or distributed initialization fails.
-
-Supported Platforms:
-``Ascend``
-
-Examples:
-.. note::
-Before running the following examples, you need to configure the communication environment variables.
-
-For Ascend/GPU/CPU devices, it is recommended to use the msrun startup method without any third-party
-or configuration file dependencies.
-Please see the `msrun start up
-<https://www.mindspore.cn/tutorials/experts/zh-CN/master/parallel/msrun_launcher.html>`_
-for more details.
-
-This example should be run with 4 devices.
-
->>> from mindspore import ops
->>> import mindspore.nn as nn
->>> from mindspore.communication import init
->>> from mindspore import Tensor
->>> import numpy as np
->>> # Launch 4 processes.
->>> init()
->>> class ReduceNet(nn.Cell):
->>> def __init__(self):
->>> super(Net, self).__init__()
->>> self.reduce = ops.Reduce(dest_rank=1)
->>>
->>> def construct(self, x):
->>> out = self.reduce(x)
->>> return out
->>> input = Tensor(np.ones([2, 8]).astype(np.float32))
->>> net = ReduceNet()
->>> output = net(input)
->>> print(output)
-Process with rank 1: [[4. 4. 4. 4. 4. 4. 4. 4.]
-[4. 4. 4. 4. 4. 4. 4. 4.]],
-Other proesses: [0.].
-"""
-
-@prim_attr_register
-def __init__(self, dest_rank, op=ReduceOp.SUM, group=GlobalComm.WORLD_COMM_GROUP):
-validator.check_value_type('group', _get_group(group), (str,), self.name)
-validator.check_value_type('op', op, (type(ReduceOp.SUM),), self.name)
-self.dest_rank = dest_rank
-self.op = op
-self.group = _get_group(group)
-self.add_prim_attr('group', _get_group(group))
-self.add_prim_attr('dest_rank', dest_rank)
-
-def infer_shape(self, x_shape):
-# The process with dest_rank returns the reduced output.
-# Other processes only gets a tensor with shape [1], which has no mathematical meaning.
-if self.dest_rank == get_rank():
-return x_shape
-return [1]
-
-def infer_dtype(self, x_dtype):
-return x_dtype
-
-
 class AllGather(PrimitiveWithInfer):
 """
-Gathers tensors from the specified communication group
+Gathers tensors from the specified communication group.
 
 Note:
 - The tensors must have the same shape and format in all processes of the collection.
+- Currently only supports GRAPH_MODE and it should be called in Cell.
 
 Args:
-group (str
+group (str): The communication group to work on. Default: ``GlobalComm.WORLD_COMM_GROUP`` , which
 means ``"hccl_world_group"`` in Ascend, and ``"nccl_world_group"`` in GPU.
 
 Inputs:
@@ -302,7 +223,6 @@ class AllGather(PrimitiveWithInfer):
 TypeError: If `group` is not a str.
 ValueError: If the local rank id of the calling process in the group
 is larger than the group's rank size.
-RuntimeError: If device target is invalid, or backend is invalid, or distributed initialization fails.
 
 Supported Platforms:
 ``Ascend`` ``GPU``
@@ -311,17 +231,22 @@ class AllGather(PrimitiveWithInfer):
 .. note::
 Before running the following examples, you need to configure the communication environment variables.
 
-For Ascend/GPU/CPU devices, it is recommended to use the msrun startup method without any third-party
-or configuration file dependencies.
-Please see the `msrun start up
-<https://www.mindspore.cn/tutorials/experts/zh-CN/master/parallel/msrun_launcher.html>`_
+For the Ascend devices, users need to prepare the rank table, set rank_id and device_id.
+Please see the `rank table Startup
+<https://www.mindspore.cn/tutorials/experts/en/master/parallel/rank_table.html>`_
 for more details.
 
+For the GPU devices, users need to prepare the host file and mpi, please see the `mpirun Startup
+<https://www.mindspore.cn/tutorials/experts/en/master/parallel/mpirun.html>`_ .
+
+For the CPU device, users need to write a dynamic cluster startup script, please see the `Dynamic Cluster
+Startup <https://www.mindspore.cn/tutorials/experts/en/master/parallel/dynamic_cluster.html>`_ .
+
 This example should be run with 2 devices.
 
 >>> import numpy as np
 >>> import mindspore as ms
->>>
+>>> import mindspore.ops as ops
 >>> import mindspore.nn as nn
 >>> from mindspore.communication import init
 >>> from mindspore import Tensor
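For reference, the AllGather usage that this docstring walks through reduces to the sketch below (illustrative, not part of the diff; it assumes two devices, so the gathered output stacks both ranks' tensors along dimension 0):

    import numpy as np
    import mindspore as ms
    import mindspore.nn as nn
    import mindspore.ops as ops
    from mindspore import Tensor
    from mindspore.communication import init

    ms.set_context(mode=ms.GRAPH_MODE)  # the rc2 note above restricts AllGather to GRAPH_MODE inside a Cell
    init()

    class Net(nn.Cell):
        def __init__(self):
            super(Net, self).__init__()
            self.allgather = ops.AllGather()

        def construct(self, x):
            return self.allgather(x)

    x = Tensor(np.ones([2, 8]).astype(np.float32))
    out = Net()(x)  # shape (4, 8) on every rank: both (2, 8) inputs concatenated along dim 0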
@@ -375,25 +300,6 @@ class AllGather(PrimitiveWithInfer):
 return x_dtype
 
 
-class AShardIdentity(PrimitiveWithInfer):
-"""
-Auto parallel virtual operator. Identity operator only for shard function.
-Do nothing in terms of infer_shape, infer_dtype, and the tensor.
-
-It is only for internal use of parallel modules and cannot be called by users.
-"""
-
-@prim_attr_register
-def __init__(self):
-pass
-
-def infer_shape(self, x_shape):
-return x_shape
-
-def infer_dtype(self, x_dtype):
-return x_dtype
-
-
 class _MiniStepAllGather(PrimitiveWithInfer):
 """
 Auto parallel virtual operator. Do nothing in forward, do reducescatter in backward in mini-step. It is only for
@@ -521,8 +427,7 @@ class _HostAllGather(PrimitiveWithInfer):
 
 class ReduceScatter(Primitive):
 r"""
-Reduces and scatters tensors from the specified communication group
-and returns the tensor which is reduced and scattered.
+Reduces and scatters tensors from the specified communication group.
 
 Note:
 The tensors must have the same shape and format in all processes of the collection.
@@ -543,7 +448,6 @@ class ReduceScatter(Primitive):
 Raises:
 TypeError: If any of operation and group is not a string.
 ValueError: If the first dimension of the input cannot be divided by the rank_size.
-RuntimeError: If device target is invalid, or backend is invalid, or distributed initialization fails.
 
 Supported Platforms:
 ``Ascend`` ``GPU``
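The ValueError kept above is the practical constraint to watch: the first input dimension must divide evenly by the group's rank_size, because each rank receives one equally sized slice of the reduced result. A minimal sketch (illustrative, not part of the diff; assumes two devices):

    import numpy as np
    import mindspore as ms
    import mindspore.nn as nn
    import mindspore.ops as ops
    from mindspore import Tensor
    from mindspore.communication import init
    from mindspore.ops import ReduceOp

    ms.set_context(mode=ms.GRAPH_MODE)
    init()

    class Net(nn.Cell):
        def __init__(self):
            super(Net, self).__init__()
            self.reducescatter = ops.ReduceScatter(ReduceOp.SUM)

        def construct(self, x):
            return self.reducescatter(x)

    x = Tensor(np.ones([8, 8]).astype(np.float32))  # dim 0 = 8 divides evenly by rank_size = 2
    out = Net()(x)  # each rank gets a (4, 8) slice of the element-wise sum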
@@ -552,12 +456,17 @@ class ReduceScatter(Primitive):
 .. note::
 Before running the following examples, you need to configure the communication environment variables.
 
-For Ascend/GPU/CPU devices, it is recommended to use the msrun startup method without any third-party
-or configuration file dependencies.
-Please see the `msrun start up
-<https://www.mindspore.cn/tutorials/experts/zh-CN/master/parallel/msrun_launcher.html>`_
+For the Ascend devices, users need to prepare the rank table, set rank_id and device_id.
+Please see the `rank table Startup
+<https://www.mindspore.cn/tutorials/experts/en/master/parallel/rank_table.html>`_
 for more details.
 
+For the GPU devices, users need to prepare the host file and mpi, please see the `mpirun Startup
+<https://www.mindspore.cn/tutorials/experts/en/master/parallel/mpirun.html>`_ .
+
+For the CPU device, users need to write a dynamic cluster startup script, please see the `Dynamic Cluster
+Startup <https://www.mindspore.cn/tutorials/experts/en/master/parallel/dynamic_cluster.html>`_ .
+
 This example should be run with 2 devices.
 
 >>> import mindspore as ms
@@ -565,7 +474,7 @@ class ReduceScatter(Primitive):
 >>> from mindspore.communication import init
 >>> from mindspore.ops import ReduceOp
 >>> import mindspore.nn as nn
->>>
+>>> import mindspore.ops as ops
 >>> import numpy as np
 >>>
 >>> ms.set_context(mode=ms.GRAPH_MODE)
@@ -668,12 +577,12 @@ class Broadcast(PrimitiveWithInfer):
 The tensors must have the same shape and format in all processes of the collection.
 
 Args:
-root_rank (int):
-
+root_rank (int): Source rank. Required in all processes except the one
+that is sending the data.
 group (str, optional): The communication group to work on. Default: ``GlobalComm.WORLD_COMM_GROUP`` .
 
 Inputs:
-- **input_x** (Tensor) - The shape of tensor is :math:`(x_1, x_2, ..., x_R)`.
+- **input_x** (tuple[Tensor]) - The shape of tensor is :math:`(x_1, x_2, ..., x_R)`.
 
 Outputs:
 tuple[Tensor], Tensor has the same shape of the input, i.e., :math:`(x_1, x_2, ..., x_R)`.
@@ -689,19 +598,24 @@ class Broadcast(PrimitiveWithInfer):
 .. note::
 Before running the following examples, you need to configure the communication environment variables.
 
-For Ascend/GPU/CPU devices, it is recommended to use the msrun startup method without any third-party
-or configuration file dependencies.
-Please see the `msrun start up
-<https://www.mindspore.cn/tutorials/experts/zh-CN/master/parallel/msrun_launcher.html>`_
+For the Ascend devices, users need to prepare the rank table, set rank_id and device_id.
+Please see the `rank table Startup
+<https://www.mindspore.cn/tutorials/experts/en/master/parallel/rank_table.html>`_
 for more details.
 
-
+For the GPU devices, users need to prepare the host file and mpi, please see the `mpirun Startup
+<https://www.mindspore.cn/tutorials/experts/en/master/parallel/mpirun.html>`_ .
+
+For the CPU device, users need to write a dynamic cluster startup script, please see the `Dynamic Cluster
+Startup <https://www.mindspore.cn/tutorials/experts/en/master/parallel/dynamic_cluster.html>`_ .
+
+This example should be run with multiple devices.
 
 >>> import mindspore as ms
 >>> from mindspore import Tensor
 >>> from mindspore.communication import init
 >>> import mindspore.nn as nn
->>>
+>>> import mindspore.ops as ops
 >>> import numpy as np
 >>>
 >>> ms.set_context(mode=ms.GRAPH_MODE)
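A condensed version of the Broadcast example that follows these imports, for orientation (illustrative, not part of the diff; assumes two devices, and follows the 2.3.0rc2 docstring above in which both input and output are a tuple of tensors):

    import numpy as np
    import mindspore as ms
    import mindspore.nn as nn
    import mindspore.ops as ops
    from mindspore import Tensor
    from mindspore.communication import init

    ms.set_context(mode=ms.GRAPH_MODE)
    init()

    class Net(nn.Cell):
        def __init__(self):
            super(Net, self).__init__()
            self.broadcast = ops.Broadcast(root_rank=0)  # rank 0 is the source

        def construct(self, x):
            return self.broadcast((x,))  # tuple in, tuple out

    x = Tensor(np.arange(8).reshape((2, 4)).astype(np.float32))
    (out,) = Net()(x)  # every rank ends up with rank 0's tensor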
@@ -834,7 +748,7 @@ class NeighborExchange(Primitive):
 >>> from mindspore import Tensor
 >>> from mindspore.communication import init
 >>> import mindspore.nn as nn
->>>
+>>> import mindspore.ops as ops
 >>> import numpy as np
 >>> class Net(nn.Cell):
 ... def __init__(self):
@@ -919,12 +833,17 @@ class AlltoAll(PrimitiveWithInfer):
 .. note::
 Before running the following examples, you need to configure the communication environment variables.
 
-For Ascend/GPU/CPU devices, it is recommended to use the msrun startup method without any third-party
-or configuration file dependencies.
-Please see the `msrun start up
-<https://www.mindspore.cn/tutorials/experts/zh-CN/master/parallel/msrun_launcher.html>`_
+For the Ascend devices, users need to prepare the rank table, set rank_id and device_id.
+Please see the `rank table Startup
+<https://www.mindspore.cn/tutorials/experts/en/master/parallel/rank_table.html>`_
 for more details.
 
+For the GPU devices, users need to prepare the host file and mpi, please see the `mpirun Startup
+<https://www.mindspore.cn/tutorials/experts/en/master/parallel/mpirun.html>`_ .
+
+For the CPU device, users need to write a dynamic cluster startup script, please see the `Dynamic Cluster
+Startup <https://www.mindspore.cn/tutorials/experts/en/master/parallel/dynamic_cluster.html>`_ .
+
 This example should be run with 8 devices.
 
 >>> import os
@@ -932,7 +851,7 @@ class AlltoAll(PrimitiveWithInfer):
 >>> from mindspore import Tensor
 >>> from mindspore.communication import init
 >>> import mindspore.nn as nn
->>>
+>>> import mindspore.ops as ops
 >>> import numpy as np
 >>> class Net(nn.Cell):
 ... def __init__(self):
@@ -976,20 +895,20 @@ class AlltoAll(PrimitiveWithInfer):
 if self.split_count != rank_size:
 raise ValueError(f"For '{self.name}', the 'split_count' must be equal to 'rank_size', "
 f"but got 'split_count': {self.split_count}, 'rank_size': {rank_size}.")
-if x_shape[self.split_dim]
-raise ValueError(f"For '{self.name}', the '
-f"but got '
-
-
-x_shape[self.concat_dim] = x_shape[self.concat_dim] * self.split_count
-if x_shape[self.split_dim] >= 0:
-x_shape[self.split_dim] = int(x_shape[self.split_dim] / self.split_count)
+if x_shape[self.split_dim] % self.split_count != 0:
+raise ValueError(f"For '{self.name}', the 'split_count' must be divisible by 'rank_size', "
+f"but got 'split_count' {self.split_count}, 'rank_size' {x_shape[self.split_dim]}.")
+x_shape[self.concat_dim] = x_shape[self.concat_dim] * self.split_count
+x_shape[self.split_dim] = int(x_shape[self.split_dim] / self.split_count)
 return x_shape
 
 def infer_dtype(self, x_dtype):
 check_collective_target_dtype('x', x_dtype, self.name)
 return x_dtype
 
+def __call__(self, tensor):
+raise NotImplementedError
+
 
 class NeighborExchangeV2(Primitive):
 r"""
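The shape logic in this hunk is easy to check by hand: the split dimension must divide evenly by split_count, then it shrinks by that factor while the concat dimension grows by it. A plain-Python sketch of the same arithmetic (no devices needed; the example values are illustrative):

    def alltoall_infer_shape(x_shape, split_count, split_dim, concat_dim):
        # Mirrors the infer_shape above: reject non-divisible splits, then rescale both dims.
        if x_shape[split_dim] % split_count != 0:
            raise ValueError("the split dimension must be divisible by split_count")
        out = list(x_shape)
        out[concat_dim] *= split_count
        out[split_dim] //= split_count
        return out

    print(alltoall_infer_shape([1, 1, 8, 1], split_count=8, split_dim=2, concat_dim=3))  # [1, 1, 1, 8]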
@@ -1038,19 +957,24 @@ class NeighborExchangeV2(Primitive):
 .. note::
 Before running the following examples, you need to configure the communication environment variables.
 
-For Ascend/GPU/CPU devices, it is recommended to use the msrun startup method without any third-party
-or configuration file dependencies.
-Please see the `msrun start up
-<https://www.mindspore.cn/tutorials/experts/zh-CN/master/parallel/msrun_launcher.html>`_
+For the Ascend devices, users need to prepare the rank table, set rank_id and device_id.
+Please see the `rank table Startup
+<https://www.mindspore.cn/tutorials/experts/en/master/parallel/rank_table.html>`_
 for more details.
 
+For the GPU devices, users need to prepare the host file and mpi, please see the `mpirun Startup
+<https://www.mindspore.cn/tutorials/experts/en/master/parallel/mpirun.html>`_ .
+
+For the CPU device, users need to write a dynamic cluster startup script, please see the `Dynamic Cluster
+Startup <https://www.mindspore.cn/tutorials/experts/en/master/parallel/dynamic_cluster.html>`_ .
+
 This example should be run with 2 devices.
 
 >>> import os
 >>> import mindspore as ms
 >>> from mindspore.communication import init
 >>> import mindspore.nn as nn
->>>
+>>> import mindspore.ops as ops
 >>> import numpy as np
 >>>
 >>> class Net0(nn.Cell):
@@ -1122,416 +1046,6 @@ class NeighborExchangeV2(Primitive):
|
|
|
1122
1046
|
raise NotImplementedError
|
|
1123
1047
|
|
|
1124
1048
|
|
|
1125
|
-
class CollectiveScatter(Primitive):
|
|
1126
|
-
r"""
|
|
1127
|
-
Scatter tensor evently across the processes in the specified communication group.
|
|
1128
|
-
|
|
1129
|
-
Note:
|
|
1130
|
-
The interface behavior only support Tensor input and scatter evenly.
|
|
1131
|
-
Only the tensor in process `src_rank` (global rank) will do scatter.
|
|
1132
|
-
|
|
1133
|
-
Args:
|
|
1134
|
-
src_rank (int, optional): Specifies the rank of the process that send the tensor.
|
|
1135
|
-
And only process `src_rank` will send the tensor.
|
|
1136
|
-
group (str, optional): The communication group to work on. Default: ``GlobalComm.WORLD_COMM_GROUP``.
|
|
1137
|
-
|
|
1138
|
-
Inputs:
|
|
1139
|
-
- **input_x** (Tensor) - The input tensor to be scattered. The shape of tensor is :math:`(x_1, x_2, ..., x_R)`.
|
|
1140
|
-
|
|
1141
|
-
Outputs:
|
|
1142
|
-
Tensor, the shape of output is :math:`(x_1/src\_rank, x_2, ..., x_R)`. The dimension 0 of data is equal to
|
|
1143
|
-
the dimension of input tensor divided by `src`, and the other dimension keep the same.
|
|
1144
|
-
|
|
1145
|
-
Raises:
|
|
1146
|
-
TypeError: If `group` is not a str.
|
|
1147
|
-
RuntimeError: If device target is invalid, or backend is invalid, or distributed initialization fails.
|
|
1148
|
-
ValueError: If the local rank id of the calling process in the group
|
|
1149
|
-
is larger than the group's rank size.
|
|
1150
|
-
|
|
1151
|
-
Supported Platforms:
|
|
1152
|
-
``Ascend``
|
|
1153
|
-
|
|
1154
|
-
Examples:
|
|
1155
|
-
.. note::
|
|
1156
|
-
Before running the following examples, you need to configure the communication environment variables.
|
|
1157
|
-
|
|
1158
|
-
For Ascend/GPU/CPU devices, it is recommended to use the msrun startup method
|
|
1159
|
-
without any third-party or configuration file dependencies.
|
|
1160
|
-
Please see the `msrun start up
|
|
1161
|
-
<https://www.mindspore.cn/tutorials/experts/zh-CN/master/parallel/msrun_launcher.html>`_
|
|
1162
|
-
for more details.
|
|
1163
|
-
|
|
1164
|
-
This example should be run with 2 devices.
|
|
1165
|
-
|
|
1166
|
-
>>> import numpy as np
|
|
1167
|
-
>>> import mindspore.nn as nn
|
|
1168
|
-
>>> from mindspore import Tensor
|
|
1169
|
-
>>> from mindspore.communication.management import init, get_rank
|
|
1170
|
-
>>> from mindspore import ops
|
|
1171
|
-
>>> # Launch 2 processes.
|
|
1172
|
-
>>> init()
|
|
1173
|
-
>>> class CollectiveScatterNet(nn.Cell):
|
|
1174
|
-
... def __init__(self):
|
|
1175
|
-
... super(CollectiveScatterNet, self).__init__()
|
|
1176
|
-
... self.collective_scatter = ops.CollectiveScatter(src_rank=0)
|
|
1177
|
-
...
|
|
1178
|
-
... def construct(self, x):
|
|
1179
|
-
... return self.collective_scatter(x)
|
|
1180
|
-
...
|
|
1181
|
-
>>> input = Tensor(np.arange(8).reshape([4, 2]).astype(np.float32))
|
|
1182
|
-
>>> net = CollectiveScatterNet()
|
|
1183
|
-
>>> output = net(input)
|
|
1184
|
-
>>> print(output)
|
|
1185
|
-
Process with rank 0: [[0. 1.],
|
|
1186
|
-
[2. 3.]]
|
|
1187
|
-
Process with rank 1: [[4. 5.],
|
|
1188
|
-
[6. 7.]]
|
|
1189
|
-
|
|
1190
|
-
Tutorial Examples:
|
|
1191
|
-
- `Distributed Set Communication Primitives - CollectiveScatter
|
|
1192
|
-
<https://www.mindspore.cn/docs/en/master/api_python/samples/ops/communicate_ops.html#reducescatter>`_
|
|
1193
|
-
|
|
1194
|
-
"""
|
|
1195
|
-
|
|
1196
|
-
@prim_attr_register
|
|
1197
|
-
def __init__(self, src_rank=0, group=GlobalComm.WORLD_COMM_GROUP):
|
|
1198
|
-
validator.check_value_type('group', _get_group(group), (str,), self.name)
|
|
1199
|
-
self.rank_id = get_rank(_get_group(group))
|
|
1200
|
-
self.src_rank = src_rank
|
|
1201
|
-
self.rank_size = get_group_size(_get_group(group))
|
|
1202
|
-
validator.check('rank', self.rank_id, 'rank_size', self.rank_size, validator.LT, self.name)
|
|
1203
|
-
self.add_prim_attr('rank_id', self.rank_id)
|
|
1204
|
-
self.add_prim_attr('src_rank', self.src_rank)
|
|
1205
|
-
self.add_prim_attr('rank_size', self.rank_size)
|
|
1206
|
-
self.add_prim_attr('group', _get_group(group))
|
|
1207
|
-
|
|
1208
|
-
|
|
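A minimal standalone sketch of driving the removed CollectiveScatter end to end, assuming 2 devices launched with msrun and an already configured communication environment (the helper names below are illustrative, not part of the diff)::

    import numpy as np
    import mindspore.nn as nn
    from mindspore import Tensor, ops
    from mindspore.communication import init, get_group_size

    init()  # join the default communication group set up by the launcher

    class CollectiveScatterNet(nn.Cell):
        def __init__(self):
            super(CollectiveScatterNet, self).__init__()
            # rank 0 holds the full tensor; every rank receives one even slice
            self.collective_scatter = ops.CollectiveScatter(src_rank=0)

        def construct(self, x):
            return self.collective_scatter(x)

    full = Tensor(np.arange(8).reshape([4, 2]).astype(np.float32))
    out = CollectiveScatterNet()(full)
    # dimension 0 shrinks by the group size; with 2 devices each rank gets a [2, 2] slice
    assert out.shape[0] == full.shape[0] // get_group_size()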
1209
|
-
class CollectiveGather(Primitive):
|
|
1210
|
-
r"""
|
|
1211
|
-
Gathers tensors from the specified communication group. The operation will gather the tensor
|
|
1212
|
-
from processes according to dimension 0.
|
|
1213
|
-
|
|
1214
|
-
Note:
|
|
1215
|
-
Only the tensor in process `dest_rank` (global rank) will keep the gathered tensor. The other processes
|
|
1216
|
-
will keep a tensor with shape [1], which has no mathematical meaning.
|
|
1217
|
-
|
|
1218
|
-
Args:
|
|
1219
|
-
dest_rank (int): Specifies the rank of the process that receives the tensor.
|
|
1220
|
-
Only process `dest_rank` will receive the gathered tensor.
|
|
1221
|
-
group (str, optional): The communication group to work on. Default: ``GlobalComm.WORLD_COMM_GROUP``.
|
|
1222
|
-
|
|
1223
|
-
Inputs:
|
|
1224
|
-
- **input_x** (Tensor) - The tensor to be gathered. The shape of tensor is :math:`(x_1, x_2, ..., x_R)`.
|
|
1225
|
-
|
|
1226
|
-
Outputs:
|
|
1227
|
-
Tensor, the shape of output is :math:`(\sum x_1, x_2, ..., x_R)`. The dimension 0 of data is equal to
|
|
1228
|
-
the sum of dimension 0 of the input tensors across the group, and the other dimensions keep the same.
|
|
1229
|
-
|
|
1230
|
-
Raises:
|
|
1231
|
-
TypeError: If `group` is not a str.
|
|
1232
|
-
RuntimeError: If device target is invalid, or backend is invalid, or distributed initialization fails.
|
|
1233
|
-
ValueError: If the local rank id of the calling process in the group
|
|
1234
|
-
is larger than the group's rank size.
|
|
1235
|
-
|
|
1236
|
-
Supported Platforms:
|
|
1237
|
-
``Ascend``
|
|
1238
|
-
|
|
1239
|
-
Examples:
|
|
1240
|
-
.. note::
|
|
1241
|
-
Before running the following examples, you need to configure the communication environment variables.
|
|
1242
|
-
|
|
1243
|
-
For Ascend/GPU/CPU devices, it is recommended to use the msrun startup method
|
|
1244
|
-
without any third-party or configuration file dependencies.
|
|
1245
|
-
Please see the `msrun start up
|
|
1246
|
-
<https://www.mindspore.cn/tutorials/experts/zh-CN/master/parallel/msrun_launcher.html>`_
|
|
1247
|
-
for more details.
|
|
1248
|
-
|
|
1249
|
-
This example should be run with 2 devices.
|
|
1250
|
-
|
|
1251
|
-
>>> import numpy as np
|
|
1252
|
-
>>> import mindspore as ms
|
|
1253
|
-
>>> import mindspore.nn as nn
|
|
1254
|
-
>>> from mindspore.communication import init
|
|
1255
|
-
>>> from mindspore import Tensor
|
|
1256
|
-
>>> from mindspore import ops
|
|
1257
|
-
>>> # Launch 2 processes.
|
|
1258
|
-
>>>
|
|
1259
|
-
>>> ms.set_context(mode=ms.GRAPH_MODE)
|
|
1260
|
-
>>> init()
|
|
1261
|
-
>>> class CollectiveGatherNet(nn.Cell):
|
|
1262
|
-
... def __init__(self):
|
|
1263
|
-
... super(CollectiveGatherNet, self).__init__()
|
|
1264
|
-
... self.collective_gather = ops.CollectiveGather(dest_rank=0)
|
|
1265
|
-
...
|
|
1266
|
-
... def construct(self, x):
|
|
1267
|
-
... return self.collective_gather(x)
|
|
1268
|
-
...
|
|
1269
|
-
>>> input = Tensor(np.arange(4).reshape([2, 2]).astype(np.float32))
|
|
1270
|
-
>>> net = CollectiveGatherNet()
|
|
1271
|
-
>>> output = net(input)
|
|
1272
|
-
>>> print(output)
|
|
1273
|
-
Process with rank 0: [[0. 1.],
|
|
1274
|
-
[2. 3.],
|
|
1275
|
-
[0. 1.],
|
|
1276
|
-
[2. 3.]]
|
|
1277
|
-
Process with rank 1: [0.]
|
|
1278
|
-
|
|
1279
|
-
Tutorial Examples:
|
|
1280
|
-
- `Distributed Set Communication Primitives - CollectiveGather
|
|
1281
|
-
<https://www.mindspore.cn/docs/en/master/api_python/samples/ops/communicate_ops.html#collectivegather>`_
|
|
1282
|
-
|
|
1283
|
-
"""
|
|
1284
|
-
|
|
1285
|
-
@prim_attr_register
|
|
1286
|
-
def __init__(self, dest_rank, group=GlobalComm.WORLD_COMM_GROUP):
|
|
1287
|
-
"""Initialize Gather."""
|
|
1288
|
-
validator.check_value_type('group', _get_group(group), (str,), self.name)
|
|
1289
|
-
self.rank_id = get_rank(_get_group(group))
|
|
1290
|
-
self.dest_rank = dest_rank
|
|
1291
|
-
self.rank_size = get_group_size(_get_group(group))
|
|
1292
|
-
validator.check('rank', self.rank_id, 'rank_size', self.rank_size, validator.LT, self.name)
|
|
1293
|
-
self.add_prim_attr('rank_size', self.rank_size)
|
|
1294
|
-
self.add_prim_attr('group', _get_group(group))
|
|
1295
|
-
self.add_prim_attr('dest_rank', self.dest_rank)
|
|
1296
|
-
self.add_prim_attr('rank_id', self.rank_id)
|
|
1297
|
-
|
|
1298
|
-
|
|
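A short hedged sketch of the gather direction, focusing on the shape-[1] placeholder that non-destination ranks receive (assumptions: 2 devices, graph mode, msrun launch as in the removed docstring)::

    import numpy as np
    import mindspore as ms
    import mindspore.nn as nn
    from mindspore import Tensor, ops
    from mindspore.communication import init, get_rank

    ms.set_context(mode=ms.GRAPH_MODE)
    init()

    class CollectiveGatherNet(nn.Cell):
        def __init__(self):
            super(CollectiveGatherNet, self).__init__()
            self.collective_gather = ops.CollectiveGather(dest_rank=0)

        def construct(self, x):
            return self.collective_gather(x)

    x = Tensor(np.arange(4).reshape([2, 2]).astype(np.float32))
    out = CollectiveGatherNet()(x)
    if get_rank() == 0:
        print(out)   # every rank's input concatenated along dimension 0
    else:
        print(out)   # shape-[1] placeholder with no mathematical meaning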
1299
|
-
class Barrier(PrimitiveWithInfer):
|
|
1300
|
-
"""
|
|
1301
|
-
Synchronizes all processes in the specified group. Once a process calls this operation, it is blocked until
|
|
1302
|
-
all processes have called this operation. After all processes finish the call, the blocked processes
|
|
1303
|
-
will be woken up and continue their tasks.
|
|
1304
|
-
|
|
1305
|
-
Args:
|
|
1306
|
-
group (str, optional): The communication group to work on. Default: ``GlobalComm.WORLD_COMM_GROUP``.
|
|
1307
|
-
|
|
1308
|
-
Raises:
|
|
1309
|
-
TypeError: If `group` is not a str.
|
|
1310
|
-
RuntimeError: If backend is invalid, or distributed initialization fails.
|
|
1311
|
-
ValueError: If the local rank id of the calling process in the group
|
|
1312
|
-
is larger than the group's rank size.
|
|
1313
|
-
|
|
1314
|
-
Supported Platforms:
|
|
1315
|
-
``Ascend``
|
|
1316
|
-
|
|
1317
|
-
Examples:
|
|
1318
|
-
.. note::
|
|
1319
|
-
Before running the following examples, you need to configure the communication environment variables.
|
|
1320
|
-
|
|
1321
|
-
For Ascend/GPU/CPU devices, it is recommended to use the msrun startup method
|
|
1322
|
-
without any third-party or configuration file dependencies.
|
|
1323
|
-
Please see the `msrun start up
|
|
1324
|
-
<https://www.mindspore.cn/tutorials/experts/zh-CN/master/parallel/msrun_launcher.html>`_
|
|
1325
|
-
for more details.
|
|
1326
|
-
|
|
1327
|
-
This example should be run with 2 devices.
|
|
1328
|
-
|
|
1329
|
-
>>> import numpy as np
|
|
1330
|
-
>>> import mindspore.nn as nn
|
|
1331
|
-
>>> from mindspore.communication import init
|
|
1332
|
-
>>> from mindspore import Tensor
|
|
1333
|
-
>>> from mindspore import ops
|
|
1334
|
-
>>> # Launch 2 processes.
|
|
1335
|
-
>>> init()
|
|
1336
|
-
>>> class BarrierNet(nn.Cell):
|
|
1337
|
-
... def __init__(self):
|
|
1338
|
-
... super(BarrierNet, self).__init__()
|
|
1339
|
-
... self.barrier = ops.Barrier()
|
|
1340
|
-
...
|
|
1341
|
-
... def construct(self):
|
|
1342
|
-
... self.barrier()
|
|
1343
|
-
>>> net = BarrierNet()
|
|
1344
|
-
>>> net()
|
|
1345
|
-
|
|
1346
|
-
Tutorial Examples:
|
|
1347
|
-
- `Distributed Set Communication Primitives - Barrier
|
|
1348
|
-
<https://www.mindspore.cn/docs/en/master/api_python/samples/ops/communicate_ops.html#barrier>`_
|
|
1349
|
-
|
|
1350
|
-
"""
|
|
1351
|
-
|
|
1352
|
-
@prim_attr_register
|
|
1353
|
-
def __init__(self, group=GlobalComm.WORLD_COMM_GROUP):
|
|
1354
|
-
self.group = group
|
|
1355
|
-
self.add_prim_attr("side_effect_mem", True)
|
|
1356
|
-
|
|
1357
|
-
def infer_shape(self):
|
|
1358
|
-
return [1]
|
|
1359
|
-
|
|
1360
|
-
def infer_dtype(self):
|
|
1361
|
-
return mstype.float32
|
|
1362
|
-
|
|
1363
|
-
|
|
1364
|
-
class Send(PrimitiveWithInfer):
|
|
1365
|
-
"""
|
|
1366
|
-
Send tensors to the specified dest_rank.
|
|
1367
|
-
|
|
1368
|
-
Note:
|
|
1369
|
-
Send and Receive must be used in combination and have the same sr_tag.
|
|
1370
|
-
|
|
1371
|
-
Args:
|
|
1372
|
-
sr_tag (int): The tag to identify the send/recv message. The message will
|
|
1373
|
-
be received by the Receive op with the same "sr_tag".
|
|
1374
|
-
dest_rank (int): A required integer identifying the destination rank.
|
|
1375
|
-
group (str, optional): The communication group to work on. Default: ``GlobalComm.WORLD_COMM_GROUP``.
|
|
1376
|
-
group_back (str, optional): The communication group for backpropagation.
|
|
1377
|
-
Default: ``GlobalComm.WORLD_COMM_GROUP``.
|
|
1378
|
-
|
|
1379
|
-
Inputs:
|
|
1380
|
-
- **input_x** (Tensor) - The shape of tensor is :math:`(x_1, x_2, ..., x_R)`.
|
|
1381
|
-
|
|
1382
|
-
Raises:
|
|
1383
|
-
TypeError: If `group` is not a str.
|
|
1384
|
-
RuntimeError: If device target is invalid, or backend is invalid, or distributed initialization fails.
|
|
1385
|
-
ValueError: If the local rank id of the calling process in the group
|
|
1386
|
-
is larger than the group's rank size.
|
|
1387
|
-
|
|
1388
|
-
Supported Platforms:
|
|
1389
|
-
``Ascend`` ``GPU``
|
|
1390
|
-
|
|
1391
|
-
Examples:
|
|
1392
|
-
.. note::
|
|
1393
|
-
Before running the following examples, you need to configure the communication environment variables.
|
|
1394
|
-
|
|
1395
|
-
For Ascend/GPU/CPU devices, it is recommended to use the msrun startup method
|
|
1396
|
-
without any third-party or configuration file dependencies.
|
|
1397
|
-
Please see the `msrun start up
|
|
1398
|
-
<https://www.mindspore.cn/tutorials/experts/zh-CN/master/parallel/msrun_launcher.html>`_
|
|
1399
|
-
for more details.
|
|
1400
|
-
|
|
1401
|
-
This example should be run with 2 devices.
|
|
1402
|
-
|
|
1403
|
-
>>> import numpy as np
|
|
1404
|
-
>>> import mindspore.nn as nn
|
|
1405
|
-
>>> from mindspore.communication import init
|
|
1406
|
-
>>> from mindspore import Tensor
|
|
1407
|
-
>>> from mindspore import ops
|
|
1408
|
-
>>>
|
|
1409
|
-
>>> init()
|
|
1410
|
-
>>> class SendNet(nn.Cell):
|
|
1411
|
-
... def __init__(self):
|
|
1412
|
-
... super(SendNet, self).__init__()
|
|
1413
|
-
... self.depend = ops.Depend()
|
|
1414
|
-
... self.send = ops.Send(sr_tag=0, dest_rank=1, group="hccl_world_group")
|
|
1415
|
-
...
|
|
1416
|
-
... def construct(self, x):
|
|
1417
|
-
... out = self.depend(x, self.send(x))
|
|
1418
|
-
... return out
|
|
1419
|
-
...
|
|
1420
|
-
>>> input_ = Tensor(np.ones([2, 8]).astype(np.float32))
|
|
1421
|
-
>>> net = SendNet()
|
|
1422
|
-
>>> output = net(input_)
|
|
1423
|
-
|
|
1424
|
-
Tutorial Examples:
|
|
1425
|
-
- `Distributed Set Communication Primitives - Send
|
|
1426
|
-
<https://www.mindspore.cn/docs/en/master/api_python/samples/ops/communicate_ops.html#send>`_
|
|
1427
|
-
|
|
1428
|
-
"""
|
|
1429
|
-
|
|
1430
|
-
@prim_attr_register
|
|
1431
|
-
def __init__(self, sr_tag, dest_rank, group=GlobalComm.WORLD_COMM_GROUP, group_back=GlobalComm.WORLD_COMM_GROUP):
|
|
1432
|
-
self.rank = dest_rank
|
|
1433
|
-
self.sr_tag = sr_tag
|
|
1434
|
-
self.group = group
|
|
1435
|
-
self.add_prim_attr("no_eliminate", True)
|
|
1436
|
-
|
|
1437
|
-
def infer_shape(self, x_shape):
|
|
1438
|
-
self.add_prim_attr("shape", x_shape)
|
|
1439
|
-
return x_shape
|
|
1440
|
-
|
|
1441
|
-
def infer_dtype(self, x_dtype):
|
|
1442
|
-
return x_dtype
|
|
1443
|
-
|
|
1444
|
-
|
|
1445
|
-
class Receive(PrimitiveWithInfer):
|
|
1446
|
-
"""
|
|
1447
|
-
Receive tensors from src_rank.
|
|
1448
|
-
|
|
1449
|
-
Note:
|
|
1450
|
-
Send and Receive must be used in combination and have same sr_tag.
|
|
1451
|
-
|
|
1452
|
-
Args:
|
|
1453
|
-
sr_tag (int): A required integer identifying the send/recv message tag. The message will
|
|
1454
|
-
be sent by the Send op with the same "sr_tag".
|
|
1455
|
-
src_rank (int): A required integer identifying the source rank.
|
|
1456
|
-
shape (list[int]): A required list identifying the shape of the tensor to be received.
|
|
1457
|
-
dtype (Type): A required Type identifying the type of the tensor to be received. The supported types:
|
|
1458
|
-
int8/int16/int32/float16/float32.
|
|
1459
|
-
group (str, optional): The communication group to work on. Default: ``GlobalComm.WORLD_COMM_GROUP``.
|
|
1460
|
-
group_back (str, optional): The communication group for backpropagation.
|
|
1461
|
-
Default: ``GlobalComm.WORLD_COMM_GROUP``.
|
|
1462
|
-
|
|
1463
|
-
Outputs:
|
|
1464
|
-
Tensor, output has the same shape as the Tensor sent by `Send` operation.
|
|
1465
|
-
|
|
1466
|
-
Raises:
|
|
1467
|
-
TypeError: If `group` is not a str.
|
|
1468
|
-
RuntimeError: If device target is invalid, or backend is invalid, or distributed initialization fails.
|
|
1469
|
-
ValueError: If the local rank id of the calling process in the group
|
|
1470
|
-
is larger than the group's rank size.
|
|
1471
|
-
|
|
1472
|
-
Supported Platforms:
|
|
1473
|
-
``Ascend`` ``GPU``
|
|
1474
|
-
|
|
1475
|
-
Examples:
|
|
1476
|
-
.. note::
|
|
1477
|
-
Before running the following examples, you need to configure the communication environment variables.
|
|
1478
|
-
|
|
1479
|
-
For Ascend/GPU/CPU devices, it is recommended to use the msrun startup method
|
|
1480
|
-
without any third-party or configuration file dependencies.
|
|
1481
|
-
Please see the `msrun start up
|
|
1482
|
-
<https://www.mindspore.cn/tutorials/experts/zh-CN/master/parallel/msrun_launcher.html>`_
|
|
1483
|
-
for more details.
|
|
1484
|
-
|
|
1485
|
-
This example should be run with 2 devices.
|
|
1486
|
-
|
|
1487
|
-
>>> import mindspore as ms
|
|
1488
|
-
>>> import mindspore.nn as nn
|
|
1489
|
-
>>> from mindspore.communication import init
|
|
1490
|
-
>>> from mindspore import Tensor
|
|
1491
|
-
>>> from mindspore import ops
|
|
1492
|
-
>>>
|
|
1493
|
-
>>> init()
|
|
1494
|
-
>>> class ReceiveNet(nn.Cell):
|
|
1495
|
-
... def __init__(self):
|
|
1496
|
-
... super(ReceiveNet, self).__init__()
|
|
1497
|
-
... self.recv = ops.Receive(sr_tag=0, src_rank=0, shape=[2, 8], dtype=ms.float32,
|
|
1498
|
-
... group="hccl_world_group")
|
|
1499
|
-
...
|
|
1500
|
-
... def construct(self):
|
|
1501
|
-
... out = self.recv()
|
|
1502
|
-
... return out
|
|
1503
|
-
...
|
|
1504
|
-
>>> net = ReceiveNet()
|
|
1505
|
-
>>> output = net()
|
|
1506
|
-
|
|
1507
|
-
Tutorial Examples:
|
|
1508
|
-
- `Distributed Set Communication Primitives - Receive
|
|
1509
|
-
<https://www.mindspore.cn/docs/en/master/api_python/samples/ops/communicate_ops.html#receive>`_
|
|
1510
|
-
|
|
1511
|
-
"""
|
|
1512
|
-
|
|
1513
|
-
@prim_attr_register
|
|
1514
|
-
def __init__(self, sr_tag, src_rank, shape, dtype, group=GlobalComm.WORLD_COMM_GROUP,
|
|
1515
|
-
group_back=GlobalComm.WORLD_COMM_GROUP):
|
|
1516
|
-
self.rank = src_rank
|
|
1517
|
-
self.tag = sr_tag
|
|
1518
|
-
self.shape = shape
|
|
1519
|
-
self.dtype = dtype
|
|
1520
|
-
self.group = group
|
|
1521
|
-
self.add_prim_attr("no_eliminate", True)
|
|
1522
|
-
valid_type = [mstype.float16, mstype.float32, mstype.float64, mstype.bfloat16,
|
|
1523
|
-
mstype.int8, mstype.int16, mstype.int32, mstype.int64,
|
|
1524
|
-
mstype.uint8, mstype.uint16, mstype.uint32, mstype.uint64]
|
|
1525
|
-
args = {"dtype": dtype}
|
|
1526
|
-
validator.check_scalar_or_tensor_types_same(args, valid_type, self.name)
|
|
1527
|
-
|
|
1528
|
-
def infer_shape(self, x_shape=None):
|
|
1529
|
-
return self.get_attr_dict()['shape']
|
|
1530
|
-
|
|
1531
|
-
def infer_dtype(self, x_dtype=None):
|
|
1532
|
-
return self.get_attr_dict()['dtype']
|
|
1533
|
-
|
|
1534
|
-
|
|
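Because the removed Send and Receive docstrings show the two endpoints separately, the following hedged sketch pairs them in one script (assumptions: 2 devices, rank 0 sends a [2, 8] tensor to rank 1, both ends use the same sr_tag and the default world group)::

    import numpy as np
    import mindspore as ms
    import mindspore.nn as nn
    from mindspore import Tensor, ops
    from mindspore.communication import init, get_rank

    init()

    class SendNet(nn.Cell):
        def __init__(self):
            super(SendNet, self).__init__()
            self.depend = ops.Depend()
            self.send = ops.Send(sr_tag=0, dest_rank=1)

        def construct(self, x):
            # Depend ties the side-effect-only Send into the data flow
            return self.depend(x, self.send(x))

    class ReceiveNet(nn.Cell):
        def __init__(self):
            super(ReceiveNet, self).__init__()
            self.recv = ops.Receive(sr_tag=0, src_rank=0, shape=[2, 8], dtype=ms.float32)

        def construct(self):
            return self.recv()

    if get_rank() == 0:
        SendNet()(Tensor(np.ones([2, 8]).astype(np.float32)))
    else:
        print(ReceiveNet()())   # the [2, 8] tensor of ones sent by rank 0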
1535
1049
|
class _MirrorOperator(PrimitiveWithInfer):
|
|
1536
1050
|
"""
|
|
1537
1051
|
Auto parallel virtual operator. Do nothing in forward, do all reduce and mean in backward. It is only for
|
|
@@ -1695,6 +1209,8 @@ class _VirtualAssignAdd(PrimitiveWithInfer):
|
|
|
1695
1209
|
|
|
1696
1210
|
def infer_dtype(self, x_dtype, y_dtype):
|
|
1697
1211
|
return x_dtype
|
|
1212
|
+
|
|
1213
|
+
|
|
1698
1214
|
virtual_assign_add = _VirtualAssignAdd()
|
|
1699
1215
|
|
|
1700
1216
|
|
|
@@ -1787,210 +1303,3 @@ class _GetTensorSlice(PrimitiveWithInfer):
|
|
|
1787
1303
|
if tensor_slice.shape != slice_shape:
|
|
1788
1304
|
tensor_slice = tensor_slice.reshape(slice_shape)
|
|
1789
1305
|
return Tensor(tensor_slice, x.dtype)
|
|
1790
|
-
|
|
1791
|
-
|
|
1792
|
-
class BatchISendIRecv(PrimitiveWithInfer):
|
|
1793
|
-
"""
|
|
1794
|
-
Batch send and recv tensors asynchronously.
|
|
1795
|
-
|
|
1796
|
-
Note:
|
|
1797
|
-
- The ``isend`` and ``irecv`` in ``op_types`` between ranks need to match each other.
|
|
1798
|
-
- ``isend`` and ``irecv`` in a batch can only be used in the same communication group.
|
|
1799
|
-
|
|
1800
|
-
Args:
|
|
1801
|
-
op_types(Union[tuple[str], list[str]]): "isend" or "irecv" to indicate the order and number of communication.
|
|
1802
|
-
remote_ranks(Union[tuple[int], list[int]]): src or dst rank that matches the op_types.
|
|
1803
|
-
receive_shapes(Union[tuple[int], list[int]]): receive tensor shapes that match "irecv" in op_types.
|
|
1804
|
-
receive_dtypes(Union[tuple[mindspore.dtype], list[mindspore.dtype]]): receive tensor dtype
|
|
1805
|
-
that matches "irecv" in op_types.
|
|
1806
|
-
group (str): The communication group to work on. Default: ``GlobalComm.WORLD_COMM_GROUP``, which
|
|
1807
|
-
means ``"hccl_world_group"`` in Ascend, and ``"nccl_world_group"`` in GPU.
|
|
1808
|
-
|
|
1809
|
-
Inputs:
|
|
1810
|
-
- **input_x** (Union[tuple[Tensor], list[Tensor], tuple(None)]) -
|
|
1811
|
-
The shape of tensor is :math:`(x_1, x_2, ..., x_R)`.
|
|
1812
|
-
|
|
1813
|
-
Outputs:
|
|
1814
|
-
tuple(Tensor). The output tensors correspond to ``op_types``:
|
|
1815
|
-
At ``"isend"`` position, output tensor is a fake tensor with scalar, which has no meaning.
|
|
1816
|
-
At ``"irecv"`` position, output tensor is a tensor received from remote end.
|
|
1817
|
-
|
|
1818
|
-
|
|
1819
|
-
Raises:
|
|
1820
|
-
TypeError: If ``group`` is not a str.
|
|
1821
|
-
TypeError: If ``op_types``, ``receive_shapes``, ``receive_dtypes``, ``remote_ranks`` are not tuple or list.
|
|
1822
|
-
ValueError: If the length of ``receive_shapes`` and ``receive_dtypes`` are not the same.
|
|
1823
|
-
ValueError: If the length of ``op_types`` and ``remote_ranks`` are not the same.
|
|
1824
|
-
RuntimeError: If the length of input tensors and ``"isend"`` count are not the same.
|
|
1825
|
-
|
|
1826
|
-
Supported Platforms:
|
|
1827
|
-
``Ascend``
|
|
1828
|
-
|
|
1829
|
-
Examples:
|
|
1830
|
-
.. note::
|
|
1831
|
-
Before running the following examples, you need to configure the communication environment variables.
|
|
1832
|
-
|
|
1833
|
-
For Ascend/GPU/CPU devices, it is recommended to use the msrun startup method
|
|
1834
|
-
without any third-party or configuration file dependencies.
|
|
1835
|
-
|
|
1836
|
-
Please see the `msrun start up
|
|
1837
|
-
<https://www.mindspore.cn/tutorials/experts/zh-CN/master/parallel/msrun_launcher.html>`_
|
|
1838
|
-
for more details.
|
|
1839
|
-
|
|
1840
|
-
This example should be run with 2 devices.
|
|
1841
|
-
|
|
1842
|
-
>>> import numpy as np
|
|
1843
|
-
>>> import mindspore as ms
|
|
1844
|
-
>>> from mindspore import ops
|
|
1845
|
-
>>> import mindspore.nn as nn
|
|
1846
|
-
>>> from mindspore.communication import init, get_rank
|
|
1847
|
-
>>> from mindspore import Tensor
|
|
1848
|
-
>>>
|
|
1849
|
-
>>> init()
|
|
1850
|
-
>>> rank = get_rank()
|
|
1851
|
-
>>> class Net(nn.Cell):
|
|
1852
|
-
... def __init__(self):
|
|
1853
|
-
... super(Net, self).__init__()
|
|
1854
|
-
... if rank == 0:
|
|
1855
|
-
... remote_rank = [1, 1]
|
|
1856
|
-
... else:
|
|
1857
|
-
... remote_rank = [0, 0]
|
|
1858
|
-
... self.batchisendirecv = ops.BatchISendIRecv(("isend", "irecv"), remote_rank, [()], (ms.float32,))
|
|
1859
|
-
...
|
|
1860
|
-
... def construct(self, x):
|
|
1861
|
-
... if isinstance(x, Tensor):
|
|
1862
|
-
... x = (x,)
|
|
1863
|
-
... return self.batchisendirecv(x)
|
|
1864
|
-
...
|
|
1865
|
-
>>> send_x = Tensor(rank + 1).astype(ms.float32)
|
|
1866
|
-
>>> net = Net()
|
|
1867
|
-
>>> output = net(send_x)
|
|
1868
|
-
>>> print(output)
|
|
1869
|
-
rank 0:
|
|
1870
|
-
(Tensor(shape=[], dtype=Float32, value= 0), Tensor(shape=[], dtype=Float32, value= 2))
|
|
1871
|
-
rank 1:
|
|
1872
|
-
(Tensor(shape=[], dtype=Float32, value= 0), Tensor(shape=[], dtype=Float32, value= 1))
|
|
1873
|
-
|
|
1874
|
-
Tutorial Examples:
|
|
1875
|
-
- `Distributed Set Communication Primitives - BatchISendIRecv
|
|
1876
|
-
<https://www.mindspore.cn/docs/en/master/api_python/samples/ops/communicate_ops.html#allgather>`_
|
|
1877
|
-
|
|
1878
|
-
"""
|
|
1879
|
-
|
|
1880
|
-
@prim_attr_register
|
|
1881
|
-
def __init__(self, op_types, remote_ranks, receive_shapes=None,
|
|
1882
|
-
receive_dtypes=None, group=GlobalComm.WORLD_COMM_GROUP):
|
|
1883
|
-
if receive_shapes is None:
|
|
1884
|
-
receive_shapes = ()
|
|
1885
|
-
else:
|
|
1886
|
-
validator.check_value_type("receive_shapes", receive_shapes, [tuple, list], self.name)
|
|
1887
|
-
|
|
1888
|
-
if receive_dtypes is None:
|
|
1889
|
-
receive_dtypes = ()
|
|
1890
|
-
else:
|
|
1891
|
-
validator.check_value_type("receive_dtypes", receive_dtypes, [tuple, list], self.name)
|
|
1892
|
-
|
|
1893
|
-
validator.check_value_type("op_types", op_types, [tuple, list], self.name)
|
|
1894
|
-
validator.check_value_type("remote_ranks", remote_ranks, [tuple, list], self.name)
|
|
1895
|
-
|
|
1896
|
-
if len(receive_shapes) != len(receive_dtypes):
|
|
1897
|
-
raise ValueError("length of receive_shapes and receive_shapes must be the same, "
|
|
1898
|
-
f"but got receive_shapes: {len(receive_shapes)} "
|
|
1899
|
-
f" and receive_shapes: {receive_dtypes}")
|
|
1900
|
-
|
|
1901
|
-
if len(op_types) != len(remote_ranks):
|
|
1902
|
-
raise ValueError("length of op_types and remote_ranks must be the same.")
|
|
1903
|
-
|
|
1904
|
-
if group is None:
|
|
1905
|
-
group = GlobalComm.WORLD_COMM_GROUP
|
|
1906
|
-
self.add_prim_attr('group', group)
|
|
1907
|
-
self.add_prim_attr('op_types', op_types)
|
|
1908
|
-
self.add_prim_attr('remote_ranks', remote_ranks)
|
|
1909
|
-
self.add_prim_attr('receive_shapes', receive_shapes)
|
|
1910
|
-
self.add_prim_attr('receive_dtypes', receive_dtypes)
|
|
1911
|
-
self.add_prim_attr('no_eliminate', True)
|
|
1912
|
-
|
|
1913
|
-
|
|
1914
|
-
class AlltoAllV(PrimitiveWithInfer):
|
|
1915
|
-
"""
|
|
1916
|
-
AllToAll which supports uneven splits.
|
|
1917
|
-
|
|
1918
|
-
Note:
|
|
1919
|
-
- Only a flattened tensor is supported as input. The input tensor should be flattened and
|
|
1920
|
-
concatenated before calling this primitive (a standalone sketch of this preparation follows after this class).
|
|
1921
|
-
|
|
1922
|
-
Args:
|
|
1923
|
-
send_numel_list(Union[tuple[int], list[int]]): numel of each split to scatter to the different remote ranks.
|
|
1924
|
-
recv_numel_list(Union[tuple[int], list[int]]): numel of each split to gather from the different remote ranks.
|
|
1925
|
-
group (str): The communication group to work on. Default: ``GlobalComm.WORLD_COMM_GROUP``, which
|
|
1926
|
-
means ``"hccl_world_group"`` in Ascend, and ``"nccl_world_group"`` in GPU.
|
|
1927
|
-
|
|
1928
|
-
Inputs:
|
|
1929
|
-
- **input_x** (Tensor) - the flattened tensor to scatter. The shape of the tensor is :math:`(x_1)`.
|
|
1930
|
-
|
|
1931
|
-
Outputs:
|
|
1932
|
-
Tensor, the flattened and concatenated tensor gathered from remote ranks.
|
|
1933
|
-
If the gather result is empty, it will return a Tensor with value 0, which has no actual meaning.
|
|
1934
|
-
|
|
1935
|
-
Raises:
|
|
1936
|
-
TypeError: If `send_numel_list` or `recv_numel_list` is not a tuple or list.
|
|
1937
|
-
|
|
1938
|
-
Supported Platforms:
|
|
1939
|
-
``Ascend``
|
|
1940
|
-
|
|
1941
|
-
Examples:
|
|
1942
|
-
.. note::
|
|
1943
|
-
Before running the following examples, you need to configure the communication environment variables.
|
|
1944
|
-
|
|
1945
|
-
For Ascend/GPU/CPU devices, it is recommended to use the msrun startup method
|
|
1946
|
-
without any third-party or configuration file dependencies.
|
|
1947
|
-
|
|
1948
|
-
Please see the `msrun start up
|
|
1949
|
-
<https://www.mindspore.cn/tutorials/experts/zh-CN/master/parallel/msrun_launcher.html>`_
|
|
1950
|
-
for more details.
|
|
1951
|
-
|
|
1952
|
-
This example should be run with 2 devices.
|
|
1953
|
-
|
|
1954
|
-
>>> import numpy as np
|
|
1955
|
-
>>> import mindspore as ms
|
|
1956
|
-
>>> from mindspore import ops
|
|
1957
|
-
>>> import mindspore.nn as nn
|
|
1958
|
-
>>> from mindspore.communication import init, get_rank
|
|
1959
|
-
>>> from mindspore import Tensor
|
|
1960
|
-
>>>
|
|
1961
|
-
>>> init()
|
|
1962
|
-
>>> rank = get_rank()
|
|
1963
|
-
>>> class Net(nn.Cell):
|
|
1964
|
-
... def __init__(self):
|
|
1965
|
-
... super(Net, self).__init__()
|
|
1966
|
-
... if rank == 0:
|
|
1967
|
-
... self.all_to_all = ops.AlltoAllV([1, 2], [1, 2])
|
|
1968
|
-
... else:
|
|
1969
|
-
... self.all_to_all = ops.AlltoAllV([2, 1], [2, 1])
|
|
1970
|
-
...
|
|
1971
|
-
... def construct(self, x):
|
|
1972
|
-
... return self.all_to_all(x)
|
|
1973
|
-
...
|
|
1974
|
-
>>> if rank == 0:
|
|
1975
|
-
... send_tensor = Tensor([0, 1, 2.])
|
|
1976
|
-
... elif rank == 1:
|
|
1977
|
-
... send_tensor = Tensor([3, 4, 5.])
|
|
1978
|
-
>>> net = Net()
|
|
1979
|
-
>>> output = net(send_tensor)
|
|
1980
|
-
>>> print(output)
|
|
1981
|
-
rank 0:
|
|
1982
|
-
[0. 3. 4.]
|
|
1983
|
-
rank 1:
|
|
1984
|
-
[1. 2. 5.]
|
|
1985
|
-
|
|
1986
|
-
"""
|
|
1987
|
-
|
|
1988
|
-
@prim_attr_register
|
|
1989
|
-
def __init__(self, send_numel_list, recv_numel_list, group=None):
|
|
1990
|
-
validator.check_value_type("send_numel_list", send_numel_list, [tuple, list], self.name)
|
|
1991
|
-
validator.check_value_type("recv_numel_list", recv_numel_list, [tuple, list], self.name)
|
|
1992
|
-
if group is None:
|
|
1993
|
-
group = GlobalComm.WORLD_COMM_GROUP
|
|
1994
|
-
self.add_prim_attr('group', group)
|
|
1995
|
-
self.add_prim_attr('send_numel_list', send_numel_list)
|
|
1996
|
-
self.add_prim_attr('recv_numel_list', recv_numel_list)
|
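As noted in the removed AlltoAllV docstring, the primitive takes a flattened, concatenated input and returns a flat buffer. A minimal hedged sketch of that preparation and of splitting the result back, assuming 2 ranks, PyNative mode, and the same numel lists as the removed example; the concat/split handling around the primitive is illustrative, not prescribed by the original code::

    import mindspore as ms
    import mindspore.ops as ops
    from mindspore import Tensor
    from mindspore.communication import init, get_rank

    ms.set_context(mode=ms.PYNATIVE_MODE)
    init()
    rank = get_rank()

    # per-destination pieces: pieces[0] goes to rank 0, pieces[1] goes to rank 1
    if rank == 0:
        pieces = [Tensor([0.], ms.float32), Tensor([1., 2.], ms.float32)]
        send_numel_list, recv_numel_list = [1, 2], [1, 2]
    else:
        pieces = [Tensor([3., 4.], ms.float32), Tensor([5.], ms.float32)]
        send_numel_list, recv_numel_list = [2, 1], [2, 1]

    # flatten and concatenate before calling the primitive, as its Note requires
    flat_input = ops.concat([p.reshape(-1) for p in pieces])
    flat_output = ops.AlltoAllV(send_numel_list, recv_numel_list)(flat_input)

    # split the flat result back into one piece per source rank
    received = ops.split(flat_output, recv_numel_list)
    print(received)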