mindspore-2.1.0-cp38-cp38-win_amd64.whl → mindspore-2.2.11-cp38-cp38-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mindspore might be problematic.
- mindspore/.commit_id +1 -1
- mindspore/Microsoft.VisualStudio.Telemetry.dll +0 -0
- mindspore/Newtonsoft.Json.dll +0 -0
- mindspore/__init__.py +4 -1
- mindspore/_c_dataengine.cp38-win_amd64.pyd +0 -0
- mindspore/_c_expression.cp38-win_amd64.pyd +0 -0
- mindspore/_c_mindrecord.cp38-win_amd64.pyd +0 -0
- mindspore/_check_jit_forbidden_api.py +3 -1
- mindspore/_checkparam.py +23 -29
- mindspore/_extends/graph_kernel/__init__.py +0 -1
- mindspore/_extends/graph_kernel/model/graph_split.py +84 -76
- mindspore/_extends/graph_kernel/model/model_builder.py +9 -50
- mindspore/_extends/graph_kernel/splitter.py +4 -11
- mindspore/_extends/parallel_compile/akg_compiler/akg_process.py +122 -15
- mindspore/_extends/parallel_compile/akg_compiler/build_tbe_kernel.py +84 -67
- mindspore/_extends/parallel_compile/akg_compiler/tbe_topi.py +4 -2
- mindspore/_extends/parallel_compile/akg_compiler/util.py +10 -7
- mindspore/_extends/parallel_compile/tbe_compiler/tbe_adapter.py +2 -2
- mindspore/_extends/parallel_compile/tbe_compiler/tbe_helper.py +6 -5
- mindspore/_extends/parallel_compile/tbe_compiler/tbe_job.py +1 -1
- mindspore/_extends/parallel_compile/tbe_compiler/tbe_job_manager.py +1 -1
- mindspore/_extends/parse/__init__.py +13 -15
- mindspore/_extends/parse/namespace.py +7 -33
- mindspore/_extends/parse/parser.py +67 -72
- mindspore/_extends/parse/resources.py +1 -1
- mindspore/_extends/parse/standard_method.py +86 -106
- mindspore/_extends/parse/trope.py +1 -1
- mindspore/_extends/remote/kernel_build_server.py +25 -7
- mindspore/_extends/remote/kernel_build_server_akg_v2.py +55 -0
- mindspore/_install_custom.py +43 -0
- mindspore/amp.py +47 -11
- mindspore/atlprov.dll +0 -0
- mindspore/boost/boost.py +1 -8
- mindspore/boost/boost_cell_wrapper.py +3 -2
- mindspore/boost/grad_accumulation.py +1 -1
- mindspore/boost/group_loss_scale_manager.py +8 -7
- mindspore/c1.dll +0 -0
- mindspore/c1xx.dll +0 -0
- mindspore/c2.dll +0 -0
- mindspore/common/__init__.py +5 -3
- mindspore/common/_jit_fallback_utils.py +6 -0
- mindspore/common/_register_for_adapter.py +2 -0
- mindspore/common/_register_for_tensor.py +2 -2
- mindspore/common/_stub_tensor.py +13 -0
- mindspore/common/_utils.py +29 -0
- mindspore/common/api.py +174 -259
- mindspore/common/auto_dynamic_shape.py +494 -0
- mindspore/common/dtype.py +18 -11
- mindspore/common/dump.py +6 -4
- mindspore/common/initializer.py +14 -14
- mindspore/common/jit_config.py +33 -15
- mindspore/common/lazy_inline.py +126 -7
- mindspore/common/mindir_util.py +101 -0
- mindspore/common/parameter.py +51 -41
- mindspore/common/seed.py +4 -4
- mindspore/common/sparse_tensor.py +13 -14
- mindspore/common/tensor.py +243 -165
- mindspore/communication/__init__.py +7 -4
- mindspore/communication/_comm_helper.py +83 -4
- mindspore/communication/management.py +152 -84
- mindspore/config/op_info.config +14 -3
- mindspore/context.py +152 -61
- mindspore/dataset/__init__.py +5 -5
- mindspore/dataset/audio/__init__.py +2 -2
- mindspore/dataset/audio/transforms.py +52 -52
- mindspore/dataset/callback/ds_callback.py +16 -2
- mindspore/dataset/core/config.py +68 -51
- mindspore/dataset/engine/cache_client.py +33 -7
- mindspore/dataset/engine/datasets.py +250 -112
- mindspore/dataset/engine/datasets_audio.py +43 -211
- mindspore/dataset/engine/datasets_standard_format.py +16 -35
- mindspore/dataset/engine/datasets_text.py +43 -67
- mindspore/dataset/engine/datasets_user_defined.py +86 -100
- mindspore/dataset/engine/datasets_vision.py +219 -1029
- mindspore/dataset/engine/iterators.py +11 -4
- mindspore/dataset/engine/obs/obs_mindrecord_dataset.py +4 -0
- mindspore/dataset/engine/obs/util.py +3 -0
- mindspore/dataset/engine/samplers.py +1 -1
- mindspore/dataset/engine/validators.py +19 -5
- mindspore/dataset/text/__init__.py +3 -3
- mindspore/dataset/text/transforms.py +101 -127
- mindspore/dataset/text/utils.py +205 -138
- mindspore/dataset/transforms/__init__.py +1 -1
- mindspore/dataset/transforms/py_transforms_util.py +40 -12
- mindspore/dataset/transforms/transforms.py +95 -40
- mindspore/dataset/utils/browse_dataset.py +8 -2
- mindspore/dataset/utils/line_reader.py +17 -19
- mindspore/dataset/vision/__init__.py +3 -3
- mindspore/dataset/vision/c_transforms.py +6 -3
- mindspore/dataset/vision/transforms.py +409 -287
- mindspore/dataset/vision/utils.py +13 -14
- mindspore/dataset/vision/validators.py +11 -1
- mindspore/dnnl.dll +0 -0
- mindspore/dpcmi.dll +0 -0
- mindspore/experimental/map_parameter.py +14 -0
- mindspore/{nn/optim_ex → experimental/optim}/__init__.py +30 -29
- mindspore/{nn/optim_ex → experimental/optim}/adam.py +60 -67
- mindspore/{nn/optim_ex → experimental/optim}/adamw.py +181 -203
- mindspore/experimental/optim/lr_scheduler.py +1427 -0
- mindspore/{nn/optim_ex → experimental/optim}/optimizer.py +252 -259
- mindspore/{nn/optim_ex → experimental/optim}/sgd.py +147 -152
- mindspore/gen_ops.py +273 -0
- mindspore/include/OWNERS +0 -1
- mindspore/include/api/data_type.h +2 -1
- mindspore/include/api/graph.h +0 -15
- mindspore/include/api/kernel.h +2 -0
- mindspore/include/api/kernel_api.h +37 -12
- mindspore/include/api/model.h +17 -14
- mindspore/include/api/status.h +8 -3
- mindspore/include/api/types.h +37 -4
- mindspore/include/c_api/ms/abstract.h +67 -0
- mindspore/include/c_api/ms/attribute.h +197 -0
- mindspore/include/c_api/ms/base/handle_types.h +43 -0
- mindspore/include/c_api/ms/base/macros.h +32 -0
- mindspore/include/c_api/ms/base/status.h +33 -0
- mindspore/include/c_api/ms/base/types.h +282 -0
- mindspore/include/c_api/ms/context.h +102 -0
- mindspore/include/c_api/ms/graph.h +160 -0
- mindspore/include/c_api/ms/node.h +606 -0
- mindspore/include/c_api/ms/tensor.h +161 -0
- mindspore/include/c_api/ms/value.h +84 -0
- mindspore/include/dataset/constants.h +6 -5
- mindspore/include/dataset/execute.h +23 -13
- mindspore/include/dataset/text.h +26 -26
- mindspore/include/dataset/transforms.h +13 -13
- mindspore/include/dataset/vision.h +60 -60
- mindspore/include/dataset/vision_ascend.h +5 -6
- mindspore/include/dataset/vision_lite.h +17 -17
- mindspore/jpeg62.dll +0 -0
- mindspore/mindrecord/tools/imagenet_to_mr.py +1 -1
- mindspore/mindrecord/tools/mnist_to_mr.py +2 -2
- mindspore/mindspore_backend.dll +0 -0
- mindspore/mindspore_common.dll +0 -0
- mindspore/mindspore_core.dll +0 -0
- mindspore/mindspore_glog.dll +0 -0
- mindspore/mindspore_shared_lib.dll +0 -0
- mindspore/msobj140.dll +0 -0
- mindspore/mspdb140.dll +0 -0
- mindspore/mspdbcore.dll +0 -0
- mindspore/mspdbst.dll +0 -0
- mindspore/mspft140.dll +0 -0
- mindspore/msvcdis140.dll +0 -0
- mindspore/msvcp140_1.dll +0 -0
- mindspore/msvcp140_2.dll +0 -0
- mindspore/msvcp140_atomic_wait.dll +0 -0
- mindspore/msvcp140_codecvt_ids.dll +0 -0
- mindspore/nn/__init__.py +0 -2
- mindspore/nn/cell.py +313 -74
- mindspore/nn/dynamic_lr.py +21 -21
- mindspore/nn/layer/activation.py +22 -30
- mindspore/nn/layer/basic.py +15 -13
- mindspore/nn/layer/channel_shuffle.py +1 -1
- mindspore/nn/layer/container.py +271 -9
- mindspore/nn/layer/conv.py +323 -204
- mindspore/nn/layer/dense.py +8 -5
- mindspore/nn/layer/embedding.py +33 -27
- mindspore/nn/layer/flash_attention.py +61 -95
- mindspore/nn/layer/image.py +8 -6
- mindspore/nn/layer/math.py +16 -25
- mindspore/nn/layer/normalization.py +107 -66
- mindspore/nn/layer/padding.py +1 -1
- mindspore/nn/layer/pooling.py +131 -109
- mindspore/nn/layer/rnn_cells.py +27 -22
- mindspore/nn/layer/rnns.py +13 -16
- mindspore/nn/layer/thor_layer.py +1 -1
- mindspore/nn/layer/transformer.py +221 -154
- mindspore/nn/learning_rate_schedule.py +9 -1
- mindspore/nn/loss/loss.py +235 -174
- mindspore/nn/optim/ada_grad.py +2 -1
- mindspore/nn/optim/adadelta.py +1 -0
- mindspore/nn/optim/adafactor.py +2 -1
- mindspore/nn/optim/adam.py +7 -4
- mindspore/nn/optim/adamax.py +3 -2
- mindspore/nn/optim/adasum.py +2 -2
- mindspore/nn/optim/asgd.py +2 -3
- mindspore/nn/optim/ftrl.py +6 -5
- mindspore/nn/optim/lamb.py +7 -4
- mindspore/nn/optim/lars.py +1 -1
- mindspore/nn/optim/lazyadam.py +5 -3
- mindspore/nn/optim/momentum.py +2 -1
- mindspore/nn/optim/optimizer.py +53 -4
- mindspore/nn/optim/proximal_ada_grad.py +3 -4
- mindspore/nn/optim/rmsprop.py +4 -3
- mindspore/nn/optim/rprop.py +23 -12
- mindspore/nn/optim/sgd.py +26 -11
- mindspore/nn/optim/thor.py +9 -7
- mindspore/nn/probability/bijector/bijector.py +5 -5
- mindspore/nn/probability/bijector/power_transform.py +27 -27
- mindspore/nn/probability/bijector/softplus.py +3 -3
- mindspore/nn/probability/distribution/_utils/custom_ops.py +3 -3
- mindspore/nn/probability/distribution/bernoulli.py +5 -5
- mindspore/nn/probability/distribution/beta.py +3 -3
- mindspore/nn/probability/distribution/categorical.py +7 -7
- mindspore/nn/probability/distribution/cauchy.py +0 -1
- mindspore/nn/probability/distribution/distribution.py +3 -3
- mindspore/nn/probability/distribution/gamma.py +3 -3
- mindspore/nn/probability/distribution/geometric.py +4 -4
- mindspore/nn/probability/distribution/gumbel.py +4 -4
- mindspore/nn/probability/distribution/log_normal.py +2 -2
- mindspore/nn/probability/distribution/logistic.py +2 -2
- mindspore/nn/probability/distribution/poisson.py +4 -4
- mindspore/nn/probability/distribution/transformed_distribution.py +3 -3
- mindspore/nn/probability/distribution/uniform.py +6 -6
- mindspore/nn/wrap/__init__.py +4 -2
- mindspore/nn/wrap/cell_wrapper.py +87 -34
- mindspore/nn/wrap/grad_reducer.py +8 -5
- mindspore/nn/wrap/loss_scale.py +105 -42
- mindspore/numpy/array_creations.py +1 -2
- mindspore/numpy/array_ops.py +3 -2
- mindspore/numpy/utils_const.py +5 -5
- mindspore/opencv_core452.dll +0 -0
- mindspore/opencv_imgcodecs452.dll +0 -0
- mindspore/opencv_imgproc452.dll +0 -0
- mindspore/ops/_grad_experimental/__init__.py +0 -5
- mindspore/ops/_grad_experimental/grad_array_ops.py +2 -3
- mindspore/ops/_grad_experimental/grad_comm_ops.py +15 -2
- mindspore/ops/_grad_experimental/grad_debug_ops.py +0 -37
- mindspore/ops/_grad_experimental/grad_implementations.py +11 -1
- mindspore/ops/_grad_experimental/grad_inner_ops.py +2 -216
- mindspore/ops/_grad_experimental/grad_math_ops.py +19 -199
- mindspore/ops/_grad_experimental/grad_sparse.py +15 -0
- mindspore/ops/_grad_experimental/grad_sparse_ops.py +3 -3
- mindspore/ops/_op_impl/_custom_op/dsd_back_impl.py +1 -1
- mindspore/ops/_op_impl/aicpu/__init__.py +14 -2
- mindspore/ops/_op_impl/aicpu/add.py +3 -3
- mindspore/ops/_op_impl/aicpu/bias_add_grad.py +0 -1
- mindspore/ops/_op_impl/aicpu/count_nonzero.py +43 -0
- mindspore/ops/_op_impl/{_custom_op/flash_attention/constants.py → aicpu/eps.py} +18 -27
- mindspore/ops/_op_impl/aicpu/gamma.py +2 -2
- mindspore/ops/_op_impl/aicpu/linear_sum_assignment.py +21 -2
- mindspore/ops/_op_impl/aicpu/log_uniform_candidate_sampler.py +6 -3
- mindspore/ops/_op_impl/aicpu/lu_unpack_grad.py +0 -1
- mindspore/ops/_op_impl/aicpu/multinomial.py +3 -3
- mindspore/ops/_op_impl/aicpu/parameterized_truncated_normal.py +15 -7
- mindspore/ops/_op_impl/aicpu/random_categorical.py +39 -19
- mindspore/ops/_op_impl/aicpu/random_choice_with_mask.py +5 -2
- mindspore/ops/_op_impl/aicpu/random_poisson.py +103 -52
- mindspore/ops/_op_impl/aicpu/random_shuffle.py +17 -15
- mindspore/ops/_op_impl/aicpu/{sparseaddmm.py → sparse_addmm.py} +2 -2
- mindspore/ops/_op_impl/aicpu/{sparsesparsemaximum.py → sparse_sparse_maximum.py} +4 -4
- mindspore/ops/_op_impl/aicpu/standard_laplace.py +5 -5
- mindspore/ops/_op_impl/aicpu/standard_normal.py +5 -5
- mindspore/ops/_op_impl/aicpu/truncated_normal.py +9 -7
- mindspore/ops/_op_impl/aicpu/uniform.py +5 -3
- mindspore/ops/_op_impl/aicpu/uniform_candidate_sampler.py +8 -4
- mindspore/ops/_op_impl/aicpu/uniform_int.py +5 -5
- mindspore/ops/_op_impl/aicpu/uniform_real.py +4 -4
- mindspore/ops/_op_impl/tbe/__init__.py +4 -4
- mindspore/ops/_op_impl/tbe/inplace_index_add.py +7 -3
- mindspore/ops/_op_impl/tbe/trans_data_ds.py +2 -0
- mindspore/ops/_primitive_cache.py +1 -1
- mindspore/ops/_tracefunc.py +45 -13
- mindspore/ops/_utils/utils.py +6 -1
- mindspore/ops/_vmap/vmap_array_ops.py +3 -3
- mindspore/ops/_vmap/vmap_base.py +3 -3
- mindspore/ops/_vmap/vmap_convolution_ops.py +1 -1
- mindspore/ops/_vmap/vmap_grad_math_ops.py +6 -4
- mindspore/ops/_vmap/vmap_math_ops.py +5 -2
- mindspore/ops/_vmap/vmap_nn_ops.py +61 -7
- mindspore/ops/arg_dtype_cast.py +54 -0
- mindspore/ops/composite/base.py +37 -10
- mindspore/ops/composite/math_ops.py +5 -4
- mindspore/ops/composite/multitype_ops/_compile_utils.py +275 -73
- mindspore/ops/composite/multitype_ops/_constexpr_utils.py +16 -9
- mindspore/ops/composite/multitype_ops/add_impl.py +43 -4
- mindspore/ops/composite/multitype_ops/getitem_impl.py +42 -4
- mindspore/ops/composite/multitype_ops/ones_like_impl.py +6 -0
- mindspore/ops/composite/multitype_ops/setitem_impl.py +2 -1
- mindspore/ops/composite/multitype_ops/zeros_like_impl.py +9 -0
- mindspore/ops/deprecated.py +304 -0
- mindspore/ops/function/__init__.py +4 -1
- mindspore/ops/function/array_func.py +174 -193
- mindspore/ops/function/clip_func.py +81 -13
- mindspore/ops/function/debug_func.py +1 -1
- mindspore/ops/function/grad/grad_func.py +18 -9
- mindspore/ops/function/image_func.py +10 -4
- mindspore/ops/function/linalg_func.py +5 -5
- mindspore/ops/function/math_func.py +575 -386
- mindspore/ops/function/nn_func.py +568 -260
- mindspore/ops/function/random_func.py +88 -57
- mindspore/ops/function/sparse_func.py +1 -1
- mindspore/ops/function/sparse_unary_func.py +14 -12
- mindspore/ops/function/vmap_func.py +6 -5
- mindspore/ops/functional.py +15 -10
- mindspore/ops/op_info_register.py +244 -25
- mindspore/ops/operations/__init__.py +31 -19
- mindspore/ops/operations/_grad_ops.py +71 -7
- mindspore/ops/operations/_inner_ops.py +350 -17
- mindspore/ops/operations/_quant_ops.py +4 -8
- mindspore/ops/operations/_sequence_ops.py +42 -0
- mindspore/ops/operations/array_ops.py +68 -282
- mindspore/ops/operations/comm_ops.py +107 -59
- mindspore/ops/operations/custom_ops.py +94 -70
- mindspore/ops/operations/debug_ops.py +8 -4
- mindspore/ops/operations/image_ops.py +18 -12
- mindspore/ops/operations/inner_ops.py +26 -3
- mindspore/ops/operations/math_ops.py +192 -144
- mindspore/ops/operations/nn_ops.py +857 -489
- mindspore/ops/operations/other_ops.py +0 -22
- mindspore/ops/operations/random_ops.py +53 -111
- mindspore/ops/operations/sparse_ops.py +3 -1
- mindspore/ops/primitive.py +24 -18
- mindspore/parallel/_auto_parallel_context.py +68 -8
- mindspore/parallel/_cost_model_context.py +2 -2
- mindspore/parallel/_offload_context.py +17 -3
- mindspore/parallel/_parallel_serialization.py +12 -5
- mindspore/parallel/_ps_context.py +12 -0
- mindspore/parallel/_tensor.py +18 -13
- mindspore/parallel/_transformer/layers.py +5 -3
- mindspore/parallel/_transformer/loss.py +1 -0
- mindspore/parallel/_transformer/moe.py +2 -2
- mindspore/parallel/_transformer/op_parallel_config.py +12 -1
- mindspore/parallel/_transformer/transformer.py +23 -3
- mindspore/parallel/_utils.py +11 -7
- mindspore/parallel/algo_parameter_config.py +85 -5
- mindspore/parallel/checkpoint_transform.py +19 -12
- mindspore/parallel/shard.py +21 -14
- mindspore/pgodb140.dll +0 -0
- mindspore/pgort140.dll +0 -0
- mindspore/profiler/common/struct_type.py +3 -3
- mindspore/profiler/common/util.py +4 -2
- mindspore/profiler/envprofiling.py +1 -1
- mindspore/profiler/parser/aicpu_data_parser.py +5 -3
- mindspore/profiler/parser/ascend_flops_generator.py +2 -2
- mindspore/profiler/parser/ascend_fpbp_generator.py +1 -1
- mindspore/profiler/parser/ascend_hccl_generator.py +249 -12
- mindspore/profiler/parser/ascend_msprof_exporter.py +150 -255
- mindspore/profiler/parser/ascend_msprof_generator.py +204 -17
- mindspore/profiler/parser/ascend_op_generator.py +6 -6
- mindspore/profiler/parser/ascend_steptrace_generator.py +6 -4
- mindspore/profiler/parser/ascend_timeline_generator.py +14 -187
- mindspore/profiler/parser/base_timeline_generator.py +10 -8
- mindspore/profiler/parser/cpu_gpu_timeline_generator.py +16 -12
- mindspore/profiler/parser/flops_parser.py +15 -11
- mindspore/profiler/parser/framework_parser.py +38 -22
- mindspore/profiler/parser/hccl_parser.py +16 -12
- mindspore/profiler/parser/integrator.py +22 -11
- mindspore/profiler/parser/memory_usage_parser.py +2 -2
- mindspore/profiler/parser/minddata_analyzer.py +12 -14
- mindspore/profiler/parser/minddata_pipeline_parser.py +1 -1
- mindspore/profiler/parser/msadvisor_parser.py +8 -4
- mindspore/profiler/parser/op_intermediate_parser.py +5 -2
- mindspore/profiler/parser/optime_parser.py +1 -1
- mindspore/profiler/parser/profiler_info.py +21 -2
- mindspore/profiler/parser/step_trace_parser.py +11 -14
- mindspore/profiler/profiling.py +179 -89
- mindspore/rewrite/api/node.py +102 -19
- mindspore/rewrite/api/node_type.py +5 -1
- mindspore/rewrite/api/pattern_engine.py +1 -1
- mindspore/rewrite/api/scoped_value.py +9 -17
- mindspore/rewrite/api/symbol_tree.py +131 -47
- mindspore/rewrite/ast_helpers/__init__.py +2 -1
- mindspore/rewrite/ast_helpers/ast_finder.py +129 -0
- mindspore/rewrite/ast_helpers/ast_modifier.py +116 -104
- mindspore/rewrite/ast_transformers/flatten_recursive_stmt.py +93 -46
- mindspore/rewrite/common/rewrite_elog.py +5 -1
- mindspore/rewrite/namer.py +33 -24
- mindspore/rewrite/namespace.py +14 -5
- mindspore/{_extends/graph_kernel/expanders/complex → rewrite/node}/__init__.py +9 -9
- mindspore/rewrite/node/call_function.py +79 -0
- mindspore/rewrite/node/cell_container.py +135 -0
- mindspore/rewrite/node/control_flow.py +88 -0
- mindspore/rewrite/{node.py → node/node.py} +273 -234
- mindspore/rewrite/node/node_manager.py +254 -0
- mindspore/rewrite/{topological_manager.py → node/node_topological_manager.py} +13 -46
- mindspore/rewrite/parsers/arguments_parser.py +22 -21
- mindspore/rewrite/parsers/assign_parser.py +216 -221
- mindspore/rewrite/parsers/attribute_parser.py +9 -7
- mindspore/rewrite/parsers/class_def_parser.py +174 -113
- mindspore/rewrite/parsers/constant_parser.py +9 -6
- mindspore/rewrite/parsers/container_parser.py +9 -7
- mindspore/rewrite/parsers/for_parser.py +42 -21
- mindspore/rewrite/parsers/function_def_parser.py +24 -16
- mindspore/rewrite/parsers/if_parser.py +28 -24
- mindspore/rewrite/parsers/module_parser.py +196 -25
- mindspore/rewrite/{parser.py → parsers/parser.py} +4 -2
- mindspore/rewrite/{parser_register.py → parsers/parser_register.py} +1 -1
- mindspore/rewrite/parsers/return_parser.py +6 -6
- mindspore/rewrite/sparsify/sparse_transformer.py +12 -3
- mindspore/rewrite/sparsify/utils.py +1 -1
- mindspore/rewrite/symbol_tree.py +523 -578
- mindspore/rewrite/symbol_tree_builder.py +9 -193
- mindspore/rewrite/symbol_tree_dumper.py +2 -2
- mindspore/run_check/_check_version.py +6 -4
- mindspore/{ops/bprop_mindir → safeguard}/__init__.py +4 -3
- mindspore/safeguard/rewrite_obfuscation.py +541 -0
- mindspore/tbbmalloc.dll +0 -0
- mindspore/tinyxml2.dll +0 -0
- mindspore/train/_utils.py +7 -3
- mindspore/train/amp.py +323 -123
- mindspore/train/anf_ir_pb2.py +14 -2
- mindspore/train/callback/_backup_and_restore.py +2 -12
- mindspore/train/callback/_callback.py +29 -4
- mindspore/train/callback/_checkpoint.py +23 -8
- mindspore/train/callback/_early_stop.py +2 -2
- mindspore/train/callback/_landscape.py +4 -4
- mindspore/train/callback/_loss_monitor.py +2 -2
- mindspore/train/callback/_on_request_exit.py +2 -2
- mindspore/train/callback/_reduce_lr_on_plateau.py +3 -4
- mindspore/train/callback/_summary_collector.py +15 -8
- mindspore/train/callback/_time_monitor.py +58 -5
- mindspore/train/data_sink.py +5 -11
- mindspore/train/dataset_helper.py +84 -57
- mindspore/train/loss_scale_manager.py +2 -2
- mindspore/train/metrics/__init__.py +3 -3
- mindspore/train/metrics/cosine_similarity.py +1 -1
- mindspore/train/metrics/hausdorff_distance.py +3 -2
- mindspore/train/metrics/mean_surface_distance.py +3 -2
- mindspore/train/metrics/metric.py +39 -19
- mindspore/train/metrics/roc.py +2 -2
- mindspore/train/metrics/root_mean_square_surface_distance.py +4 -3
- mindspore/train/mind_ir_pb2.py +85 -36
- mindspore/train/model.py +187 -47
- mindspore/train/serialization.py +487 -161
- mindspore/train/summary/_summary_adapter.py +1 -1
- mindspore/train/summary/_writer_pool.py +3 -2
- mindspore/train/summary/summary_record.py +37 -17
- mindspore/train/train_thor/convert_utils.py +3 -3
- mindspore/train/train_thor/dataset_helper.py +1 -1
- mindspore/turbojpeg.dll +0 -0
- mindspore/vcmeta.dll +0 -0
- mindspore/vcruntime140.dll +0 -0
- mindspore/vcruntime140_1.dll +0 -0
- mindspore/version.py +1 -1
- {mindspore-2.1.0.dist-info → mindspore-2.2.11.dist-info}/METADATA +7 -4
- {mindspore-2.1.0.dist-info → mindspore-2.2.11.dist-info}/RECORD +429 -486
- mindspore/_extends/graph_kernel/expander.py +0 -80
- mindspore/_extends/graph_kernel/expanders/__init__.py +0 -54
- mindspore/_extends/graph_kernel/expanders/_utils.py +0 -269
- mindspore/_extends/graph_kernel/expanders/addn.py +0 -33
- mindspore/_extends/graph_kernel/expanders/batchnorm.py +0 -152
- mindspore/_extends/graph_kernel/expanders/batchnorm_grad.py +0 -105
- mindspore/_extends/graph_kernel/expanders/clip_by_norm_no_div_sum.py +0 -33
- mindspore/_extends/graph_kernel/expanders/complex/abs.py +0 -30
- mindspore/_extends/graph_kernel/expanders/complex/add.py +0 -44
- mindspore/_extends/graph_kernel/expanders/complex/div.py +0 -62
- mindspore/_extends/graph_kernel/expanders/complex/mul.py +0 -52
- mindspore/_extends/graph_kernel/expanders/complex/real_div.py +0 -62
- mindspore/_extends/graph_kernel/expanders/complex/sub.py +0 -45
- mindspore/_extends/graph_kernel/expanders/conv2d.py +0 -200
- mindspore/_extends/graph_kernel/expanders/dropout_grad.py +0 -30
- mindspore/_extends/graph_kernel/expanders/equal_count.py +0 -50
- mindspore/_extends/graph_kernel/expanders/erfc.py +0 -35
- mindspore/_extends/graph_kernel/expanders/expand_dims.py +0 -50
- mindspore/_extends/graph_kernel/expanders/fused_adam.py +0 -44
- mindspore/_extends/graph_kernel/expanders/fused_adam_weight_decay.py +0 -47
- mindspore/_extends/graph_kernel/expanders/fused_mul_add.py +0 -28
- mindspore/_extends/graph_kernel/expanders/gelu_grad.py +0 -70
- mindspore/_extends/graph_kernel/expanders/gkdropout.py +0 -40
- mindspore/_extends/graph_kernel/expanders/identity.py +0 -25
- mindspore/_extends/graph_kernel/expanders/layernorm.py +0 -93
- mindspore/_extends/graph_kernel/expanders/layernorm_grad.py +0 -113
- mindspore/_extends/graph_kernel/expanders/logsoftmax.py +0 -46
- mindspore/_extends/graph_kernel/expanders/logsoftmax_grad.py +0 -36
- mindspore/_extends/graph_kernel/expanders/matmul.py +0 -80
- mindspore/_extends/graph_kernel/expanders/maximum_grad.py +0 -59
- mindspore/_extends/graph_kernel/expanders/minimum_grad.py +0 -80
- mindspore/_extends/graph_kernel/expanders/oneslike.py +0 -26
- mindspore/_extends/graph_kernel/expanders/reduce_mean.py +0 -43
- mindspore/_extends/graph_kernel/expanders/relu_grad.py +0 -32
- mindspore/_extends/graph_kernel/expanders/sigmoid_cross_entropy_with_logits.py +0 -41
- mindspore/_extends/graph_kernel/expanders/sigmoid_cross_entropy_with_logits_grad.py +0 -35
- mindspore/_extends/graph_kernel/expanders/sigmoid_grad.py +0 -31
- mindspore/_extends/graph_kernel/expanders/slice.py +0 -35
- mindspore/_extends/graph_kernel/expanders/softmax_cross_entropy_with_logits.py +0 -42
- mindspore/_extends/graph_kernel/expanders/softmax_grad_ext.py +0 -41
- mindspore/_extends/graph_kernel/expanders/softsign.py +0 -28
- mindspore/_extends/graph_kernel/expanders/sqrt_grad.py +0 -29
- mindspore/_extends/graph_kernel/expanders/square_sum_all.py +0 -44
- mindspore/_extends/graph_kernel/expanders/square_sum_v1.py +0 -37
- mindspore/_extends/graph_kernel/expanders/squared_difference.py +0 -43
- mindspore/_extends/graph_kernel/expanders/tanh_grad.py +0 -31
- mindspore/_extends/graph_kernel/model/op_infer.py +0 -506
- mindspore/dataset/datapreprocess/__init__.py +0 -20
- mindspore/dataset/datapreprocess/preprocess_imagenet_validate_dataset.py +0 -54
- mindspore/include/api/net.h +0 -142
- mindspore/nn/lr_scheduler.py +0 -262
- mindspore/ops/_grad_experimental/grad_image_ops.py +0 -248
- mindspore/ops/_grad_experimental/grad_linalg_ops.py +0 -181
- mindspore/ops/_grad_experimental/grad_other_ops.py +0 -72
- mindspore/ops/_grad_experimental/grad_scalar_ops.py +0 -112
- mindspore/ops/_grad_experimental/grad_sequence_ops.py +0 -351
- mindspore/ops/_op_impl/_custom_op/flash_attention/__init__.py +0 -0
- mindspore/ops/_op_impl/_custom_op/flash_attention/attention.py +0 -350
- mindspore/ops/_op_impl/_custom_op/flash_attention/flash_attention_bwd.py +0 -409
- mindspore/ops/_op_impl/_custom_op/flash_attention/flash_attention_fwd.py +0 -578
- mindspore/ops/_op_impl/_custom_op/flash_attention/flash_attention_impl.py +0 -199
- mindspore/ops/_op_impl/_custom_op/flash_attention/tik_ops_utils.py +0 -446
- mindspore/ops/_op_impl/_custom_op/flash_attention/tiling_strategy/__init__.py +0 -0
- mindspore/ops/_op_impl/_custom_op/flash_attention/tiling_strategy/sparse_tiling.py +0 -45
- mindspore/ops/_op_impl/_custom_op/flash_attention/tiling_strategy/strategy.py +0 -67
- mindspore/ops/_op_impl/_custom_op/flash_attention/tiling_strategy/wukong_tiling.py +0 -62
- mindspore/ops/bprop_mindir/BNTrainingReduce_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Broadcast_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Depend_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/DepthwiseConv2dNative_bprop.mindir +0 -138
- mindspore/ops/bprop_mindir/EmbeddingLookup_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Load_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/ScatterNonAliasingAdd_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/SparseGatherV2_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/SparseSoftmaxCrossEntropyWithLogits_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Switch_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/TransShape_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/TupleGetItem_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Unique_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Unstack_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/generate_mindir.py +0 -114
- mindspore/rewrite/node_visitor.py +0 -44
- {mindspore-2.1.0.dist-info → mindspore-2.2.11.dist-info}/WHEEL +0 -0
- {mindspore-2.1.0.dist-info → mindspore-2.2.11.dist-info}/entry_points.txt +0 -0
- {mindspore-2.1.0.dist-info → mindspore-2.2.11.dist-info}/top_level.txt +0 -0
mindspore/communication/__init__.py

@@ -18,12 +18,15 @@ Collective communication interface.
 Note that the APIs in the following list need to preset communication environment variables.

 For the Ascend devices, users need to prepare the rank table, set rank_id and device_id.
-Please see the `
-<https://www.mindspore.cn/tutorials/experts/en/r2.
+Please see the `rank table Startup
+<https://www.mindspore.cn/tutorials/experts/en/r2.2/parallel/rank_table.html>`_
 for more details.

-For the GPU devices, users need to prepare the host file and mpi, please see the `
-<https://www.mindspore.cn/tutorials/experts/en/r2.
+For the GPU devices, users need to prepare the host file and mpi, please see the `mpirun Startup
+<https://www.mindspore.cn/tutorials/experts/en/r2.2/parallel/mpirun.html>`_ .
+
+For the CPU device, users need to write a dynamic cluster startup script, please see the `Dynamic Cluster Startup
+<https://www.mindspore.cn/tutorials/experts/en/r2.2/parallel/dynamic_cluster.html>`_ .
 """

 from mindspore.communication.management import GlobalComm, init, release, get_rank, \
mindspore/communication/_comm_helper.py

@@ -15,6 +15,7 @@
 """comm_helper"""

 import os
+import glob
 import ctypes

 import sys

@@ -25,6 +26,7 @@ from mindspore.parallel._ps_context import _is_role_worker, _is_role_pserver, _i
 _get_ps_context
 from mindspore import log as logger
 from mindspore._c_expression import CollectiveManager, set_cluster_exit_with_exception, MSContext
+from mindspore.common._utils import load_lib

 HCCL_LIB = 'libhccl_plugin.so'

@@ -35,8 +37,8 @@ def hccl_load_lib():
 base_dir = os.path.dirname(os.path.realpath(__file__))
 lib_path = os.path.join(base_dir, "../lib/plugin/ascend", HCCL_LIB)
 ctypes.CDLL(lib_path)
-except Exception:
-raise RuntimeError('Get hccl lib error.')
+except Exception as exc:
+raise RuntimeError('Get hccl lib error.') from exc

 _HCCL_TEST_AVAILABLE = False

@@ -57,6 +59,11 @@ HCCL_WORLD_COMM_GROUP = "hccl_world_group"
 NCCL_WORLD_COMM_GROUP = "nccl_world_group"
 MCCL_WORLD_COMM_GROUP = "mccl_world_group"

+DEVICE_TO_BACKEND = {
+"Ascend": "hccl",
+"GPU": "nccl",
+"CPU": "mccl"
+}

 class Backend:
 """

@@ -102,8 +109,12 @@ class GlobalComm:
 """
 World communication information. The GlobalComm is a global class. The members contain:

-- ``BACKEND`` : The communication library used, using
-
+- ``BACKEND`` : The communication library used, using ``"hccl"`` / ``"nccl"`` / ``"mccl"`` .
+``"hccl"`` means Huawei Collective Communication Library(HCCL),
+``"nccl"`` means NVIDIA Collective Communication Library(NCCL),
+``"mccl"`` means MindSpore Collective Communication Library(MCCL).
+- ``WORLD_COMM_GROUP`` : Global communication domain,
+using ``"hccl_world_group"`` / ``"nccl_world_group"`` / ``"mccl_world_group"`` .
 """
 BACKEND = DEFAULT_BACKEND
 WORLD_COMM_GROUP = HCCL_WORLD_COMM_GROUP
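The new DEVICE_TO_BACKEND table makes the device-to-backend pairing explicit (Ascend uses hccl, GPU uses nccl, CPU uses mccl). As a rough illustration of how such a lookup drives backend selection, here is a minimal, self-contained sketch; pick_backend is a hypothetical helper written for this note, not a MindSpore API:

    # Mirror of the mapping added to _comm_helper.py in 2.2.11.
    DEVICE_TO_BACKEND = {
        "Ascend": "hccl",
        "GPU": "nccl",
        "CPU": "mccl",
    }

    def pick_backend(device_target):
        """Return the collective backend expected for a given device_target."""
        try:
            return DEVICE_TO_BACKEND[device_target]
        except KeyError as exc:
            # Same exception-chaining style the 2.2.11 code adopts ("raise ... from exc").
            raise ValueError("Unsupported device_target: " + device_target) from exc

    print(pick_backend("GPU"))  # nccl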
@@ -181,6 +192,60 @@ def check_parameter_available(func):
 return wrapper


+def _is_available():
+"""
+Returns `True` if distributed module is available.
+
+Note:
+Always returns `True` because MindSpore always has distributed ability on all platforms.
+"""
+return True
+
+
+def _is_initialized():
+"""
+Checks if distributed module is successfully initialized.
+"""
+return CollectiveManager.get_instance().initialized()
+
+
+def _get_backend():
+"""
+Returns the backend of communication process groups.
+
+Note:
+Only one communication backend is supported by MindSpore for each process.
+It should be one of `hccl`/`nccl`/`mccl`.
+"""
+return GlobalComm.BACKEND
+
+
+def _is_hccl_available():
+"""
+Checks if `hccl` backend is available.
+"""
+return _HCCL_TEST_AVAILABLE
+
+
+def _is_nccl_available():
+"""
+Checks if `nccl` backend is available.
+"""
+base_dir = os.path.dirname(os.path.realpath(__file__))
+lib_path = os.path.join(base_dir, "../lib/plugin/gpu*/libnvidia_collective.so")
+file_paths = glob.glob(lib_path)
+return all(list(load_lib(f) for f in file_paths))
+
+
+def _is_mpi_available():
+"""
+Checks if OpenMPI's library is available.
+"""
+base_dir = os.path.dirname(os.path.realpath(__file__))
+lib_path = os.path.join(base_dir, "../lib/libmpi_collective.so")
+return load_lib(lib_path)
+
+
 @check_parameter_available
 def _get_rank_helper(group):
 """
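Among the helpers added above, `_is_nccl_available` and `_is_mpi_available` decide availability by locating the relevant shared library and attempting to load it via `load_lib` from `mindspore.common._utils`. A minimal sketch of that probe pattern with plain `ctypes` (the `load_lib` stand-in and the example path are illustrative, not the real MindSpore implementations):

    import ctypes
    import glob
    import os

    def load_lib(path):
        """Simplified stand-in for mindspore.common._utils.load_lib."""
        try:
            ctypes.CDLL(path)
            return True
        except OSError:
            return False

    def plugin_available(pattern):
        """True if every shared library matching the glob pattern loads cleanly."""
        matches = glob.glob(pattern)
        return all(load_lib(path) for path in matches)

    # Illustrative pattern, mirroring the gpu plugin glob used by _is_nccl_available.
    print(plugin_available(os.path.join("lib", "plugin", "gpu*", "libnvidia_collective.so")))

Note that, as in `_is_nccl_available` above, `all()` over an empty match list evaluates to True, so the check only reports unavailability when a matching library exists but fails to load.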
@@ -320,6 +385,20 @@ def _get_group_rank_from_world_rank_helper(world_rank_id, group):
 return group_rank_id


+@check_parameter_available
+def _get_group_ranks(group):
+"""
+The Helper to do get_group_ranks.
+
+Args:
+group (str): The communication group.
+
+Returns:
+List. The ranks of specified group.
+"""
+return CollectiveManager.get_instance().get_group_ranks(group)
+
+
 @check_parameter_available
 def _create_group_helper(group, rank_ids):
 """
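`_get_group_ranks` follows the same shape as the other private helpers in `_comm_helper.py`: a thin wrapper, guarded by `check_parameter_available`, that delegates to the C++ `CollectiveManager` singleton and returns the world rank IDs of a group. A self-contained sketch of that shape, with the decorator simplified and the manager stubbed out (all names below are illustrative stand-ins, not the real classes):

    import functools

    def check_parameter_available(func):
        """Simplified stand-in for the decorator used in _comm_helper.py."""
        @functools.wraps(func)
        def wrapper(group):
            if not isinstance(group, str):
                raise TypeError("group must be a string, got {}".format(type(group)))
            return func(group)
        return wrapper

    class _FakeCollectiveManager:
        """Stub playing the role of CollectiveManager.get_instance()."""
        _groups = {"demo_group": [0, 4]}

        def get_group_ranks(self, group):
            return self._groups.get(group, [])

    @check_parameter_available
    def get_group_ranks(group):
        """Return the world rank IDs that belong to the given group."""
        return _FakeCollectiveManager().get_group_ranks(group)

    print(get_group_ranks("demo_group"))  # [0, 4]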
mindspore/communication/management.py

@@ -20,7 +20,7 @@ from mindspore.parallel._ps_context import _is_ps_mode, _is_role_pserver, _is_ro
 from mindspore.communication._comm_helper import Backend, _get_rank_helper, _get_size_helper, \
 _get_world_rank_from_group_rank_helper, _get_group_rank_from_world_rank_helper, \
 _create_group_helper, _destroy_group_helper, HCCL_WORLD_COMM_GROUP, NCCL_WORLD_COMM_GROUP, \
-MCCL_WORLD_COMM_GROUP, _get_local_rank_helper, _get_local_size_helper, GlobalComm, \
+MCCL_WORLD_COMM_GROUP, DEVICE_TO_BACKEND, _get_local_rank_helper, _get_local_size_helper, GlobalComm, \
 _check_mpi_envs, _set_elegant_exit_handle
 from mindspore._c_expression import init_hccl, finalize_hccl, init_cluster, MSContext, ms_ctx_param


@@ -93,18 +93,20 @@ def _check_parallel_envs():

 def init(backend_name=None):
 """
-Initialize distributed backends required by communication services, e.g.
-distributed parallel scenarios and set before using communication services.
+Initialize distributed backends required by communication services, e.g. ``"hccl"`` / ``"nccl"`` / ``"mccl"``.
+It is usually used in distributed parallel scenarios and set before using communication services.

 Note:
-- The full name of
-- The full name of
-- The full name of
+- The full name of ``"hccl"`` is Huawei Collective Communication Library(HCCL).
+- The full name of ``"nccl"`` is NVIDIA Collective Communication Library(NCCL).
+- The full name of ``"mccl"`` is MindSpore Collective Communication Library(MCCL).

 Args:
-backend_name (str): Backend, using
-
-
+backend_name (str): Backend, using ``"hccl"`` / ``"nccl"`` / ``"mccl"``.
+``"hccl"`` should be used for Ascend hardware platforms,
+``"nccl"`` for GPU hardware platforms and ``"mccl"`` for CPU hardware platforms.
+If not set, inference is automatically made based on the hardware
+platform type (device_target). Default: ``None`` .

 Raises:
 TypeError: If `backend_name` is not a string.

@@ -120,12 +122,15 @@ def init(backend_name=None):
 Before running the following examples, you need to configure the communication environment variables.

 For the Ascend devices, users need to prepare the rank table, set rank_id and device_id.
-Please see the `
-<https://www.mindspore.cn/tutorials/experts/en/r2.
+Please see the `rank table Startup
+<https://www.mindspore.cn/tutorials/experts/en/r2.2/parallel/rank_table.html>`_
 for more details.

-For the GPU devices, users need to prepare the host file and mpi, please see the `
-<https://www.mindspore.cn/tutorials/experts/en/r2.
+For the GPU devices, users need to prepare the host file and mpi, please see the `mpirun Startup
+<https://www.mindspore.cn/tutorials/experts/en/r2.2/parallel/mpirun.html>`_ .
+
+For the CPU device, users need to write a dynamic cluster startup script, please see the `Dynamic Cluster
+Startup <https://www.mindspore.cn/tutorials/experts/en/r2.2/parallel/dynamic_cluster.html>`_ .

 >>> from mindspore.communication import init
 >>> init()

@@ -158,29 +163,35 @@ def init(backend_name=None):
 _set_elegant_exit_handle()
 return
 if device_target != "Ascend":
-raise RuntimeError("For 'init', the argument
-"but got
+raise RuntimeError("For 'init', the argument 'backend_name' should be '{}' to init '{}', "
+"but got 'hccl'.".format(DEVICE_TO_BACKEND[device_target], device_target))
 if not host_init:
 _check_parallel_envs()
 GlobalComm.BACKEND = Backend("hccl")
 init_hccl()
 GlobalComm.WORLD_COMM_GROUP = HCCL_WORLD_COMM_GROUP
 elif backend_name == "nccl":
+if device_target != "GPU":
+raise RuntimeError("For 'init', the argument 'backend_name' should be '{}' to init '{}', "
+"but got 'nccl'.".format(DEVICE_TO_BACKEND[device_target], device_target))
 init_cluster()
+GlobalComm.BACKEND = Backend("nccl")
 GlobalComm.WORLD_COMM_GROUP = NCCL_WORLD_COMM_GROUP
 elif backend_name == "mccl":
 init_cluster()
+GlobalComm.BACKEND = Backend("mccl")
 GlobalComm.WORLD_COMM_GROUP = MCCL_WORLD_COMM_GROUP
 else:
-raise RuntimeError("For 'init', the argument 'backend_name' must be
-"but got
+raise RuntimeError("For 'init', the argument 'backend_name' must be one of 'hccl', 'nccl' and 'mccl', "
+"but got 'backend_name' : {}".format(backend_name))
+
 GlobalComm.INITED = True
 _set_elegant_exit_handle()


 def release():
 """
-Release distributed resource. e.g. HCCL/NCCL.
+Release distributed resource. e.g. HCCL/NCCL/MCCL.

 Note:
 This method should be used after init().
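The reworked `init()` above now checks the requested backend against `device_target` through `DEVICE_TO_BACKEND` and sets `GlobalComm.BACKEND` in every branch; per the updated docstring, the backend is inferred from the device target when `backend_name` is not given. A condensed sketch of that selection and validation logic, with the actual HCCL/cluster initialization calls left out (illustrative only, not the exact MindSpore code path):

    DEVICE_TO_BACKEND = {"Ascend": "hccl", "GPU": "nccl", "CPU": "mccl"}

    def resolve_backend(backend_name, device_target):
        """Infer or validate the collective backend for a device target."""
        expected = DEVICE_TO_BACKEND.get(device_target)
        if expected is None:
            raise ValueError("Unsupported device_target: {}".format(device_target))
        if backend_name is None:
            # Mirrors the documented behaviour: infer the backend from device_target.
            return expected
        if backend_name != expected:
            raise RuntimeError(
                "For 'init', the argument 'backend_name' should be '{}' to init '{}', "
                "but got '{}'.".format(expected, device_target, backend_name))
        return backend_name

    print(resolve_backend(None, "GPU"))       # nccl
    print(resolve_backend("hccl", "Ascend"))  # hccl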
@@ -189,19 +200,22 @@ def release():
 RuntimeError: If failed to release distributed resource.

 Supported Platforms:
-``Ascend`` ``GPU``
+``Ascend`` ``GPU`` ``CPU``

 Examples:
 .. note::
 Before running the following examples, you need to configure the communication environment variables.

 For the Ascend devices, users need to prepare the rank table, set rank_id and device_id.
-Please see the `
-<https://www.mindspore.cn/tutorials/experts/en/r2.
+Please see the `rank table Startup
+<https://www.mindspore.cn/tutorials/experts/en/r2.2/parallel/rank_table.html>`_
 for more details.

-For the GPU devices, users need to prepare the host file and mpi, please see the `
-<https://www.mindspore.cn/tutorials/experts/en/r2.
+For the GPU devices, users need to prepare the host file and mpi, please see the `mpirun Startup
+<https://www.mindspore.cn/tutorials/experts/en/r2.2/parallel/mpirun.html>`_ .
+
+For the CPU device, users need to write a dynamic cluster startup script, please see the `Dynamic Cluster
+Startup <https://www.mindspore.cn/tutorials/experts/en/r2.2/parallel/dynamic_cluster.html>`_ .

 >>> from mindspore.communication import init, release
 >>> init()

@@ -227,7 +241,7 @@ def get_rank(group=GlobalComm.WORLD_COMM_GROUP):
 Raises:
 TypeError: If group is not a string.
 ValueError: If backend is invalid.
-RuntimeError: If HCCL/NCCL is not available.
+RuntimeError: If HCCL/NCCL/MCCL is not available.

 Supported Platforms:
 ``Ascend`` ``GPU`` ``CPU``

@@ -237,12 +251,15 @@ def get_rank(group=GlobalComm.WORLD_COMM_GROUP):
 Before running the following examples, you need to configure the communication environment variables.

 For the Ascend devices, users need to prepare the rank table, set rank_id and device_id.
-Please see the `
-<https://www.mindspore.cn/tutorials/experts/en/r2.
+Please see the `rank table Startup
+<https://www.mindspore.cn/tutorials/experts/en/r2.2/parallel/rank_table.html>`_
 for more details.

-For the GPU devices, users need to prepare the host file and mpi, please see the `
-<https://www.mindspore.cn/tutorials/experts/en/r2.
+For the GPU devices, users need to prepare the host file and mpi, please see the `mpirun Startup
+<https://www.mindspore.cn/tutorials/experts/en/r2.2/parallel/mpirun.html>`_ .
+
+For the CPU device, users need to write a dynamic cluster startup script, please see the `Dynamic Cluster
+Startup <https://www.mindspore.cn/tutorials/experts/en/r2.2/parallel/dynamic_cluster.html>`_ .

 >>> from mindspore.communication import init, get_rank
 >>> init()
@@ -261,7 +278,7 @@ def get_local_rank(group=GlobalComm.WORLD_COMM_GROUP):
 Gets local rank ID for current device in specified collective communication group.

 Note:
-
+This method isn't supported in GPU and CPU versions of MindSpore.
 This method should be used after init().

 Args:

@@ -274,7 +291,7 @@ def get_local_rank(group=GlobalComm.WORLD_COMM_GROUP):
 Raises:
 TypeError: If group is not a string.
 ValueError: If backend is invalid.
-RuntimeError: If HCCL is not available or MindSpore is GPU version.
+RuntimeError: If HCCL is not available or MindSpore is GPU/CPU version.

 Supported Platforms:
 ``Ascend``

@@ -284,15 +301,18 @@ def get_local_rank(group=GlobalComm.WORLD_COMM_GROUP):
 Before running the following examples, you need to configure the communication environment variables.

 For the Ascend devices, users need to prepare the rank table, set rank_id and device_id.
-Please see the `
-<https://www.mindspore.cn/tutorials/experts/en/r2.
+Please see the `rank table Startup
+<https://www.mindspore.cn/tutorials/experts/en/r2.2/parallel/rank_table.html>`_
 for more details.

-For the GPU devices, users need to prepare the host file and mpi, please see the `
-<https://www.mindspore.cn/tutorials/experts/en/r2.
+For the GPU devices, users need to prepare the host file and mpi, please see the `mpirun Startup
+<https://www.mindspore.cn/tutorials/experts/en/r2.2/parallel/mpirun.html>`_ .
+
+For the CPU device, users need to write a dynamic cluster startup script, please see the `Dynamic Cluster
+Startup <https://www.mindspore.cn/tutorials/experts/en/r2.2/parallel/dynamic_cluster.html>`_ .

 >>> import mindspore as ms
->>> from mindspore.communication
+>>> from mindspore.communication import init, get_rank, get_local_rank
 >>> ms.set_context(device_target="Ascend")
 >>> ms.set_auto_parallel_context(device_num=16) # 2 server, each server with 8 NPU.
 >>> init()

@@ -324,7 +344,7 @@ def get_group_size(group=GlobalComm.WORLD_COMM_GROUP):
 Raises:
 TypeError: If group is not a string.
 ValueError: If backend is invalid.
-RuntimeError: If HCCL/NCCL is not available.
+RuntimeError: If HCCL/NCCL/MCCL is not available.

 Supported Platforms:
 ``Ascend`` ``GPU`` ``CPU``

@@ -334,15 +354,18 @@ def get_group_size(group=GlobalComm.WORLD_COMM_GROUP):
 Before running the following examples, you need to configure the communication environment variables.

 For the Ascend devices, users need to prepare the rank table, set rank_id and device_id.
-Please see the `
-<https://www.mindspore.cn/tutorials/experts/en/r2.
+Please see the `rank table Startup
+<https://www.mindspore.cn/tutorials/experts/en/r2.2/parallel/rank_table.html>`_
 for more details.

-For the GPU devices, users need to prepare the host file and mpi, please see the `
-<https://www.mindspore.cn/tutorials/experts/en/r2.
+For the GPU devices, users need to prepare the host file and mpi, please see the `mpirun Startup
+<https://www.mindspore.cn/tutorials/experts/en/r2.2/parallel/mpirun.html>`_ .
+
+For the CPU device, users need to write a dynamic cluster startup script, please see the `Dynamic Cluster
+Startup <https://www.mindspore.cn/tutorials/experts/en/r2.2/parallel/dynamic_cluster.html>`_ .

 >>> import mindspore as ms
->>> from mindspore.communication
+>>> from mindspore.communication import init, get_group_size
 >>> ms.set_auto_parallel_context(device_num=8)
 >>> init()
 >>> group_size = get_group_size()
@@ -360,7 +383,7 @@ def get_local_rank_size(group=GlobalComm.WORLD_COMM_GROUP):
 Gets local rank size of the specified collective communication group.

 Note:
-
+This method isn't supported in GPU and CPU versions of MindSpore.
 This method should be used after init().

 Args:

@@ -373,7 +396,7 @@ def get_local_rank_size(group=GlobalComm.WORLD_COMM_GROUP):
 Raises:
 TypeError: If group is not a string.
 ValueError: If backend is invalid.
-RuntimeError: If HCCL is not available or MindSpore is GPU version.
+RuntimeError: If HCCL is not available or MindSpore is GPU/CPU version.

 Supported Platforms:
 ``Ascend``

@@ -383,15 +406,18 @@ def get_local_rank_size(group=GlobalComm.WORLD_COMM_GROUP):
 Before running the following examples, you need to configure the communication environment variables.

 For the Ascend devices, users need to prepare the rank table, set rank_id and device_id.
-Please see the `
-<https://www.mindspore.cn/tutorials/experts/en/r2.
+Please see the `rank table Startup
+<https://www.mindspore.cn/tutorials/experts/en/r2.2/parallel/rank_table.html>`_
 for more details.

-For the GPU devices, users need to prepare the host file and mpi, please see the `
-<https://www.mindspore.cn/tutorials/experts/en/r2.
+For the GPU devices, users need to prepare the host file and mpi, please see the `mpirun Startup
+<https://www.mindspore.cn/tutorials/experts/en/r2.2/parallel/mpirun.html>`_ .
+
+For the CPU device, users need to write a dynamic cluster startup script, please see the `Dynamic Cluster
+Startup <https://www.mindspore.cn/tutorials/experts/en/r2.2/parallel/dynamic_cluster.html>`_ .

 >>> import mindspore as ms
->>> from mindspore.communication
+>>> from mindspore.communication import init, get_local_rank_size
 >>> ms.set_context(device_target="Ascend")
 >>> ms.set_auto_parallel_context(device_num=16) # 2 server, each server with 8 NPU.
 >>> init()
@@ -411,7 +437,7 @@ def get_world_rank_from_group_rank(group, group_rank_id):
 the rank ID in the specified user communication group.

 Note:
-
+This method isn't supported in GPU and CPU versions of MindSpore.
 The parameter group should not be "hccl_world_group".
 This method should be used after init().


@@ -425,7 +451,7 @@ def get_world_rank_from_group_rank(group, group_rank_id):
 Raises:
 TypeError: If `group_rank_id` is not an integer or the group is not a string.
 ValueError: If group is 'hccl_world_group' or backend is invalid.
-RuntimeError: If HCCL is not available or MindSpore is GPU version.
+RuntimeError: If HCCL is not available or MindSpore is GPU/CPU version.

 Supported Platforms:
 ``Ascend``

@@ -435,22 +461,27 @@ def get_world_rank_from_group_rank(group, group_rank_id):
 Before running the following examples, you need to configure the communication environment variables.

 For the Ascend devices, users need to prepare the rank table, set rank_id and device_id.
-Please see the `
-<https://www.mindspore.cn/tutorials/experts/en/r2.
+Please see the `rank table Startup
+<https://www.mindspore.cn/tutorials/experts/en/r2.2/parallel/rank_table.html>`_
 for more details.

-For the GPU devices, users need to prepare the host file and mpi, please see the `
-<https://www.mindspore.cn/tutorials/experts/en/r2.
+For the GPU devices, users need to prepare the host file and mpi, please see the `mpirun Startup
+<https://www.mindspore.cn/tutorials/experts/en/r2.2/parallel/mpirun.html>`_
+
+For the CPU device, users need to write a dynamic cluster startup script, please see the `Dynamic Cluster
+Startup <https://www.mindspore.cn/tutorials/experts/en/r2.2/parallel/dynamic_cluster.html>`_ .

+>>> import mindspore as ms
 >>> from mindspore import set_context
->>> from mindspore.communication
->>> set_context(device_target="Ascend")
+>>> from mindspore.communication import init, create_group, get_world_rank_from_group_rank, get_rank
+>>> set_context(mode=ms.GRAPH_MODE, device_target="Ascend")
 >>> init()
 >>> group = "0-4"
 >>> rank_ids = [0,4]
->>>
-
-
+>>> if get_rank() in rank_ids:
+... create_group(group, rank_ids)
+... world_rank_id = get_world_rank_from_group_rank(group, 1)
+... print("world_rank_id is: ", world_rank_id)
 world_rank_id is: 4
 """
 if not isinstance(group, str):

@@ -465,7 +496,7 @@ def get_group_rank_from_world_rank(world_rank_id, group):
 the rank ID in the world communication group.

 Note:
-
+This method isn't supported in GPU and CPU versions of MindSpore.
 The parameter group should not be "hccl_world_group".
 This method should be used after init().


@@ -479,7 +510,7 @@ def get_group_rank_from_world_rank(world_rank_id, group):
 Raises:
 TypeError: If world_rank_id is not an integer or the group is not a string.
 ValueError: If group is 'hccl_world_group' or backend is invalid.
-RuntimeError: If HCCL is not available or MindSpore is GPU version.
+RuntimeError: If HCCL is not available or MindSpore is GPU/CPU version.

 Supported Platforms:
 ``Ascend``

@@ -489,22 +520,27 @@ def get_group_rank_from_world_rank(world_rank_id, group):
 Before running the following examples, you need to configure the communication environment variables.

 For the Ascend devices, users need to prepare the rank table, set rank_id and device_id.
-Please see the `
-<https://www.mindspore.cn/tutorials/experts/en/r2.
+Please see the `rank table Startup
+<https://www.mindspore.cn/tutorials/experts/en/r2.2/parallel/rank_table.html>`_
 for more details.

-For the GPU devices, users need to prepare the host file and mpi, please see the `
-<https://www.mindspore.cn/tutorials/experts/en/r2.
+For the GPU devices, users need to prepare the host file and mpi, please see the `mpirun Startup
+<https://www.mindspore.cn/tutorials/experts/en/r2.2/parallel/mpirun.html>`_
+
+For the CPU device, users need to write a dynamic cluster startup script, please see the `Dynamic Cluster
+Startup <https://www.mindspore.cn/tutorials/experts/en/r2.2/parallel/dynamic_cluster.html>`_ .

+>>> import mindspore as ms
 >>> from mindspore import set_context
->>> from mindspore.communication
->>> set_context(device_target="Ascend")
+>>> from mindspore.communication import init, create_group, get_group_rank_from_world_rank, get_rank
+>>> set_context(mode=ms.GRAPH_MODE, device_target="Ascend")
 >>> init()
 >>> group = "0-4"
 >>> rank_ids = [0,4]
->>>
-
-
+>>> if get_rank() in rank_ids:
+... create_group(group, rank_ids)
+... group_rank_id = get_group_rank_from_world_rank(4, group)
+... print("group_rank_id is: ", group_rank_id)
 group_rank_id is: 1
 """
 if not isinstance(group, str):
@@ -518,7 +554,7 @@ def create_group(group, rank_ids):
 Create a user collective communication group.

 Note:
-
+This method isn't supported in GPU and CPU versions of MindSpore.
 The size of rank_ids should be larger than 1, rank_ids should not have duplicate data.
 This method should be used after init().
 Only support global single communication group in PyNative mode if you do not start with mpirun.

@@ -530,7 +566,7 @@ def create_group(group, rank_ids):
 Raises:
 TypeError: If group is not a string or `rank_ids` is not a list.
 ValueError: If `rank_ids` size is not larger than 1, or `rank_ids` has duplicate data, or backend is invalid.
-RuntimeError: If HCCL is not available or MindSpore is GPU version.
+RuntimeError: If HCCL is not available or MindSpore is GPU/CPU version.

 Supported Platforms:
 ``Ascend``

@@ -540,22 +576,27 @@ def create_group(group, rank_ids):
 Before running the following examples, you need to configure the communication environment variables.

 For the Ascend devices, users need to prepare the rank table, set rank_id and device_id.
-Please see the `
-<https://www.mindspore.cn/tutorials/experts/en/r2.
+Please see the `rank table Startup
+<https://www.mindspore.cn/tutorials/experts/en/r2.2/parallel/rank_table.html>`_
 for more details.

-For the GPU devices, users need to prepare the host file and mpi, please see the `
-<https://www.mindspore.cn/tutorials/experts/en/r2.
+For the GPU devices, users need to prepare the host file and mpi, please see the `mpirun Startup
+<https://www.mindspore.cn/tutorials/experts/en/r2.2/parallel/mpirun.html>`_ .
+
+For the CPU device, users need to write a dynamic cluster startup script, please see the `Dynamic Cluster
+Startup <https://www.mindspore.cn/tutorials/experts/en/r2.2/parallel/dynamic_cluster.html>`_ .

+>>> import mindspore as ms
 >>> from mindspore import set_context
 >>> import mindspore.ops as ops
->>> from mindspore.communication
->>> set_context(device_target="Ascend")
+>>> from mindspore.communication import init, create_group, get_rank
+>>> set_context(mode=ms.GRAPH_MODE, device_target="Ascend")
 >>> init()
->>> group = "0-
->>> rank_ids = [0,
->>>
-
+>>> group = "0-7"
+>>> rank_ids = [0,7]
+>>> if get_rank() in rank_ids:
+... create_group(group, rank_ids)
+... allreduce = ops.AllReduce(group)
 """
 if not isinstance(group, str):
 raise TypeError("For 'create_group', the argument 'group' must be type of string, "
@@ -568,7 +609,7 @@ def destroy_group(group):
 Destroy the user collective communication group.

 Note:
-
+This method isn't supported in GPU and CPU versions of MindSpore.
 The parameter group should not be "hccl_world_group".
 This method should be used after init().


@@ -578,10 +619,37 @@ def destroy_group(group):
 Raises:
 TypeError: If group is not a string.
 ValueError: If group is "hccl_world_group" or backend is invalid.
-RuntimeError: If HCCL is not available or MindSpore is GPU version.
+RuntimeError: If HCCL is not available or MindSpore is GPU/CPU version.

 Supported Platforms:
 ``Ascend``
+
+Examples:
+.. note::
+Before running the following examples, you need to configure the communication environment variables.
+
+For the Ascend devices, users need to prepare the rank table, set rank_id and device_id.
+Please see the `rank table startup
+<https://www.mindspore.cn/tutorials/experts/en/r2.2/parallel/rank_table.html>`_
+for more details.
+
+For the GPU devices, users need to prepare the host file and mpi, please see the `mpirun startup
+<https://www.mindspore.cn/tutorials/experts/en/r2.2/parallel/mpirun.html>`_ .
+
+For the CPU device, users need to write a dynamic cluster startup script, please see the `Dynamic Cluster
+Startup <https://www.mindspore.cn/tutorials/experts/en/r2.2/parallel/dynamic_cluster.html>`_ .
+
+>>> import mindspore as ms
+>>> from mindspore import set_context
+>>> import mindspore.ops as ops
+>>> from mindspore.communication import init, create_group, destroy_group, get_rank
+>>> set_context(mode=ms.GRAPH_MODE, device_target="Ascend")
+>>> init()
+>>> group = "0-2"
+>>> rank_ids = [0,2]
+>>> if get_rank() in rank_ids:
+... create_group(group, rank_ids)
+... destroy_group(group)
 """
 if not isinstance(group, str):
 raise TypeError("For 'destroy_group', the argument 'group' must be type of string, "