mindspore 2.7.0__cp311-cp311-win_amd64.whl → 2.7.1__cp311-cp311-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of mindspore might be problematic.
- mindspore/.commit_id +1 -1
- mindspore/__init__.py +4 -1
- mindspore/_c_dataengine.cp311-win_amd64.pyd +0 -0
- mindspore/_c_expression.cp311-win_amd64.pyd +0 -0
- mindspore/_c_mindrecord.cp311-win_amd64.pyd +0 -0
- mindspore/_extends/parse/compile_config.py +24 -1
- mindspore/_extends/parse/deprecated/deprecated_tensor_method.py +6 -2
- mindspore/_extends/parse/resources.py +1 -1
- mindspore/_extends/parse/standard_method.py +8 -1
- mindspore/_extends/parse/trope.py +2 -1
- mindspore/_extends/pijit/pijit_func_white_list.py +7 -22
- mindspore/avcodec-59.dll +0 -0
- mindspore/avdevice-59.dll +0 -0
- mindspore/avfilter-8.dll +0 -0
- mindspore/avformat-59.dll +0 -0
- mindspore/avutil-57.dll +0 -0
- mindspore/boost/base.py +29 -2
- mindspore/common/_decorator.py +3 -2
- mindspore/common/_grad_function.py +3 -1
- mindspore/common/_tensor_cpp_method.py +1 -1
- mindspore/common/_tensor_docs.py +275 -64
- mindspore/common/_utils.py +0 -44
- mindspore/common/api.py +285 -35
- mindspore/common/dump.py +7 -108
- mindspore/common/dynamic_shape/auto_dynamic_shape.py +1 -3
- mindspore/common/hook_handle.py +60 -0
- mindspore/common/jit_config.py +5 -1
- mindspore/common/jit_trace.py +27 -12
- mindspore/common/lazy_inline.py +5 -3
- mindspore/common/parameter.py +13 -107
- mindspore/common/recompute.py +4 -11
- mindspore/common/tensor.py +16 -169
- mindspore/communication/_comm_helper.py +11 -1
- mindspore/communication/comm_func.py +138 -4
- mindspore/communication/management.py +85 -1
- mindspore/config/op_info.config +0 -15
- mindspore/context.py +5 -85
- mindspore/dataset/engine/datasets.py +8 -4
- mindspore/dataset/engine/datasets_vision.py +1 -1
- mindspore/dataset/engine/validators.py +1 -15
- mindspore/dnnl.dll +0 -0
- mindspore/{experimental/llm_boost/ascend_native → graph}/__init__.py +7 -7
- mindspore/graph/custom_pass.py +55 -0
- mindspore/include/dataset/execute.h +2 -2
- mindspore/jpeg62.dll +0 -0
- mindspore/mindrecord/__init__.py +3 -3
- mindspore/mindrecord/common/exceptions.py +1 -0
- mindspore/mindrecord/config.py +1 -1
- mindspore/{parallel/mpi → mindrecord/core}/__init__.py +4 -1
- mindspore/mindrecord/{shardheader.py → core/shardheader.py} +2 -1
- mindspore/mindrecord/{shardindexgenerator.py → core/shardindexgenerator.py} +1 -1
- mindspore/mindrecord/{shardreader.py → core/shardreader.py} +2 -1
- mindspore/mindrecord/{shardsegment.py → core/shardsegment.py} +2 -2
- mindspore/mindrecord/{shardutils.py → core/shardutils.py} +1 -1
- mindspore/mindrecord/{shardwriter.py → core/shardwriter.py} +1 -1
- mindspore/mindrecord/filereader.py +4 -4
- mindspore/mindrecord/filewriter.py +5 -5
- mindspore/mindrecord/mindpage.py +2 -2
- mindspore/mindrecord/tools/cifar10.py +1 -1
- mindspore/mindrecord/tools/cifar100.py +1 -1
- mindspore/mindrecord/tools/cifar100_to_mr.py +1 -1
- mindspore/mindrecord/tools/cifar10_to_mr.py +1 -1
- mindspore/mindrecord/tools/csv_to_mr.py +1 -1
- mindspore/mindrecord/tools/imagenet_to_mr.py +1 -1
- mindspore/mindrecord/tools/mnist_to_mr.py +1 -1
- mindspore/mindrecord/tools/tfrecord_to_mr.py +1 -1
- mindspore/mindspore_backend_common.dll +0 -0
- mindspore/mindspore_backend_manager.dll +0 -0
- mindspore/mindspore_cluster.dll +0 -0
- mindspore/mindspore_common.dll +0 -0
- mindspore/mindspore_core.dll +0 -0
- mindspore/mindspore_cpu.dll +0 -0
- mindspore/mindspore_dump.dll +0 -0
- mindspore/mindspore_frontend.dll +0 -0
- mindspore/mindspore_glog.dll +0 -0
- mindspore/mindspore_hardware_abstract.dll +0 -0
- mindspore/mindspore_memory_pool.dll +0 -0
- mindspore/mindspore_ms_backend.dll +0 -0
- mindspore/mindspore_ops.dll +0 -0
- mindspore/{mindspore_ops_host.dll → mindspore_ops_cpu.dll} +0 -0
- mindspore/mindspore_profiler.dll +0 -0
- mindspore/mindspore_pyboost.dll +0 -0
- mindspore/mindspore_pynative.dll +0 -0
- mindspore/mindspore_runtime_pipeline.dll +0 -0
- mindspore/mindspore_runtime_utils.dll +0 -0
- mindspore/mindspore_tools.dll +0 -0
- mindspore/mint/__init__.py +15 -10
- mindspore/mint/distributed/distributed.py +182 -62
- mindspore/mint/nn/__init__.py +2 -16
- mindspore/mint/nn/functional.py +4 -110
- mindspore/mint/nn/layer/__init__.py +0 -2
- mindspore/mint/nn/layer/activation.py +0 -6
- mindspore/mint/nn/layer/basic.py +0 -47
- mindspore/mint/nn/layer/conv.py +4 -4
- mindspore/mint/nn/layer/normalization.py +8 -13
- mindspore/mint/nn/layer/pooling.py +0 -4
- mindspore/nn/__init__.py +1 -3
- mindspore/nn/cell.py +16 -66
- mindspore/nn/layer/basic.py +49 -1
- mindspore/nn/layer/container.py +16 -0
- mindspore/nn/layer/embedding.py +4 -169
- mindspore/nn/layer/normalization.py +2 -1
- mindspore/nn/layer/thor_layer.py +4 -85
- mindspore/nn/optim/ada_grad.py +0 -1
- mindspore/nn/optim/adafactor.py +0 -1
- mindspore/nn/optim/adam.py +31 -124
- mindspore/nn/optim/adamax.py +0 -1
- mindspore/nn/optim/asgd.py +0 -1
- mindspore/nn/optim/ftrl.py +8 -102
- mindspore/nn/optim/lamb.py +0 -1
- mindspore/nn/optim/lars.py +0 -3
- mindspore/nn/optim/lazyadam.py +25 -218
- mindspore/nn/optim/momentum.py +5 -43
- mindspore/nn/optim/optimizer.py +6 -55
- mindspore/nn/optim/proximal_ada_grad.py +0 -1
- mindspore/nn/optim/rmsprop.py +0 -1
- mindspore/nn/optim/rprop.py +0 -1
- mindspore/nn/optim/sgd.py +0 -1
- mindspore/nn/optim/tft_wrapper.py +0 -1
- mindspore/nn/optim/thor.py +0 -2
- mindspore/nn/probability/bijector/bijector.py +7 -8
- mindspore/nn/probability/bijector/gumbel_cdf.py +2 -2
- mindspore/nn/probability/bijector/power_transform.py +20 -21
- mindspore/nn/probability/bijector/scalar_affine.py +5 -5
- mindspore/nn/probability/bijector/softplus.py +13 -14
- mindspore/nn/wrap/grad_reducer.py +4 -74
- mindspore/numpy/array_creations.py +2 -2
- mindspore/numpy/fft.py +9 -9
- mindspore/{nn/reinforcement → onnx}/__init__.py +5 -8
- mindspore/onnx/onnx_export.py +137 -0
- mindspore/opencv_core4110.dll +0 -0
- mindspore/opencv_imgcodecs4110.dll +0 -0
- mindspore/{opencv_imgproc452.dll → opencv_imgproc4110.dll} +0 -0
- mindspore/ops/__init__.py +2 -0
- mindspore/ops/_grad_experimental/grad_comm_ops.py +38 -2
- mindspore/ops/_op_impl/aicpu/__init__.py +0 -10
- mindspore/ops/_op_impl/cpu/__init__.py +0 -5
- mindspore/ops/auto_generate/cpp_create_prim_instance_helper.py +16 -22
- mindspore/ops/auto_generate/gen_extend_func.py +2 -7
- mindspore/ops/auto_generate/gen_ops_def.py +98 -141
- mindspore/ops/auto_generate/gen_ops_prim.py +12708 -12686
- mindspore/ops/communication.py +97 -0
- mindspore/ops/composite/__init__.py +5 -2
- mindspore/ops/composite/base.py +15 -1
- mindspore/ops/composite/multitype_ops/__init__.py +3 -1
- mindspore/ops/composite/multitype_ops/_compile_utils.py +150 -8
- mindspore/ops/composite/multitype_ops/add_impl.py +7 -0
- mindspore/ops/composite/multitype_ops/mod_impl.py +27 -0
- mindspore/ops/function/__init__.py +1 -0
- mindspore/ops/function/array_func.py +14 -12
- mindspore/ops/function/comm_func.py +3883 -0
- mindspore/ops/function/debug_func.py +3 -4
- mindspore/ops/function/math_func.py +45 -54
- mindspore/ops/function/nn_func.py +75 -294
- mindspore/ops/function/random_func.py +9 -18
- mindspore/ops/functional.py +2 -0
- mindspore/ops/functional_overload.py +354 -18
- mindspore/ops/operations/__init__.py +2 -5
- mindspore/ops/operations/_custom_ops_utils.py +7 -9
- mindspore/ops/operations/_inner_ops.py +1 -38
- mindspore/ops/operations/_rl_inner_ops.py +0 -933
- mindspore/ops/operations/array_ops.py +1 -0
- mindspore/ops/operations/comm_ops.py +94 -2
- mindspore/ops/operations/custom_ops.py +228 -19
- mindspore/ops/operations/debug_ops.py +27 -29
- mindspore/ops/operations/manually_defined/ops_def.py +27 -306
- mindspore/ops/operations/nn_ops.py +2 -2
- mindspore/ops/operations/sparse_ops.py +0 -83
- mindspore/ops/primitive.py +1 -17
- mindspore/ops/tensor_method.py +72 -3
- mindspore/ops_generate/aclnn/aclnn_kernel_register_auto_cc_generator.py +5 -5
- mindspore/ops_generate/aclnn/gen_aclnn_implement.py +8 -8
- mindspore/ops_generate/api/functions_cc_generator.py +53 -4
- mindspore/ops_generate/api/tensor_func_reg_cpp_generator.py +25 -11
- mindspore/ops_generate/common/gen_constants.py +11 -10
- mindspore/ops_generate/common/op_proto.py +18 -1
- mindspore/ops_generate/common/template.py +102 -245
- mindspore/ops_generate/common/template_utils.py +212 -0
- mindspore/ops_generate/gen_custom_ops.py +69 -0
- mindspore/ops_generate/op_def/ops_def_cc_generator.py +78 -7
- mindspore/ops_generate/op_def_py/base_op_prim_py_generator.py +360 -0
- mindspore/ops_generate/op_def_py/custom_op_prim_py_generator.py +140 -0
- mindspore/ops_generate/op_def_py/op_def_py_generator.py +54 -7
- mindspore/ops_generate/op_def_py/op_prim_py_generator.py +5 -312
- mindspore/ops_generate/pyboost/auto_grad_impl_cc_generator.py +74 -17
- mindspore/ops_generate/pyboost/auto_grad_reg_cc_generator.py +22 -5
- mindspore/ops_generate/pyboost/op_template_parser.py +3 -2
- mindspore/ops_generate/pyboost/pyboost_functions_cpp_generator.py +21 -5
- mindspore/ops_generate/pyboost/pyboost_functions_h_generator.py +2 -2
- mindspore/ops_generate/pyboost/pyboost_functions_impl_cpp_generator.py +30 -10
- mindspore/ops_generate/pyboost/pyboost_grad_function_cpp_generator.py +10 -3
- mindspore/ops_generate/pyboost/pyboost_internal_kernel_info_adapter_generator.py +1 -1
- mindspore/ops_generate/pyboost/pyboost_native_grad_functions_generator.py +19 -9
- mindspore/ops_generate/pyboost/pyboost_op_cpp_code_generator.py +71 -28
- mindspore/ops_generate/pyboost/pyboost_overload_functions_cpp_generator.py +10 -9
- mindspore/ops_generate/pyboost/pyboost_utils.py +27 -16
- mindspore/ops_generate/resources/yaml_loader.py +13 -0
- mindspore/ops_generate/tensor_py_cc_generator.py +2 -2
- mindspore/parallel/_cell_wrapper.py +1 -1
- mindspore/parallel/_parallel_serialization.py +1 -4
- mindspore/parallel/_utils.py +29 -6
- mindspore/parallel/checkpoint_transform.py +18 -2
- mindspore/parallel/cluster/process_entity/_api.py +24 -32
- mindspore/parallel/cluster/process_entity/_utils.py +9 -5
- mindspore/{experimental/llm_boost/atb → parallel/distributed}/__init__.py +21 -23
- mindspore/parallel/distributed/distributed_data_parallel.py +393 -0
- mindspore/parallel/distributed/flatten_grad_buffer.py +295 -0
- mindspore/parallel/strategy.py +336 -0
- mindspore/parallel/transform_safetensors.py +117 -16
- mindspore/profiler/analysis/viewer/ascend_kernel_details_viewer.py +3 -0
- mindspore/profiler/analysis/viewer/ms_minddata_viewer.py +1 -1
- mindspore/profiler/common/constant.py +5 -0
- mindspore/profiler/common/file_manager.py +9 -0
- mindspore/profiler/common/msprof_cmd_tool.py +38 -2
- mindspore/profiler/common/path_manager.py +56 -24
- mindspore/profiler/common/profiler_context.py +2 -12
- mindspore/profiler/common/profiler_info.py +3 -3
- mindspore/profiler/common/profiler_path_manager.py +13 -0
- mindspore/profiler/common/util.py +30 -3
- mindspore/profiler/experimental_config.py +2 -1
- mindspore/profiler/platform/npu_profiler.py +33 -6
- mindspore/run_check/_check_version.py +108 -24
- mindspore/runtime/__init__.py +3 -2
- mindspore/runtime/executor.py +11 -3
- mindspore/runtime/memory.py +112 -0
- mindspore/swresample-4.dll +0 -0
- mindspore/swscale-6.dll +0 -0
- mindspore/tinyxml2.dll +0 -0
- mindspore/{experimental/llm_boost → tools}/__init__.py +5 -5
- mindspore/tools/data_dump.py +130 -0
- mindspore/tools/sdc_detect.py +91 -0
- mindspore/tools/stress_detect.py +63 -0
- mindspore/train/__init__.py +6 -6
- mindspore/train/_utils.py +5 -18
- mindspore/train/amp.py +6 -4
- mindspore/train/callback/_checkpoint.py +0 -9
- mindspore/train/callback/_train_fault_tolerance.py +69 -18
- mindspore/train/data_sink.py +1 -5
- mindspore/train/model.py +38 -211
- mindspore/train/serialization.py +126 -387
- mindspore/turbojpeg.dll +0 -0
- mindspore/utils/__init__.py +6 -3
- mindspore/utils/dlpack.py +92 -0
- mindspore/utils/dryrun.py +1 -1
- mindspore/utils/runtime_execution_order_check.py +10 -0
- mindspore/utils/sdc_detect.py +14 -12
- mindspore/utils/stress_detect.py +43 -0
- mindspore/utils/utils.py +144 -8
- mindspore/version.py +1 -1
- {mindspore-2.7.0.dist-info → mindspore-2.7.1.dist-info}/METADATA +3 -2
- {mindspore-2.7.0.dist-info → mindspore-2.7.1.dist-info}/RECORD +254 -267
- mindspore/experimental/llm_boost/ascend_native/llama_boost_ascend_native.py +0 -210
- mindspore/experimental/llm_boost/ascend_native/llm_boost.py +0 -52
- mindspore/experimental/llm_boost/atb/boost_base.py +0 -385
- mindspore/experimental/llm_boost/atb/llama_boost.py +0 -137
- mindspore/experimental/llm_boost/atb/qwen_boost.py +0 -124
- mindspore/experimental/llm_boost/register.py +0 -130
- mindspore/experimental/llm_boost/utils.py +0 -31
- mindspore/include/OWNERS +0 -7
- mindspore/mindspore_cpu_res_manager.dll +0 -0
- mindspore/mindspore_ops_kernel_common.dll +0 -0
- mindspore/mindspore_res_manager.dll +0 -0
- mindspore/nn/optim/_dist_optimizer_registry.py +0 -111
- mindspore/nn/reinforcement/_batch_read_write.py +0 -142
- mindspore/nn/reinforcement/_tensors_queue.py +0 -152
- mindspore/nn/reinforcement/tensor_array.py +0 -145
- mindspore/opencv_core452.dll +0 -0
- mindspore/opencv_imgcodecs452.dll +0 -0
- mindspore/ops/_op_impl/aicpu/priority_replay_buffer.py +0 -113
- mindspore/ops/_op_impl/aicpu/reservoir_replay_buffer.py +0 -96
- mindspore/ops/_op_impl/aicpu/sparse_cross.py +0 -42
- mindspore/ops/_op_impl/cpu/buffer_append.py +0 -28
- mindspore/ops/_op_impl/cpu/buffer_get.py +0 -28
- mindspore/ops/_op_impl/cpu/buffer_sample.py +0 -28
- mindspore/ops/_op_impl/cpu/priority_replay_buffer.py +0 -42
- mindspore/ops/operations/_tensor_array.py +0 -359
- mindspore/ops/operations/rl_ops.py +0 -288
- mindspore/parallel/_offload_context.py +0 -275
- mindspore/parallel/_recovery_context.py +0 -115
- mindspore/parallel/_transformer/__init__.py +0 -35
- mindspore/parallel/_transformer/layers.py +0 -765
- mindspore/parallel/_transformer/loss.py +0 -251
- mindspore/parallel/_transformer/moe.py +0 -693
- mindspore/parallel/_transformer/op_parallel_config.py +0 -222
- mindspore/parallel/_transformer/transformer.py +0 -3124
- mindspore/parallel/mpi/_mpi_config.py +0 -116
- mindspore/train/memory_profiling_pb2.py +0 -298
- {mindspore-2.7.0.dist-info → mindspore-2.7.1.dist-info}/WHEEL +0 -0
- {mindspore-2.7.0.dist-info → mindspore-2.7.1.dist-info}/entry_points.txt +0 -0
- {mindspore-2.7.0.dist-info → mindspore-2.7.1.dist-info}/top_level.txt +0 -0
mindspore/common/tensor.py
CHANGED
@@ -17,7 +17,6 @@
__all__ = ['Tensor']

import abc
- import math
import numbers
import numpy as np

@@ -29,7 +28,6 @@ from mindspore import log as logger
from mindspore.common import dtype as mstype
from mindspore.common.hook_handle import _TensorHookHandle

- from mindspore.common._utils import get_slice_num
from mindspore.common._register_for_tensor import tensor_operator_registry
from mindspore._c_expression import TensorPy as TensorPy_
from mindspore._c_expression import _rmod_instance
@@ -296,6 +294,7 @@ class Tensor(TensorPy_, metaclass=_TensorMeta):
...
>>> # initialize a tensor with init
>>> t4 = Tensor(shape = (1, 3), dtype=ms.float32, init=One())
+ >>> t4.init_data()
>>> print(t4)
[[1. 1. 1.]]
>>> print(type(t4))
@@ -326,7 +325,6 @@ class Tensor(TensorPy_, metaclass=_TensorMeta):

def __repr__(self):
if self.init_finished:
- TensorPy_.data_sync(self, True)
return TensorPy_.__repr__(self)
return ''

@@ -453,6 +451,8 @@ class Tensor(TensorPy_, metaclass=_TensorMeta):
def __str__(self):
if self.dtype == mstype.type_none:
return "Unknown Tensor type!"
+ if not self._data_ptr():
+ return TensorPy_.__str__(self)
return str(self.asnumpy())

def __getstate__(self):
@@ -983,37 +983,6 @@ class Tensor(TensorPy_, metaclass=_TensorMeta):
"""
return self.asnumpy()

- def is_persistent_data(self):
- """
- Check if size of tensor is huge, and need save data to persistent storage.
- If size of tensor is bigger then MS_EMBEDDING_REMOTE_CACHE_MEMORY_SIZE, it will
- use persistent storage to save tensor data. And will spilt data to some slice.
-
- Returns:
- True or False
- """
- return TensorPy_.is_persistent_data(self)
-
- def asnumpy_of_slice_persistent_data(self, param_key, slice_index):
- """
- Convert a slice of tensor data to numpy array. A slice is part of tensor data.
- Returns as a NumPy ndarray. This slice tensor data and the returned ndarray
- share the same underlying storage. Changes to self tensor will be reflected in the ndarray.
-
- Returns:
- A numpy ndarray which shares the same underlying storage with the slice of tensor data.
- """
- return TensorPy_.asnumpy_of_slice_persistent_data(self, param_key, slice_index)
-
- def slice_num_of_persistent_data(self):
- """
- Get slice num of a tensor which use persistent storage.
-
- Returns:
- Num of slice.
- """
- return self.slice_num_of_persistent_data_
-
def slice_scatter(self, src, axis=0, start=None, end=None, step=1):
"""
For details, please refer to :func:`mindspore.ops.slice_scatter`.
@@ -1032,15 +1001,6 @@ class Tensor(TensorPy_, metaclass=_TensorMeta):
"""
return tensor_operator_registry.get('geqrf')(self)

- def slice_shape_of_persistent_data(self):
- """
- Get slice shape of tensor after cut to slice size.
-
- Returns:
- The slice shape of tensor.
- """
- return self.slice_shape_of_persistent_data_
-
def value(self):
"""
Get the value of the tensor or the parameter.
@@ -1241,35 +1201,6 @@ class Tensor(TensorPy_, metaclass=_TensorMeta):
"""
return tensor_operator_registry.get('angle')(self)

- def view(self, *shape):
- """
- Reshape the tensor according to the input shape. It's the same as :func:`mindspore.Tensor.reshape`,
- implemented by the underlying reshape operator.
-
- Args:
- shape (Union[tuple(int), int]): Dimension of the output tensor.
-
- Returns:
- Tensor, which dimension is the input shape's value.
-
- Examples:
- >>> from mindspore import Tensor
- >>> import numpy as np
- >>> a = Tensor(np.array([[1, 2, 3], [2, 3, 4]], dtype=np.float32))
- >>> output = a.view((3, 2))
- >>> print(output)
- [[1. 2.]
- [3. 2.]
- [3. 4.]]
- """
- if not shape:
- raise ValueError("The shape variable should not be empty")
- if isinstance(shape[0], tuple):
- if len(shape) != 1:
- raise ValueError(f"Only one tuple is needed, but got {shape}")
- shape = shape[0]
- return tensor_operator_registry.get('reshape')(self, shape)
-
def bitwise_left_shift(self, other):
"""
For details, please refer to :func:`mindspore.ops.bitwise_left_shift`.
@@ -1302,18 +1233,6 @@ class Tensor(TensorPy_, metaclass=_TensorMeta):
"""
return tensor_operator_registry.get('ger')(self, vec2)

- def broadcast_to(self, shape):
- """
- For details, please refer to :func:`mindspore.ops.broadcast_to`.
- """
- return tensor_operator_registry.get('broadcast_to')(self, shape)
-
- def real(self):
- r"""
- For details, please refer to :func:`mindspore.ops.real`.
- """
- return tensor_operator_registry.get('real')(self)
-
def tanh_(self):
r"""
Computes hyperbolic tangent of self inplace element-wise. The Tanh function is defined as:
@@ -1538,8 +1457,7 @@ class Tensor(TensorPy_, metaclass=_TensorMeta):
>>> print(output.shape)
(24,)
"""
-
- return reshape_op(self, (-1,))
+ return self.reshape((-1,))

def rot90(self, k, dims):
r"""
@@ -1577,15 +1495,6 @@ class Tensor(TensorPy_, metaclass=_TensorMeta):
"""
return self._size

- def permute(self, *axis):
- """
- Tensor.permute supports unpacking the `axis` argument automatically when it is passed as an indefinite number of
- positional arguments, which has a slight difference from the input parameter of :func:`mindspore.ops.permute`.
- For details, please refer to :func:`mindspore.ops.permute`.
- """
- perm = validator.check_transpose_axis(axis, self.ndim)
- return tensor_operator_registry.get('permute')(self, perm)
-
def positive(self):
"""
For details, please refer to :func:`mindspore.ops.positive`.
@@ -1622,12 +1531,6 @@ class Tensor(TensorPy_, metaclass=_TensorMeta):
"""
return tensor_operator_registry.get('swapdims')(self, dim0, dim1)

- def squeeze(self, axis=None):
- """
- For details, please refer to :func:`mindspore.ops.squeeze`.
- """
- return tensor_operator_registry.get('squeeze')(self, axis)
-
def slogdet(self):
"""
For details, please refer to :func:`mindspore.ops.slogdet`.
@@ -2085,15 +1988,6 @@ class Tensor(TensorPy_, metaclass=_TensorMeta):

if shape is None:
shape = self.shape
- # At embedding cache scenes, we need limit the size of memory for tensor.
- # And save out of range data to persistent storage to support TB-Level size of tensor.
- data_shape = list(shape)
- slice_num_of_persistent_data = get_slice_num(self.dtype, shape)
- if slice_num_of_persistent_data > 1:
- slice_first_dim = math.ceil(shape[0] / slice_num_of_persistent_data)
- data_shape[0] = slice_first_dim
- self.slice_shape_of_persistent_data_ = data_shape
- self.slice_num_of_persistent_data_ = slice_num_of_persistent_data

from mindspore.common.initializer import Zero as ZeroInitializer

@@ -2101,9 +1995,9 @@ class Tensor(TensorPy_, metaclass=_TensorMeta):
try:
dtype_ = mstype.int8 if is_qint4x2 else self.dtype
if isinstance(self.init, ZeroInitializer):
- data = np.zeros(
+ data = np.zeros(shape, dtype=mstype._dtype_to_nptype(dtype_)) # pylint:disable=protected-access
else:
- data = np.ndarray(
+ data = np.ndarray(shape, dtype=mstype._dtype_to_nptype(dtype_)) # pylint:disable=protected-access
except ValueError as e:
msg = "Error shape={}".format(shape)
logger.critical(msg)
@@ -2139,16 +2033,12 @@ class Tensor(TensorPy_, metaclass=_TensorMeta):
self.init.seed, _ = self.seed

with seed_context(self.init):
- if (not isinstance(self.init, ZeroInitializer)
+ if (not isinstance(self.init, ZeroInitializer)) \
and not is_reboot_node():
self.init(data)
self.init = None

-
- if slice_num_of_persistent_data > 1:
- self.assign_value(TensorPy_.persistent_data_from_numpy(data, slice_num_of_persistent_data))
- else:
- self.assign_value(TensorPy_.from_numpy(data))
+ self.assign_value(TensorPy_.from_numpy(data))

if is_qint4x2:
self.set_dtype(mstype.qint4x2)
@@ -2956,41 +2846,6 @@ class Tensor(TensorPy_, metaclass=_TensorMeta):
"""
return tensor_operator_registry.get('bmm')(self, mat2)

- def to(self, dtype):
- r"""
- Performs tensor dtype conversion.
-
- Note:
- - If the `self` Tensor already has the correct `mindspore.dtype`, then self is returned.
- Otherwise, the returned tensor is a copy of `self` with the desired mindspore.dtype.
- - When converting complex numbers to boolean type, the imaginary part of the complex number is not
- taken into account. As long as the real part is non-zero, it returns True; otherwise, it returns False.
-
- Args:
- dtype (dtype.Number, bool): The valid data type of the output tensor. Only constant value is allowed.
- Only Support type bool in PyNative mode.
-
- Returns:
- Tensor, converted to the specified `dtype`.
-
- Raises:
- TypeError: If `dtype` is not a Number.
-
- Supported Platforms:
- ``Ascend`` ``GPU`` ``CPU``
-
- Examples:
- >>> import numpy as np
- >>> import mindspore
- >>> from mindspore import Tensor
- >>> input_np = np.random.randn(2, 3, 4, 5).astype(np.float32)
- >>> input_x = Tensor(input_np)
- >>> dtype = mindspore.int32
- >>> output = input_x.to(dtype)
- >>> print(output.dtype)
- Int32
- """
- return self if self.dtype == dtype else self._to(dtype)

def type(self, dtype=None):
r"""
@@ -3024,9 +2879,6 @@ class Tensor(TensorPy_, metaclass=_TensorMeta):
r"""
Returns self tensor cast to the type of the with the input other tensor.

- .. warning::
- This is an experimental API that is subject to change or deletion.
-
Note:
When converting complex numbers to boolean type, the imaginary part of the complex number is not
taken into account. As long as the real part is non-zero, it returns True; otherwise, it returns False.
@@ -3329,14 +3181,12 @@ class Tensor(TensorPy_, metaclass=_TensorMeta):
"""
return tensor_operator_registry.get('unfold')(self, kernel_size, dilation, padding, stride)

- def expand(self, size):
+ def expand(self, *size):
r"""
For details, please refer to :func:`mindspore.ops.broadcast_to`.
The parameter `size` of the current interface is the same as the parameter `shape` of the reference interface.
"""
-
- size = tensor_operator_registry.get('tensortotuple')()(size)
- return tensor_operator_registry.get('expand')(self, size)
+ return self.broadcast_to(*size)

def cumprod(self, dim, dtype=None):
r"""
@@ -3537,9 +3387,6 @@ class Tensor(TensorPy_, metaclass=_TensorMeta):
r"""
Return a tensor filled with zeros.

- .. warning::
- This is an experimental API that is subject to change or deletion.
-
Returns:
Return a tensor. Fill self tensor with zeros.

@@ -3563,6 +3410,12 @@ class Tensor(TensorPy_, metaclass=_TensorMeta):
"""
return tensor_operator_registry.get('sign')(self)

+ def sign_(self):
+ """
+ In-place version of :func:`mindspore.mint.sign`.
+ """
+ return tensor_operator_registry.get('sign_')(self)
+
def signbit(self):
"""
For details, please refer to :func:`mindspore.ops.signbit`.
@@ -3575,12 +3428,6 @@ class Tensor(TensorPy_, metaclass=_TensorMeta):
"""
return tensor_operator_registry.get('sgn')(self)

- def imag(self):
- r"""
- For details, please refer to :func:`mindspore.ops.imag`.
- """
- return tensor_operator_registry.get('imag')(self)
-
def quantile(self, q, axis=None, keepdims=False):
r"""
This interface is deprecated from version 2.4 and will be removed in a future version.
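The caller-visible changes in this file: `Tensor.expand` now takes unpacked positional sizes and simply forwards them to `broadcast_to`, and a new in-place `sign_` method is registered, while `view`, `squeeze`, `permute`, `broadcast_to`, `real`, `imag` and `to` lose their Python-level definitions (they are expected to resolve through the C++/auto-generated tensor methods instead). A minimal hedged sketch of the new call patterns; shapes and values are illustrative, not taken from the release:

    import numpy as np
    from mindspore import Tensor

    x = Tensor(np.ones((3, 1), dtype=np.float32))
    y = x.expand(3, 4)   # per the new definition this is forwarded as x.broadcast_to(3, 4)
    s = x.sign_()        # new in-place variant, routed through tensor_operator_registry.get('sign_')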
mindspore/communication/_comm_helper.py
CHANGED

@@ -25,7 +25,8 @@ from mindspore import context
from mindspore.parallel._ps_context import _is_role_sched, _is_ps_mode,\
_get_ps_context
from mindspore import log as logger
- from mindspore._c_expression import CollectiveManager, set_cluster_exit_with_exception, MSContext, GroupOptions
+ from mindspore._c_expression import CollectiveManager, set_cluster_exit_with_exception, MSContext, GroupOptions, \
+ ParallelCommManager
from mindspore.common._utils import load_lib

HCCL_LIB = 'libhccl_plugin.so'
@@ -523,6 +524,9 @@ def _create_group_helper(group, rank_ids, options=None):
raise RuntimeError("Failed to create communication group for {} with rank ids {}. "
"If NCCL is used, 'export NCCL_DEBUG=INFO' "
"is suggested before launching jobs.".format(group, rank_ids))
+ group_info = ParallelCommManager.get_instance().hccl_groups(rank_ids)
+ if group_info is None or not group_info[1]:
+ ParallelCommManager.get_instance().set_hccl_groups(rank_ids, group, True)

_ExistingGroup.ITEMS[group] = rank_ids
sorted_ranks = sorted(rank_ids)
@@ -547,7 +551,13 @@ def _destroy_group_helper(group):
if _hccl_test():
hccl.create_group(group)
else:
+ group_ranks_map = CollectiveManager.get_instance().get_group_map()
+ ranks = group_ranks_map.get(group)
+ if ranks is not None:
+ ParallelCommManager.get_instance().set_hccl_groups(ranks, group, False)
CollectiveManager.get_instance().destroy_group(group)
+ del _ExistingGroup.ITEMS[group]
+ del _ExistingGroup.GROUP_RANKS[group]


@check_parameter_available
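The net effect of the two helper changes above is symmetric bookkeeping: creating a group records its rank list as an active HCCL group in `ParallelCommManager`, and destroying it marks the group inactive and drops the cached `_ExistingGroup` entries. A rough sketch of that lifecycle through the public wrappers that call these helpers (group name and rank ids are illustrative):

    from mindspore.communication import init, create_group, destroy_group

    init()
    create_group("pair_group", [0, 1])   # _create_group_helper marks ranks [0, 1] active in ParallelCommManager
    # ... run collectives on "pair_group" ...
    destroy_group("pair_group")          # _destroy_group_helper marks it inactive and removes the cached entries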
mindspore/communication/comm_func.py
CHANGED

@@ -18,7 +18,7 @@ Defines communication operators with functional form.
"""
from mindspore.communication import GlobalComm, get_group_rank_from_world_rank, get_group_size
from mindspore.communication.management import _get_group
- from mindspore.communication._comm_helper import _get_group_rank_from_world_rank_from_cache_helper
+ from mindspore.communication._comm_helper import _get_group_rank_from_world_rank_from_cache_helper, _get_rank_helper
from mindspore.common.tensor import Tensor
from mindspore._c_expression import TensorPy as Tensor_
from mindspore.ops import ReduceOp, cat
@@ -26,7 +26,8 @@ from mindspore.ops._primitive_cache import _get_cache_prim
from mindspore.ops.primitive import _primexpr
from mindspore.ops.auto_generate.gen_ops_prim import (inner_comm_all_reduce_op, inner_comm_all_gather_op,
inner_comm_all_to_all_v_op, inner_comm_irecv_op,
- inner_comm_isend_op, inner_comm_reduce_scatter_op
+ inner_comm_isend_op, inner_comm_reduce_scatter_op,
+ dist_comm_all_to_all_v_c_op)
from mindspore._c_expression import CommHandle as CommHandle_
from mindspore._c_expression.typing import Type
from mindspore import jit_class
@@ -49,11 +50,13 @@ __all__ = [
'recv',
'P2POp',
'batch_isend_irecv',
+ 'all_to_all_v_c'
]

import mindspore.ops.operations as P

_GROPU_SIZE_CACHE = {}
+ _GROPU_RANK_CACHE = {}

@jit_class
class CommHandle(CommHandle_):
@@ -733,7 +736,7 @@ def gather_into_tensor(tensor, dst=0, group=GlobalComm.WORLD_COMM_GROUP):
Args:
tensor (Tensor): The tensor to be gathered. The shape of tensor is :math:`(x_1, x_2, ..., x_R)`.
dst(int, optional): Specifies the rank(global rank) of the process that receive the tensor.
- And only process `dst` will receive the gathered tensor. Default: 0
+ And only process `dst` will receive the gathered tensor. Default: ``0``.
group (str, optional): The communication group to work on. Default: ``GlobalComm.WORLD_COMM_GROUP``.

Returns:
@@ -890,7 +893,8 @@ def barrier(group=GlobalComm.WORLD_COMM_GROUP):
if not isinstance(group, str):
raise TypeError(f"group must be type of string, but got {type(group)}")
_op = _get_cache_prim(P.Barrier)(group)
-
+ _op()
+ ms.runtime.synchronize()


def _deal_comm_outputs(output, async_op, exec_sync=False):
@@ -1452,3 +1456,133 @@ def all_to_all_single_with_output_shape(output_shape, tensor, output_split_sizes
result = result.reshape((-1,) + recv_shape_without_first_dim)

return result, handle
+
+
+ def _get_all_to_all_v_c_numel_list(output, input, send_count_matrix_size):
+ """get numel list for all_to_all_v_c."""
+ send_size_without_first_dim = _get_size(input.shape[1:])
+ recv_size_without_first_dim = _get_size(output.shape[1:])
+ if send_size_without_first_dim != recv_size_without_first_dim:
+ raise ValueError("The input and output dimensions except 0 must be of equal size, "
+ f"but got {send_size_without_first_dim} and {recv_size_without_first_dim}.")
+ send_count_matrix = [size * send_size_without_first_dim for size in send_count_matrix_size]
+ return send_count_matrix
+
+
+ def get_cache_group_size(group=GlobalComm.WORLD_COMM_GROUP):
+ """get cache group size."""
+ global _GROPU_SIZE_CACHE
+ if group not in _GROPU_SIZE_CACHE:
+ _GROPU_SIZE_CACHE[group] = get_group_size(group)
+ group_size = _GROPU_SIZE_CACHE[group]
+ return group_size
+
+
+ def get_cache_group_rank(group=GlobalComm.WORLD_COMM_GROUP):
+ """get cache rank id."""
+ global _GROPU_RANK_CACHE
+ if group not in _GROPU_RANK_CACHE:
+ _GROPU_RANK_CACHE[group] = _get_rank_helper(group)
+ group_rank = _GROPU_RANK_CACHE[group]
+ return group_rank
+
+
+ def all_to_all_v_c(output, input, send_count_matrix, group=None, async_op=False):
+ r"""
+ Based on the user-specified split size, the input tensor is divided and sent to other devices, where split chunks
+ are received and then merged into a single output tensor.
+
+ Note:
+ Only support PyNative mode, Graph mode is not currently supported.
+
+ Args:
+ output (Tensor): the output tensor is gathered concatenated from remote ranks.
+ input (Tensor): tensor to be scattered to remote rank.
+ send_count_matrix (list[int]): The sending and receiving parameters of all ranks,
+ :math:`\text{send_count_matrix}[i*\text{rank_size}+j]` represents the amount of data sent by
+ rank i to rank j, and the basic unit is first dimension sizes. Among them, `rank_size`
+ indicates the size of the communication group.
+ group (str, optional): The communication group to work on. If ``None``, which means ``"hccl_world_group"`` in
+ Ascend. Default: ``None``.
+ async_op (bool, optional): Whether this operator should be an async operator. Default: ``False`` .
+
+ Returns:
+ CommHandle. CommHandle is an async work handle, if `async_op` is set to True.
+ CommHandle will be None, when `async_op` is False.
+
+ Raises:
+ TypeError: If `input` or `output` is not tensor. `group` is not a str, or async_op is not bool.
+
+ Supported Platforms:
+ ``Ascend``
+
+ Examples:
+ .. note::
+ Before running the following examples, you need to configure the communication environment variables.
+
+ For Ascend devices, it is recommended to use the msrun startup method
+ without any third-party or configuration file dependencies.
+ Please see the `msrun start up
+ <https://www.mindspore.cn/tutorials/en/master/parallel/msrun_launcher.html>`_
+ for more details.
+
+ This example should be run with 2 devices.
+
+ >>> import numpy as np
+ >>> import mindspore
+ >>> from mindspore.mint.distributed import init_process_group, get_rank
+ >>> from mindspore.communication.comm_func import all_to_all_v_c
+ >>> from mindspore import Tensor
+ >>> from mindspore.ops import zeros
+ >>>
+ >>> init_process_group()
+ >>> this_rank = get_rank()
+ >>> if this_rank == 0:
+ ... output = Tensor(np.zeros([3]).astype(np.float32))
+ ... tensor = Tensor([0, 1, 2.]) * this_rank
+ ... result = all_to_all_v_c(output, tensor, [0, 3, 3, 0])
+ ... print(output)
+ >>> if this_rank == 1:
+ ... output = Tensor(np.zeros([3]).astype(np.float32))
+ ... tensor = Tensor([0, 1, 2.]) * this_rank
+ ... result = all_to_all_v_c(output, tensor, [0, 3, 3, 0])
+ ... print(output)
+ rank 0:
+ [0. 1. 2]
+ rank 1:
+ [0. 0. 0]
+ """
+
+ _check_all_tensors([input])
+ _check_all_tensors([output])
+ if group is None:
+ group = GlobalComm.WORLD_COMM_GROUP
+ if not isinstance(group, str):
+ raise TypeError(
+ "The argument 'group' must be type of string, "
+ "but got 'group' type : {}.".format(type(group))
+ )
+ if not isinstance(async_op, bool):
+ raise TypeError(
+ f"The argument 'async_op' must be a bool, but got {type(async_op)}."
+ )
+ if not isinstance(send_count_matrix, list):
+ raise TypeError("send_count_matrix must be list, but got {}".format(type(send_count_matrix)))
+ if not all(isinstance(x, int) for x in send_count_matrix):
+ raise TypeError("send_count_matrix elements must be of type int")
+ rank_size = get_cache_group_size(group)
+ if rank_size * rank_size != len(send_count_matrix):
+ raise TypeError(f"send_count_matrix must be square matrix, but got {len(send_count_matrix)}.")
+ _send_count_matrix = _get_all_to_all_v_c_numel_list(output, input, send_count_matrix)
+ _input = input.reshape(-1)
+ rank_id = get_cache_group_rank(group)
+ result = dist_comm_all_to_all_v_c_op(
+ output,
+ _input,
+ group,
+ _send_count_matrix,
+ rank_size,
+ rank_id,
+ )
+ _, handle = _deal_comm_outputs(result, async_op)
+ return handle
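To make the `send_count_matrix` layout concrete, a short illustration for a two-rank group (the counts are an example only, mirroring the docstring above):

    # For rank_size = 2 the list is a flattened 2x2 matrix in row-major order:
    # [m00, m01, m10, m11], where m[i][j] is how many first-dimension rows rank i sends to rank j.
    send_count_matrix = [0, 3, 3, 0]   # rank 0 sends 3 rows to rank 1; rank 1 sends 3 rows to rank 0
    # _get_all_to_all_v_c_numel_list then multiplies each entry by the element count of the remaining
    # dimensions, so only the first dimension of `input` is split across ranks.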
mindspore/communication/management.py
CHANGED

@@ -22,7 +22,8 @@ from mindspore.communication._comm_helper import Backend, _get_rank_helper, _get
_create_group_helper, _destroy_group_helper, HCCL_WORLD_COMM_GROUP, NCCL_WORLD_COMM_GROUP, \
MCCL_WORLD_COMM_GROUP, DEVICE_TO_BACKEND, _get_local_rank_helper, _get_local_size_helper, GlobalComm, \
_check_mpi_envs, _set_elegant_exit_handle, _get_group_ranks, _get_comm_name_helper, _comm_switch_nic_helper
- from mindspore._c_expression import init_hccl, finalize_hccl, init_cluster, MSContext, ms_ctx_param
+ from mindspore._c_expression import init_hccl, finalize_hccl, init_cluster, MSContext, ms_ctx_param, \
+ _init_hccl_with_store, _init_cluster_with_store
from mindspore.hal.device import is_initialized

__all__ = ["init", "release", "get_rank", "get_local_rank", "get_group_size",
@@ -221,6 +222,83 @@ def init(backend_name=None):
_set_envs()


+ def _init_without_sched(backend_name=None, init_method=None, timeout=None, world_size=-1, rank=-1, store=None):
+ """
+ Initialize the distributed backends required by the communication services through an existing TcpStore or
+ by creating a new TcpStore. This approach does not rely on an additional Scheduler process.
+
+ Args:
+ backend_name (str, optional): Backend, using ``"hccl"`` / ``"nccl"`` / ``"mccl"``.
+ ``"hccl"`` should be used for Ascend hardware platforms,
+ ``"nccl"`` for GPU hardware platforms and ``"mccl"`` for CPU hardware platforms.
+ If not set, inference is automatically made based on the hardware platform type (device_target).
+ Default: ``None`` .
+ init_method (str, optional): URL specifying how to init collective communication group. Default is ``None``.
+ timeout (timedelta, optional): Timeout for API executed. Default is ``None``. Currently, this parameter is
+ only supported for host-side cluster network configuration using `init_method` or `store`.
+ world_size (int, optional): Number of the processes participating in the job. Default is ``-1``.
+ rank (int, optional): Rank of the current process. Default is ``-1``.
+ store (Store, optional): An object that stores key/value data, facilitating the exchange of inter-process
+ communication addresses and connection information. Default is ``None``. Currently, only the
+ ``TCPStore`` type is supported.
+
+ Raises:
+ TypeError: If `backend_name` is not a string.
+ RuntimeError: If device target is invalid, or backend is invalid, or distributed initialization fails,
+ or the environment variables RANK_ID/MINDSPORE_HCCL_CONFIG_PATH
+ have not been exported when backend is HCCL.
+
+ Supported Platforms:
+ ``Ascend`` ``GPU`` ``CPU``
+ """
+ device_target = context.get_context("device_target")
+
+ if backend_name is None:
+ if device_target == "Ascend":
+ backend_name = "hccl"
+ elif device_target == "GPU":
+ backend_name = "nccl"
+ elif device_target == "CPU":
+ backend_name = "mccl"
+ else:
+ raise RuntimeError("For 'set_context', the argument 'device_target' {} is not supported in "
+ "parallel initialization, please use Ascend, GPU or CPU.".format(device_target))
+ if not isinstance(backend_name, str):
+ raise TypeError("For 'init', the argument 'backend_name' must be a string, "
+ "but got the type : {}".format(type(backend_name)))
+
+ _set_elegant_exit_handle()
+ if backend_name == "hccl":
+ if device_target != "Ascend":
+ raise RuntimeError("For 'init', the argument 'backend_name' should be '{}' to init '{}', "
+ "but got 'hccl'.".format(DEVICE_TO_BACKEND[device_target], device_target))
+ if is_initialized(device_target):
+ logger.warning(f"For 'init' in Ascend backend, the backend is already initialized, please set it before "
+ "the definition of any Tensor and Parameter, and the instantiation and execution of any "
+ "operation and net, otherwise the 'init' may not take effect.")
+ GlobalComm.BACKEND = Backend("hccl")
+ _check_hccl()
+ _init_hccl_with_store(init_method, timeout, world_size, rank, store)
+ GlobalComm.WORLD_COMM_GROUP = HCCL_WORLD_COMM_GROUP
+ elif backend_name == "nccl":
+ if device_target != "GPU":
+ raise RuntimeError("For 'init', the argument 'backend_name' should be '{}' to init '{}', "
+ "but got 'nccl'.".format(DEVICE_TO_BACKEND[device_target], device_target))
+ _init_cluster_with_store(init_method, timeout, world_size, rank, store)
+ GlobalComm.BACKEND = Backend("nccl")
+ GlobalComm.WORLD_COMM_GROUP = NCCL_WORLD_COMM_GROUP
+ elif backend_name == "mccl":
+ _init_cluster_with_store(init_method, timeout, world_size, rank, store)
+ GlobalComm.BACKEND = Backend("mccl")
+ GlobalComm.WORLD_COMM_GROUP = MCCL_WORLD_COMM_GROUP
+ else:
+ raise RuntimeError("For 'init', the argument 'backend_name' must be one of 'hccl', 'nccl' and 'mccl', "
+ "but got 'backend_name' : {}".format(backend_name))
+
+ GlobalComm.INITED = True
+ _set_envs()
+
+
def release():
"""
Release distributed resource. e.g. HCCL/NCCL/MCCL.
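A minimal usage sketch of the scheduler-free initialization path added above. `_init_without_sched` is an internal helper (note the leading underscore), and the endpoint, world size and rank below are placeholders rather than values documented in the release:

    import mindspore as ms
    from mindspore.communication.management import _init_without_sched

    ms.set_context(device_target="Ascend")
    # Each process passes the same rendezvous endpoint plus its own rank;
    # alternatively a pre-built TCPStore can be supplied via `store=`.
    _init_without_sched(backend_name="hccl",
                        init_method="tcp://127.0.0.1:10000",  # hypothetical endpoint format
                        world_size=2,
                        rank=0)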
@@ -571,6 +649,12 @@ def create_group(group, rank_ids, options=None):
hccl_config(dict)
}

+ `hccl_config` currently only supports "hccl_buffer_size" or "hccl_comm".
+
+ - hccl_buffer_size (uint32): specifies the size of the HCCL communication buffer.
+ - hccl_comm (int64): specifies an existing HcclComm pointer. If "hccl_comm" is set,
+ "hccl_buffer_size" will be ignored.
+
Raises:
TypeError: If group is not a string or `rank_ids` is not a list.
ValueError: If `rank_ids` size is not larger than 1, or `rank_ids` has duplicate data, or backend is invalid.
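A hedged sketch of passing the newly documented options. It assumes, from the `{hccl_config(dict)}` layout above and the `GroupOptions` import shown in the `_comm_helper.py` hunk, that `GroupOptions` exposes an `hccl_config` dict; the buffer size is purely illustrative:

    from mindspore.communication import init, create_group
    from mindspore._c_expression import GroupOptions   # import path as shown in the _comm_helper.py diff

    init()
    opts = GroupOptions()
    opts.hccl_config = {"hccl_buffer_size": 200}       # assumed field name; "hccl_comm" would take precedence if set
    create_group("group_0_1", [0, 1], options=opts)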
|