mindspore-2.4.1-cp310-none-any.whl → mindspore-2.4.10-cp310-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release. This version of mindspore might be problematic.
- mindspore/.commit_id +1 -1
- mindspore/_c_dataengine.cpython-310-aarch64-linux-gnu.so +0 -0
- mindspore/_c_expression.cpython-310-aarch64-linux-gnu.so +0 -0
- mindspore/_c_mindrecord.cpython-310-aarch64-linux-gnu.so +0 -0
- mindspore/bin/cache_server +0 -0
- mindspore/common/api.py +1 -4
- mindspore/common/file_system.py +2 -0
- mindspore/common/parameter.py +1 -14
- mindspore/communication/_comm_helper.py +5 -0
- mindspore/context.py +7 -2
- mindspore/dataset/engine/datasets_standard_format.py +17 -0
- mindspore/dataset/engine/datasets_user_defined.py +27 -1
- mindspore/experimental/llm_boost/__init__.py +2 -2
- mindspore/experimental/llm_boost/atb/boost_base.py +240 -64
- mindspore/experimental/llm_boost/atb/llama_boost.py +46 -29
- mindspore/experimental/llm_boost/atb/qwen_boost.py +47 -24
- mindspore/include/api/context.h +1 -1
- mindspore/include/dataset/constants.h +2 -2
- mindspore/lib/libavcodec.so.59 +0 -0
- mindspore/lib/libavdevice.so.59 +0 -0
- mindspore/lib/libavfilter.so.8 +0 -0
- mindspore/lib/libavformat.so.59 +0 -0
- mindspore/lib/libavutil.so.57 +0 -0
- mindspore/lib/libmindspore_backend.so +0 -0
- mindspore/lib/libmindspore_common.so +0 -0
- mindspore/lib/libmindspore_core.so +0 -0
- mindspore/lib/libmindspore_gpr.so.15 +0 -0
- mindspore/lib/libmindspore_grpc++.so.1 +0 -0
- mindspore/lib/libmindspore_grpc.so.15 +0 -0
- mindspore/lib/libmindspore_ops.so +0 -0
- mindspore/lib/libswresample.so.4 +0 -0
- mindspore/lib/libswscale.so.6 +0 -0
- mindspore/lib/plugin/ascend/custom_aicore_ops/op_proto/libop_proto.so +0 -0
- mindspore/lib/plugin/ascend/custom_aicpu_ops/op_impl/cpu/aicpu_kernel/impl/libcust_cpu_kernels.so +0 -0
- mindspore/lib/plugin/ascend/custom_aicpu_ops/op_proto/libcust_op_proto.so +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910/framework/npu_supported_ops.json +10 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910/op_api/lib/libcust_opapi.so +0 -0
- mindspore/lib/plugin/ascend/{custom_ascendc_ops → custom_ascendc_910}/op_impl/ai_core/tbe/config/ascend910/aic-ascend910-ops-info.json +0 -42
- mindspore/lib/plugin/ascend/{custom_ascendc_ops/op_impl/ai_core/tbe/custom_ascendc_ops_impl → custom_ascendc_910/op_impl/ai_core/tbe/custom_ascendc_910_impl}/dynamic/decoder_kv_cache.py +51 -16
- mindspore/lib/plugin/ascend/{custom_ascendc_ops/op_impl/ai_core/tbe/custom_ascendc_ops_impl → custom_ascendc_910/op_impl/ai_core/tbe/custom_ascendc_910_impl}/dynamic/prompt_kv_cache.py +51 -16
- mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/decoder_kv_cache/DecoderKvCache_0d5520cc587ad44ce634bf3fbcffc272.json +158 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/decoder_kv_cache/DecoderKvCache_0d5520cc587ad44ce634bf3fbcffc272.o +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/decoder_kv_cache/DecoderKvCache_20390d30b3c4c0d23167ccca6c030c2b.json +158 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/decoder_kv_cache/DecoderKvCache_20390d30b3c4c0d23167ccca6c030c2b.o +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/decoder_kv_cache/DecoderKvCache_2d151f0b1d2db51faa2968d5b67544e2.json +158 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/decoder_kv_cache/DecoderKvCache_2d151f0b1d2db51faa2968d5b67544e2.o +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/decoder_kv_cache/DecoderKvCache_561690ec17cc1def3d2fcf68c1b07b56.json +158 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/decoder_kv_cache/DecoderKvCache_561690ec17cc1def3d2fcf68c1b07b56.o +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/decoder_kv_cache/DecoderKvCache_570f9aaa99e5e773b3dd0a33784363f4.json +158 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/decoder_kv_cache/DecoderKvCache_570f9aaa99e5e773b3dd0a33784363f4.o +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/decoder_kv_cache/DecoderKvCache_59668a0f0764afb98fda8ab9e84126f1.json +158 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/decoder_kv_cache/DecoderKvCache_59668a0f0764afb98fda8ab9e84126f1.o +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/decoder_kv_cache/DecoderKvCache_91d9833e4792b70b670e4e2b916abd86.json +158 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/decoder_kv_cache/DecoderKvCache_91d9833e4792b70b670e4e2b916abd86.o +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/decoder_kv_cache/DecoderKvCache_c74cdc5fef094383401856f8519504af.json +158 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/decoder_kv_cache/DecoderKvCache_c74cdc5fef094383401856f8519504af.o +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/prompt_kv_cache/PromptKvCache_0515c7b1a4cd614449e38c5e9a7e3f8d.json +167 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/prompt_kv_cache/PromptKvCache_0515c7b1a4cd614449e38c5e9a7e3f8d.o +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/prompt_kv_cache/PromptKvCache_09f22d898d6358c91e7c4fc48bac48e7.json +167 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/prompt_kv_cache/PromptKvCache_09f22d898d6358c91e7c4fc48bac48e7.o +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/prompt_kv_cache/PromptKvCache_0cb9a6f894b925250227136e5aab7061.json +167 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/prompt_kv_cache/PromptKvCache_0cb9a6f894b925250227136e5aab7061.o +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/prompt_kv_cache/PromptKvCache_2fa8702ffd7ca85e9e194f62644415d5.json +167 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/prompt_kv_cache/PromptKvCache_2fa8702ffd7ca85e9e194f62644415d5.o +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/prompt_kv_cache/PromptKvCache_570b62f187dfd439b64613d881deedb7.json +167 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/prompt_kv_cache/PromptKvCache_570b62f187dfd439b64613d881deedb7.o +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/prompt_kv_cache/PromptKvCache_585218c11411ff84709b9e725b66c435.json +167 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/prompt_kv_cache/PromptKvCache_585218c11411ff84709b9e725b66c435.o +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/prompt_kv_cache/PromptKvCache_5c9365ccde170b358c5b126d69dae13e.json +167 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/prompt_kv_cache/PromptKvCache_5c9365ccde170b358c5b126d69dae13e.o +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/prompt_kv_cache/PromptKvCache_6d97c45b7c43bc16fcff8baa5dacac4e.json +167 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/ascend910/prompt_kv_cache/PromptKvCache_6d97c45b7c43bc16fcff8baa5dacac4e.o +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/config/ascend910/binary_info_config.json +302 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/config/ascend910/decoder_kv_cache.json +892 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/kernel/config/ascend910/prompt_kv_cache.json +892 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/op_tiling/lib/linux/aarch64/libcust_opmaster_rt2.0.so +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/op_tiling/liboptiling.so +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910/op_proto/inc/op_proto.h +33 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910/op_proto/lib/linux/aarch64/libcust_opsproto_rt2.0.so +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910/version.info +1 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/framework/npu_supported_ops.json +14 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_api/include/aclnn_decoder_kv_cache.h +59 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_api/include/aclnn_prompt_kv_cache.h +59 -0
- mindspore/lib/plugin/ascend/{custom_ascendc_ops → custom_ascendc_910b}/op_api/lib/libcust_opapi.so +0 -0
- mindspore/lib/plugin/ascend/{custom_ascendc_ops/op_impl/ai_core/tbe/custom_ascendc_ops_impl → custom_ascendc_910b/op_impl/ai_core/tbe/custom_ascendc_910b_impl}/dynamic/all_finite.py +51 -16
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/custom_ascendc_910b_impl/dynamic/decoder_kv_cache.cpp +192 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/custom_ascendc_910b_impl/dynamic/decoder_kv_cache.py +215 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/custom_ascendc_910b_impl/dynamic/prompt_kv_cache.cpp +274 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/custom_ascendc_910b_impl/dynamic/prompt_kv_cache.py +215 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/all_finite/AllFinite_52f59e2a65d9b1bb002de35c2819754a.json +80 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/all_finite/AllFinite_52f59e2a65d9b1bb002de35c2819754a.o +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/all_finite/AllFinite_6b5e50e30256d85838d6ce83514df20f.json +80 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/all_finite/AllFinite_6b5e50e30256d85838d6ce83514df20f.o +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/all_finite/AllFinite_74e4ac02880d452e3308c94af273562e.json +80 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/all_finite/AllFinite_74e4ac02880d452e3308c94af273562e.o +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/decoder_kv_cache/DecoderKvCache_0d5520cc587ad44ce634bf3fbcffc272.json +158 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/decoder_kv_cache/DecoderKvCache_0d5520cc587ad44ce634bf3fbcffc272.o +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/decoder_kv_cache/DecoderKvCache_20390d30b3c4c0d23167ccca6c030c2b.json +158 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/decoder_kv_cache/DecoderKvCache_20390d30b3c4c0d23167ccca6c030c2b.o +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/decoder_kv_cache/DecoderKvCache_2d151f0b1d2db51faa2968d5b67544e2.json +158 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/decoder_kv_cache/DecoderKvCache_2d151f0b1d2db51faa2968d5b67544e2.o +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/decoder_kv_cache/DecoderKvCache_561690ec17cc1def3d2fcf68c1b07b56.json +158 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/decoder_kv_cache/DecoderKvCache_561690ec17cc1def3d2fcf68c1b07b56.o +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/decoder_kv_cache/DecoderKvCache_570f9aaa99e5e773b3dd0a33784363f4.json +158 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/decoder_kv_cache/DecoderKvCache_570f9aaa99e5e773b3dd0a33784363f4.o +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/decoder_kv_cache/DecoderKvCache_59668a0f0764afb98fda8ab9e84126f1.json +158 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/decoder_kv_cache/DecoderKvCache_59668a0f0764afb98fda8ab9e84126f1.o +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/decoder_kv_cache/DecoderKvCache_91d9833e4792b70b670e4e2b916abd86.json +158 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/decoder_kv_cache/DecoderKvCache_91d9833e4792b70b670e4e2b916abd86.o +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/decoder_kv_cache/DecoderKvCache_c74cdc5fef094383401856f8519504af.json +158 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/decoder_kv_cache/DecoderKvCache_c74cdc5fef094383401856f8519504af.o +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/prompt_kv_cache/PromptKvCache_0515c7b1a4cd614449e38c5e9a7e3f8d.json +167 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/prompt_kv_cache/PromptKvCache_0515c7b1a4cd614449e38c5e9a7e3f8d.o +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/prompt_kv_cache/PromptKvCache_09f22d898d6358c91e7c4fc48bac48e7.json +167 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/prompt_kv_cache/PromptKvCache_09f22d898d6358c91e7c4fc48bac48e7.o +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/prompt_kv_cache/PromptKvCache_0cb9a6f894b925250227136e5aab7061.json +167 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/prompt_kv_cache/PromptKvCache_0cb9a6f894b925250227136e5aab7061.o +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/prompt_kv_cache/PromptKvCache_2fa8702ffd7ca85e9e194f62644415d5.json +167 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/prompt_kv_cache/PromptKvCache_2fa8702ffd7ca85e9e194f62644415d5.o +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/prompt_kv_cache/PromptKvCache_570b62f187dfd439b64613d881deedb7.json +167 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/prompt_kv_cache/PromptKvCache_570b62f187dfd439b64613d881deedb7.o +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/prompt_kv_cache/PromptKvCache_585218c11411ff84709b9e725b66c435.json +167 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/prompt_kv_cache/PromptKvCache_585218c11411ff84709b9e725b66c435.o +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/prompt_kv_cache/PromptKvCache_5c9365ccde170b358c5b126d69dae13e.json +167 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/prompt_kv_cache/PromptKvCache_5c9365ccde170b358c5b126d69dae13e.o +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/prompt_kv_cache/PromptKvCache_6d97c45b7c43bc16fcff8baa5dacac4e.json +167 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend310p/prompt_kv_cache/PromptKvCache_6d97c45b7c43bc16fcff8baa5dacac4e.o +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/all_finite/AllFinite_52f59e2a65d9b1bb002de35c2819754a.json +78 -0
- mindspore/lib/plugin/ascend/{custom_ascendc_ops/op_impl/ai_core/tbe/kernel/ascend910b/all_finite/AllFinite_86a73ff6e28d734c96bb8d3054f7dd18.o → custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/all_finite/AllFinite_52f59e2a65d9b1bb002de35c2819754a.o} +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/all_finite/AllFinite_6b5e50e30256d85838d6ce83514df20f.json +78 -0
- mindspore/lib/plugin/ascend/{custom_ascendc_ops/op_impl/ai_core/tbe/kernel/ascend910b/all_finite/AllFinite_f55e0ebaad1f2f572e43677336992fa0.o → custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/all_finite/AllFinite_6b5e50e30256d85838d6ce83514df20f.o} +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/all_finite/AllFinite_74e4ac02880d452e3308c94af273562e.json +78 -0
- mindspore/lib/plugin/ascend/{custom_ascendc_ops/op_impl/ai_core/tbe/kernel/ascend910b/all_finite/AllFinite_576ceaeef5870c451cab59af55ea46ad.o → custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/all_finite/AllFinite_74e4ac02880d452e3308c94af273562e.o} +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/decoder_kv_cache/DecoderKvCache_0d5520cc587ad44ce634bf3fbcffc272.json +156 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/decoder_kv_cache/DecoderKvCache_0d5520cc587ad44ce634bf3fbcffc272.o +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/decoder_kv_cache/DecoderKvCache_20390d30b3c4c0d23167ccca6c030c2b.json +156 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/decoder_kv_cache/DecoderKvCache_20390d30b3c4c0d23167ccca6c030c2b.o +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/decoder_kv_cache/DecoderKvCache_2d151f0b1d2db51faa2968d5b67544e2.json +156 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/decoder_kv_cache/DecoderKvCache_2d151f0b1d2db51faa2968d5b67544e2.o +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/decoder_kv_cache/DecoderKvCache_561690ec17cc1def3d2fcf68c1b07b56.json +156 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/decoder_kv_cache/DecoderKvCache_561690ec17cc1def3d2fcf68c1b07b56.o +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/decoder_kv_cache/DecoderKvCache_570f9aaa99e5e773b3dd0a33784363f4.json +156 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/decoder_kv_cache/DecoderKvCache_570f9aaa99e5e773b3dd0a33784363f4.o +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/decoder_kv_cache/DecoderKvCache_59668a0f0764afb98fda8ab9e84126f1.json +156 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/decoder_kv_cache/DecoderKvCache_59668a0f0764afb98fda8ab9e84126f1.o +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/decoder_kv_cache/DecoderKvCache_91d9833e4792b70b670e4e2b916abd86.json +156 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/decoder_kv_cache/DecoderKvCache_91d9833e4792b70b670e4e2b916abd86.o +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/decoder_kv_cache/DecoderKvCache_c74cdc5fef094383401856f8519504af.json +156 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/decoder_kv_cache/DecoderKvCache_c74cdc5fef094383401856f8519504af.o +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/prompt_kv_cache/PromptKvCache_0515c7b1a4cd614449e38c5e9a7e3f8d.json +165 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/prompt_kv_cache/PromptKvCache_0515c7b1a4cd614449e38c5e9a7e3f8d.o +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/prompt_kv_cache/PromptKvCache_09f22d898d6358c91e7c4fc48bac48e7.json +165 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/prompt_kv_cache/PromptKvCache_09f22d898d6358c91e7c4fc48bac48e7.o +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/prompt_kv_cache/PromptKvCache_0cb9a6f894b925250227136e5aab7061.json +165 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/prompt_kv_cache/PromptKvCache_0cb9a6f894b925250227136e5aab7061.o +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/prompt_kv_cache/PromptKvCache_2fa8702ffd7ca85e9e194f62644415d5.json +165 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/prompt_kv_cache/PromptKvCache_2fa8702ffd7ca85e9e194f62644415d5.o +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/prompt_kv_cache/PromptKvCache_570b62f187dfd439b64613d881deedb7.json +165 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/prompt_kv_cache/PromptKvCache_570b62f187dfd439b64613d881deedb7.o +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/prompt_kv_cache/PromptKvCache_585218c11411ff84709b9e725b66c435.json +165 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/prompt_kv_cache/PromptKvCache_585218c11411ff84709b9e725b66c435.o +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/prompt_kv_cache/PromptKvCache_5c9365ccde170b358c5b126d69dae13e.json +165 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/prompt_kv_cache/PromptKvCache_5c9365ccde170b358c5b126d69dae13e.o +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/prompt_kv_cache/PromptKvCache_6d97c45b7c43bc16fcff8baa5dacac4e.json +165 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910b/prompt_kv_cache/PromptKvCache_6d97c45b7c43bc16fcff8baa5dacac4e.o +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/config/ascend310p/all_finite.json +139 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/config/ascend310p/binary_info_config.json +361 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/config/ascend310p/decoder_kv_cache.json +892 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/config/ascend310p/prompt_kv_cache.json +892 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/config/ascend910b/all_finite.json +139 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/config/ascend910b/binary_info_config.json +361 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/config/ascend910b/decoder_kv_cache.json +892 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/config/ascend910b/prompt_kv_cache.json +892 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/op_tiling/lib/linux/aarch64/libcust_opmaster_rt2.0.so +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/op_tiling/liboptiling.so +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_proto/lib/linux/aarch64/libcust_opsproto_rt2.0.so +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/version.info +1 -0
- mindspore/lib/plugin/ascend/libhccl_plugin.so +0 -0
- mindspore/lib/plugin/ascend/libmindspore_cpu_kernels.so +0 -0
- mindspore/lib/plugin/ascend/libmindspore_internal_kernels.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/bin/PkgInspect +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/bin/op_man +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/host/libasdops_cann_host.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/host/libasdops_host.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/lib/libasdops.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/lib/libasdops_static.a +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/lib/liblcal_static.a +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/src/ops/host_src/flash_attention_score_op.h +6 -1
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libadd_rms_norm_quant_acme_impl.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libms_kernels_internal.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/flash_attention_score/flash_attention_score_bf16_bnsd_full_mix.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/flash_attention_score/flash_attention_score_fp16_bnsd_full_mix.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/flash_attention_score/flash_attention_score_fp16_bnsd_tri_mix.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/flash_attention_score/flash_attention_score_fp16_bsh_full_mix.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/flash_attention_score/flash_attention_score_fp16_bsh_tri_mix.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/paged_attention/paged_attention_bf16_bnsd_mix.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/paged_attention/paged_attention_bf16_bsh_mix.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/paged_attention/paged_attention_fp16_bsh_mix.o +0 -0
- mindspore/lib/plugin/libmindspore_ascend.so.2 +0 -0
- mindspore/nn/__init__.py +2 -0
- mindspore/nn/cell.py +16 -2
- mindspore/nn/layer/conv.py +3 -0
- mindspore/nn/layer/pooling.py +8 -10
- mindspore/nn/utils/__init__.py +22 -0
- mindspore/nn/utils/init.py +71 -0
- mindspore/ops/_grad_experimental/grad_comm_ops.py +25 -7
- mindspore/ops/auto_generate/gen_ops_prim.py +3 -2
- mindspore/ops/function/math_func.py +5 -4
- mindspore/ops/operations/comm_ops.py +4 -1
- mindspore/ops/operations/custom_ops.py +6 -4
- mindspore/ops/operations/nn_ops.py +7 -2
- mindspore/parallel/_auto_parallel_context.py +23 -4
- mindspore/parallel/_cell_wrapper.py +22 -3
- mindspore/parallel/_utils.py +0 -1
- mindspore/run_check/_check_version.py +17 -8
- mindspore/train/callback/_tft_register.py +7 -6
- mindspore/train/model.py +1 -0
- mindspore/train/serialization.py +4 -1
- mindspore/version.py +1 -1
- {mindspore-2.4.1.dist-info → mindspore-2.4.10.dist-info}/METADATA +2 -2
- {mindspore-2.4.1.dist-info → mindspore-2.4.10.dist-info}/RECORD +233 -106
- mindspore/lib/plugin/ascend/custom_ascendc_ops/op_impl/ai_core/tbe/kernel/ascend910b/all_finite/AllFinite_576ceaeef5870c451cab59af55ea46ad.json +0 -58
- mindspore/lib/plugin/ascend/custom_ascendc_ops/op_impl/ai_core/tbe/kernel/ascend910b/all_finite/AllFinite_86a73ff6e28d734c96bb8d3054f7dd18.json +0 -58
- mindspore/lib/plugin/ascend/custom_ascendc_ops/op_impl/ai_core/tbe/kernel/ascend910b/all_finite/AllFinite_f55e0ebaad1f2f572e43677336992fa0.json +0 -58
- mindspore/lib/plugin/ascend/custom_ascendc_ops/op_impl/ai_core/tbe/kernel/config/ascend910b/all_finite.json +0 -109
- mindspore/lib/plugin/ascend/custom_ascendc_ops/op_impl/ai_core/tbe/kernel/config/ascend910b/binary_info_config.json +0 -38
- mindspore/lib/plugin/ascend/custom_ascendc_ops/op_impl/ai_core/tbe/op_tiling/lib/linux/aarch64/libcust_opmaster_rt2.0.so +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_ops/op_impl/ai_core/tbe/op_tiling/liboptiling.so +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_ops/op_proto/lib/linux/aarch64/libcust_opsproto_rt2.0.so +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_ops/version.info +0 -1
- /mindspore/lib/plugin/ascend/{custom_ascendc_ops → custom_ascendc_910}/op_api/include/aclnn_decoder_kv_cache.h +0 -0
- /mindspore/lib/plugin/ascend/{custom_ascendc_ops → custom_ascendc_910}/op_api/include/aclnn_prompt_kv_cache.h +0 -0
- /mindspore/lib/plugin/ascend/{custom_ascendc_ops/op_impl/ai_core/tbe/custom_ascendc_ops_impl → custom_ascendc_910/op_impl/ai_core/tbe/custom_ascendc_910_impl}/dynamic/decoder_kv_cache.cpp +0 -0
- /mindspore/lib/plugin/ascend/{custom_ascendc_ops/op_impl/ai_core/tbe/custom_ascendc_ops_impl → custom_ascendc_910/op_impl/ai_core/tbe/custom_ascendc_910_impl}/dynamic/prompt_kv_cache.cpp +0 -0
- /mindspore/lib/plugin/ascend/{custom_ascendc_ops → custom_ascendc_910b}/op_api/include/aclnn_all_finite.h +0 -0
- /mindspore/lib/plugin/ascend/{custom_ascendc_ops → custom_ascendc_910b}/op_impl/ai_core/tbe/config/ascend310p/aic-ascend310p-ops-info.json +0 -0
- /mindspore/lib/plugin/ascend/{custom_ascendc_ops → custom_ascendc_910b}/op_impl/ai_core/tbe/config/ascend910b/aic-ascend910b-ops-info.json +0 -0
- /mindspore/lib/plugin/ascend/{custom_ascendc_ops/op_impl/ai_core/tbe/custom_ascendc_ops_impl → custom_ascendc_910b/op_impl/ai_core/tbe/custom_ascendc_910b_impl}/dynamic/all_finite.cpp +0 -0
- /mindspore/lib/plugin/ascend/{custom_ascendc_ops → custom_ascendc_910b}/op_proto/inc/op_proto.h +0 -0
- {mindspore-2.4.1.dist-info → mindspore-2.4.10.dist-info}/WHEEL +0 -0
- {mindspore-2.4.1.dist-info → mindspore-2.4.10.dist-info}/entry_points.txt +0 -0
- {mindspore-2.4.1.dist-info → mindspore-2.4.10.dist-info}/top_level.txt +0 -0
mindspore/lib/plugin/ascend/custom_ascendc_910b/op_proto/lib/linux/aarch64/libcust_opsproto_rt2.0.so
ADDED
Binary file

mindspore/lib/plugin/ascend/custom_ascendc_910b/version.info
ADDED
@@ -0,0 +1 @@
+custom_opp_compiler_version=7.6.T8.0.B059

mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/src/ops/host_src/flash_attention_score_op.h
CHANGED
@@ -59,6 +59,7 @@ class FlashAttentionScoreOp : public MultiImplsOp {
   AcmeStatus UpdateShape(const ShapeInfoList &inputs_shape, const ShapeInfoList &outputs_shape) override;
   const std::string &TargetKernelName() const override { return target_kernel_name_; }
   ShapeInfoList InferShape(const ShapeInfoList &inputs_shape) const override;
+  AcmeStatus UpdateParam(const void *) override;
 
  protected:
   AcmeStatus InitImpl() override;
@@ -69,7 +70,7 @@ class FlashAttentionScoreOp : public MultiImplsOp {
   std::string DumpTilingAcme(const RawHostAddr host_ptr) const override;
   uint32_t GetLaunchCoreNumAcme() const override;
   AcmeStatus CreateAsdTensor();
-  AcmeStatus UpdateAsdTensor(
+  AcmeStatus UpdateAsdTensor();
   AcmeStatus UpdateAsdParam();
   AcmeStatus CheckAsdopSupport() const;
 
@@ -78,6 +79,10 @@ class FlashAttentionScoreOp : public MultiImplsOp {
   FlashAttentionScoreParam param_;
   InputsDescList asd_inputs_;
   OutputsDescList asd_outputs_;
+  InputsImmutableInfoList asd_inputs_ii_;
+  InputsImmutableInfoList asd_outputs_ii_;
+  ShapeInfoList asd_input_shape_;
+  ShapeInfoList asd_output_shape_;
   uint64_t tiling_key_{0};
   bool has_attn_mask_{false};
   bool has_alibi_mask_{false};

mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libms_kernels_internal.so
CHANGED
Binary file

mindspore/nn/__init__.py
CHANGED
@@ -31,6 +31,7 @@ from mindspore.nn.wrap import *
 from mindspore.nn.grad import Jvp, Vjp
 from mindspore.nn.sparse import *
 from mindspore.nn.reinforcement import *
+from mindspore.nn.utils import *
 
 __all__ = ["Cell", "GraphCell"]
 __all__.extend(layer.__all__)
@@ -43,5 +44,6 @@ __all__.extend(sparse.__all__)
 __all__.extend(learning_rate_schedule.__all__)
 __all__.extend(dynamic_lr.__all__)
 __all__.extend(reinforcement.__all__)
+__all__.extend(utils.__all__)
 
 __all__.sort()

mindspore/nn/cell.py
CHANGED
@@ -32,7 +32,8 @@ from mindspore import context
 from mindspore._c_expression import init_pipeline, update_func_graph_hyper_params, Cell_, FuncGraph, MixedPrecisionType
 from mindspore import _checkparam as Validator
 from mindspore.common import dtype as mstype
-from mindspore.common.api import _cell_graph_executor, _pynative_executor, _get_args_for_run, cells_compile_cache,
+from mindspore.common.api import _cell_graph_executor, _pynative_executor, _get_args_for_run, cells_compile_cache, \
+    _no_grad
 from mindspore.common.api import _generate_branch_control_input, _convert_python_data, _get_args_for_run_predict
 from mindspore.common.api import _process_dyn_args, _generate_dyn_compile_args
 from mindspore.common.parameter import Parameter, ParameterTuple
@@ -45,6 +46,7 @@ from mindspore._check_jit_forbidden_api import jit_forbidden_register
 from mindspore.common._decorator import deprecated
 from mindspore.common._register_for_recompute import recompute_registry
 
+
 class Cell(Cell_):
     """
     The basic building block of neural networks in MindSpore. The model or neural network layer should inherit this
@@ -2582,7 +2584,7 @@ class Cell(Cell_):
         """
         if context.get_context("mode") == context.PYNATIVE_MODE:
             self._recompute_cell = recompute_registry.get()(self.construct)
-            self.
+            self._add_recompute_flag()
             return
         self._recompute()
         if 'mp_comm_recompute' in kwargs.keys():
@@ -2685,6 +2687,18 @@ class Cell(Cell_):
         if hasattr(network, "_amp_level"):
             self._amp_level = getattr(network, "_amp_level")
 
+    def _add_recompute_flag(self):
+        """
+        Set pynative cell recomputed.
+        """
+        if not self._has_config_recompute:
+            self._has_config_recompute = True
+        else:
+            logger.info("The recompute interface can be configured only once."
+                        " If the parent cell is configured, the child cell should not be configured")
+        for cell in self.cells():
+            cell._add_recompute_flag()
+
 
 class GraphCell(Cell):
     """

mindspore/nn/layer/conv.py
CHANGED
@@ -862,6 +862,9 @@ class Conv3dTranspose(_Conv):
     However, when `stride` > 1, Conv2d maps multiple input shapes to the same output shape. Deconvolutional network
     can refer to `Deconvolutional Networks <https://www.matthewzeiler.com/mattzeiler/deconvolutionalnetworks.pdf>`_.
 
+    Note:
+        For Atlas A2 training series products, `output_padding` is currently not supported.
+
     Args:
         in_channels (int): The channel number of the input tensor of the Conv3dTranspose layer.
         out_channels (int): The channel number of the output tensor of the Conv3dTranspose layer.

mindspore/nn/layer/pooling.py
CHANGED
@@ -297,6 +297,9 @@ class MaxPool3d(_PoolNd):
         \max_{l=0, \ldots, d_{ker}-1} \max_{m=0, \ldots, h_{ker}-1} \max_{n=0, \ldots, w_{ker}-1}
         \text{input}(N_i, C_j, s_0 \times d + l, s_1 \times h + m, s_2 \times w + n)
 
+    .. note::
+        For Atlas training series products, this interface is not supported.
+
     Args:
         kernel_size (Union[int, tuple[int]]): The size of kernel used to take the maximum value,
             is an int number or a single element tuple that represents depth, height and width of the kernel, or a tuple
@@ -1032,16 +1035,11 @@ class AvgPool2dExt(Cell):
         >>> import numpy as np
         >>> from mindspore import Tensor, nn
         >>> from mindspore import dtype as mstype
-        >>>
-        >>>
-        >>> output =
-        >>> print(output)
-
-        [ 6.5  7.5  8.5]]
-        [[14.5 15.5 16.5]
-        [18.5 19.5 20.5]]
-        [[26.5 27.5 28.5]
-        [30.5 31.5 32.5]]]]
+        >>> input = Tensor(np.arange(1 * 3 * 3 * 4).reshape(1, 3, 3, 4), mstype.float32)
+        >>> net = nn.AvgPool2dExt(kernel_size=2, stride=1)
+        >>> output = net(input)
+        >>> print(output.shape)
+        (1, 3, 2, 3)
     """
     def __init__(self, kernel_size, stride=None, padding=0, ceil_mode=False,
                  count_include_pad=True, divisor_override=None):

mindspore/nn/utils/__init__.py
ADDED
@@ -0,0 +1,22 @@
+# Copyright 2024 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+"""
+nn.utils.
+"""
+from __future__ import absolute_import
+
+from .init import no_init_parameters
+
+__all__ = ["no_init_parameters"]

mindspore/nn/utils/init.py
ADDED
@@ -0,0 +1,71 @@
+# Copyright 2024 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+"""init for nn.Cell."""
+from __future__ import absolute_import
+
+from contextlib import contextmanager
+from mindspore.common.parameter import Parameter
+
+
+@contextmanager
+def no_init_parameters():
+    r"""
+    In scenarios where a checkpoint is loaded, parameters within the network instantiation will be
+    instantiated and occupy physical memory. Loading a checkpoint will replace the parameter values.
+    Decorator can be applied during network instantiation to add an attribute `init_param` to all
+    parameters within the current Cell, setting it to `init_param=False` .
+    When `init_param=False` is detected, the initialization of the parameters is skipped,
+    and the parameters are assigned values directly from the checkpoint during loading,
+    which can optimize performance and reduce physical memory usage.
+
+    Note:
+        Initialization of parameters created with `initializer` can only be skipped.
+        Parameters created by `Tensor` or `numpy` cannot be skipped.
+
+    Examples:
+        >>> import mindspore as ms
+        >>> from mindspore import nn, ops, load_checkpoint
+        >>> from mindspore.common.initializer import initializer
+        >>> from mindspore.nn.utils import no_init_parameters
+        >>> # 1. Add a decorator to the network that requires delayed initialization
+        >>> class Net(nn.Cell):
+        ...     def __init__(self, in_channels, out_channels):
+        ...         super().__init__()
+        ...         self.weight = ms.Parameter(initializer("normal", [in_channels, out_channels], ms.float32))
+        ...         self.bias = ms.Parameter(initializer("normal", [out_channels], ms.float32))
+        ...         self.matmul = ops.MatMul()
+        ...         self.add = ops.Add()
+        ...
+        ...     def construct(self, x):
+        ...         x = self.matmul(x, self.weight)
+        ...         x = self.add(x, self.bias)
+        ...         return x
+        >>> with no_init_parameters():
+        ...     # After instantiation, all parameters in the net are not initialized
+        ...     net = Net(28*28, 64)
+        >>> # 2. Load checkpoint parameters to the net
+        >>> load_checkpoint('./checkpoint/test_net.ckpt', net=net)
+        >>> # 3. After loading the checkpoint, manually call init_parameters_data() to initialize
+        >>> # the uninitialized parameters in the net if need. If the network is executed,
+        >>> # the framework will automatically call this interface.
+        >>> net.init_parameters_data()
+    """
+    init_class = Parameter
+    setattr(init_class, "init_param", False)
+    try:
+        yield
+    finally:
+        setattr(init_class, "init_param", True)

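The context manager above works by flipping a class-level attribute on `Parameter` for the duration of the `with` block, so every parameter constructed inside sees the flag and can skip materializing its data. A pure-Python sketch of that mechanism, using a hypothetical `FakeParameter` in place of the real `Parameter` class:

from contextlib import contextmanager


class FakeParameter:
    """Stand-in for mindspore's Parameter; not the real class."""
    init_param = True  # class-level flag, mirrors the attribute set in the diff

    def __init__(self, size):
        # A real Parameter would allocate and fill tensor data here;
        # when the flag is off, materialization is skipped.
        self.data = [0.0] * size if type(self).init_param else None


@contextmanager
def no_init_parameters():
    # Same shape as the diff: set the class attribute, restore it in finally.
    FakeParameter.init_param = False
    try:
        yield
    finally:
        FakeParameter.init_param = True


with no_init_parameters():
    lazy = FakeParameter(4)     # created inside the context: no data
eager = FakeParameter(4)        # created outside: initialized as usual

assert lazy.data is None
assert eager.data == [0.0, 0.0, 0.0, 0.0]
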
mindspore/ops/_grad_experimental/grad_comm_ops.py
CHANGED
@@ -16,7 +16,7 @@
 """Generate bprop for comm ops"""
 from __future__ import division
 from __future__ import absolute_import
-from mindspore import Tensor
+from mindspore import Tensor, Parameter
 import mindspore.common.dtype as mstype
 from mindspore.ops import functional as F
 from mindspore.communication import get_rank, get_group_size
@@ -37,6 +37,9 @@ from mindspore.ops._grad_experimental.grad_base import bprop_getters
 from mindspore.ops.operations import _grad_ops as G
 import mindspore as ms
 
+_device_local_norm = None
+if ms.get_auto_parallel_context("dump_device_local_norm"):
+    _device_local_norm = Parameter(Tensor(0.0, mstype.float32), name="_device_local_norm", requires_grad=False)
 
 @bprop_getters.register(AllReduce)
 def get_bprop_all_reduce(self):
@@ -247,10 +250,15 @@ def get_bprop_mirror_micro_step_operator(self):
     reduce_sum = P.ReduceSum(keep_dims=False)
     square = P.Square()
     dump_local_norm = ms.get_auto_parallel_context("dump_local_norm")
+    dump_device_local_norm = ms.get_auto_parallel_context("dump_device_local_norm")
 
     def bprop(x, z, out, dout):
-        if dump_local_norm:
-
+        if dump_local_norm or dump_device_local_norm:
+            _norm = reduce_sum(square((z)))
+            if dump_local_norm:
+                z = F.depend(z, ln_print("dump local norm: ", param_name, _norm))
+            if dump_device_local_norm:
+                z = F.depend(z, F.assign_add(_device_local_norm, cast(_norm, _device_local_norm.dtype)))
         real_grad = z
         assign_out = dout
         if issubclass_(F.typeof(dout), mstype.tensor_type):
@@ -373,6 +381,7 @@ def get_bprop_micro_step_all_gather(self):
     reduce_sum = P.ReduceSum(keep_dims=False)
     square = P.Square()
     dump_local_norm = ms.get_auto_parallel_context("dump_local_norm")
+    dump_device_local_norm = ms.get_auto_parallel_context("dump_device_local_norm")
 
     def bprop(x, z, out, dout):
         if with_mirror_operator:
@@ -383,8 +392,12 @@ def get_bprop_micro_step_all_gather(self):
             real_grad = F.tensor_mul(real_grad, scale)
             return (real_grad, cast(out_tensor, dtype(z)))
         z = F.depend(z, dout)
-        if dump_local_norm:
-
+        if dump_local_norm or dump_device_local_norm:
+            _norm = reduce_sum(square((z)))
+            if dump_local_norm:
+                z = F.depend(z, ln_print("dump local norm: ", param_name, _norm))
+            if dump_device_local_norm:
+                z = F.depend(z, F.assign_add(_device_local_norm, cast(_norm, _device_local_norm.dtype)))
         if not do_mirror:
             return (z, cast(out_tensor, dtype(z)))
         real_grad = reduce_scatter(z)
@@ -586,6 +599,7 @@ def get_bprop_mirror_operator(self):
 
     dev_num_r = 1.0
     dump_local_norm = ms.get_auto_parallel_context("dump_local_norm")
+    dump_device_local_norm = ms.get_auto_parallel_context("dump_device_local_norm")
     if dev_num > 1:
         dev_num_r = 1.0 / dev_num
         all_reduce = AllReduce(group=group)
@@ -608,8 +622,12 @@ def get_bprop_mirror_operator(self):
         all_reduce.set_prim_instance_name(instance_name)
 
     def bprop(x, out, dout):
-        if dump_local_norm:
-
+        if dump_local_norm or dump_device_local_norm:
+            _norm = reduce_sum(square((dout)))
+            if dump_local_norm:
+                dout = F.depend(dout, ln_print("dump local norm: ", param_name, _norm))
+            if dump_device_local_norm:
+                dout = F.depend(dout, F.assign_add(_device_local_norm, cast(_norm, _device_local_norm.dtype)))
 
         if dev_num == 1:
             return (dout,)

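For orientation, the hunks above all follow one pattern: compute the squared L2 norm of the incoming gradient once, optionally print it per parameter (`dump_local_norm`), and optionally accumulate it into a single device-wide accumulator (`dump_device_local_norm`). A pure-Python sketch of that pattern; the names mirror the diff, but nothing here is a MindSpore API:

_device_local_norm = 0.0  # stands in for the Parameter accumulator above


def dump_norms(param_name, grad, dump_local_norm, dump_device_local_norm):
    """Mirror of the bprop logic: compute the squared norm once, use it twice."""
    global _device_local_norm
    if dump_local_norm or dump_device_local_norm:
        _norm = sum(g * g for g in grad)          # reduce_sum(square(z))
        if dump_local_norm:
            print("dump local norm: ", param_name, _norm)
        if dump_device_local_norm:
            _device_local_norm += _norm           # F.assign_add(...)


dump_norms("fc.weight", [3.0, 4.0], True, True)   # prints 25.0
dump_norms("fc.bias", [1.0], False, True)
assert _device_local_norm == 26.0
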
mindspore/ops/auto_generate/gen_ops_prim.py
CHANGED
@@ -2387,7 +2387,8 @@ class BatchMatMul(Primitive):
 
         \text{output}[..., :, :] = \text{matrix}(x[..., :, :]) * \text{matrix}(y[..., :, :])
 
-    The rank of
+    The rank of the two input tensors must be at least `2`, and the two input tensors must have the same rank
+    if the environment is GPU or CPU.
 
     Args:
         transpose_a (bool): If ``True`` , the last two dimensions of `x` is transposed before multiplication.
@@ -9488,7 +9489,7 @@ class MatMul(Primitive):
 
     .. math::
 
-
+        (Output)_{i j}=\sum_{k=1}^{p} a_{i k} b_{k j}=a_{i 1} b_{1 j}+a_{i 2} b_{2 j}+\cdots+a_{i p} b_{p j}, p\in N
 
     where the :math:`i,j` indicates the output of the i-th row and j-th column element.

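The restored formula is the standard matrix-product sum, Output[i][j] = sum over k of a[i][k] * b[k][j]; a plain-Python numeric check, independent of MindSpore:

# Each output element is the dot product of a row of a with a column of b.
a = [[1.0, 2.0], [3.0, 4.0]]
b = [[5.0, 6.0], [7.0, 8.0]]
p = len(b)  # shared inner dimension
out = [[sum(a[i][k] * b[k][j] for k in range(p)) for j in range(2)] for i in range(2)]
assert out == [[19.0, 22.0], [43.0, 50.0]]
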
mindspore/ops/function/math_func.py
CHANGED
@@ -9088,9 +9088,9 @@ def remainder(input, other):
     both dtypes cannot be bool, and the shapes of them could be broadcast. When the inputs are one tensor
     and one scalar, the scalar could only be a constant.
 
-    ..
+    .. code:: python
 
-        remainder(input, other)
+        remainder(input, other) == input - input.div(other, rounding_mode="floor") * other
 
     .. warning::
         - When the elements of input exceed 2048, there might be accuracy problems.
@@ -9135,9 +9135,10 @@ def remainder_ext(input, other):
 
     Supports broadcasting to a common shape and implicit type promotion.
 
-    ..
+    .. code:: python
+
+        remainder(input, other) == input - input.div(other, rounding_mode="floor") * other
 
-        remainder(input, other) = input - input.div(other, rounding\_mode="floor") * other
 
     Note:
         Complex inputs are not supported. At least one input need to be tensor, but not both are bool tensors.

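The identity restored in both docstrings is easy to sanity-check in plain Python, with `math.floor` standing in for `rounding_mode="floor"` (an illustration only, no MindSpore call involved):

import math


def remainder_ref(a, b):
    # input - input.div(other, rounding_mode="floor") * other
    return a - math.floor(a / b) * b


# Floor division makes the result take the sign of the divisor,
# which is exactly what Python's % operator does for floats.
assert remainder_ref(7.0, 3.0) == 7.0 % 3.0 == 1.0
assert remainder_ref(-7.0, 3.0) == -7.0 % 3.0 == 2.0
assert remainder_ref(7.0, -3.0) == 7.0 % -3.0 == -2.0
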
mindspore/ops/operations/comm_ops.py
CHANGED
@@ -988,6 +988,9 @@ class NeighborExchangeV2(Primitive):
         in the same subnet, please check the `details \
         <https://www.mindspore.cn/docs/en/master/api_python/samples/ops/communicate_ops.html#notes>`_.
 
+        Users need to ensure that the length of the received data `recv_lens` is consistent with that of
+        the sent data `send_lens`.
+
     Args:
         send_rank_ids (list(int)): Ranks which the data is sent to. 8 rank_ids represents 8 directions, if one
             direction is not send to , set it -1.
@@ -1393,7 +1396,7 @@ class Send(PrimitiveWithInfer):
         >>>     def __init__(self):
         >>>         super(SendNet, self).__init__()
         >>>         self.depend = ops.Depend()
-        >>>         self.send = ops.Send(
+        >>>         self.send = ops.Send(sr_tag=0, dest_rank=8, group="hccl_world_group")
         >>>
         >>>     def construct(self, x):
         >>>         out = self.depend(x, self.send(x))

mindspore/ops/operations/custom_ops.py
CHANGED
@@ -251,11 +251,13 @@ class Custom(ops.PrimitiveWithInfer):
 
     - "xxx.so" file generation:
 
-        1) GPU Platform: Given user defined "xxx.cu" file (ex. "{path}/add.cu"),
-
+        1) GPU Platform: Given user defined "xxx.cu" file (ex. "{path}/add.cu"),
+           use nvcc command to compile
+           it.(ex. :code:`nvcc --shared -Xcompiler -fPIC -o add.so add.cu`)
 
-        2) CPU Platform: Given user defined "xxx.cc" file (ex. "{path}/add.cc"),
-
+        2) CPU Platform: Given user defined "xxx.cc" file (ex. "{path}/add.cc"),
+           use g++/gcc command to
+           compile it.(ex. :code:`g++ --shared -fPIC -o add.so add.cc`)
 
     - Define a "xxx.cc"/"xxx.cu" file:

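Assuming an `add.so` produced by one of the commands quoted above, hooking it into `ops.Custom` with the `"aot"` func_type would look roughly as follows; the exported symbol name `CustomAdd` and the shape/dtype lambdas are illustrative assumptions, not taken from this diff:

import mindspore.ops as ops

# "./add.so:CustomAdd" names the compiled shared object and the exported
# symbol; out_shape/out_dtype declare that the output mirrors the first input.
# Construction only: actually running it requires the .so and a matching target.
add_op = ops.Custom("./add.so:CustomAdd",
                    out_shape=lambda x, y: x,
                    out_dtype=lambda x, y: x,
                    func_type="aot")
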
mindspore/ops/operations/nn_ops.py
CHANGED
@@ -1430,6 +1430,9 @@ class MaxPool3D(Primitive):
         \max_{l=0, \ldots, d_{ker}-1} \max_{m=0, \ldots, h_{ker}-1} \max_{n=0, \ldots, w_{ker}-1}
         \text{input}(N_i, C_j, s_0 \times d + l, s_1 \times h + m, s_2 \times w + n)
 
+    .. note::
+        For Atlas training series products, this primitive is not supported.
+
     Args:
         kernel_size (Union[int, tuple[int]]): The size of kernel used to take the maximum value,
             is an int number that represents depth, height and width of the kernel, or a tuple
@@ -4759,7 +4762,8 @@ class SparseApplyAdagradV2(Primitive):
         - **grad** (Tensor) - Gradients has the same shape as `var` and
           :math:`grad.shape[1:] = var.shape[1:]` if var.shape > 1.
         - **indices** (Tensor) - A vector of indices into the first dimension of `var` and `accum`.
-          The type must be int32 and :math:`indices.shape[0] = grad.shape[0]`.
+          The type must be int32 and :math:`indices.shape[0] = grad.shape[0]`. The value of indices
+          must be unique. Otherwise, the result is unpredictable.
 
     Outputs:
         Tuple of 2 tensors, the updated parameters.
@@ -7158,7 +7162,8 @@ class Conv3DTranspose(Primitive):
             \times (\text{kernel_size}[2] - 1) + \text{output_padding}[2] + 1
 
     Note:
-        In Ascend, only support :math:`group=1`.
+        - In Ascend, only support :math:`group=1`.
+        - For Atlas A2 training series products, `output_padding` is currently not supported.
 
     Args:
         in_channel (int): The channel of the input x.

mindspore/parallel/_auto_parallel_context.py
CHANGED
@@ -187,6 +187,25 @@ class _AutoParallelContext:
         self.check_context_handle()
         return self._context_handle.get_dump_local_norm()
 
+    def set_dump_device_local_norm(self, dump_device_local_norm):
+        """
+        Set dump device local norm for auto parallel.
+
+        Args:
+            dump_device_local_norm (bool): User need to specify if he want to dump device local norm. Default: False
+
+        Raises:
+            ValueError: If the dump_device_local_norm in not a bool value.
+        """
+        self.check_context_handle()
+        self._context_handle.set_dump_device_local_norm(dump_device_local_norm)
+
+    def get_dump_device_local_norm(self):
+        """Get dump device local norm."""
+        self.check_context_handle()
+        return self._context_handle.get_dump_device_local_norm()
+
+
     def set_fusion_threshold_mb(self, fusion_threshold=64, comm_type="allreduce"):
         """
         Set fusion threshold (MB) for auto parallel.
@@ -1287,7 +1306,8 @@ _set_auto_parallel_context_func_map = {
     "enable_alltoall": auto_parallel_context().set_enable_alltoall,
     "strategy_ckpt_config": auto_parallel_context().set_strategy_ckpt_config,
     "comm_fusion": auto_parallel_context().set_comm_fusion,
-    "dump_local_norm": auto_parallel_context().set_dump_local_norm
+    "dump_local_norm": auto_parallel_context().set_dump_local_norm,
+    "dump_device_local_norm": auto_parallel_context().set_dump_device_local_norm}
 
 _get_auto_parallel_context_func_map = {
     "device_num": auto_parallel_context().get_device_num,
@@ -1320,7 +1340,8 @@ _get_auto_parallel_context_func_map = {
     "comm_fusion": auto_parallel_context().get_comm_fusion,
     "strategy_ckpt_config": auto_parallel_context().get_strategy_ckpt_config,
     "full_batch_is_set": auto_parallel_context().get_full_batch_is_set,
-    "dump_local_norm": auto_parallel_context().get_dump_local_norm
+    "dump_local_norm": auto_parallel_context().get_dump_local_norm,
+    "dump_device_local_norm": auto_parallel_context().get_dump_device_local_norm}
 
 
 @args_type_check(device_num=int, global_rank=int, gradients_mean=bool, gradient_fp32_sync=bool,
@@ -1431,8 +1452,6 @@ def _set_auto_parallel_context(**kwargs):
         - reducescatter: If communication fusion type is `reducescatter`. The `mode` contains: `auto`
           and `size`. Config is same as `allgather`.
 
-
-
     Raises:
         ValueError: If input key is not attribute in auto parallel context.
     """

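With the two map entries above in place, the new flag should be reachable through the same public helpers as `dump_local_norm`; a hedged usage sketch (not exercised against a real device here):

import mindspore as ms

# The setter/getter maps dispatch the new key like any other auto-parallel option.
ms.set_auto_parallel_context(dump_device_local_norm=True)
assert ms.get_auto_parallel_context("dump_device_local_norm") is True
ms.reset_auto_parallel_context()
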
mindspore/parallel/_cell_wrapper.py
CHANGED
@@ -24,7 +24,8 @@ from mindspore.ops import operations as P
 from mindspore.ops.operations.comm_ops import AllGather
 from mindspore.communication import GlobalComm
 from mindspore.common import jit
-from mindspore.communication import create_group
+from mindspore.communication import create_group, destroy_group
+from mindspore.communication._comm_helper import _get_group_map
 from mindspore.train._utils import get_parameter_redundancy, remove_param_redundancy
 
 _ALLGATHER_CELL = None
@@ -131,6 +132,21 @@ def _restore_parallel_context(origin_parallel_mode, origin_dataset_strategy):
         context.set_auto_parallel_context(dataset_strategy=origin_dataset_strategy)
 
 
+def _get_group_name(group_map, group):
+    """get group name"""
+    group_name = str(group)
+    is_manual_communication_group = True
+    if group_map:
+        for name, rank_list in group_map.items():
+            if list(group) == rank_list:
+                group_name = name
+                is_manual_communication_group = False
+                break
+    if is_manual_communication_group:
+        create_group(str(group), list(group))
+    return group_name, is_manual_communication_group
+
+
 def _single_parameter_broadcast(net, layout, cur_rank=0, initial_rank=0):
     """
     Broadcast single parameter to other rank in data parallel dimension.
@@ -158,8 +174,9 @@ def _single_parameter_broadcast(net, layout, cur_rank=0, initial_rank=0):
         return
     net_param_dict = net.parameters_dict()
     _chang_parallel_context(origin_dataset_strategy)
+    group_map = _get_group_map()
     for group, params in param_redundancy_reversed.items():
-
+        group_name, is_manual_communication_group = _get_group_name(group_map, group)
         allreduce_input = []
         for param in params:
             if param not in net_param_dict:
@@ -170,7 +187,9 @@ def _single_parameter_broadcast(net, layout, cur_rank=0, initial_rank=0):
                 allreduce_input.append(real_param)
         if not allreduce_input:
             continue
-        communicator = SingleCommunicator(
+        communicator = SingleCommunicator(group_name)
         for real_param in allreduce_input:
             real_param.set_data(communicator(real_param), real_param.sliced)
+        if is_manual_communication_group:
+            destroy_group(group_name)
     _restore_parallel_context(origin_parallel_mode, origin_dataset_strategy)

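The reuse decision inside `_get_group_name` can be isolated in a few lines of plain Python; this stub drops the `create_group` side effect and only reports whether the caller would need to create, and later destroy, a temporary group:

def get_group_name(group_map, group):
    """Prefer an existing group whose rank list matches; else signal a manual one."""
    for name, rank_list in group_map.items():
        if list(group) == rank_list:
            return name, False        # reuse an existing group, no cleanup needed
    # In the real code, create_group(str(group), list(group)) runs here.
    return str(group), True           # caller destroys this group when done


existing = {"hccl_world_group": [0, 1, 2, 3]}
assert get_group_name(existing, (0, 1, 2, 3)) == ("hccl_world_group", False)
assert get_group_name(existing, (0, 2)) == ("(0, 2)", True)
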
mindspore/parallel/_utils.py
CHANGED
@@ -127,7 +127,6 @@ class ParallelParamInitProfCtx:
 
 def _slice_parameter(parameter, phase, layout):
     """Slice python parameter obj according to the layout."""
-    # graph_executor.updata_param_node_default_input(phase, {parameter.name: parameter})
     if getattr(parameter, "init_param", False):
         if layout is None:
             parameter.sliced = True