PyPI - mindspore - Versions diffs - 2.4.0__cp39-cp39-manylinux1_x86_64.whl → 2.4.10__cp39-cp39-manylinux1_x86_64.whl - Mend

mindspore 2.4.0__cp39-cp39-manylinux1_x86_64.whl → 2.4.10__cp39-cp39-manylinux1_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of mindspore might be problematic. Click here for more details.

Files changed (294) hide show

mindspore/nn/cell.py CHANGED Viewed

@@ -32,7 +32,8 @@ from mindspore import context
 from mindspore._c_expression import init_pipeline, update_func_graph_hyper_params, Cell_, FuncGraph, MixedPrecisionType
 from mindspore import _checkparam as Validator
 from mindspore.common import dtype as mstype
-from mindspore.common.api import _cell_graph_executor, _pynative_executor, _get_args_for_run, cells_compile_cache, _no_grad
+from mindspore.common.api import _cell_graph_executor, _pynative_executor, _get_args_for_run, cells_compile_cache, \
+    _no_grad
 from mindspore.common.api import _generate_branch_control_input, _convert_python_data, _get_args_for_run_predict
 from mindspore.common.api import _process_dyn_args, _generate_dyn_compile_args
 from mindspore.common.parameter import Parameter, ParameterTuple
@@ -45,6 +46,7 @@ from mindspore._check_jit_forbidden_api import jit_forbidden_register
 from mindspore.common._decorator import deprecated
 from mindspore.common._register_for_recompute import recompute_registry
 class Cell(Cell_):
     """
     The basic building block of neural networks in MindSpore. The model or neural network layer should inherit this
@@ -1820,9 +1822,6 @@ class Cell(Cell_):
         if not hasattr(self, "_func_graph_flags"):
             self._func_graph_flags = {}
         self._func_graph_flags.update({**flags})
-        if context._get_mode() == context.PYNATIVE_MODE and self._func_graph_flags.get("output_no_recompute"):
-            raise TypeError("Recompute is not supported in PyNative mode currently, you can use "
-                            "'context.set_context(mode=context.GRAPH_MODE)' or @jit to set graph mode.")
         self.__dict__.update({**flags})
         self._add_mixed_precision_flag(**flags)
         return self
@@ -2585,6 +2584,7 @@ class Cell(Cell_):
         """
         if context.get_context("mode") == context.PYNATIVE_MODE:
             self._recompute_cell = recompute_registry.get()(self.construct)
+            self._add_recompute_flag()
             return
         self._recompute()
         if 'mp_comm_recompute' in kwargs.keys():
@@ -2687,6 +2687,18 @@ class Cell(Cell_):
         if hasattr(network, "_amp_level"):
             self._amp_level = getattr(network, "_amp_level")
+    def _add_recompute_flag(self):
+        """
+        Mark this cell and all of its sub-cells as configured for recompute in PyNative mode.
+        """
+        if not self._has_config_recompute:
+            self._has_config_recompute = True
+        else:
+            logger.info("The recompute interface can be configured only once."
+                        " If the parent cell is configured, the child cell should not be configured")
+        for cell in self.cells():
+            cell._add_recompute_flag()
 class GraphCell(Cell):
     """

mindspore/nn/layer/basic.py CHANGED Viewed

@@ -579,11 +579,15 @@ class Identity(Cell):
     r"""
     A placeholder identity operator that returns the same as input.
+    Args:
+        args (Any): Any argument.
+        kwargs (Any): Any keyword argument.
     Inputs:
-        - **x** (Any) - The input of Identity.
+        - **input** (Any) - The input of Identity.
     Outputs:
-        The same as `x`.
+        The same as `input`.
     Supported Platforms:
         ``Ascend`` ``GPU`` ``CPU``
@@ -592,19 +596,19 @@ class Identity(Cell):
         >>> import mindspore
         >>> from mindspore import Tensor, nn
         >>> import numpy as np
-        >>> x = Tensor(np.array([1, 2, 3, 4]), mindspore.int64)
+        >>> input = Tensor(np.array([1, 2, 3, 4]), mindspore.int64)
         >>> net = nn.Identity()
-        >>> output = net(x)
+        >>> output = net(input)
         >>> print(output)
         [1 2 3 4]
     """
-    def __init__(self):
+    def __init__(self, *args, **kwargs):
         """Initialize Identity."""
         super(Identity, self).__init__()
-    def construct(self, x):
-        return x
+    def construct(self, input):
+        return input
 class Dense(Cell):
@@ -621,6 +625,9 @@ class Dense(Cell):
     data type as the :math:`X` created by the layer, and :math:`\text{bias}` is a bias vector
     with the same data type as the :math:`X` created by the layer (only if has_bias is True).
+    .. warning::
+        In PYNATIVE mode, if `bias` is ``False`` , `x` cannot have more than 6 dimensions.
     Args:
         in_channels (int): The number of channels in the input space.
         out_channels (int): The number of channels in the output space.
@@ -635,6 +642,8 @@ class Dense(Cell):
             layer. Both activation name, e.g. 'relu', and mindspore activation function, e.g. mindspore.ops.ReLU(),
             are supported. Default: ``None`` .
         dtype (:class:`mindspore.dtype`): Data type of Parameter. Default: ``mstype.float32`` .
+            When `weight_init` is a Tensor, the Parameter has the same data type as `weight_init` ;
+            in other cases, the Parameter has the same data type as `dtype`. The same goes for `bias_init`.
     Inputs:
         - **x** (Tensor) - Tensor of shape :math:`(*, in\_channels)`. The `in_channels` in `Args` should be equal
@@ -651,6 +660,7 @@ class Dense(Cell):
                     is not equal to `out_channels` or shape[1] of `weight_init` is not equal to `in_channels`.
         ValueError: If length of shape of `bias_init` is not equal to 1
                     or shape[0] of `bias_init` is not equal to `out_channels`.
+        RuntimeError: If `bias` is ``False`` and `x` has more than 6 dimensions in PYNATIVE mode.
     Supported Platforms:
         ``Ascend`` ``GPU`` ``CPU``
@@ -752,6 +762,9 @@ class Linear(Cell):
     .. math::
         \text{outputs} = X * kernel + bias
+    .. warning::
+        In PYNATIVE mode, if `bias` is ``False`` , `x` cannot have more than 6 dimensions.
     where :math:`X` is the input tensors, :math:`\text{kernel}` is a weight matrix with the same
     data type as the :math:`X` created by the layer, and :math:`\text{bias}` is a bias vector
     with the same data type as the :math:`X` created by the layer (only if has_bias is True).
@@ -767,6 +780,9 @@ class Linear(Cell):
             same as `x`. The values of str refer to the function `initializer`. Default: ``None`` ,
             bias will be initialized using Uniform.
         dtype (:class:`mindspore.dtype`): Data type of Parameter. Default: ``None`` .
+            If `dtype` is ``None`` , `dtype` is set to ``mstype.float32`` when the layer is initialized.
+            When `weight_init` is a Tensor, the Parameter has the same data type as `weight_init` ;
+            in other cases, the Parameter has the same data type as `dtype`. The same goes for `bias_init`.
     Inputs:
         - **x** (Tensor) - Tensor of shape :math:`(*, in\_features)`. The `in_features` in `Args` should be equal
@@ -782,6 +798,7 @@ class Linear(Cell):
                     is not equal to `out_features` or shape[1] of `weight_init` is not equal to `in_features`.
         ValueError: If length of shape of `bias_init` is not equal to 1
                     or shape[0] of `bias_init` is not equal to `out_features`.
+        RuntimeError: If `bias` is ``False`` and `x` has more than 6 dimensions in PYNATIVE mode.
     Supported Platforms:
         ``Ascend`` ``GPU`` ``CPU``

mindspore/nn/layer/conv.py CHANGED Viewed

@@ -862,6 +862,9 @@ class Conv3dTranspose(_Conv):
     However, when `stride` > 1, Conv2d maps multiple input shapes to the same output shape. Deconvolutional network
     can refer to `Deconvolutional Networks <https://www.matthewzeiler.com/mattzeiler/deconvolutionalnetworks.pdf>`_.
+    Note:
+        For Atlas A2 training series products, `output_padding` is currently not supported.
     Args:
         in_channels (int): The channel number of the input tensor of the Conv3dTranspose layer.
         out_channels (int): The channel number of the output tensor of the Conv3dTranspose layer.

mindspore/nn/layer/embedding.py CHANGED Viewed

@@ -164,11 +164,11 @@ class Embedding(Cell):
 class EmbeddingExt(Cell):
     r"""
-    Embedding layer.
-    Retrieve the word embeddings in weight stored in the layer using indices specified in `input`.
+    The value in `input` is used as the index, and the corresponding embedding vector is queried from `weight` .
     .. warning::
-        On Ascend, the behavior is unpredictable when the value of `input` is invalid.
+        - This is an experimental API that is subject to change or deletion.
+        - On Ascend, the behavior is unpredictable when the value of `input` is invalid.
     Args:
         num_embeddings (int): Size of the dictionary of embeddings.
@@ -183,14 +183,22 @@ class EmbeddingExt(Cell):
         norm_type (float, optional): Indicated the value of p in p-norm. Default ``2.0``.
         scale_grad_by_freq (bool, optional): If ``True`` the gradients will be scaled by the inverse of frequency
             of the index in `input`. Default ``False``.
-        _weight (Tensor, optional): Used to initialize the weight of Embedding. If ``None``, the weight will be
+        sparse (bool, optional): If ``True``, gradient w.r.t. `weight` matrix will be a sparse tensor, which
+            is not supported yet. Default: ``False``.
+        _weight (Tensor, optional): Used to initialize the `weight` of Embedding. If ``None``, the weight will be
             initialized from normal distribution :math:`{N}(\text{sigma=1.0}, \text{mean=0.0})`. Default ``None``.
-        dtype (mindspore.dtype, optional) : Dtype of Parameters. It is meaningless when `_weight` is not None.
-            Default: ``mindspore.float32``.
+        _freeze (bool, optional): Whether `weight` , the learnable weights of this module, should be frozen.
+            Default: ``False``.
+        dtype (mindspore.dtype, optional) : Dtype of Embedding's `weight` . It is meaningless when `_weight` is
+            not None. Default: ``None``.
+    Variables:
+        weight (Parameter): The learnable weights of this module of shape (num_embeddings, embedding_dim), which
+            is initialized from :math:`{N}(\text{sigma=1.0}, \text{mean=0.0})` or `_weight` .
     Inputs:
         - **input** (Tensor) - The indices used to lookup in the embedding vector. The data type must be
-          mindspore.int32 or mindspore.int64, and the value should be in range `[0, num_embeddings)`.
+          int32 or int64, and the value should be in range `[0, num_embeddings)`.
     Outputs:
         Tensor, has the same data type as weight, the shape is :math:`(*input.shape, embedding\_dim)`.
@@ -202,6 +210,7 @@ class EmbeddingExt(Cell):
         TypeError: If `max_norm` is not a float.
         TypeError: If `norm_type` is not a float.
         TypeError: If `scale_grad_by_freq` is not a bool.
+        ValueError: If `weight.shape` is invalid.
         TypeError: If `dtype` is not one of mindspore.dtype.
     Supported Platforms:
@@ -212,7 +221,7 @@ class EmbeddingExt(Cell):
         >>> import numpy as np
         >>> from mindspore import Tensor, nn
         >>> input = Tensor([[1, 0, 1, 1], [0, 0, 1, 0]])
-        >>> embedding = nn.mint.nn.Embedding(num_embeddings=10, embedding_dim=3)
+        >>> embedding = nn.EmbeddingExt(num_embeddings=10, embedding_dim=3)
         >>> output = embedding(input)
         >>> print(output)
         [[[-0.0024154  -0.01203444  0.00811537]
@@ -226,23 +235,30 @@ class EmbeddingExt(Cell):
     """
     def __init__(self, num_embeddings, embedding_dim, padding_idx=None, max_norm=None, norm_type=2.0,
-                 scale_grad_by_freq=False, _weight=None, dtype=mstype.float32):
+                 scale_grad_by_freq=False, sparse=False, _weight=None, _freeze=False, dtype=None):
         """Initialize Embedding."""
         super().__init__()
+        self.sparse = Validator.check_value_type('sparse', sparse, [bool], self.cls_name)
+        if self.sparse:
+            raise ValueError("For Embedding, the scenerio, where `sparse` is True, has not be supported.")
         self.num_embeddings = Validator.check_value_type(
             'num_embeddings', num_embeddings, [int], self.cls_name)
         self.embedding_dim = Validator.check_value_type(
             'embedding_dim', embedding_dim, [int], self.cls_name)
+        self.dtype = dtype if dtype is not None else mstype.float32
         Validator.check_subclass(
-            "dtype", dtype, mstype.number_type, self.cls_name)
-        self.dtype = dtype
+            "dtype", self.dtype, mstype.number_type, self.cls_name)
         self.padding_idx = padding_idx
         if _weight is None:
-            init_tensor = Tensor(shape=[num_embeddings, embedding_dim], dtype=dtype, init=Normal(1, 0))
+            init_tensor = Tensor(shape=[num_embeddings, embedding_dim], dtype=self.dtype, init=Normal(1, 0))
             init_tensor = self._zero_weight_by_index(init_tensor)
-            self.weight = Parameter(init_tensor, name='weight')
+            self.weight = Parameter(init_tensor, name='weight', requires_grad=not _freeze)
         else:
-            self.weight = Parameter(_weight)
+            if _weight.shape != (num_embeddings, embedding_dim):
+                raise ValueError(f"For Embedding, shape of weight should be match with num_embeddings "
+                                 f"and embedding_dim, but got weight.shape: {_weight.shape}, "
+                                 f"and (num_embeddings, embedding_dim): ({num_embeddings}, {embedding_dim})")
+            self.weight = Parameter(_weight, name='weight', requires_grad=not _freeze)
         self.max_norm = max_norm
         if max_norm is not None:
@@ -300,6 +316,7 @@ class EmbeddingLookup(Cell):
         specified 'axis = 0' to lookup table.
         In field slice mode, the manual_shapes must be given. It is a tuple ,where
         the element is vocab[i], vocab[i] is the row numbers for i-th part.
+        This module does not support the PyNative mode.
     Args:
         vocab_size (int): Size of the dictionary of embeddings.

mindspore/nn/layer/pooling.py CHANGED Viewed

@@ -297,6 +297,9 @@ class MaxPool3d(_PoolNd):
         \max_{l=0, \ldots, d_{ker}-1} \max_{m=0, \ldots, h_{ker}-1} \max_{n=0, \ldots, w_{ker}-1}
         \text{input}(N_i, C_j, s_0 \times d + l, s_1 \times h + m, s_2 \times w + n)
+    .. note::
+        For Atlas training series products, this interface is not supported.
     Args:
         kernel_size (Union[int, tuple[int]]): The size of kernel used to take the maximum value,
             is an int number or a single element tuple that represents depth, height and width of the kernel, or a tuple
@@ -1032,16 +1035,11 @@ class AvgPool2dExt(Cell):
         >>> import numpy as np
         >>> from mindspore import Tensor, nn
         >>> from mindspore import dtype as mstype
-        >>> x = Tensor(np.arange(1 * 3 * 3 * 4).reshape(1, 3, 3, 4), mstype.float32)
-        >>> m =  nn.AvgPool2dExt(x, kernel_size=2, stride=1)
-        >>> output = m(x)
-        >>> print(output)
-        [[[[ 2.5   3.5   4.5]
-           [ 6.5   7.5   8.5]]
-          [[14.5  15.5  16.5]
-           [18.5  19.5  20.5]]
-          [[26.5  27.5  28.5]
-           [30.5  31.5  32.5]]]]
+        >>> input = Tensor(np.arange(1 * 3 * 3 * 4).reshape(1, 3, 3, 4), mstype.float32)
+        >>> net = nn.AvgPool2dExt(kernel_size=2, stride=1)
+        >>> output = net(input)
+        >>> print(output.shape)
+        (1, 3, 2, 3)
     """
     def __init__(self, kernel_size, stride=None, padding=0, ceil_mode=False,
                  count_include_pad=True, divisor_override=None):

mindspore/nn/optim/tft_wrapper.py CHANGED Viewed

@@ -20,6 +20,8 @@ from mindspore.common.tensor import Tensor
 from mindspore.nn.optim.optimizer import Optimizer
 from mindspore.ops.operations.manually_defined._inner import TensorReport
 from mindspore import ops, context
+from mindspore.common.parameter import Parameter
+import mindspore.common.dtype as mstype
 class OptTFTWrapper(Optimizer):
     r"""
@@ -61,9 +63,9 @@ class OptTFTWrapper(Optimizer):
     """
     def __init__(self, opt, **kwargs):
-        super(OptTFTWrapper, self).__init__(opt.learning_rate, opt._parameters) # pylint: disable=W0212
         if not isinstance(opt, Optimizer):
             raise TypeError(f"For 'OptTFTWrapper', the argument 'opt' must be Optimizer type, " f"but got {type(opt)}.")
+        super(OptTFTWrapper, self).__init__(opt.learning_rate, opt._parameters) # pylint: disable=W0212
         tft_env = os.getenv("MS_ENABLE_TFT", "")
         if ("TTP:1" not in tft_env) and ("UCE:1" not in tft_env):
             raise ValueError("MindIO TFT regitster need custom switch on[MS_ENABLE_TFT='{TTP:1,UCE:1}']!")
@@ -74,13 +76,9 @@ class OptTFTWrapper(Optimizer):
         self.opt = opt
         self.report = TensorReport()
         self.depend = ops.Depend()
-        self.g_one = Tensor([0.1])
-        # enable consistent check by default, only disable when enable_consistent_check is False
-        self.use_allreduce = kwargs.get("enable_consistent_check", True)
-        if self.use_allreduce:
-            self.allreduce_sum = ops.AllReduce()
-            self.allreduce_sum.add_prim_attr("tft_report_before", True)
+        self.allreduce_sum = ops.AllReduce()
+        self.allreduce_sum.add_prim_attr("tft_report_before", True)
+        self.tft_g_one_flag = Parameter(Tensor([1], dtype=mstype.int32))
         self.param_rank = opt.param_rank
         self.optim_filter = opt.optim_filter
@@ -118,10 +116,9 @@ class OptTFTWrapper(Optimizer):
         self.enable_tuple_broaden = opt.enable_tuple_broaden
     def construct(self, gradients):
-        g_one = self.depend(self.g_one, gradients)
-        if self.use_allreduce is True:
-            g_one_res = self.allreduce_sum(g_one)
-        else:
-            g_one_res = g_one
-        self.report("tft_report", g_one_res)
-        return self.opt(gradients)
+        tft_g_one_flag = self.depend(self.tft_g_one_flag, gradients)
+        self.tft_g_one_flag = self.allreduce_sum(tft_g_one_flag)
+        grads = self.depend(gradients, self.report("tft_report", self.tft_g_one_flag))
+        opt_ret = self.opt(grads)
+        return opt_ret

mindspore/nn/utils/__init__.py ADDED Viewed

@@ -0,0 +1,22 @@
+# Copyright 2024 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+"""
+nn.utils.
+"""
+from __future__ import absolute_import
+from .init import no_init_parameters
+__all__ = ["no_init_parameters"]

mindspore/nn/utils/init.py ADDED Viewed

@@ -0,0 +1,71 @@
+# Copyright 2024 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+"""init for nn.Cell."""
+from __future__ import absolute_import
+from contextlib import contextmanager
+from mindspore.common.parameter import Parameter
+@contextmanager
+def no_init_parameters():
+    r"""
+     In scenarios where a checkpoint is loaded, the parameters created during network instantiation are
+     initialized and occupy physical memory, even though loading the checkpoint then replaces their values.
+     This context manager can be applied during network instantiation to add an attribute `init_param` to all
+     parameters within the current Cell, setting it to `init_param=False` .
+     When `init_param=False` is detected, the initialization of the parameters is skipped,
+     and the parameters are assigned values directly from the checkpoint during loading,
+     which can optimize performance and reduce physical memory usage.
+     Note:
+         Only the initialization of parameters created with `initializer` can be skipped.
+         The initialization of parameters created from `Tensor` or `numpy` cannot be skipped.
+     Examples:
+        >>> import mindspore as ms
+        >>> from mindspore import nn, ops, load_checkpoint
+        >>> from mindspore.common.initializer import initializer
+        >>> from mindspore.nn.utils import no_init_parameters
+        >>> # 1. Define the network, then instantiate it under no_init_parameters() to delay initialization
+        >>> class Net(nn.Cell):
+        ...     def __init__(self, in_channels, out_channels):
+        ...         super().__init__()
+        ...         self.weight = ms.Parameter(initializer("normal", [in_channels, out_channels], ms.float32))
+        ...         self.bias = ms.Parameter(initializer("normal", [out_channels], ms.float32))
+        ...         self.matmul = ops.MatMul()
+        ...         self.add = ops.Add()
+        ...
+        ...     def construct(self, x):
+        ...         x = self.matmul(x, self.weight)
+        ...         x = self.add(x, self.bias)
+        ...         return x
+        >>> with no_init_parameters():
+        ...     # After instantiation, all parameters in the net are not initialized
+        ...     net = Net(28*28, 64)
+        >>> # 2. Load checkpoint parameters to the net
+        >>> load_checkpoint('./checkpoint/test_net.ckpt', net=net)
+        >>> # 3. After loading the checkpoint, manually call init_parameters_data() to initialize
+        >>> #    the uninitialized parameters in the net if need. If the network is executed,
+        >>> #    the framework will automatically call this interface.
+        >>> net.init_parameters_data()
+    """
+    init_class = Parameter
+    setattr(init_class, "init_param", False)
+    try:
+        yield
+    finally:
+        setattr(init_class, "init_param", True)

mindspore/ops/_grad_experimental/grad_array_ops.py CHANGED Viewed

@@ -38,7 +38,6 @@ from mindspore.ops.operations.array_ops import SegmentMean
 from mindspore.ops.operations.array_ops import AffineGrid
 from mindspore.ops.operations.array_ops import MaskedScatter
 from mindspore.ops.operations.array_ops import MaskedSelect
-from mindspore.ops.operations.array_ops import CountNonZero
 from mindspore.ops.operations.random_ops import LogNormalReverse
 from mindspore.ops.operations.random_ops import ParameterizedTruncatedNormal
 from mindspore.ops.operations import _inner_ops as inner
@@ -125,16 +124,6 @@ def get_bprop_masked_scatter(self):
     return bprop
-@bprop_getters.register(CountNonZero)
-def get_bprop_countnonzero(self):
-    """Grad definition for CountNonZero"""
-    def bprop(x, out, dout):
-        return (zeros_like(x),)
-    return bprop
 @bprop_getters.register(Mvlgamma)
 def get_bprop_mvlgamma(self):
     """Grad definition for Mvlgamma"""

mindspore/ops/_grad_experimental/grad_comm_ops.py CHANGED Viewed

@@ -16,7 +16,7 @@
 """Generate bprop for comm ops"""
 from __future__ import division
 from __future__ import absolute_import
-from mindspore import Tensor
+from mindspore import Tensor, Parameter
 import mindspore.common.dtype as mstype
 from mindspore.ops import functional as F
 from mindspore.communication import get_rank, get_group_size
@@ -31,11 +31,15 @@ from mindspore.ops.operations.comm_ops import (AllGather, _MiniStepAllGather, _H
                                                _GetTensorSlice, _MirrorOperator, _MirrorMiniStepOperator, ReduceOp,
                                                ReduceScatter, _HostReduceScatter, _VirtualDiv, _VirtualAdd, _AllSwap,
                                                _VirtualAssignAdd, _VirtualAccuGrad, _MirrorMicroStepOperator,
-                                               _MicroStepAllGather, Reduce, CollectiveGather, CollectiveScatter)
+                                               _MicroStepAllGather, Reduce, CollectiveGather, CollectiveScatter,
+                                               _VirtualAssignKvCache)
 from mindspore.ops._grad_experimental.grad_base import bprop_getters
 from mindspore.ops.operations import _grad_ops as G
 import mindspore as ms
+_device_local_norm = None
+if ms.get_auto_parallel_context("dump_device_local_norm"):
+    _device_local_norm = Parameter(Tensor(0.0, mstype.float32), name="_device_local_norm", requires_grad=False)
 @bprop_getters.register(AllReduce)
 def get_bprop_all_reduce(self):
@@ -179,6 +183,24 @@ def get_bprop_virtual_assign_add(self):
     return bprop
+@bprop_getters.register(_VirtualAssignKvCache)
+def get_bprop_virtual_assign_kv_cache(self):
+    """Generate bprop for VirtualAssignKvCache."""
+    assign = P.Assign()
+    cast = P.Cast()
+    dtype = P.DType()
+    out_tensor = Tensor(0.0, mstype.float16)
+    def bprop(x, y, seq_chunk, out, dout):
+        dout_update = dout + y
+        kv_equal = F.equal(seq_chunk, 0)
+        update_kv = F.select(kv_equal, F.broadcast_to(cast(out_tensor, dtype(y)), F.shape(y)), dout_update)
+        return F.depend((dout_update, cast(out_tensor, dtype(y)),
+                         cast(out_tensor, dtype(seq_chunk))), assign(y, update_kv))
+    return bprop
 @bprop_getters.register(_VirtualAccuGrad)
 def get_bprop_virtual_accu_grad(self):
     """Generate bprop for VirtualAccuGrad."""
@@ -228,10 +250,15 @@ def get_bprop_mirror_micro_step_operator(self):
     reduce_sum = P.ReduceSum(keep_dims=False)
     square = P.Square()
     dump_local_norm = ms.get_auto_parallel_context("dump_local_norm")
+    dump_device_local_norm = ms.get_auto_parallel_context("dump_device_local_norm")
     def bprop(x, z, out, dout):
-        if dump_local_norm:
-            z = F.depend(z, ln_print("dump local norm: ", param_name, reduce_sum(square((z)))))
+        if dump_local_norm or dump_device_local_norm:
+            _norm = reduce_sum(square((z)))
+            if dump_local_norm:
+                z = F.depend(z, ln_print("dump local norm: ", param_name, _norm))
+            if dump_device_local_norm:
+                z = F.depend(z, F.assign_add(_device_local_norm, cast(_norm, _device_local_norm.dtype)))
         real_grad = z
         assign_out = dout
         if issubclass_(F.typeof(dout), mstype.tensor_type):
@@ -354,6 +381,7 @@ def get_bprop_micro_step_all_gather(self):
     reduce_sum = P.ReduceSum(keep_dims=False)
     square = P.Square()
     dump_local_norm = ms.get_auto_parallel_context("dump_local_norm")
+    dump_device_local_norm = ms.get_auto_parallel_context("dump_device_local_norm")
     def bprop(x, z, out, dout):
         if with_mirror_operator:
@@ -364,8 +392,12 @@ def get_bprop_micro_step_all_gather(self):
                 real_grad = F.tensor_mul(real_grad, scale)
             return (real_grad, cast(out_tensor, dtype(z)))
         z = F.depend(z, dout)
-        if dump_local_norm:
-            z = F.depend(z, ln_print("dump local norm: ", param_name, reduce_sum(square((z)))))
+        if dump_local_norm or dump_device_local_norm:
+            _norm = reduce_sum(square((z)))
+            if dump_local_norm:
+                z = F.depend(z, ln_print("dump local norm: ", param_name, _norm))
+            if dump_device_local_norm:
+                z = F.depend(z, F.assign_add(_device_local_norm, cast(_norm, _device_local_norm.dtype)))
         if not do_mirror:
             return (z, cast(out_tensor, dtype(z)))
         real_grad = reduce_scatter(z)
@@ -567,6 +599,7 @@ def get_bprop_mirror_operator(self):
     dev_num_r = 1.0
     dump_local_norm = ms.get_auto_parallel_context("dump_local_norm")
+    dump_device_local_norm = ms.get_auto_parallel_context("dump_device_local_norm")
     if dev_num > 1:
         dev_num_r = 1.0 / dev_num
         all_reduce = AllReduce(group=group)
@@ -589,8 +622,12 @@ def get_bprop_mirror_operator(self):
             all_reduce.set_prim_instance_name(instance_name)
     def bprop(x, out, dout):
-        if dump_local_norm:
-            dout = F.depend(dout, ln_print("dump local norm: ", param_name, reduce_sum(square((dout)))))
+        if dump_local_norm or dump_device_local_norm:
+            _norm = reduce_sum(square((dout)))
+            if dump_local_norm:
+                dout = F.depend(dout, ln_print("dump local norm: ", param_name, _norm))
+            if dump_device_local_norm:
+                dout = F.depend(dout, F.assign_add(_device_local_norm, cast(_norm, _device_local_norm.dtype)))
         if dev_num == 1:
             return (dout,)

mindspore/ops/auto_generate/cpp_create_prim_instance_helper.py CHANGED Viewed

@@ -65,6 +65,7 @@ op_args_default_value = {
     "ConvolutionGrad": {"bias": None, "stride": 1, "padding": 0, "dilation": 1, "transposed": False, "output_padding": 0, "groups": 1, "output_mask": ()},
     "Convolution": {"bias": None, "stride": 1, "padding": 0, "dilation": 1, "transposed": False, "output_padding": 0, "groups": 1},
     "Correlate": {"mode": 'valid'},
+    "CountNonZero": {"dim": None},
     "Cross": {"dim": -65530},
     "CumProd": {"exclusive": False, "reverse": False},
     "CumSum": {"exclusive": False, "reverse": False},
@@ -185,6 +186,11 @@ op_args_default_value = {
     "Qr": {"full_matrices": False},
     "RandExt": {"dtype": None},
     "RandLikeExt": {"dtype": None},
+    "RandIntLike": {"dtype": None},
+    "RandInt": {"dtype": None},
+    "RandnLike": {"dtype": None},
+    "Randn": {"dtype": None},
+    "RandpermExt": {"dtype": mstype.int64},
     "RandpermV2": {"seed": 0, "offset": 0, "dtype": mstype.int64},
     "Range": {"maxlen": 1000000},
     "ReduceAll": {"axis": None, "keep_dims": False},

mindspore/ops/auto_generate/gen_extend_func.py CHANGED Viewed

@@ -1350,6 +1350,39 @@ def prod(input, axis=None, keep_dims=False, dtype=None):
     return prod_impl(input, axis, keep_dims, dtype)
+def select(input, dim, index):
+    r"""
+    Slices the input tensor along the selected dimension at the given index.
+    .. warning::
+        This is an experimental API that is subject to change or deletion.
+    Args:
+        input (Tensor): the input tensor.
+        dim (int): the dimension to slice.
+        index (int): the index to select with.
+    Returns:
+        Tensor.
+    Raises:
+        TypeError: If input is not a Tensor.
+    Supported Platforms:
+        ``Ascend``
+    Examples:
+        >>> import mindspore
+        >>> from mindspore import Tensor, mint
+        >>> input = Tensor([[2, 3, 4, 5],[3, 2, 4, 5]])
+        >>> y = mint.select(input, 0, 0)
+        >>> print(y)
+        [2 3 4 5]
+    """
+    return select_impl(input, dim, index)
 def selu(input):
     r"""
     Activation function SELU (Scaled exponential Linear Unit).