mindspore-2.4.0-cp310-cp310-manylinux1_x86_64.whl → mindspore-2.4.1-cp310-cp310-manylinux1_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mindspore might be problematic.
- mindspore/.commit_id +1 -1
- mindspore/_c_dataengine.cpython-310-x86_64-linux-gnu.so +0 -0
- mindspore/_c_expression.cpython-310-x86_64-linux-gnu.so +0 -0
- mindspore/bin/cache_admin +0 -0
- mindspore/bin/cache_server +0 -0
- mindspore/common/initializer.py +51 -15
- mindspore/common/parameter.py +18 -4
- mindspore/common/tensor.py +15 -49
- mindspore/communication/comm_func.py +7 -7
- mindspore/context.py +9 -0
- mindspore/include/mindapi/base/format.h +13 -0
- mindspore/lib/libdnnl.so.2 +0 -0
- mindspore/lib/libmindspore_backend.so +0 -0
- mindspore/lib/libmindspore_common.so +0 -0
- mindspore/lib/libmindspore_core.so +0 -0
- mindspore/lib/libmindspore_glog.so.0 +0 -0
- mindspore/lib/libmindspore_gpr.so.15 +0 -0
- mindspore/lib/libmindspore_grpc++.so.1 +0 -0
- mindspore/lib/libmindspore_grpc.so.15 +0 -0
- mindspore/lib/libmindspore_ops.so +0 -0
- mindspore/lib/libopencv_core.so.4.5 +0 -0
- mindspore/lib/libopencv_imgcodecs.so.4.5 +0 -0
- mindspore/lib/libopencv_imgproc.so.4.5 +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_ops/op_impl/ai_core/tbe/kernel/config/ascend910b/all_finite.json +10 -10
- mindspore/lib/plugin/ascend/custom_ascendc_ops/op_impl/ai_core/tbe/kernel/config/ascend910b/binary_info_config.json +8 -8
- mindspore/lib/plugin/ascend/custom_compiler/setup.py +1 -1
- mindspore/lib/plugin/ascend/libdvpp_utils.so +0 -0
- mindspore/lib/plugin/ascend/libmindspore_internal_kernels.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/include/asdops/utils/rt/base/types.h +5 -5
- mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/lib/libasdops.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/lib/libasdops_static.a +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/lib/liblcal.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/lib/liblcal_static.a +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/include/acme_op.h +1 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/src/ops/host_src/paged_attention_op.h +6 -1
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/src/ops/host_src/rms_norm_op.h +4 -3
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libapply_rotary_pos_emb_310p_impl.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libms_kernels_internal.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/flash_attention_score/flash_attention_score_bf16_bnsd_full_mix.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/flash_attention_score/flash_attention_score_bf16_bnsd_tri_mix.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/flash_attention_score/flash_attention_score_bf16_bsh_full_mix.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/flash_attention_score/flash_attention_score_bf16_bsh_tri_mix.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/flash_attention_score/flash_attention_score_fp16_bnsd_full_mix.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/flash_attention_score/flash_attention_score_fp16_bnsd_tri_mix.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/flash_attention_score/flash_attention_score_fp16_bsh_full_mix.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/flash_attention_score/flash_attention_score_fp16_bsh_tri_mix.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/paged_attention/paged_attention_bf16_bnsd_mix.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/paged_attention/paged_attention_bf16_bsh_mix.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/paged_attention/paged_attention_fp16_bnsd_mix.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/paged_attention/paged_attention_fp16_bsh_mix.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/lccl/lib/liblcal.so +0 -0
- mindspore/lib/plugin/gpu/libcuda_ops.so.10 +0 -0
- mindspore/lib/plugin/gpu/libcuda_ops.so.11 +0 -0
- mindspore/lib/plugin/gpu10.1/libnccl.so.2 +0 -0
- mindspore/lib/plugin/gpu11.1/libnccl.so.2 +0 -0
- mindspore/lib/plugin/gpu11.6/libnccl.so.2 +0 -0
- mindspore/lib/plugin/libmindspore_ascend.so.2 +0 -0
- mindspore/lib/plugin/libmindspore_gpu.so.10.1 +0 -0
- mindspore/lib/plugin/libmindspore_gpu.so.11.1 +0 -0
- mindspore/lib/plugin/libmindspore_gpu.so.11.6 +0 -0
- mindspore/mint/__init__.py +490 -2
- mindspore/mint/nn/__init__.py +2 -2
- mindspore/mint/optim/adamw.py +6 -14
- mindspore/nn/cell.py +1 -3
- mindspore/nn/layer/basic.py +24 -7
- mindspore/nn/layer/embedding.py +31 -14
- mindspore/nn/optim/tft_wrapper.py +12 -15
- mindspore/ops/_grad_experimental/grad_array_ops.py +0 -11
- mindspore/ops/_grad_experimental/grad_comm_ops.py +20 -1
- mindspore/ops/auto_generate/cpp_create_prim_instance_helper.py +6 -0
- mindspore/ops/auto_generate/gen_extend_func.py +33 -0
- mindspore/ops/auto_generate/gen_ops_def.py +52 -3
- mindspore/ops/auto_generate/gen_ops_prim.py +155 -6
- mindspore/ops/function/array_func.py +2 -0
- mindspore/ops/function/math_func.py +7 -1
- mindspore/ops/function/random_func.py +221 -7
- mindspore/ops/operations/__init__.py +1 -1
- mindspore/ops/operations/array_ops.py +3 -1
- mindspore/ops/operations/comm_ops.py +21 -0
- mindspore/ops/operations/manually_defined/ops_def.py +8 -10
- mindspore/parallel/_auto_parallel_context.py +3 -1
- mindspore/parallel/_cell_wrapper.py +2 -0
- mindspore/parallel/_tensor.py +46 -2
- mindspore/parallel/_utils.py +40 -21
- mindspore/parallel/transform_safetensors.py +196 -43
- mindspore/profiler/profiling.py +5 -1
- mindspore/run_check/_check_version.py +4 -2
- mindspore/train/_utils.py +92 -32
- mindspore/train/callback/_checkpoint.py +12 -9
- mindspore/train/callback/_on_request_exit.py +12 -1
- mindspore/train/callback/_tft_register.py +27 -4
- mindspore/train/dataset_helper.py +10 -2
- mindspore/train/model.py +20 -0
- mindspore/train/serialization.py +8 -18
- mindspore/version.py +1 -1
- {mindspore-2.4.0.dist-info → mindspore-2.4.1.dist-info}/METADATA +8 -6
- {mindspore-2.4.0.dist-info → mindspore-2.4.1.dist-info}/RECORD +100 -100
- {mindspore-2.4.0.dist-info → mindspore-2.4.1.dist-info}/WHEEL +0 -0
- {mindspore-2.4.0.dist-info → mindspore-2.4.1.dist-info}/entry_points.txt +0 -0
- {mindspore-2.4.0.dist-info → mindspore-2.4.1.dist-info}/top_level.txt +0 -0
mindspore/mint/optim/adamw.py
CHANGED
@@ -22,32 +22,25 @@ from mindspore.common import dtype as mstype
 from mindspore.ops import auto_generate as gen
 from mindspore.experimental.optim.optimizer import Optimizer
 from mindspore import _checkparam as validator
-from mindspore import mint
 
-_optim_adamw_opt = C.MultitypeFuncGraph("optim_adamw_opt")
 hyper_map = C.HyperMap()
 
 
-@_optim_adamw_opt.register("Function", "Float", "Float", "Float", "Float", "Float", "Tensor", "Bool", "Bool", "Tensor",
-                           "Tensor", "Tensor", "Tensor", "Tensor")
 def _run_optim_adamw_amsgrad_opt(opt, beta1, beta2, lr, eps, weight_decay, step, amsgrad, maximize, parameters, grads,
                                  exp_avg, exp_avg_sq, max_exp_avg_sq):
     """Apply adamw optimizer to the weight parameter."""
     success = True
-    opt(parameters, exp_avg, exp_avg_sq, max_exp_avg_sq,
-
+    opt(parameters, exp_avg, exp_avg_sq, max_exp_avg_sq, grads, step, lr, beta1, beta2, weight_decay, eps, amsgrad,
+        maximize)
     return success
 
 
-@_optim_adamw_opt.register("Function", "Float", "Float", "Float", "Float", "Float", "Tensor", "Bool", "Bool", "Tensor",
-                           "Tensor", "Tensor", "Tensor")
 def _run_optim_adamw_opt(opt, beta1, beta2, lr, eps, weight_decay, step, amsgrad, maximize, parameters, grads, exp_avg,
                          exp_avg_sq):
     """Apply adamw optimizer to the weight parameter."""
     success = True
-
-
-        weight_decay, eps, amsgrad, maximize)
+    opt(parameters, exp_avg, exp_avg_sq, exp_avg_sq, grads, step, lr, beta1, beta2, weight_decay, eps, amsgrad,
+        maximize)
     return success
 
 
@@ -177,7 +170,6 @@ class AdamW(Optimizer):
         self.state_step = Parameter(Tensor([-1], mstype.float32), "state_step")
         self.increase_tensor = Tensor(1, mstype.float32)
         self.assignadd = P.AssignAdd()
-        self.op_cast = P.Cast()
         self.adamw_opt = gen.AdamW()
 
     def construct(self, gradients):
@@ -191,13 +183,13 @@ class AdamW(Optimizer):
             grads = tuple(gradients[start_id: end_id])
 
             if group.get("amsgrad"):
-                self.hyper_map(F.partial(
+                self.hyper_map(F.partial(_run_optim_adamw_amsgrad_opt, self.adamw_opt, beta1, beta2, float(lr),
                                          group.get("eps"), group.get("weight_decay"), self.state_step,
                                          group.get("amsgrad"), maximize),
                                self.parameters[start_id: end_id], grads, self.exp_avg[start_id: end_id],
                                self.exp_avg_sq[start_id: end_id], group.get("max_exp_avg_sq"))
             else:
-                self.hyper_map(F.partial(
+                self.hyper_map(F.partial(_run_optim_adamw_opt, self.adamw_opt, beta1, beta2, float(lr),
                                          group.get("eps"), group.get("weight_decay"), self.state_step,
                                          group.get("amsgrad"), maximize),
                                self.parameters[start_id: end_id], grads, self.exp_avg[start_id: end_id],
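For orientation, a minimal usage sketch of the optimizer this file implements (mindspore.mint.optim.AdamW); the toy network, loss and data below are assumptions for illustration and are not part of the diff.

import numpy as np
import mindspore as ms
from mindspore import nn
from mindspore.mint.optim import AdamW

net = nn.Dense(4, 2)        # assumed toy model
loss_fn = nn.MAELoss()      # assumed loss
optimizer = AdamW(net.trainable_params(), lr=1e-3)

def forward(x, y):
    return loss_fn(net(x), y)

grad_fn = ms.value_and_grad(forward, None, net.trainable_params())

def train_step(x, y):
    loss, grads = grad_fn(x, y)
    optimizer(grads)        # dispatches into _run_optim_adamw_opt / _run_optim_adamw_amsgrad_opt shown above
    return loss

x = ms.Tensor(np.random.randn(8, 4), ms.float32)
y = ms.Tensor(np.random.randn(8, 2), ms.float32)
print(train_step(x, y))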
mindspore/nn/cell.py
CHANGED
@@ -1820,9 +1820,6 @@ class Cell(Cell_):
         if not hasattr(self, "_func_graph_flags"):
             self._func_graph_flags = {}
         self._func_graph_flags.update({**flags})
-        if context._get_mode() == context.PYNATIVE_MODE and self._func_graph_flags.get("output_no_recompute"):
-            raise TypeError("Recompute is not supported in PyNative mode currently, you can use "
-                            "'context.set_context(mode=context.GRAPH_MODE)' or @jit to set graph mode.")
         self.__dict__.update({**flags})
         self._add_mixed_precision_flag(**flags)
         return self
@@ -2585,6 +2582,7 @@ class Cell(Cell_):
         """
         if context.get_context("mode") == context.PYNATIVE_MODE:
             self._recompute_cell = recompute_registry.get()(self.construct)
+            self._recompute()
             return
         self._recompute()
         if 'mp_comm_recompute' in kwargs.keys():
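As a hypothetical illustration of what the relaxed PyNative check permits, a short sketch of requesting recomputation on a sub-cell outside graph mode; the toy cell below is assumed, not taken from the package.

import mindspore as ms
from mindspore import nn

ms.set_context(mode=ms.PYNATIVE_MODE)

class Block(nn.Cell):
    def __init__(self):
        super().__init__()
        self.dense = nn.Dense(16, 16)
        self.relu = nn.ReLU()

    def construct(self, x):
        return self.relu(self.dense(x))

block = Block()
block.recompute()   # in 2.4.1 the PyNative branch also runs self._recompute() before returning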
mindspore/nn/layer/basic.py
CHANGED
@@ -579,11 +579,15 @@ class Identity(Cell):
     r"""
     A placeholder identity operator that returns the same as input.
 
+    Args:
+        args (Any): Any argument.
+        kwargs (Any): Any keyword argument.
+
     Inputs:
-        - **
+        - **input** (Any) - The input of Identity.
 
     Outputs:
-        The same as `
+        The same as `input`.
 
     Supported Platforms:
         ``Ascend`` ``GPU`` ``CPU``
@@ -592,19 +596,19 @@ class Identity(Cell):
         >>> import mindspore
         >>> from mindspore import Tensor, nn
         >>> import numpy as np
-        >>>
+        >>> input = Tensor(np.array([1, 2, 3, 4]), mindspore.int64)
         >>> net = nn.Identity()
-        >>> output = net(
+        >>> output = net(input)
         >>> print(output)
         [1 2 3 4]
     """
 
-    def __init__(self):
+    def __init__(self, *args, **kwargs):
         """Initialize Identity."""
         super(Identity, self).__init__()
 
-    def construct(self,
-        return
+    def construct(self, input):
+        return input
 
 
 class Dense(Cell):
@@ -621,6 +625,9 @@ class Dense(Cell):
     data type as the :math:`X` created by the layer, and :math:`\text{bias}` is a bias vector
     with the same data type as the :math:`X` created by the layer (only if has_bias is True).
 
+    .. warning::
+        In PYNATIVE mode, if `bias` is ``False`` , the `x` cannot be greater than 6D.
+
     Args:
         in_channels (int): The number of channels in the input space.
         out_channels (int): The number of channels in the output space.
@@ -635,6 +642,8 @@ class Dense(Cell):
             layer. Both activation name, e.g. 'relu', and mindspore activation function, e.g. mindspore.ops.ReLU(),
             are supported. Default: ``None`` .
         dtype (:class:`mindspore.dtype`): Data type of Parameter. Default: ``mstype.float32`` .
+            When `weight_init` is Tensor, Parameter has the same data type as `weight_init` ,
+            in other cases, Parameter has the same data type as `dtype`, the same goes for `bias_init`.
 
     Inputs:
         - **x** (Tensor) - Tensor of shape :math:`(*, in\_channels)`. The `in_channels` in `Args` should be equal
@@ -651,6 +660,7 @@ class Dense(Cell):
             is not equal to `out_channels` or shape[1] of `weight_init` is not equal to `in_channels`.
         ValueError: If length of shape of `bias_init` is not equal to 1
             or shape[0] of `bias_init` is not equal to `out_channels`.
+        RuntimeError: If `bias` is ``False`` and `x` is greater than 6D in PYNATIVE mode.
 
     Supported Platforms:
         ``Ascend`` ``GPU`` ``CPU``
@@ -752,6 +762,9 @@ class Linear(Cell):
     .. math::
         \text{outputs} = X * kernel + bias
 
+    .. warning::
+        In PYNATIVE mode, if `bias` is ``False`` , the `x` cannot be greater than 6D.
+
     where :math:`X` is the input tensors, :math:`\text{kernel}` is a weight matrix with the same
     data type as the :math:`X` created by the layer, and :math:`\text{bias}` is a bias vector
     with the same data type as the :math:`X` created by the layer (only if has_bias is True).
@@ -767,6 +780,9 @@ class Linear(Cell):
             same as `x`. The values of str refer to the function `initializer`. Default: ``None`` ,
             bias will be initialized using Uniform.
         dtype (:class:`mindspore.dtype`): Data type of Parameter. Default: ``None`` .
+            If `dtype` is ``None`` , `dtype` is set to ``mstype.float32`` when initializing the method.
+            When `weight_init` is Tensor, Parameter has the same data type as `weight_init` ,
+            in other cases, Parameter has the same data type as `dtype`, the same goes for `bias_init`.
 
     Inputs:
         - **x** (Tensor) - Tensor of shape :math:`(*, in\_features)`. The `in_features` in `Args` should be equal
@@ -782,6 +798,7 @@ class Linear(Cell):
             is not equal to `out_features` or shape[1] of `weight_init` is not equal to `in_features`.
         ValueError: If length of shape of `bias_init` is not equal to 1
             or shape[0] of `bias_init` is not equal to `out_features`.
+        RuntimeError: If `bias` is ``False`` and `x` is greater than 6D in PYNATIVE mode.
 
     Supported Platforms:
         ``Ascend`` ``GPU`` ``CPU``
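A small, assumed check of the dtype note added to the Dense docstring above: when weight_init is a Tensor, the created parameter keeps weight_init's dtype rather than dtype. The values are illustrative only.

import numpy as np
import mindspore as ms
from mindspore import nn, Tensor

w = Tensor(np.ones((2, 4)), ms.float16)                  # assumed initial weight, shape (out_channels, in_channels)
dense = nn.Dense(4, 2, weight_init=w, dtype=ms.float32)
print(dense.weight.dtype)                                # float16, taken from weight_init per the added note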
mindspore/nn/layer/embedding.py
CHANGED
@@ -164,11 +164,11 @@ class Embedding(Cell):
 
 class EmbeddingExt(Cell):
     r"""
-
-    Retrieve the word embeddings in weight stored in the layer using indices specified in `input`.
+    The value in `input` is used as the index, and the corresponding embedding vector is queried from `weight` .
 
     .. warning::
-
+        - This is an experimental API that is subject to change or deletion.
+        - On Ascend, the behavior is unpredictable when the value of `input` is invalid.
 
     Args:
         num_embeddings (int): Size of the dictionary of embeddings.
@@ -183,14 +183,22 @@ class EmbeddingExt(Cell):
         norm_type (float, optional): Indicated the value of p in p-norm. Default ``2.0``.
         scale_grad_by_freq (bool, optional): If ``True`` the gradients will be scaled by the inverse of frequency
             of the index in `input`. Default ``False``.
-
+        sparse (bool, optional): If ``True``, gradient w.r.t. `weight` matrix will be a sparse tensor which
+            has not been supported. Default: ``False``.
+        _weight (Tensor, optional): Used to initialize the `weight` of Embedding. If ``None``, the weight will be
             initialized from normal distribution :math:`{N}(\text{sigma=1.0}, \text{mean=0.0})`. Default ``None``.
-
-            Default: ``
+        _freeze(bool, optional): If `weight` , the learnable weights of this module, should be freezed.
+            Default: ``False``.
+        dtype (mindspore.dtype, optional) : Dtype of Embedding's `weight` . It is meaningless when `_weight` is
+            not None. Default: ``None``.
+
+    Variables:
+        weight (Parameter): The learnable weights of this module of shape (num_embeddings, embedding_dim), which
+            initialized from :math:`{N}(\text{sigma=1.0}, \text{mean=0.0})` or `_weight` .
 
     Inputs:
         - **input** (Tensor) - The indices used to lookup in the embedding vector. The data type must be
-
+          int32 or int64, and the value should be in range `[0, num_embeddings)`.
 
     Outputs:
         Tensor, has the same data type as weight, the shape is :math:`(*input.shape, embedding\_dim)`.
@@ -202,6 +210,7 @@ class EmbeddingExt(Cell):
         TypeError: If `max_norm` is not a float.
         TypeError: If `norm_type` is not a float.
         TypeError: If `scale_grad_by_freq` is not a bool.
+        ValueError: If `weight.shape` is invalid.
         TypeError: If `dtype` is not one of mindspore.dtype.
 
     Supported Platforms:
@@ -212,7 +221,7 @@ class EmbeddingExt(Cell):
         >>> import numpy as np
         >>> from mindspore import Tensor, nn
        >>> input = Tensor([[1, 0, 1, 1], [0, 0, 1, 0]])
-        >>> embedding = nn.
+        >>> embedding = nn.EmbeddingExt(num_embeddings=10, embedding_dim=3)
         >>> output = embedding(input)
         >>> print(output)
         [[[-0.0024154 -0.01203444 0.00811537]
@@ -226,23 +235,30 @@ class EmbeddingExt(Cell):
     """
 
     def __init__(self, num_embeddings, embedding_dim, padding_idx=None, max_norm=None, norm_type=2.0,
-                 scale_grad_by_freq=False, _weight=None, dtype=
+                 scale_grad_by_freq=False, sparse=False, _weight=None, _freeze=False, dtype=None):
         """Initialize Embedding."""
         super().__init__()
+        self.sparse = Validator.check_value_type('sparse', sparse, [bool], self.cls_name)
+        if self.sparse:
+            raise ValueError("For Embedding, the scenerio, where `sparse` is True, has not be supported.")
         self.num_embeddings = Validator.check_value_type(
             'num_embeddings', num_embeddings, [int], self.cls_name)
         self.embedding_dim = Validator.check_value_type(
             'embedding_dim', embedding_dim, [int], self.cls_name)
+        self.dtype = dtype if dtype is not None else mstype.float32
         Validator.check_subclass(
-            "dtype", dtype, mstype.number_type, self.cls_name)
-        self.dtype = dtype
+            "dtype", self.dtype, mstype.number_type, self.cls_name)
         self.padding_idx = padding_idx
         if _weight is None:
-            init_tensor = Tensor(shape=[num_embeddings, embedding_dim], dtype=dtype, init=Normal(1, 0))
+            init_tensor = Tensor(shape=[num_embeddings, embedding_dim], dtype=self.dtype, init=Normal(1, 0))
             init_tensor = self._zero_weight_by_index(init_tensor)
-            self.weight = Parameter(init_tensor, name='weight')
+            self.weight = Parameter(init_tensor, name='weight', requires_grad=not _freeze)
         else:
-
+            if _weight.shape != (num_embeddings, embedding_dim):
+                raise ValueError(f"For Embedding, shape of weight should be match with num_embeddings "
+                                 f"and embedding_dim, but got weight.shape: {_weight.shape}, "
+                                 f"and (num_embeddings, embedding_dim): ({num_embeddings}, {embedding_dim})")
+            self.weight = Parameter(_weight, name='weight', requires_grad=not _freeze)
 
         self.max_norm = max_norm
         if max_norm is not None:
@@ -300,6 +316,7 @@ class EmbeddingLookup(Cell):
         specified 'axis = 0' to lookup table.
         In field slice mode, the manual_shapes must be given. It is a tuple ,where
         the element is vocab[i], vocab[i] is the row numbers for i-th part.
+        This module does not support the PyNative mode.
 
     Args:
         vocab_size (int): Size of the dictionary of embeddings.
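To illustrate the constructor arguments documented above (_weight, _freeze, and the new shape check), a brief assumed sketch; the pretrained table and sizes are made up.

import numpy as np
import mindspore as ms
from mindspore import nn, Tensor

pretrained = Tensor(np.random.randn(10, 3), ms.float32)    # assumed pretrained table, shape (num_embeddings, embedding_dim)
embedding = nn.EmbeddingExt(num_embeddings=10, embedding_dim=3, _weight=pretrained, _freeze=True)

ids = Tensor([[1, 0, 1, 1], [0, 0, 1, 0]], ms.int32)
print(embedding(ids).shape)              # (2, 4, 3)
print(embedding.weight.requires_grad)    # False, because _freeze=True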
mindspore/nn/optim/tft_wrapper.py
CHANGED
@@ -20,6 +20,8 @@ from mindspore.common.tensor import Tensor
 from mindspore.nn.optim.optimizer import Optimizer
 from mindspore.ops.operations.manually_defined._inner import TensorReport
 from mindspore import ops, context
+from mindspore.common.parameter import Parameter
+import mindspore.common.dtype as mstype
 
 class OptTFTWrapper(Optimizer):
     r"""
@@ -61,9 +63,9 @@ class OptTFTWrapper(Optimizer):
     """
 
     def __init__(self, opt, **kwargs):
-        super(OptTFTWrapper, self).__init__(opt.learning_rate, opt._parameters)  # pylint: disable=W0212
         if not isinstance(opt, Optimizer):
             raise TypeError(f"For 'OptTFTWrapper', the argument 'opt' must be Optimizer type, " f"but got {type(opt)}.")
+        super(OptTFTWrapper, self).__init__(opt.learning_rate, opt._parameters)  # pylint: disable=W0212
         tft_env = os.getenv("MS_ENABLE_TFT", "")
         if ("TTP:1" not in tft_env) and ("UCE:1" not in tft_env):
             raise ValueError("MindIO TFT regitster need custom switch on[MS_ENABLE_TFT='{TTP:1,UCE:1}']!")
@@ -74,13 +76,9 @@ class OptTFTWrapper(Optimizer):
         self.opt = opt
         self.report = TensorReport()
         self.depend = ops.Depend()
-        self.
-
-        self.
-
-        if self.use_allreduce:
-            self.allreduce_sum = ops.AllReduce()
-            self.allreduce_sum.add_prim_attr("tft_report_before", True)
+        self.allreduce_sum = ops.AllReduce()
+        self.allreduce_sum.add_prim_attr("tft_report_before", True)
+        self.tft_g_one_flag = Parameter(Tensor([1], dtype=mstype.int32))
 
         self.param_rank = opt.param_rank
         self.optim_filter = opt.optim_filter
@@ -118,10 +116,9 @@ class OptTFTWrapper(Optimizer):
         self.enable_tuple_broaden = opt.enable_tuple_broaden
 
     def construct(self, gradients):
-
-
-
-
-
-
-        return self.opt(gradients)
+        tft_g_one_flag = self.depend(self.tft_g_one_flag, gradients)
+        self.tft_g_one_flag = self.allreduce_sum(tft_g_one_flag)
+
+        grads = self.depend(gradients, self.report("tft_report", self.tft_g_one_flag))
+        opt_ret = self.opt(grads)
+        return opt_ret
mindspore/ops/_grad_experimental/grad_array_ops.py
CHANGED
@@ -38,7 +38,6 @@ from mindspore.ops.operations.array_ops import SegmentMean
 from mindspore.ops.operations.array_ops import AffineGrid
 from mindspore.ops.operations.array_ops import MaskedScatter
 from mindspore.ops.operations.array_ops import MaskedSelect
-from mindspore.ops.operations.array_ops import CountNonZero
 from mindspore.ops.operations.random_ops import LogNormalReverse
 from mindspore.ops.operations.random_ops import ParameterizedTruncatedNormal
 from mindspore.ops.operations import _inner_ops as inner
@@ -125,16 +124,6 @@ def get_bprop_masked_scatter(self):
     return bprop
 
 
-@bprop_getters.register(CountNonZero)
-def get_bprop_countnonzero(self):
-    """Grad definition for CountNonZero"""
-
-    def bprop(x, out, dout):
-        return (zeros_like(x),)
-
-    return bprop
-
-
 @bprop_getters.register(Mvlgamma)
 def get_bprop_mvlgamma(self):
     """Grad definition for Mvlgamma"""
mindspore/ops/_grad_experimental/grad_comm_ops.py
CHANGED
@@ -31,7 +31,8 @@ from mindspore.ops.operations.comm_ops import (AllGather, _MiniStepAllGather, _H
                                                _GetTensorSlice, _MirrorOperator, _MirrorMiniStepOperator, ReduceOp,
                                                ReduceScatter, _HostReduceScatter, _VirtualDiv, _VirtualAdd, _AllSwap,
                                                _VirtualAssignAdd, _VirtualAccuGrad, _MirrorMicroStepOperator,
-                                               _MicroStepAllGather, Reduce, CollectiveGather, CollectiveScatter
+                                               _MicroStepAllGather, Reduce, CollectiveGather, CollectiveScatter,
+                                               _VirtualAssignKvCache)
 from mindspore.ops._grad_experimental.grad_base import bprop_getters
 from mindspore.ops.operations import _grad_ops as G
 import mindspore as ms
@@ -179,6 +180,24 @@ def get_bprop_virtual_assign_add(self):
     return bprop
 
 
+@bprop_getters.register(_VirtualAssignKvCache)
+def get_bprop_virtual_assign_kv_cache(self):
+    """Generate bprop for VirtualAssignAdd."""
+    assign = P.Assign()
+    cast = P.Cast()
+    dtype = P.DType()
+    out_tensor = Tensor(0.0, mstype.float16)
+
+    def bprop(x, y, seq_chunk, out, dout):
+        dout_update = dout + y
+        kv_equal = F.equal(seq_chunk, 0)
+        update_kv = F.select(kv_equal, F.broadcast_to(cast(out_tensor, dtype(y)), F.shape(y)), dout_update)
+        return F.depend((dout_update, cast(out_tensor, dtype(y)),
+                         cast(out_tensor, dtype(seq_chunk))), assign(y, update_kv))
+
+    return bprop
+
+
 @bprop_getters.register(_VirtualAccuGrad)
 def get_bprop_virtual_accu_grad(self):
     """Generate bprop for VirtualAccuGrad."""
mindspore/ops/auto_generate/cpp_create_prim_instance_helper.py
CHANGED
@@ -65,6 +65,7 @@ op_args_default_value = {
     "ConvolutionGrad": {"bias": None, "stride": 1, "padding": 0, "dilation": 1, "transposed": False, "output_padding": 0, "groups": 1, "output_mask": ()},
     "Convolution": {"bias": None, "stride": 1, "padding": 0, "dilation": 1, "transposed": False, "output_padding": 0, "groups": 1},
     "Correlate": {"mode": 'valid'},
+    "CountNonZero": {"dim": None},
     "Cross": {"dim": -65530},
     "CumProd": {"exclusive": False, "reverse": False},
     "CumSum": {"exclusive": False, "reverse": False},
@@ -185,6 +186,11 @@ op_args_default_value = {
     "Qr": {"full_matrices": False},
     "RandExt": {"dtype": None},
     "RandLikeExt": {"dtype": None},
+    "RandIntLike": {"dtype": None},
+    "RandInt": {"dtype": None},
+    "RandnLike": {"dtype": None},
+    "Randn": {"dtype": None},
+    "RandpermExt": {"dtype": mstype.int64},
     "RandpermV2": {"seed": 0, "offset": 0, "dtype": mstype.int64},
     "Range": {"maxlen": 1000000},
     "ReduceAll": {"axis": None, "keep_dims": False},
mindspore/ops/auto_generate/gen_extend_func.py
CHANGED
@@ -1350,6 +1350,39 @@ def prod(input, axis=None, keep_dims=False, dtype=None):
     return prod_impl(input, axis, keep_dims, dtype)
 
 
+def select(input, dim, index):
+    r"""
+    Slices the input tensor along the selected dimension at the given index.
+
+    .. warning::
+        This is an experimental API that is subject to change or deletion.
+
+    Args:
+        input (Tensor): the input tensor.
+        dim (int): the dimension to slice.
+        index (int): the index to select with.
+
+    Returns:
+        Tensor.
+
+    Raises:
+        TypeError: If input is not a Tensor.
+
+    Supported Platforms:
+        ``Ascend``
+
+    Examples:
+        >>> import mindspore
+        >>> from mindspore import Tensor, mint
+        >>> input = Tensor([[2, 3, 4, 5],[3, 2, 4, 5]])
+        >>> y = mint.select(input, 0, 0)
+        >>> y = Tensor([1,2], mindspore.float32)
+        >>> print(y)
+        [2 3 4 5]
+    """
+    return select_impl(input, dim, index)
+
+
 def selu(input):
     r"""
     Activation function SELU (Scaled exponential Linear Unit).
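Because the bundled docstring example above reassigns y before printing, here is a separate, assumed sketch of what mint.select returns; it is illustrative only (the docstring lists only Ascend as a supported platform).

import mindspore as ms
from mindspore import Tensor, mint

x = Tensor([[2, 3, 4, 5],
            [3, 2, 4, 5]], ms.int64)

row = mint.select(x, 0, 0)   # slice along dim 0 at index 0 -> first row
col = mint.select(x, 1, 1)   # slice along dim 1 at index 1 -> second column
print(row)                   # [2 3 4 5]
print(col)                   # [3 2]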
mindspore/ops/auto_generate/gen_ops_def.py
CHANGED
@@ -1655,6 +1655,54 @@ def cosh(input):
     return cosh_op(input)
 
 
+def count_nonzero(input, dim=None):
+    r"""
+    Counts the number of non-zero values in the tensor input along the given dim. If no dim is specified then all non-zeros in the tensor are counted.
+
+    .. warning::
+        This is an experimental API that is subject to change or deletion.
+
+    Args:
+        input (Tensor): Input data is used to count non-zero numbers. With shape
+            :math:`(*)` where :math:`*` means, any number of additional dimensions.
+        dim (Union[int, tuple(int), list(int)], optional): The dimension to reduce. Default value: ``None``, which indicates that the number of non-zero elements is calculated. If `dim` is ``None``, all elements in the tensor are summed up.
+
+    Returns:
+        Tensor, number of nonzero element across dim specified by `dim`.
+
+    Raises:
+        TypeError: If `input` is not tensor.
+        TypeError: If `dim` is not int, tuple(int), list(int) or None.
+        ValueError: If any value in `dim` is not in range [-x.ndim, x.ndim).
+
+    Supported Platforms:
+        ``Ascend``
+
+    Examples:
+        >>> from mindspore import Tensor, ops
+        >>> import numpy as np
+        >>> import mindspore
+        >>> # case 1: each value specified.
+        >>> x = Tensor(np.array([[0, 1, 0], [1, 1, 0]]).astype(np.float32))
+        >>> nonzero_num = ops.count_nonzero(input=x, dim=[0, 1])
+        >>> print(nonzero_num)
+        [[3]]
+        >>> # case 2: all value is default.
+        >>> nonzero_num = ops.count_nonzero(input=x)
+        >>> print(nonzero_num)
+        3
+        >>> # case 3: dim value was specified 0.
+        >>> nonzero_num = ops.count_nonzero(input=x, dim=[0,])
+        >>> print(nonzero_num)
+        [1 2 0]
+        >>> # case 4: dim value was specified 1.
+        >>> nonzero_num = ops.count_nonzero(input=x, dim=[1,])
+        >>> print(nonzero_num)
+        [1 2]
+    """
+    return count_nonzero_op(input, dim)
+
+
 def cummax(input, axis):
     r"""
     Returns a tuple (values,indices) where 'values' is the cumulative maximum value of input Tensor `input`
@@ -1860,7 +1908,8 @@ def dense(input, weight, bias=None):
         output = input * weight^{T} + bias
 
     .. warning::
-        This is an experimental API that is subject to change or deletion.
+        - This is an experimental API that is subject to change or deletion.
+        - In PYNATIVE mode, if `bias` is not 1D, the `input` cannot be greater than 6D.
 
     Args:
         input (Tensor): Input Tensor of shape :math:`(*, in\_channels)`,
@@ -1877,6 +1926,7 @@ def dense(input, weight, bias=None):
         TypeError: If `input` is not Tensor.
         TypeError: If `weight` is not Tensor.
         TypeError: If `bias` is not Tensor.
+        RuntimeError: If `bias` is not 1D and `input` is greater than 6D in PYNATIVE mode.
 
     Supported Platforms:
         ``Ascend`` ``GPU`` ``CPU``
@@ -6404,7 +6454,7 @@ def rotary_position_embedding(x, cos, sin, mode=0):
 
     Args:
         x (Tensor): 4D tensor, with float16, bfloat16 or float32 data type.
-        cos (Tensor): 4D
+        cos (Tensor): 4D constant, has the same type as `x` , in range of [-1, 1].
         sin (Tensor): Same with `cos` .
        mode (int): An optional attribute. Used to select a calculation mode. 0: rotate_half(GPT-NeoX style); 1: rotate_interleaved(GPT-J style). Defaults to ``0`` .
 
@@ -6420,7 +6470,6 @@ def rotary_position_embedding(x, cos, sin, mode=0):
 
       11SD, B1SD, BNSD; D < 896 and D is an Even. B, N < 1000;
 
-      B * N <= 1024 if gradient calculation of cos/sin is used.
       - Supported layout: 11SD, B1SD, BNSD;
 
       D < 896 and D is an Even.