mindspore 2.4.10__cp39-none-any.whl → 2.5.0__cp39-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mindspore has been flagged as potentially problematic; consult the advisory on the package registry page for more details.
- mindspore/.commit_id +1 -1
- mindspore/Third_Party_Open_Source_Software_Notice +39 -0
- mindspore/__init__.py +8 -3
- mindspore/_akg/akg/composite/build_module.py +6 -2
- mindspore/_akg/akg/utils/kernel_exec.py +2 -2
- mindspore/_c_dataengine.cpython-39-aarch64-linux-gnu.so +0 -0
- mindspore/_c_expression.cpython-39-aarch64-linux-gnu.so +0 -0
- mindspore/_c_mindrecord.cpython-39-aarch64-linux-gnu.so +0 -0
- mindspore/_checkparam.py +0 -5
- mindspore/_extends/parallel_compile/akg_compiler/gen_custom_op_files.py +1 -1
- mindspore/_extends/parse/compile_config.py +64 -0
- mindspore/_extends/parse/deprecated/__init__.py +0 -0
- mindspore/_extends/parse/deprecated/deprecated_tensor_method.py +375 -0
- mindspore/_extends/parse/parser.py +23 -5
- mindspore/_extends/parse/standard_method.py +123 -27
- mindspore/_extends/pijit/pijit_func_white_list.py +1 -1
- mindspore/amp.py +7 -1
- mindspore/boost/boost_cell_wrapper.py +136 -41
- mindspore/common/__init__.py +3 -1
- mindspore/common/_register_for_tensor.py +0 -1
- mindspore/common/_stub_tensor.py +25 -4
- mindspore/common/_tensor_cpp_method.py +17 -0
- mindspore/common/_tensor_docs.py +6132 -0
- mindspore/common/api.py +98 -21
- mindspore/common/dtype.py +34 -34
- mindspore/common/dump.py +2 -1
- mindspore/common/file_system.py +8 -3
- mindspore/common/generator.py +2 -0
- mindspore/common/hook_handle.py +3 -1
- mindspore/common/initializer.py +3 -4
- mindspore/common/lazy_inline.py +8 -2
- mindspore/common/mindir_util.py +10 -2
- mindspore/common/parameter.py +31 -15
- mindspore/common/tensor.py +713 -1337
- mindspore/communication/__init__.py +1 -1
- mindspore/communication/_comm_helper.py +5 -0
- mindspore/communication/comm_func.py +215 -173
- mindspore/communication/management.py +23 -20
- mindspore/context.py +285 -191
- mindspore/dataset/__init__.py +23 -19
- mindspore/dataset/callback/ds_callback.py +2 -1
- mindspore/dataset/core/config.py +84 -3
- mindspore/dataset/engine/cache_admin.py +3 -3
- mindspore/dataset/engine/cache_client.py +5 -4
- mindspore/dataset/engine/datasets.py +192 -149
- mindspore/dataset/engine/datasets_audio.py +14 -0
- mindspore/dataset/engine/datasets_standard_format.py +11 -11
- mindspore/dataset/engine/datasets_text.py +38 -1
- mindspore/dataset/engine/datasets_user_defined.py +100 -66
- mindspore/dataset/engine/datasets_vision.py +81 -8
- mindspore/dataset/engine/iterators.py +281 -63
- mindspore/dataset/engine/obs/util.py +8 -0
- mindspore/dataset/engine/queue.py +40 -0
- mindspore/dataset/engine/samplers.py +26 -2
- mindspore/dataset/engine/serializer_deserializer.py +1 -1
- mindspore/dataset/engine/validators.py +43 -11
- mindspore/dataset/transforms/py_transforms_util.py +17 -0
- mindspore/dataset/transforms/transforms.py +29 -12
- mindspore/dataset/vision/validators.py +1 -2
- mindspore/device_context/__init__.py +21 -0
- mindspore/device_context/ascend/__init__.py +25 -0
- mindspore/device_context/ascend/device.py +72 -0
- mindspore/device_context/ascend/op_debug.py +94 -0
- mindspore/device_context/ascend/op_precision.py +193 -0
- mindspore/device_context/ascend/op_tuning.py +127 -0
- mindspore/device_context/cpu/__init__.py +25 -0
- mindspore/device_context/cpu/device.py +62 -0
- mindspore/device_context/cpu/op_tuning.py +43 -0
- mindspore/device_context/gpu/__init__.py +21 -0
- mindspore/device_context/gpu/device.py +70 -0
- mindspore/device_context/gpu/op_precision.py +67 -0
- mindspore/device_context/gpu/op_tuning.py +175 -0
- mindspore/device_manager.py +134 -0
- mindspore/experimental/llm_boost/__init__.py +1 -0
- mindspore/experimental/llm_boost/ascend_native/__init__.py +22 -0
- mindspore/experimental/llm_boost/ascend_native/llama_boost_ascend_native.py +211 -0
- mindspore/experimental/llm_boost/ascend_native/llm_boost.py +52 -0
- mindspore/experimental/llm_boost/atb/boost_base.py +2 -3
- mindspore/experimental/llm_boost/atb/llama_boost.py +6 -1
- mindspore/experimental/llm_boost/register.py +1 -0
- mindspore/experimental/optim/adadelta.py +26 -22
- mindspore/experimental/optim/adam.py +3 -0
- mindspore/experimental/optim/lr_scheduler.py +33 -24
- mindspore/experimental/optim/radam.py +33 -30
- mindspore/hal/device.py +28 -0
- mindspore/hal/event.py +17 -0
- mindspore/hal/memory.py +94 -3
- mindspore/hal/stream.py +91 -6
- mindspore/include/api/context.h +0 -1
- mindspore/lib/libavcodec.so.59 +0 -0
- mindspore/lib/libavdevice.so.59 +0 -0
- mindspore/lib/libavfilter.so.8 +0 -0
- mindspore/lib/libavformat.so.59 +0 -0
- mindspore/lib/libavutil.so.57 +0 -0
- mindspore/lib/libdnnl.so.2 +0 -0
- mindspore/lib/libmindspore_backend.so +0 -0
- mindspore/lib/libmindspore_common.so +0 -0
- mindspore/lib/libmindspore_core.so +0 -0
- mindspore/lib/libmindspore_gpr.so.15 +0 -0
- mindspore/lib/libmindspore_grpc++.so.1 +0 -0
- mindspore/lib/libmindspore_grpc.so.15 +0 -0
- mindspore/lib/libmindspore_ops.so +0 -0
- mindspore/lib/libmpi_adapter.so +0 -0
- mindspore/lib/libmpi_collective.so +0 -0
- mindspore/lib/libnnacl.so +0 -0
- mindspore/lib/libopencv_core.so.4.5 +0 -0
- mindspore/lib/libopencv_imgcodecs.so.4.5 +0 -0
- mindspore/lib/libopencv_imgproc.so.4.5 +0 -0
- mindspore/lib/libps_cache.so +0 -0
- mindspore/lib/libswresample.so.4 +0 -0
- mindspore/lib/libswscale.so.6 +0 -0
- mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/config/ascend910_93/aic-ascend910_93-ops-info.json +2048 -0
- mindspore/lib/plugin/ascend/custom_aicpu_ops/op_impl/cpu/aicpu_kernel/impl/libcust_cpu_kernels.so +0 -0
- mindspore/lib/plugin/ascend/custom_aicpu_ops/op_proto/libcust_op_proto.so +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910/op_api/lib/libcust_opapi.so +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/custom_ascendc_910_impl/dynamic/decoder_kv_cache.py +1 -1
- mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/custom_ascendc_910_impl/dynamic/prompt_kv_cache.py +1 -1
- mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/op_tiling/lib/linux/aarch64/libcust_opmaster_rt2.0.so +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910/op_impl/ai_core/tbe/op_tiling/liboptiling.so +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910/op_proto/lib/linux/aarch64/libcust_opsproto_rt2.0.so +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910/version.info +1 -1
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_api/lib/libcust_opapi.so +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/config/ascend910_93/aic-ascend910_93-ops-info.json +224 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/custom_ascendc_910b_impl/dynamic/all_finite.py +1 -1
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/custom_ascendc_910b_impl/dynamic/decoder_kv_cache.py +1 -1
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/custom_ascendc_910b_impl/dynamic/prompt_kv_cache.py +1 -1
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910_93/all_finite/AllFinite_52f59e2a65d9b1bb002de35c2819754a.json +78 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910_93/all_finite/AllFinite_52f59e2a65d9b1bb002de35c2819754a.o +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910_93/all_finite/AllFinite_6b5e50e30256d85838d6ce83514df20f.json +78 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910_93/all_finite/AllFinite_6b5e50e30256d85838d6ce83514df20f.o +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910_93/all_finite/AllFinite_74e4ac02880d452e3308c94af273562e.json +78 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910_93/all_finite/AllFinite_74e4ac02880d452e3308c94af273562e.o +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910_93/decoder_kv_cache/DecoderKvCache_0d5520cc587ad44ce634bf3fbcffc272.json +156 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910_93/decoder_kv_cache/DecoderKvCache_0d5520cc587ad44ce634bf3fbcffc272.o +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910_93/decoder_kv_cache/DecoderKvCache_20390d30b3c4c0d23167ccca6c030c2b.json +156 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910_93/decoder_kv_cache/DecoderKvCache_20390d30b3c4c0d23167ccca6c030c2b.o +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910_93/decoder_kv_cache/DecoderKvCache_2d151f0b1d2db51faa2968d5b67544e2.json +156 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910_93/decoder_kv_cache/DecoderKvCache_2d151f0b1d2db51faa2968d5b67544e2.o +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910_93/decoder_kv_cache/DecoderKvCache_561690ec17cc1def3d2fcf68c1b07b56.json +156 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910_93/decoder_kv_cache/DecoderKvCache_561690ec17cc1def3d2fcf68c1b07b56.o +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910_93/decoder_kv_cache/DecoderKvCache_570f9aaa99e5e773b3dd0a33784363f4.json +156 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910_93/decoder_kv_cache/DecoderKvCache_570f9aaa99e5e773b3dd0a33784363f4.o +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910_93/decoder_kv_cache/DecoderKvCache_59668a0f0764afb98fda8ab9e84126f1.json +156 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910_93/decoder_kv_cache/DecoderKvCache_59668a0f0764afb98fda8ab9e84126f1.o +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910_93/decoder_kv_cache/DecoderKvCache_91d9833e4792b70b670e4e2b916abd86.json +156 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910_93/decoder_kv_cache/DecoderKvCache_91d9833e4792b70b670e4e2b916abd86.o +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910_93/decoder_kv_cache/DecoderKvCache_c74cdc5fef094383401856f8519504af.json +156 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910_93/decoder_kv_cache/DecoderKvCache_c74cdc5fef094383401856f8519504af.o +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910_93/prompt_kv_cache/PromptKvCache_0515c7b1a4cd614449e38c5e9a7e3f8d.json +165 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910_93/prompt_kv_cache/PromptKvCache_0515c7b1a4cd614449e38c5e9a7e3f8d.o +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910_93/prompt_kv_cache/PromptKvCache_09f22d898d6358c91e7c4fc48bac48e7.json +165 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910_93/prompt_kv_cache/PromptKvCache_09f22d898d6358c91e7c4fc48bac48e7.o +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910_93/prompt_kv_cache/PromptKvCache_0cb9a6f894b925250227136e5aab7061.json +165 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910_93/prompt_kv_cache/PromptKvCache_0cb9a6f894b925250227136e5aab7061.o +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910_93/prompt_kv_cache/PromptKvCache_2fa8702ffd7ca85e9e194f62644415d5.json +165 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910_93/prompt_kv_cache/PromptKvCache_2fa8702ffd7ca85e9e194f62644415d5.o +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910_93/prompt_kv_cache/PromptKvCache_570b62f187dfd439b64613d881deedb7.json +165 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910_93/prompt_kv_cache/PromptKvCache_570b62f187dfd439b64613d881deedb7.o +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910_93/prompt_kv_cache/PromptKvCache_585218c11411ff84709b9e725b66c435.json +165 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910_93/prompt_kv_cache/PromptKvCache_585218c11411ff84709b9e725b66c435.o +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910_93/prompt_kv_cache/PromptKvCache_5c9365ccde170b358c5b126d69dae13e.json +165 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910_93/prompt_kv_cache/PromptKvCache_5c9365ccde170b358c5b126d69dae13e.o +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910_93/prompt_kv_cache/PromptKvCache_6d97c45b7c43bc16fcff8baa5dacac4e.json +165 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/ascend910_93/prompt_kv_cache/PromptKvCache_6d97c45b7c43bc16fcff8baa5dacac4e.o +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/config/ascend910_93/all_finite.json +139 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/config/ascend910_93/binary_info_config.json +361 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/config/ascend910_93/decoder_kv_cache.json +892 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/kernel/config/ascend910_93/prompt_kv_cache.json +892 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/op_tiling/lib/linux/aarch64/libcust_opmaster_rt2.0.so +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_impl/ai_core/tbe/op_tiling/liboptiling.so +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/op_proto/lib/linux/aarch64/libcust_opsproto_rt2.0.so +0 -0
- mindspore/lib/plugin/ascend/custom_ascendc_910b/version.info +1 -1
- mindspore/lib/plugin/ascend/custom_compiler/setup.py +1 -1
- mindspore/lib/plugin/ascend/libascend_collective.so +0 -0
- mindspore/lib/plugin/ascend/libdvpp_utils.so +0 -0
- mindspore/lib/plugin/ascend/liblowlatency_collective.so +0 -0
- mindspore/lib/plugin/ascend/libmindspore_cpu_kernels.so +0 -0
- mindspore/lib/plugin/ascend/libmindspore_internal_kernels.so +0 -0
- mindspore/lib/plugin/ascend/libms_ascend_native_boost.so +0 -0
- mindspore/lib/plugin/ascend/libms_atb_boost.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/device/ascend910b/bin/ascend910b.bin +957 -955
- mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/lib/libasdops_static.a +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/asdops/lib/liblcal_static.a +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/{acme/include/base_type.h → base_type.h} +25 -20
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/{cast/cast_tiling.h → internal.h} +6 -4
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/internal_op.h +114 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/llm/boost_kernel.h +70 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/llm/llama_impl.h +85 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/llm/model_interface.h +52 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/llm/tensor.h +81 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/op_creator.h +123 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/op_param.h +155 -110
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/{acme/include/tiling_info.h → tiling_info.h} +12 -9
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/tiling_utils.h +178 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libadd_layer_norm_op.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libadd_rms_norm_op.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libadd_rms_norm_quant_op.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libapply_rotary_pos_emb_310p_op.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libapply_rotary_pos_emb_op.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libcast_op.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libcompare_op.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libgelu_op.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libllama_op.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libmatmul_op.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libms_kernels_internal.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libms_optiling.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libmulti_weight_matmul_kernel_op.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libreshape_and_cache_nz_op.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libreshape_and_cache_op.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/librms_norm_op.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend310p/object_kernels/internal_pp_matmul_f16_nz/internal_pp_matmul_f16_nz.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend310p/object_kernels/internal_pp_matmul_f16_nz/internal_pp_matmul_f16_nz_0.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend310p/object_kernels/internal_pp_matmul_i8_nz_compress/internal_pp_matmul_i8_nz_compress.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend310p/object_kernels/internal_pp_matmul_i8_nz_compress/internal_pp_matmul_i8_nz_compress_0.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend310p/object_kernels/internal_pp_matmul_int8_nz/internal_pp_matmul_int8_nz.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend310p/object_kernels/internal_pp_matmul_int8_nz/internal_pp_matmul_int8_nz_0.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend310p/so_kernels/libadd_rms_norm_quant_ascend310p.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/{lib/libapply_rotary_pos_emb_310p_impl.so → op_kernels/ascend310p/so_kernels/libapply_rotary_pos_emb_310p_ascend310p.so} +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend310p/so_kernels/libcast_ascend310p.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend310p/so_kernels/libcompare_ascend310p.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend310p/so_kernels/libgelu_ascend310p.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend310p/so_kernels/libmatmul_ascend310p.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend310p/so_kernels/libreshape_and_cache_nz_ascend310p.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/hphol_kernels/add_rms_norm_dynamic_quant/AddRmsNormDynamicQuant_4b60f88cdc28b25a36bad2d8b0a88092.json +163 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/hphol_kernels/add_rms_norm_dynamic_quant/AddRmsNormDynamicQuant_4b60f88cdc28b25a36bad2d8b0a88092.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/hphol_kernels/add_rms_norm_dynamic_quant/AddRmsNormDynamicQuant_cde61da2bd6fededcb1ba310a6ad16ee.json +163 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/hphol_kernels/add_rms_norm_dynamic_quant/AddRmsNormDynamicQuant_cde61da2bd6fededcb1ba310a6ad16ee.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/object_kernels/flash_attention_score/flash_attention_score_bf16_bnsd_full_mix.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/object_kernels/flash_attention_score/flash_attention_score_bf16_bnsd_tri_mix.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/object_kernels/flash_attention_score/flash_attention_score_bf16_bsh_full_mix.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/object_kernels/flash_attention_score/flash_attention_score_bf16_bsh_tri_mix.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/object_kernels/flash_attention_score/flash_attention_score_fp16_bnsd_full_mix.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/object_kernels/flash_attention_score/flash_attention_score_fp16_bnsd_tri_mix.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/object_kernels/flash_attention_score/flash_attention_score_fp16_bsh_full_mix.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/object_kernels/flash_attention_score/flash_attention_score_fp16_bsh_tri_mix.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/object_kernels/internal_matmul_postfusion_mix/internal_matmul_postfusion_mix.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/object_kernels/internal_matmul_postfusion_mix/internal_matmul_postfusion_mix_mix_aic_0.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/object_kernels/internal_matmul_postfusion_mix/internal_matmul_postfusion_mix_mix_aiv_0.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/object_kernels/internal_multi_weight_matmul_postfusion_mix/internal_multi_weight_matmul_postfusion_mix.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/object_kernels/internal_multi_weight_matmul_postfusion_mix/internal_multi_weight_matmul_postfusion_mix_mix_aic_0.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/object_kernels/internal_multi_weight_matmul_postfusion_mix/internal_multi_weight_matmul_postfusion_mix_mix_aiv_0.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/{matmul_add_rmsnorm → object_kernels/matmul_add_rmsnorm}/matmul_add_rmsnorm_bf16_bf16.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/{matmul_add_rmsnorm → object_kernels/matmul_add_rmsnorm}/matmul_add_rmsnorm_bf16_fp16.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/{matmul_add_rmsnorm → object_kernels/matmul_add_rmsnorm}/matmul_add_rmsnorm_bf16_fp32.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/{matmul_add_rmsnorm → object_kernels/matmul_add_rmsnorm}/matmul_add_rmsnorm_fp16_bf16.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/{matmul_add_rmsnorm → object_kernels/matmul_add_rmsnorm}/matmul_add_rmsnorm_fp16_fp16.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/{matmul_add_rmsnorm → object_kernels/matmul_add_rmsnorm}/matmul_add_rmsnorm_fp16_fp32.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/object_kernels/paged_attention_v2/paged_attention_v2.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/object_kernels/paged_attention_v2/paged_attention_v2_mix_aic_0.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/object_kernels/paged_attention_v2/paged_attention_v2_mix_aiv_0.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/{lib/libadd_layer_norm_impl.so → op_kernels/ascend910b/so_kernels/libadd_layer_norm_ascend910b.so} +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/{lib/libadd_rms_norm_impl.so → op_kernels/ascend910b/so_kernels/libadd_rms_norm_ascend910b.so} +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/so_kernels/libadd_rms_norm_quant_ascend910b.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/{lib/libapply_rotary_pos_emb_impl.so → op_kernels/ascend910b/so_kernels/libapply_rotary_pos_emb_ascend910b.so} +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/{lib/libcast_impl.so → op_kernels/ascend910b/so_kernels/libcast_ascend910b.so} +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/{lib/libnot_equal_impl.so → op_kernels/ascend910b/so_kernels/libcompare_ascend910b.so} +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/{lib/libgelu_impl.so → op_kernels/ascend910b/so_kernels/libgelu_ascend910b.so} +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/so_kernels/libllama_ascend910b.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/{lib/libmatmul_impl.so → op_kernels/ascend910b/so_kernels/libmatmul_ascend910b.so} +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/{lib/libmulti_weight_matmul_kernel_impl.so → op_kernels/ascend910b/so_kernels/libmulti_weight_matmul_kernel_ascend910b.so} +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/{lib/libreshape_and_cache_impl.so → op_kernels/ascend910b/so_kernels/libreshape_and_cache_ascend910b.so} +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/{lib/librms_norm_impl.so → op_kernels/ascend910b/so_kernels/librms_norm_ascend910b.so} +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/lccl/lib/liblccl_wrapper.so +0 -0
- mindspore/lib/plugin/libmindspore_ascend.so.2 +0 -0
- mindspore/log.py +12 -0
- mindspore/mindrecord/__init__.py +1 -1
- mindspore/mindrecord/config.py +17 -316
- mindspore/mindrecord/filereader.py +1 -9
- mindspore/mindrecord/filewriter.py +5 -15
- mindspore/mindrecord/mindpage.py +1 -9
- mindspore/mint/__init__.py +824 -218
- mindspore/mint/distributed/__init__.py +66 -4
- mindspore/mint/distributed/distributed.py +2594 -44
- mindspore/mint/linalg/__init__.py +6 -0
- mindspore/mint/nn/__init__.py +473 -14
- mindspore/mint/nn/functional.py +486 -11
- mindspore/mint/nn/layer/__init__.py +17 -4
- mindspore/mint/nn/layer/_functions.py +330 -0
- mindspore/mint/nn/layer/activation.py +169 -1
- mindspore/mint/nn/layer/basic.py +123 -0
- mindspore/mint/nn/layer/conv.py +727 -0
- mindspore/mint/nn/layer/normalization.py +215 -19
- mindspore/mint/nn/layer/padding.py +797 -0
- mindspore/mint/nn/layer/pooling.py +170 -0
- mindspore/mint/optim/__init__.py +2 -1
- mindspore/mint/optim/adam.py +223 -0
- mindspore/mint/optim/adamw.py +26 -19
- mindspore/mint/special/__init__.py +2 -1
- mindspore/multiprocessing/__init__.py +5 -0
- mindspore/nn/cell.py +126 -19
- mindspore/nn/dynamic_lr.py +2 -1
- mindspore/nn/layer/activation.py +6 -6
- mindspore/nn/layer/basic.py +35 -25
- mindspore/nn/layer/channel_shuffle.py +3 -3
- mindspore/nn/layer/embedding.py +3 -3
- mindspore/nn/layer/normalization.py +8 -7
- mindspore/nn/layer/padding.py +4 -3
- mindspore/nn/layer/pooling.py +47 -13
- mindspore/nn/layer/rnn_cells.py +1 -1
- mindspore/nn/layer/rnns.py +2 -1
- mindspore/nn/layer/timedistributed.py +5 -5
- mindspore/nn/layer/transformer.py +48 -26
- mindspore/nn/learning_rate_schedule.py +5 -3
- mindspore/nn/loss/loss.py +31 -36
- mindspore/nn/optim/ada_grad.py +1 -0
- mindspore/nn/optim/adadelta.py +2 -2
- mindspore/nn/optim/adam.py +1 -1
- mindspore/nn/optim/lars.py +1 -4
- mindspore/nn/optim/optimizer.py +1 -1
- mindspore/nn/optim/rprop.py +2 -2
- mindspore/nn/optim/thor.py +2 -1
- mindspore/nn/utils/init.py +13 -11
- mindspore/nn/wrap/cell_wrapper.py +4 -6
- mindspore/nn/wrap/loss_scale.py +3 -4
- mindspore/numpy/array_creations.py +60 -62
- mindspore/numpy/array_ops.py +148 -143
- mindspore/numpy/logic_ops.py +41 -42
- mindspore/numpy/math_ops.py +361 -359
- mindspore/numpy/utils.py +16 -16
- mindspore/numpy/utils_const.py +4 -4
- mindspore/ops/__init__.py +2 -1
- mindspore/ops/_grad_experimental/grad_comm_ops.py +94 -13
- mindspore/ops/_grad_experimental/grad_debug_ops.py +6 -1
- mindspore/ops/_grad_experimental/grad_inner_ops.py +9 -0
- mindspore/ops/_grad_experimental/grad_math_ops.py +2 -1
- mindspore/ops/_op_impl/cpu/__init__.py +1 -0
- mindspore/ops/_op_impl/cpu/raise_op.py +28 -0
- mindspore/ops/_vmap/vmap_array_ops.py +20 -19
- mindspore/ops/_vmap/vmap_base.py +0 -2
- mindspore/ops/_vmap/vmap_grad_nn_ops.py +19 -13
- mindspore/ops/_vmap/vmap_math_ops.py +11 -9
- mindspore/ops/_vmap/vmap_nn_ops.py +20 -34
- mindspore/ops/auto_generate/cpp_create_prim_instance_helper.py +149 -12
- mindspore/ops/auto_generate/gen_arg_handler.py +0 -61
- mindspore/ops/auto_generate/gen_extend_func.py +554 -60
- mindspore/ops/auto_generate/gen_ops_def.py +1621 -115
- mindspore/ops/auto_generate/gen_ops_prim.py +8024 -3409
- mindspore/ops/auto_generate/pyboost_inner_prim.py +183 -79
- mindspore/ops/composite/base.py +1 -1
- mindspore/ops/composite/multitype_ops/_compile_utils.py +229 -30
- mindspore/ops/composite/multitype_ops/pow_impl.py +0 -29
- mindspore/ops/function/__init__.py +12 -0
- mindspore/ops/function/array_func.py +561 -159
- mindspore/ops/function/clip_func.py +64 -0
- mindspore/ops/function/debug_func.py +28 -20
- mindspore/ops/function/image_func.py +1 -1
- mindspore/ops/function/linalg_func.py +5 -4
- mindspore/ops/function/math_func.py +1659 -290
- mindspore/ops/function/nn_func.py +988 -317
- mindspore/ops/function/parameter_func.py +3 -56
- mindspore/ops/function/random_func.py +243 -33
- mindspore/ops/function/sparse_unary_func.py +1 -1
- mindspore/ops/functional.py +18 -5
- mindspore/ops/functional_overload.py +897 -0
- mindspore/ops/operations/__init__.py +3 -2
- mindspore/ops/operations/_embedding_cache_ops.py +4 -4
- mindspore/ops/operations/_grad_ops.py +2 -34
- mindspore/ops/operations/_infer_ops.py +2 -1
- mindspore/ops/operations/_inner_ops.py +38 -8
- mindspore/ops/operations/array_ops.py +45 -303
- mindspore/ops/operations/comm_ops.py +19 -16
- mindspore/ops/operations/custom_ops.py +11 -55
- mindspore/ops/operations/debug_ops.py +42 -47
- mindspore/ops/operations/inner_ops.py +6 -4
- mindspore/ops/operations/linalg_ops.py +3 -2
- mindspore/ops/operations/manually_defined/ops_def.py +185 -104
- mindspore/ops/operations/math_ops.py +11 -216
- mindspore/ops/operations/nn_ops.py +146 -308
- mindspore/ops/primitive.py +23 -21
- mindspore/ops/tensor_method.py +1669 -0
- mindspore/ops_generate/aclnn_kernel_register_auto_cc_generator.py +110 -0
- mindspore/ops_generate/add_tensor_docs_generator.py +54 -0
- mindspore/ops_generate/arg_handler.py +0 -61
- mindspore/ops_generate/auto_grad_impl_cc_generator.py +135 -0
- mindspore/ops_generate/auto_grad_reg_cc_generator.py +93 -0
- mindspore/ops_generate/base_generator.py +11 -0
- mindspore/ops_generate/cpp_create_prim_instance_helper_generator.py +108 -0
- mindspore/ops_generate/functional_map_cpp_generator.py +491 -0
- mindspore/ops_generate/functional_overload_py_generator.py +110 -0
- mindspore/ops_generate/functions_cc_generator.py +233 -0
- mindspore/ops_generate/gen_aclnn_implement.py +110 -114
- mindspore/ops_generate/gen_constants.py +157 -3
- mindspore/ops_generate/gen_ops.py +245 -990
- mindspore/ops_generate/gen_pyboost_func.py +97 -998
- mindspore/ops_generate/gen_utils.py +119 -33
- mindspore/ops_generate/lite_ops_cpp_generator.py +155 -0
- mindspore/ops_generate/op_api_proto.py +206 -0
- mindspore/ops_generate/op_def_py_generator.py +131 -0
- mindspore/ops_generate/op_prim_py_generator.py +480 -0
- mindspore/ops_generate/op_proto.py +373 -108
- mindspore/ops_generate/op_template_parser.py +436 -0
- mindspore/ops_generate/ops_def_cc_generator.py +288 -0
- mindspore/ops_generate/ops_def_h_generator.py +74 -0
- mindspore/ops_generate/ops_name_h_generator.py +68 -0
- mindspore/ops_generate/ops_primitive_h_generator.py +81 -0
- mindspore/ops_generate/pyboost_functions_cpp_generator.py +370 -0
- mindspore/ops_generate/pyboost_functions_h_generator.py +68 -0
- mindspore/ops_generate/pyboost_functions_py_generator.py +148 -0
- mindspore/ops_generate/pyboost_grad_function_cpp_generator.py +154 -0
- mindspore/ops_generate/pyboost_inner_prim_generator.py +131 -0
- mindspore/ops_generate/pyboost_native_grad_functions_generator.py +268 -0
- mindspore/ops_generate/pyboost_op_cpp_code_generator.py +851 -0
- mindspore/ops_generate/pyboost_overload_functions_cpp_generator.py +344 -0
- mindspore/ops_generate/pyboost_utils.py +92 -33
- mindspore/ops_generate/template.py +294 -44
- mindspore/ops_generate/tensor_func_reg_cpp_generator.py +422 -0
- mindspore/parallel/__init__.py +3 -3
- mindspore/parallel/_auto_parallel_context.py +24 -33
- mindspore/parallel/_parallel_serialization.py +13 -2
- mindspore/parallel/_utils.py +4 -1
- mindspore/parallel/algo_parameter_config.py +1 -1
- mindspore/parallel/checkpoint_transform.py +44 -0
- mindspore/parallel/cluster/process_entity/_api.py +131 -37
- mindspore/parallel/cluster/process_entity/_utils.py +41 -6
- mindspore/parallel/cluster/run.py +20 -3
- mindspore/parallel/parameter_broadcast.py +1 -1
- mindspore/parallel/shard.py +3 -0
- mindspore/parallel/transform_safetensors.py +119 -253
- mindspore/profiler/__init__.py +17 -4
- mindspore/profiler/analysis/__init__.py +0 -0
- mindspore/profiler/analysis/parser/__init__.py +0 -0
- mindspore/profiler/analysis/parser/ascend_cann_parser.py +166 -0
- mindspore/profiler/analysis/parser/base_parser.py +158 -0
- mindspore/profiler/analysis/parser/framework_cann_relation_parser.py +45 -0
- mindspore/profiler/analysis/parser/ms_framework_parser.py +142 -0
- mindspore/profiler/analysis/parser/ms_minddata_parser.py +145 -0
- mindspore/profiler/analysis/parser/timeline_assembly_factory/__init__.py +0 -0
- mindspore/profiler/analysis/parser/timeline_assembly_factory/ascend_timeline_assembler.py +261 -0
- mindspore/profiler/analysis/parser/timeline_assembly_factory/base_timeline_assembler.py +40 -0
- mindspore/profiler/analysis/parser/timeline_assembly_factory/trace_view_container.py +84 -0
- mindspore/profiler/analysis/parser/timeline_creator/__init__.py +0 -0
- mindspore/profiler/analysis/parser/timeline_creator/base_timeline_creator.py +44 -0
- mindspore/profiler/analysis/parser/timeline_creator/cpu_op_timeline_creator.py +90 -0
- mindspore/profiler/analysis/parser/timeline_creator/fwk_timeline_creator.py +76 -0
- mindspore/profiler/analysis/parser/timeline_creator/msprof_timeline_creator.py +103 -0
- mindspore/profiler/analysis/parser/timeline_creator/scope_layer_timeline_creator.py +134 -0
- mindspore/profiler/analysis/parser/timeline_event/__init__.py +0 -0
- mindspore/profiler/analysis/parser/timeline_event/base_event.py +233 -0
- mindspore/profiler/analysis/parser/timeline_event/cpu_op_event.py +47 -0
- mindspore/profiler/analysis/parser/timeline_event/flow_event.py +36 -0
- mindspore/profiler/analysis/parser/timeline_event/fwk_event.py +260 -0
- mindspore/profiler/analysis/parser/timeline_event/msprof_event.py +73 -0
- mindspore/profiler/analysis/parser/timeline_event/scope_layer_event.py +53 -0
- mindspore/profiler/analysis/parser/timeline_event/timeline_event_pool.py +146 -0
- mindspore/profiler/analysis/task_manager.py +131 -0
- mindspore/profiler/analysis/time_converter.py +84 -0
- mindspore/profiler/analysis/viewer/__init__.py +0 -0
- mindspore/profiler/analysis/viewer/ascend_communication_viewer.py +333 -0
- mindspore/profiler/analysis/viewer/ascend_integrate_viewer.py +87 -0
- mindspore/profiler/analysis/viewer/ascend_kernel_details_viewer.py +252 -0
- mindspore/profiler/analysis/viewer/ascend_memory_viewer.py +313 -0
- mindspore/profiler/analysis/viewer/ascend_op_memory_viewer.py +322 -0
- mindspore/profiler/analysis/viewer/ascend_step_trace_time_viewer.py +265 -0
- mindspore/profiler/analysis/viewer/ascend_timeline_viewer.py +58 -0
- mindspore/profiler/analysis/viewer/base_viewer.py +26 -0
- mindspore/profiler/analysis/viewer/ms_dataset_viewer.py +97 -0
- mindspore/profiler/analysis/viewer/ms_minddata_viewer.py +581 -0
- mindspore/profiler/analysis/work_flow.py +73 -0
- mindspore/profiler/common/ascend_msprof_exporter.py +138 -0
- mindspore/profiler/common/command_executor.py +90 -0
- mindspore/profiler/common/constant.py +174 -3
- mindspore/profiler/common/file_manager.py +208 -0
- mindspore/profiler/common/log.py +130 -0
- mindspore/profiler/common/msprof_cmd_tool.py +202 -0
- mindspore/profiler/common/path_manager.py +371 -0
- mindspore/profiler/common/process_bar.py +168 -0
- mindspore/profiler/common/process_pool.py +9 -3
- mindspore/profiler/common/profiler_context.py +476 -0
- mindspore/profiler/common/profiler_info.py +304 -0
- mindspore/profiler/common/profiler_output_path.py +284 -0
- mindspore/profiler/common/profiler_parameters.py +210 -0
- mindspore/profiler/common/profiler_path_manager.py +120 -0
- mindspore/profiler/common/record_function.py +76 -0
- mindspore/profiler/common/tlv_decoder.py +76 -0
- mindspore/profiler/common/util.py +75 -2
- mindspore/profiler/dynamic_profiler.py +270 -37
- mindspore/profiler/envprofiler.py +138 -0
- mindspore/profiler/mstx.py +199 -0
- mindspore/profiler/platform/__init__.py +21 -0
- mindspore/profiler/platform/base_profiler.py +40 -0
- mindspore/profiler/platform/cpu_profiler.py +124 -0
- mindspore/profiler/platform/gpu_profiler.py +74 -0
- mindspore/profiler/platform/npu_profiler.py +309 -0
- mindspore/profiler/profiler.py +580 -93
- mindspore/profiler/profiler_action_controller.py +187 -0
- mindspore/profiler/profiler_interface.py +114 -0
- mindspore/profiler/schedule.py +208 -0
- mindspore/rewrite/api/symbol_tree.py +1 -2
- mindspore/run_check/_check_version.py +2 -6
- mindspore/runtime/__init__.py +37 -0
- mindspore/runtime/device.py +27 -0
- mindspore/runtime/event.py +209 -0
- mindspore/runtime/executor.py +148 -0
- mindspore/runtime/memory.py +392 -0
- mindspore/runtime/stream.py +460 -0
- mindspore/runtime/thread_bind_core.py +401 -0
- mindspore/train/__init__.py +2 -2
- mindspore/train/_utils.py +53 -18
- mindspore/train/amp.py +8 -4
- mindspore/train/callback/_checkpoint.py +32 -18
- mindspore/train/callback/_early_stop.py +1 -1
- mindspore/train/callback/_flops_collector.py +105 -69
- mindspore/train/callback/_history.py +1 -1
- mindspore/train/callback/_summary_collector.py +44 -6
- mindspore/train/callback/_tft_register.py +31 -10
- mindspore/train/dataset_helper.py +11 -11
- mindspore/train/metrics/precision.py +4 -5
- mindspore/train/mind_ir_pb2.py +167 -46
- mindspore/train/model.py +13 -15
- mindspore/train/serialization.py +462 -76
- mindspore/train/summary/summary_record.py +1 -2
- mindspore/train/train_thor/model_thor.py +1 -1
- mindspore/utils/__init__.py +4 -2
- mindspore/utils/bin/dataset-cache +0 -0
- mindspore/utils/bin/dataset-cache-server +0 -0
- mindspore/utils/dryrun.py +138 -0
- mindspore/utils/runtime_execution_order_check.py +550 -0
- mindspore/version.py +1 -1
- {mindspore-2.4.10.dist-info → mindspore-2.5.0.dist-info}/METADATA +2 -3
- {mindspore-2.4.10.dist-info → mindspore-2.5.0.dist-info}/RECORD +523 -457
- {mindspore-2.4.10.dist-info → mindspore-2.5.0.dist-info}/entry_points.txt +1 -1
- mindspore/_data_dump.cpython-39-aarch64-linux-gnu.so +0 -0
- mindspore/bin/cache_admin +0 -0
- mindspore/bin/cache_server +0 -0
- mindspore/common/_tensor_overload.py +0 -139
- mindspore/lib/libmindspore_np_dtype.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/include/acme.h +0 -24
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/include/acme_op.h +0 -82
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/include/op_creator.h +0 -113
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/include/op_param.h +0 -193
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/src/core/dtype_registry.h +0 -90
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/src/core/kernel_register.h +0 -46
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/src/core/platform/platform_configs.h +0 -89
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/src/core/platform/rt_funcs.h +0 -135
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/src/ops/host_src/add_layer_norm_op.h +0 -60
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/src/ops/host_src/add_rms_norm_op.h +0 -50
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/src/ops/host_src/add_rms_norm_quant_op.h +0 -50
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/src/ops/host_src/apply_rotary_pos_emb_nz_op.h +0 -42
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/src/ops/host_src/apply_rotary_pos_emb_op.h +0 -55
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/src/ops/host_src/asd_elewise_op.h +0 -34
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/src/ops/host_src/asd_only_ops.h +0 -94
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/src/ops/host_src/asd_op_base.h +0 -97
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/src/ops/host_src/cast_op.h +0 -52
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/src/ops/host_src/flash_attention_score_op.h +0 -97
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/src/ops/host_src/gelu_op.h +0 -44
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/src/ops/host_src/matmul_add_rmsnorm_op.h +0 -73
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/src/ops/host_src/matmul_op.h +0 -108
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/src/ops/host_src/multi_impls_op.h +0 -64
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/src/ops/host_src/multi_weight_matmul_op.h +0 -91
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/src/ops/host_src/paged_attention_op.h +0 -99
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/src/ops/host_src/reshape_and_cache_nz_op.h +0 -44
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/src/ops/host_src/reshape_and_cache_op.h +0 -44
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/src/ops/host_src/rms_norm_op.h +0 -64
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/src/utils/asd_utils.h +0 -179
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/src/utils/comm_utils.h +0 -69
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/acme/src/utils/profiling_util.h +0 -366
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/add/add_impl.h +0 -56
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/add/kernel/add.h +0 -21
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/add/tiling/add_tiling.h +0 -43
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/apply_rotary_pos_emb/apply_rotary_pos_emb_impl.h +0 -46
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/apply_rotary_pos_emb/kernel/apply_rotary_pos_emb.h +0 -23
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/apply_rotary_pos_emb/kernel/apply_rotary_pos_emb_base.h +0 -456
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/apply_rotary_pos_emb/kernel/apply_rotary_pos_emb_bf16.h +0 -217
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/apply_rotary_pos_emb/kernel/apply_rotary_pos_emb_fp.h +0 -391
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/apply_rotary_pos_emb/kernel/apply_rotary_pos_emb_fp16.h +0 -126
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/apply_rotary_pos_emb/kernel/apply_rotary_pos_emb_fp32.h +0 -230
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/apply_rotary_pos_emb/kernel/apply_rotary_pos_emb_tiling.h +0 -43
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/apply_rotary_pos_emb/kernel/apply_rotary_pos_emb_value.h +0 -27
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/apply_rotary_pos_emb_nz/apply_rotary_pos_emb_nz_impl.h +0 -34
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/apply_rotary_pos_emb_nz/kernel/apply_rotary_pos_emb_nz.h +0 -23
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/apply_rotary_pos_emb_nz/kernel/apply_rotary_pos_emb_nz_base.h +0 -460
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/apply_rotary_pos_emb_nz/kernel/apply_rotary_pos_emb_nz_fp16.h +0 -116
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/apply_rotary_pos_emb_nz/kernel/apply_rotary_pos_emb_nz_fp32.h +0 -230
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/apply_rotary_pos_emb_nz/kernel/apply_rotary_pos_emb_nz_tiling.h +0 -43
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/apply_rotary_pos_emb_nz/kernel/apply_rotary_pos_emb_nz_value.h +0 -27
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/asdop/asd_op_impl.h +0 -74
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/backend_param.h +0 -74
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/cast/cast_impl.h +0 -48
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/cast/kernel/cast_kernel.h +0 -21
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/compare/compare_impl.h +0 -55
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/compare/compare_tiling.h +0 -27
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/compare/kernel/compare_kernel.h +0 -23
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_binary/and_impl.h +0 -29
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_binary/div_impl.h +0 -29
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_binary/elewise_binary_impl.h +0 -48
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_binary/elewise_binary_tiling.h +0 -25
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_binary/kernel/and_kernel.h +0 -46
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_binary/kernel/div_kernel.h +0 -46
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_binary/kernel/elewise_binary_base.h +0 -260
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_binary/kernel/elewise_binary_kernel.h +0 -35
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_binary/kernel/max_kernel.h +0 -66
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_binary/kernel/min_kernel.h +0 -66
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_binary/kernel/mul_kernel.h +0 -66
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_binary/kernel/or_kernel.h +0 -46
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_binary/max_impl.h +0 -29
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_binary/min_impl.h +0 -29
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_binary/mul_impl.h +0 -29
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_binary/or_impl.h +0 -29
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_unary/abs_impl.h +0 -29
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_unary/elewise_unary_impl.h +0 -47
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_unary/elewise_unary_tiling.h +0 -24
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_unary/exp_impl.h +0 -29
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_unary/kernel/abs_kernel.h +0 -45
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_unary/kernel/elewise_unary_base.h +0 -148
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_unary/kernel/elewise_unary_kernel.h +0 -31
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_unary/kernel/exp_kernel.h +0 -45
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_unary/kernel/ln_kernel.h +0 -45
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_unary/kernel/not_kernel.h +0 -45
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_unary/kernel/reciprocal_kernel.h +0 -45
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_unary/kernel/relu_kernel.h +0 -55
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_unary/kernel/rsqrt_kernel.h +0 -45
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_unary/kernel/sqrt_kernel.h +0 -45
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_unary/ln_impl.h +0 -29
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_unary/not_impl.h +0 -29
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_unary/reciprocal_impl.h +0 -29
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_unary/relu_impl.h +0 -29
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_unary/rsqrt_impl.h +0 -29
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/elewise_unary/sqrt_impl.h +0 -29
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/flash_attention_score/flash_attention_score_impl.h +0 -68
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/internal_kernel.h +0 -99
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/internal_rtbackend.h +0 -21
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/lccl/lccl_wrapper.h +0 -58
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/ms_int_types.h +0 -91
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/ms_int_utils.h +0 -108
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/paged_attention/paged_attention_impl.h +0 -64
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/param/add_param.h +0 -68
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/param/attention_param.h +0 -40
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/param/cast_param.h +0 -30
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/param/compare_param.h +0 -31
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/param/elewise_param.h +0 -41
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/param/grouped_matmul_param.h +0 -40
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/param/matmul_ext_param.h +0 -38
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/param/matmul_qkv_param.h +0 -42
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/param/sub_param.h +0 -33
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/profiling_util.h +0 -377
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/reshape_and_cache_nz/kernel/reshape_and_cache_nz.h +0 -24
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/reshape_and_cache_nz/reshape_and_cache_nz_impl.h +0 -42
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/reshape_and_cache_nz/reshape_and_cache_nz_tiling.h +0 -27
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/rms_norm/rms_norm_impl.h +0 -46
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/sub/kernel/sub_kernel.h +0 -20
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/sub/sub_impl.h +0 -48
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/sub/sub_tiling.h +0 -25
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/tune_repo/matmul_table.h +0 -399
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/tune_repo/utils.h +0 -41
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/utils/backend.h +0 -45
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/utils/elewise_tiling.h +0 -29
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/utils/elewise_utils.h +0 -30
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/utils/log/log.h +0 -69
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/utils/log/log_core.h +0 -43
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/utils/log/log_entity.h +0 -38
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/utils/log/log_sink.h +0 -69
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/utils/log/log_stream.h +0 -41
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/utils/log/log_tiling.h +0 -71
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/utils/log/log_utils.h +0 -165
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/utils/math.h +0 -20
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/utils/register/kernel_creator.h +0 -39
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/utils/register/kernel_registry.h +0 -121
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/include/utils/utils.h +0 -106
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libAdd_impl.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libSub_impl.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libadd_rms_norm_quant_acme_impl.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libapply_rotary_pos_emb_310p_old_impl.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libapply_rotary_pos_emb_old_impl.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libreshape_and_cache_nz_impl.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/lib/libreshape_and_cache_nz_old_impl.so +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/AcmeMatMulPostFusionMixTactic/acme_matmul_postfusion_mix.json +0 -19
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/AcmeMatMulPostFusionMixTactic/acme_matmul_postfusion_mix.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/AcmeMatMulPostFusionMixTactic/acme_matmul_postfusion_mix_mix_aic_0.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/AcmeMatMulPostFusionMixTactic/acme_matmul_postfusion_mix_mix_aiv_0.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/AcmeMultiWeightMatMulPostFusionMixTactic/acme_multi_weight_matmul_postfusion_mix.json +0 -19
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/AcmeMultiWeightMatMulPostFusionMixTactic/acme_multi_weight_matmul_postfusion_mix.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/AcmeMultiWeightMatMulPostFusionMixTactic/acme_multi_weight_matmul_postfusion_mix_mix_aic_0.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/AcmeMultiWeightMatMulPostFusionMixTactic/acme_multi_weight_matmul_postfusion_mix_mix_aiv_0.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/flash_attention_score/flash_attention_score_bf16_bnsd_full_mix.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/flash_attention_score/flash_attention_score_bf16_bnsd_tri_mix.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/flash_attention_score/flash_attention_score_bf16_bsh_full_mix.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/flash_attention_score/flash_attention_score_bf16_bsh_tri_mix.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/flash_attention_score/flash_attention_score_fp16_bnsd_full_mix.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/flash_attention_score/flash_attention_score_fp16_bnsd_tri_mix.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/flash_attention_score/flash_attention_score_fp16_bsh_full_mix.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/flash_attention_score/flash_attention_score_fp16_bsh_tri_mix.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/paged_attention/paged_attention_bf16_bnsd_mix.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/paged_attention/paged_attention_bf16_bsh_mix.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/paged_attention/paged_attention_fp16_bnsd_mix.o +0 -0
- mindspore/lib/plugin/ascend/ms_kernels_internal/internal_kernel/op_kernels/ascend910b/paged_attention/paged_attention_fp16_bsh_mix.o +0 -0
- mindspore/profiler/envprofiling.py +0 -254
- mindspore/profiler/profiling.py +0 -1926
- {mindspore-2.4.10.dist-info → mindspore-2.5.0.dist-info}/WHEEL +0 -0
- {mindspore-2.4.10.dist-info → mindspore-2.5.0.dist-info}/top_level.txt +0 -0
mindspore/nn/layer/pooling.py
CHANGED
|
@@ -18,23 +18,21 @@ from __future__ import absolute_import
|
|
|
18
18
|
from mindspore.ops import operations as P
|
|
19
19
|
from mindspore.ops import functional as F
|
|
20
20
|
import mindspore.ops as ops
|
|
21
|
-
from mindspore.ops.function.nn_func import avg_pool2d_ext
|
|
22
21
|
from mindspore._checkparam import _check_3d_int_or_tuple
|
|
23
22
|
from mindspore import _checkparam as validator
|
|
24
23
|
from mindspore.ops.primitive import constexpr, _primexpr
|
|
25
24
|
from mindspore.common.tensor import Tensor
|
|
26
25
|
import mindspore.context as context
|
|
27
26
|
from mindspore.common import dtype as mstype
|
|
28
|
-
from mindspore.ops.operations.nn_ops import AdaptiveMaxPool2D
|
|
29
|
-
from mindspore.ops.operations.nn_ops import AdaptiveMaxPool3D, AdaptiveAvgPool3D
|
|
30
|
-
from mindspore.ops.auto_generate.gen_ops_prim import MaxPoolWithIndices, MaxPoolWithMask
|
|
31
27
|
from mindspore.nn.cell import Cell
|
|
32
28
|
from mindspore._c_expression import MSContext
|
|
29
|
+
from mindspore.ops.auto_generate import avg_pool1d_ext
|
|
30
|
+
|
|
33
31
|
|
|
34
32
|
__all__ = ['AvgPool3d', 'MaxPool3d', 'AvgPool2d', 'MaxPool2d', 'AvgPool1d', 'MaxPool1d', 'FractionalMaxPool2d',
|
|
35
33
|
'FractionalMaxPool3d', 'AdaptiveAvgPool1d', 'AdaptiveMaxPool1d', 'AdaptiveMaxPool2d', 'AdaptiveMaxPool3d',
|
|
36
34
|
'AdaptiveAvgPool2d', 'AdaptiveAvgPool3d', 'MaxUnpool1d', 'MaxUnpool2d', 'MaxUnpool3d', 'LPPool1d',
|
|
37
|
-
'LPPool2d', 'AvgPool2dExt', 'MaxPool2dExt']
|
|
35
|
+
'LPPool2d', 'AvgPool2dExt', 'MaxPool2dExt', 'AvgPool1dExt']
|
|
38
36
|
|
|
39
37
|
|
|
40
38
|
class _PoolNd(Cell):
|
|
@@ -689,9 +687,11 @@ class MaxPool2dExt(Cell):
|
|
|
689
687
|
self.return_indices = return_indices
|
|
690
688
|
strides = stride if (stride is not None) else kernel_size
|
|
691
689
|
if return_indices:
|
|
692
|
-
self.max_pool_func_ = MaxPoolWithIndices(kernel_size, strides, padding,
|
|
690
|
+
self.max_pool_func_ = ops.auto_generate.gen_ops_prim.MaxPoolWithIndices(kernel_size, strides, padding,
|
|
691
|
+
dilation, ceil_mode)
|
|
693
692
|
else:
|
|
694
|
-
self.max_pool_func_ = MaxPoolWithMask(kernel_size, strides, padding,
|
|
693
|
+
self.max_pool_func_ = ops.auto_generate.gen_ops_prim.MaxPoolWithMask(kernel_size, strides, padding,
|
|
694
|
+
dilation, ceil_mode)
|
|
695
695
|
|
|
696
696
|
def construct(self, input):
|
|
697
697
|
out, indices = self.max_pool_func_(input)
|
|
@@ -1021,6 +1021,40 @@ class AvgPool3d(_PoolNd):
|
|
|
1021
1021
|
return out
|
|
1022
1022
|
|
|
1023
1023
|
|
|
1024
|
+
class AvgPool1dExt(Cell):
|
|
1025
|
+
r"""
|
|
1026
|
+
Applies a 1D average pooling over an input Tensor which can be regarded as
|
|
1027
|
+
a composition of 2D input planes.
|
|
1028
|
+
|
|
1029
|
+
For details, please refer to :func:`mindspore.mint.nn.functional.avg_pool1d`.
|
|
1030
|
+
|
|
1031
|
+
Supported Platforms:
|
|
1032
|
+
``Ascend``
|
|
1033
|
+
|
|
1034
|
+
Examples:
|
|
1035
|
+
>>> import numpy as np
|
|
1036
|
+
>>> from mindspore import Tensor, nn
|
|
1037
|
+
>>> from mindspore import dtype as mstype
|
|
1038
|
+
>>> input = Tensor(np.arange(1 * 3 * 4).reshape(1, 3, 4), mstype.float32)
|
|
1039
|
+
>>> net = nn.AvgPool1dExt(kernel_size=2, stride=1)
|
|
1040
|
+
>>> output = net(input)
|
|
1041
|
+
>>> print(output.shape)
|
|
1042
|
+
(1, 3, 3)
|
|
1043
|
+
"""
|
|
1044
|
+
def __init__(self, kernel_size, stride=None, padding=0, ceil_mode=False,
|
|
1045
|
+
count_include_pad=True):
|
|
1046
|
+
super().__init__()
|
|
1047
|
+
self.kernel_size = kernel_size
|
|
1048
|
+
self.stride = stride
|
|
1049
|
+
self.padding = padding
|
|
1050
|
+
self.ceil_mode = ceil_mode
|
|
1051
|
+
self.count_include_pad = count_include_pad
|
|
1052
|
+
|
|
1053
|
+
def construct(self, input):
|
|
1054
|
+
return avg_pool1d_ext(input, self.kernel_size, self.stride, self.padding,
|
|
1055
|
+
self.ceil_mode, self.count_include_pad)
|
|
1056
|
+
|
|
1057
|
+
|
|
1024
1058
|
class AvgPool2dExt(Cell):
|
|
1025
1059
|
r"""
|
|
1026
1060
|
Applies a 2D average pooling over an input Tensor which can be regarded as
|
|
@@ -1052,8 +1086,8 @@ class AvgPool2dExt(Cell):
|
|
|
1052
1086
|
self.divisor_override = divisor_override
|
|
1053
1087
|
|
|
1054
1088
|
def construct(self, input):
|
|
1055
|
-
return avg_pool2d_ext(input, self.kernel_size, self.stride, self.padding,
|
|
1056
|
-
|
|
1089
|
+
return ops.function.nn_func.avg_pool2d_ext(input, self.kernel_size, self.stride, self.padding,
|
|
1090
|
+
self.ceil_mode, self.count_include_pad, self.divisor_override)
|
|
1057
1091
|
|
|
1058
1092
|
|
|
1059
1093
|
class AvgPool2d(_PoolNd):
|
|
@@ -1127,7 +1161,7 @@ class AvgPool2d(_PoolNd):
|
|
|
1127
1161
|
TypeError: If `kernel_size` or `strides` is neither int nor tuple.
|
|
1128
1162
|
ValueError: If `pad_mode` is not ``"valid"`` , ``"same"`` or ``"pad"`` with not case sensitive.
|
|
1129
1163
|
ValueError: If `data_format` is neither ``'NCHW'`` nor ``'NHWC'``.
|
|
1130
|
-
ValueError: If `padding`, `ceil_mode`, `count_include_pad`, or `divisor_override` is used
|
|
1164
|
+
ValueError: If `padding`, `ceil_mode`, `count_include_pad`, or `divisor_override` is used,
|
|
1131
1165
|
or `pad_mode` is ``"pad"`` when `data_format` is 'NHWC'.
|
|
1132
1166
|
ValueError: If `kernel_size` or `strides` is less than 1.
|
|
1133
1167
|
ValueError: If length of `padding` tuple/list is not 1 or 2.
|
|
@@ -1592,7 +1626,7 @@ class AdaptiveAvgPool3d(Cell):
|
|
|
1592
1626
|
def __init__(self, output_size):
|
|
1593
1627
|
"""Initialize AdaptiveAvgPool3d."""
|
|
1594
1628
|
super(AdaptiveAvgPool3d, self).__init__()
|
|
1595
|
-
self.adaptive_avg_pool3d = AdaptiveAvgPool3D(output_size)
|
|
1629
|
+
self.adaptive_avg_pool3d = ops.AdaptiveAvgPool3D(output_size)
|
|
1596
1630
|
|
|
1597
1631
|
def construct(self, input):
|
|
1598
1632
|
return self.adaptive_avg_pool3d(input)
|
|
@@ -1764,7 +1798,7 @@ class AdaptiveMaxPool2d(Cell):
|
|
|
1764
1798
|
"""Initialize AdaptiveMaxPool2d."""
|
|
1765
1799
|
super(AdaptiveMaxPool2d, self).__init__()
|
|
1766
1800
|
validator.check_value_type('return_indices', return_indices, [bool], self.cls_name)
|
|
1767
|
-
self.adaptive_max_pool2d = AdaptiveMaxPool2D(output_size)
|
|
1801
|
+
self.adaptive_max_pool2d = ops.AdaptiveMaxPool2D(output_size)
|
|
1768
1802
|
self.return_indices = return_indices
|
|
1769
1803
|
|
|
1770
1804
|
def construct(self, input):
|
|
@@ -1823,7 +1857,7 @@ class AdaptiveMaxPool3d(Cell):
|
|
|
1823
1857
|
output_size = (output_size, output_size, output_size)
|
|
1824
1858
|
self.output_size = Tensor(output_size, dtype=mstype.int32)
|
|
1825
1859
|
self.return_indices = return_indices
|
|
1826
|
-
self.adaptive_max_pool3d = AdaptiveMaxPool3D()
|
|
1860
|
+
self.adaptive_max_pool3d = ops.AdaptiveMaxPool3D()
|
|
1827
1861
|
|
|
1828
1862
|
def construct(self, input):
|
|
1829
1863
|
output = self.adaptive_max_pool3d(input, self.output_size)
|
mindspore/nn/layer/rnn_cells.py
CHANGED
|
@@ -340,7 +340,7 @@ class GRUCell(RNNCellBase):
|
|
|
340
340
|
:math:`r` is reset gate. :math:`z` is update gate. :math:`n` is n-th layer. For instance,
|
|
341
341
|
:math:`W_{ir}, b_{ir}` are the weight and bias used to transform from input :math:`x` to :math:`r`.
|
|
342
342
|
Details can be found in paper
|
|
343
|
-
`Learning Phrase Representations using RNN Encoder
|
|
343
|
+
`Learning Phrase Representations using RNN Encoder-Decoder for Statistical Machine Translation
|
|
344
344
|
<https://aclanthology.org/D14-1179.pdf>`_.
|
|
345
345
|
|
|
346
346
|
Args:
|
mindspore/nn/layer/rnns.py
CHANGED
|
@@ -237,6 +237,7 @@ class _DynamicGRUCPUGPU(Cell):
|
|
|
237
237
|
h_0.view(1, *h_0.shape),
|
|
238
238
|
weights.astype(x.dtype)
|
|
239
239
|
)
|
|
240
|
+
|
|
240
241
|
if seq_length is not None:
|
|
241
242
|
h_n = get_hidden(output, seq_length)
|
|
242
243
|
mask = sequence_mask(seq_length, x.shape[0])
|
|
@@ -687,7 +688,7 @@ class GRU(_RNNBase):
|
|
|
687
688
|
are learnable weights between the output and the input in the formula. For instance,
|
|
688
689
|
:math:`W_{ir}, b_{ir}` are the weight and bias used to transform from input :math:`x` to :math:`r`.
|
|
689
690
|
Details can be found in paper
|
|
690
|
-
`Learning Phrase Representations using RNN Encoder
|
|
691
|
+
`Learning Phrase Representations using RNN Encoder-Decoder for Statistical Machine Translation
|
|
691
692
|
<https://aclanthology.org/D14-1179.pdf>`_.
|
|
692
693
|
|
|
693
694
|
Note:
|
|
@@ -15,8 +15,8 @@
|
|
|
15
15
|
"""Time Distributed."""
|
|
16
16
|
from __future__ import absolute_import
|
|
17
17
|
|
|
18
|
+
from mindspore import ops
|
|
18
19
|
from mindspore.ops.primitive import constexpr, Primitive, _primexpr
|
|
19
|
-
from mindspore.ops import Reshape, Transpose, Stack, Unstack
|
|
20
20
|
from mindspore.common import Tensor
|
|
21
21
|
from mindspore import _checkparam as Validator
|
|
22
22
|
from mindspore.nn.cell import Cell
|
|
@@ -116,8 +116,8 @@ class TimeDistributed(Cell):
|
|
|
116
116
|
self.layer = layer
|
|
117
117
|
self.time_axis = time_axis
|
|
118
118
|
self.reshape_with_axis = reshape_with_axis
|
|
119
|
-
self.transpose = Transpose()
|
|
120
|
-
self.reshape = Reshape()
|
|
119
|
+
self.transpose = ops.Transpose()
|
|
120
|
+
self.reshape = ops.Reshape()
|
|
121
121
|
|
|
122
122
|
def construct(self, inputs):
|
|
123
123
|
_check_data(isinstance(inputs, Tensor), self.cls_name)
|
|
@@ -143,7 +143,7 @@ class TimeDistributed(Cell):
|
|
|
143
143
|
outputs_shape_new = (-1,) + outputs_shape_new[1:]
|
|
144
144
|
return self.reshape(outputs, outputs_shape_new)
|
|
145
145
|
|
|
146
|
-
unstack = Unstack(time_axis)
|
|
146
|
+
unstack = ops.Unstack(time_axis)
|
|
147
147
|
inputs = unstack(inputs)
|
|
148
148
|
y = ()
|
|
149
149
|
for item in inputs:
|
|
@@ -151,5 +151,5 @@ class TimeDistributed(Cell):
|
|
|
151
151
|
_check_data(isinstance(outputs, Tensor), self.cls_name)
|
|
152
152
|
_check_expand_dims_axis(time_axis, outputs.ndim, self.cls_name)
|
|
153
153
|
y += (outputs,)
|
|
154
|
-
y = Stack(time_axis)(y)
|
|
154
|
+
y = ops.Stack(time_axis)(y)
|
|
155
155
|
return y
|
|
@@ -26,12 +26,12 @@ from mindspore.common.tensor import Tensor
|
|
|
26
26
|
from mindspore.common.parameter import Parameter
|
|
27
27
|
from mindspore.common.initializer import initializer, XavierNormal, XavierUniform, \
|
|
28
28
|
HeUniform, Uniform, _calculate_fan_in_and_fan_out
|
|
29
|
-
from mindspore.ops.function.nn_func import multi_head_attention_forward
|
|
30
29
|
from mindspore.nn.cell import Cell
|
|
31
30
|
from .basic import Dense, Dropout
|
|
32
31
|
from .activation import ReLU, GELU
|
|
33
32
|
from .normalization import LayerNorm
|
|
34
33
|
from .container import CellList
|
|
34
|
+
|
|
35
35
|
__all__ = ['MultiheadAttention', 'TransformerEncoderLayer', 'TransformerDecoderLayer',
|
|
36
36
|
'TransformerEncoder', 'TransformerDecoder', 'Transformer']
|
|
37
37
|
|
|
@@ -212,7 +212,7 @@ class MultiheadAttention(Cell):
|
|
|
212
212
|
query, key, value = [x.swapaxes(1, 0) for x in (query, key, value)]
|
|
213
213
|
|
|
214
214
|
if not self._qkv_same_embed_dim:
|
|
215
|
-
attn_output, attn_output_weights = multi_head_attention_forward(
|
|
215
|
+
attn_output, attn_output_weights = ops.function.nn_func.multi_head_attention_forward(
|
|
216
216
|
query, key, value, self.embed_dim, self.num_heads,
|
|
217
217
|
self.in_proj_weight, self.in_proj_bias,
|
|
218
218
|
self.bias_k, self.bias_v, self.add_zero_attn,
|
|
@@ -224,7 +224,7 @@ class MultiheadAttention(Cell):
|
|
|
224
224
|
v_proj_weight=self.v_proj_weight, average_attn_weights=average_attn_weights,
|
|
225
225
|
k_is_v=self.k_is_v, q_is_k=self.q_is_k, dtype=self.dtype)
|
|
226
226
|
else:
|
|
227
|
-
attn_output, attn_output_weights = multi_head_attention_forward(
|
|
227
|
+
attn_output, attn_output_weights = ops.function.nn_func.multi_head_attention_forward(
|
|
228
228
|
query, key, value, self.embed_dim, self.num_heads,
|
|
229
229
|
self.in_proj_weight, self.in_proj_bias,
|
|
230
230
|
self.bias_k, self.bias_v, self.add_zero_attn,
|
|
@@ -328,7 +328,7 @@ class TransformerEncoderLayer(Cell):
|
|
|
328
328
|
self.activation1 = activation
|
|
329
329
|
|
|
330
330
|
if not isinstance(activation, str) and not isinstance(activation, Cell) \
|
|
331
|
-
|
|
331
|
+
and not callable(activation):
|
|
332
332
|
raise ValueError(f"The argument 'activation' must be str, callable or Cell instance,"
|
|
333
333
|
f" but get {activation}.")
|
|
334
334
|
if isinstance(activation, Cell) and (not isinstance(activation, ReLU) and \
|
|
@@ -360,15 +360,23 @@ class TransformerEncoderLayer(Cell):
|
|
|
360
360
|
raise AssertionError(
|
|
361
361
|
"only bool and floating types of key_padding_mask are supported")
|
|
362
362
|
|
|
363
|
-
|
|
363
|
+
input_data = src
|
|
364
|
+
|
|
364
365
|
if self.norm_first:
|
|
365
|
-
|
|
366
|
-
|
|
366
|
+
normed_input = self.norm1(input_data)
|
|
367
|
+
sa_block_result = self._sa_block(normed_input, src_mask, src_key_padding_mask)
|
|
368
|
+
input_data = input_data + sa_block_result
|
|
369
|
+
normed_updated_input = self.norm2(input_data)
|
|
370
|
+
ff_block_result = self._ff_block(normed_updated_input)
|
|
371
|
+
input_data = input_data + ff_block_result
|
|
367
372
|
else:
|
|
368
|
-
|
|
369
|
-
|
|
373
|
+
sa_block_result = self._sa_block(input_data, src_mask, src_key_padding_mask)
|
|
374
|
+
normed_sa_result = self.norm1(input_data + sa_block_result)
|
|
375
|
+
input_data = normed_sa_result
|
|
376
|
+
ff_block_result = self._ff_block(input_data)
|
|
377
|
+
input_data = self.norm2(input_data + ff_block_result)
|
|
370
378
|
|
|
371
|
-
return
|
|
379
|
+
return input_data
|
|
372
380
|
|
|
373
381
|
def _sa_block(self, x, attn_mask, key_padding_mask):
|
|
374
382
|
x = self.self_attn(x, x, x,
|
|
@@ -480,7 +488,7 @@ class TransformerDecoderLayer(Cell):
|
|
|
480
488
|
self.activation1 = activation
|
|
481
489
|
|
|
482
490
|
if not isinstance(activation, str) and not isinstance(activation, Cell) \
|
|
483
|
-
|
|
491
|
+
and not callable(activation):
|
|
484
492
|
raise ValueError(f"The argument 'activation' must be str, callable or Cell instance,"
|
|
485
493
|
f" but get {activation}.")
|
|
486
494
|
if isinstance(activation, Cell) and (not isinstance(activation, ReLU) and \
|
|
@@ -507,17 +515,29 @@ class TransformerDecoderLayer(Cell):
|
|
|
507
515
|
def construct(self, tgt: Tensor, memory: Tensor, tgt_mask: Optional[Tensor] = None,
|
|
508
516
|
memory_mask: Optional[Tensor] = None, tgt_key_padding_mask: Optional[Tensor] = None,
|
|
509
517
|
memory_key_padding_mask: Optional[Tensor] = None):
|
|
510
|
-
|
|
518
|
+
input_data = tgt
|
|
519
|
+
|
|
511
520
|
if self.norm_first:
|
|
512
|
-
|
|
513
|
-
|
|
514
|
-
|
|
521
|
+
normed_input = self.norm1(input_data)
|
|
522
|
+
sa_block_result = self._sa_block(normed_input, tgt_mask, tgt_key_padding_mask)
|
|
523
|
+
input_data = input_data + sa_block_result
|
|
524
|
+
normed_updated_input_1 = self.norm2(input_data)
|
|
525
|
+
mha_block_result = self._mha_block(normed_updated_input_1, memory, memory_mask, memory_key_padding_mask)
|
|
526
|
+
input_data = input_data + mha_block_result
|
|
527
|
+
normed_updated_input_2 = self.norm3(input_data)
|
|
528
|
+
ff_block_result = self._ff_block(normed_updated_input_2)
|
|
529
|
+
input_data = input_data + ff_block_result
|
|
515
530
|
else:
|
|
516
|
-
|
|
517
|
-
|
|
518
|
-
|
|
531
|
+
sa_block_result = self._sa_block(input_data, tgt_mask, tgt_key_padding_mask)
|
|
532
|
+
normed_sa_result = self.norm1(input_data + sa_block_result)
|
|
533
|
+
input_data = normed_sa_result
|
|
534
|
+
mha_block_result = self._mha_block(input_data, memory, memory_mask, memory_key_padding_mask)
|
|
535
|
+
normed_mha_result = self.norm2(input_data + mha_block_result)
|
|
536
|
+
input_data = normed_mha_result
|
|
537
|
+
ff_block_result = self._ff_block(input_data)
|
|
538
|
+
input_data = self.norm3(input_data + ff_block_result)
|
|
519
539
|
|
|
520
|
-
return
|
|
540
|
+
return input_data
|
|
521
541
|
|
|
522
542
|
def _sa_block(self, x, attn_mask, key_padding_mask):
|
|
523
543
|
x = self.self_attn(x, x, x,
|
|
@@ -670,17 +690,19 @@ class TransformerDecoder(Cell):
|
|
|
670
690
|
def construct(self, tgt: Tensor, memory: Tensor, tgt_mask: Optional[Tensor] = None,
|
|
671
691
|
memory_mask: Optional[Tensor] = None, tgt_key_padding_mask: Optional[Tensor] = None,
|
|
672
692
|
memory_key_padding_mask: Optional[Tensor] = None):
|
|
673
|
-
|
|
693
|
+
processed_output = tgt
|
|
674
694
|
for mod in self.layers:
|
|
675
|
-
|
|
676
|
-
|
|
677
|
-
|
|
678
|
-
|
|
695
|
+
layer_output = mod(processed_output, memory,
|
|
696
|
+
tgt_mask=tgt_mask,
|
|
697
|
+
memory_mask=memory_mask,
|
|
698
|
+
tgt_key_padding_mask=tgt_key_padding_mask,
|
|
699
|
+
memory_key_padding_mask=memory_key_padding_mask)
|
|
700
|
+
processed_output = layer_output
|
|
679
701
|
|
|
680
702
|
if self.norm is not None:
|
|
681
|
-
|
|
703
|
+
processed_output = self.norm(processed_output)
|
|
682
704
|
|
|
683
|
-
return
|
|
705
|
+
return processed_output
|
|
684
706
|
|
|
685
707
|
|
|
686
708
|
class Transformer(Cell):
|
|
@@ -223,7 +223,8 @@ class InverseDecayLR(LearningRateSchedule):
|
|
|
223
223
|
learning_rate (float): The initial value of learning rate.
|
|
224
224
|
decay_rate (float): The decay rate.
|
|
225
225
|
decay_steps (int): Number of steps to decay over.
|
|
226
|
-
is_stair (bool): If true, learning rate decay once every `decay_steps` times.
|
|
226
|
+
is_stair (bool): If true, learning rate decay once every `decay_steps` times. If False, the learning rate
|
|
227
|
+
decays for every step. Default: ``False`` .
|
|
227
228
|
|
|
228
229
|
Inputs:
|
|
229
230
|
- **global_step** (Tensor) - The current step number.
|
|
@@ -454,8 +455,9 @@ class WarmUpLR(LearningRateSchedule):
|
|
|
454
455
|
tmp\_step= \min(current\_step, warmup\_steps)
|
|
455
456
|
|
|
456
457
|
Args:
|
|
457
|
-
learning_rate (float): The initial value of learning rate.
|
|
458
|
-
warmup_steps (int): The warm up steps of learning rate.
|
|
458
|
+
learning_rate (float): The initial value of learning rate. The value of `learning_rate` must be greater than 0.
|
|
459
|
+
warmup_steps (int): The warm up steps of learning rate. The value of `warmup_steps` must be greater than
|
|
460
|
+
or equal to 1.
|
|
459
461
|
|
|
460
462
|
Inputs:
|
|
461
463
|
- **global_step** (Tensor) - The current step number. Shape is :math:`()`.
|
mindspore/nn/loss/loss.py
CHANGED
|
@@ -24,8 +24,6 @@ from mindspore.common.tensor import Tensor
|
|
|
24
24
|
from mindspore.common.parameter import Parameter
|
|
25
25
|
from mindspore.ops import operations as P
|
|
26
26
|
from mindspore.ops.operations import _inner_ops as inner
|
|
27
|
-
from mindspore.ops.operations.nn_ops import MultiMarginLoss as MultiMarginLossOp
|
|
28
|
-
from mindspore.ops.operations.nn_ops import MultilabelMarginLoss as MultilabelMarginLossOp
|
|
29
27
|
from mindspore.ops import functional as F
|
|
30
28
|
from mindspore import nn
|
|
31
29
|
from mindspore.ops.primitive import constexpr, _primexpr
|
|
@@ -33,7 +31,6 @@ from mindspore.nn.cell import Cell
|
|
|
33
31
|
from mindspore.nn.layer.activation import get_activation
|
|
34
32
|
from mindspore import _checkparam as validator
|
|
35
33
|
from mindspore import context
|
|
36
|
-
from mindspore.ops.auto_generate import l1_loss_ext_op
|
|
37
34
|
|
|
38
35
|
|
|
39
36
|
class LossBase(Cell):
|
|
@@ -319,7 +316,7 @@ class L1LossExt(LossBase):
|
|
|
319
316
|
self.reduction = reduction
|
|
320
317
|
|
|
321
318
|
def construct(self, logits, labels):
|
|
322
|
-
return l1_loss_ext_op(logits, labels, self.reduction)
|
|
319
|
+
return ops.auto_generate.l1_loss_ext_op(logits, labels, self.reduction)
|
|
323
320
|
|
|
324
321
|
|
|
325
322
|
class MSELoss(LossBase):
|
|
@@ -628,11 +625,11 @@ class SmoothL1Loss(LossBase):
|
|
|
628
625
|
.. math::
|
|
629
626
|
L_{i} =
|
|
630
627
|
\begin{cases}
|
|
631
|
-
\frac{0.5 (x_i - y_i)^{2}}{\beta}, & \text{if } |x_i - y_i| < {
|
|
632
|
-
|x_i - y_i| - 0.5 {\beta}, & \text{otherwise.}
|
|
628
|
+
\frac{0.5 (x_i - y_i)^{2}}{\text{beta}}, & \text{if } |x_i - y_i| < \text{beta} \\
|
|
629
|
+
|x_i - y_i| - 0.5 * {\text{beta}}, & \text{otherwise.}
|
|
633
630
|
\end{cases}
|
|
634
631
|
|
|
635
|
-
Where :math:`{\beta}` represents the threshold `beta`.
|
|
632
|
+
Where :math:`{\text{beta}}` represents the threshold `beta`.
|
|
636
633
|
|
|
637
634
|
If `reduction` is not `none`, then:
|
|
638
635
|
|
|
@@ -653,8 +650,11 @@ class SmoothL1Loss(LossBase):
|
|
|
653
650
|
robust to outliers, and the loss function has better robustness.
|
|
654
651
|
|
|
655
652
|
Args:
|
|
656
|
-
beta (
|
|
657
|
-
Default: ``1.0`` .
|
|
653
|
+
beta (number, optional): The loss function calculates the threshold of the transformation
|
|
654
|
+
between L1Loss and L2Loss. Default: ``1.0`` .
|
|
655
|
+
|
|
656
|
+
- Ascend: The value should be equal to or greater than zero.
|
|
657
|
+
- CPU/GPU: The value should be greater than zero.
|
|
658
658
|
reduction (str, optional): Apply specific reduction method to the output: ``'none'`` , ``'mean'`` ,
|
|
659
659
|
``'sum'`` . Default: ``'none'`` .
|
|
660
660
|
|
|
@@ -663,22 +663,26 @@ class SmoothL1Loss(LossBase):
|
|
|
663
663
|
- ``'sum'``: the output elements will be summed.
|
|
664
664
|
|
|
665
665
|
Inputs:
|
|
666
|
-
- **logits** (Tensor) - Predictive value. Tensor of any dimension.
|
|
667
|
-
float32.
|
|
668
|
-
- **labels** (Tensor) - Ground truth data, same shape and dtype as the `logits`.
|
|
666
|
+
- **logits** (Tensor) - Predictive value. Tensor of any dimension. Supported dtypes:
|
|
669
667
|
|
|
668
|
+
- Ascend: float16, float32, bfloat16.
|
|
669
|
+
- CPU/GPU: float16, float32, float64.
|
|
670
|
+
- **labels** (Tensor) - Ground truth data.
|
|
671
|
+
|
|
672
|
+
- CPU/Ascend: has the same shape as the `logits`,
|
|
673
|
+
`logits` and `labels` comply with the implicit type conversion rules to make the data types consistent.
|
|
674
|
+
- GPU: has the same shape and dtype as the `logits`.
|
|
670
675
|
Outputs:
|
|
671
676
|
Tensor, if `reduction` is ``'none'``, then output is a tensor with the same shape as `logits`.
|
|
672
677
|
Otherwise the shape of output tensor is :math:`()`.
|
|
673
678
|
|
|
674
679
|
Raises:
|
|
675
|
-
TypeError: If `
|
|
676
|
-
|
|
677
|
-
TypeError: If `logits` or `labels` are not Tensor.
|
|
678
|
-
TypeError: If dtype of `logits` or `labels` is neither float16 not float32.
|
|
679
|
-
TypeError: If dtype of `logits` is not the same as `labels`.
|
|
680
|
-
ValueError: If `beta` is less than or equal to 0.
|
|
680
|
+
TypeError: If input `logits` or `labels` are not Tensor.
|
|
681
|
+
RuntimeError: If dtype of `logits` or `labels` is not one of float16, float32, float64, bfloat16.
|
|
681
682
|
ValueError: If shape of `logits` is not the same as `labels`.
|
|
683
|
+
ValueError: If `reduction` is not one of ``'none'``, ``'mean'``, ``'sum'``.
|
|
684
|
+
TypeError: If `beta` is not a float, int or bool.
|
|
685
|
+
RuntimeError: If `beta` is less than or equal to 0.
|
|
682
686
|
|
|
683
687
|
Supported Platforms:
|
|
684
688
|
``Ascend`` ``GPU`` ``CPU``
|
|
@@ -1631,7 +1635,7 @@ class MultiMarginLoss(LossBase):
|
|
|
1631
1635
|
def __init__(self, p=1, margin=1.0, reduction='mean', weight=None):
|
|
1632
1636
|
"""Initialize MultiMarginLoss."""
|
|
1633
1637
|
super(MultiMarginLoss, self).__init__()
|
|
1634
|
-
self.multi_margin_loss =
|
|
1638
|
+
self.multi_margin_loss = ops.MultiMarginLoss(p=p, margin=margin, reduction=reduction)
|
|
1635
1639
|
self.weight = weight
|
|
1636
1640
|
|
|
1637
1641
|
def construct(self, x, target, weight=None):
|
|
@@ -1718,22 +1722,11 @@ class BCELoss(LossBase):
|
|
|
1718
1722
|
def __init__(self, weight=None, reduction='mean'):
|
|
1719
1723
|
"""Initialize BCELoss."""
|
|
1720
1724
|
super(BCELoss, self).__init__(reduction)
|
|
1721
|
-
self.
|
|
1722
|
-
self.
|
|
1723
|
-
if not self.weight_one:
|
|
1724
|
-
self.weight = weight
|
|
1725
|
-
else:
|
|
1726
|
-
self.ones = P.OnesLike()
|
|
1725
|
+
self.reduction = reduction
|
|
1726
|
+
self.weight = weight
|
|
1727
1727
|
|
|
1728
1728
|
def construct(self, logits, labels):
|
|
1729
|
-
|
|
1730
|
-
_check_is_tensor('labels', labels, self.cls_name)
|
|
1731
|
-
if self.weight_one:
|
|
1732
|
-
weight = self.ones(logits)
|
|
1733
|
-
else:
|
|
1734
|
-
weight = self.weight
|
|
1735
|
-
loss = self.binary_cross_entropy(logits, labels, weight)
|
|
1736
|
-
return loss
|
|
1729
|
+
return F.binary_cross_entropy(logits, labels, self.weight, self.reduction)
|
|
1737
1730
|
|
|
1738
1731
|
|
|
1739
1732
|
class CosineEmbeddingLoss(LossBase):
|
|
@@ -1887,7 +1880,7 @@ class MultilabelMarginLoss(LossBase):
|
|
|
1887
1880
|
|
|
1888
1881
|
def __init__(self, reduction='mean'):
|
|
1889
1882
|
super(MultilabelMarginLoss, self).__init__()
|
|
1890
|
-
self.multilabel_margin_loss =
|
|
1883
|
+
self.multilabel_margin_loss = ops.MultilabelMarginLoss(reduction=reduction)
|
|
1891
1884
|
|
|
1892
1885
|
def construct(self, x, target):
|
|
1893
1886
|
loss, _ = self.multilabel_margin_loss(x, target)
|
|
@@ -2265,7 +2258,8 @@ class TripletMarginLoss(LossBase):
|
|
|
2265
2258
|
- ``'mean'``: compute and return the mean of elements in the output.
|
|
2266
2259
|
- ``'sum'``: the output elements will be summed.
|
|
2267
2260
|
|
|
2268
|
-
margin (Union[Tensor, float]): Make a margin between the positive pair and the negative pair.
|
|
2261
|
+
margin (Union[Tensor, float]): Make a margin between the positive pair and the negative pair. The length of
|
|
2262
|
+
shape of `margin` must be 0.
|
|
2269
2263
|
Default: ``1.0`` .
|
|
2270
2264
|
|
|
2271
2265
|
Inputs:
|
|
@@ -2275,7 +2269,8 @@ class TripletMarginLoss(LossBase):
|
|
|
2275
2269
|
shape as `x`. :math:`p` in the above formula.
|
|
2276
2270
|
- **negative** (Tensor) - A sample belonging to the different class from `x`, with the same type and shape
|
|
2277
2271
|
as `x`. :math:`n` in the above formula.
|
|
2278
|
-
- **margin** (Union[Tensor, float]) - Make a margin between the positive pair and the negative pair.
|
|
2272
|
+
- **margin** (Union[Tensor, float]) - Make a margin between the positive pair and the negative pair. The length
|
|
2273
|
+
of shape of `margin` must be 0.
|
|
2279
2274
|
Default: ``1.0`` .
|
|
2280
2275
|
|
|
2281
2276
|
Outputs:
|
mindspore/nn/optim/ada_grad.py
CHANGED
|
@@ -78,6 +78,7 @@ class Adagrad(Optimizer):
|
|
|
78
78
|
:math:`state\_sum` stands for the accumulated squared sum of the gradients :math:`accum`.
|
|
79
79
|
:math:`g` stands for `grads`, :math:`\lambda` stands for `weight_decay`.
|
|
80
80
|
:math:`\gamma` stands for `learning_rate`, :math:`w` stands for `params`.
|
|
81
|
+
:math:`t` represents current `step`.
|
|
81
82
|
|
|
82
83
|
Note:
|
|
83
84
|
If parameters are not grouped, the `weight_decay` in optimizer will be applied on the network parameters without
|
mindspore/nn/optim/adadelta.py
CHANGED
|
@@ -134,9 +134,9 @@ class Adadelta(Optimizer):
|
|
|
134
134
|
|
|
135
135
|
Raises:
|
|
136
136
|
TypeError: If `learning_rate` is not one of int, float, Tensor, Iterable, LearningRateSchedule.
|
|
137
|
-
TypeError: If element of `
|
|
137
|
+
TypeError: If element of `params` is neither Parameter nor dict.
|
|
138
138
|
TypeError: If `rho`, `epsilon` or `loss_scale` is not a float.
|
|
139
|
-
TypeError: If `weight_decay` is
|
|
139
|
+
TypeError: If `weight_decay` is not float, int or cell.
|
|
140
140
|
ValueError: if `rho` is not in range [0.0, 1.0].
|
|
141
141
|
ValueError: If `loss_scale` is less than or equal to 0.
|
|
142
142
|
ValueError: If `learning_rate`, `epsilon` or `weight_decay` is less than 0.
|
mindspore/nn/optim/adam.py
CHANGED
|
@@ -633,7 +633,7 @@ class Adam(Optimizer):
|
|
|
633
633
|
Raises:
|
|
634
634
|
KeyError: If kwargs got keys other than 'use_lazy' or 'use_offload'.
|
|
635
635
|
TypeError: If `learning_rate` is not one of int, float, Tensor, Iterable, LearningRateSchedule.
|
|
636
|
-
TypeError: If element of `
|
|
636
|
+
TypeError: If element of `params` is neither Parameter nor dict.
|
|
637
637
|
TypeError: If `beta1`, `beta2`, `eps` or `loss_scale` is not a float.
|
|
638
638
|
TypeError: If `weight_decay` is neither float nor int.
|
|
639
639
|
TypeError: If `use_locking`, `use_nesterov`, `use_amsgrad`, `use_lazy` or `use_offload` is not a bool.
|
mindspore/nn/optim/lars.py
CHANGED
|
@@ -82,7 +82,7 @@ class LARS(Optimizer):
|
|
|
82
82
|
&\hline \\[-1.ex]
|
|
83
83
|
\end{array}
|
|
84
84
|
|
|
85
|
-
:math:`w` represents the network
|
|
85
|
+
:math:`w` represents the network's params, :math:`g` represents `gradients`,
|
|
86
86
|
:math:`t` represents the current step, :math:`\lambda` represents `weight_decay` in `optimizer`,
|
|
87
87
|
:math:`\gamma` represents `learning_rate` in `optimizer`, :math:`\eta` represents `coefficient`.
|
|
88
88
|
|
|
@@ -98,9 +98,6 @@ class LARS(Optimizer):
|
|
|
98
98
|
- **gradients** (tuple[Tensor]) - The gradients of `params` in the optimizer, the shape is the
|
|
99
99
|
as same as the `params` in the optimizer.
|
|
100
100
|
|
|
101
|
-
Outputs:
|
|
102
|
-
Union[Tensor[bool], tuple[Parameter]], it depends on the output of `optimizer`.
|
|
103
|
-
|
|
104
101
|
Supported Platforms:
|
|
105
102
|
``Ascend``
|
|
106
103
|
|
mindspore/nn/optim/optimizer.py
CHANGED
|
@@ -848,7 +848,7 @@ class Optimizer(Cell):
|
|
|
848
848
|
optim_result(bool): The results of updating parameters. This input is used to ensure that the parameters are
|
|
849
849
|
updated before they are broadcast.
|
|
850
850
|
Returns:
|
|
851
|
-
|
|
851
|
+
The broadcast parameters.
|
|
852
852
|
"""
|
|
853
853
|
# If rank_id is 0, 1, 2, 3, there are param0 ~ param7,
|
|
854
854
|
# then the value is[(param0, param4), (param1, param5), (param2, param6), (param3, param7)]
|
mindspore/nn/optim/rprop.py
CHANGED
|
@@ -44,8 +44,8 @@ class Rprop(Optimizer):
|
|
|
44
44
|
&\hspace{15mm} w_{t} \leftarrow w_{t-1}- \Delta_{t} \mathrm{sign}(g_t) \\
|
|
45
45
|
\end{gather*}
|
|
46
46
|
|
|
47
|
-
:math
|
|
48
|
-
|
|
47
|
+
:math:`g` represents `gradients`, :math:`w` represents `parameters`, :math:`\Delta_{min/max}` represents the
|
|
48
|
+
min/max step size, :math:`\eta_{+/-}` represents the factors of etaminus and etaplus.
|
|
49
49
|
|
|
50
50
|
Note:
|
|
51
51
|
If parameters are not grouped, the `weight_decay` in optimizer will be applied on the parameters without 'beta'
|
mindspore/nn/optim/thor.py
CHANGED
|
@@ -21,6 +21,7 @@ from mindspore.ops import functional as F, composite as C, operations as P
|
|
|
21
21
|
from mindspore.common.initializer import initializer
|
|
22
22
|
from mindspore.common.parameter import Parameter, ParameterTuple
|
|
23
23
|
from mindspore.common.tensor import Tensor
|
|
24
|
+
from mindspore.common import set_recursion_limit
|
|
24
25
|
import mindspore.ops as ops
|
|
25
26
|
import mindspore.nn as nn
|
|
26
27
|
import mindspore.common.dtype as mstype
|
|
@@ -355,7 +356,7 @@ def thor(net, learning_rate, damping, momentum, weight_decay=0.0, loss_scale=1.0
|
|
|
355
356
|
... amp_level="O2", keep_batchnorm_fp32=False)
|
|
356
357
|
|
|
357
358
|
"""
|
|
358
|
-
|
|
359
|
+
set_recursion_limit(10000)
|
|
359
360
|
ConvertNetUtils().convert_to_thor_net(net)
|
|
360
361
|
if context.get_context("device_target") == "Ascend":
|
|
361
362
|
return ThorAscend(net, learning_rate, damping, momentum, weight_decay, loss_scale, batch_size, decay_filter,
|
mindspore/nn/utils/init.py
CHANGED
|
@@ -23,19 +23,21 @@ from mindspore.common.parameter import Parameter
|
|
|
23
23
|
@contextmanager
|
|
24
24
|
def no_init_parameters():
|
|
25
25
|
r"""
|
|
26
|
-
|
|
27
|
-
instantiated and occupy physical memory. Loading a checkpoint will replace the parameter values.
|
|
28
|
-
Decorator can be applied during network instantiation to add an attribute `init_param` to all
|
|
29
|
-
parameters within the current Cell, setting it to `init_param=False` .
|
|
30
|
-
When `init_param=False` is detected, the initialization of the parameters is skipped,
|
|
31
|
-
and the parameters are assigned values directly from the checkpoint during loading,
|
|
32
|
-
which can optimize performance and reduce physical memory usage.
|
|
26
|
+
This interface is used to skip parameter initialization.
|
|
33
27
|
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
28
|
+
In scenarios where a checkpoint is loaded, parameters within the network instantiation will be
|
|
29
|
+
instantiated and occupy physical memory. Loading a checkpoint will replace the parameter values.
|
|
30
|
+
Decorator can be applied during network instantiation to add an attribute `init_param` to all
|
|
31
|
+
parameters within the current Cell, setting it to `init_param=False` .
|
|
32
|
+
When `init_param=False` is detected, the initialization of the parameters is skipped,
|
|
33
|
+
and the parameters are assigned values directly from the checkpoint during loading,
|
|
34
|
+
which can optimize performance and reduce physical memory usage.
|
|
37
35
|
|
|
38
|
-
|
|
36
|
+
Note:
|
|
37
|
+
Initialization of parameters created with `initializer` can only be skipped.
|
|
38
|
+
Parameters created by `Tensor` or `numpy` cannot be skipped.
|
|
39
|
+
|
|
40
|
+
Examples:
|
|
39
41
|
>>> import mindspore as ms
|
|
40
42
|
>>> from mindspore import nn, ops, load_checkpoint
|
|
41
43
|
>>> from mindspore.common.initializer import initializer
|