mindspore-2.2.0-cp38-cp38-win_amd64.whl → mindspore-2.2.11-cp38-cp38-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of mindspore has been flagged as potentially problematic.
Files changed (112)
  1. mindspore/.commit_id +1 -1
  2. mindspore/_c_dataengine.cp38-win_amd64.pyd +0 -0
  3. mindspore/_c_expression.cp38-win_amd64.pyd +0 -0
  4. mindspore/_c_mindrecord.cp38-win_amd64.pyd +0 -0
  5. mindspore/_checkparam.py +3 -3
  6. mindspore/_extends/graph_kernel/model/graph_split.py +84 -76
  7. mindspore/_extends/graph_kernel/splitter.py +3 -2
  8. mindspore/_extends/parallel_compile/akg_compiler/build_tbe_kernel.py +83 -66
  9. mindspore/_extends/parallel_compile/akg_compiler/tbe_topi.py +4 -4
  10. mindspore/_extends/parallel_compile/akg_compiler/util.py +10 -7
  11. mindspore/_extends/parallel_compile/tbe_compiler/tbe_helper.py +2 -1
  12. mindspore/_extends/parse/__init__.py +3 -2
  13. mindspore/_extends/parse/parser.py +6 -1
  14. mindspore/_extends/parse/standard_method.py +14 -11
  15. mindspore/_extends/remote/kernel_build_server.py +2 -1
  16. mindspore/common/_utils.py +16 -0
  17. mindspore/common/api.py +1 -1
  18. mindspore/common/auto_dynamic_shape.py +81 -85
  19. mindspore/common/dump.py +1 -1
  20. mindspore/common/tensor.py +3 -20
  21. mindspore/config/op_info.config +1 -1
  22. mindspore/context.py +11 -4
  23. mindspore/dataset/engine/cache_client.py +8 -5
  24. mindspore/dataset/engine/datasets_standard_format.py +5 -0
  25. mindspore/dataset/vision/transforms.py +21 -21
  26. mindspore/experimental/optim/adam.py +1 -1
  27. mindspore/gen_ops.py +1 -1
  28. mindspore/include/api/model.h +17 -0
  29. mindspore/include/api/status.h +8 -3
  30. mindspore/mindspore_backend.dll +0 -0
  31. mindspore/mindspore_common.dll +0 -0
  32. mindspore/mindspore_core.dll +0 -0
  33. mindspore/mindspore_shared_lib.dll +0 -0
  34. mindspore/nn/cell.py +0 -3
  35. mindspore/nn/layer/activation.py +4 -5
  36. mindspore/nn/layer/conv.py +39 -23
  37. mindspore/nn/layer/flash_attention.py +54 -129
  38. mindspore/nn/layer/math.py +3 -7
  39. mindspore/nn/layer/rnn_cells.py +5 -5
  40. mindspore/nn/wrap/__init__.py +4 -2
  41. mindspore/nn/wrap/cell_wrapper.py +12 -3
  42. mindspore/numpy/utils_const.py +5 -5
  43. mindspore/opencv_core452.dll +0 -0
  44. mindspore/opencv_imgcodecs452.dll +0 -0
  45. mindspore/opencv_imgproc452.dll +0 -0
  46. mindspore/ops/_grad_experimental/grad_array_ops.py +1 -1
  47. mindspore/ops/_grad_experimental/grad_implementations.py +2 -2
  48. mindspore/ops/_grad_experimental/grad_math_ops.py +19 -18
  49. mindspore/ops/_grad_experimental/grad_sparse_ops.py +3 -3
  50. mindspore/ops/_op_impl/aicpu/add.py +3 -3
  51. mindspore/ops/_op_impl/aicpu/linear_sum_assignment.py +21 -2
  52. mindspore/ops/_utils/utils.py +2 -0
  53. mindspore/ops/composite/multitype_ops/_compile_utils.py +2 -1
  54. mindspore/ops/composite/multitype_ops/getitem_impl.py +2 -2
  55. mindspore/ops/function/array_func.py +10 -7
  56. mindspore/ops/function/grad/grad_func.py +0 -1
  57. mindspore/ops/function/nn_func.py +98 -9
  58. mindspore/ops/function/random_func.py +2 -1
  59. mindspore/ops/op_info_register.py +24 -21
  60. mindspore/ops/operations/__init__.py +6 -2
  61. mindspore/ops/operations/_grad_ops.py +25 -6
  62. mindspore/ops/operations/_inner_ops.py +155 -23
  63. mindspore/ops/operations/array_ops.py +9 -7
  64. mindspore/ops/operations/comm_ops.py +2 -2
  65. mindspore/ops/operations/custom_ops.py +85 -68
  66. mindspore/ops/operations/inner_ops.py +26 -3
  67. mindspore/ops/operations/math_ops.py +7 -6
  68. mindspore/ops/operations/nn_ops.py +193 -49
  69. mindspore/parallel/_parallel_serialization.py +10 -3
  70. mindspore/parallel/_tensor.py +4 -1
  71. mindspore/parallel/checkpoint_transform.py +13 -2
  72. mindspore/parallel/shard.py +17 -10
  73. mindspore/profiler/common/util.py +1 -0
  74. mindspore/profiler/parser/ascend_hccl_generator.py +232 -0
  75. mindspore/profiler/parser/ascend_msprof_exporter.py +86 -43
  76. mindspore/profiler/parser/ascend_msprof_generator.py +196 -9
  77. mindspore/profiler/parser/ascend_op_generator.py +1 -1
  78. mindspore/profiler/parser/ascend_timeline_generator.py +6 -182
  79. mindspore/profiler/parser/base_timeline_generator.py +1 -1
  80. mindspore/profiler/parser/cpu_gpu_timeline_generator.py +2 -2
  81. mindspore/profiler/parser/framework_parser.py +1 -1
  82. mindspore/profiler/parser/profiler_info.py +19 -0
  83. mindspore/profiler/profiling.py +46 -24
  84. mindspore/rewrite/api/pattern_engine.py +1 -1
  85. mindspore/rewrite/parsers/for_parser.py +7 -7
  86. mindspore/rewrite/parsers/module_parser.py +4 -4
  87. mindspore/rewrite/symbol_tree.py +1 -4
  88. mindspore/run_check/_check_version.py +5 -3
  89. mindspore/safeguard/rewrite_obfuscation.py +52 -28
  90. mindspore/train/callback/_summary_collector.py +1 -1
  91. mindspore/train/dataset_helper.py +1 -0
  92. mindspore/train/model.py +2 -2
  93. mindspore/train/serialization.py +97 -11
  94. mindspore/train/summary/_summary_adapter.py +1 -1
  95. mindspore/train/summary/summary_record.py +23 -7
  96. mindspore/version.py +1 -1
  97. {mindspore-2.2.0.dist-info → mindspore-2.2.11.dist-info}/METADATA +3 -2
  98. {mindspore-2.2.0.dist-info → mindspore-2.2.11.dist-info}/RECORD +101 -112
  99. mindspore/ops/_op_impl/_custom_op/flash_attention/__init__.py +0 -0
  100. mindspore/ops/_op_impl/_custom_op/flash_attention/attention.py +0 -406
  101. mindspore/ops/_op_impl/_custom_op/flash_attention/constants.py +0 -41
  102. mindspore/ops/_op_impl/_custom_op/flash_attention/flash_attention_bwd.py +0 -467
  103. mindspore/ops/_op_impl/_custom_op/flash_attention/flash_attention_fwd.py +0 -563
  104. mindspore/ops/_op_impl/_custom_op/flash_attention/flash_attention_impl.py +0 -193
  105. mindspore/ops/_op_impl/_custom_op/flash_attention/tik_ops_utils.py +0 -435
  106. mindspore/ops/_op_impl/_custom_op/flash_attention/tiling_strategy/__init__.py +0 -0
  107. mindspore/ops/_op_impl/_custom_op/flash_attention/tiling_strategy/sparse_tiling.py +0 -45
  108. mindspore/ops/_op_impl/_custom_op/flash_attention/tiling_strategy/strategy.py +0 -67
  109. mindspore/ops/_op_impl/_custom_op/flash_attention/tiling_strategy/wukong_tiling.py +0 -62
  110. {mindspore-2.2.0.dist-info → mindspore-2.2.11.dist-info}/WHEEL +0 -0
  111. {mindspore-2.2.0.dist-info → mindspore-2.2.11.dist-info}/entry_points.txt +0 -0
  112. {mindspore-2.2.0.dist-info → mindspore-2.2.11.dist-info}/top_level.txt +0 -0
@@ -36,13 +36,16 @@ if platform.system() == "Linux":
 BUILT_IN_OPS_REGISTER_PATH = "mindspore/ops/_op_impl"
 BUILT_IN_CUSTOM_OPS_REGISTER_PATH = "mindspore/ops/_op_impl/_custom_op"
 
+KEY_NAME = "name"
+ASCEND_CUSTOM_OPP_PATH = "ASCEND_CUSTOM_OPP_PATH"
 
-def _get_reg_info_attr(op_info, attr_name):
+
+def _get_reg_info_attr(op_info, attr_name, default_value=None):
     """get attr value"""
     for _, item in enumerate(op_info.get("attr", [])):
-        if item.get("name") == attr_name:
+        if item.get(KEY_NAME) == attr_name:
             return item.get("defaultValue")
-    return None
+    return default_value
 
 
 class _CustomInstaller:
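For readers skimming the diff, here is a quick standalone illustration of the behavioral change in `_get_reg_info_attr`: a missing attribute now yields a caller-supplied default instead of always `None`. The `op_info` dict below is made up for illustration only; only the helper itself is taken from the diff.

```python
KEY_NAME = "name"

def _get_reg_info_attr(op_info, attr_name, default_value=None):
    """get attr value"""
    for _, item in enumerate(op_info.get("attr", [])):
        if item.get(KEY_NAME) == attr_name:
            return item.get("defaultValue")
    return default_value

# Illustrative op_info registration dict (not from the package).
op_info = {"attr": [{"name": "cust_aicpu", "defaultValue": "libcust.so"}]}
print(_get_reg_info_attr(op_info, "cust_aicpu"))           # 'libcust.so'
print(_get_reg_info_attr(op_info, "missing"))              # None (old behavior)
print(_get_reg_info_attr(op_info, "missing", "fallback"))  # 'fallback' (new default_value path)
```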
@@ -66,12 +69,12 @@ class _CustomInstaller:
     @staticmethod
     def _set_env(custom_opp_path):
         """set custom file path to env"""
-        if not os.environ.get("ASCEND_CUSTOM_OPP_PATH"):
-            os.environ["ASCEND_CUSTOM_OPP_PATH"] = custom_opp_path
+        if not os.environ.get(ASCEND_CUSTOM_OPP_PATH):
+            os.environ[ASCEND_CUSTOM_OPP_PATH] = custom_opp_path
         else:
-            paths = os.environ["ASCEND_CUSTOM_OPP_PATH"].split(':')
+            paths = os.environ[ASCEND_CUSTOM_OPP_PATH].split(':')
             if custom_opp_path not in paths:
-                os.environ["ASCEND_CUSTOM_OPP_PATH"] = custom_opp_path + ':' + os.environ["ASCEND_CUSTOM_OPP_PATH"]
+                os.environ[ASCEND_CUSTOM_OPP_PATH] = custom_opp_path + ':' + os.environ[ASCEND_CUSTOM_OPP_PATH]
 
     @staticmethod
     def _create_dir(*dir_names):
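The `_set_env` change is purely a string-constant refactor; the prepend-if-absent logic is unchanged. A small self-contained sketch of that logic, with illustrative paths (the function body mirrors the diff, the call sequence is only an example):

```python
import os

ASCEND_CUSTOM_OPP_PATH = "ASCEND_CUSTOM_OPP_PATH"

def set_env(custom_opp_path):
    """Prepend custom_opp_path to the env var unless it is already listed."""
    if not os.environ.get(ASCEND_CUSTOM_OPP_PATH):
        os.environ[ASCEND_CUSTOM_OPP_PATH] = custom_opp_path
    else:
        paths = os.environ[ASCEND_CUSTOM_OPP_PATH].split(':')
        if custom_opp_path not in paths:
            os.environ[ASCEND_CUSTOM_OPP_PATH] = custom_opp_path + ':' + os.environ[ASCEND_CUSTOM_OPP_PATH]

set_env("/tmp/opp_a")
set_env("/tmp/opp_b")
set_env("/tmp/opp_a")  # already present, so not duplicated
print(os.environ[ASCEND_CUSTOM_OPP_PATH])  # /tmp/opp_b:/tmp/opp_a
```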
@@ -94,11 +97,11 @@ class _CustomInstaller:
         _CustomInstaller.copied_paths.append(src_path)
         if os.path.isfile(src_path):
             lock_file = os.path.join(dst_dir, "file.lock")
-            with open(lock_file, "w") as f:
+            with os.fdopen(os.open(lock_file, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600), 'w') as f:
                 fcntl.flock(f.fileno(), fcntl.LOCK_EX)
                 shutil.copy(src_path, dst_dir)
 
-    def _check(self):
+    def check(self):
         """check if the reg info need written"""
         if platform.system() != "Linux":
             return False
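The `open(lock_file, "w")` → `os.fdopen(os.open(...), 'w')` change here (and in the repo-writing hunk further down) caps the lock file's permissions at owner read/write when the file is created, instead of the looser default mode. A hedged, Linux-only sketch of that pattern outside the installer; the path and payload are illustrative:

```python
import fcntl
import os

lock_file = "/tmp/example_file.lock"  # illustrative path

# O_CREAT | O_TRUNC with mode 0o600: a newly created lock file gets at most
# owner read/write permissions (the process umask may restrict it further),
# rather than the wider default that plain open() would request.
with os.fdopen(os.open(lock_file, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600), 'w') as f:
    fcntl.flock(f.fileno(), fcntl.LOCK_EX)  # exclusive lock for the critical section
    f.write("guarded work goes here\n")
# The lock is released when the file object is closed.
```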
@@ -153,12 +156,12 @@ class _CustomInstaller:
         # attr
         attrs_name = []
         for _, item in enumerate(self.op_info.get("attr", [])):
-            attr_name = item.get("name")
+            attr_name = item.get(KEY_NAME)
             attrs_name.append(attr_name)
             key = "attr_" + attr_name
             op_info[key] = {}
             for k, v in item.items():
-                if k != "name":
+                if k != KEY_NAME:
                     op_info[key][k] = v
         if attrs_name:
             op_info["attr"] = {"list": ",".join(attrs_name)}
@@ -171,7 +174,7 @@ class _CustomInstaller:
             item = inputs[i] if i < input_num else outputs[i - input_num]
             key = "input" if i < input_num else "output"
             key += str(item.get("index"))
-            op_info[key] = {"name": item.get("name"),
+            op_info[key] = {KEY_NAME: item.get(KEY_NAME),
                             "paramType": item.get("paramType", "required"),
                             "shape": item.get("shape", "all")}
             dtype, formats = _get_dtype_format(i)
@@ -181,7 +184,8 @@ class _CustomInstaller:
                 op_info[key]["format"] = ",".join(formats)
         return op_info
 
-    def _gen_ai_cpu_reg_info(self, so_file):
+    @staticmethod
+    def _gen_ai_cpu_reg_info(so_file):
         """generate reg info"""
         op_info = {"opInfo": {"computeCost": "100",
                               "engine": "DNN_VM_AICPU",
@@ -198,7 +202,7 @@ class _CustomInstaller:
         repo = {}
         save_path = os.path.join(dst_dir, file_name)
         lock_file = os.path.join(dst_dir, "file.lock")
-        with open(lock_file, "w") as f:
+        with os.fdopen(os.open(lock_file, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600), 'w') as f:
             fcntl.flock(f.fileno(), fcntl.LOCK_EX)
             if os.path.isfile(save_path):
                 with open(save_path, 'r') as fr:
@@ -211,7 +215,7 @@ class _CustomInstaller:
 
     def run(self):
         """save reg info to file"""
-        if not self._check():
+        if not self.check():
             return
         so_name = _get_reg_info_attr(self.op_info, "cust_aicpu")
         if so_name:
@@ -380,7 +384,6 @@ class RegOp:
         """
         if not isinstance(value, str):
             raise TypeError("%s value must be str" % str(value))
-        return True
 
     def _is_int(self, value):
         """
@@ -394,7 +397,6 @@ class RegOp:
         """
        if not isinstance(value, int):
            raise TypeError("%s value must be int" % str(value))
-        return True
 
     def _is_bool(self, value):
         """
@@ -408,7 +410,6 @@ class RegOp:
         """
         if not isinstance(value, bool):
             raise TypeError("%s value must be bool" % str(value))
-        return True
 
     @staticmethod
     def _is_list(value):
@@ -423,7 +424,6 @@ class RegOp:
         """
         if not isinstance(value, list):
             raise TypeError("%s value must be list" % str(value))
-        return True
 
     def _check_param(self, param_list, key_list, fn_list, kwargs):
         """
@@ -491,7 +491,9 @@ class RegOp:
             self._is_string(arg[1])
             if len(arg) == 3:
                 self._is_string(arg[2])
-            dtype_format.append(arg)
+                dtype_format.append(arg)
+            else:
+                dtype_format.append(arg)
         self.dtype_format_.append(tuple(dtype_format))
         return self
 
@@ -920,7 +922,8 @@ class TBERegOp(RegOp):
         Args:
             pattern (str): Value of op pattern, e.g. "broadcast", "reduce". Default: ``None`` .
         """
-        if pattern is not None and self._is_string(pattern):
+        if pattern is not None:
+            self._is_string(pattern)
             self.op_pattern_ = pattern
         return self
 
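A note on why the `op_pattern` condition had to be split once the `RegOp` validators above stopped returning `True`: a validator that only raises on failure (and implicitly returns `None`) is always falsy inside an `and`, so the old compound check would silently skip the assignment. A minimal sketch of that failure mode; the names below are illustrative stand-ins, not the library code:

```python
def is_string(value):
    """Raise on non-str; implicitly returns None, like the updated validators."""
    if not isinstance(value, str):
        raise TypeError("%s value must be str" % str(value))

pattern = "broadcast"
op_pattern = None

if pattern is not None and is_string(pattern):  # old style: is_string(...) is None -> falsy
    op_pattern = pattern
print(op_pattern)                               # None: the attribute was never set

if pattern is not None:                         # new style: validate first, then assign
    is_string(pattern)
    op_pattern = pattern
print(op_pattern)                               # broadcast
```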
@@ -118,7 +118,7 @@ from .nn_ops import (LSTM, SGD, Adam, AdamWeightDecay, FusedSparseAdam, FusedSpa
                      Dilation2D, DataFormatVecPermute, DeformableOffsets, Dense, FractionalAvgPool,
                      FractionalMaxPool, FractionalMaxPool3DWithFixedKsize, FractionalMaxPoolWithFixedKsize,
                      GridSampler2D, TripletMarginLoss, UpsampleNearest3D, UpsampleTrilinear3D, PadV3, ChannelShuffle,
-                     GLU, MaxUnpool3D, Pdist)
+                     GLU, MaxUnpool3D, Pdist, RmsNorm, PagedAttention, PagedAttentionMask, ReshapeAndCache)
 from .other_ops import (Assign, IOU, BoundingBoxDecode, BoundingBoxEncode,
                         ConfusionMatrix, UpdateState, Load, StopGradient,
                         CheckValid, Partial, Depend, Push, Pull, PyExecute, PyFunc, _DynamicLossScale,
@@ -691,7 +691,11 @@ __all__ = [
     "IndexPut",
     "MaskedScatter",
     "Ormqr",
-    "RandpermV2"
+    "RandpermV2",
+    "RmsNorm",
+    "PagedAttention",
+    "PagedAttentionMask",
+    "ReshapeAndCache"
 ]
 
 __custom__ = [
@@ -3845,7 +3845,7 @@ class FlashAttentionScoreGrad(Primitive):
     """
     @prim_attr_register
     def __init__(self, head_num, keep_prob=1.0, scale_value=1.0, pre_tokens=65536, next_tokens=65536, inner_precise=1,
-                 input_layout='BSH'):
+                 input_layout='BSH', sparse_mode=0):
         """Initialize FlashAttentionScoreGrad."""
         validator.check_value_type('head_num', head_num, [int], self.name)
         validator.check_value_type('keep_prob', keep_prob, [int, float], self.name)
@@ -3855,11 +3855,30 @@ class FlashAttentionScoreGrad(Primitive):
         validator.check_value_type('pre_tokens', pre_tokens, [int], self.name)
         validator.check_value_type('next_tokens', next_tokens, [int], self.name)
         validator.check_value_type('inner_precise', inner_precise, [int], self.name)
+        validator.check_value_type('sparse_mode', sparse_mode, [int], self.name)
         if inner_precise not in [0, 1]:
             raise ValueError(f"Attribute 'inner_precise' must be either 0 or 1, but got {inner_precise}")
         validator.check_value_type('input_layout', input_layout, [str], self.name)
-        if input_layout not in ["BSH"]:
-            raise ValueError(f"Attribute 'input_layout' must be either 'bsh' or 'sbh', but got {input_layout}")
-        self.init_prim_io_names(inputs=['query', 'key', 'value', 'attn_mask', 'attention_in', 'softmax_max',
-                                        'softmax_sum', 'dy', 'drop_mask', 'real_shift', "padding_mask", 'softmax_out'],
-                                outputs=['dq', 'dk', 'dv'])
+        if input_layout not in ["BSH", "BNSD"]:
+            raise ValueError(f"Attribute 'input_layout' must be either 'BSH' or 'BNSD', but got {input_layout}")
+        self.init_prim_io_names(inputs=['query', 'key', 'value', 'dy', 'pse_shift', 'drop_mask', "padding_mask",
+                                        'attn_mask', 'softmax_max', 'softmax_sum', 'softmax_out', 'attention_in',
+                                        'prefix'],
+                                outputs=['dq', 'dk', 'dv', 'dpse'])
+
+
+class RmsNormGrad(Primitive):
+    r"""
+    Calculates the gradient of RmsNorm operation.
+    .. warning::
+        This is an experimental API that is subject to change or deletion.
+
+    Supported Platforms:
+        ``Ascend``
+    """
+
+    @prim_attr_register
+    def __init__(self):
+        """Initialize RmsNormGrad."""
+        self.init_prim_io_names(inputs=["dy", "x", "rstd", "gamma"],
+                                outputs=["dx", "dgamma"])
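For orientation, the new `RmsNormGrad` primitive consumes `dy`, `x`, `rstd` (the reciprocal root-mean-square) and `gamma`. Below is a hedged numpy reference of the standard RMSNorm forward pass, assuming the usual definition; the epsilon value and shapes are illustrative and not read from the kernel:

```python
import numpy as np

def rms_norm_reference(x, gamma, eps=1e-6):
    """Standard RMSNorm: x / sqrt(mean(x**2) + eps) * gamma, normalized over the last axis."""
    rstd = 1.0 / np.sqrt(np.mean(np.square(x), axis=-1, keepdims=True) + eps)
    return x * rstd * gamma, rstd  # rstd is the quantity the backward primitive consumes

x = np.random.randn(2, 8).astype(np.float32)
gamma = np.ones(8, dtype=np.float32)
y, rstd = rms_norm_reference(x, gamma)
print(y.shape, rstd.shape)  # (2, 8) (2, 1)
```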
@@ -26,7 +26,7 @@ from mindspore.ops.operations._scalar_ops import bit_or, bit_and
 from mindspore.ops.operations.comm_ops import ReduceOp
 from mindspore.ops import signature as sig
 from mindspore.ops.operations.math_ops import _infer_shape_reduce
-from mindspore.ops.primitive import PrimitiveWithCheck, PrimitiveWithInfer, prim_attr_register, Primitive,\
+from mindspore.ops.primitive import PrimitiveWithCheck, PrimitiveWithInfer, prim_attr_register, Primitive, \
     _run_op, _check_contains_variable
 from mindspore._c_expression import Tensor as Tensor_
 from mindspore._c_expression import typing
@@ -167,6 +167,7 @@ class Quant(PrimitiveWithInfer):
         self.sqrt_mode = validator.check_value_type("sqrt_mode", sqrt_mode, [bool], self.name)
         self.round_mode = validator.check_string(round_mode, ["Round", "Floor", "Ceil", "Trunc"],
                                                  "round_mode", self.name)
+        self.add_prim_attr("dst_type", mstype.int8)
 
     def infer_shape(self, x_shape):
         return x_shape
@@ -174,7 +175,7 @@ class Quant(PrimitiveWithInfer):
     def infer_dtype(self, x_type):
         validator.check_subclass("input_x", x_type, mstype.tensor_type, self.name)
         validator.check_type_name("input_x", x_type, [mstype.float16, mstype.float32], self.name)
-        return mstype.int8
+        return self.get_attr_dict()['dst_type']
 
 
 class Lamb(PrimitiveWithInfer):
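The `Quant` change swaps a hard-coded output dtype for a `dst_type` attribute that is set in the constructor and read back in `infer_dtype`, so a pass that rewrites the attribute also changes the inferred type. A framework-free sketch of that pattern; the class and attribute store below are illustrative, not MindSpore internals:

```python
class AttrDrivenQuant:
    """Toy stand-in: the output dtype comes from a mutable attribute, not a literal."""

    def __init__(self):
        self.attrs = {"dst_type": "int8"}  # analogous to add_prim_attr("dst_type", mstype.int8)

    def infer_dtype(self, x_type):
        return self.attrs["dst_type"]      # analogous to get_attr_dict()['dst_type']

op = AttrDrivenQuant()
print(op.infer_dtype("float16"))           # int8 (the default)
op.attrs["dst_type"] = "int4"              # a later pass could rewrite the attribute
print(op.infer_dtype("float16"))           # int4
```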
@@ -491,7 +492,7 @@ class Receive(PrimitiveWithInfer):
         self.dtype = dtype
         self.group = group
         self.add_prim_attr("no_eliminate", True)
-        valid_type = [mstype.float16, mstype.float32, mstype.int32, mstype.int8, mstype.uint8]
+        valid_type = [mstype.float16, mstype.bfloat16, mstype.float32, mstype.int32, mstype.int8, mstype.uint8]
         args = {"dtype": dtype}
         validator.check_scalar_or_tensor_types_same(args, valid_type, self.name)
 
@@ -2146,13 +2147,14 @@ class ClipByNorm(PrimitiveWithInfer):
     @prim_attr_register
     def __init__(self, axis=None):
         """Initialize ClipByNorm"""
+        self.axis_str = 'axis'
         self.axis = () if axis is None else axis
-        validator.check_value_type('axis', self.axis, [int, tuple, list], self.name)
+        validator.check_value_type(self.axis_str, self.axis, [int, tuple, list], self.name)
         axis_check = self.axis if isinstance(self.axis, Iterable) else (self.axis,)
         for i, value in enumerate(axis_check):
             validator.check_value_type('axis[%d]' % i, value, [int], self.name)
-        self.init_attrs['axis'] = self.axis
-        self.add_prim_attr('axis', self.axis)
+        self.init_attrs[self.axis_str] = self.axis
+        self.add_prim_attr(self.axis_str, self.axis)
         self.init_prim_io_names(inputs=['x', 'clip_norm'], outputs=['output'])
 
     def infer_shape(self, x_shape, clip_norm_shape):
@@ -2729,27 +2731,29 @@ class CopyWithSlice(Primitive):
         self.init_prim_io_names(inputs=['x', 'y'], outputs=['x'])
 
 
-class MoeFFN(Primitive):
+class FFN(Primitive):
     r"""
-    The MoeFFN computation is similar to Feed-Forward Network, it contains matmul + gelu + matmul.
+    The FFN computation is similar to Feed-Forward Network, it contains matmul + gelu + matmul.
 
     Args:
         activation (string): The activation type, set to 'fastgelu' or 'gelu'.
-            Only support 'fastgelu' for now. Default: "fastgelu".
+            Only support 'fastgelu' for now. Default: "fastgelu".
+        inner_precise (int): The precise mode, set to 0 for high precision or 1 for high performance.
+            Only support 1 for now. Default: 0.
 
     Inputs:
         - **x** (Tensor) - The input tensor with data type of int8, float16.
          Input tensor of shape :math:`(batch\_size * seq\_length, hidden\_size)`.
+        - **weight1** (Tensor) - The weight1 tensor with data type of float16.
+          Weight1 tensor of shape :math:`(expert\_num, hidden\_size, ffn\_hidden\_size)`.
+        - **weight2** (Tensor) - The weight2 tensor with data type of float16.
+          Weight2 tensor of shape :math:`(expert\_num, ffn\_hidden\_size, hidden\_size)`.
         - **expert_tokens** (Tensor]) - The expert tokens tensor with data type of int64.
          Expert tokens tensor of shape :math:`(16,)`. For example, `(2, 1, 0, .., 9)`
          indicate that the 0th expert deals with 2 tokens, the 1th expert deals with 1 tokens,
          the 2th expert do noting and so on.
-        - **weight1** (Tensor) - The weight1 tensor with data type of float16.
-          Weight1 tensor of shape :math:`(expert\_num, hidden\_size, ffn\_hidden\_size)`.
         - **bias1** (Tensor) - The bias1 tensor with data type of float16.
          Bias1 tensor of shape :math:`(expert\_num, ffn\_hidden\_size)`.
-        - **weight2** (Tensor) - The weight2 tensor with data type of float16.
-          Weight2 tensor of shape :math:`(expert\_num, ffn\_hidden\_size, hidden\_size)`.
         - **bias2** (Tensor) - The bias2 tensor with data type of float16.
          Bias2 tensor of shape :math:`(expert\_num, hidden\_size)`.
         - **scale** (Tensor) - The scale tensor with data type of float16. Not enable now.
@@ -2771,21 +2775,149 @@ class MoeFFN(Primitive):
         >>> h_f = 4 * h
         >>> e = 16
         >>> x = Tensor(np.random.randn(b * s, h).astype(np.float16))
-        >>> expert_tokens = Tensor(np.random.randn(e).astype(np.int64))
         >>> w1 = Tensor(np.random.randn(e, h, h_f).astype(np.float16))
-        >>> bias1 = Tensor(np.random.randn(e, h_f).astype(np.float16))
         >>> w2 = Tensor(np.random.randn(e, h_f, h).astype(np.float16))
+        >>> expert_tokens = Tensor(np.random.randn(e).astype(np.int64))
+        >>> bias1 = Tensor(np.random.randn(e, h_f).astype(np.float16))
         >>> bias2 = Tensor(np.random.randn(e, h).astype(np.float16))
-        >>> moe_ffn = _inner_ops.MoeFFN("fastgelu")
-        >>> output = moe_ffn(x, w1, bias1, w2, bias2)
+        >>> ffn = _inner_ops.FFN("fastgelu", 1)
+        >>> output = ffn(x, w1, w2, expert_tokens, bias1, bias2)
         >>> print(output)
     """
 
     @prim_attr_register
-    def __init__(self, activation):
-        """Initialize MoeFFN."""
-        self.init_prim_io_names(inputs=["x", "expert_tokens", "weight1", "bias1",
-                                        "weight2", "bias2", "scale", "offset", "deq_scale1"
-                                        "deq_scale2"],
+    def __init__(self, activation, inner_precise):
+        """Initialize FFN."""
+        self.init_prim_io_names(inputs=["x", "weight1", "weight2", "expert_tokens", "bias1",
+                                        "bias2", "scale", "offset", "deq_scale1", "deq_scale2"],
                                 outputs=["y"])
-        self.activation = activation
+        cls_name = self.name
+        validator.check_value_type("activation", activation, [str], cls_name)
+        validator.check_value_type("inner_precise", inner_precise, [int], cls_name)
+
+
+class DecoderKVCache(Primitive):
+    r"""
+    The DecoderKVCache is used for decoding the KVCache of transformer network.
+
+    Args:
+        cache (Tensor): The cahe tensor with data type of int8, uint8, int16, uint16, float16, float32 and int32.
+            When seq_len_axis is 2, cache tensor of shape
+            :math:`(batch\_size, num_head, max\_seq\_length, hidden\_size)`.
+            When seq_len_axis is 1, cache tensor of shape
+            :math:`(batch\_size, max\_seq\_length, num_head, hidden\_size)`.
+        update (Tensor]): The tensor which is used to update the cache tensor. Same data type as cache tensor.
+            When seq_len_axis is 2, update tensor of shape
+            :math:`(batch\_size, num_head, update\_seq\_length, hidden\_size)`.
+            When seq_len_axis is 1, update tensor of shape
+            :math:`(batch\_size, update\_seq\_length, num_head, hidden\_size)`.
+        valid_seq_len (Tensor): The valid_seq_len tensor with data type of int64.
+            Valid_seq_len tensor of shape :math:`(batch\_size)`.
+        batch_index (Tensor): The batch_index tensor with data type of int64.
+            Batch_index tensor of shape :math:`(1)`. Indicate that which batch of cache tensor is going to be update.
+        seq_len_axis (int64): The seq_len_axis indicate which axis is seq_eln, set to '1' or '2'. Default: "2".
+        new_max_seq_len (Tensor): The new_max_seq_len tensor with data type of int64.
+            New_max_seq_len tensor of shape :math:`(1)`.
+            Indicate that user want to change the shape of cache tensor from
+            :math:`(batch\_size, num_head, max\_seq\_length, hidden\_size)` to
+            :math:
+            `(batch\_size * max\_seq\_length / new\_max\_seq\_length, num_head, new\_max\_seq\_length, hidden\_size)`
+            to update the cache tensor. This will not real change the shape of `cache` tensor. Not able for now.
+        cur_max_seq_len (Tensor): The new_max_seq_len tensor with data type of int64.
+            Cur_max_seq_len tensor of shape :math:`(1)`. Keep the current seq_len of cache tensor. Not abel for now.
+
+    Outputs:
+        With same data type and same shape as `cache` tensor.
+
+    Supported Platforms:
+        ``Ascend``
+
+    Examples:
+        >>> from mindspore.ops.operations import _inner_ops
+        >>> b = 4
+        >>> h = 40
+        >>> max_s = 1024
+        >>> s = 1
+        >>> d = 128
+        >>> cache = Tensor(np.random.randn(b, h, max_s, d).astype(np.float16))
+        >>> update = Tensor(np.random.randn(b, h, s, d).astype(np.float16))
+        >>> valid_seq_len = Tensor(np.random.randn(b).astype(np.int64))
+        >>> batch_index = Tensor(np.random.randn(1).astype(np.int64))
+        >>> new_max_seq_len = Tensor(np.random.randn(1).astype(np.int64))
+        >>> cur_max_seq_len = Tensor(np.random.randn(1).astype(np.int64))
+        >>> decoder_kv_cache = _inner_ops.DecoderKVCache()
+        >>> output = decoder_kv_cache(cache, update, valid_seq_len, batch_index, 2, new_max_seq_len, cur_max_seq_len)
+        >>> print(cache)
+    """
+    @prim_attr_register
+    def __init__(self):
+        """Initialize DecoderKVCache."""
+        self.init_prim_io_names(inputs=["cache", "update", "valid_seq_len", "batch_index", "seq_len_axis",
+                                        "new_max_seq_len", "cur_max_seq_len"],
+                                outputs=["out"])
+        self.add_prim_attr('side_effect_mem', True)
+
+
+class PromptKVCache(Primitive):
+    r"""
+    The PromptKVCache is used for prefill the KVCache of transformer network.
+
+    Args:
+        cache (Tensor): The cahe tensor with data type of int8, uint8, int16, uint16, float16, float32 and int32.
+            When seq_len_axis is 2, cache tensor of shape
+            :math:`(batch\_size, num_head, max\_seq\_length, hidden\_size)`.
+            When seq_len_axis is 1, cache tensor of shape
+            :math:`(batch\_size, max\_seq\_length, num_head, hidden\_size)`.
+        update (Tensor]): The tensor which is used to update the cache tensor. Same data type as cache tensor.
+            When seq_len_axis is 2, update tensor of shape
+            :math:`(batch\_size, num_head, update\_seq\_length, hidden\_size)`.
+            When seq_len_axis is 1, update tensor of shape
+            :math:`(batch\_size, update\_seq\_length, num_head, hidden\_size)`.
+        valid_seq_len (Tensor): The valid_seq_len tensor with data type of int64.
+            Valid_seq_len tensor of shape :math:`(batch\_size)`.
+        batch_index (Tensor): The batch_index tensor with data type of int64.
+            Batch_index tensor of shape :math:`(1)`. Indicate that which batch of cache tensor is going to be update.
+        seq_len_axis (int64): The seq_len_axis indicate which axis is seq_eln, set to '1' or '2'. Default: "2".
+        new_max_seq_len (Tensor): The new_max_seq_len tensor with data type of int64.
+            New_max_seq_len tensor of shape :math:`(1)`.
+            Indicate that user want to change the shape of cache tensor from
+            :math:`(batch\_size, num_head, max\_seq\_length, hidden\_size)` to
+            :math:
+            `(batch\_size * max\_seq\_length / new\_max\_seq\_length, num_head, new\_max\_seq\_length, hidden\_size)`
+            to update the cache tensor. This will not real change the shape of `cache` tensor. Not able for now.
+        cur_max_seq_len (Tensor): The new_max_seq_len tensor with data type of int64.
+            Cur_max_seq_len tensor of shape :math:`(1)`. Keep the current seq_len of cache tensor. Not abel for now.
+        align_mode (int64): indicate which axis is seq_eln, 0 is 'right', 1 is 'left'. Default: 0.
+
+    Outputs:
+        With same data type and same shape as `cache` tensor.
+
+    Supported Platforms:
+        ``Ascend``
+
+    Examples:
+        >>> from mindspore import Tensor
+        >>> from mindspore.ops.operations import _inner_ops
+        >>> b = 4
+        >>> h = 40
+        >>> max_s = 1024
+        >>> s = 256
+        >>> d = 128
+        >>> cache = Tensor(np.random.randn(b, h, max_s, d).astype(np.float16))
+        >>> update = Tensor(np.random.randn(b, h, s, d).astype(np.float16))
+        >>> valid_seq_len = Tensor(np.random.randn(b).astype(np.int64))
+        >>> batch_index = Tensor(np.random.randn(1).astype(np.int64))
+        >>> new_max_seq_len = Tensor(np.random.randn(1).astype(np.int64))
+        >>> cur_max_seq_len = Tensor(np.random.randn(1).astype(np.int64))
+        >>> prompt_kv_cache = _inner_ops.PromptKVCache(0)
+        >>> output = prompt_kv_cache(cache, update, valid_seq_len, batch_index, 2, new_max_seq_len, cur_max_seq_len)
+        >>> print(cache)
+    """
+    @prim_attr_register
+    def __init__(self, padding_mode="right"):
+        """Initialize PromptKVCache."""
+        self.init_prim_io_names(inputs=["cache", "update", "valid_seq_len", "batch_index", "seq_len_axis",
+                                        "new_max_seq_len", "cur_max_seq_len"],
+                                outputs=["out"])
+        self.add_prim_attr('side_effect_mem', True)
+        self.padding_mode = padding_mode
@@ -1208,7 +1208,7 @@ class UniqueWithPad(Primitive):
 
 
 class Split(Primitive):
-    """
+    r"""
     Splits the input tensor into output_num of tensors along the given axis and output numbers.
 
     Refer to :func:`mindspore.ops.split` for more details.
@@ -1222,7 +1222,7 @@ class Split(Primitive):
 
     Outputs:
         tuple[Tensor], the shape of each output tensor is the same, which is
-        :math:`(x_0, x_1, ..., x_{axis}/{output_num}, ..., x_{R-1})`.
+        :math:`(x_0, x_1, ..., x_{axis}/{output\_num}, ..., x_{R-1})`.
         And the data type is the same as `input_x`.
 
     Supported Platforms:
@@ -1763,16 +1763,18 @@ class FillV2(PrimitiveWithCheck):
         self.init_prim_io_names(inputs=['shape', 'value'], outputs=['y'])
 
     def check_elim(self, dims, x):
-        if x is None or (not isinstance(x, (Tensor, Tensor_))) or (x.shape != ()) or\
-                dims is None or (isinstance(dims, (tuple, list)) and dims) or\
-                isinstance(dims, (Tensor, Tensor_)):
+        x_is_invalid = x is None or (not isinstance(x, (Tensor, Tensor_))) or (x.shape != ())
+        dims_is_invalid = dims is None or (isinstance(dims, (tuple, list)) and dims) or\
+            isinstance(dims, (Tensor, Tensor_))
+        if x_is_invalid or dims_is_invalid:
             return (False, None)
         return (True, x)
 
     def infer_value(self, dims, x):
-        if x is None or dims is None or\
+        dims_is_invalid = dims is None or\
                 (isinstance(dims, (tuple, list)) and dims) or\
-                isinstance(dims, (Tensor, Tensor_)):
+                isinstance(dims, (Tensor, Tensor_))
+        if x is None or dims_is_invalid:
             return None
         return x
 
@@ -94,7 +94,7 @@ class ReduceOp:
 
 def check_collective_target_dtype(data_name, data_dtype, prim_name):
     """Check if data type is valid."""
-    default_target_dtypes = (mstype.int8, mstype.int32, mstype.float16, mstype.float32)
+    default_target_dtypes = (mstype.int8, mstype.uint8, mstype.int32, mstype.float16, mstype.bfloat16, mstype.float32)
     gpu_target_dtypes = (mstype.bool_, mstype.int8, mstype.int32, mstype.int64, mstype.uint32, mstype.uint64,
                          mstype.float16, mstype.float32, mstype.float64)
 
@@ -1310,4 +1310,4 @@ class _GetTensorSlice(PrimitiveWithInfer):
         from mindspore.parallel._tensor import _load_tensor
         validator.check_value_type("dev_mat", dev_mat, [tuple], self.name)
         validator.check_value_type("tensor_map", tensor_map, [tuple], self.name)
-        return Tensor(_load_tensor(x, dev_mat, tensor_map))
+        return Tensor(_load_tensor(x, dev_mat, tensor_map), x.dtype)
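The final change passes the source tensor's dtype explicitly when wrapping the sliced data, so the slice keeps the original dtype rather than whatever the intermediate conversion produced. A small hedged illustration of that difference, assuming `mindspore.Tensor` accepts an explicit dtype argument; the numpy array stands in for the `_load_tensor` result:

```python
import numpy as np
import mindspore as ms

np_slice = np.ones((2, 2), dtype=np.float32)  # stand-in for the _load_tensor output
t_inferred = ms.Tensor(np_slice)              # dtype taken from numpy: Float32
t_pinned = ms.Tensor(np_slice, ms.float16)    # dtype pinned to the source tensor's dtype
print(t_inferred.dtype, t_pinned.dtype)       # Float32 Float16
```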