mindspore-2.2.0-cp38-cp38-manylinux1_x86_64.whl → mindspore-2.2.11-cp38-cp38-manylinux1_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mindspore might be problematic.

Files changed (170)
  1. mindspore/.commit_id +1 -1
  2. mindspore/_akg/akg/composite/build_module.py +104 -20
  3. mindspore/_akg/akg/utils/ascend_profilier/cann_file_parser.py +76 -0
  4. mindspore/_akg/akg/utils/ascend_profilier/file_manager.py +56 -0
  5. mindspore/_akg/akg/utils/ascend_profilier/op_summary_bean.py +23 -0
  6. mindspore/_akg/akg/utils/ascend_profilier/op_summary_headers.py +8 -0
  7. mindspore/_akg/akg/utils/ascend_profilier/op_summary_parser.py +42 -0
  8. mindspore/_akg/akg/utils/ascend_profilier/path_manager.py +65 -0
  9. mindspore/_akg/akg/utils/composite_op_helper.py +7 -2
  10. mindspore/_akg/akg/utils/dump_ascend_meta.py +22 -3
  11. mindspore/_akg/akg/utils/kernel_exec.py +41 -15
  12. mindspore/_akg/akg/utils/tbe_codegen_utils.py +27 -6
  13. mindspore/_akg/akg/utils/util.py +56 -1
  14. mindspore/_c_dataengine.cpython-38-x86_64-linux-gnu.so +0 -0
  15. mindspore/_c_expression.cpython-38-x86_64-linux-gnu.so +0 -0
  16. mindspore/_checkparam.py +3 -3
  17. mindspore/_extends/graph_kernel/model/graph_split.py +84 -76
  18. mindspore/_extends/graph_kernel/splitter.py +3 -2
  19. mindspore/_extends/parallel_compile/akg_compiler/build_tbe_kernel.py +83 -66
  20. mindspore/_extends/parallel_compile/akg_compiler/tbe_topi.py +4 -4
  21. mindspore/_extends/parallel_compile/akg_compiler/util.py +10 -7
  22. mindspore/_extends/parallel_compile/tbe_compiler/tbe_helper.py +2 -1
  23. mindspore/_extends/parse/__init__.py +3 -2
  24. mindspore/_extends/parse/parser.py +6 -1
  25. mindspore/_extends/parse/standard_method.py +14 -11
  26. mindspore/_extends/remote/kernel_build_server.py +2 -1
  27. mindspore/_mindspore_offline_debug.cpython-38-x86_64-linux-gnu.so +0 -0
  28. mindspore/bin/cache_admin +0 -0
  29. mindspore/bin/cache_server +0 -0
  30. mindspore/common/_utils.py +16 -0
  31. mindspore/common/api.py +1 -1
  32. mindspore/common/auto_dynamic_shape.py +81 -85
  33. mindspore/common/dump.py +1 -1
  34. mindspore/common/tensor.py +3 -20
  35. mindspore/config/op_info.config +1 -1
  36. mindspore/context.py +11 -4
  37. mindspore/dataset/engine/cache_client.py +8 -5
  38. mindspore/dataset/engine/datasets_standard_format.py +5 -0
  39. mindspore/dataset/vision/transforms.py +21 -21
  40. mindspore/experimental/optim/adam.py +1 -1
  41. mindspore/gen_ops.py +1 -1
  42. mindspore/include/api/model.h +17 -0
  43. mindspore/include/api/status.h +8 -3
  44. mindspore/lib/libdnnl.so.2 +0 -0
  45. mindspore/lib/libmindspore.so +0 -0
  46. mindspore/lib/libmindspore_backend.so +0 -0
  47. mindspore/lib/libmindspore_common.so +0 -0
  48. mindspore/lib/libmindspore_core.so +0 -0
  49. mindspore/lib/libmindspore_glog.so.0 +0 -0
  50. mindspore/lib/libmindspore_gpr.so.15 +0 -0
  51. mindspore/lib/libmindspore_grpc++.so.1 +0 -0
  52. mindspore/lib/libmindspore_grpc.so.15 +0 -0
  53. mindspore/lib/libmindspore_shared_lib.so +0 -0
  54. mindspore/lib/libnnacl.so +0 -0
  55. mindspore/lib/libopencv_core.so.4.5 +0 -0
  56. mindspore/lib/libopencv_imgcodecs.so.4.5 +0 -0
  57. mindspore/lib/libopencv_imgproc.so.4.5 +0 -0
  58. mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/config/ascend310/aic-ascend310-ops-info.json +123 -0
  59. mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/config/ascend310p/aic-ascend310p-ops-info.json +123 -0
  60. mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/config/ascend910/aic-ascend910-ops-info.json +158 -0
  61. mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/config/ascend910b/aic-ascend910b-ops-info.json +37 -0
  62. mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/custom_aicore_ops_impl/add_dsl.py +46 -0
  63. mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/custom_aicore_ops_impl/add_tik.py +51 -0
  64. mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/custom_aicore_ops_impl/kv_cache_mgr.py +241 -0
  65. mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/custom_aicore_ops_impl/matmul_tik.py +212 -0
  66. mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/vector_core/tbe/custom_aicore_ops_impl/add_dsl.py +46 -0
  67. mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/vector_core/tbe/custom_aicore_ops_impl/add_tik.py +51 -0
  68. mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/vector_core/tbe/custom_aicore_ops_impl/kv_cache_mgr.py +241 -0
  69. mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/vector_core/tbe/custom_aicore_ops_impl/matmul_tik.py +212 -0
  70. mindspore/lib/plugin/ascend/custom_aicore_ops/op_proto/libop_proto.so +0 -0
  71. mindspore/lib/plugin/ascend/custom_aicpu_ops/op_impl/cpu/aicpu_kernel/impl/libcust_aicpu_kernels.so +0 -0
  72. mindspore/lib/plugin/ascend/custom_aicpu_ops/op_impl/cpu/aicpu_kernel/impl/libcust_cpu_kernels.so +0 -0
  73. mindspore/lib/plugin/ascend/custom_aicpu_ops/op_impl/cpu/config/cust_aicpu_kernel.json +78 -80
  74. mindspore/lib/plugin/ascend/custom_aicpu_ops/op_proto/libcust_op_proto.so +0 -0
  75. mindspore/lib/plugin/ascend/libakg.so +0 -0
  76. mindspore/lib/plugin/ascend/libhccl_plugin.so +0 -0
  77. mindspore/lib/plugin/ascend/libmindspore_aicpu_kernels.so +0 -0
  78. mindspore/lib/plugin/ascend/libmindspore_cpu_kernels.so +0 -0
  79. mindspore/lib/plugin/cpu/libakg.so +0 -0
  80. mindspore/lib/plugin/gpu/libcuda_ops.so.10 +0 -0
  81. mindspore/lib/plugin/gpu/libcuda_ops.so.11 +0 -0
  82. mindspore/lib/plugin/gpu10.1/libakg.so +0 -0
  83. mindspore/lib/plugin/gpu11.1/libakg.so +0 -0
  84. mindspore/lib/plugin/gpu11.1/libnccl.so.2 +0 -0
  85. mindspore/lib/plugin/gpu11.6/libakg.so +0 -0
  86. mindspore/lib/plugin/gpu11.6/libnccl.so.2 +0 -0
  87. mindspore/lib/plugin/libmindspore_ascend.so.1 +0 -0
  88. mindspore/lib/plugin/libmindspore_ascend.so.2 +0 -0
  89. mindspore/lib/plugin/libmindspore_gpu.so.10.1 +0 -0
  90. mindspore/lib/plugin/libmindspore_gpu.so.11.1 +0 -0
  91. mindspore/lib/plugin/libmindspore_gpu.so.11.6 +0 -0
  92. mindspore/nn/cell.py +0 -3
  93. mindspore/nn/layer/activation.py +4 -5
  94. mindspore/nn/layer/conv.py +39 -23
  95. mindspore/nn/layer/flash_attention.py +54 -129
  96. mindspore/nn/layer/math.py +3 -7
  97. mindspore/nn/layer/rnn_cells.py +5 -5
  98. mindspore/nn/wrap/__init__.py +4 -2
  99. mindspore/nn/wrap/cell_wrapper.py +12 -3
  100. mindspore/numpy/utils_const.py +5 -5
  101. mindspore/ops/_grad_experimental/grad_array_ops.py +1 -1
  102. mindspore/ops/_grad_experimental/grad_implementations.py +2 -2
  103. mindspore/ops/_grad_experimental/grad_math_ops.py +19 -18
  104. mindspore/ops/_grad_experimental/grad_sparse_ops.py +3 -3
  105. mindspore/ops/_op_impl/aicpu/add.py +3 -3
  106. mindspore/ops/_op_impl/aicpu/linear_sum_assignment.py +21 -2
  107. mindspore/ops/_utils/utils.py +2 -0
  108. mindspore/ops/composite/multitype_ops/_compile_utils.py +2 -1
  109. mindspore/ops/composite/multitype_ops/getitem_impl.py +2 -2
  110. mindspore/ops/function/array_func.py +10 -7
  111. mindspore/ops/function/grad/grad_func.py +0 -1
  112. mindspore/ops/function/nn_func.py +98 -9
  113. mindspore/ops/function/random_func.py +2 -1
  114. mindspore/ops/op_info_register.py +24 -21
  115. mindspore/ops/operations/__init__.py +6 -2
  116. mindspore/ops/operations/_grad_ops.py +25 -6
  117. mindspore/ops/operations/_inner_ops.py +155 -23
  118. mindspore/ops/operations/array_ops.py +9 -7
  119. mindspore/ops/operations/comm_ops.py +2 -2
  120. mindspore/ops/operations/custom_ops.py +85 -68
  121. mindspore/ops/operations/inner_ops.py +26 -3
  122. mindspore/ops/operations/math_ops.py +7 -6
  123. mindspore/ops/operations/nn_ops.py +193 -49
  124. mindspore/parallel/_parallel_serialization.py +10 -3
  125. mindspore/parallel/_tensor.py +4 -1
  126. mindspore/parallel/checkpoint_transform.py +13 -2
  127. mindspore/parallel/shard.py +17 -10
  128. mindspore/profiler/common/util.py +1 -0
  129. mindspore/profiler/parser/ascend_hccl_generator.py +232 -0
  130. mindspore/profiler/parser/ascend_msprof_exporter.py +86 -43
  131. mindspore/profiler/parser/ascend_msprof_generator.py +196 -9
  132. mindspore/profiler/parser/ascend_op_generator.py +1 -1
  133. mindspore/profiler/parser/ascend_timeline_generator.py +6 -182
  134. mindspore/profiler/parser/base_timeline_generator.py +1 -1
  135. mindspore/profiler/parser/cpu_gpu_timeline_generator.py +2 -2
  136. mindspore/profiler/parser/framework_parser.py +1 -1
  137. mindspore/profiler/parser/profiler_info.py +19 -0
  138. mindspore/profiler/profiling.py +46 -24
  139. mindspore/rewrite/api/pattern_engine.py +1 -1
  140. mindspore/rewrite/parsers/for_parser.py +7 -7
  141. mindspore/rewrite/parsers/module_parser.py +4 -4
  142. mindspore/rewrite/symbol_tree.py +1 -4
  143. mindspore/run_check/_check_version.py +5 -3
  144. mindspore/safeguard/rewrite_obfuscation.py +52 -28
  145. mindspore/scipy/ops.py +55 -5
  146. mindspore/scipy/optimize/__init__.py +3 -2
  147. mindspore/scipy/optimize/linear_sum_assignment.py +38 -33
  148. mindspore/train/callback/_summary_collector.py +1 -1
  149. mindspore/train/dataset_helper.py +1 -0
  150. mindspore/train/model.py +2 -2
  151. mindspore/train/serialization.py +97 -11
  152. mindspore/train/summary/_summary_adapter.py +1 -1
  153. mindspore/train/summary/summary_record.py +23 -7
  154. mindspore/version.py +1 -1
  155. {mindspore-2.2.0.dist-info → mindspore-2.2.11.dist-info}/METADATA +3 -2
  156. {mindspore-2.2.0.dist-info → mindspore-2.2.11.dist-info}/RECORD +160 -151
  157. mindspore/ops/_op_impl/_custom_op/flash_attention/attention.py +0 -406
  158. mindspore/ops/_op_impl/_custom_op/flash_attention/constants.py +0 -41
  159. mindspore/ops/_op_impl/_custom_op/flash_attention/flash_attention_bwd.py +0 -467
  160. mindspore/ops/_op_impl/_custom_op/flash_attention/flash_attention_fwd.py +0 -563
  161. mindspore/ops/_op_impl/_custom_op/flash_attention/flash_attention_impl.py +0 -193
  162. mindspore/ops/_op_impl/_custom_op/flash_attention/tik_ops_utils.py +0 -435
  163. mindspore/ops/_op_impl/_custom_op/flash_attention/tiling_strategy/__init__.py +0 -0
  164. mindspore/ops/_op_impl/_custom_op/flash_attention/tiling_strategy/sparse_tiling.py +0 -45
  165. mindspore/ops/_op_impl/_custom_op/flash_attention/tiling_strategy/strategy.py +0 -67
  166. mindspore/ops/_op_impl/_custom_op/flash_attention/tiling_strategy/wukong_tiling.py +0 -62
  167. /mindspore/{ops/_op_impl/_custom_op/flash_attention → _akg/akg/utils/ascend_profilier}/__init__.py +0 -0
  168. {mindspore-2.2.0.dist-info → mindspore-2.2.11.dist-info}/WHEEL +0 -0
  169. {mindspore-2.2.0.dist-info → mindspore-2.2.11.dist-info}/entry_points.txt +0 -0
  170. {mindspore-2.2.0.dist-info → mindspore-2.2.11.dist-info}/top_level.txt +0 -0
@@ -238,13 +238,14 @@ class LambApplyOptimizerAssign(PrimitiveWithInfer):
     @prim_attr_register
     def __init__(self):
         """Initialize LambApplyOptimizerAssign"""
+        self.var_shape = "var_shape"
         self.add_prim_attr('side_effect_mem', True)
 
     def infer_shape(self, grad_shape, v_shape, m_shape, var_shape, beta1_shape, sub1_shape,
                     beta2_shape, sub2_shape, eps_shape, steps_shape, use_weight_shape, weight_decay_shape):
-        validator.check("var_shape", var_shape, "m_shape", m_shape, validator.EQ, self.name)
-        validator.check("var_shape", var_shape, "v_shape", v_shape, validator.EQ, self.name)
-        validator.check("var_shape", var_shape, "grad_shape", grad_shape, validator.EQ, self.name)
+        validator.check(self.var_shape, var_shape, "m_shape", m_shape, validator.EQ, self.name)
+        validator.check(self.var_shape, var_shape, "v_shape", v_shape, validator.EQ, self.name)
+        validator.check(self.var_shape, var_shape, "grad_shape", grad_shape, validator.EQ, self.name)
         return m_shape, v_shape, m_shape
 
     def infer_dtype(self, grad_dtype, v_dtype, m_dtype, var_dtype, beta1_dtype, sub1_dtype,
@@ -658,3 +659,25 @@ class ScaleGrad(PrimitiveWithInfer):
     @prim_attr_register
     def __init__(self):
         """Initialize ScaleGrad"""
+
+
+class KVCacheMgr(Primitive):
+    """
+    Update past with cur and index along sequence axis.
+
+    Inputs:
+        - **past** (Parameter) - 4-D tensor with shape: :math:`(batch_size, num_head, seq_len, hidden_size)`.
+        - **cur** (Tensor) - 4-D tensor with shape: :math:`(batch_size, num_head, 1, hidden_size)`.
+        - **index** (Tensor) - 1-D tensor with shape: :math:`(batch_size,)`.
+
+    Outputs:
+        Tensor, has the same data type and shape as original `past`.
+
+    Supported Platforms:
+        ``Ascend``
+    """
+
+    @prim_attr_register
+    def __init__(self):
+        self.init_prim_io_names(inputs=['past', 'cur', 'index'], outputs=['past'])
+        self.add_prim_attr('side_effect_mem', True)
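
The hunk above adds a new KVCacheMgr primitive (the file totals suggest it lands in mindspore/ops/operations/inner_ops.py alongside the LambApplyOptimizerAssign change). Below is a minimal usage sketch, not part of the diff; the import path is inferred from the hunk context and an Ascend device is assumed, since that is the only documented platform.

    # Hypothetical usage sketch for the new KVCacheMgr primitive (Ascend only).
    import numpy as np
    from mindspore import Parameter, Tensor
    from mindspore.ops.operations.inner_ops import KVCacheMgr  # assumed import path

    batch_size, num_head, seq_len, hidden_size = 2, 4, 16, 32
    past = Parameter(Tensor(np.zeros((batch_size, num_head, seq_len, hidden_size), np.float16)), name="past")
    cur = Tensor(np.ones((batch_size, num_head, 1, hidden_size), np.float16))
    index = Tensor(np.array([3, 7], np.int32))  # write position for each batch element

    kv_cache_mgr = KVCacheMgr()
    updated = kv_cache_mgr(past, cur, index)  # writes `cur` into `past` in place (side_effect_mem)
    print(updated.shape)  # (2, 4, 16, 32), same shape and dtype as `past`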
@@ -1536,9 +1536,8 @@ class LpNorm(Primitive):
     """
 
     @prim_attr_register
-    def __init__(self, axis, p=2, keep_dims=False, epsilon=1e-12):
+    def __init__(self, axis=(), p=2, keep_dims=False, epsilon=1e-12):
         """Initialize LpNorm"""
-        super().__init__("LpNorm")
         validator.check_value_type("p", p, [int], self.name)
         validator.check_value_type("axis", axis, [int, tuple, list], self.name)
         validator.check_value_type("keep_dims", keep_dims, [bool], self.name)
@@ -2494,6 +2493,7 @@ class Reciprocal(PrimitiveWithCheck):
         self.init_prim_io_names(inputs=['x'], outputs=['y'])
 
     def infer_value(self, x):
+        """Infer value for Reciprocal"""
         if x is not None:
             x = x.asnumpy()
             out = 1.0 / x
@@ -2551,6 +2551,7 @@ class Pow(Primitive):
         self.init_prim_io_names(inputs=['x1', 'x2'], outputs=['y'])
 
     def infer_value(self, x, power):
+        """infer value for _BinaryOp"""
         if x is not None and power is not None:
             x = x.asnumpy()
             power = power.asnumpy()
@@ -2931,7 +2932,7 @@ class Histogram(Primitive):
     """
 
     @prim_attr_register
-    def __init__(self, bins=100, min=0.0, max=0.0):  # pylint: disable=W0622
+    def __init__(self, bins=100, min=0.0, max=0.0):
         """Initialize Histogram."""
         self.init_prim_io_names(inputs=['x'], outputs=['y'])
         validator.check_value_type("bins", bins, [int], self.name)
@@ -6568,9 +6569,9 @@ class LinSpace(Primitive):
 
     Inputs:
         - **start** (Tensor) - Start value of interval, 0-D Tensor with dtype float32 or float64.
-        - **stop** (Tensor) - Last value of interval, 0-D Tensor with dtype float32 or float64.
-        - **num** (int) - Number of ticks in the interval, inclusive of `start` and `stop`.
-          Supported dtypes: int32, int64.
+        - **stop** (Tensor) - Last value of interval, 0-D Tensor with dtype float32 or float64.
+        - **num** (Union[int, Tensor]) - Number of ticks in the interval, inclusive of `start` and `stop`.
+          Must be a positive integer. When the input is Tensor, it must be a 0-D Tensor with dtype int32 or int64.
 
     Outputs:
         Tensor, has the same shape and dtype as `start`.
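
Per the LinSpace docstring update above, `num` is now documented as Union[int, Tensor]. A short sketch under that assumption; the int form is long-standing, the 0-D Tensor form is what the 2.2.11 docstring adds.

    import mindspore as ms
    from mindspore import Tensor, ops

    start = Tensor(1.0, ms.float32)
    stop = Tensor(10.0, ms.float32)
    print(ops.LinSpace()(start, stop, 5))                    # num as a Python int
    print(ops.LinSpace()(start, stop, Tensor(5, ms.int32)))  # num as a 0-D int32 Tensor (newly documented)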
@@ -1990,6 +1990,7 @@ class MaxPoolV1(Primitive):
         self.add_prim_attr("kernel_size", kernel_size_adapted)
         self.add_prim_attr("strides", strides_adapted)
 
+
 class MaxPool3D(Primitive):
     r"""
     Applies a 3D max pooling over an input Tensor which can be regarded as a composition of 3D planes.
@@ -3918,7 +3919,6 @@ class ResizeBilinear(PrimitiveWithInfer):
     def infer_dtype(self, input_dtype):
         validator.check_tensor_dtype_valid('input_dtype', input_dtype, [mstype.float16, mstype.float32],
                                            self.name)
-        self.add_prim_attr("dtype", input_dtype)
         return input_dtype
 
 
@@ -4009,6 +4009,7 @@ class OneHot(Primitive):
 
     Note:
        If the input indices is rank `N`, the output will have rank `N+1`. The new axis is created at dimension `axis`.
+       On Ascend, if `on_value` is Int64 dtype, `indices` must be Int64 dtype.
 
     Args:
        axis (int): Position to insert the value. e.g. If shape of `indices` is :math:`(N, C)`, and `axis` is -1,
@@ -4019,12 +4020,14 @@ class OneHot(Primitive):
        - **indices** (Tensor) - A tensor of indices. Tensor of shape :math:`(X_0, \ldots, X_n)`.
          Data type must be int32 or int64.
        - **depth** (int) - A scalar defining the depth of the one-hot dimension.
-       - **on_value** (Tensor) - A value to fill in output when `indices[j] = i`.
+       - **on_value** (Tensor) - A value to fill in output when `indices[j] = i`. Data type must be int32, int64,
+         float16 or float32.
        - **off_value** (Tensor) - A value to fill in output when `indices[j] != i`.
         It has the same data type as `on_value`.
 
     Outputs:
-        Tensor, one-hot tensor. Tensor of shape :math:`(X_0, \ldots, X_{axis}, \text{depth} ,X_{axis+1}, \ldots, X_n)`.
+        Tensor, one-hot tensor. Tensor of shape :math:`(X_0, \ldots, X_{axis}, \text{depth} ,X_{axis+1}, \ldots, X_n)`,
+        and it has the same data type as `on_value`.
 
     Raises:
         TypeError: If `axis` or `depth` is not an int.
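
The OneHot docstring now spells out the allowed `on_value` dtypes and states that the output dtype follows `on_value`. A small example of that contract (standard OneHot usage, nothing version-specific):

    import mindspore as ms
    from mindspore import Tensor, ops

    indices = Tensor([0, 2, 1], ms.int32)
    on_value = Tensor(1.0, ms.float32)
    off_value = Tensor(0.0, ms.float32)
    out = ops.OneHot()(indices, 3, on_value, off_value)
    print(out.shape)  # (3, 3)
    print(out.dtype)  # Float32, same as on_value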
@@ -8259,8 +8262,12 @@ class Conv3D(Primitive):
         self.add_prim_attr('data_format', self.format)
         self.out_channel = validator.check_positive_int(out_channel, 'out_channel', self.name)
         validator.check_value_type("group", group, (int,), self.name)
+        validator.check_int_range(group, 1, out_channel, validator.INC_BOTH, "group", self.name)
+        device_target = context.get_context("device_target")
         if self.out_channel % group != 0:
             raise ValueError("The argument 'group' should be divisible by 'out_channel'")
+        if device_target == "Ascend" and group != 1:
+            raise ValueError("On Ascend platform, group = 1 must be satisfied.")
 
         self.group = group
         self.add_prim_attr('groups', self.group)
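
The Conv3D change adds two checks: `group` must lie in [1, out_channel], and on Ascend only `group=1` is accepted. A sketch of a configuration that still passes the new validation (shapes are illustrative only):

    import numpy as np
    from mindspore import Tensor, ops

    conv3d = ops.Conv3D(out_channel=8, kernel_size=(3, 3, 3), group=1)  # group=1 is accepted everywhere
    x = Tensor(np.ones((1, 4, 8, 16, 16), np.float32))   # (N, C_in, D, H, W)
    w = Tensor(np.ones((8, 4, 3, 3, 3), np.float32))     # (C_out, C_in // group, kD, kH, kW)
    print(conv3d(x, w).shape)
    # ops.Conv3D(out_channel=8, kernel_size=3, group=2) now raises ValueError on Ascend.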
@@ -8956,8 +8963,10 @@ class Dilation2D(Primitive):
         self.pad_mode = validator.check_string(pad_mode, ['VALID', 'SAME', 'valid', 'same'], 'pad_mode', self.name)
         self.add_prim_attr('pad_mode', self.pad_mode.upper())
         self.stride = _check_format_stride_or_dilation("stride", stride, self.name, self.data_format)
+
         def is_in_range(x):
             return 1 <= x <= 255
+
         if not is_in_range(self.stride[2]) or not is_in_range(self.stride[3]):
             raise ValueError(f'For Dilation2D, size of stride is not supported, '
                              f'stride should be in the range of [1, 255], '
@@ -11325,9 +11334,24 @@ class PromptFlashAttention(Primitive):
     S -- Sequence length
     H -- Hidden size
 
+    Refer to :func:mindspore.ops.prompt_flash_attention for more detail.
+
     .. warning::
         This is an experimental API that is subject to change or deletion.
 
+    Args:
+        num_heads (int): The number of heads.
+        scale_value (float): The scale value indicating the scale coefficient, which is used as the scalar of
+            Muls in the calculation. Default: 1.0.
+        pre_tokens (int): Previous tokens. Default: 2147483547.
+        next_tokens (int): next tokens. Default: 0.
+            indicate the upper triangle, Indicate the number of data blocks involved in the calculation. The value 0
+            indicates that the data blocks in the upper triangle are not involved in the calculation
+        input_layout (str): the data layout of the input qkv, support `(BSH)` and `(BNSD)`, Default `BSH`.
+        num_key_value_heads (int): head numbers of key/value which are used in GQA algorithm.
+            The value o indicates if the key and value have the same head nums, use numHeads. Default: 0.
+        sparse_mode (int): Default: 0
+
     Inputs:
         - **query** (Tensor) - The query tensor with data type of float16 or float32.
           Input tensor of shape :math:`(B, S, H)` / `(B, N, S, D)`.
@@ -11337,28 +11361,42 @@ class PromptFlashAttention(Primitive):
          Input tensor of shape :math:`(B, S, H)` / `(B, N, S, D)`.
        - **attn_mask** (Tensor) - The attention mask tensor with data type of float16 or float32.
          For each element, 0 indicates retention and 1 indicates discard. Input tensor of shape :math:`(B, 1, S, S)`.
-       - **padding_mask** (Tensor) - The padding mask tensor with data type of float16 or float32
        - **actual_seq_lengths** (Tensor): Describe actual sequence length of each input with data type of int.
-       - **num_heads** (int): The number of heads.
-       - **scale_value** (float): The scale value indicating the scale coefficient, which is used as the scalar of
-         Muls in the calculation. Default: 1.0.
-       - **pre_tokens** (int): Previous tokens. Default: 2147483547.
-       - **next_tokens** (int): next tokens. Default: 0.
-         indicate the upper triangle, Indicate the number of data blocks involved in the calculation. The value 0
-         indicates that the data blocks in the upper triangle are not involved in the calculation
-       - **input_layout** (str): the data layout of the input qkv, support `(BSH)` and `(BNSD)`, Default `BSH`.
-       - **num_key_value_heads** (int): head numbers of key/value which are used in GQA algorithm.
-         The value o indicates if the key and value have the same head nums, use numHeads. Default: 0.
+       - **actual_seq_lengths_kv** (Tensor): Describe actual sequence length of each input with data type of int.
+       - **padding_mask** (Tensor) - The padding mask tensor with data type of float16 or float32
+       - **dep_scale1** (Tensor)
+       - **quant_scale1** (Tensor)
+       - **deq_scale2** (Tensor)
+       - **quant_scale2** (Tensor)
+       - **quant_offset2** (Tensor)
+
 
     Outputs:
         - **attention_out** (Tensor) - Input tensor of shape :math:`(B, S, H)` / `(B, N, S, D)`.
 
-    Supported Platforms:
-        ``Ascend910B``
+    Supported Platforms:
+        ``Ascend``
+
+    Examples:
+        >>> import mindspore.ops.operations.nn_ops as P
+        >>> from mindspore import Tensor
+        >>> import numpy as np
+        >>> B = 1
+        >>> N = 16
+        >>> S = 256
+        >>> D = 16
+        >>> query = Tensor(np.ones((B, N, S, D), dtype=np.float16))
+        >>> key = Tensor(np.ones((B, N, S, D), dtype=np.float16))
+        >>> value = Tensor(np.ones((B, N, S, D), dtype=np.float16))
+        >>> pfa = P.PromptFlashAttention(N, input_layout='BNSD')
+        >>> out = pfa(query, key, value, None, None, None, None, None, None, None, None, None)
+        >>> print(out[0].shape)
+        (1, 16, 256, 16)
     """
+
     @prim_attr_register
     def __init__(self, num_heads, scale_value=1.0, pre_tokens=2147483547, next_tokens=0, input_layout='BSH',
-                 num_key_value_heads=0):
+                 num_key_value_heads=0, sparse_mode=0):
        """Initialize PromptFlashAttention."""
        validator.check_value_type('num_heads', num_heads, [int], self.name)
        validator.check_value_type('scale_value', scale_value, [float], self.name)
@@ -11366,7 +11404,10 @@ class PromptFlashAttention(Primitive):
         validator.check_value_type('next_tokens', next_tokens, [int], self.name)
         validator.check_value_type('input_layout', input_layout, [str], self.name)
         validator.check_value_type('num_key_value_heads', num_key_value_heads, [int], self.name)
-        self.init_prim_io_names(inputs=["query", "key", "value", "attn_mask", "padding_mask", "actual_seq_lengths"],
+        validator.check_value_type('sparse_mode', sparse_mode, [int], self.name)
+        self.init_prim_io_names(inputs=["query", "key", "value", "attn_mask", "actual_seq_lengths",
+                                        "actual_seq_lengths_kv", "padding_mask", "deq_scale1", "quant_scale1",
+                                        "deq_scale2", "quant_scale2", "quant_offset2"],
                                 outputs=["attention_out"])
 
 
@@ -11376,46 +11417,57 @@ class FlashAttentionScore(Primitive):
     .. warning::
         This is an experimental API that is subject to change or deletion.
     B -- Batch size
-    S -- Sequence length
-    H -- Hidden size
-    N -- Num heads
-    D -- Dim size
+    S1 -- Sequence length of query
+    S2 -- Sequence length of key and value
+    N1 -- Num heads of query
+    N2 -- Num heads of key and value, and N2 must be a factor of N1
+    D -- head size
+    H1 -- Hidden size of query, which equals to N1 * D
+    H2 -- Hidden size of key and value, which equals to N2 * D
     Args:
-        head_num (int): The number of the heads.
+        head_num (int): The head num of query.
         keep_prob (float): The keep probability of dropout. Default: 1.0.
         scale_value (float): The scale value. Default: 1.0.
         pre_tokens (int): Previous tokens. Default: 65536.
         next_tokens (int): Next tokens. Default: 65536.
         inner_precise (int): Specify the execution mode, where 0 indicates high precision mode and 1 indicates high
-            performance mode. Default: 0.
-        input_layout (str, optional): Specifies the layout of `query`, the value must be one of ["BSH", "SBH"].
-            Currently, only BSH is supported. Default: "BSH".
-
-    Inputs:
-        - **query** (Tensor) - The query tensor with data type of float16 or float32.
-          Input tensor of shape :math:`(B, S, H)`.
-        - **key** (Tensor) - The key tensor with data type of float16 or float32.
-          Input tensor of shape :math:`(B, S, H)`.
-        - **value** (Tensor) - The value tensor with data type of float16 or float32.
-          Input tensor of shape :math:`(B, S, H)`.
-        - **attn_mask** (Tensor) - The attention mask tensor with data type of float16 or float32.
-          For each element, 0 indicates retention and 1 indicates discard. Input tensor of shape :math:`(B, 1, S, S)`.
-        - **drop_mask** (Tensor) - The dropout mask tensor with data type of UInt8.
-          Input tensor of shape :math:`(B, N, S, S // 8) or ()`.
-        - **real_shift** (None) - The position embedding code of float16 or float32, not implemented yet.
+            performance mode. Only support 0 currently. Default: 0.
+        input_layout (str, optional): Specifies the layout of `query`, the value must be one of ["BSH", "BNSD"].
+            Default: "BSH".
+        sparse_mode (int): Default 0.
+
+    Inputs:
+        - **query** (Tensor[float16, float32, bfloat16]) - The query tensor.
+          Input tensor of shape :math:`(B, S1, H1)` or `(B, N1, S1, D)`.
+        - **key** (Tensor[float16, float32, bfloat16]) - The key tensor.
+          Input tensor of shape :math:`(B, S2, H2)` or `(B, N2, S2, D)`.
+        - **value** (Tensor[float16, float32, bfloat16]) - The value tensor.
+          Input tensor of shape :math:`(B, S2, H2)` or `(B, N2, S2, D)`.
+        - **real_shift** (Tensor[float16, float32, bfloat16], None) - The position embedding code.
+          Input tensor of shape :math: `(B, N1, S1, S2)` or `(B, N1, 1, S2)`.
+        - **drop_mask** (Tensor[uint8], None) - The dropout mask tensor.
+          Input tensor of shape :math:`(B, N1, S1, S2 // 8) or None`.
         - **padding_mask** (None) - The padding mask of float16 or float32, not implemented yet.
+        - **attn_mask** (Tensor[uint8], None) - The attention mask tensor.
+          For each element, 0 indicates retention and 1 indicates discard.
+          Input tensor of shape :math:`(B, N1, S1, S2)`, `(B, 1, S1, S2)` or `(S1, S2)`.
+        - **prefix** (Tensor[int64], None) - Not implemented yet.
+          Input tensor of shape :math:`(B,)`.
 
     Outputs:
-        - **attention_out** (Tensor) - (B, S, H)
-        - **softmax_max** (Tensor) - (B, N, S, 16)/(B, N, S, 8) when fp16/fp32
-        - **softmax_sum** (Tensor) - (B, N, S, 16)/(B, N, S, 8) when fp16/fp32
+        - **softmax_max** (Tensor[float32]) - (B, N1, S1, 8)
+        - **softmax_sum** (Tensor[float32]) - (B, N1, S1, 8)
+        - **softmax_out** (Tensor[float32]) - Useless output, ignore it. Output tensor of shape : `()`
+        - **attention_out** (Tensor[float16, float32, bfloat16]) - The output of attention, its shape, and data type
+          are the same as the query.
+
     Supported Platforms:
         ``Ascend``
     """
 
     @prim_attr_register
     def __init__(self, head_num, keep_prob=1.0, scale_value=1.0, pre_tokens=65536, next_tokens=65536, inner_precise=0,
-                 input_layout="BSH"):
+                 input_layout="BSH", sparse_mode=0):
        """Initialize FlashAttentionScore"""
        validator.check_value_type('head_num', head_num, [int], self.name)
        validator.check_value_type('keep_prob', keep_prob, [int, float], self.name)
@@ -11425,11 +11477,103 @@ class FlashAttentionScore(Primitive):
         validator.check_value_type('pre_tokens', pre_tokens, [int], self.name)
         validator.check_value_type('next_tokens', next_tokens, [int], self.name)
         validator.check_value_type('inner_precise', inner_precise, [int], self.name)
-        if inner_precise not in [0, 1]:
-            raise ValueError(f"Attribute 'inner_precise' must be either 0 or 1, but got {inner_precise}")
+        validator.check_value_type('sparse_mode', sparse_mode, [int], self.name)
+        if inner_precise not in [0]:
+            raise ValueError(f"Attribute 'inner_precise' must be 0, but got {inner_precise}")
         validator.check_value_type('input_layout', input_layout, [str], self.name)
-        if input_layout not in ["BSH"]:
-            raise ValueError(f"Attribute 'input_layout' must be either 'bsh' or 'sbh', but got {input_layout}")
+        if input_layout not in ["BSH", "BNSD"]:
+            raise ValueError(f"Attribute 'input_layout' must be either 'BSH' or 'BNSD', but got {input_layout}")
         self.init_prim_io_names(
-            inputs=['query', 'key', 'value', 'attn_mask', 'drop_mask', 'real_shift', 'padding_mask'],
-            outputs=['attention_out', 'softmax_max', 'softmax_sum'])
+            inputs=['query', 'key', 'value', 'real_shift', 'drop_mask', 'padding_mask', 'attn_mask', 'prefix'],
+            outputs=['softmax_max', 'softmax_sum', 'softmax_out', 'attention_out'])
+
+
+class RmsNorm(Primitive):
+    r"""
+    The RmsNorm operator is a normalization operation, and its formula is:
+
+    .. math::
+        y=\frac{x_i}{\sqrt{\frac{1}{n}\sum_{i=1}^{n}{x_i^2}+\varepsilon}}\gamma_i
+
+    .. warning::
+        This is an experimental API that is subject to change or deletion.
+
+    Args:
+        epsilon (float): prevent division by 0, default value is `1e-6`
+
+    Inputs:
+        - **input_x** (Tensor) - Input data of RmsNorm, support data type: float16, float32, bfloat16.
+        - **gamma** (Tensor) - Support data type: float16, float32, bfloat16.
+
+    Outputs:
+        - **y** (Tensor) - Has the same type and shape with `input_x`.
+        - **rstd** (Tensor) - Has the same type with `input_x`, used by gradient calculation.
+
+    Raises:
+        TypeError: If data type of `input_x` is not one of the following: float16, float32, bfloat16.
+        TypeError: If data type of `gamma` is not one of the following: float16, float32, bfloat16.
+        TypeError: If data type of "input_x" is not the same with the data type of "gamma"
+
+    Supported Platforms:
+        ``Ascend``
+    """
+
+    @prim_attr_register
+    def __init__(self, epsilon=1e-6):
+        """Initialize Dense."""
+        validator.check_value_type("epsilon", epsilon, [float], self.name)
+        self.init_prim_io_names(inputs=['x', 'gamma'], outputs=["y", "rstd"])
+
+
+class PagedAttention(Primitive):
+    r"""
+    .. warning::
+        This is an experimental API that is subject to change or deletion.
+    """
+    @prim_attr_register
+    def __init__(self, head_num, scale_value=1.0, kv_head_num=0):
+        """Initialize PagedAttention"""
+        validator.check_value_type('head_num', head_num, [int], self.name)
+        validator.check_value_type('scale_value', scale_value, [float], self.name)  # scale after qkbmm
+        validator.check_value_type('kv_head_num', kv_head_num, [int], self.name)  # for MQA
+        self.init_prim_io_names(
+            inputs=['query', 'key_cache', 'value_cache', 'block_tables', 'context_lens'],
+            outputs=['attention_out'])
+
+
+class PagedAttentionMask(Primitive):
+    r"""
+    .. warning::
+        This is an experimental API that is subject to change or deletion.
+    """
+    @prim_attr_register
+    def __init__(self, head_num, scale_value=1.0, kv_head_num=0):
+        """Initialize PagedAttentionMask"""
+        validator.check_value_type('head_num', head_num, [int], self.name)
+        validator.check_value_type('scale_value', scale_value, [float], self.name)  # scale after qkbmm
+        validator.check_value_type('kv_head_num', kv_head_num, [int], self.name)  # for MQA
+        self.init_prim_io_names(
+            inputs=['query', 'key_cache', 'value_cache', 'block_tables', 'context_lens', 'alibi_mask'],
+            outputs=['attention_out'])
+
+
+class ReshapeAndCache(Primitive):
+    r"""
+    .. warning::
+        This is an experimental API that is subject to change or deletion.
+    """
+    __mindspore_signature__ = (
+        sig.make_sig('key', dtype=sig.sig_dtype.T),
+        sig.make_sig('value', dtype=sig.sig_dtype.T),
+        sig.make_sig('key_cache', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T),
+        sig.make_sig('value_cache', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T),
+        sig.make_sig('slot_mapping', dtype=sig.sig_dtype.T1),
+    )
+
+    @prim_attr_register
+    def __init__(self):
+        """Initialize ReshapeAndCache"""
+        self.init_prim_io_names(
+            inputs=['key', 'value', 'key_cache', 'value_cache', 'slot_mapping'],
+            outputs=['key_out'])
+        self.add_prim_attr('side_effect_mem', True)
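
Besides reordering FlashAttentionScore's inputs and outputs and adding BNSD layout plus `sparse_mode`, the hunk above introduces RmsNorm, PagedAttention, PagedAttentionMask and ReshapeAndCache in mindspore/ops/operations/nn_ops.py. A minimal RmsNorm sketch follows, not part of the diff; the import path is assumed from the hunk context and an Ascend device is required (the only documented platform).

    import numpy as np
    from mindspore import Tensor
    import mindspore.ops.operations.nn_ops as nn_ops  # assumed module for the new primitives

    x = Tensor(np.random.randn(2, 8, 64).astype(np.float16))
    gamma = Tensor(np.ones((64,), np.float16))
    rms_norm = nn_ops.RmsNorm(epsilon=1e-6)
    y, rstd = rms_norm(x, gamma)   # y: same shape/dtype as x; rstd feeds the backward pass
    print(y.shape, rstd.shape)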
@@ -335,7 +335,8 @@ def _rank_list_for_transform_parallel_checkpoint(rank_id, src_strategy_list, dst
     return list(result_list)
 
 
-def _transform_parallel_checkpoint(rank_id, param_total_dict, param_attr_dict, src_strategy_list, dst_strategy_list):
+def _transform_parallel_checkpoint(rank_id, param_total_dict, param_attr_dict, src_strategy_list,
+                                   dst_strategy_list, param_type_dict):
     """
     Transform model parallel dimension for distributed checkpoint files.
     """
@@ -397,15 +398,21 @@ def _transform_parallel_checkpoint(rank_id, param_total_dict, param_attr_dict, s
         transform_tensor = ms.Tensor(param_total_dict[param_name][rank_id % device_num])
         requires_grad = param_attr_dict[param_name][rank_id % device_num][0]
         layerwise_parallel = param_attr_dict[param_name][rank_id % device_num][1]
-        transform_param_dict[param_name] = ms.Parameter(transform_tensor, param_name, requires_grad, layerwise_parallel)
+        transform_para = ms.Parameter(transform_tensor, param_name, requires_grad, layerwise_parallel)
+        if param_type_dict[param_name][rank_id % device_num] == "BFloat16":
+            transform_para.set_dtype(ms.bfloat16)
+        transform_param_dict[param_name] = transform_para
 
     # Handle those parameter like learning_rate, global_step which not in strategy_file.
     for param_name, _ in param_total_dict.items():
         if param_name not in transform_param_dict:
-            transform_param_dict[param_name] = ms.Parameter(
+            transform_para = ms.Parameter(
                 ms.Tensor(param_total_dict[param_name][rank_id % device_num]), param_name,
                 param_attr_dict[param_name][rank_id % device_num][0],
                 param_attr_dict[param_name][rank_id % device_num][1])
+            if param_type_dict[param_name][rank_id % device_num] == "BFloat16":
+                transform_para.set_dtype(ms.bfloat16)
+            transform_param_dict[param_name] = transform_para
 
     transform_param_list = [{"name": param_name, "data": param_data}
                             for param_name, param_data in transform_param_dict.items()]
@@ -17,6 +17,7 @@ from __future__ import division
 from __future__ import absolute_import
 
 import numpy as np
+from mindspore.common import dtype as mstype
 from mindspore.common.tensor import Tensor
 from mindspore.communication.management import get_rank, get_group_size
 from mindspore._c_expression import TensorTransform
@@ -221,6 +222,8 @@ def _load_tensor(tensor, dev_mat, tensor_map, rank_id=-1):
         rank = rank_id
     tensor_strategy = _get_tensor_strategy(dev_mat, tensor_map)
     tensor_slice_index = _get_tensor_slice_index(dev_mat, tensor_strategy, tensor_map, rank)
+    if tensor.dtype == mstype.bfloat16:
+        tensor = tensor.float()
     np_tensor = tensor.asnumpy()
     np_tensor_list = _chunk_tensor_by_strategy(np_tensor, tensor_strategy)
     np_tensor_slice = np_tensor_list[int(tensor_slice_index)]
@@ -260,7 +263,7 @@ def _load_tensor_by_layout(tensor, layout, rank_id):
         rank = get_rank(group)
         size = get_group_size(group)
         tensor_slice = np.split(tensor_slice, size)[rank]
-    return Tensor(tensor_slice)
+    return Tensor(tensor_slice, tensor.dtype)
 
 
 def _reshape_param_data(param_data, dev_mat, tensor_map):
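
The one-line change in `_load_tensor_by_layout` forwards the source tensor's dtype when rebuilding the slice, so reduced-precision parameters keep their dtype instead of inheriting NumPy's. A small illustration of the difference (float16 is used here; the motivating case in this release is bfloat16, which NumPy cannot represent):

    import numpy as np
    import mindspore as ms
    from mindspore import Tensor

    np_slice = np.ones((4, 4), dtype=np.float32)   # sliced data always comes back as a NumPy array
    print(Tensor(np_slice).dtype)                  # Float32: dtype taken from NumPy
    print(Tensor(np_slice, ms.float16).dtype)      # Float16: dtype of the original tensor preserved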
@@ -21,6 +21,7 @@ import copy
 from collections import defaultdict
 import numpy as np
 import mindspore as ms
+from mindspore.common import dtype as mstype
 from mindspore.parallel._parallel_serialization import _rank_list_for_transform_parallel_checkpoint, \
     _transform_parallel_checkpoint, _get_device_num_from_strategy, _make_dir, \
     _extract_layout_map, _extract_src_dst_layout_map, _parameter_not_in_local_stage, _extract_pipeline_stage_num, \
@@ -192,6 +193,7 @@ def transform_checkpoint_by_rank(rank_id, checkpoint_files_map, save_checkpoint_
             raise ValueError("Checkpoint file {} in rank {} not exits: ".format(local_file, rank))
     param_total_dict = defaultdict(dict)
     param_attr_dict = defaultdict(dict)
+    param_type_dict = defaultdict(dict)
     src_strategy_list, dst_strategy_list = _extract_src_dst_layout_map(rank_id, src_strategy_file, dst_strategy_file)
     # src rank => local rank inside pipeline stage
     src_stage_device_num = np.prod(src_strategy_list.get(list(src_strategy_list.keys())[0])[0]) if src_strategy_list \
@@ -208,11 +210,15 @@ def transform_checkpoint_by_rank(rank_id, checkpoint_files_map, save_checkpoint_
                     and _parameter_not_in_local_stage(param_name, origin_dst_strategy_list, dst_strategy_list):
                 continue
             src_rank = rank % src_stage_device_num
+            param_type_dict[param_name][src_rank] = str(param.data.dtype)
+            if param.data.dtype == mstype.bfloat16:
+                param.set_dtype(mstype.float32)
             param_total_dict[param_name][src_rank] = param.data.asnumpy()
             param_attr_dict[param_name][src_rank] = (param.requires_grad, param.layerwise_parallel)
     local_rank_id = rank_id % dst_stage_device_num
     transform_param_list = _transform_parallel_checkpoint(local_rank_id, param_total_dict,
-                                                          param_attr_dict, src_strategy_list, dst_strategy_list)
+                                                          param_attr_dict, src_strategy_list, dst_strategy_list,
+                                                          param_type_dict)
     ms.save_checkpoint(transform_param_list, save_checkpoint_file_name)
 
 
@@ -297,11 +303,15 @@ def transform_checkpoints(src_checkpoints_dir, dst_checkpoints_dir, ckpt_prefix,
     for needed_rank_list_key, transform_rank_list in needed_rank_list_map.items():
         param_total_dict = defaultdict(dict)
         param_attr_dict = defaultdict(dict)
+        param_type_dict = defaultdict(dict)
         needed_rank_list = needed_rank_list_key.split("-")
         for needed_rank in needed_rank_list:
             ckpt_dict = ms.load_checkpoint(all_checkpoint_files_map.get(int(needed_rank)))
             for param_name, param in ckpt_dict.items():
                 src_rank = int(needed_rank) % src_stage_device_num
+                param_type_dict[param_name][src_rank] = str(param.data.dtype)
+                if param.data.dtype == mstype.bfloat16:
+                    param.set_dtype(mstype.float32)
                 param_total_dict[param_name][src_rank] = param.data.asnumpy()
                 param_attr_dict[param_name][src_rank] = (param.requires_grad, param.layerwise_parallel)
         for transform_rank in transform_rank_list:
@@ -316,7 +326,8 @@ def transform_checkpoints(src_checkpoints_dir, dst_checkpoints_dir, ckpt_prefix,
 
             local_rank_id = transform_rank % dst_stage_device_num
             transform_param_list = _transform_parallel_checkpoint(local_rank_id, param_total_dict_copy,
-                                                                  param_attr_dict, src_strategy_list, dst_strategy_list)
+                                                                  param_attr_dict, src_strategy_list, dst_strategy_list,
+                                                                  param_type_dict)
             save_checkpoint_file = "{}{}.ckpt".format(ckpt_prefix, transform_rank)
             save_checkpoint_file_dir = os.path.join(dst_checkpoints_dir, "rank_{}".format(transform_rank))
             if not os.path.exists(save_checkpoint_file_dir):
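
Taken together, the checkpoint_transform.py and _parallel_serialization.py hunks record each parameter's dtype in a `param_type_dict`, reshard bfloat16 parameters as float32 (NumPy has no bfloat16), and restore bfloat16 with `set_dtype` afterwards. The public entry point is unchanged; a sketch with placeholder paths:

    import mindspore as ms

    ms.transform_checkpoints(
        src_checkpoints_dir="./src_ckpt",          # placeholder paths
        dst_checkpoints_dir="./dst_ckpt",
        ckpt_prefix="transformed_",
        src_strategy_file="./src_strategy.ckpt",
        dst_strategy_file="./dst_strategy.ckpt",
    )
    # bfloat16 parameters in the source checkpoints are written back as bfloat16.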
@@ -36,13 +36,17 @@ class Shard(Shard_):
     def __call__(self, fn, in_strategy, out_strategy=None, parameter_plan=None, device="Ascend", level=0):
         if ms.context.get_context("mode") != ms.context.PYNATIVE_MODE or \
                 ms.context.get_auto_parallel_context("parallel_mode") not in ["auto_parallel"]:
-            raise AssertionError(f"Cell shard only supports auto parallel under PyNative mode.")
+            raise AssertionError(
+                f"Cell shard only supports auto parallel under PyNative mode.")
         if ms.context.get_context("device_target") not in ["Ascend", "GPU"]:
-            raise AssertionError(f"'Shard' now only supports 'Ascend' and 'GPU'")
+            raise AssertionError(
+                f"'Shard' now only supports 'Ascend' and 'GPU'")
         if ms.context.get_auto_parallel_context("search_mode") != "sharding_propagation":
-            raise AssertionError(f"'search_mode' must be 'sharding_propagation' for 'Shard'")
+            raise AssertionError(
+                f"'search_mode' must be 'sharding_propagation' for 'Shard'")
         if not isinstance(in_strategy, tuple):
-            raise TypeError(f"For 'Shard', the 'in_strategy' should be a tuple, but got {type(in_strategy).__name__}")
+            raise TypeError(
+                f"For 'Shard', the 'in_strategy' should be a tuple, but got {type(in_strategy).__name__}")
         if not isinstance(out_strategy, (type(None), tuple)):
             raise TypeError(f"For 'Shard', the 'out_strategy' should be None or tuple, "
                             f"but got {type(out_strategy).__name__}")
@@ -117,7 +121,8 @@ class Shard(Shard_):
             return
         if isinstance(parameter_plan, dict):
             if not isinstance(fn, ms.nn.Cell):
-                raise TypeError(f"If parameter_plan is set, type of fn must be mindspore.nn.Cell, but got {type(fn)}")
+                raise TypeError(
+                    f"If parameter_plan is set, type of fn must be mindspore.nn.Cell, but got {type(fn)}")
             for k in parameter_plan.keys():
                 v = parameter_plan[k]
                 if not isinstance(k, str) or not isinstance(v, tuple):
@@ -131,10 +136,12 @@ class Shard(Shard_):
             param_strategy = parameter_plan[param_name]
             param = self._search_parameter_by_name(param_name, fn)
             if param is None:
-                logger.warning(f"{param_name} is not exist, ignored its setting.")
+                logger.warning(
+                    f"{param_name} is not exist, ignored its setting.")
                 continue
 
-            self._check_layout_is_valid(param_name, param.shape, param_strategy)
+            self._check_layout_is_valid(
+                param_name, param.shape, param_strategy)
             if param.param_info.param_strategy:
                 logger.warning(f"The layout of parameter '{param_name}' "
                                f"has been set to {param.param_info.param_strategy}, "
@@ -143,7 +150,7 @@ class Shard(Shard_):
 
     def _is_attrs_has_been_set(self, fn, in_strategy, out_strategy, device, level):
         return self.shard_fn is not None and self.fn == fn and self.in_strategy == in_strategy and \
-            self.out_strategy == out_strategy and self.device == device and self.level == level
+               self.out_strategy == out_strategy and self.device == device and self.level == level
 
 
 def shard(fn, in_strategy, out_strategy=None, parameter_plan=None, device="Ascend", level=0):
@@ -216,8 +223,8 @@ def shard(fn, in_strategy, out_strategy=None, parameter_plan=None, device="Ascen
         ...                          device_num=2)
         >>> def test_shard(x, y):
         ...     return x + y
-        >>> x = Tensor(np.ones(shape=(32, 10)))
-        >>> y = Tensor(np.ones(shape=(32, 10)))
+        >>> x = Tensor(np.ones(shape=(32, 10)), dtype=ms.float32)
+        >>> y = Tensor(np.ones(shape=(32, 10)), dtype=ms.float32)
        >>> output = ms.shard(test_shard, in_strategy=((2, 1), (2, 1)))(x, y)
        >>> print(output.shape)
        (32, 10)
@@ -25,6 +25,7 @@ import stat
 
 from mindspore import log as logger
 
+
 def to_int(param, param_name):
     """
     Transfer param to int type.