PyPI - mindspore - Versions diffs - 2.2.10__cp37-cp37m-manylinux1_x86_64.whl → 2.2.14__cp37-cp37m-manylinux1_x86_64.whl - Mend

mindspore 2.2.10__cp37-cp37m-manylinux1_x86_64.whl → 2.2.14__cp37-cp37m-manylinux1_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of mindspore might be problematic. Click here for more details.

Files changed (162) hide show

mindspore/.commit_id +1 -1
mindspore/__init__.py +2 -1
mindspore/_akg/akg/composite/build_module.py +95 -5
mindspore/_akg/akg/topi/cpp/impl.py +1 -1
mindspore/_akg/akg/tvm/_ffi/base.py +1 -1
mindspore/_akg/akg/utils/composite_op_helper.py +7 -2
mindspore/_akg/akg/utils/dump_ascend_meta.py +22 -3
mindspore/_akg/akg/utils/util.py +18 -1
mindspore/_c_dataengine.cpython-37m-x86_64-linux-gnu.so +0 -0
mindspore/_c_expression.cpython-37m-x86_64-linux-gnu.so +0 -0
mindspore/_c_mindrecord.cpython-37m-x86_64-linux-gnu.so +0 -0
mindspore/_extends/parse/__init__.py +3 -2
mindspore/_extends/parse/parser.py +6 -1
mindspore/_extends/parse/standard_method.py +12 -2
mindspore/_mindspore_offline_debug.cpython-37m-x86_64-linux-gnu.so +0 -0
mindspore/bin/cache_admin +0 -0
mindspore/bin/cache_server +0 -0
mindspore/common/_utils.py +16 -0
mindspore/common/tensor.py +0 -2
mindspore/communication/management.py +3 -0
mindspore/context.py +34 -4
mindspore/dataset/engine/cache_client.py +8 -5
mindspore/dataset/engine/datasets.py +23 -0
mindspore/dataset/engine/validators.py +1 -1
mindspore/dataset/vision/py_transforms_util.py +2 -2
mindspore/experimental/optim/lr_scheduler.py +5 -6
mindspore/lib/libdnnl.so.2 +0 -0
mindspore/lib/libmindspore_backend.so +0 -0
mindspore/lib/libmindspore_common.so +0 -0
mindspore/lib/libmindspore_core.so +0 -0
mindspore/lib/libmindspore_glog.so.0 +0 -0
mindspore/lib/libmindspore_gpr.so.15 +0 -0
mindspore/lib/libmindspore_grpc++.so.1 +0 -0
mindspore/lib/libmindspore_grpc.so.15 +0 -0
mindspore/lib/libmindspore_shared_lib.so +0 -0
mindspore/lib/libopencv_core.so.4.5 +0 -0
mindspore/lib/libopencv_imgcodecs.so.4.5 +0 -0
mindspore/lib/libopencv_imgproc.so.4.5 +0 -0
mindspore/lib/plugin/ascend/custom_aicpu_ops/op_impl/cpu/aicpu_kernel/impl/libcust_aicpu_kernels.so +0 -0
mindspore/lib/plugin/ascend/custom_aicpu_ops/op_impl/cpu/aicpu_kernel/impl/libcust_cpu_kernels.so +0 -0
mindspore/lib/plugin/ascend/custom_aicpu_ops/op_impl/cpu/config/cust_aicpu_kernel.json +118 -0
mindspore/lib/plugin/ascend/custom_aicpu_ops/op_proto/libcust_op_proto.so +0 -0
mindspore/lib/plugin/ascend/libakg.so +0 -0
mindspore/lib/plugin/ascend/libascend_collective.so +0 -0
mindspore/lib/plugin/ascend/libdvpp_utils.so +0 -0
mindspore/lib/plugin/ascend/libmindspore_aicpu_kernels.so +0 -0
mindspore/lib/plugin/ascend/libmindspore_cpu_kernels.so +0 -0
mindspore/lib/plugin/cpu/libakg.so +0 -0
mindspore/lib/plugin/gpu/libcuda_ops.so.10 +0 -0
mindspore/lib/plugin/gpu/libcuda_ops.so.11 +0 -0
mindspore/lib/plugin/gpu10.1/libakg.so +0 -0
mindspore/lib/plugin/gpu11.1/libakg.so +0 -0
mindspore/lib/plugin/gpu11.1/libnccl.so.2 +0 -0
mindspore/lib/plugin/gpu11.6/libakg.so +0 -0
mindspore/lib/plugin/gpu11.6/libnccl.so.2 +0 -0
mindspore/lib/plugin/libmindspore_ascend.so.1 +0 -0
mindspore/lib/plugin/libmindspore_gpu.so.10.1 +0 -0
mindspore/lib/plugin/libmindspore_gpu.so.11.1 +0 -0
mindspore/lib/plugin/libmindspore_gpu.so.11.6 +0 -0
mindspore/mindrecord/tools/cifar100_to_mr.py +49 -57
mindspore/mindrecord/tools/cifar10_to_mr.py +46 -55
mindspore/mindrecord/tools/csv_to_mr.py +3 -8
mindspore/mindrecord/tools/mnist_to_mr.py +4 -9
mindspore/mindrecord/tools/tfrecord_to_mr.py +1 -4
mindspore/nn/layer/activation.py +1 -1
mindspore/nn/layer/embedding.py +2 -2
mindspore/nn/layer/flash_attention.py +48 -135
mindspore/nn/loss/loss.py +1 -1
mindspore/nn/optim/ada_grad.py +2 -2
mindspore/nn/optim/sgd.py +3 -2
mindspore/nn/wrap/__init__.py +4 -2
mindspore/nn/wrap/cell_wrapper.py +6 -3
mindspore/numpy/math_ops.py +1 -1
mindspore/ops/__init__.py +3 -0
mindspore/ops/_grad_experimental/grad_array_ops.py +0 -31
mindspore/ops/_grad_experimental/grad_comm_ops.py +4 -2
mindspore/ops/_grad_experimental/grad_inner_ops.py +8 -0
mindspore/ops/_grad_experimental/grad_math_ops.py +37 -17
mindspore/ops/_op_impl/aicpu/__init__.py +1 -0
mindspore/ops/_op_impl/aicpu/generate_eod_mask.py +38 -0
mindspore/ops/_op_impl/aicpu/linear_sum_assignment.py +21 -2
mindspore/ops/function/array_func.py +6 -5
mindspore/ops/function/debug_func.py +1 -1
mindspore/ops/function/linalg_func.py +21 -11
mindspore/ops/function/math_func.py +3 -0
mindspore/ops/function/nn_func.py +13 -11
mindspore/ops/function/parameter_func.py +2 -0
mindspore/ops/function/sparse_unary_func.py +2 -2
mindspore/ops/function/vmap_func.py +1 -0
mindspore/ops/operations/__init__.py +5 -2
mindspore/ops/operations/_embedding_cache_ops.py +1 -1
mindspore/ops/operations/_grad_ops.py +3 -4
mindspore/ops/operations/_inner_ops.py +56 -1
mindspore/ops/operations/_quant_ops.py +4 -4
mindspore/ops/operations/_rl_inner_ops.py +1 -1
mindspore/ops/operations/array_ops.py +15 -4
mindspore/ops/operations/custom_ops.py +1 -1
mindspore/ops/operations/debug_ops.py +1 -1
mindspore/ops/operations/image_ops.py +3 -3
mindspore/ops/operations/inner_ops.py +49 -0
mindspore/ops/operations/math_ops.py +65 -3
mindspore/ops/operations/nn_ops.py +95 -28
mindspore/ops/operations/random_ops.py +2 -0
mindspore/ops/operations/sparse_ops.py +4 -4
mindspore/ops/silent_check.py +162 -0
mindspore/parallel/__init__.py +3 -2
mindspore/parallel/_auto_parallel_context.py +82 -3
mindspore/parallel/_parallel_serialization.py +34 -2
mindspore/parallel/_tensor.py +3 -1
mindspore/parallel/_transformer/transformer.py +8 -8
mindspore/parallel/checkpoint_transform.py +191 -45
mindspore/profiler/parser/ascend_cluster_generator.py +111 -0
mindspore/profiler/parser/ascend_communicate_generator.py +315 -0
mindspore/profiler/parser/ascend_flops_generator.py +8 -2
mindspore/profiler/parser/ascend_fpbp_generator.py +8 -2
mindspore/profiler/parser/ascend_hccl_generator.py +2 -2
mindspore/profiler/parser/ascend_msprof_exporter.py +30 -6
mindspore/profiler/parser/ascend_msprof_generator.py +16 -5
mindspore/profiler/parser/ascend_op_generator.py +15 -7
mindspore/profiler/parser/ascend_timeline_generator.py +5 -2
mindspore/profiler/parser/base_timeline_generator.py +11 -3
mindspore/profiler/parser/cpu_gpu_timeline_generator.py +2 -1
mindspore/profiler/parser/framework_parser.py +8 -2
mindspore/profiler/parser/memory_usage_parser.py +8 -2
mindspore/profiler/parser/minddata_analyzer.py +8 -2
mindspore/profiler/parser/minddata_parser.py +1 -1
mindspore/profiler/parser/msadvisor_analyzer.py +4 -2
mindspore/profiler/parser/msadvisor_parser.py +9 -3
mindspore/profiler/profiling.py +97 -25
mindspore/rewrite/api/node.py +1 -1
mindspore/rewrite/api/symbol_tree.py +2 -2
mindspore/rewrite/parsers/for_parser.py +6 -6
mindspore/rewrite/parsers/module_parser.py +4 -4
mindspore/scipy/ops.py +55 -5
mindspore/scipy/optimize/__init__.py +3 -2
mindspore/scipy/optimize/linear_sum_assignment.py +38 -33
mindspore/train/callback/_checkpoint.py +8 -8
mindspore/train/callback/_landscape.py +2 -3
mindspore/train/callback/_summary_collector.py +6 -7
mindspore/train/dataset_helper.py +6 -0
mindspore/train/model.py +17 -5
mindspore/train/serialization.py +6 -1
mindspore/train/summary/_writer_pool.py +1 -1
mindspore/train/summary/summary_record.py +5 -6
mindspore/version.py +1 -1
{mindspore-2.2.10.dist-info → mindspore-2.2.14.dist-info}/METADATA +3 -2
{mindspore-2.2.10.dist-info → mindspore-2.2.14.dist-info}/RECORD +150 -158
mindspore/lib/plugin/libmindspore_ascend.so.2 +0 -0
mindspore/ops/_op_impl/_custom_op/flash_attention/__init__.py +0 -0
mindspore/ops/_op_impl/_custom_op/flash_attention/attention.py +0 -406
mindspore/ops/_op_impl/_custom_op/flash_attention/constants.py +0 -41
mindspore/ops/_op_impl/_custom_op/flash_attention/flash_attention_bwd.py +0 -467
mindspore/ops/_op_impl/_custom_op/flash_attention/flash_attention_fwd.py +0 -563
mindspore/ops/_op_impl/_custom_op/flash_attention/flash_attention_impl.py +0 -193
mindspore/ops/_op_impl/_custom_op/flash_attention/tik_ops_utils.py +0 -435
mindspore/ops/_op_impl/_custom_op/flash_attention/tiling_strategy/__init__.py +0 -0
mindspore/ops/_op_impl/_custom_op/flash_attention/tiling_strategy/sparse_tiling.py +0 -45
mindspore/ops/_op_impl/_custom_op/flash_attention/tiling_strategy/strategy.py +0 -67
mindspore/ops/_op_impl/_custom_op/flash_attention/tiling_strategy/wukong_tiling.py +0 -62
{mindspore-2.2.10.dist-info → mindspore-2.2.14.dist-info}/WHEEL +0 -0
{mindspore-2.2.10.dist-info → mindspore-2.2.14.dist-info}/entry_points.txt +0 -0
{mindspore-2.2.10.dist-info → mindspore-2.2.14.dist-info}/top_level.txt +0 -0

mindspore/ops/operations/math_ops.py CHANGED Viewed

@@ -123,6 +123,64 @@ class _MathBinaryOp(_BinaryOp):
         real_shape = [dim if cmp_dim > 0 else cmp_dim for dim, cmp_dim in zip(shape_value, cmp_shape)]
         return tuple(real_shape)
+class SilentCheck(Primitive):
+    """
+    Implement SilentCheck on `pre_val`, `min_val`, `max_val`, `result` and
+    update them inplace with given parameters.
+    Args:
+        c_min_steps (int): Must be an int (type-checked in `__init__`). TODO: document what it controls.
+        c_thresh_l1 (float): Must be a float (type-checked in `__init__`). TODO: document what it controls.
+        c_coeff_l1 (float): Must be a float (type-checked in `__init__`). TODO: document what it controls.
+        c_thresh_l2 (float): Must be a float (type-checked in `__init__`). TODO: document what it controls.
+        c_coeff_l2 (float): Must be a float (type-checked in `__init__`). TODO: document what it controls.
+    Inputs:
+        - **val** (Tensor) - Tensor with dtype float32.
+        - **input_grad** (Parameter) - Tensor with dtype float32.
+        - **pre_val** (Parameter) - Input Parameter with dtype float32.
+        - **min_val** (Parameter) - Input Parameter with dtype float32.
+        - **max_val** (Parameter) - Input Parameter with dtype float32.
+        - **val_counter** (Parameter) - Input Parameter with dtype int32.
+    Outputs:
+        Tuple of 5 Tensors, the updated parameters.
+        - **input_grad** (Tensor) - Tensor with dtype float32.
+        - **pre_val** (Tensor) - Tensor with dtype float32.
+        - **min_val** (Tensor) - Tensor with dtype float32.
+        - **max_val** (Tensor) - Tensor with dtype float32.
+        - **result** (Tensor) - Tensor with dtype int32.
+    Raises:
+        TypeError: If `val` is not Tensor with dtype float32.
+        TypeError: If `result` is not Tensor with dtype int32.
+        TypeError: If `pre_val`, `min_val`, `max_val`, `input_grad` are not all Parameter type with dtype float32.
+        TypeError: If `c_thresh_l1`, `c_coeff_l1`, `c_thresh_l2` or `c_coeff_l2` is not a float number.
+        TypeError: If `c_min_steps` is not an int number.
+    Supported Platforms:
+        ``Ascend``
+    Examples:
+        >>> from mindspore.ops.operations.math_ops import SilentCheck
+        >>> # All five constructor arguments are required; the values below are illustrative only.
+        >>> silent_check = SilentCheck(100, 1000000.0, 100000.0, 10000.0, 5000.0)
+    """
+    @prim_attr_register
+    def __init__(self, c_min_steps, c_thresh_l1, c_coeff_l1, c_thresh_l2, c_coeff_l2):
+        """Initialize SilentCheck: type-check the five attributes and register `side_effect_mem`."""
+        validator.check_value_type("c_min_steps", c_min_steps, [int], self.name)
+        validator.check_value_type("c_thresh_l1", c_thresh_l1, [float], self.name)
+        validator.check_value_type("c_coeff_l1", c_coeff_l1, [float], self.name)
+        validator.check_value_type("c_thresh_l2", c_thresh_l2, [float], self.name)
+        validator.check_value_type("c_coeff_l2", c_coeff_l2, [float], self.name)
+        # NOTE(review): presumably set because the op updates its Parameter inputs in place - confirm.
+        self.add_prim_attr('side_effect_mem', True)
 class _BitwiseBinaryOp(_MathBinaryOp):
     """
@@ -462,6 +520,7 @@ class AssignAdd(Primitive):
         >>> import mindspore
         >>> import numpy as np
         >>> from mindspore import Tensor, ops, nn
+        >>> from mindspore.common.initializer import initializer
         >>> class Net(nn.Cell):
         ...     def __init__(self):
         ...         super(Net, self).__init__()
@@ -512,6 +571,7 @@ class AssignSub(Primitive):
         >>> import mindspore
         >>> import numpy as np
         >>> from mindspore import Tensor, ops, nn
+        >>> from mindspore.common.initializer import initializer
         >>> class Net(nn.Cell):
         ...     def __init__(self):
         ...         super(Net, self).__init__()
@@ -6569,9 +6629,9 @@ class LinSpace(Primitive):
     Inputs:
         - **start** (Tensor) -  Start value of interval, 0-D Tensor with dtype float32 or float64.
-        - **stop** (Tensor) - Last value of interval, 0-D Tensor with dtype  float32 or float64.
-        - **num** (int) - Number of ticks in the interval, inclusive of `start` and `stop`.
-          Supported dtypes: int32, int64.
+        - **stop** (Tensor) - Last value of interval, 0-D Tensor with dtype float32 or float64.
+        - **num** (Union[int, Tensor]) - Number of ticks in the interval, inclusive of `start` and `stop`.
+          Must be a positive integer. When the input is Tensor, it must be a 0-D Tensor with dtype int32 or int64.
     Outputs:
         Tensor, has the same shape and dtype as `start`.
@@ -7253,6 +7313,7 @@ class Igamma(Primitive):
     Examples:
         >>> import numpy as np
+        >>> import mindspore
         >>> from mindspore import Tensor, ops
         >>> a = Tensor(np.array([2.0, 4.0, 6.0, 8.0]).astype(np.float32))
         >>> x = Tensor(np.array([2.0, 3.0, 4.0, 5.0]).astype(np.float32))
@@ -7291,6 +7352,7 @@ class Igammac(Primitive):
         ``Ascend`` ``GPU`` ``CPU``
     Examples:
+        >>> import mindspore
         >>> import numpy as np
         >>> from mindspore import Tensor, ops
         >>> a = Tensor(np.array([2.0, 4.0, 6.0, 8.0]).astype(np.float32))

mindspore/ops/operations/nn_ops.py CHANGED Viewed

@@ -3777,7 +3777,7 @@ class LayerNorm(Primitive):
         - **output_x** (Tensor) - The normalized input, has the same type and shape as the `input_x`.
         - **mean** (Tensor) - The first `begin_norm_axis` dimensions of `mean` shape is the same as `input_x`,
           and the remaining dimensions are 1. Suppose the shape of the `input_x` is :math:`(x_1, x_2, \ldots, x_R)`,
-          the shape of the `mean` is :math:`(x_1, \ldots, x_{begin_params_axis}, 1, \ldots, 1)`
+          the shape of the `mean` is :math:`(x_1, \ldots, x_{begin\_params\_axis}, 1, \ldots, 1)`
           (when `begin_params_axis=0`, the shape of `mean` is :math:`(1, \ldots, 1)` ).
         - **variance** (Tensor) - Shape is the same as `mean` .
@@ -4917,6 +4917,7 @@ class Adam(Primitive):
         >>> import mindspore
         >>> import numpy as np
         >>> from mindspore import Tensor, nn, ops
+        >>> from mindspore import Parameter
         >>> class Net(nn.Cell):
         ...     def __init__(self):
         ...         super(Net, self).__init__()
@@ -9991,6 +9992,9 @@ class FractionalMaxPool3DWithFixedKsize(Primitive):
         ``Ascend`` ``GPU`` ``CPU``
     Examples:
+        >>> import numpy as np
+        >>> from mindspore import Tensor, ops
+        >>> from mindspore import dtype as mstype
         >>> x = Tensor(np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16])
         ...       .reshape([1, 1, 2, 2, 4]), mstype.float32)
         >>> random_samples = Tensor(np.array([0.7, 0.7, 0.7]).reshape([1, 1, 3]), mstype.float32)
@@ -11363,7 +11367,7 @@ class PromptFlashAttention(Primitive):
           For each element, 0 indicates retention and 1 indicates discard. Input tensor of shape :math:`(B, 1, S, S)`.
         - **actual_seq_lengths** (Tensor): Describe actual sequence length of each input with data type of int.
         - **actual_seq_lengths_kv** (Tensor): Describe actual sequence length of each input with data type of int.
-        - **padding_mask** (Tensor) - The padding mask tensor with data type of float16 or float32
+        - **pse_shift** (Tensor) - The position encoding tensor with data type of float16 or float32.
         - **deq_scale1** (Tensor)
         - **quant_scale1** (Tensor)
         - **deq_scale2** (Tensor)
@@ -11406,7 +11410,7 @@ class PromptFlashAttention(Primitive):
         validator.check_value_type('num_key_value_heads', num_key_value_heads, [int], self.name)
         validator.check_value_type('sparse_mode', sparse_mode, [int], self.name)
         self.init_prim_io_names(inputs=["query", "key", "value", "attn_mask", "actual_seq_lengths",
-                                        "actual_seq_lengths_kv", "padding_mask", "deq_scale1", "quant_scale1",
+                                        "actual_seq_lengths_kv", "pse_shift", "deq_scale1", "quant_scale1",
                                         "deq_scale2", "quant_scale2", "quant_offset2"],
                                 outputs=["attention_out"])
@@ -11417,41 +11421,50 @@ class FlashAttentionScore(Primitive):
     .. warning::
         This is an experimental API that is subject to change or deletion.
     B -- Batch size
-    S -- Sequence length
-    H -- Hidden size
-    N -- Num heads
-    D -- Dim size
+    S1 -- Sequence length of query
+    S2 -- Sequence length of key and value
+    N1 -- Num heads of query
+    N2 -- Num heads of key and value, and N2 must be a factor of N1
+    D -- head size
+    H1 -- Hidden size of query, which equals N1 * D
+    H2 -- Hidden size of key and value, which equals N2 * D
     Args:
-        head_num (int): The number of the heads.
+        head_num (int): The head num of query.
         keep_prob (float): The keep probability of dropout. Default: 1.0.
         scale_value (float): The scale value. Default: 1.0.
         pre_tokens (int): Previous tokens. Default: 65536.
         next_tokens (int): Next tokens. Default: 65536.
         inner_precise (int): Specify the execution mode, where 0 indicates high precision mode and 1 indicates high
-        performance mode. Default: 0.
+        performance mode. Only 0 is supported currently. Default: 0.
         input_layout (str, optional): Specifies the layout of `query`, the value must be one of ["BSH", "BNSD"].
         Default: "BSH".
         sparse_mode (int): Default 0.
     Inputs:
-        - **query** (Tensor) - The query tensor with data type must be in [float16, float32, bfloat16].
-          Input tensor of shape :math:`(B, S, H)`.
-        - **key** (Tensor) - The key tensor with data must be in [float16, float32, bfloat16].
-          Input tensor of shape :math:`(B, S, H)`.
-        - **value** (Tensor) - The value tensor with data must be in [float16, float32, bfloat16].
-          Input tensor of shape :math:`(B, S, H)`.
-        - **attn_mask** (Tensor) - The attention mask tensor with data type of uint8.
-          For each element, 0 indicates retention and 1 indicates discard. Input tensor of shape :math:`(B, 1, S, S)`.
-        - **drop_mask** (Tensor) - The dropout mask tensor with data type of UInt8.
-          Input tensor of shape :math:`(B, N, S, S // 8) or ()`.
-        - **real_shift** (None) - The position embedding code of float16 or float32, not implemented yet.
+        - **query** (Tensor[float16, float32, bfloat16]) - The query tensor.
+          Input tensor of shape :math:`(B, S1, H1)` or `(B, N1, S1, D)`.
+        - **key** (Tensor[float16, float32, bfloat16]) - The key tensor.
+          Input tensor of shape :math:`(B, S2, H2)` or `(B, N2, S2, D)`.
+        - **value** (Tensor[float16, float32, bfloat16]) - The value tensor.
+          Input tensor of shape :math:`(B, S2, H2)` or `(B, N2, S2, D)`.
+        - **real_shift** (Tensor[float16, float32, bfloat16], None) - The position embedding code.
+          Input tensor of shape :math:`(B, N1, S1, S2)` or :math:`(B, N1, 1, S2)`.
+        - **drop_mask** (Tensor[uint8], None) - The dropout mask tensor.
+          Input tensor of shape :math:`(B, N1, S1, S2 // 8)` or None.
         - **padding_mask** (None) - The padding mask of float16 or float32, not implemented yet.
-        - **prefix** (None) - Not implemented yet.
+        - **attn_mask** (Tensor[uint8], None) - The attention mask tensor.
+          For each element, 0 indicates retention and 1 indicates discard.
+          Input tensor of shape :math:`(B, N1, S1, S2)`, `(B, 1, S1, S2)` or `(S1, S2)`.
+        - **prefix** (Tensor[int64], None) - Not implemented yet.
+          Input tensor of shape :math:`(B,)`.
     Outputs:
-        - **attention_out** (Tensor) - (B, S, H)
-        - **softmax_max** (Tensor) - (B, N, S, 16)/(B, N, S, 8) when fp16/fp32
-        - **softmax_sum** (Tensor) - (B, N, S, 16)/(B, N, S, 8) when fp16/fp32
+        - **softmax_max** (Tensor[float32]) - (B, N1, S1, 8)
+        - **softmax_sum** (Tensor[float32]) - (B, N1, S1, 8)
+        - **softmax_out** (Tensor[float32]) - Useless output, ignore it. Output tensor of shape : `()`
+        - **attention_out** (Tensor[float16, float32, bfloat16]) - The output of attention, its shape, and data type
+          are the same as the query.
     Supported Platforms:
         ``Ascend``
     """
@@ -11469,14 +11482,14 @@ class FlashAttentionScore(Primitive):
         validator.check_value_type('next_tokens', next_tokens, [int], self.name)
         validator.check_value_type('inner_precise', inner_precise, [int], self.name)
         validator.check_value_type('sparse_mode', sparse_mode, [int], self.name)
-        if inner_precise not in [0, 1]:
-            raise ValueError(f"Attribute 'inner_precise' must be either 0 or 1, but got {inner_precise}")
+        if inner_precise not in [0]:
+            raise ValueError(f"Attribute 'inner_precise' must be 0, but got {inner_precise}")
         validator.check_value_type('input_layout', input_layout, [str], self.name)
         if input_layout not in ["BSH", "BNSD"]:
             raise ValueError(f"Attribute 'input_layout' must be either 'BSH' or 'BNSD', but got {input_layout}")
         self.init_prim_io_names(
-            inputs=['query', 'key', 'value', 'attn_mask', 'drop_mask', 'real_shift', 'padding_mask', 'prefix'],
-            outputs=['attention_out', 'softmax_max', 'softmax_sum'])
+            inputs=['query', 'key', 'value', 'real_shift', 'drop_mask', 'padding_mask', 'attn_mask', 'prefix'],
+            outputs=['softmax_max', 'softmax_sum', 'softmax_out', 'attention_out'])
 class RmsNorm(Primitive):
@@ -11514,3 +11527,57 @@ class RmsNorm(Primitive):
         """Initialize RmsNorm."""
         validator.check_value_type("epsilon", epsilon, [float], self.name)
         self.init_prim_io_names(inputs=['x', 'gamma'], outputs=["y", "rstd"])
+class PagedAttention(Primitive):
+    r"""
+    Primitive whose declared inputs are `query`, `key_cache`, `value_cache`, `block_tables` and
+    `context_lens`, and whose single declared output is `attention_out`.
+    .. warning::
+        This is an experimental API that is subject to change or deletion.
+    Args:
+        head_num (int): Must be an int.
+        scale_value (float): Must be a float. Default: ``1.0`` .
+        kv_head_num (int): Must be an int. Default: ``0`` .
+    Raises:
+        TypeError: If `head_num` or `kv_head_num` is not an int, or `scale_value` is not a float.
+    """
+    @prim_attr_register
+    def __init__(self, head_num, scale_value=1.0, kv_head_num=0):
+        """Initialize PagedAttention"""
+        validator.check_value_type('head_num', head_num, [int], self.name)
+        validator.check_value_type('scale_value', scale_value, [float], self.name) # scale after qkbmm
+        validator.check_value_type('kv_head_num', kv_head_num, [int], self.name) # for MQA
+        # Declare the positional input/output names of the primitive.
+        self.init_prim_io_names(
+            inputs=['query', 'key_cache', 'value_cache', 'block_tables', 'context_lens'],
+            outputs=['attention_out'])
+class PagedAttentionMask(Primitive):
+    r"""
+    Primitive with the same attributes and inputs as `PagedAttention` plus one extra input,
+    `alibi_mask`; its single declared output is `attention_out`.
+    .. warning::
+        This is an experimental API that is subject to change or deletion.
+    Args:
+        head_num (int): Must be an int.
+        scale_value (float): Must be a float. Default: ``1.0`` .
+        kv_head_num (int): Must be an int. Default: ``0`` .
+    Raises:
+        TypeError: If `head_num` or `kv_head_num` is not an int, or `scale_value` is not a float.
+    """
+    @prim_attr_register
+    def __init__(self, head_num, scale_value=1.0, kv_head_num=0):
+        """Initialize PagedAttentionMask"""
+        validator.check_value_type('head_num', head_num, [int], self.name)
+        validator.check_value_type('scale_value', scale_value, [float], self.name) # scale after qkbmm
+        validator.check_value_type('kv_head_num', kv_head_num, [int], self.name) # for MQA
+        # Declare the positional input/output names of the primitive.
+        self.init_prim_io_names(
+            inputs=['query', 'key_cache', 'value_cache', 'block_tables', 'context_lens', 'alibi_mask'],
+            outputs=['attention_out'])
+class ReshapeAndCache(Primitive):
+    r"""
+    Primitive whose declared inputs are `key`, `value`, `key_cache`, `value_cache` and `slot_mapping`,
+    and whose single declared output is `key_out`. It takes no constructor arguments.
+    .. warning::
+        This is an experimental API that is subject to change or deletion.
+    """
+    # `key_cache` and `value_cache` are the only inputs declared RW_WRITE; `key`, `value` and both
+    # caches share dtype group T, while `slot_mapping` uses the separate group T1.
+    __mindspore_signature__ = (
+        sig.make_sig('key', dtype=sig.sig_dtype.T),
+        sig.make_sig('value', dtype=sig.sig_dtype.T),
+        sig.make_sig('key_cache', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T),
+        sig.make_sig('value_cache', sig.sig_rw.RW_WRITE, dtype=sig.sig_dtype.T),
+        sig.make_sig('slot_mapping', dtype=sig.sig_dtype.T1),
+    )
+    @prim_attr_register
+    def __init__(self):
+        """Initialize ReshapeAndCache"""
+        self.init_prim_io_names(
+            inputs=['key', 'value', 'key_cache', 'value_cache', 'slot_mapping'],
+            outputs=['key_out'])
+        # NOTE(review): presumably set because the caches are written in place - confirm.
+        self.add_prim_attr('side_effect_mem', True)

mindspore/ops/operations/random_ops.py CHANGED Viewed

@@ -1271,6 +1271,8 @@ class RandpermV2(Primitive):
         ``Ascend`` ``CPU``
     Examples:
+        >>> from mindspore import Tensor, ops
+        >>> from mindspore import dtype as mstype
         >>> n = Tensor([4], mstype.int64)
         >>> seed = 0
         >>> offset = 0

mindspore/ops/operations/sparse_ops.py CHANGED Viewed

@@ -479,8 +479,8 @@ class SparseToDenseV2(Primitive):
         Tensor, converted from sparse tensor. The dtype is same as `values`, and the shape is `output_shape`.
     Raises:
-        TypeError: If the dtype of `indices` is neither Int32 nor Int64.
-        TypeError: If the dtype of `outputshape` is neither Int32 nor Int64.
+        TypeError: If the dtype of `indices` is neither int32 nor int64.
+        TypeError: If the dtype of `output_shape` is neither int32 nor int64.
         ValueError: If the shape of `output_shape`, shape of `indices`,
             shape of `default_value` and shape of `values` don't meet the parameter description.
         ValueError: If each Element of `output_shape` is not > 0.
@@ -2382,8 +2382,8 @@ class SparseCountSparseOutput(Primitive):
     Args:
         binary_output (bool) - If ``False`` , output the number of occurrences of each value,
                                if ``True`` , output 1 for corresponding values. Default: ``False`` .
-        minlength(Scalar) - Int type minimum value to count, Default: ``-1`` .
-        maxlength(Scalar) - Int type maximum value to count, Default: ``-1`` .
+        minlength(Scalar) - int type minimum value to count, Default: ``-1`` .
+        maxlength(Scalar) - int type maximum value to count, Default: ``-1`` .
     Inputs:
         - **indices** (Tensor) - Tensor representing the position of the element in the sparse

mindspore/ops/silent_check.py ADDED Viewed

@@ -0,0 +1,162 @@
+# Copyright 2024 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+"""Silent Check."""
+import os
+from mindspore.common.tensor import Tensor
+from mindspore.common.parameter import Parameter
+import mindspore.common.dtype as mstype
+from . import operations
+from .operations._inner_ops import _MirrorSilentCheck
+from .operations import RmsNorm as OriginRmsNorm
+from .operations import LayerNorm as OriginLayerNorm
+from .primitive import Primitive
+NPU_ASD_ENABLE = 'NPU_ASD_ENABLE'
+class ASDBase:
+    """
+    ASDBase is the base class of operator with accuracy-sensitive detection feature in python.
+    It holds an instance of the original operator (`self.op`) and a `_MirrorSilentCheck`
+    primitive (`self.check_op`), and forwards unknown attribute lookups to the wrapped operator.
+    Args:
+        cls (Primitive): Original operator requiring accuracy-sensitive detection feature.
+        args (tuple):  A variable parameter tuple to the original operator.
+        kwargs (dict): A variable parameter dictionary passed to the original operator.
+    Supported Platforms:
+        ``Ascend``
+    Examples:
+        >>> from mindspore.ops.silent_check import ASDBase
+        >>> from mindspore.ops import LayerNorm as OriginLayerNorm
+        >>> class LayerNormASD(ASDBase):
+        ...     def __init__(self, *args, **kwargs):
+        ...         super().__init__(OriginLayerNorm, *args, **kwargs)
+        ...         # init parameters for accuracy-sensitive detection by calling the base class method generate_params()
+        ...         self.pre_val, self.min_val, self.max_val, self.cnt = self.generate_params()
+        ...
+        ...     def __call__(self, input_x, gamma, beta):
+        ...         if self.enable_check:
+        ...             # execute accuracy-sensitive detection by calling the check_op of base class
+        ...             input_x = self.check_op(
+        ...                 input_x, self.pre_val, self.min_val, self.max_val, self.cnt, None)
+        ...             self.cnt += 1
+        ...         # return the result of original operator
+        ...         return self.op(input_x, gamma, beta)
+    """
+    # Class-wide counter; `generate_params` embeds it in parameter names to keep them unique.
+    _index = 0
+    __ms_class__ = True
+    def __init__(self, cls, *args, **kwargs):
+        self.op = cls(*args, **kwargs)
+        self.check_op = _MirrorSilentCheck()
+        self._suffix = "ASD_" + cls.__name__
+        # Despite its name this is a set: the public attribute names of `Primitive`. `__getattr__`
+        # uses it to decide which callables of `self.op` are wrapped.
+        self._op_attr_dict = {
+            name for name in dir(Primitive) if not name.startswith("_")}
+        # Detection is switched on only when the environment variable is exactly "1".
+        self.enable_check = os.environ.get(NPU_ASD_ENABLE) == "1"
+    def __getattr__(self, name):
+        """
+        Forward lookups that failed on this object to the wrapped operator.
+        Public `Primitive` callables are wrapped so that a call returning the wrapped operator
+        returns this object instead.
+        Raises:
+            AttributeError: If neither this object nor the wrapped operator has attribute `name`.
+        """
+        # `__getattr__` runs only after normal lookup failed. If our own state is missing (for
+        # example an instance created without `__init__` while being copied or unpickled), fail
+        # now instead of re-entering this method through `self.op` / `self._op_attr_dict`.
+        if name in ("op", "_op_attr_dict"):
+            raise AttributeError(
+                f"'{type(self).__name__}' object has no attribute '{name}'")
+        def method_wrapper(*args, **kwargs):
+            out = getattr(self.op, name)(*args, **kwargs)
+            # Keep chained calls on the ASD wrapper when the method returns the operator itself.
+            if out is self.op:
+                return self
+            return out
+        if name in self._op_attr_dict:
+            if callable(getattr(self.op, name)):
+                return method_wrapper
+        if hasattr(self.op, name):
+            return getattr(self.op, name)
+        # `object` defines no `__getattr__`, so report the missing attribute explicitly.
+        raise AttributeError(
+            f"'{type(self).__name__}' object has no attribute '{name}'")
+    def __repr__(self):
+        return self.op.__repr__()
+    def generate_params(self):
+        """
+        Generate support params for accuracy-sensitive detection.
+        Returns:
+            tuple consisting of four elements `(pre_val, min_val, max_val, cnt)`. Each is a
+            Parameter created with `requires_grad=False` and initial value 0; the first three are
+            float32 and `cnt` is int32.
+            The derived class initializes the parameters required for accuracy-sensitive detection by calling
+            this function.
+        Examples:
+            >>> from mindspore.ops.silent_check import ASDBase
+            >>> from mindspore.ops import LayerNorm as OriginLayerNorm
+            >>> class LayerNormASD(ASDBase):
+            ...     def __init__(self, *args, **kwargs):
+            ...         super().__init__(OriginLayerNorm, *args, **kwargs)
+            ...         # init parameters for accuracy-sensitive detection by calling the base class function
+            ...         self.pre_val, self.min_val, self.max_val, self.cnt = self.generate_params()
+        """
+        pre_val = Parameter(Tensor(0, mstype.float32),
+                            name=f"{self._suffix}_pre_val_{self._index}",
+                            requires_grad=False)
+        min_val = Parameter(Tensor(0, mstype.float32),
+                            name=f"{self._suffix}_min_val_{self._index}",
+                            requires_grad=False)
+        max_val = Parameter(Tensor(0, mstype.float32),
+                            name=f"{self._suffix}_max_val_{self._index}",
+                            requires_grad=False)
+        cnt = Parameter(Tensor(0, mstype.int32),
+                        name=f"{self._suffix}_cnt_{self._index}",
+                        requires_grad=False)
+        # Advance the shared counter so the next set of parameters gets different names.
+        ASDBase._index += 1
+        return pre_val, min_val, max_val, cnt
+class RmsNormASD(ASDBase):
+    """
+    RmsNorm with ASD.
+    Wraps the original `RmsNorm` operator; when `enable_check` is set, the input is first passed
+    through `check_op` together with the detection parameters before the operator is applied.
+    """
+    def __init__(self, *args, **kwargs):
+        # All arguments are forwarded unchanged to the original RmsNorm constructor.
+        super().__init__(OriginRmsNorm, *args, **kwargs)
+        self.pre_val, self.min_val, self.max_val, self.cnt = self.generate_params()
+    def __call__(self, input_x, gamma):
+        if self.enable_check:
+            # The check result replaces `input_x`; the call counter is advanced afterwards.
+            input_x = self.check_op(
+                input_x, self.pre_val, self.min_val, self.max_val, self.cnt, None)
+            self.cnt += 1
+        # Return the result of the original operator.
+        return self.op(input_x, gamma)
+class LayerNormASD(ASDBase):
+    """
+    LayerNorm with ASD.
+    Wraps the original `LayerNorm` operator; when `enable_check` is set, the input is first passed
+    through `check_op` together with the detection parameters before the operator is applied.
+    """
+    def __init__(self, *args, **kwargs):
+        # All arguments are forwarded unchanged to the original LayerNorm constructor.
+        super().__init__(OriginLayerNorm, *args, **kwargs)
+        self.pre_val, self.min_val, self.max_val, self.cnt = self.generate_params()
+    def __call__(self, input_x, gamma, beta):
+        if self.enable_check:
+            # The check result replaces `input_x`; the call counter is advanced afterwards.
+            input_x = self.check_op(
+                input_x, self.pre_val, self.min_val, self.max_val, self.cnt, None)
+            self.cnt += 1
+        # Return the result of the original operator.
+        return self.op(input_x, gamma, beta)
+def _silent_check():
+    """
+    Replace `operations.LayerNorm` and `operations.RmsNorm` with their ASD variants.
+    Does nothing unless the environment variable named by `NPU_ASD_ENABLE` is exactly "1".
+    """
+    asd_enabled = os.environ.get(NPU_ASD_ENABLE) == "1"
+    if not asd_enabled:
+        return
+    operations.LayerNorm = LayerNormASD
+    operations.RmsNorm = RmsNormASD

mindspore/parallel/__init__.py CHANGED Viewed

@@ -18,8 +18,9 @@ from __future__ import absolute_import
 from mindspore.parallel.algo_parameter_config import get_algo_parameters, reset_algo_parameters, \
     set_algo_parameters
 from mindspore.parallel.checkpoint_transform import rank_list_for_transform, transform_checkpoint_by_rank, \
-    transform_checkpoints, merge_pipeline_strategys
+    transform_checkpoints, merge_pipeline_strategys, load_segmented_checkpoints
 from mindspore.parallel.shard import shard
 __all__ = ["set_algo_parameters", "reset_algo_parameters", "get_algo_parameters", "rank_list_for_transform",
-           "transform_checkpoint_by_rank", "transform_checkpoints", "merge_pipeline_strategys", "shard"]
+           "transform_checkpoint_by_rank", "transform_checkpoints", "merge_pipeline_strategys", "shard",
+           "load_segmented_checkpoints"]

mindspore/parallel/_auto_parallel_context.py CHANGED Viewed

@@ -65,6 +65,19 @@ class _ParallelOptimizerConfig:
     OPTIMIZER_WEIGHT_SHARD_SIZE = "optimizer_weight_shard_size"
+class _PipelineConfig:
+    """
+    The keys accepted in the configuration dict for pipeline parallelism.
+    """
+    # Key of the interleave flag.
+    PIPELINE_INTERLEAVE = "pipeline_interleave"
+    # Key of the scheduler name.
+    PIPELINE_SCHEDULER = "pipeline_scheduler"
+class _PipelineScheduler:
+    """
+    The scheduler names accepted for the "pipeline_scheduler" configuration key.
+    """
+    PIPELINE_1F1B = "1f1b"
+    PIPELINE_GPIPE = "gpipe"
 class _AutoParallelContext:
     """
     _AutoParallelContext is the environment in which operations are executed
@@ -105,11 +118,11 @@ class _AutoParallelContext:
             device_num (int): The device number.
         Raises:
-            ValueError: If the device num is not in [1, 4096].
+            ValueError: If the device num is not a positive integer.
         """
         self.check_context_handle()
-        if device_num < 1 or device_num > 4096:
-            raise ValueError("The context configuration parameter 'device_num' must be in [1, 4096], "
+        if device_num < 1:
+            raise ValueError("The context configuration parameter 'device_num' must be a positive integer, "
                              "but got the value of device_num : {}.".format(device_num))
         from mindspore.communication._comm_helper import _HCCL_TEST_AVAILABLE
         self._context_handle.set_hccl_test_avaible(_HCCL_TEST_AVAILABLE)
@@ -229,6 +242,16 @@ class _AutoParallelContext:
         self.check_context_handle()
         return self._context_handle.get_pipeline_stage_split_num()
+    def get_pipeline_interleave(self):
+        """Return the pipeline interleave flag held by the context handle."""
+        self.check_context_handle()
+        handle = self._context_handle
+        interleave = handle.get_pipeline_interleave()
+        return interleave
+    def get_pipeline_scheduler(self):
+        """Return the pipeline scheduler name held by the context handle."""
+        self.check_context_handle()
+        handle = self._context_handle
+        scheduler = handle.get_pipeline_scheduler()
+        return scheduler
     def set_pipeline_segments(self, segments):
         """Set the segments of the pipeline"""
         if isinstance(segments, bool) or not isinstance(segments, int):
@@ -782,6 +805,57 @@ class _AutoParallelContext:
         self.check_context_handle()
         return self._context_handle.get_enable_fold_pipeline()
+    def set_pipeline_config(self, pipeline_config):
+        r"""
+        Set the configuration for pipeline parallelism. The configuration provides more detailed behavior control about
+        parallel training when pipeline parallelism is enabled.
+        All entries are validated before anything is written to the context handle, so a rejected configuration
+        leaves the previous settings untouched. A key that is omitted keeps the value currently stored in the
+        context handle.
+        Args:
+            pipeline_config (dict): The configuration for pipeline parallelism. It supports following keys:
+            - pipeline_interleave(bool): Setting true enable interleave scheduler for pipeline parallelism. This
+                                         scheduler requires more memory but less bubble.
+            - pipeline_scheduler(string): There are two choices, "1f1b" and "gpipe". default is "1f1b"
+              - 1f1b: It requires less memory and bubble ratio, for it run backward pass when corresponding forward pass
+                      finished.
+              - gpipe: It requires more memory and bubble ratio, for it run backward pass after all forward pass
+                       finished.
+        Raises:
+            TypeError: If the type of `pipeline_config` is not `dict`.
+            ValueError: If a key in `pipeline_config` is not in ["pipeline_interleave", "pipeline_scheduler"].
+            ValueError: If pipeline interleave is False and pipeline scheduler is not `1f1b`.
+        """
+        self.check_context_handle()
+        if not isinstance(pipeline_config, dict):
+            raise TypeError("For 'set_pipeline_config', the argument 'pipeline_config' "
+                            "must be dict, but got the type : {}.".format(type(pipeline_config)))
+        pp_interleave = _PipelineConfig.PIPELINE_INTERLEAVE
+        pp_scheduler = _PipelineConfig.PIPELINE_SCHEDULER
+        # Collect every unsupported key so the error reports all of them, not just the first one met.
+        unknown_config = [name for name in pipeline_config if name not in (pp_interleave, pp_scheduler)]
+        if unknown_config:
+            raise ValueError("Unknown config: {}".format(unknown_config))
+        has_interleave = pp_interleave in pipeline_config
+        has_scheduler = pp_scheduler in pipeline_config
+        # Resolve the effective value of each option: the supplied one (validated) or the one already stored.
+        if has_interleave:
+            interleave = pipeline_config[pp_interleave]
+            Validator.check_bool(interleave, pp_interleave, pp_interleave)
+        else:
+            interleave = self._context_handle.get_pipeline_interleave()
+        if has_scheduler:
+            scheduler = pipeline_config[pp_scheduler]
+            Validator.check_string(scheduler, [_PipelineScheduler.PIPELINE_1F1B,
+                                               _PipelineScheduler.PIPELINE_GPIPE])
+        else:
+            scheduler = self._context_handle.get_pipeline_scheduler()
+        # Without interleave only the 1f1b scheduler is accepted.
+        if not interleave and scheduler != _PipelineScheduler.PIPELINE_1F1B:
+            raise ValueError(f"When {pp_interleave} is False, {pp_scheduler} '{scheduler}' is not supported")
+        # Everything is valid: only now touch the context handle, and only for the keys that were supplied.
+        if has_interleave:
+            self._context_handle.set_pipeline_interleave(interleave)
+        if has_scheduler:
+            self._context_handle.set_pipeline_scheduler(scheduler)
     def get_enable_parallel_optimizer(self):
         """Get parallel optimizer flag."""
         self.check_context_handle()
@@ -1068,6 +1142,7 @@ class _AutoParallelContext:
             self.set_enable_all_gather_fusion(openstate)
             self.set_enable_reduce_scatter_fusion(openstate)
 def _set_ops_strategy_json_config(type="SAVE", path="", mode="all"):
     """
     Set strategy json configuration.
@@ -1091,6 +1166,7 @@ def _set_ops_strategy_json_config(type="SAVE", path="", mode="all"):
     else:
         raise KeyError("Type must be 'SAVE' or 'LOAD' and mode must be 'all' or 'principal'")
 _AUTO_PARALLEL_CONTEXT = None
@@ -1126,6 +1202,7 @@ _set_auto_parallel_context_func_map = {
     "dataset_strategy": auto_parallel_context().set_dataset_strategy,
     "enable_parallel_optimizer": auto_parallel_context().set_enable_parallel_optimizer,
     "parallel_optimizer_config": auto_parallel_context().set_parallel_optimizer_config,
+    "pipeline_config": auto_parallel_context().set_pipeline_config,
     "grad_accumulation_step": auto_parallel_context().set_grad_accumulation_step,
     "all_reduce_fusion_config": auto_parallel_context().set_all_reduce_fusion_split_indices,
     "communi_parallel_mode": auto_parallel_context().set_communi_parallel_mode,
@@ -1143,6 +1220,8 @@ _get_auto_parallel_context_func_map = {
     "gradient_fp32_sync": auto_parallel_context().get_gradient_fp32_sync,
     "loss_repeated_mean": auto_parallel_context().get_loss_repeated_mean,
     "pipeline_stages": auto_parallel_context().get_pipeline_stages,
+    "pipeline_interleave": auto_parallel_context().get_pipeline_interleave,
+    "pipeline_scheduler": auto_parallel_context().get_pipeline_scheduler,
     "parallel_mode": auto_parallel_context().get_parallel_mode,
     "search_mode": auto_parallel_context().get_strategy_search_mode,
     "auto_parallel_search_mode": auto_parallel_context().get_auto_parallel_search_mode,