mindspore 2.2.0-cp38-cp38-manylinux1_x86_64.whl → 2.2.11-cp38-cp38-manylinux1_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of mindspore might be problematic.
- mindspore/.commit_id +1 -1
- mindspore/_akg/akg/composite/build_module.py +104 -20
- mindspore/_akg/akg/utils/ascend_profilier/cann_file_parser.py +76 -0
- mindspore/_akg/akg/utils/ascend_profilier/file_manager.py +56 -0
- mindspore/_akg/akg/utils/ascend_profilier/op_summary_bean.py +23 -0
- mindspore/_akg/akg/utils/ascend_profilier/op_summary_headers.py +8 -0
- mindspore/_akg/akg/utils/ascend_profilier/op_summary_parser.py +42 -0
- mindspore/_akg/akg/utils/ascend_profilier/path_manager.py +65 -0
- mindspore/_akg/akg/utils/composite_op_helper.py +7 -2
- mindspore/_akg/akg/utils/dump_ascend_meta.py +22 -3
- mindspore/_akg/akg/utils/kernel_exec.py +41 -15
- mindspore/_akg/akg/utils/tbe_codegen_utils.py +27 -6
- mindspore/_akg/akg/utils/util.py +56 -1
- mindspore/_c_dataengine.cpython-38-x86_64-linux-gnu.so +0 -0
- mindspore/_c_expression.cpython-38-x86_64-linux-gnu.so +0 -0
- mindspore/_checkparam.py +3 -3
- mindspore/_extends/graph_kernel/model/graph_split.py +84 -76
- mindspore/_extends/graph_kernel/splitter.py +3 -2
- mindspore/_extends/parallel_compile/akg_compiler/build_tbe_kernel.py +83 -66
- mindspore/_extends/parallel_compile/akg_compiler/tbe_topi.py +4 -4
- mindspore/_extends/parallel_compile/akg_compiler/util.py +10 -7
- mindspore/_extends/parallel_compile/tbe_compiler/tbe_helper.py +2 -1
- mindspore/_extends/parse/__init__.py +3 -2
- mindspore/_extends/parse/parser.py +6 -1
- mindspore/_extends/parse/standard_method.py +14 -11
- mindspore/_extends/remote/kernel_build_server.py +2 -1
- mindspore/_mindspore_offline_debug.cpython-38-x86_64-linux-gnu.so +0 -0
- mindspore/bin/cache_admin +0 -0
- mindspore/bin/cache_server +0 -0
- mindspore/common/_utils.py +16 -0
- mindspore/common/api.py +1 -1
- mindspore/common/auto_dynamic_shape.py +81 -85
- mindspore/common/dump.py +1 -1
- mindspore/common/tensor.py +3 -20
- mindspore/config/op_info.config +1 -1
- mindspore/context.py +11 -4
- mindspore/dataset/engine/cache_client.py +8 -5
- mindspore/dataset/engine/datasets_standard_format.py +5 -0
- mindspore/dataset/vision/transforms.py +21 -21
- mindspore/experimental/optim/adam.py +1 -1
- mindspore/gen_ops.py +1 -1
- mindspore/include/api/model.h +17 -0
- mindspore/include/api/status.h +8 -3
- mindspore/lib/libdnnl.so.2 +0 -0
- mindspore/lib/libmindspore.so +0 -0
- mindspore/lib/libmindspore_backend.so +0 -0
- mindspore/lib/libmindspore_common.so +0 -0
- mindspore/lib/libmindspore_core.so +0 -0
- mindspore/lib/libmindspore_glog.so.0 +0 -0
- mindspore/lib/libmindspore_gpr.so.15 +0 -0
- mindspore/lib/libmindspore_grpc++.so.1 +0 -0
- mindspore/lib/libmindspore_grpc.so.15 +0 -0
- mindspore/lib/libmindspore_shared_lib.so +0 -0
- mindspore/lib/libnnacl.so +0 -0
- mindspore/lib/libopencv_core.so.4.5 +0 -0
- mindspore/lib/libopencv_imgcodecs.so.4.5 +0 -0
- mindspore/lib/libopencv_imgproc.so.4.5 +0 -0
- mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/config/ascend310/aic-ascend310-ops-info.json +123 -0
- mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/config/ascend310p/aic-ascend310p-ops-info.json +123 -0
- mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/config/ascend910/aic-ascend910-ops-info.json +158 -0
- mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/config/ascend910b/aic-ascend910b-ops-info.json +37 -0
- mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/custom_aicore_ops_impl/add_dsl.py +46 -0
- mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/custom_aicore_ops_impl/add_tik.py +51 -0
- mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/custom_aicore_ops_impl/kv_cache_mgr.py +241 -0
- mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/custom_aicore_ops_impl/matmul_tik.py +212 -0
- mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/vector_core/tbe/custom_aicore_ops_impl/add_dsl.py +46 -0
- mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/vector_core/tbe/custom_aicore_ops_impl/add_tik.py +51 -0
- mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/vector_core/tbe/custom_aicore_ops_impl/kv_cache_mgr.py +241 -0
- mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/vector_core/tbe/custom_aicore_ops_impl/matmul_tik.py +212 -0
- mindspore/lib/plugin/ascend/custom_aicore_ops/op_proto/libop_proto.so +0 -0
- mindspore/lib/plugin/ascend/custom_aicpu_ops/op_impl/cpu/aicpu_kernel/impl/libcust_aicpu_kernels.so +0 -0
- mindspore/lib/plugin/ascend/custom_aicpu_ops/op_impl/cpu/aicpu_kernel/impl/libcust_cpu_kernels.so +0 -0
- mindspore/lib/plugin/ascend/custom_aicpu_ops/op_impl/cpu/config/cust_aicpu_kernel.json +78 -80
- mindspore/lib/plugin/ascend/custom_aicpu_ops/op_proto/libcust_op_proto.so +0 -0
- mindspore/lib/plugin/ascend/libakg.so +0 -0
- mindspore/lib/plugin/ascend/libhccl_plugin.so +0 -0
- mindspore/lib/plugin/ascend/libmindspore_aicpu_kernels.so +0 -0
- mindspore/lib/plugin/ascend/libmindspore_cpu_kernels.so +0 -0
- mindspore/lib/plugin/cpu/libakg.so +0 -0
- mindspore/lib/plugin/gpu/libcuda_ops.so.10 +0 -0
- mindspore/lib/plugin/gpu/libcuda_ops.so.11 +0 -0
- mindspore/lib/plugin/gpu10.1/libakg.so +0 -0
- mindspore/lib/plugin/gpu11.1/libakg.so +0 -0
- mindspore/lib/plugin/gpu11.1/libnccl.so.2 +0 -0
- mindspore/lib/plugin/gpu11.6/libakg.so +0 -0
- mindspore/lib/plugin/gpu11.6/libnccl.so.2 +0 -0
- mindspore/lib/plugin/libmindspore_ascend.so.1 +0 -0
- mindspore/lib/plugin/libmindspore_ascend.so.2 +0 -0
- mindspore/lib/plugin/libmindspore_gpu.so.10.1 +0 -0
- mindspore/lib/plugin/libmindspore_gpu.so.11.1 +0 -0
- mindspore/lib/plugin/libmindspore_gpu.so.11.6 +0 -0
- mindspore/nn/cell.py +0 -3
- mindspore/nn/layer/activation.py +4 -5
- mindspore/nn/layer/conv.py +39 -23
- mindspore/nn/layer/flash_attention.py +54 -129
- mindspore/nn/layer/math.py +3 -7
- mindspore/nn/layer/rnn_cells.py +5 -5
- mindspore/nn/wrap/__init__.py +4 -2
- mindspore/nn/wrap/cell_wrapper.py +12 -3
- mindspore/numpy/utils_const.py +5 -5
- mindspore/ops/_grad_experimental/grad_array_ops.py +1 -1
- mindspore/ops/_grad_experimental/grad_implementations.py +2 -2
- mindspore/ops/_grad_experimental/grad_math_ops.py +19 -18
- mindspore/ops/_grad_experimental/grad_sparse_ops.py +3 -3
- mindspore/ops/_op_impl/aicpu/add.py +3 -3
- mindspore/ops/_op_impl/aicpu/linear_sum_assignment.py +21 -2
- mindspore/ops/_utils/utils.py +2 -0
- mindspore/ops/composite/multitype_ops/_compile_utils.py +2 -1
- mindspore/ops/composite/multitype_ops/getitem_impl.py +2 -2
- mindspore/ops/function/array_func.py +10 -7
- mindspore/ops/function/grad/grad_func.py +0 -1
- mindspore/ops/function/nn_func.py +98 -9
- mindspore/ops/function/random_func.py +2 -1
- mindspore/ops/op_info_register.py +24 -21
- mindspore/ops/operations/__init__.py +6 -2
- mindspore/ops/operations/_grad_ops.py +25 -6
- mindspore/ops/operations/_inner_ops.py +155 -23
- mindspore/ops/operations/array_ops.py +9 -7
- mindspore/ops/operations/comm_ops.py +2 -2
- mindspore/ops/operations/custom_ops.py +85 -68
- mindspore/ops/operations/inner_ops.py +26 -3
- mindspore/ops/operations/math_ops.py +7 -6
- mindspore/ops/operations/nn_ops.py +193 -49
- mindspore/parallel/_parallel_serialization.py +10 -3
- mindspore/parallel/_tensor.py +4 -1
- mindspore/parallel/checkpoint_transform.py +13 -2
- mindspore/parallel/shard.py +17 -10
- mindspore/profiler/common/util.py +1 -0
- mindspore/profiler/parser/ascend_hccl_generator.py +232 -0
- mindspore/profiler/parser/ascend_msprof_exporter.py +86 -43
- mindspore/profiler/parser/ascend_msprof_generator.py +196 -9
- mindspore/profiler/parser/ascend_op_generator.py +1 -1
- mindspore/profiler/parser/ascend_timeline_generator.py +6 -182
- mindspore/profiler/parser/base_timeline_generator.py +1 -1
- mindspore/profiler/parser/cpu_gpu_timeline_generator.py +2 -2
- mindspore/profiler/parser/framework_parser.py +1 -1
- mindspore/profiler/parser/profiler_info.py +19 -0
- mindspore/profiler/profiling.py +46 -24
- mindspore/rewrite/api/pattern_engine.py +1 -1
- mindspore/rewrite/parsers/for_parser.py +7 -7
- mindspore/rewrite/parsers/module_parser.py +4 -4
- mindspore/rewrite/symbol_tree.py +1 -4
- mindspore/run_check/_check_version.py +5 -3
- mindspore/safeguard/rewrite_obfuscation.py +52 -28
- mindspore/scipy/ops.py +55 -5
- mindspore/scipy/optimize/__init__.py +3 -2
- mindspore/scipy/optimize/linear_sum_assignment.py +38 -33
- mindspore/train/callback/_summary_collector.py +1 -1
- mindspore/train/dataset_helper.py +1 -0
- mindspore/train/model.py +2 -2
- mindspore/train/serialization.py +97 -11
- mindspore/train/summary/_summary_adapter.py +1 -1
- mindspore/train/summary/summary_record.py +23 -7
- mindspore/version.py +1 -1
- {mindspore-2.2.0.dist-info → mindspore-2.2.11.dist-info}/METADATA +3 -2
- {mindspore-2.2.0.dist-info → mindspore-2.2.11.dist-info}/RECORD +160 -151
- mindspore/ops/_op_impl/_custom_op/flash_attention/attention.py +0 -406
- mindspore/ops/_op_impl/_custom_op/flash_attention/constants.py +0 -41
- mindspore/ops/_op_impl/_custom_op/flash_attention/flash_attention_bwd.py +0 -467
- mindspore/ops/_op_impl/_custom_op/flash_attention/flash_attention_fwd.py +0 -563
- mindspore/ops/_op_impl/_custom_op/flash_attention/flash_attention_impl.py +0 -193
- mindspore/ops/_op_impl/_custom_op/flash_attention/tik_ops_utils.py +0 -435
- mindspore/ops/_op_impl/_custom_op/flash_attention/tiling_strategy/__init__.py +0 -0
- mindspore/ops/_op_impl/_custom_op/flash_attention/tiling_strategy/sparse_tiling.py +0 -45
- mindspore/ops/_op_impl/_custom_op/flash_attention/tiling_strategy/strategy.py +0 -67
- mindspore/ops/_op_impl/_custom_op/flash_attention/tiling_strategy/wukong_tiling.py +0 -62
- /mindspore/{ops/_op_impl/_custom_op/flash_attention → _akg/akg/utils/ascend_profilier}/__init__.py +0 -0
- {mindspore-2.2.0.dist-info → mindspore-2.2.11.dist-info}/WHEEL +0 -0
- {mindspore-2.2.0.dist-info → mindspore-2.2.11.dist-info}/entry_points.txt +0 -0
- {mindspore-2.2.0.dist-info → mindspore-2.2.11.dist-info}/top_level.txt +0 -0
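A file-level summary like the one above can be reproduced locally. The sketch below (not part of the diff) compares the two wheels using only the Python standard library; the wheel filenames are placeholders for the files you downloaded, and it reports added, removed, and changed archive members rather than per-file line counts.

# Minimal sketch: file-level comparison of two wheels with the standard library only.
# The wheel paths are placeholders; adjust them to the files you actually have.
import zipfile

OLD = "mindspore-2.2.0-cp38-cp38-manylinux1_x86_64.whl"
NEW = "mindspore-2.2.11-cp38-cp38-manylinux1_x86_64.whl"

def entries(path):
    # Map archive member name -> (size, CRC) so unchanged files can be skipped cheaply.
    with zipfile.ZipFile(path) as zf:
        return {i.filename: (i.file_size, i.CRC) for i in zf.infolist()}

old, new = entries(OLD), entries(NEW)
for name in sorted(new.keys() - old.keys()):
    print("added  ", name)
for name in sorted(old.keys() - new.keys()):
    print("removed", name)
for name in sorted(old.keys() & new.keys()):
    if old[name] != new[name]:
        print("changed", name)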
@@ -26,7 +26,7 @@ from mindspore.ops.operations._scalar_ops import bit_or, bit_and
 from mindspore.ops.operations.comm_ops import ReduceOp
 from mindspore.ops import signature as sig
 from mindspore.ops.operations.math_ops import _infer_shape_reduce
-from mindspore.ops.primitive import PrimitiveWithCheck, PrimitiveWithInfer, prim_attr_register, Primitive
+from mindspore.ops.primitive import PrimitiveWithCheck, PrimitiveWithInfer, prim_attr_register, Primitive, \
     _run_op, _check_contains_variable
 from mindspore._c_expression import Tensor as Tensor_
 from mindspore._c_expression import typing

@@ -167,6 +167,7 @@ class Quant(PrimitiveWithInfer):
         self.sqrt_mode = validator.check_value_type("sqrt_mode", sqrt_mode, [bool], self.name)
         self.round_mode = validator.check_string(round_mode, ["Round", "Floor", "Ceil", "Trunc"],
                                                  "round_mode", self.name)
+        self.add_prim_attr("dst_type", mstype.int8)

     def infer_shape(self, x_shape):
         return x_shape

@@ -174,7 +175,7 @@ class Quant(PrimitiveWithInfer):
     def infer_dtype(self, x_type):
         validator.check_subclass("input_x", x_type, mstype.tensor_type, self.name)
         validator.check_type_name("input_x", x_type, [mstype.float16, mstype.float32], self.name)
-        return
+        return self.get_attr_dict()['dst_type']


 class Lamb(PrimitiveWithInfer):
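The two Quant hunks above make the primitive carry its output dtype explicitly: __init__ now records a dst_type attribute (int8), and infer_dtype returns that attribute instead of returning nothing. A minimal sketch of what this looks like from Python follows; only the attribute name dst_type and the get_attr_dict() accessor come from the hunks themselves, and the constructor arguments are assumptions for illustration.

# Hedged sketch (assumes mindspore 2.2.11 is installed; the constructor arguments
# below are illustrative, not taken from the diff). After this change the primitive
# exposes a "dst_type" attribute, which infer_dtype reports as the output dtype.
from mindspore.ops.operations._inner_ops import Quant

quant = Quant(scale=1.0, offset=0.0, sqrt_mode=False, round_mode="Round")
print(quant.get_attr_dict()["dst_type"])  # expected: Int8, per the added add_prim_attr call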
@@ -491,7 +492,7 @@ class Receive(PrimitiveWithInfer):
         self.dtype = dtype
         self.group = group
         self.add_prim_attr("no_eliminate", True)
-        valid_type = [mstype.float16, mstype.float32, mstype.int32, mstype.int8, mstype.uint8]
+        valid_type = [mstype.float16, mstype.bfloat16, mstype.float32, mstype.int32, mstype.int8, mstype.uint8]
         args = {"dtype": dtype}
         validator.check_scalar_or_tensor_types_same(args, valid_type, self.name)

@@ -2146,13 +2147,14 @@ class ClipByNorm(PrimitiveWithInfer):
     @prim_attr_register
     def __init__(self, axis=None):
         """Initialize ClipByNorm"""
+        self.axis_str = 'axis'
         self.axis = () if axis is None else axis
-        validator.check_value_type(
+        validator.check_value_type(self.axis_str, self.axis, [int, tuple, list], self.name)
         axis_check = self.axis if isinstance(self.axis, Iterable) else (self.axis,)
         for i, value in enumerate(axis_check):
             validator.check_value_type('axis[%d]' % i, value, [int], self.name)
-        self.init_attrs[
-        self.add_prim_attr(
+        self.init_attrs[self.axis_str] = self.axis
+        self.add_prim_attr(self.axis_str, self.axis)
         self.init_prim_io_names(inputs=['x', 'clip_norm'], outputs=['output'])

     def infer_shape(self, x_shape, clip_norm_shape):
@@ -2729,27 +2731,29 @@ class CopyWithSlice(Primitive):
         self.init_prim_io_names(inputs=['x', 'y'], outputs=['x'])


-class
+class FFN(Primitive):
     r"""
-    The
+    The FFN computation is similar to Feed-Forward Network, it contains matmul + gelu + matmul.

     Args:
         activation (string): The activation type, set to 'fastgelu' or 'gelu'.
-
+            Only support 'fastgelu' for now. Default: "fastgelu".
+        inner_precise (int): The precise mode, set to 0 for high precision or 1 for high performance.
+            Only support 1 for now. Default: 0.

     Inputs:
         - **x** (Tensor) - The input tensor with data type of int8, float16.
           Input tensor of shape :math:`(batch\_size * seq\_length, hidden\_size)`.
+        - **weight1** (Tensor) - The weight1 tensor with data type of float16.
+          Weight1 tensor of shape :math:`(expert\_num, hidden\_size, ffn\_hidden\_size)`.
+        - **weight2** (Tensor) - The weight2 tensor with data type of float16.
+          Weight2 tensor of shape :math:`(expert\_num, ffn\_hidden\_size, hidden\_size)`.
         - **expert_tokens** (Tensor]) - The expert tokens tensor with data type of int64.
           Expert tokens tensor of shape :math:`(16,)`. For example, `(2, 1, 0, .., 9)`
           indicate that the 0th expert deals with 2 tokens, the 1th expert deals with 1 tokens,
           the 2th expert do noting and so on.
-        - **weight1** (Tensor) - The weight1 tensor with data type of float16.
-          Weight1 tensor of shape :math:`(expert\_num, hidden\_size, ffn\_hidden\_size)`.
         - **bias1** (Tensor) - The bias1 tensor with data type of float16.
           Bias1 tensor of shape :math:`(expert\_num, ffn\_hidden\_size)`.
-        - **weight2** (Tensor) - The weight2 tensor with data type of float16.
-          Weight2 tensor of shape :math:`(expert\_num, ffn\_hidden\_size, hidden\_size)`.
         - **bias2** (Tensor) - The bias2 tensor with data type of float16.
           Bias2 tensor of shape :math:`(expert\_num, hidden\_size)`.
         - **scale** (Tensor) - The scale tensor with data type of float16. Not enable now.
@@ -2771,21 +2775,149 @@ class MoeFFN(Primitive):
         >>> h_f = 4 * h
         >>> e = 16
         >>> x = Tensor(np.random.randn(b * s, h).astype(np.float16))
-        >>> expert_tokens = Tensor(np.random.randn(e).astype(np.int64))
         >>> w1 = Tensor(np.random.randn(e, h, h_f).astype(np.float16))
-        >>> bias1 = Tensor(np.random.randn(e, h_f).astype(np.float16))
         >>> w2 = Tensor(np.random.randn(e, h_f, h).astype(np.float16))
+        >>> expert_tokens = Tensor(np.random.randn(e).astype(np.int64))
+        >>> bias1 = Tensor(np.random.randn(e, h_f).astype(np.float16))
         >>> bias2 = Tensor(np.random.randn(e, h).astype(np.float16))
-        >>>
-        >>> output =
+        >>> ffn = _inner_ops.FFN("fastgelu", 1)
+        >>> output = ffn(x, w1, w2, expert_tokens, bias1, bias2)
         >>> print(output)
     """

     @prim_attr_register
-    def __init__(self, activation):
-        """Initialize
-        self.init_prim_io_names(inputs=["x", "
-                                        "
-                                        "deq_scale2"],
+    def __init__(self, activation, inner_precise):
+        """Initialize FFN."""
+        self.init_prim_io_names(inputs=["x", "weight1", "weight2", "expert_tokens", "bias1",
+                                        "bias2", "scale", "offset", "deq_scale1", "deq_scale2"],
                                 outputs=["y"])
-
+        cls_name = self.name
+        validator.check_value_type("activation", activation, [str], cls_name)
+        validator.check_value_type("inner_precise", inner_precise, [int], cls_name)
+
+
+class DecoderKVCache(Primitive):
+    r"""
+    The DecoderKVCache is used for decoding the KVCache of transformer network.
+
+    Args:
+        cache (Tensor): The cahe tensor with data type of int8, uint8, int16, uint16, float16, float32 and int32.
+            When seq_len_axis is 2, cache tensor of shape
+            :math:`(batch\_size, num_head, max\_seq\_length, hidden\_size)`.
+            When seq_len_axis is 1, cache tensor of shape
+            :math:`(batch\_size, max\_seq\_length, num_head, hidden\_size)`.
+        update (Tensor]): The tensor which is used to update the cache tensor. Same data type as cache tensor.
+            When seq_len_axis is 2, update tensor of shape
+            :math:`(batch\_size, num_head, update\_seq\_length, hidden\_size)`.
+            When seq_len_axis is 1, update tensor of shape
+            :math:`(batch\_size, update\_seq\_length, num_head, hidden\_size)`.
+        valid_seq_len (Tensor): The valid_seq_len tensor with data type of int64.
+            Valid_seq_len tensor of shape :math:`(batch\_size)`.
+        batch_index (Tensor): The batch_index tensor with data type of int64.
+            Batch_index tensor of shape :math:`(1)`. Indicate that which batch of cache tensor is going to be update.
+        seq_len_axis (int64): The seq_len_axis indicate which axis is seq_eln, set to '1' or '2'. Default: "2".
+        new_max_seq_len (Tensor): The new_max_seq_len tensor with data type of int64.
+            New_max_seq_len tensor of shape :math:`(1)`.
+            Indicate that user want to change the shape of cache tensor from
+            :math:`(batch\_size, num_head, max\_seq\_length, hidden\_size)` to
+            :math:
+            `(batch\_size * max\_seq\_length / new\_max\_seq\_length, num_head, new\_max\_seq\_length, hidden\_size)`
+            to update the cache tensor. This will not real change the shape of `cache` tensor. Not able for now.
+        cur_max_seq_len (Tensor): The new_max_seq_len tensor with data type of int64.
+            Cur_max_seq_len tensor of shape :math:`(1)`. Keep the current seq_len of cache tensor. Not abel for now.
+
+    Outputs:
+        With same data type and same shape as `cache` tensor.
+
+    Supported Platforms:
+        ``Ascend``
+
+    Examples:
+        >>> from mindspore.ops.operations import _inner_ops
+        >>> b = 4
+        >>> h = 40
+        >>> max_s = 1024
+        >>> s = 1
+        >>> d = 128
+        >>> cache = Tensor(np.random.randn(b, h, max_s, d).astype(np.float16))
+        >>> update = Tensor(np.random.randn(b, h, s, d).astype(np.float16))
+        >>> valid_seq_len = Tensor(np.random.randn(b).astype(np.int64))
+        >>> batch_index = Tensor(np.random.randn(1).astype(np.int64))
+        >>> new_max_seq_len = Tensor(np.random.randn(1).astype(np.int64))
+        >>> cur_max_seq_len = Tensor(np.random.randn(1).astype(np.int64))
+        >>> decoder_kv_cache = _inner_ops.DecoderKVCache()
+        >>> output = decoder_kv_cache(cache, update, valid_seq_len, batch_index, 2, new_max_seq_len, cur_max_seq_len)
+        >>> print(cache)
+    """
+    @prim_attr_register
+    def __init__(self):
+        """Initialize DecoderKVCache."""
+        self.init_prim_io_names(inputs=["cache", "update", "valid_seq_len", "batch_index", "seq_len_axis",
+                                        "new_max_seq_len", "cur_max_seq_len"],
+                                outputs=["out"])
+        self.add_prim_attr('side_effect_mem', True)
+
+
+class PromptKVCache(Primitive):
+    r"""
+    The PromptKVCache is used for prefill the KVCache of transformer network.
+
+    Args:
+        cache (Tensor): The cahe tensor with data type of int8, uint8, int16, uint16, float16, float32 and int32.
+            When seq_len_axis is 2, cache tensor of shape
+            :math:`(batch\_size, num_head, max\_seq\_length, hidden\_size)`.
+            When seq_len_axis is 1, cache tensor of shape
+            :math:`(batch\_size, max\_seq\_length, num_head, hidden\_size)`.
+        update (Tensor]): The tensor which is used to update the cache tensor. Same data type as cache tensor.
+            When seq_len_axis is 2, update tensor of shape
+            :math:`(batch\_size, num_head, update\_seq\_length, hidden\_size)`.
+            When seq_len_axis is 1, update tensor of shape
+            :math:`(batch\_size, update\_seq\_length, num_head, hidden\_size)`.
+        valid_seq_len (Tensor): The valid_seq_len tensor with data type of int64.
+            Valid_seq_len tensor of shape :math:`(batch\_size)`.
+        batch_index (Tensor): The batch_index tensor with data type of int64.
+            Batch_index tensor of shape :math:`(1)`. Indicate that which batch of cache tensor is going to be update.
+        seq_len_axis (int64): The seq_len_axis indicate which axis is seq_eln, set to '1' or '2'. Default: "2".
+        new_max_seq_len (Tensor): The new_max_seq_len tensor with data type of int64.
+            New_max_seq_len tensor of shape :math:`(1)`.
+            Indicate that user want to change the shape of cache tensor from
+            :math:`(batch\_size, num_head, max\_seq\_length, hidden\_size)` to
+            :math:
+            `(batch\_size * max\_seq\_length / new\_max\_seq\_length, num_head, new\_max\_seq\_length, hidden\_size)`
+            to update the cache tensor. This will not real change the shape of `cache` tensor. Not able for now.
+        cur_max_seq_len (Tensor): The new_max_seq_len tensor with data type of int64.
+            Cur_max_seq_len tensor of shape :math:`(1)`. Keep the current seq_len of cache tensor. Not abel for now.
+        align_mode (int64): indicate which axis is seq_eln, 0 is 'right', 1 is 'left'. Default: 0.
+
+    Outputs:
+        With same data type and same shape as `cache` tensor.
+
+    Supported Platforms:
+        ``Ascend``
+
+    Examples:
+        >>> from mindspore import Tensor
+        >>> from mindspore.ops.operations import _inner_ops
+        >>> b = 4
+        >>> h = 40
+        >>> max_s = 1024
+        >>> s = 256
+        >>> d = 128
+        >>> cache = Tensor(np.random.randn(b, h, max_s, d).astype(np.float16))
+        >>> update = Tensor(np.random.randn(b, h, s, d).astype(np.float16))
+        >>> valid_seq_len = Tensor(np.random.randn(b).astype(np.int64))
+        >>> batch_index = Tensor(np.random.randn(1).astype(np.int64))
+        >>> new_max_seq_len = Tensor(np.random.randn(1).astype(np.int64))
+        >>> cur_max_seq_len = Tensor(np.random.randn(1).astype(np.int64))
+        >>> prompt_kv_cache = _inner_ops.PromptKVCache(0)
+        >>> output = prompt_kv_cache(cache, update, valid_seq_len, batch_index, 2, new_max_seq_len, cur_max_seq_len)
+        >>> print(cache)
+    """
+    @prim_attr_register
+    def __init__(self, padding_mode="right"):
+        """Initialize PromptKVCache."""
+        self.init_prim_io_names(inputs=["cache", "update", "valid_seq_len", "batch_index", "seq_len_axis",
+                                        "new_max_seq_len", "cur_max_seq_len"],
+                                outputs=["out"])
+        self.add_prim_attr('side_effect_mem', True)
+        self.padding_mode = padding_mode
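The new DecoderKVCache and PromptKVCache primitives above only describe their interface. As a conceptual reference, the following plain-NumPy sketch shows the bookkeeping a decode-step cache update performs for the seq_len_axis=2 layout; it is an illustration of the idea, not the Ascend kernel's exact semantics, and every name in it is local to the sketch.

# Conceptual NumPy reference for a decode-step KV-cache update (seq_len_axis=2 layout:
# cache is (batch, num_head, max_seq_len, hidden)). Not the operator's kernel, just
# the bookkeeping the docstrings above describe.
import numpy as np

def decoder_kv_cache_ref(cache, update, valid_seq_len):
    # cache:  (b, h, max_s, d), updated in place, mirroring side_effect_mem=True
    # update: (b, h, 1, d)      one new decode step per batch element
    # valid_seq_len: (b,)       position at which each batch element's step is written
    for b, pos in enumerate(valid_seq_len):
        cache[b, :, pos, :] = update[b, :, 0, :]
    return cache

b, h, max_s, d = 2, 4, 8, 16
cache = np.zeros((b, h, max_s, d), np.float16)
update = np.ones((b, h, 1, d), np.float16)
decoder_kv_cache_ref(cache, update, np.array([3, 5]))
print(cache[0, 0, 3, 0], cache[1, 0, 5, 0])  # 1.0 1.0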
@@ -1208,7 +1208,7 @@ class UniqueWithPad(Primitive):


 class Split(Primitive):
-    """
+    r"""
     Splits the input tensor into output_num of tensors along the given axis and output numbers.

     Refer to :func:`mindspore.ops.split` for more details.

@@ -1222,7 +1222,7 @@ class Split(Primitive):

     Outputs:
         tuple[Tensor], the shape of each output tensor is the same, which is
-        :math:`(x_0, x_1, ..., x_{axis}/{
+        :math:`(x_0, x_1, ..., x_{axis}/{output\_num}, ..., x_{R-1})`.
         And the data type is the same as `input_x`.

     Supported Platforms:
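The corrected formula states that only the split axis is divided by output_num while every other dimension is unchanged. A short sketch, assuming a CPU-capable mindspore install, checks this with the public Split primitive:

# Minimal sketch (assumes a CPU-capable mindspore install): splitting a (2, 6) tensor
# into 3 parts along axis 1 yields pieces of shape (2, 6/3) = (2, 2), matching the
# corrected :math: formula in the docstring above.
import numpy as np
from mindspore import Tensor, ops

x = Tensor(np.arange(12).reshape(2, 6).astype(np.float32))
split = ops.Split(axis=1, output_num=3)
pieces = split(x)
print([p.shape for p in pieces])  # [(2, 2), (2, 2), (2, 2)]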
@@ -1763,16 +1763,18 @@ class FillV2(PrimitiveWithCheck):
         self.init_prim_io_names(inputs=['shape', 'value'], outputs=['y'])

     def check_elim(self, dims, x):
-
-
-                isinstance(dims, (Tensor, Tensor_))
+        x_is_invalid = x is None or (not isinstance(x, (Tensor, Tensor_))) or (x.shape != ())
+        dims_is_invalid = dims is None or (isinstance(dims, (tuple, list)) and dims) or\
+            isinstance(dims, (Tensor, Tensor_))
+        if x_is_invalid or dims_is_invalid:
             return (False, None)
         return (True, x)

     def infer_value(self, dims, x):
-
+        dims_is_invalid = dims is None or\
             (isinstance(dims, (tuple, list)) and dims) or\
-                isinstance(dims, (Tensor, Tensor_))
+            isinstance(dims, (Tensor, Tensor_))
+        if x is None or dims_is_invalid:
             return None
         return x

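For context on the refactored check_elim: FillV2 takes a shape tensor and a 0-d value tensor, and check_elim only decides whether the call can be replaced by its value at compile time; the rewrite splits the old combined condition into the named predicates x_is_invalid and dims_is_invalid without changing what is accepted. A hedged usage sketch follows (assumes a CPU-capable mindspore install; shapes and values are illustrative).

# Minimal usage sketch for FillV2: first input is the target shape, second is a 0-d
# tensor holding the fill value. The refactor above only rephrases the validity test
# applied to these two inputs.
import mindspore as ms
from mindspore import Tensor, ops

fill = ops.FillV2()
y = fill(Tensor([2, 3], ms.int32), Tensor(4.0, ms.float32))
print(y)  # a 2x3 tensor filled with 4.0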
@@ -94,7 +94,7 @@ class ReduceOp:

 def check_collective_target_dtype(data_name, data_dtype, prim_name):
     """Check if data type is valid."""
-    default_target_dtypes = (mstype.int8, mstype.int32, mstype.float16, mstype.float32)
+    default_target_dtypes = (mstype.int8, mstype.uint8, mstype.int32, mstype.float16, mstype.bfloat16, mstype.float32)
     gpu_target_dtypes = (mstype.bool_, mstype.int8, mstype.int32, mstype.int64, mstype.uint32, mstype.uint64,
                          mstype.float16, mstype.float32, mstype.float64)

@@ -1310,4 +1310,4 @@ class _GetTensorSlice(PrimitiveWithInfer):
         from mindspore.parallel._tensor import _load_tensor
         validator.check_value_type("dev_mat", dev_mat, [tuple], self.name)
         validator.check_value_type("tensor_map", tensor_map, [tuple], self.name)
-        return Tensor(_load_tensor(x, dev_mat, tensor_map))
+        return Tensor(_load_tensor(x, dev_mat, tensor_map), x.dtype)
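The one-line _GetTensorSlice change passes the source tensor's dtype when the sliced host buffer is wrapped back into a Tensor, so the slice keeps its original dtype instead of whatever NumPy infers. A small sketch of the underlying Tensor behaviour (assumes mindspore is installed; float16 stands in for dtypes NumPy cannot represent, such as bfloat16):

# Why passing dtype matters when re-wrapping a NumPy buffer: without it, the new
# Tensor inherits NumPy's dtype. float16 is used here as a stand-in; the same pattern
# is what keeps e.g. bfloat16 slices from silently changing type.
import numpy as np
import mindspore as ms
from mindspore import Tensor

buf = np.ones((2, 2), dtype=np.float32)       # pretend this is the sliced host buffer
print(Tensor(buf).dtype)                      # Float32, inherited from NumPy
print(Tensor(buf, ms.float16).dtype)          # Float16, the dtype we asked to preserve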
@@ -42,6 +42,24 @@ from ._pyfunc_registry import add_pyfunc
 if platform.system() != "Windows":
     import fcntl

+KEY_ATTR = "attr"
+KEY_NAME = "name"
+INPUT_NAMES = "input_names"
+ATTR_NAMES = "attr_names"
+AUTO_DIFF = "autodiff"
+IMPLY_TYPE = "imply_type"
+FUSION_TYPE = "fusion_type"
+MS_KERNEL_FLAG = "ms_kernel_flag"
+AKG = "AKG"
+TBE = "TBE"
+CUDA = "CUDA"
+AICORE = "AiCore"
+CPU = "CPU"
+GPU = "GPU"
+ASCEND = "Ascend"
+HYBRID_TYPE = "hybrid"
+OP_NAME = "op_name"
+

 def _get_cache_path():
     """
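The new module-level constants are simply the string keys and target names that the registration-info handling below keeps repeating ('attr', 'name', 'op_name', 'imply_type', and so on). As a reminder of what such a reg_info looks like, here is a hedged sketch that builds one with the public CustomRegOp helper and prints its keys; the op name, attribute, and dtype choices are illustrative.

# Hedged sketch: build a registration-info dict with the public CustomRegOp helper and
# show the kind of keys the new constants (OP_NAME="op_name", KEY_ATTR="attr",
# IMPLY_TYPE="imply_type", ...) index into. Names and formats here are illustrative.
from mindspore.ops import CustomRegOp, DataType

reg_info = CustomRegOp("my_custom_op") \
    .input(0, "x") \
    .output(0, "y") \
    .dtype_format(DataType.F32_Default, DataType.F32_Default) \
    .attr("scale", "required", "float") \
    .target("CPU") \
    .get_op_info()

print(sorted(reg_info.keys()))  # expect entries such as 'op_name', 'attr', 'inputs', 'outputs'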
@@ -150,7 +168,6 @@ class Custom(ops.PrimitiveWithInfer):

     .. warning::
         - This is an experimental API that is subject to change.
-        - Currently, the functionality of Custom does not support Ascend 910B.

     .. note::
         The supported platforms are determined by the input `func_type`. The supported platforms are as follows:
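The removed warning line means the 2.2.11 documentation no longer singles out Ascend 910B as unsupported by Custom. For readers unfamiliar with the operator, a minimal hedged usage sketch in pyfunc mode on CPU follows; the function, shapes, and context settings are illustrative and not taken from the diff.

# Minimal Custom usage sketch in "pyfunc" mode (assumes a CPU-capable mindspore
# install; everything here is illustrative). The Python function runs as-is, and
# out_shape/out_dtype are callables mapping input shape/dtype to output shape/dtype.
import numpy as np
import mindspore as ms
from mindspore import Tensor, ops

ms.set_context(mode=ms.PYNATIVE_MODE, device_target="CPU")

def add_one(x):
    return x + 1.0

add_one_op = ops.Custom(add_one, out_shape=lambda x: x, out_dtype=lambda x: x, func_type="pyfunc")
print(add_one_op(Tensor(np.zeros((2, 2), np.float32))))  # all ones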
@@ -453,10 +470,10 @@ class Custom(ops.PrimitiveWithInfer):
     op_path_in_cache = []  # Save paths for op functions created in the cached.
     custom_aot_warning = True  # Flag to enable warnings about custom aot path white list

-    def __init__(self, func, out_shape=None, out_dtype=None, func_type=
-
+    def __init__(self, func, out_shape=None, out_dtype=None, func_type=HYBRID_TYPE, bprop=None, reg_info=None):
+        super().__init__("Custom")

-        self.supported_targets = [
+        self.supported_targets = [ASCEND, GPU, CPU]
         self.supported_func_type = ["hybrid", "akg", "tbe", "aicpu", "aot", "pyfunc", "julia"]
         self.log_prefix = "For '{}', 'func_type': {}, 'func': {}".format(self.name, func_type, func)
         self.func = func

@@ -473,7 +490,7 @@ class Custom(ops.PrimitiveWithInfer):
         self._update_func_info(reg_info)
         self.add_prim_attr("func_name", self.func_name)
         self.add_prim_attr("uniq_name", self.uniq_name)
-        if self.func_type ==
+        if self.func_type == HYBRID_TYPE:
             self.add_prim_attr("func_compile_attrs", self._func_compile_attrs)

         self.add_prim_attr("imply_path", self.imply_path)

@@ -502,7 +519,7 @@ class Custom(ops.PrimitiveWithInfer):
         if func_type == "akg":
             self._set_akg_kernel_type()

-        if not self.bprop and self.func_type ==
+        if not self.bprop and self.func_type == HYBRID_TYPE:
             self._hybrid_autodiff(func_type)

         self.add_prim_attr("func_type", self.func_type)

@@ -577,7 +594,7 @@ class Custom(ops.PrimitiveWithInfer):
         elif "compute" in self.func_source_str:
             self.func_type = "tvm_compute"
         else:
-            self.func_type =
+            self.func_type = HYBRID_TYPE
             self._hybrid_func_analyser()

     def _check_julia_func(self):
@@ -633,18 +650,18 @@ class Custom(ops.PrimitiveWithInfer):

         elif self.func_type == "julia":
             self._check_julia_func()
-        elif self.func_type ==
-            if not hasattr(self.func,
+        elif self.func_type == HYBRID_TYPE:
+            if not hasattr(self.func, MS_KERNEL_FLAG):
                 raise TypeError("{}, 'func' must be a function decorated by kernel".format(self.log_prefix))
             self._is_ms_kernel = True
             self._func_compile_attrs = getattr(self.func, "compile_attrs", {})
         elif self.func_type == "akg":
-            if hasattr(self.func,
+            if hasattr(self.func, MS_KERNEL_FLAG):
                 logger.warning("{}. To have a better user experience, the mode hybrid is suggested "
                                "for the input function with decorator @kernel. "
                                "To enable this mode, set the 'func_type' to be \"hybrid\"".format(self.log_prefix))
         elif self.func_type == "pyfunc":
-            if hasattr(self.func,
+            if hasattr(self.func, MS_KERNEL_FLAG):
                 logger.warning("{}. Now you are using the function with decorator @kernel in the mode pyfunc. "
                                "The kernel will be executed as a native python function, which might lead to "
                                "low efficiency. To accelerate the kernel, set the 'func_type' to be \"hybrid\""

@@ -758,7 +775,7 @@ class Custom(ops.PrimitiveWithInfer):
                 continue
             if isinstance(reg_info_item, str):
                 reg_info_item = json.loads(reg_info_item)
-            prefix = "_".join([prefix, reg_info_item.get(
+            prefix = "_".join([prefix, reg_info_item.get(OP_NAME, "")])
             self.uniq_name = prefix + "_" + self.func_name
         else:
             raise TypeError("For '{}', 'func' must be of type function or str, but got {}"
@@ -768,23 +785,23 @@ class Custom(ops.PrimitiveWithInfer):
         """Update op attrs in reg_info."""
         output_name_list = []
         for _, item in enumerate(reg_info.get("outputs", [])):
-            if isinstance(item, dict) and item.get(
-                output_name_list.append(item.get(
+            if isinstance(item, dict) and item.get(KEY_NAME):
+                output_name_list.append(item.get(KEY_NAME))
         if output_name_list:
             self.add_prim_attr("output_names", output_name_list)

-        if isinstance(reg_info.get(
-            self.add_prim_attr("reg_op_name", reg_info.get(
+        if isinstance(reg_info.get(OP_NAME), str):
+            self.add_prim_attr("reg_op_name", reg_info.get(OP_NAME))

         if self.func_type == "aicpu":
-            self.uniq_name = reg_info[
+            self.uniq_name = reg_info[OP_NAME]
             self.add_prim_attr("uniq_name", self.uniq_name)

         if self.func_type in ["aot", "aicpu"]:
-            if reg_info.get(
-                for item in reg_info[
+            if reg_info.get(KEY_ATTR) is not None and isinstance(reg_info[KEY_ATTR], list):
+                for item in reg_info[KEY_ATTR]:
                     if isinstance(item, dict) and item.get("value") is not None:
-                        self.add_prim_attr(item[
+                        self.add_prim_attr(item[KEY_NAME], item["value"])

     def _register_info(self, info):
         """Register reg_info."""

@@ -802,7 +819,7 @@ class Custom(ops.PrimitiveWithInfer):
             if isinstance(reg_info, str):
                 reg_info = json.loads(reg_info)
             if self.fake_output:
-                reg_info["outputs"].append(dict({"index": 0,
+                reg_info["outputs"].append(dict({"index": 0, KEY_NAME: "y", "param_type": "required"}))
             new_dtype_format = []
             for i in reg_info["dtype_format"]:
                 new_dtype_format.append(i + (DataType.I32_Default,))
@@ -874,16 +891,16 @@ class Custom(ops.PrimitiveWithInfer):
                             "'CustomRegOp' to generate the registration information, then pass it to 'reg_info' or "
                             "use 'custom_info_register' to bind it to 'func' if 'func' is a function."
                             .format(self.log_prefix, reg_info, type(reg_info)))
-        reg_info[
-        reg_info[
-        if not isinstance(reg_info.get(
-            reg_info[
+        reg_info[OP_NAME] = self.uniq_name
+        reg_info[IMPLY_TYPE] = self._get_imply_type(reg_info, target)
+        if not isinstance(reg_info.get(FUSION_TYPE), str) or not reg_info[FUSION_TYPE].strip():
+            reg_info[FUSION_TYPE] = "OPAQUE"
         # Supplement necessary info for TBE if these information is missing in reg_info
-        if reg_info[
-            if reg_info.get(
-                for i, item in enumerate(reg_info[
+        if reg_info[IMPLY_TYPE] == TBE:
+            if reg_info.get(KEY_ATTR) is not None and isinstance(reg_info[KEY_ATTR], list):
+                for i, item in enumerate(reg_info[KEY_ATTR]):
                     if isinstance(item, dict) and item.get("value") is None:
-                        reg_info[
+                        reg_info[KEY_ATTR][i]["value"] = "all"
             reg_info["async_flag"] = reg_info.get("async_flag", False)
             reg_info["binfile"] = "%s.so" % self.func_name
             reg_info["compute_cost"] = reg_info.get("compute_cost", 10)

@@ -891,8 +908,8 @@ class Custom(ops.PrimitiveWithInfer):
             reg_info["partial_flag"] = reg_info.get("partial_flag", True)
             reg_info["needCheckSupport"] = reg_info.get("need_check_supported", False)
         # Supplement necessary info for AKG if these information is missing in reg_info
-        if reg_info[
-            target_to_processor = {
+        if reg_info[IMPLY_TYPE] == AKG:
+            target_to_processor = {ASCEND: AICORE, GPU: CUDA, CPU: CPU}
             reg_info["processor"] = reg_info.get("processor", target_to_processor.get(target))
         return reg_info

@@ -905,15 +922,15 @@ class Custom(ops.PrimitiveWithInfer):
         # Infer target from reg_info["processor"], reg_info generated from AkgGpuRegOp or AkgAscendRegOp
         # will have the processor information.
         if target not in self.supported_targets:
-            processor_to_target = {
+            processor_to_target = {AICORE: ASCEND, CUDA: GPU, CPU: CPU}
             target = processor_to_target.get(reg_info.get("processor"))
-        # Infer target from reg_info[
+        # Infer target from reg_info[IMPLY_TYPE]
         if target not in self.supported_targets:
-            imply_type_to_target = {
-            target = imply_type_to_target.get(reg_info.get(
+            imply_type_to_target = {TBE: ASCEND, GPU: GPU, CPU: CPU}
+            target = imply_type_to_target.get(reg_info.get(IMPLY_TYPE))
         # Infer target from func_type
         if target not in self.supported_targets:
-            func_type_to_target = {"tbe":
+            func_type_to_target = {"tbe": ASCEND, "pyfunc": CPU}
             target = func_type_to_target.get(self.func_type)
         if target not in self.supported_targets:
             raise ValueError("{}, target set in registration information must be one of {}, but got {}"

@@ -922,14 +939,14 @@ class Custom(ops.PrimitiveWithInfer):

     def _get_imply_type(self, reg_info, target):
         """Get imply_typ information."""
-        # Get imply_type from reg_info[
-        if isinstance(reg_info, dict) and isinstance(reg_info.get(
-            reg_info[
-            return reg_info[
+        # Get imply_type from reg_info[IMPLY_TYPE]
+        if isinstance(reg_info, dict) and isinstance(reg_info.get(IMPLY_TYPE), str) and \
+                reg_info[IMPLY_TYPE].strip():
+            return reg_info[IMPLY_TYPE]
         # Infer imply_type from func_type
-        func_type_to_imply_type = {"hybrid":
-                                   "julia": target, "aot": "BiSheng" if target ==
-        return func_type_to_imply_type.get(self.func_type,
+        func_type_to_imply_type = {"hybrid": AKG, "akg": AKG, "tbe": TBE, "aicpu": "AiCPU", "pyfunc": target,
+                                   "julia": target, "aot": "BiSheng" if target == ASCEND else target}
+        return func_type_to_imply_type.get(self.func_type, AKG)

     def _save_attr(self, reg_info):
         """Save input_names and attr_names of current func."""
@@ -943,18 +960,18 @@ class Custom(ops.PrimitiveWithInfer):
             return value

         tensor_inputs = _get_value_list("inputs")
-        attr = _get_value_list(
+        attr = _get_value_list(KEY_ATTR)
         input_names = []  # include tensor input names and attr input names
         attr_names = []
         pure_input_names = []
         for item in tensor_inputs:
-            if isinstance(item, dict) and item.get(
-                input_names.append(item[
-                pure_input_names.append(item[
+            if isinstance(item, dict) and item.get(KEY_NAME) is not None:
+                input_names.append(item[KEY_NAME])
+                pure_input_names.append(item[KEY_NAME])
         # attr is converted from inputs only when graph mode or when inputs name is also in reg info
         attr_to_input_safe = bool(input_names) or context.get_context("mode") == ms.GRAPH_MODE
         for item in attr:
-            if isinstance(item, dict) and item.get(
+            if isinstance(item, dict) and item.get(KEY_NAME) is not None:
                 # for custom op with function tbe, we always add attrs to inputs as we don't
                 # deal with attr value here and leave them to the backend process to fit the
                 # usual process of tbe op compiling in mindspore

@@ -963,9 +980,9 @@ class Custom(ops.PrimitiveWithInfer):
                 # add attr name to input name only when the value of attr is None in reg info
                 # as we need to get values of attrs from inputs
                 if attr_to_input_safe and (self.func_type == "tbe" or item.get("value", None) is None):
-                    input_names.append(item[
-                attr_names.append(item[
-        cur_attr = {
+                    input_names.append(item[KEY_NAME])
+                attr_names.append(item[KEY_NAME])
+        cur_attr = {INPUT_NAMES: input_names, ATTR_NAMES: attr_names, "pure_input_names": pure_input_names}
         # If func does not have attr, save current attr.
         # Else, check if current attr is same as previous saved one.
         prev_attr_names = attr_names
@@ -974,13 +991,13 @@ class Custom(ops.PrimitiveWithInfer):
             if not isinstance(func_attr, dict):
                 setattr(self.func, "func_attr", cur_attr)
             else:
-                prev_attr_names = func_attr.get(
+                prev_attr_names = func_attr.get(ATTR_NAMES)
         elif isinstance(self.func, str):
             func_attr = Custom.attr_dict.get(self.func)
             if not isinstance(func_attr, dict):
                 Custom.attr_dict[self.func] = cur_attr
             else:
-                prev_attr_names = func_attr.get(
+                prev_attr_names = func_attr.get(ATTR_NAMES)
         if attr_names != prev_attr_names:
             raise ValueError("{}, attr names set in registration information must be the same as previous saved one, "
                              "but got {} vs {}".format(self.log_prefix, attr_names, prev_attr_names))

@@ -989,23 +1006,23 @@ class Custom(ops.PrimitiveWithInfer):
         """Add primitive_target to primitive's attr."""
         registered_targets = self._get_registered_targets()
         if self.func_type == "pyfunc":
-            self.set_device(
-            if registered_targets and registered_targets != [
+            self.set_device(CPU)
+            if registered_targets and registered_targets != [CPU]:
                 logger.warning("{}, only supports CPU platform, but got registered target {}. "
                                "We will run it on CPU".format(self.log_prefix, registered_targets))
         elif self.func_type == "aot":
             if len(registered_targets) != 1:
                 logger.info("{}, target will be set according to context.".format(self.log_prefix))
-            elif registered_targets == [
-                self.set_device(
-            elif registered_targets == [
-                self.set_device(
+            elif registered_targets == [GPU]:
+                self.set_device(GPU)
+            elif registered_targets == [CPU]:
+                self.set_device(CPU)
         elif self.func_type == "julia":
-            self.set_device(
+            self.set_device(CPU)
             device_target = context.get_context('device_target')
-            if device_target ==
+            if device_target == CPU:
                 pass
-            elif device_target ==
+            elif device_target == GPU and registered_targets and registered_targets == [CPU]:
                 logger.warning("{}, only supports CPU platform, but got registered target {}. "
                                "We will run it on CPU".format(self.log_prefix, registered_targets))
             else:
@@ -1028,15 +1045,15 @@ class Custom(ops.PrimitiveWithInfer):
         elif isinstance(self.func, str):
             func_attr = Custom.attr_dict.get(self.func)
             if isinstance(func_attr, dict):
-                _add_prim_attr(
-                _add_prim_attr(
+                _add_prim_attr(INPUT_NAMES)
+                _add_prim_attr(ATTR_NAMES)
                 _add_prim_attr("pure_input_names")
         self._add_prim_target()
         if callable(self.func) and callable(self.out_shape):
-            if hasattr(self.out_shape, "type") and getattr(self.out_shape, "type") ==
-                self.add_prim_attr(
+            if hasattr(self.out_shape, "type") and getattr(self.out_shape, "type") == AUTO_DIFF:
+                self.add_prim_attr(AUTO_DIFF, True)
             else:
-                self.add_prim_attr(
+                self.add_prim_attr(AUTO_DIFF, False)

     def _hybrid_autodiff(self, input_func_type):
         """generate backward op for a custom hybrid op"""

@@ -1052,7 +1069,7 @@ class Custom(ops.PrimitiveWithInfer):
         def infer_func(*args):
             return args[:inputs_num]

-        setattr(infer_func, "type",
+        setattr(infer_func, "type", AUTO_DIFF)
         op = Custom(func=self.func, out_shape=infer_func, out_dtype=infer_func,
                     func_type=input_func_type, bprop=True)
         self.bprop = grad_func(op)