mindspore 2.2.0__cp37-cp37m-manylinux1_x86_64.whl → 2.2.11__cp37-cp37m-manylinux1_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mindspore/.commit_id +1 -1
- mindspore/_akg/akg/composite/build_module.py +104 -20
- mindspore/_akg/akg/utils/ascend_profilier/cann_file_parser.py +76 -0
- mindspore/_akg/akg/utils/ascend_profilier/file_manager.py +56 -0
- mindspore/_akg/akg/utils/ascend_profilier/op_summary_bean.py +23 -0
- mindspore/_akg/akg/utils/ascend_profilier/op_summary_headers.py +8 -0
- mindspore/_akg/akg/utils/ascend_profilier/op_summary_parser.py +42 -0
- mindspore/_akg/akg/utils/ascend_profilier/path_manager.py +65 -0
- mindspore/_akg/akg/utils/composite_op_helper.py +7 -2
- mindspore/_akg/akg/utils/dump_ascend_meta.py +22 -3
- mindspore/_akg/akg/utils/kernel_exec.py +41 -15
- mindspore/_akg/akg/utils/tbe_codegen_utils.py +27 -6
- mindspore/_akg/akg/utils/util.py +56 -1
- mindspore/_c_dataengine.cpython-37m-x86_64-linux-gnu.so +0 -0
- mindspore/_c_expression.cpython-37m-x86_64-linux-gnu.so +0 -0
- mindspore/_checkparam.py +3 -3
- mindspore/_extends/graph_kernel/model/graph_split.py +84 -76
- mindspore/_extends/graph_kernel/splitter.py +3 -2
- mindspore/_extends/parallel_compile/akg_compiler/build_tbe_kernel.py +83 -66
- mindspore/_extends/parallel_compile/akg_compiler/tbe_topi.py +4 -4
- mindspore/_extends/parallel_compile/akg_compiler/util.py +10 -7
- mindspore/_extends/parallel_compile/tbe_compiler/tbe_helper.py +2 -1
- mindspore/_extends/parse/__init__.py +3 -2
- mindspore/_extends/parse/parser.py +6 -1
- mindspore/_extends/parse/standard_method.py +14 -11
- mindspore/_extends/remote/kernel_build_server.py +2 -1
- mindspore/_mindspore_offline_debug.cpython-37m-x86_64-linux-gnu.so +0 -0
- mindspore/bin/cache_admin +0 -0
- mindspore/bin/cache_server +0 -0
- mindspore/common/_utils.py +16 -0
- mindspore/common/api.py +1 -1
- mindspore/common/auto_dynamic_shape.py +81 -85
- mindspore/common/dump.py +1 -1
- mindspore/common/tensor.py +3 -20
- mindspore/config/op_info.config +1 -1
- mindspore/context.py +11 -4
- mindspore/dataset/engine/cache_client.py +8 -5
- mindspore/dataset/engine/datasets_standard_format.py +5 -0
- mindspore/dataset/vision/transforms.py +21 -21
- mindspore/experimental/optim/adam.py +1 -1
- mindspore/gen_ops.py +1 -1
- mindspore/include/api/model.h +17 -0
- mindspore/include/api/status.h +8 -3
- mindspore/lib/libdnnl.so.2 +0 -0
- mindspore/lib/libmindspore.so +0 -0
- mindspore/lib/libmindspore_backend.so +0 -0
- mindspore/lib/libmindspore_common.so +0 -0
- mindspore/lib/libmindspore_core.so +0 -0
- mindspore/lib/libmindspore_glog.so.0 +0 -0
- mindspore/lib/libmindspore_gpr.so.15 +0 -0
- mindspore/lib/libmindspore_grpc++.so.1 +0 -0
- mindspore/lib/libmindspore_grpc.so.15 +0 -0
- mindspore/lib/libmindspore_shared_lib.so +0 -0
- mindspore/lib/libnnacl.so +0 -0
- mindspore/lib/libopencv_core.so.4.5 +0 -0
- mindspore/lib/libopencv_imgcodecs.so.4.5 +0 -0
- mindspore/lib/libopencv_imgproc.so.4.5 +0 -0
- mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/config/ascend310/aic-ascend310-ops-info.json +123 -0
- mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/config/ascend310p/aic-ascend310p-ops-info.json +123 -0
- mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/config/ascend910/aic-ascend910-ops-info.json +158 -0
- mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/config/ascend910b/aic-ascend910b-ops-info.json +37 -0
- mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/custom_aicore_ops_impl/add_dsl.py +46 -0
- mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/custom_aicore_ops_impl/add_tik.py +51 -0
- mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/custom_aicore_ops_impl/kv_cache_mgr.py +241 -0
- mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/custom_aicore_ops_impl/matmul_tik.py +212 -0
- mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/vector_core/tbe/custom_aicore_ops_impl/add_dsl.py +46 -0
- mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/vector_core/tbe/custom_aicore_ops_impl/add_tik.py +51 -0
- mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/vector_core/tbe/custom_aicore_ops_impl/kv_cache_mgr.py +241 -0
- mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/vector_core/tbe/custom_aicore_ops_impl/matmul_tik.py +212 -0
- mindspore/lib/plugin/ascend/custom_aicore_ops/op_proto/libop_proto.so +0 -0
- mindspore/lib/plugin/ascend/custom_aicpu_ops/op_impl/cpu/aicpu_kernel/impl/libcust_aicpu_kernels.so +0 -0
- mindspore/lib/plugin/ascend/custom_aicpu_ops/op_impl/cpu/aicpu_kernel/impl/libcust_cpu_kernels.so +0 -0
- mindspore/lib/plugin/ascend/custom_aicpu_ops/op_impl/cpu/config/cust_aicpu_kernel.json +78 -80
- mindspore/lib/plugin/ascend/custom_aicpu_ops/op_proto/libcust_op_proto.so +0 -0
- mindspore/lib/plugin/ascend/libakg.so +0 -0
- mindspore/lib/plugin/ascend/libhccl_plugin.so +0 -0
- mindspore/lib/plugin/ascend/libmindspore_aicpu_kernels.so +0 -0
- mindspore/lib/plugin/ascend/libmindspore_cpu_kernels.so +0 -0
- mindspore/lib/plugin/cpu/libakg.so +0 -0
- mindspore/lib/plugin/gpu/libcuda_ops.so.10 +0 -0
- mindspore/lib/plugin/gpu/libcuda_ops.so.11 +0 -0
- mindspore/lib/plugin/gpu10.1/libakg.so +0 -0
- mindspore/lib/plugin/gpu10.1/libnccl.so.2 +0 -0
- mindspore/lib/plugin/gpu11.1/libakg.so +0 -0
- mindspore/lib/plugin/gpu11.6/libakg.so +0 -0
- mindspore/lib/plugin/gpu11.6/libnccl.so.2 +0 -0
- mindspore/lib/plugin/libmindspore_ascend.so.1 +0 -0
- mindspore/lib/plugin/libmindspore_ascend.so.2 +0 -0
- mindspore/lib/plugin/libmindspore_gpu.so.10.1 +0 -0
- mindspore/lib/plugin/libmindspore_gpu.so.11.1 +0 -0
- mindspore/lib/plugin/libmindspore_gpu.so.11.6 +0 -0
- mindspore/nn/cell.py +0 -3
- mindspore/nn/layer/activation.py +4 -5
- mindspore/nn/layer/conv.py +39 -23
- mindspore/nn/layer/flash_attention.py +54 -129
- mindspore/nn/layer/math.py +3 -7
- mindspore/nn/layer/rnn_cells.py +5 -5
- mindspore/nn/wrap/__init__.py +4 -2
- mindspore/nn/wrap/cell_wrapper.py +12 -3
- mindspore/numpy/utils_const.py +5 -5
- mindspore/ops/_grad_experimental/grad_array_ops.py +1 -1
- mindspore/ops/_grad_experimental/grad_implementations.py +2 -2
- mindspore/ops/_grad_experimental/grad_math_ops.py +19 -18
- mindspore/ops/_grad_experimental/grad_sparse_ops.py +3 -3
- mindspore/ops/_op_impl/aicpu/add.py +3 -3
- mindspore/ops/_op_impl/aicpu/linear_sum_assignment.py +21 -2
- mindspore/ops/_utils/utils.py +2 -0
- mindspore/ops/composite/multitype_ops/_compile_utils.py +2 -1
- mindspore/ops/composite/multitype_ops/getitem_impl.py +2 -2
- mindspore/ops/function/array_func.py +10 -7
- mindspore/ops/function/grad/grad_func.py +0 -1
- mindspore/ops/function/nn_func.py +98 -9
- mindspore/ops/function/random_func.py +2 -1
- mindspore/ops/op_info_register.py +24 -21
- mindspore/ops/operations/__init__.py +6 -2
- mindspore/ops/operations/_grad_ops.py +25 -6
- mindspore/ops/operations/_inner_ops.py +155 -23
- mindspore/ops/operations/array_ops.py +9 -7
- mindspore/ops/operations/comm_ops.py +2 -2
- mindspore/ops/operations/custom_ops.py +85 -68
- mindspore/ops/operations/inner_ops.py +26 -3
- mindspore/ops/operations/math_ops.py +7 -6
- mindspore/ops/operations/nn_ops.py +193 -49
- mindspore/parallel/_parallel_serialization.py +10 -3
- mindspore/parallel/_tensor.py +4 -1
- mindspore/parallel/checkpoint_transform.py +13 -2
- mindspore/parallel/shard.py +17 -10
- mindspore/profiler/common/util.py +1 -0
- mindspore/profiler/parser/ascend_hccl_generator.py +232 -0
- mindspore/profiler/parser/ascend_msprof_exporter.py +86 -43
- mindspore/profiler/parser/ascend_msprof_generator.py +196 -9
- mindspore/profiler/parser/ascend_op_generator.py +1 -1
- mindspore/profiler/parser/ascend_timeline_generator.py +6 -182
- mindspore/profiler/parser/base_timeline_generator.py +1 -1
- mindspore/profiler/parser/cpu_gpu_timeline_generator.py +2 -2
- mindspore/profiler/parser/framework_parser.py +1 -1
- mindspore/profiler/parser/profiler_info.py +19 -0
- mindspore/profiler/profiling.py +46 -24
- mindspore/rewrite/api/pattern_engine.py +1 -1
- mindspore/rewrite/parsers/for_parser.py +7 -7
- mindspore/rewrite/parsers/module_parser.py +4 -4
- mindspore/rewrite/symbol_tree.py +1 -4
- mindspore/run_check/_check_version.py +5 -3
- mindspore/safeguard/rewrite_obfuscation.py +52 -28
- mindspore/scipy/ops.py +55 -5
- mindspore/scipy/optimize/__init__.py +3 -2
- mindspore/scipy/optimize/linear_sum_assignment.py +38 -33
- mindspore/train/callback/_summary_collector.py +1 -1
- mindspore/train/dataset_helper.py +1 -0
- mindspore/train/model.py +2 -2
- mindspore/train/serialization.py +97 -11
- mindspore/train/summary/_summary_adapter.py +1 -1
- mindspore/train/summary/summary_record.py +23 -7
- mindspore/version.py +1 -1
- {mindspore-2.2.0.dist-info → mindspore-2.2.11.dist-info}/METADATA +3 -2
- {mindspore-2.2.0.dist-info → mindspore-2.2.11.dist-info}/RECORD +160 -151
- mindspore/ops/_op_impl/_custom_op/flash_attention/attention.py +0 -406
- mindspore/ops/_op_impl/_custom_op/flash_attention/constants.py +0 -41
- mindspore/ops/_op_impl/_custom_op/flash_attention/flash_attention_bwd.py +0 -467
- mindspore/ops/_op_impl/_custom_op/flash_attention/flash_attention_fwd.py +0 -563
- mindspore/ops/_op_impl/_custom_op/flash_attention/flash_attention_impl.py +0 -193
- mindspore/ops/_op_impl/_custom_op/flash_attention/tik_ops_utils.py +0 -435
- mindspore/ops/_op_impl/_custom_op/flash_attention/tiling_strategy/__init__.py +0 -0
- mindspore/ops/_op_impl/_custom_op/flash_attention/tiling_strategy/sparse_tiling.py +0 -45
- mindspore/ops/_op_impl/_custom_op/flash_attention/tiling_strategy/strategy.py +0 -67
- mindspore/ops/_op_impl/_custom_op/flash_attention/tiling_strategy/wukong_tiling.py +0 -62
- /mindspore/{ops/_op_impl/_custom_op/flash_attention → _akg/akg/utils/ascend_profilier}/__init__.py +0 -0
- {mindspore-2.2.0.dist-info → mindspore-2.2.11.dist-info}/WHEEL +0 -0
- {mindspore-2.2.0.dist-info → mindspore-2.2.11.dist-info}/entry_points.txt +0 -0
- {mindspore-2.2.0.dist-info → mindspore-2.2.11.dist-info}/top_level.txt +0 -0
mindspore/ops/function/array_func.py

@@ -660,14 +660,14 @@ def one_hot(indices, depth, on_value=1, off_value=0, axis=-1):
 
     Note:
         If the input indices is rank `N`, the output will have rank `N+1`. The new axis is created at dimension `axis`.
+        On Ascend, if `on_value` is Int64 dtype, `indices` must be Int64 dtype.
 
     Args:
         indices(Tensor): A tensor of indices. Tensor of shape :math:`(X_0, \ldots, X_n)`.
             Data type must be int32 or int64.
         depth(int): A scalar defining the depth of the one-hot dimension.
         on_value(Union[Tensor, int, float], optional): A value to fill in output when `indices[j] = i`.
-
-            bool, complex64, complex128. Default: ``1`` .
+            Data type must be int32, int64, float16 or float32. Default: ``1`` .
         off_value(Union[Tensor, int, float], optional): A value to fill in output when `indices[j] != i`.
             Has the same data type as `on_value`. Default: ``0`` .
         axis(int, optional): Position to insert the value. e.g. If shape of `self` is :math:`(N, C)`, and `axis` is -1,

@@ -676,7 +676,8 @@ def one_hot(indices, depth, on_value=1, off_value=0, axis=-1):
             Default: ``-1`` .
 
     Returns:
-        Tensor, one-hot tensor. Tensor of shape :math:`(X_0, \ldots, X_{axis}, \text{depth} ,X_{axis+1}, \ldots, X_n)
+        Tensor, one-hot tensor. Tensor of shape :math:`(X_0, \ldots, X_{axis}, \text{depth} ,X_{axis+1}, \ldots, X_n)`,
+        and it has the same data type as `on_value`.
 
     Raises:
         TypeError: If `axis` or `depth` is not an int.
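For readers checking the tightened dtype note, a minimal usage sketch of `ops.one_hot` (the call and values are illustrative, not part of the diff):

    import numpy as np
    import mindspore as ms
    from mindspore import Tensor, ops

    indices = Tensor(np.array([0, 1, 2]), ms.int32)
    # The output takes the dtype of on_value/off_value (now documented as
    # int32, int64, float16 or float32).
    out = ops.one_hot(indices, 3, Tensor(1.0, ms.float32), Tensor(0.0, ms.float32))
    print(out.shape, out.dtype)  # (3, 3) Float32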
@@ -1734,7 +1735,11 @@ def flatten(input, order='C', *, start_dim=1, end_dim=-1):
         raise TypeError(f"For 'flatten', both 'start_dim' and 'end_dim' must be int.")
     check_flatten_order_const(order)
     if order == 'F':
-
+        x_rank = rank_(input)
+        # If input is a 0-dimensional Tensor, a 1-dimensional Tensor will be returned.
+        if x_rank in (0, 1):
+            return reshape_(input, (-1,))
+        perm = ops.make_range(0, x_rank)
         new_order = ops.tuple_reversed(perm)
         input = _get_cache_prim(P.Transpose)()(input, new_order)
 
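A quick sketch of the behavior the new branch documents, assuming a standard MindSpore 2.2.11 install (not taken from the diff):

    import numpy as np
    import mindspore as ms
    from mindspore import Tensor, ops

    x = Tensor(np.float32(3.0))        # 0-dimensional tensor
    y = ops.flatten(x, order='F')      # the added branch reshapes it to 1-D
    print(y.shape)                     # (1,)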
@@ -2161,8 +2166,6 @@ def concat(tensors, axis=0):
 
     Tutorial Examples:
         - `Tensor - Tensor Operation <https://mindspore.cn/tutorials/en/r2.2/beginner/tensor.html#tensor-operation>`_
-        - `FGSM Network Adversarial Attack - Implementing FGSM
-          <https://mindspore.cn/tutorials/application/en/r2.2/cv/fgsm.html#implementing-fgsm>`_
         - `Vision Transformer Image Classification - Building ViT as a whole
           <https://mindspore.cn/tutorials/application/en/r2.2/cv/vit.html#building-vit-as-a-whole>`_
         - `Sentiment Classification Implemented by RNN - Dense
@@ -6828,7 +6831,7 @@ def diagonal(input, offset=0, dim1=0, dim2=1):
     """
     x_ndim = input.ndim
     if x_ndim < 2:
-        raise ValueError(f"ops.diagonal
+        raise ValueError(f"For 'ops.diagonal', the original tensor requires at least two dimensions, but got {x_ndim}")
     _check_attr_dtype("dim1", dim1, [int], "diagonal")
     _check_attr_dtype("dim2", dim2, [int], "diagonal")
     dtype = input.dtype
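A small sketch that triggers the reworded error (illustrative only):

    import numpy as np
    import mindspore as ms
    from mindspore import Tensor, ops

    x = Tensor(np.arange(4), ms.float32)   # 1-D tensor
    try:
        ops.diagonal(x)
    except ValueError as err:
        print(err)  # "For 'ops.diagonal', the original tensor requires at least two dimensions, but got 1"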
mindspore/ops/function/nn_func.py

@@ -4609,6 +4609,19 @@ def max_pool3d(x, kernel_size, stride=None, padding=0, dilation=1, ceil_mode=Fal
 
         - **output** (Tensor) - Maxpooling result, with shape :math:`(N_{out}, C_{out}, D_{out}, H_{out}, W_{out})`.
           It has the same data type as `x`.
+
+          .. math::
+              D_{out} = \left\lfloor\frac{D_{in} + 2 \times \text{padding}[0] - \text{dilation}[0] \times
+              (\text{kernel_size}[0] - 1) - 1}{\text{stride}[0]} + 1\right\rfloor
+
+          .. math::
+              H_{out} = \left\lfloor\frac{H_{in} + 2 \times \text{padding}[1] - \text{dilation}[1] \times
+              (\text{kernel_size}[1] - 1) - 1}{\text{stride}[1]} + 1\right\rfloor
+
+          .. math::
+              W_{out} = \left\lfloor\frac{W_{in} + 2 \times \text{padding}[2] - \text{dilation}[2] \times
+              (\text{kernel_size}[2] - 1) - 1}{\text{stride}[2]} + 1\right\rfloor
+
         - **argmax** (Tensor) - Index corresponding to the maximum value. Data type is int64. It will be return
           only when `return_indices` is ``True`` .
 
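To make the newly documented shape formula concrete, a hedged numeric check (parameter values chosen here for illustration):

    import numpy as np
    import mindspore as ms
    from mindspore import Tensor, ops

    x = Tensor(np.random.randn(1, 2, 16, 16, 16), ms.float32)
    out = ops.max_pool3d(x, kernel_size=3, stride=2, padding=1)
    # Formula: floor((16 + 2*1 - 1*(3 - 1) - 1) / 2 + 1) = 8 for each spatial dim
    print(out.shape)  # (1, 2, 8, 8, 8)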
@@ -6037,20 +6050,20 @@ def conv3d(input, weight, bias=None, stride=1, pad_mode="valid", padding=0, dila
 
     .. math::
         \begin{array}{ll} \\
-            D_{out}
-            H_{out}
-            W_{out}
+            D_{out} = \left \lceil{\frac{D_{in}}{\text{stride[0]}}} \right \rceil \\
+            H_{out} = \left \lceil{\frac{H_{in}}{\text{stride[1]}}} \right \rceil \\
+            W_{out} = \left \lceil{\frac{W_{in}}{\text{stride[2]}}} \right \rceil \\
         \end{array}
 
     `pad_mode` is ``"valid"``:
 
     .. math::
         \begin{array}{ll} \\
-            D_{out}
+            D_{out} = \left \lfloor{\frac{D_{in} - \text{dilation[0]} \times (\text{kernel_size[0]} - 1) }
             {\text{stride[0]}} + 1} \right \rfloor \\
-            H_{out}
+            H_{out} = \left \lfloor{\frac{H_{in} - \text{dilation[1]} \times (\text{kernel_size[1]} - 1) }
             {\text{stride[1]}} + 1} \right \rfloor \\
-            W_{out}
+            W_{out} = \left \lfloor{\frac{W_{in} - \text{dilation[2]} \times (\text{kernel_size[2]} - 1) }
             {\text{stride[2]}} + 1} \right \rfloor \\
         \end{array}
 

@@ -6058,11 +6071,11 @@ def conv3d(input, weight, bias=None, stride=1, pad_mode="valid", padding=0, dila
 
     .. math::
         \begin{array}{ll} \\
-            D_{out}
+            D_{out} = \left \lfloor{\frac{D_{in} + padding[0] + padding[1] - (\text{dilation[0]} - 1) \times
             \text{kernel_size[0]} - 1 }{\text{stride[0]}} + 1} \right \rfloor \\
-            H_{out}
+            H_{out} = \left \lfloor{\frac{H_{in} + padding[2] + padding[3] - (\text{dilation[1]} - 1) \times
             \text{kernel_size[1]} - 1 }{\text{stride[1]}} + 1} \right \rfloor \\
-            W_{out}
+            W_{out} = \left \lfloor{\frac{W_{in} + padding[4] + padding[5] - (\text{dilation[2]} - 1) \times
             \text{kernel_size[2]} - 1 }{\text{stride[2]}} + 1} \right \rfloor \\
         \end{array}
 
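A tiny worked evaluation of the restored "same"-mode formula, purely to read the math; the numbers are illustrative and nothing here calls conv3d:

    import math

    # D_out = ceil(D_in / stride[0]) under pad_mode="same", per the docstring
    d_in, stride = 16, 2
    print(math.ceil(d_in / stride))  # 8, the documented D_out (and likewise H_out, W_out)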
@@ -7431,6 +7444,82 @@ def max_pool2d(x, kernel_size, stride=None, padding=0, dilation=1, return_indice
     return out
 
 
+def prompt_flash_attention(query, key, value, padding_mask, attn_mask, actual_seq_lengths,
+                           actual_seq_lengths_kv, deq_scale1, quant_scale1,
+                           deq_scale2, quant_scale2, quant_offset2, num_heads, scale_value=1.0, pre_tokens=2147483547,
+                           next_tokens=0, input_layout='BSH',
+                           num_key_value_heads=0, sparse_mode=0):
+    r"""
+    The interface for fully inference.
+    B -- Batch size
+    S -- Sequence length
+    H -- Hidden size
+
+    Note:
+        is only supported on ascend910B
+
+    .. warning::
+        This is an experimental API that is subject to change or deletion.
+
+    Inputs:
+        query (Tensor) - The query tensor with data type of float16 or float32.
+            Input tensor of shape :math:`(B, S, H)` / `(B, N, S, D)`.
+        key (Tensor) - The key tensor with data type of float16 or float32.
+            Input tensor of shape :math:`(B, S, H)` / `(B, N, S, D)`.
+        value (Tensor) - The value tensor with data type of float16 or float32.
+            Input tensor of shape :math:`(B, S, H)` / `(B, N, S, D)`.
+        padding_mask (Tensor) - The padding mask tensor with data type of float16 or float32
+        attn_mask (Tensor) - The attention mask tensor with data type of float16 or float32.
+            For each element, 0 indicates retention and 1 indicates discard. Input tensor of shape :math:`(B, 1, S, S)`.
+        actual_seq_lengths (list[int]): Describe actual sequence length of each input with data type of int.
+        actual_seq_lengths_kv (list[int]): Describe actual sequence length of each input with data type of int.
+        dep_scale1 (Tensor)
+        quant_scale1 (Tensor)
+        deq_scale2 (Tensor)
+        quant_scale2 (Tensor)
+        quant_offset2 (Tensor)
+        num_heads (int): The number of heads.
+        scale_value (float): The scale value indicating the scale coefficient, which is used as the scalar of
+            Muls in the calculation. Default: 1.0.
+        pre_tokens (int): Previous tokens. Default: 2147483547.
+        next_tokens (int): next tokens. Default: 0.
+            indicate the upper triangle, Indicate the number of data blocks involved in the calculation. The value 0
+            indicates that the data blocks in the upper triangle are not involved in the calculation
+        input_layout (str): the data layout of the input qkv, support `(BSH)` and `(BNSD)`, Default `BSH`.
+        num_key_value_heads (int): head numbers of key/value which are used in GQA algorithm.
+            The value o indicates if the key and value have the same head nums, use numHeads. Default: 0.
+        sparse_mode (int): Default: 0
+
+
+    Outputs:
+        attention_out (Tensor) - Input tensor of shape :math:`(B, S, H)` / `(B, N, S, D)`.
+
+    Supported Platforms:
+        ``Ascend``
+
+    Examples:
+        >>> from mindspore.ops.function.nn_func import prompt_flash_attention
+        >>> from mindspore import Tensor
+        >>> import numpy as np
+        >>> B = 1
+        >>> N = 16
+        >>> S = 256
+        >>> D = 16
+        >>> query = Tensor(np.ones((B, N, S, D), dtype=np.float16))
+        >>> key = Tensor(np.ones((B, N, S, D), dtype=np.float16))
+        >>> value = Tensor(np.ones((B, N, S, D), dtype=np.float16))
+        >>> out = ops.prompt_flash_attention(query, key, value, None, None, None, None, None, None, None, None,
+                                             None, N, input_layout='BNSD')
+        >>> print(out[0].shape)
+        (1, 16, 256, 16)
+    """
+
+    pfa = _get_cache_prim(NN_OPS.PromptFlashAttention)(num_heads, scale_value, pre_tokens, next_tokens, input_layout,
+                                                       num_key_value_heads, sparse_mode)
+    return pfa(query, key, value, padding_mask, attn_mask, actual_seq_lengths, actual_seq_lengths_kv, deq_scale1,
+               quant_scale1, deq_scale2, quant_scale2, quant_offset2)
+
+
 __all__ = [
     'adaptive_avg_pool1d',
     'adaptive_avg_pool2d',
mindspore/ops/function/random_func.py

@@ -1180,7 +1180,8 @@ def randint_like(input, low, high, seed=None, *, dtype=None):
     cast_ = P.Cast()
     low_ = Tensor(low, mstype.int32)
     high_ = Tensor(high, mstype.int32)
-
+    size_ = Tensor(size, mstype.int32)
+    output = rand_op(size_, low_, high_)
     return cast_(output, dtype)
 
 
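A short, hedged usage sketch of the function this fix touches (shape and bounds chosen for illustration):

    import numpy as np
    import mindspore as ms
    from mindspore import Tensor, ops

    x = Tensor(np.zeros((2, 3)), ms.int32)
    out = ops.randint_like(x, 0, 10, seed=1)   # integers in [0, 10) with x's shape
    print(out.shape)                           # (2, 3)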
mindspore/ops/op_info_register.py

@@ -36,13 +36,16 @@ if platform.system() == "Linux":
     BUILT_IN_OPS_REGISTER_PATH = "mindspore/ops/_op_impl"
     BUILT_IN_CUSTOM_OPS_REGISTER_PATH = "mindspore/ops/_op_impl/_custom_op"
 
+KEY_NAME = "name"
+ASCEND_CUSTOM_OPP_PATH = "ASCEND_CUSTOM_OPP_PATH"
 
-
+
+def _get_reg_info_attr(op_info, attr_name, default_value=None):
     """get attr value"""
     for _, item in enumerate(op_info.get("attr", [])):
-        if item.get(
+        if item.get(KEY_NAME) == attr_name:
             return item.get("defaultValue")
-    return
+    return default_value
 
 
 class _CustomInstaller:
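To show what the new `default_value` parameter buys, a self-contained sketch against a hypothetical reg-info dict (the dict contents are made up for illustration):

    # Hypothetical reg info, shaped like the dicts _get_reg_info_attr walks.
    op_info = {"attr": [{"name": "cust_aicpu", "defaultValue": "libcust_aicpu_kernels"}]}

    def get_reg_info_attr(op_info, attr_name, default_value=None):
        # Same lookup as the patched helper: match on the attr name,
        # fall back to a caller-supplied default instead of bare None.
        for item in op_info.get("attr", []):
            if item.get("name") == attr_name:
                return item.get("defaultValue")
        return default_value

    print(get_reg_info_attr(op_info, "cust_aicpu"))        # libcust_aicpu_kernels
    print(get_reg_info_attr(op_info, "missing", "none"))   # none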
@@ -66,12 +69,12 @@ class _CustomInstaller:
     @staticmethod
     def _set_env(custom_opp_path):
         """set custom file path to env"""
-        if not os.environ.get(
-            os.environ[
+        if not os.environ.get(ASCEND_CUSTOM_OPP_PATH):
+            os.environ[ASCEND_CUSTOM_OPP_PATH] = custom_opp_path
         else:
-            paths = os.environ[
+            paths = os.environ[ASCEND_CUSTOM_OPP_PATH].split(':')
             if custom_opp_path not in paths:
-                os.environ[
+                os.environ[ASCEND_CUSTOM_OPP_PATH] = custom_opp_path + ':' + os.environ[ASCEND_CUSTOM_OPP_PATH]
 
     @staticmethod
     def _create_dir(*dir_names):
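The same prepend-once logic in standalone form, as a hedged sketch (names mirror the diff; nothing here ships in the wheel):

    import os

    ASCEND_CUSTOM_OPP_PATH = "ASCEND_CUSTOM_OPP_PATH"

    def set_custom_opp_path(custom_opp_path):
        # Prepend the custom op package path once, colon-separated, like PATH handling.
        existing = os.environ.get(ASCEND_CUSTOM_OPP_PATH)
        if not existing:
            os.environ[ASCEND_CUSTOM_OPP_PATH] = custom_opp_path
        elif custom_opp_path not in existing.split(':'):
            os.environ[ASCEND_CUSTOM_OPP_PATH] = custom_opp_path + ':' + existing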
@@ -94,11 +97,11 @@ class _CustomInstaller:
             _CustomInstaller.copied_paths.append(src_path)
             if os.path.isfile(src_path):
                 lock_file = os.path.join(dst_dir, "file.lock")
-                with open(lock_file,
+                with os.fdopen(os.open(lock_file, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600), 'w') as f:
                     fcntl.flock(f.fileno(), fcntl.LOCK_EX)
                     shutil.copy(src_path, dst_dir)
 
-    def
+    def check(self):
         """check if the reg info need written"""
         if platform.system() != "Linux":
             return False
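The pattern both lock-file changes adopt, shown as a standalone hedged sketch: create the lock file with 0600 permissions via os.open instead of a plain open(), then hold an exclusive flock for the critical section (Linux-only, like the original code).

    import fcntl
    import os

    def with_file_lock(lock_path, critical_section):
        # Create/truncate the lock file with owner-only permissions,
        # then run the protected operation under an exclusive lock.
        fd = os.open(lock_path, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
        with os.fdopen(fd, 'w') as f:
            fcntl.flock(f.fileno(), fcntl.LOCK_EX)
            critical_section()
        # The lock is released when the descriptor is closed.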
@@ -153,12 +156,12 @@ class _CustomInstaller:
         # attr
         attrs_name = []
         for _, item in enumerate(self.op_info.get("attr", [])):
-            attr_name = item.get(
+            attr_name = item.get(KEY_NAME)
             attrs_name.append(attr_name)
             key = "attr_" + attr_name
             op_info[key] = {}
             for k, v in item.items():
-                if k !=
+                if k != KEY_NAME:
                     op_info[key][k] = v
         if attrs_name:
             op_info["attr"] = {"list": ",".join(attrs_name)}

@@ -171,7 +174,7 @@ class _CustomInstaller:
             item = inputs[i] if i < input_num else outputs[i - input_num]
             key = "input" if i < input_num else "output"
             key += str(item.get("index"))
-            op_info[key] = {
+            op_info[key] = {KEY_NAME: item.get(KEY_NAME),
                             "paramType": item.get("paramType", "required"),
                             "shape": item.get("shape", "all")}
             dtype, formats = _get_dtype_format(i)

@@ -181,7 +184,8 @@ class _CustomInstaller:
             op_info[key]["format"] = ",".join(formats)
         return op_info
 
-
+    @staticmethod
+    def _gen_ai_cpu_reg_info(so_file):
         """generate reg info"""
         op_info = {"opInfo": {"computeCost": "100",
                               "engine": "DNN_VM_AICPU",

@@ -198,7 +202,7 @@ class _CustomInstaller:
         repo = {}
         save_path = os.path.join(dst_dir, file_name)
         lock_file = os.path.join(dst_dir, "file.lock")
-        with open(lock_file,
+        with os.fdopen(os.open(lock_file, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600), 'w') as f:
            fcntl.flock(f.fileno(), fcntl.LOCK_EX)
            if os.path.isfile(save_path):
                with open(save_path, 'r') as fr:

@@ -211,7 +215,7 @@ class _CustomInstaller:
 
     def run(self):
         """save reg info to file"""
-        if not self.
+        if not self.check():
             return
         so_name = _get_reg_info_attr(self.op_info, "cust_aicpu")
         if so_name:
@@ -380,7 +384,6 @@ class RegOp:
         """
         if not isinstance(value, str):
             raise TypeError("%s value must be str" % str(value))
-        return True
 
     def _is_int(self, value):
         """

@@ -394,7 +397,6 @@ class RegOp:
         """
         if not isinstance(value, int):
             raise TypeError("%s value must be int" % str(value))
-        return True
 
     def _is_bool(self, value):
         """

@@ -408,7 +410,6 @@ class RegOp:
         """
         if not isinstance(value, bool):
             raise TypeError("%s value must be bool" % str(value))
-        return True
 
     @staticmethod
     def _is_list(value):

@@ -423,7 +424,6 @@ class RegOp:
         """
         if not isinstance(value, list):
             raise TypeError("%s value must be list" % str(value))
-        return True
 
     def _check_param(self, param_list, key_list, fn_list, kwargs):
         """

@@ -491,7 +491,9 @@ class RegOp:
                 self._is_string(arg[1])
                 if len(arg) == 3:
                     self._is_string(arg[2])
-
+                dtype_format.append(arg)
+            else:
+                dtype_format.append(arg)
         self.dtype_format_.append(tuple(dtype_format))
         return self
 

@@ -920,7 +922,8 @@ class TBERegOp(RegOp):
         Args:
             pattern (str): Value of op pattern, e.g. "broadcast", "reduce". Default: ``None`` .
         """
-        if pattern is not None
+        if pattern is not None:
+            self._is_string(pattern)
         self.op_pattern_ = pattern
         return self
 
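For orientation, the registration pattern these RegOp/TBERegOp fixes sit inside, roughly as in the MindSpore custom-op tutorials; the op name and file names below are placeholders, not from the diff:

    from mindspore.ops import TBERegOp, DataType

    my_op_info = TBERegOp("CusSquare") \
        .fusion_type("OPAQUE") \
        .async_flag(False) \
        .binfile_name("square.so") \
        .compute_cost(10) \
        .kernel_name("square_impl") \
        .partial_flag(True) \
        .op_pattern("formatAgnostic") \
        .input(0, "x", False, "required", "all") \
        .output(0, "y", False, "required", "all") \
        .dtype_format(DataType.F32_Default, DataType.F32_Default) \
        .get_op_info()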
mindspore/ops/operations/__init__.py

@@ -118,7 +118,7 @@ from .nn_ops import (LSTM, SGD, Adam, AdamWeightDecay, FusedSparseAdam, FusedSpa
                      Dilation2D, DataFormatVecPermute, DeformableOffsets, Dense, FractionalAvgPool,
                      FractionalMaxPool, FractionalMaxPool3DWithFixedKsize, FractionalMaxPoolWithFixedKsize,
                      GridSampler2D, TripletMarginLoss, UpsampleNearest3D, UpsampleTrilinear3D, PadV3, ChannelShuffle,
-                     GLU, MaxUnpool3D, Pdist)
+                     GLU, MaxUnpool3D, Pdist, RmsNorm, PagedAttention, PagedAttentionMask, ReshapeAndCache)
 from .other_ops import (Assign, IOU, BoundingBoxDecode, BoundingBoxEncode,
                         ConfusionMatrix, UpdateState, Load, StopGradient,
                         CheckValid, Partial, Depend, Push, Pull, PyExecute, PyFunc, _DynamicLossScale,

@@ -691,7 +691,11 @@ __all__ = [
     "IndexPut",
     "MaskedScatter",
     "Ormqr",
-    "RandpermV2"
+    "RandpermV2",
+    "RmsNorm",
+    "PagedAttention",
+    "PagedAttentionMask",
+    "ReshapeAndCache"
 ]
 
 __custom__ = [
mindspore/ops/operations/_grad_ops.py

@@ -3845,7 +3845,7 @@ class FlashAttentionScoreGrad(Primitive):
     """
     @prim_attr_register
     def __init__(self, head_num, keep_prob=1.0, scale_value=1.0, pre_tokens=65536, next_tokens=65536, inner_precise=1,
-                 input_layout='BSH'):
+                 input_layout='BSH', sparse_mode=0):
         """Initialize FlashAttentionScoreGrad."""
         validator.check_value_type('head_num', head_num, [int], self.name)
         validator.check_value_type('keep_prob', keep_prob, [int, float], self.name)

@@ -3855,11 +3855,30 @@ class FlashAttentionScoreGrad(Primitive):
         validator.check_value_type('pre_tokens', pre_tokens, [int], self.name)
         validator.check_value_type('next_tokens', next_tokens, [int], self.name)
         validator.check_value_type('inner_precise', inner_precise, [int], self.name)
+        validator.check_value_type('sparse_mode', sparse_mode, [int], self.name)
         if inner_precise not in [0, 1]:
             raise ValueError(f"Attribute 'inner_precise' must be either 0 or 1, but got {inner_precise}")
         validator.check_value_type('input_layout', input_layout, [str], self.name)
-        if input_layout not in ["BSH"]:
-            raise ValueError(f"Attribute 'input_layout' must be either '
-        self.init_prim_io_names(inputs=['query', 'key', 'value', '
-            '
-
+        if input_layout not in ["BSH", "BNSD"]:
+            raise ValueError(f"Attribute 'input_layout' must be either 'BSH' or 'BNSD', but got {input_layout}")
+        self.init_prim_io_names(inputs=['query', 'key', 'value', 'dy', 'pse_shift', 'drop_mask', "padding_mask",
+                                        'attn_mask', 'softmax_max', 'softmax_sum', 'softmax_out', 'attention_in',
+                                        'prefix'],
+                                outputs=['dq', 'dk', 'dv', 'dpse'])
+
+
+class RmsNormGrad(Primitive):
+    r"""
+    Calculates the gradient of RmsNorm operation.
+    .. warning::
+        This is an experimental API that is subject to change or deletion.
+
+    Supported Platforms:
+        ``Ascend``
+    """
+
+    @prim_attr_register
+    def __init__(self):
+        """Initialize RmsNormGrad."""
+        self.init_prim_io_names(inputs=["dy", "x", "rstd", "gamma"],
+                                outputs=["dx", "dgamma"])