mindspore 2.2.0-cp37-cp37m-manylinux1_x86_64.whl → 2.2.11-cp37-cp37m-manylinux1_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (170)
  1. mindspore/.commit_id +1 -1
  2. mindspore/_akg/akg/composite/build_module.py +104 -20
  3. mindspore/_akg/akg/utils/ascend_profilier/cann_file_parser.py +76 -0
  4. mindspore/_akg/akg/utils/ascend_profilier/file_manager.py +56 -0
  5. mindspore/_akg/akg/utils/ascend_profilier/op_summary_bean.py +23 -0
  6. mindspore/_akg/akg/utils/ascend_profilier/op_summary_headers.py +8 -0
  7. mindspore/_akg/akg/utils/ascend_profilier/op_summary_parser.py +42 -0
  8. mindspore/_akg/akg/utils/ascend_profilier/path_manager.py +65 -0
  9. mindspore/_akg/akg/utils/composite_op_helper.py +7 -2
  10. mindspore/_akg/akg/utils/dump_ascend_meta.py +22 -3
  11. mindspore/_akg/akg/utils/kernel_exec.py +41 -15
  12. mindspore/_akg/akg/utils/tbe_codegen_utils.py +27 -6
  13. mindspore/_akg/akg/utils/util.py +56 -1
  14. mindspore/_c_dataengine.cpython-37m-x86_64-linux-gnu.so +0 -0
  15. mindspore/_c_expression.cpython-37m-x86_64-linux-gnu.so +0 -0
  16. mindspore/_checkparam.py +3 -3
  17. mindspore/_extends/graph_kernel/model/graph_split.py +84 -76
  18. mindspore/_extends/graph_kernel/splitter.py +3 -2
  19. mindspore/_extends/parallel_compile/akg_compiler/build_tbe_kernel.py +83 -66
  20. mindspore/_extends/parallel_compile/akg_compiler/tbe_topi.py +4 -4
  21. mindspore/_extends/parallel_compile/akg_compiler/util.py +10 -7
  22. mindspore/_extends/parallel_compile/tbe_compiler/tbe_helper.py +2 -1
  23. mindspore/_extends/parse/__init__.py +3 -2
  24. mindspore/_extends/parse/parser.py +6 -1
  25. mindspore/_extends/parse/standard_method.py +14 -11
  26. mindspore/_extends/remote/kernel_build_server.py +2 -1
  27. mindspore/_mindspore_offline_debug.cpython-37m-x86_64-linux-gnu.so +0 -0
  28. mindspore/bin/cache_admin +0 -0
  29. mindspore/bin/cache_server +0 -0
  30. mindspore/common/_utils.py +16 -0
  31. mindspore/common/api.py +1 -1
  32. mindspore/common/auto_dynamic_shape.py +81 -85
  33. mindspore/common/dump.py +1 -1
  34. mindspore/common/tensor.py +3 -20
  35. mindspore/config/op_info.config +1 -1
  36. mindspore/context.py +11 -4
  37. mindspore/dataset/engine/cache_client.py +8 -5
  38. mindspore/dataset/engine/datasets_standard_format.py +5 -0
  39. mindspore/dataset/vision/transforms.py +21 -21
  40. mindspore/experimental/optim/adam.py +1 -1
  41. mindspore/gen_ops.py +1 -1
  42. mindspore/include/api/model.h +17 -0
  43. mindspore/include/api/status.h +8 -3
  44. mindspore/lib/libdnnl.so.2 +0 -0
  45. mindspore/lib/libmindspore.so +0 -0
  46. mindspore/lib/libmindspore_backend.so +0 -0
  47. mindspore/lib/libmindspore_common.so +0 -0
  48. mindspore/lib/libmindspore_core.so +0 -0
  49. mindspore/lib/libmindspore_glog.so.0 +0 -0
  50. mindspore/lib/libmindspore_gpr.so.15 +0 -0
  51. mindspore/lib/libmindspore_grpc++.so.1 +0 -0
  52. mindspore/lib/libmindspore_grpc.so.15 +0 -0
  53. mindspore/lib/libmindspore_shared_lib.so +0 -0
  54. mindspore/lib/libnnacl.so +0 -0
  55. mindspore/lib/libopencv_core.so.4.5 +0 -0
  56. mindspore/lib/libopencv_imgcodecs.so.4.5 +0 -0
  57. mindspore/lib/libopencv_imgproc.so.4.5 +0 -0
  58. mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/config/ascend310/aic-ascend310-ops-info.json +123 -0
  59. mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/config/ascend310p/aic-ascend310p-ops-info.json +123 -0
  60. mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/config/ascend910/aic-ascend910-ops-info.json +158 -0
  61. mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/config/ascend910b/aic-ascend910b-ops-info.json +37 -0
  62. mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/custom_aicore_ops_impl/add_dsl.py +46 -0
  63. mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/custom_aicore_ops_impl/add_tik.py +51 -0
  64. mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/custom_aicore_ops_impl/kv_cache_mgr.py +241 -0
  65. mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/custom_aicore_ops_impl/matmul_tik.py +212 -0
  66. mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/vector_core/tbe/custom_aicore_ops_impl/add_dsl.py +46 -0
  67. mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/vector_core/tbe/custom_aicore_ops_impl/add_tik.py +51 -0
  68. mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/vector_core/tbe/custom_aicore_ops_impl/kv_cache_mgr.py +241 -0
  69. mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/vector_core/tbe/custom_aicore_ops_impl/matmul_tik.py +212 -0
  70. mindspore/lib/plugin/ascend/custom_aicore_ops/op_proto/libop_proto.so +0 -0
  71. mindspore/lib/plugin/ascend/custom_aicpu_ops/op_impl/cpu/aicpu_kernel/impl/libcust_aicpu_kernels.so +0 -0
  72. mindspore/lib/plugin/ascend/custom_aicpu_ops/op_impl/cpu/aicpu_kernel/impl/libcust_cpu_kernels.so +0 -0
  73. mindspore/lib/plugin/ascend/custom_aicpu_ops/op_impl/cpu/config/cust_aicpu_kernel.json +78 -80
  74. mindspore/lib/plugin/ascend/custom_aicpu_ops/op_proto/libcust_op_proto.so +0 -0
  75. mindspore/lib/plugin/ascend/libakg.so +0 -0
  76. mindspore/lib/plugin/ascend/libhccl_plugin.so +0 -0
  77. mindspore/lib/plugin/ascend/libmindspore_aicpu_kernels.so +0 -0
  78. mindspore/lib/plugin/ascend/libmindspore_cpu_kernels.so +0 -0
  79. mindspore/lib/plugin/cpu/libakg.so +0 -0
  80. mindspore/lib/plugin/gpu/libcuda_ops.so.10 +0 -0
  81. mindspore/lib/plugin/gpu/libcuda_ops.so.11 +0 -0
  82. mindspore/lib/plugin/gpu10.1/libakg.so +0 -0
  83. mindspore/lib/plugin/gpu10.1/libnccl.so.2 +0 -0
  84. mindspore/lib/plugin/gpu11.1/libakg.so +0 -0
  85. mindspore/lib/plugin/gpu11.6/libakg.so +0 -0
  86. mindspore/lib/plugin/gpu11.6/libnccl.so.2 +0 -0
  87. mindspore/lib/plugin/libmindspore_ascend.so.1 +0 -0
  88. mindspore/lib/plugin/libmindspore_ascend.so.2 +0 -0
  89. mindspore/lib/plugin/libmindspore_gpu.so.10.1 +0 -0
  90. mindspore/lib/plugin/libmindspore_gpu.so.11.1 +0 -0
  91. mindspore/lib/plugin/libmindspore_gpu.so.11.6 +0 -0
  92. mindspore/nn/cell.py +0 -3
  93. mindspore/nn/layer/activation.py +4 -5
  94. mindspore/nn/layer/conv.py +39 -23
  95. mindspore/nn/layer/flash_attention.py +54 -129
  96. mindspore/nn/layer/math.py +3 -7
  97. mindspore/nn/layer/rnn_cells.py +5 -5
  98. mindspore/nn/wrap/__init__.py +4 -2
  99. mindspore/nn/wrap/cell_wrapper.py +12 -3
  100. mindspore/numpy/utils_const.py +5 -5
  101. mindspore/ops/_grad_experimental/grad_array_ops.py +1 -1
  102. mindspore/ops/_grad_experimental/grad_implementations.py +2 -2
  103. mindspore/ops/_grad_experimental/grad_math_ops.py +19 -18
  104. mindspore/ops/_grad_experimental/grad_sparse_ops.py +3 -3
  105. mindspore/ops/_op_impl/aicpu/add.py +3 -3
  106. mindspore/ops/_op_impl/aicpu/linear_sum_assignment.py +21 -2
  107. mindspore/ops/_utils/utils.py +2 -0
  108. mindspore/ops/composite/multitype_ops/_compile_utils.py +2 -1
  109. mindspore/ops/composite/multitype_ops/getitem_impl.py +2 -2
  110. mindspore/ops/function/array_func.py +10 -7
  111. mindspore/ops/function/grad/grad_func.py +0 -1
  112. mindspore/ops/function/nn_func.py +98 -9
  113. mindspore/ops/function/random_func.py +2 -1
  114. mindspore/ops/op_info_register.py +24 -21
  115. mindspore/ops/operations/__init__.py +6 -2
  116. mindspore/ops/operations/_grad_ops.py +25 -6
  117. mindspore/ops/operations/_inner_ops.py +155 -23
  118. mindspore/ops/operations/array_ops.py +9 -7
  119. mindspore/ops/operations/comm_ops.py +2 -2
  120. mindspore/ops/operations/custom_ops.py +85 -68
  121. mindspore/ops/operations/inner_ops.py +26 -3
  122. mindspore/ops/operations/math_ops.py +7 -6
  123. mindspore/ops/operations/nn_ops.py +193 -49
  124. mindspore/parallel/_parallel_serialization.py +10 -3
  125. mindspore/parallel/_tensor.py +4 -1
  126. mindspore/parallel/checkpoint_transform.py +13 -2
  127. mindspore/parallel/shard.py +17 -10
  128. mindspore/profiler/common/util.py +1 -0
  129. mindspore/profiler/parser/ascend_hccl_generator.py +232 -0
  130. mindspore/profiler/parser/ascend_msprof_exporter.py +86 -43
  131. mindspore/profiler/parser/ascend_msprof_generator.py +196 -9
  132. mindspore/profiler/parser/ascend_op_generator.py +1 -1
  133. mindspore/profiler/parser/ascend_timeline_generator.py +6 -182
  134. mindspore/profiler/parser/base_timeline_generator.py +1 -1
  135. mindspore/profiler/parser/cpu_gpu_timeline_generator.py +2 -2
  136. mindspore/profiler/parser/framework_parser.py +1 -1
  137. mindspore/profiler/parser/profiler_info.py +19 -0
  138. mindspore/profiler/profiling.py +46 -24
  139. mindspore/rewrite/api/pattern_engine.py +1 -1
  140. mindspore/rewrite/parsers/for_parser.py +7 -7
  141. mindspore/rewrite/parsers/module_parser.py +4 -4
  142. mindspore/rewrite/symbol_tree.py +1 -4
  143. mindspore/run_check/_check_version.py +5 -3
  144. mindspore/safeguard/rewrite_obfuscation.py +52 -28
  145. mindspore/scipy/ops.py +55 -5
  146. mindspore/scipy/optimize/__init__.py +3 -2
  147. mindspore/scipy/optimize/linear_sum_assignment.py +38 -33
  148. mindspore/train/callback/_summary_collector.py +1 -1
  149. mindspore/train/dataset_helper.py +1 -0
  150. mindspore/train/model.py +2 -2
  151. mindspore/train/serialization.py +97 -11
  152. mindspore/train/summary/_summary_adapter.py +1 -1
  153. mindspore/train/summary/summary_record.py +23 -7
  154. mindspore/version.py +1 -1
  155. {mindspore-2.2.0.dist-info → mindspore-2.2.11.dist-info}/METADATA +3 -2
  156. {mindspore-2.2.0.dist-info → mindspore-2.2.11.dist-info}/RECORD +160 -151
  157. mindspore/ops/_op_impl/_custom_op/flash_attention/attention.py +0 -406
  158. mindspore/ops/_op_impl/_custom_op/flash_attention/constants.py +0 -41
  159. mindspore/ops/_op_impl/_custom_op/flash_attention/flash_attention_bwd.py +0 -467
  160. mindspore/ops/_op_impl/_custom_op/flash_attention/flash_attention_fwd.py +0 -563
  161. mindspore/ops/_op_impl/_custom_op/flash_attention/flash_attention_impl.py +0 -193
  162. mindspore/ops/_op_impl/_custom_op/flash_attention/tik_ops_utils.py +0 -435
  163. mindspore/ops/_op_impl/_custom_op/flash_attention/tiling_strategy/__init__.py +0 -0
  164. mindspore/ops/_op_impl/_custom_op/flash_attention/tiling_strategy/sparse_tiling.py +0 -45
  165. mindspore/ops/_op_impl/_custom_op/flash_attention/tiling_strategy/strategy.py +0 -67
  166. mindspore/ops/_op_impl/_custom_op/flash_attention/tiling_strategy/wukong_tiling.py +0 -62
  167. /mindspore/{ops/_op_impl/_custom_op/flash_attention → _akg/akg/utils/ascend_profilier}/__init__.py +0 -0
  168. {mindspore-2.2.0.dist-info → mindspore-2.2.11.dist-info}/WHEEL +0 -0
  169. {mindspore-2.2.0.dist-info → mindspore-2.2.11.dist-info}/entry_points.txt +0 -0
  170. {mindspore-2.2.0.dist-info → mindspore-2.2.11.dist-info}/top_level.txt +0 -0
@@ -660,14 +660,14 @@ def one_hot(indices, depth, on_value=1, off_value=0, axis=-1):
 
     Note:
        If the input indices is rank `N`, the output will have rank `N+1`. The new axis is created at dimension `axis`.
+       On Ascend, if `on_value` is Int64 dtype, `indices` must be Int64 dtype.
 
     Args:
        indices(Tensor): A tensor of indices. Tensor of shape :math:`(X_0, \ldots, X_n)`.
            Data type must be int32 or int64.
        depth(int): A scalar defining the depth of the one-hot dimension.
        on_value(Union[Tensor, int, float], optional): A value to fill in output when `indices[j] = i`.
-           Support uint8, uint16, uint32, uint64, int8, int16, int32, int64, float16, float32, float64,
-           bool, complex64, complex128. Default: ``1`` .
+           Data type must be int32, int64, float16 or float32. Default: ``1`` .
        off_value(Union[Tensor, int, float], optional): A value to fill in output when `indices[j] != i`.
            Has the same data type as `on_value`. Default: ``0`` .
        axis(int, optional): Position to insert the value. e.g. If shape of `self` is :math:`(N, C)`, and `axis` is -1,
@@ -676,7 +676,8 @@ def one_hot(indices, depth, on_value=1, off_value=0, axis=-1):
            Default: ``-1`` .
 
     Returns:
-       Tensor, one-hot tensor. Tensor of shape :math:`(X_0, \ldots, X_{axis}, \text{depth} ,X_{axis+1}, \ldots, X_n)`.
+       Tensor, one-hot tensor. Tensor of shape :math:`(X_0, \ldots, X_{axis}, \text{depth} ,X_{axis+1}, \ldots, X_n)`,
+       and it has the same data type as `on_value`.
 
     Raises:
        TypeError: If `axis` or `depth` is not an int.
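The hunks above narrow the documented `on_value` dtypes and state that the output dtype follows `on_value`. A minimal sketch of that behaviour, written against the public `ops.one_hot` signature shown in the hunk header (expected values only, not taken from the diff):

```python
import numpy as np
import mindspore as ms
from mindspore import Tensor, ops

# Sketch only: the output is documented to take its dtype from `on_value`.
indices = Tensor(np.array([0, 1, 2]), ms.int32)
out = ops.one_hot(indices, 3, Tensor(1.0, ms.float16), Tensor(0.0, ms.float16))
print(out.shape, out.dtype)   # expected: (3, 3) Float16
```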
@@ -1734,7 +1735,11 @@ def flatten(input, order='C', *, start_dim=1, end_dim=-1):
         raise TypeError(f"For 'flatten', both 'start_dim' and 'end_dim' must be int.")
     check_flatten_order_const(order)
     if order == 'F':
-        perm = ops.make_range(0, ops.rank(input))
+        x_rank = rank_(input)
+        # If input is a 0-dimensional Tensor, a 1-dimensional Tensor will be returned.
+        if x_rank in (0, 1):
+            return reshape_(input, (-1,))
+        perm = ops.make_range(0, x_rank)
         new_order = ops.tuple_reversed(perm)
         input = _get_cache_prim(P.Transpose)()(input, new_order)
 
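The change above short-circuits 0-D and 1-D inputs before the transpose permutation is built. A hedged usage sketch against the public `ops.flatten` signature from the hunk header:

```python
import mindspore as ms
from mindspore import Tensor, ops

x = Tensor(5.0, ms.float32)        # rank-0 tensor
y = ops.flatten(x, order='F')      # now reshaped directly instead of going through make_range/transpose
print(y.shape)                     # expected: (1,)
```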
@@ -2161,8 +2166,6 @@ def concat(tensors, axis=0):
 
     Tutorial Examples:
        - `Tensor - Tensor Operation <https://mindspore.cn/tutorials/en/r2.2/beginner/tensor.html#tensor-operation>`_
-       - `FGSM Network Adversarial Attack - Implementing FGSM
-         <https://mindspore.cn/tutorials/application/en/r2.2/cv/fgsm.html#implementing-fgsm>`_
        - `Vision Transformer Image Classification - Building ViT as a whole
          <https://mindspore.cn/tutorials/application/en/r2.2/cv/vit.html#building-vit-as-a-whole>`_
        - `Sentiment Classification Implemented by RNN - Dense
@@ -6828,7 +6831,7 @@ def diagonal(input, offset=0, dim1=0, dim2=1):
     """
     x_ndim = input.ndim
     if x_ndim < 2:
-        raise ValueError(f"ops.diagonal requires an array of at least two dimensions")
+        raise ValueError(f"For 'ops.diagonal', the original tensor requires at least two dimensions, but got {x_ndim}")
     _check_attr_dtype("dim1", dim1, [int], "diagonal")
     _check_attr_dtype("dim2", dim2, [int], "diagonal")
     dtype = input.dtype
@@ -37,7 +37,6 @@ oneslike = P.OnesLike()
 def _check_has_aux_type(inputs):
     if not isinstance(inputs, bool):
         raise TypeError("The 'has_aux' must be bool type.")
-    return True
 
 
 @constexpr
@@ -4609,6 +4609,19 @@ def max_pool3d(x, kernel_size, stride=None, padding=0, dilation=1, ceil_mode=Fal
 
        - **output** (Tensor) - Maxpooling result, with shape :math:`(N_{out}, C_{out}, D_{out}, H_{out}, W_{out})`.
          It has the same data type as `x`.
+
+         .. math::
+             D_{out} = \left\lfloor\frac{D_{in} + 2 \times \text{padding}[0] - \text{dilation}[0] \times
+             (\text{kernel_size}[0] - 1) - 1}{\text{stride}[0]} + 1\right\rfloor
+
+         .. math::
+             H_{out} = \left\lfloor\frac{H_{in} + 2 \times \text{padding}[1] - \text{dilation}[1] \times
+             (\text{kernel_size}[1] - 1) - 1}{\text{stride}[1]} + 1\right\rfloor
+
+         .. math::
+             W_{out} = \left\lfloor\frac{W_{in} + 2 \times \text{padding}[2] - \text{dilation}[2] \times
+             (\text{kernel_size}[2] - 1) - 1}{\text{stride}[2]} + 1\right\rfloor
+
        - **argmax** (Tensor) - Index corresponding to the maximum value. Data type is int64. It will be return
          only when `return_indices` is ``True`` .
 
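The added formulas are the standard floor expressions for pooling output sizes. A quick numeric spot-check with assumed sizes (values are not from the diff):

```python
import math

# D_out = floor((D_in + 2*padding - dilation*(kernel_size - 1) - 1) / stride + 1)
D_in, padding, dilation, kernel_size, stride = 16, 1, 1, 3, 2
D_out = math.floor((D_in + 2 * padding - dilation * (kernel_size - 1) - 1) / stride + 1)
print(D_out)   # 8
```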
@@ -6037,20 +6050,20 @@ def conv3d(input, weight, bias=None, stride=1, pad_mode="valid", padding=0, dila
 
     .. math::
         \begin{array}{ll} \\
-            D_{out} \left \lceil{\frac{D_{in}}{\text{stride[0]}}} \right \rceil \\
-            H_{out} \left \lceil{\frac{H_{in}}{\text{stride[1]}}} \right \rceil \\
-            W_{out} \left \lceil{\frac{W_{in}}{\text{stride[2]}}} \right \rceil \\
+            D_{out} = \left \lceil{\frac{D_{in}}{\text{stride[0]}}} \right \rceil \\
+            H_{out} = \left \lceil{\frac{H_{in}}{\text{stride[1]}}} \right \rceil \\
+            W_{out} = \left \lceil{\frac{W_{in}}{\text{stride[2]}}} \right \rceil \\
         \end{array}
 
     `pad_mode` is ``"valid"``:
 
     .. math::
         \begin{array}{ll} \\
-            D_{out} \left \lfloor{\frac{D_{in} - \text{dilation[0]} \times (\text{kernel_size[0]} - 1) }
+            D_{out} = \left \lfloor{\frac{D_{in} - \text{dilation[0]} \times (\text{kernel_size[0]} - 1) }
             {\text{stride[0]}} + 1} \right \rfloor \\
-            H_{out} \left \lfloor{\frac{H_{in} - \text{dilation[1]} \times (\text{kernel_size[1]} - 1) }
+            H_{out} = \left \lfloor{\frac{H_{in} - \text{dilation[1]} \times (\text{kernel_size[1]} - 1) }
             {\text{stride[1]}} + 1} \right \rfloor \\
-            W_{out} \left \lfloor{\frac{W_{in} - \text{dilation[2]} \times (\text{kernel_size[2]} - 1) }
+            W_{out} = \left \lfloor{\frac{W_{in} - \text{dilation[2]} \times (\text{kernel_size[2]} - 1) }
             {\text{stride[2]}} + 1} \right \rfloor \\
         \end{array}
 
@@ -6058,11 +6071,11 @@ def conv3d(input, weight, bias=None, stride=1, pad_mode="valid", padding=0, dila
 
     .. math::
         \begin{array}{ll} \\
-            D_{out} \left \lfloor{\frac{D_{in} + padding[0] + padding[1] - (\text{dilation[0]} - 1) \times
+            D_{out} = \left \lfloor{\frac{D_{in} + padding[0] + padding[1] - (\text{dilation[0]} - 1) \times
             \text{kernel_size[0]} - 1 }{\text{stride[0]}} + 1} \right \rfloor \\
-            H_{out} \left \lfloor{\frac{H_{in} + padding[2] + padding[3] - (\text{dilation[1]} - 1) \times
+            H_{out} = \left \lfloor{\frac{H_{in} + padding[2] + padding[3] - (\text{dilation[1]} - 1) \times
             \text{kernel_size[1]} - 1 }{\text{stride[1]}} + 1} \right \rfloor \\
-            W_{out} \left \lfloor{\frac{W_{in} + padding[4] + padding[5] - (\text{dilation[2]} - 1) \times
+            W_{out} = \left \lfloor{\frac{W_{in} + padding[4] + padding[5] - (\text{dilation[2]} - 1) \times
             \text{kernel_size[2]} - 1 }{\text{stride[2]}} + 1} \right \rfloor \\
         \end{array}
 
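The conv3d hunks only insert the missing `=` signs; the formulas themselves are unchanged. As a sanity check, the "same" and "valid" cases with assumed sizes (not taken from the diff):

```python
import math

D_in, dilation, kernel_size, stride = 16, 1, 3, 2

# "same" mode: D_out = ceil(D_in / stride)
print(math.ceil(D_in / stride))                                          # 8

# "valid" mode: D_out = floor((D_in - dilation*(kernel_size - 1)) / stride + 1)
print(math.floor((D_in - dilation * (kernel_size - 1)) / stride + 1))    # 8
```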
@@ -7431,6 +7444,82 @@ def max_pool2d(x, kernel_size, stride=None, padding=0, dilation=1, return_indice
     return out
 
 
+def prompt_flash_attention(query, key, value, padding_mask, attn_mask, actual_seq_lengths,
+                           actual_seq_lengths_kv, deq_scale1, quant_scale1,
+                           deq_scale2, quant_scale2, quant_offset2, num_heads, scale_value=1.0, pre_tokens=2147483547,
+                           next_tokens=0, input_layout='BSH',
+                           num_key_value_heads=0, sparse_mode=0):
+    r"""
+    The interface for fully inference.
+    B -- Batch size
+    S -- Sequence length
+    H -- Hidden size
+
+    Note:
+        is only supported on ascend910B
+
+    .. warning::
+        This is an experimental API that is subject to change or deletion.
+
+    Inputs:
+        query (Tensor) - The query tensor with data type of float16 or float32.
+          Input tensor of shape :math:`(B, S, H)` / `(B, N, S, D)`.
+        key (Tensor) - The key tensor with data type of float16 or float32.
+          Input tensor of shape :math:`(B, S, H)` / `(B, N, S, D)`.
+        value (Tensor) - The value tensor with data type of float16 or float32.
+          Input tensor of shape :math:`(B, S, H)` / `(B, N, S, D)`.
+        padding_mask (Tensor) - The padding mask tensor with data type of float16 or float32
+        attn_mask (Tensor) - The attention mask tensor with data type of float16 or float32.
+          For each element, 0 indicates retention and 1 indicates discard. Input tensor of shape :math:`(B, 1, S, S)`.
+        actual_seq_lengths (list[int]): Describe actual sequence length of each input with data type of int.
+        actual_seq_lengths_kv (list[int]): Describe actual sequence length of each input with data type of int.
+        dep_scale1 (Tensor)
+        quant_scale1 (Tensor)
+        deq_scale2 (Tensor)
+        quant_scale2 (Tensor)
+        quant_offset2 (Tensor)
+        num_heads (int): The number of heads.
+        scale_value (float): The scale value indicating the scale coefficient, which is used as the scalar of
+          Muls in the calculation. Default: 1.0.
+        pre_tokens (int): Previous tokens. Default: 2147483547.
+        next_tokens (int): next tokens. Default: 0.
+          indicate the upper triangle, Indicate the number of data blocks involved in the calculation. The value 0
+          indicates that the data blocks in the upper triangle are not involved in the calculation
+        input_layout (str): the data layout of the input qkv, support `(BSH)` and `(BNSD)`, Default `BSH`.
+        num_key_value_heads (int): head numbers of key/value which are used in GQA algorithm.
+          The value o indicates if the key and value have the same head nums, use numHeads. Default: 0.
+        sparse_mode (int): Default: 0
+
+
+    Outputs:
+        attention_out (Tensor) - Input tensor of shape :math:`(B, S, H)` / `(B, N, S, D)`.
+
+    Supported Platforms:
+        ``Ascend``
+
+    Examples:
+        >>> from mindspore.ops.function.nn_func import prompt_flash_attention
+        >>> from mindspore import Tensor
+        >>> import numpy as np
+        >>> B = 1
+        >>> N = 16
+        >>> S = 256
+        >>> D = 16
+        >>> query = Tensor(np.ones((B, N, S, D), dtype=np.float16))
+        >>> key = Tensor(np.ones((B, N, S, D), dtype=np.float16))
+        >>> value = Tensor(np.ones((B, N, S, D), dtype=np.float16))
+        >>> out = ops.prompt_flash_attention(query, key, value, None, None, None, None, None, None, None, None,
+                                             None, N, input_layout='BNSD')
+        >>> print(out[0].shape)
+        (1, 16, 256, 16)
+    """
+
+    pfa = _get_cache_prim(NN_OPS.PromptFlashAttention)(num_heads, scale_value, pre_tokens, next_tokens, input_layout,
+                                                       num_key_value_heads, sparse_mode)
+    return pfa(query, key, value, padding_mask, attn_mask, actual_seq_lengths, actual_seq_lengths_kv, deq_scale1,
+               quant_scale1, deq_scale2, quant_scale2, quant_offset2)
+
+
 __all__ = [
     'adaptive_avg_pool1d',
     'adaptive_avg_pool2d',
@@ -1180,7 +1180,8 @@ def randint_like(input, low, high, seed=None, *, dtype=None):
     cast_ = P.Cast()
     low_ = Tensor(low, mstype.int32)
     high_ = Tensor(high, mstype.int32)
-    output = rand_op(size, low_, high_)
+    size_ = Tensor(size, mstype.int32)
+    output = rand_op(size_, low_, high_)
     return cast_(output, dtype)
 
 
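The fix wraps `size` in a Tensor before handing it to the uniform-int primitive; nothing changes on the caller's side. A hedged usage sketch of `ops.randint_like` as documented in the hunk header:

```python
import numpy as np
import mindspore as ms
from mindspore import Tensor, ops

x = Tensor(np.zeros((2, 3)), ms.int32)
y = ops.randint_like(x, 0, 10)     # uniform integers in [0, 10), same shape as x
print(y.shape)                     # expected: (2, 3)
```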
@@ -36,13 +36,16 @@ if platform.system() == "Linux":
 BUILT_IN_OPS_REGISTER_PATH = "mindspore/ops/_op_impl"
 BUILT_IN_CUSTOM_OPS_REGISTER_PATH = "mindspore/ops/_op_impl/_custom_op"
 
+KEY_NAME = "name"
+ASCEND_CUSTOM_OPP_PATH = "ASCEND_CUSTOM_OPP_PATH"
 
-def _get_reg_info_attr(op_info, attr_name):
+
+def _get_reg_info_attr(op_info, attr_name, default_value=None):
     """get attr value"""
     for _, item in enumerate(op_info.get("attr", [])):
-        if item.get("name") == attr_name:
+        if item.get(KEY_NAME) == attr_name:
             return item.get("defaultValue")
-    return None
+    return default_value
 
 
 class _CustomInstaller:
@@ -66,12 +69,12 @@ class _CustomInstaller:
     @staticmethod
     def _set_env(custom_opp_path):
         """set custom file path to env"""
-        if not os.environ.get("ASCEND_CUSTOM_OPP_PATH"):
-            os.environ["ASCEND_CUSTOM_OPP_PATH"] = custom_opp_path
+        if not os.environ.get(ASCEND_CUSTOM_OPP_PATH):
+            os.environ[ASCEND_CUSTOM_OPP_PATH] = custom_opp_path
         else:
-            paths = os.environ["ASCEND_CUSTOM_OPP_PATH"].split(':')
+            paths = os.environ[ASCEND_CUSTOM_OPP_PATH].split(':')
             if custom_opp_path not in paths:
-                os.environ["ASCEND_CUSTOM_OPP_PATH"] = custom_opp_path + ':' + os.environ["ASCEND_CUSTOM_OPP_PATH"]
+                os.environ[ASCEND_CUSTOM_OPP_PATH] = custom_opp_path + ':' + os.environ[ASCEND_CUSTOM_OPP_PATH]
 
     @staticmethod
     def _create_dir(*dir_names):
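The `_set_env` change is a pure refactor (the env-var string is hoisted into the `ASCEND_CUSTOM_OPP_PATH` constant); the prepend-if-absent logic is unchanged. A standalone sketch of that logic, not the library code itself:

```python
import os

def prepend_custom_opp_path(custom_opp_path, env_key="ASCEND_CUSTOM_OPP_PATH"):
    """Prepend a custom opp path to the env var unless it is already listed."""
    current = os.environ.get(env_key)
    if not current:
        os.environ[env_key] = custom_opp_path
    elif custom_opp_path not in current.split(':'):
        os.environ[env_key] = custom_opp_path + ':' + current

prepend_custom_opp_path("/tmp/my_custom_ops")   # hypothetical path
print(os.environ["ASCEND_CUSTOM_OPP_PATH"])
```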
@@ -94,11 +97,11 @@ class _CustomInstaller:
         _CustomInstaller.copied_paths.append(src_path)
         if os.path.isfile(src_path):
             lock_file = os.path.join(dst_dir, "file.lock")
-            with open(lock_file, "w") as f:
+            with os.fdopen(os.open(lock_file, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600), 'w') as f:
                 fcntl.flock(f.fileno(), fcntl.LOCK_EX)
                 shutil.copy(src_path, dst_dir)
 
-    def _check(self):
+    def check(self):
         """check if the reg info need written"""
         if platform.system() != "Linux":
             return False
@@ -153,12 +156,12 @@ class _CustomInstaller:
         # attr
         attrs_name = []
         for _, item in enumerate(self.op_info.get("attr", [])):
-            attr_name = item.get("name")
+            attr_name = item.get(KEY_NAME)
             attrs_name.append(attr_name)
             key = "attr_" + attr_name
             op_info[key] = {}
             for k, v in item.items():
-                if k != "name":
+                if k != KEY_NAME:
                     op_info[key][k] = v
         if attrs_name:
             op_info["attr"] = {"list": ",".join(attrs_name)}
@@ -171,7 +174,7 @@ class _CustomInstaller:
             item = inputs[i] if i < input_num else outputs[i - input_num]
             key = "input" if i < input_num else "output"
             key += str(item.get("index"))
-            op_info[key] = {"name": item.get("name"),
+            op_info[key] = {KEY_NAME: item.get(KEY_NAME),
                             "paramType": item.get("paramType", "required"),
                             "shape": item.get("shape", "all")}
             dtype, formats = _get_dtype_format(i)
@@ -181,7 +184,8 @@ class _CustomInstaller:
                 op_info[key]["format"] = ",".join(formats)
         return op_info
 
-    def _gen_ai_cpu_reg_info(self, so_file):
+    @staticmethod
+    def _gen_ai_cpu_reg_info(so_file):
         """generate reg info"""
         op_info = {"opInfo": {"computeCost": "100",
                               "engine": "DNN_VM_AICPU",
@@ -198,7 +202,7 @@ class _CustomInstaller:
         repo = {}
         save_path = os.path.join(dst_dir, file_name)
         lock_file = os.path.join(dst_dir, "file.lock")
-        with open(lock_file, "w") as f:
+        with os.fdopen(os.open(lock_file, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600), 'w') as f:
             fcntl.flock(f.fileno(), fcntl.LOCK_EX)
             if os.path.isfile(save_path):
                 with open(save_path, 'r') as fr:
@@ -211,7 +215,7 @@ class _CustomInstaller:
 
     def run(self):
         """save reg info to file"""
-        if not self._check():
+        if not self.check():
             return
         so_name = _get_reg_info_attr(self.op_info, "cust_aicpu")
         if so_name:
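Both lock-file sites now create the file through `os.open` with mode 0o600 instead of a bare `open(..., "w")`, so the lock file is not group- or world-readable. A minimal sketch of the pattern, with a hypothetical path:

```python
import fcntl
import os

lock_file = "/tmp/akg_reg_info/file.lock"       # hypothetical path
os.makedirs(os.path.dirname(lock_file), exist_ok=True)
with os.fdopen(os.open(lock_file, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600), 'w') as f:
    fcntl.flock(f.fileno(), fcntl.LOCK_EX)      # exclusive lock around the critical section
    # ... copy files / rewrite the reg-info json here ...
```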
@@ -380,7 +384,6 @@ class RegOp:
         """
         if not isinstance(value, str):
             raise TypeError("%s value must be str" % str(value))
-        return True
 
     def _is_int(self, value):
         """
@@ -394,7 +397,6 @@ class RegOp:
         """
         if not isinstance(value, int):
             raise TypeError("%s value must be int" % str(value))
-        return True
 
     def _is_bool(self, value):
         """
@@ -408,7 +410,6 @@ class RegOp:
         """
         if not isinstance(value, bool):
             raise TypeError("%s value must be bool" % str(value))
-        return True
 
     @staticmethod
     def _is_list(value):
@@ -423,7 +424,6 @@ class RegOp:
         """
         if not isinstance(value, list):
             raise TypeError("%s value must be list" % str(value))
-        return True
 
     def _check_param(self, param_list, key_list, fn_list, kwargs):
         """
@@ -491,7 +491,9 @@ class RegOp:
             self._is_string(arg[1])
             if len(arg) == 3:
                 self._is_string(arg[2])
-            dtype_format.append(arg)
+                dtype_format.append(arg)
+            else:
+                dtype_format.append(arg)
         self.dtype_format_.append(tuple(dtype_format))
         return self
 
@@ -920,7 +922,8 @@ class TBERegOp(RegOp):
         Args:
             pattern (str): Value of op pattern, e.g. "broadcast", "reduce". Default: ``None`` .
         """
-        if pattern is not None and self._is_string(pattern):
+        if pattern is not None:
+            self._is_string(pattern)
             self.op_pattern_ = pattern
         return self
 
@@ -118,7 +118,7 @@ from .nn_ops import (LSTM, SGD, Adam, AdamWeightDecay, FusedSparseAdam, FusedSpa
                      Dilation2D, DataFormatVecPermute, DeformableOffsets, Dense, FractionalAvgPool,
                      FractionalMaxPool, FractionalMaxPool3DWithFixedKsize, FractionalMaxPoolWithFixedKsize,
                      GridSampler2D, TripletMarginLoss, UpsampleNearest3D, UpsampleTrilinear3D, PadV3, ChannelShuffle,
-                     GLU, MaxUnpool3D, Pdist)
+                     GLU, MaxUnpool3D, Pdist, RmsNorm, PagedAttention, PagedAttentionMask, ReshapeAndCache)
 from .other_ops import (Assign, IOU, BoundingBoxDecode, BoundingBoxEncode,
                         ConfusionMatrix, UpdateState, Load, StopGradient,
                         CheckValid, Partial, Depend, Push, Pull, PyExecute, PyFunc, _DynamicLossScale,
@@ -691,7 +691,11 @@ __all__ = [
     "IndexPut",
     "MaskedScatter",
     "Ormqr",
-    "RandpermV2"
+    "RandpermV2",
+    "RmsNorm",
+    "PagedAttention",
+    "PagedAttentionMask",
+    "ReshapeAndCache"
 ]
 
 __custom__ = [
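With the two hunks above, the new primitives are re-exported from `mindspore.ops.operations`. Assuming the 2.2.11 wheel, they should be importable directly (import path inferred from the export list, not shown in the diff itself):

```python
# Assumed import path based on the updated __all__ list above (mindspore 2.2.11).
from mindspore.ops.operations import (RmsNorm, PagedAttention,
                                      PagedAttentionMask, ReshapeAndCache)
```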
@@ -3845,7 +3845,7 @@ class FlashAttentionScoreGrad(Primitive):
     """
     @prim_attr_register
     def __init__(self, head_num, keep_prob=1.0, scale_value=1.0, pre_tokens=65536, next_tokens=65536, inner_precise=1,
-                 input_layout='BSH'):
+                 input_layout='BSH', sparse_mode=0):
         """Initialize FlashAttentionScoreGrad."""
         validator.check_value_type('head_num', head_num, [int], self.name)
         validator.check_value_type('keep_prob', keep_prob, [int, float], self.name)
@@ -3855,11 +3855,30 @@ class FlashAttentionScoreGrad(Primitive):
         validator.check_value_type('pre_tokens', pre_tokens, [int], self.name)
         validator.check_value_type('next_tokens', next_tokens, [int], self.name)
         validator.check_value_type('inner_precise', inner_precise, [int], self.name)
+        validator.check_value_type('sparse_mode', sparse_mode, [int], self.name)
         if inner_precise not in [0, 1]:
             raise ValueError(f"Attribute 'inner_precise' must be either 0 or 1, but got {inner_precise}")
         validator.check_value_type('input_layout', input_layout, [str], self.name)
-        if input_layout not in ["BSH"]:
-            raise ValueError(f"Attribute 'input_layout' must be either 'bsh' or 'sbh', but got {input_layout}")
-        self.init_prim_io_names(inputs=['query', 'key', 'value', 'attn_mask', 'attention_in', 'softmax_max',
-                                        'softmax_sum', 'dy', 'drop_mask', 'real_shift', "padding_mask", 'softmax_out'],
-                                outputs=['dq', 'dk', 'dv'])
+        if input_layout not in ["BSH", "BNSD"]:
+            raise ValueError(f"Attribute 'input_layout' must be either 'BSH' or 'BNSD', but got {input_layout}")
+        self.init_prim_io_names(inputs=['query', 'key', 'value', 'dy', 'pse_shift', 'drop_mask', "padding_mask",
+                                        'attn_mask', 'softmax_max', 'softmax_sum', 'softmax_out', 'attention_in',
+                                        'prefix'],
+                                outputs=['dq', 'dk', 'dv', 'dpse'])
+
+
+class RmsNormGrad(Primitive):
+    r"""
+    Calculates the gradient of RmsNorm operation.
+    .. warning::
+        This is an experimental API that is subject to change or deletion.
+
+    Supported Platforms:
+        ``Ascend``
+    """
+
+    @prim_attr_register
+    def __init__(self):
+        """Initialize RmsNormGrad."""
+        self.init_prim_io_names(inputs=["dy", "x", "rstd", "gamma"],
+                                outputs=["dx", "dgamma"])