mindspore-2.2.10-cp39-cp39-win_amd64.whl → mindspore-2.2.14-cp39-cp39-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of mindspore might be problematic.

Files changed (128)
  1. mindspore/.commit_id +1 -1
  2. mindspore/__init__.py +2 -1
  3. mindspore/_c_dataengine.cp39-win_amd64.pyd +0 -0
  4. mindspore/_c_expression.cp39-win_amd64.pyd +0 -0
  5. mindspore/_c_mindrecord.cp39-win_amd64.pyd +0 -0
  6. mindspore/_extends/parse/__init__.py +3 -2
  7. mindspore/_extends/parse/parser.py +6 -1
  8. mindspore/_extends/parse/standard_method.py +12 -2
  9. mindspore/common/_utils.py +16 -0
  10. mindspore/common/tensor.py +0 -2
  11. mindspore/communication/management.py +3 -0
  12. mindspore/context.py +34 -4
  13. mindspore/dataset/engine/cache_client.py +8 -5
  14. mindspore/dataset/engine/datasets.py +23 -0
  15. mindspore/dataset/engine/validators.py +1 -1
  16. mindspore/dataset/vision/py_transforms_util.py +2 -2
  17. mindspore/dnnl.dll +0 -0
  18. mindspore/experimental/optim/lr_scheduler.py +5 -6
  19. mindspore/jpeg62.dll +0 -0
  20. mindspore/mindrecord/tools/cifar100_to_mr.py +49 -57
  21. mindspore/mindrecord/tools/cifar10_to_mr.py +46 -55
  22. mindspore/mindrecord/tools/csv_to_mr.py +3 -8
  23. mindspore/mindrecord/tools/mnist_to_mr.py +4 -9
  24. mindspore/mindrecord/tools/tfrecord_to_mr.py +1 -4
  25. mindspore/mindspore_backend.dll +0 -0
  26. mindspore/mindspore_common.dll +0 -0
  27. mindspore/mindspore_core.dll +0 -0
  28. mindspore/mindspore_glog.dll +0 -0
  29. mindspore/mindspore_shared_lib.dll +0 -0
  30. mindspore/nn/layer/activation.py +1 -1
  31. mindspore/nn/layer/embedding.py +2 -2
  32. mindspore/nn/layer/flash_attention.py +48 -135
  33. mindspore/nn/loss/loss.py +1 -1
  34. mindspore/nn/optim/ada_grad.py +2 -2
  35. mindspore/nn/optim/sgd.py +3 -2
  36. mindspore/nn/wrap/__init__.py +4 -2
  37. mindspore/nn/wrap/cell_wrapper.py +6 -3
  38. mindspore/numpy/math_ops.py +1 -1
  39. mindspore/opencv_core452.dll +0 -0
  40. mindspore/opencv_imgcodecs452.dll +0 -0
  41. mindspore/opencv_imgproc452.dll +0 -0
  42. mindspore/ops/__init__.py +3 -0
  43. mindspore/ops/_grad_experimental/grad_array_ops.py +0 -31
  44. mindspore/ops/_grad_experimental/grad_comm_ops.py +4 -2
  45. mindspore/ops/_grad_experimental/grad_inner_ops.py +8 -0
  46. mindspore/ops/_grad_experimental/grad_math_ops.py +37 -17
  47. mindspore/ops/_op_impl/aicpu/__init__.py +1 -0
  48. mindspore/ops/_op_impl/aicpu/generate_eod_mask.py +38 -0
  49. mindspore/ops/_op_impl/aicpu/linear_sum_assignment.py +21 -2
  50. mindspore/ops/function/array_func.py +6 -5
  51. mindspore/ops/function/debug_func.py +1 -1
  52. mindspore/ops/function/linalg_func.py +21 -11
  53. mindspore/ops/function/math_func.py +3 -0
  54. mindspore/ops/function/nn_func.py +13 -11
  55. mindspore/ops/function/parameter_func.py +2 -0
  56. mindspore/ops/function/sparse_unary_func.py +2 -2
  57. mindspore/ops/function/vmap_func.py +1 -0
  58. mindspore/ops/operations/__init__.py +5 -2
  59. mindspore/ops/operations/_embedding_cache_ops.py +1 -1
  60. mindspore/ops/operations/_grad_ops.py +3 -4
  61. mindspore/ops/operations/_inner_ops.py +56 -1
  62. mindspore/ops/operations/_quant_ops.py +4 -4
  63. mindspore/ops/operations/_rl_inner_ops.py +1 -1
  64. mindspore/ops/operations/array_ops.py +15 -4
  65. mindspore/ops/operations/custom_ops.py +1 -1
  66. mindspore/ops/operations/debug_ops.py +1 -1
  67. mindspore/ops/operations/image_ops.py +3 -3
  68. mindspore/ops/operations/inner_ops.py +49 -0
  69. mindspore/ops/operations/math_ops.py +65 -3
  70. mindspore/ops/operations/nn_ops.py +95 -28
  71. mindspore/ops/operations/random_ops.py +2 -0
  72. mindspore/ops/operations/sparse_ops.py +4 -4
  73. mindspore/ops/silent_check.py +162 -0
  74. mindspore/parallel/__init__.py +3 -2
  75. mindspore/parallel/_auto_parallel_context.py +82 -3
  76. mindspore/parallel/_parallel_serialization.py +34 -2
  77. mindspore/parallel/_tensor.py +3 -1
  78. mindspore/parallel/_transformer/transformer.py +8 -8
  79. mindspore/parallel/checkpoint_transform.py +191 -45
  80. mindspore/profiler/parser/ascend_cluster_generator.py +111 -0
  81. mindspore/profiler/parser/ascend_communicate_generator.py +315 -0
  82. mindspore/profiler/parser/ascend_flops_generator.py +8 -2
  83. mindspore/profiler/parser/ascend_fpbp_generator.py +8 -2
  84. mindspore/profiler/parser/ascend_hccl_generator.py +2 -2
  85. mindspore/profiler/parser/ascend_msprof_exporter.py +30 -6
  86. mindspore/profiler/parser/ascend_msprof_generator.py +16 -5
  87. mindspore/profiler/parser/ascend_op_generator.py +15 -7
  88. mindspore/profiler/parser/ascend_timeline_generator.py +5 -2
  89. mindspore/profiler/parser/base_timeline_generator.py +11 -3
  90. mindspore/profiler/parser/cpu_gpu_timeline_generator.py +2 -1
  91. mindspore/profiler/parser/framework_parser.py +8 -2
  92. mindspore/profiler/parser/memory_usage_parser.py +8 -2
  93. mindspore/profiler/parser/minddata_analyzer.py +8 -2
  94. mindspore/profiler/parser/minddata_parser.py +1 -1
  95. mindspore/profiler/parser/msadvisor_analyzer.py +4 -2
  96. mindspore/profiler/parser/msadvisor_parser.py +9 -3
  97. mindspore/profiler/profiling.py +97 -25
  98. mindspore/rewrite/api/node.py +1 -1
  99. mindspore/rewrite/api/symbol_tree.py +2 -2
  100. mindspore/rewrite/parsers/for_parser.py +6 -6
  101. mindspore/rewrite/parsers/module_parser.py +4 -4
  102. mindspore/tinyxml2.dll +0 -0
  103. mindspore/train/callback/_checkpoint.py +8 -8
  104. mindspore/train/callback/_landscape.py +2 -3
  105. mindspore/train/callback/_summary_collector.py +6 -7
  106. mindspore/train/dataset_helper.py +6 -0
  107. mindspore/train/model.py +17 -5
  108. mindspore/train/serialization.py +6 -1
  109. mindspore/train/summary/_writer_pool.py +1 -1
  110. mindspore/train/summary/summary_record.py +5 -6
  111. mindspore/turbojpeg.dll +0 -0
  112. mindspore/version.py +1 -1
  113. {mindspore-2.2.10.dist-info → mindspore-2.2.14.dist-info}/METADATA +3 -2
  114. {mindspore-2.2.10.dist-info → mindspore-2.2.14.dist-info}/RECORD +117 -124
  115. mindspore/ops/_op_impl/_custom_op/flash_attention/__init__.py +0 -0
  116. mindspore/ops/_op_impl/_custom_op/flash_attention/attention.py +0 -406
  117. mindspore/ops/_op_impl/_custom_op/flash_attention/constants.py +0 -41
  118. mindspore/ops/_op_impl/_custom_op/flash_attention/flash_attention_bwd.py +0 -467
  119. mindspore/ops/_op_impl/_custom_op/flash_attention/flash_attention_fwd.py +0 -563
  120. mindspore/ops/_op_impl/_custom_op/flash_attention/flash_attention_impl.py +0 -193
  121. mindspore/ops/_op_impl/_custom_op/flash_attention/tik_ops_utils.py +0 -435
  122. mindspore/ops/_op_impl/_custom_op/flash_attention/tiling_strategy/__init__.py +0 -0
  123. mindspore/ops/_op_impl/_custom_op/flash_attention/tiling_strategy/sparse_tiling.py +0 -45
  124. mindspore/ops/_op_impl/_custom_op/flash_attention/tiling_strategy/strategy.py +0 -67
  125. mindspore/ops/_op_impl/_custom_op/flash_attention/tiling_strategy/wukong_tiling.py +0 -62
  126. {mindspore-2.2.10.dist-info → mindspore-2.2.14.dist-info}/WHEEL +0 -0
  127. {mindspore-2.2.10.dist-info → mindspore-2.2.14.dist-info}/entry_points.txt +0 -0
  128. {mindspore-2.2.10.dist-info → mindspore-2.2.14.dist-info}/top_level.txt +0 -0
@@ -18,11 +18,13 @@
  import numpy as np
  import mindspore.numpy as mnp
  from mindspore.common import dtype as mstype
+ import mindspore.ops as ops
  from mindspore.ops import functional as F
  from mindspore.ops import operations as P
  from mindspore import Tensor
  from mindspore.ops.operations.math_ops import Real, Imag, Complex, Angle
- from mindspore.ops.operations.math_ops import Polar
+ from mindspore.ops.operations.math_ops import Polar, SilentCheck
+ from mindspore.ops.operations._inner_ops import _MirrorSilentCheck
  from mindspore.ops.operations import _grad_ops as G
  from mindspore.ops.operations.math_ops import Lgamma
  from mindspore.ops.operations.math_ops import Digamma
@@ -763,6 +765,7 @@ def get_bprop_fft_with_size(self):
  to_tensor_op = P.ScalarToTensor()
  type_op = P.DType()
  concat_op = P.Concat()
+ concat_op_last = P.Concat(axis=-1)
  ones_op = P.Ones()
  zeros_op = P.Zeros()
  real_op = P.Real()
@@ -794,8 +797,7 @@ def get_bprop_fft_with_size(self):
  signal_sizes=offset_shape[-1:])
  irfft2d_ = FFTWithSize(signal_ndim=2, inverse=True, real=True, norm="backward", onesided=onesided,
  signal_sizes=offset_shape[-2:])
- irfft3d_ = FFTWithSize(signal_ndim=3, inverse=True, real=True, norm="backward", onesided=onesided,
- signal_sizes=offset_shape[-3:])
+ irfft3d_ = FFTWithSize(signal_ndim=3, inverse=True, real=False, norm="backward", onesided=onesided)
  if inverse is False:
  if onesided is True:
  terms = 0
@@ -811,6 +813,7 @@ def get_bprop_fft_with_size(self):
  vec_mask = complex_op(1 - 2 * (mnp.arange(0, input_shape[-1], 1, input_type) % 2),
  zeros_op(input_shape[-1], input_type))
  terms = real_op(dout_first) + is_even * real_op(dout_last * vec_mask)
+ dx = to_tensor_op(0.5, input_type) * (dx * rfft_offset_size + terms) * rfft_norm_offset
  elif signal_ndim == 2:
  dx = irfft2d_(dout)
  arange_inner = mnp.arange(0, input_shape[-2], 1, input_type)
@@ -852,26 +855,27 @@ def get_bprop_fft_with_size(self):
  dout_shape, [input_shape[-1]])))
  dout_last_term = dout_last_term * vec_mask
  terms = real_op(dout_first_term) + is_even * real_op(dout_last_term)
+ dx = to_tensor_op(0.5, input_type) * (dx * rfft_offset_size + terms) * rfft_norm_offset
  elif signal_ndim == 3:
- dx = irfft3d_(dout) * real_op(offset_size)
- dx = to_tensor_op(0.5, input_type) * (dx * rfft_offset_size + terms) * rfft_norm_offset
+ zeros_shape = offset_shape[:-1] + (offset_shape[-1] - dout_shape[-1],)
+ zeros_values = zeros_op(zeros_shape, input_type)
+ zeros_padding = complex_op(zeros_values, zeros_values)
+ dout = concat_op_last((dout, zeros_padding))
+ dx = real_op(irfft3d_(dout)) * real_op(offset_size)
  else:
  dx = irfft_fn(dout) * real_op(offset_size)
  else:
  dx = rfft_fn(dout)
  if onesided is True:
- if signal_ndim != 3:
- is_odd = dout_shape[-1] % 2
- last_shape = offset_shape[-1]
- mask = concat_op((ones_op(1, output_type), 2.0 * ones_op(
- (last_shape - 2 + is_odd,), output_type), ones_op((1 - is_odd,), output_type)))
- dx = dx * complex_op(mask, zeros_op(shape_op(mask), output_type))
- irfft_offset_size = to_tensor_op(
- _fft_with_size_back_norm(shape_op(dout), norm, inverse, signal_ndim),
- output_type)
- dx = dx * complex_op(irfft_offset_size, zeros_op(1, output_type))
- else:
- dx = dx * complex_op(offset_size, zeros_op(1, output_type))
+ is_odd = dout_shape[-1] % 2
+ last_shape = offset_shape[-1]
+ mask = concat_op((ones_op(1, output_type), 2.0 * ones_op(
+ (last_shape - 2 + is_odd,), output_type), ones_op((1 - is_odd,), output_type)))
+ dx = dx * complex_op(mask, zeros_op(shape_op(mask), output_type))
+ irfft_offset_size = to_tensor_op(
+ _fft_with_size_back_norm(shape_op(dout), norm, inverse, signal_ndim),
+ output_type)
+ dx = dx * complex_op(irfft_offset_size, zeros_op(1, output_type))
  else:
  dx = dx * complex_op(offset_size, zeros_op(1, output_type))
  return (dx,)
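
Note on the hunk above: for the one-sided 3-D case the gradient is now padded with complex zeros back to the full last dimension and pushed through a complex inverse FFT (real=False), keeping only the real part. A minimal NumPy sketch of the shape arithmetic, for illustration only (not MindSpore code):

    import numpy as np

    x = np.random.rand(2, 4, 6)        # real input, last dimension n = 6
    dout = np.fft.rfftn(x)             # one-sided spectrum, last dimension n // 2 + 1 = 4
    offset_shape, dout_shape = x.shape, dout.shape
    # same padding-shape computation as zeros_shape in the diff
    zeros_shape = offset_shape[:-1] + (offset_shape[-1] - dout_shape[-1],)
    padded = np.concatenate([dout, np.zeros(zeros_shape, dtype=dout.dtype)], axis=-1)
    dx = np.fft.ifftn(padded).real     # full complex inverse transform, real part only
    print(zeros_shape, padded.shape)   # (2, 4, 2) (2, 4, 6)
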
@@ -1017,3 +1021,19 @@ def get_bprop_tensor_add(self):
  return binop_grad_common(x, y, dout, dout)

  return bprop
+
+
+ @bprop_getters.register(_MirrorSilentCheck)
+ def get_bprop_mirror_silent_check(self):
+ """Grad definition for '_MirrorSilentCheck' op"""
+ silent_check = SilentCheck(self.min_steps, self.thresh_l1, self.coeff_l1, self.thresh_l2, self.coeff_l2)
+ out_tensor = Tensor([0.0], mstype.float32)
+
+ def bporp(x, pre_val, min_val, max_val, n_step, loss_scale, out, dout):
+ if loss_scale is not None:
+ dout = dout / loss_scale
+ grad = ops.norm(dout)
+ dx, _, _, _, _ = silent_check(grad, dout, pre_val, min_val, max_val, n_step)
+ return (dx, out_tensor, out_tensor, out_tensor, out_tensor, out_tensor)
+
+ return bporp
@@ -60,6 +60,7 @@ from .init_data_set_queue import _init_data_set_queue_aicpu
  from .embedding_lookup import _embedding_lookup_aicpu
  from .padding import _padding_aicpu
  from .gather import _gather_aicpu
+ from .generate_eod_mask import _generate_eod_mask_aicpu
  from .gather_grad import _gather_grad_aicpu
  from .gather_d_grad_v2 import _gather_d_grad_v2_aicpu
  from .gather_d import _gather_d_aicpu
@@ -0,0 +1,38 @@
+ # Copyright 2023 Huawei Technologies Co., Ltd
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ # ============================================================================
+
+ """GenerateEodMask op"""
+ from mindspore.ops.op_info_register import op_info_register, AiCPURegOp, DataType
+
+ generate_eod_mask_op_info = AiCPURegOp("GenerateEodMask") \
+ .fusion_type("OPAQUE") \
+ .attr("eod_token_id", "int") \
+ .attr("n_pos", "int") \
+ .attr("n_step", "listint") \
+ .attr("n_error_mode", "str") \
+ .input(0, "inputs_ids", "required") \
+ .output(0, "position_ids", "required") \
+ .dtype_format(DataType.U16_Default, DataType.U16_Default) \
+ .dtype_format(DataType.U32_Default, DataType.U32_Default) \
+ .dtype_format(DataType.U64_Default, DataType.U64_Default) \
+ .dtype_format(DataType.I32_Default, DataType.I32_Default) \
+ .dtype_format(DataType.I64_Default, DataType.I64_Default) \
+ .get_op_info()
+
+
+ @op_info_register(generate_eod_mask_op_info)
+ def _generate_eod_mask_aicpu():
+ """GenerateEodMask AiCPU register"""
+ return
@@ -1,4 +1,4 @@
- # Copyright 2022 Huawei Technologies Co., Ltd
+ # Copyright 2023 Huawei Technologies Co., Ltd
  #
  # Licensed under the Apache License, Version 2.0 (the "License");
  # you may not use this file except in compliance with the License.
@@ -24,11 +24,30 @@ lsap_op_info = AiCPURegOp("LinearSumAssignment") \
  .input(2, 'maximize', "required") \
  .output(0, "row_ind", "required") \
  .output(1, "col_ind", "required") \
- .attr("cust_aicpu", "str") \
  .dtype_format(DataType.F64_Default, DataType.I64_Default,
  DataType.BOOL_Default, DataType.I64_Default, DataType.I64_Default,) \
  .dtype_format(DataType.F32_Default, DataType.I64_Default,
  DataType.BOOL_Default, DataType.I64_Default, DataType.I64_Default,) \
+ .dtype_format(DataType.F16_Default, DataType.I64_Default,
+ DataType.BOOL_Default, DataType.I64_Default, DataType.I64_Default,) \
+ .dtype_format(DataType.BOOL_Default, DataType.I64_Default,
+ DataType.BOOL_Default, DataType.I64_Default, DataType.I64_Default,) \
+ .dtype_format(DataType.I16_Default, DataType.I64_Default,
+ DataType.BOOL_Default, DataType.I64_Default, DataType.I64_Default,) \
+ .dtype_format(DataType.I32_Default, DataType.I64_Default,
+ DataType.BOOL_Default, DataType.I64_Default, DataType.I64_Default,) \
+ .dtype_format(DataType.I64_Default, DataType.I64_Default,
+ DataType.BOOL_Default, DataType.I64_Default, DataType.I64_Default,) \
+ .dtype_format(DataType.I8_Default, DataType.I64_Default,
+ DataType.BOOL_Default, DataType.I64_Default, DataType.I64_Default,) \
+ .dtype_format(DataType.U16_Default, DataType.I64_Default,
+ DataType.BOOL_Default, DataType.I64_Default, DataType.I64_Default,) \
+ .dtype_format(DataType.U32_Default, DataType.I64_Default,
+ DataType.BOOL_Default, DataType.I64_Default, DataType.I64_Default,) \
+ .dtype_format(DataType.U64_Default, DataType.I64_Default,
+ DataType.BOOL_Default, DataType.I64_Default, DataType.I64_Default,) \
+ .dtype_format(DataType.U8_Default, DataType.I64_Default,
+ DataType.BOOL_Default, DataType.I64_Default, DataType.I64_Default,) \
  .get_op_info()
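
The hunk above widens the dtype coverage of the AiCPU LinearSumAssignment kernel and drops its cust_aicpu attribute. For context, the op solves the same assignment problem as SciPy's linear_sum_assignment; the SciPy call below is shown only to illustrate the semantics and is not the MindSpore primitive:

    import numpy as np
    from scipy.optimize import linear_sum_assignment

    cost = np.array([[4.0, 1.0, 3.0],
                     [2.0, 0.0, 5.0],
                     [3.0, 2.0, 2.0]])
    # row_ind / col_ind correspond to the op's two outputs
    row_ind, col_ind = linear_sum_assignment(cost, maximize=False)
    print(row_ind, col_ind, cost[row_ind, col_ind].sum())
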
@@ -268,7 +268,7 @@ def cat(tensors, axis=0):

  Returns:
  Tensor, the shape is :math:`(x_1, x_2, ..., \sum_{i=1}^Nx_{mi}, ..., x_R)`.
- The data type is the same with `tensors`.
+ The data type is the same with `tensors`.

  Raises:
  TypeError: If `axis` is not an int.
@@ -660,7 +660,7 @@ def one_hot(indices, depth, on_value=1, off_value=0, axis=-1):

  Note:
  If the input indices is rank `N`, the output will have rank `N+1`. The new axis is created at dimension `axis`.
- On Ascend, if `on_value` is Int64 dtype, `indices` must be Int64 dtype.
+ On Ascend, if `on_value` is int64 dtype, `indices` must be int64 dtype.

  Args:
  indices(Tensor): A tensor of indices. Tensor of shape :math:`(X_0, \ldots, X_n)`.
@@ -4228,6 +4228,7 @@ def space_to_batch_nd(input_x, block_size, paddings):

  Examples:
  >>> import numpy as np
+ >>> import mindspore
  >>> from mindspore import Tensor, ops
  >>> block_size = [2, 2]
  >>> paddings = [[0, 0], [0, 0]]
@@ -5395,8 +5396,8 @@ def masked_select(input, mask):

  Examples:
  >>> import numpy as np
- >>> import mindspore.ops as ops
- >>> from mindspore import Tensor
+ >>> import mindspore
+ >>> from mindspore import Tensor, ops
  >>> x = Tensor(np.array([1, 2, 3, 4]), mindspore.int64)
  >>> mask = Tensor(np.array([1, 0, 1, 0]), mindspore.bool_)
  >>> output = ops.masked_select(x, mask)
@@ -6520,7 +6521,7 @@ def topk(input, k, dim=None, largest=True, sorted=True):

  Args:
  input (Tensor): Input to be computed, data type must be float16, float32 or int32.
- k (int): The number of top or bottom elements to be computed along the last dimension, constant input is needed.
+ k (int): The number of top or bottom elements to be computed along the last dimension.
  dim (int, optional): The dimension to sort along. Default: ``None`` .
  largest (bool, optional): If largest is ``False`` then the k smallest elements are returned.
  Default: ``True`` .
@@ -51,7 +51,7 @@ def print_(*input_x):

  Examples:
  >>> import numpy as np
- >>> from mindspore import Tensor
+ >>> from mindspore import Tensor, ops
  >>> x = Tensor(np.ones([2, 1]).astype(np.int32))
  >>> y = Tensor(np.ones([2, 2]).astype(np.int32))
  >>> result = ops.print_('Print Tensor x and Tensor y:', x, y)
@@ -59,6 +59,8 @@ def cond(A, p=None):

  Args:
  A (Tensor): Tensor of shape :math:`(*, n)` or :math:`(*, m, n)` where * is zero or more batch dimensions.
+ If `p` is one of Union[1, -1, inf, -inf, 'fro', 'nuc'], the function uses
+ :class:`mindspore.ops.MatrixInverse` , therefore, :math:`(*, m, n)` has to be square and ivertible.
  p (Union[int, float, inf, -inf, 'fro', 'nuc'], optional): norm's mode. Refer to the table above for
  behavior. Default: ``None``.

@@ -84,8 +86,8 @@ def cond(A, p=None):
  matrix_inverse = _get_cache_prim(P.MatrixInverse)(adjoint=False)
  if p is None:
  p = 2
- norm_a = F.norm(A, p)
- norm_inv_a = F.norm(matrix_inverse(A), p)
+ norm_a = F.matrix_norm(A, p)
+ norm_inv_a = F.matrix_norm(matrix_inverse(A), p)
  return norm_a * norm_inv_a
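
The cond() change above switches the norm helper to F.matrix_norm; mathematically the condition number is a matrix norm of A multiplied by the same norm of A's inverse, which is why the new docstring note requires A to be square and invertible for those norm modes. A quick NumPy illustration of that definition (not MindSpore code):

    import numpy as np

    A = np.array([[1.0, 2.0],
                  [3.0, 4.0]])
    kappa = np.linalg.norm(A, 2) * np.linalg.norm(np.linalg.inv(A), 2)
    print(np.isclose(kappa, np.linalg.cond(A, 2)))   # True
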
@@ -194,6 +196,8 @@ def geqrf(input):
  ``Ascend`` ``GPU`` ``CPU``

  Examples:
+ >>> from mindspore import Tensor, ops
+ >>> import numpy as np
  >>> input_x = Tensor(np.array([[-2.0, -1.0], [1.0, 2.0]]).astype(np.float32))
  >>> y, tau = ops.geqrf(input_x)
  >>> print(y)
@@ -266,6 +270,16 @@ def svd(input, full_matrices=False, compute_uv=True):
  return s


+ def _check_pinv_shape(x):
+ if not isinstance(x, (Tensor, Tensor_)):
+ raise TypeError("The input x must be tensor")
+ if x.shape == ():
+ raise TypeError("For pinv, the 0-D input is not supported")
+ x_shape = F.shape(x)
+ if len(x_shape) < 2:
+ raise ValueError("input x should have 2 or more dimensions, " f"but got {len(x_shape)}.")
+
+
  def pinv(x, *, atol=None, rtol=None, hermitian=False):
  r"""
  Computes the (Moore-Penrose) pseudo-inverse of a matrix.
@@ -318,19 +332,15 @@ def pinv(x, *, atol=None, rtol=None, hermitian=False):
  ``CPU``

  Examples:
+ >>> import mindspore
+ >>> from mindspore import Tensor, ops
  >>> x = Tensor([[4., 0.], [0., 5.]], mindspore.float32)
  >>> output = ops.pinv(x)
  >>> print(output)
- [[0.25 0. ]
- [0. 0.2 ]]
+ [[0.25 0. ]
+ [0. 0.2 ]]
  """
- if not isinstance(x, (Tensor, Tensor_)):
- raise TypeError("The input x must be tensor")
- if x.shape == ():
- raise TypeError("For pinv, the 0-D input is not supported")
- x_shape = F.shape(x)
- if len(x_shape) < 2:
- raise ValueError("input x should have 2 or more dimensions, " f"but got {len(x_shape)}.")
+ _check_pinv_shape(x)
  x_dtype = _get_cache_prim(P.DType)()(x)
  _check_input_dtype("x", x_dtype, [mstype.float32, mstype.float64], "pinv")
  _check_attr_dtype("hermitian", hermitian, [bool], "pinv")
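
The pinv() hunks above only move the shape and type validation into the new _check_pinv_shape helper; the function still returns the Moore-Penrose pseudo-inverse. As a reminder of the property that result satisfies, a small NumPy check (illustrative only):

    import numpy as np

    A = np.array([[4.0, 0.0],
                  [0.0, 5.0],
                  [1.0, 2.0]])             # non-square inputs are allowed
    A_pinv = np.linalg.pinv(A)
    print(np.allclose(A @ A_pinv @ A, A))  # True: defining property of the pseudo-inverse
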
@@ -343,6 +343,7 @@ def add(input, other):

  Examples:
  >>> import numpy as np
+ >>> import mindspore
  >>> from mindspore import Tensor, ops
  >>> # case 1: x and y are both Tensor.
  >>> x = Tensor(np.array([1, 2, 3]).astype(np.float32))
@@ -12784,6 +12785,7 @@ def count_nonzero(x, axis=(), keep_dims=False, dtype=mstype.int32):
  Examples:
  >>> from mindspore import Tensor, ops
  >>> import numpy as np
+ >>> import mindspore
  >>> # case 1: each value specified.
  >>> x = Tensor(np.array([[0, 1, 0], [1, 1, 0]]).astype(np.float32))
  >>> nonzero_num = ops.count_nonzero(x=x, axis=[0, 1], keep_dims=True, dtype=mindspore.int32)
@@ -13392,6 +13394,7 @@ def batch_dot(x1, x2, axes=None):
  ``Ascend`` ``GPU`` ``CPU``

  Examples:
+ >>> import mindspore
  >>> from mindspore import Tensor, ops
  >>> import numpy as np
  >>> x1 = Tensor(np.ones(shape=[2, 2, 3]), mindspore.float32)
@@ -2748,6 +2748,7 @@ def softshrink(x, lambd=0.5):
  ``Ascend`` ``GPU`` ``CPU``

  Examples:
+ >>> import mindspore
  >>> from mindspore import Tensor
  >>> from mindspore import ops
  >>> import numpy as np
@@ -3000,11 +3001,12 @@ def dense(input, weight, bias=None):

  Examples:
  >>> import numpy as np
+ >>> import mindspore
  >>> from mindspore import Tensor, ops
- >>> input = mindspore.Tensor([[-1., 1., 2.], [-3., -3., 1.]], mindspore.float32)
- >>> weight = mindspore.Tensor([[-2., -2., -2.], [0., -1., 0.]], mindspore.float32)
- >>> bias = mindspore.Tensor([0., 1.], mindspore.float32)
- >>> output = mindspore.ops.dense(input, weight, bias)
+ >>> input = Tensor([[-1., 1., 2.], [-3., -3., 1.]], mindspore.float32)
+ >>> weight = Tensor([[-2., -2., -2.], [0., -1., 0.]], mindspore.float32)
+ >>> bias = Tensor([0., 1.], mindspore.float32)
+ >>> output = ops.dense(input, weight, bias)
  >>> print(output)
  [[-4. 0.]
  [10. 4.]]
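
The corrected dense() example above can be checked by hand, since the documented output is consistent with a dense layer computing input @ weight.T + bias. A NumPy verification (illustrative, not MindSpore code):

    import numpy as np

    x = np.array([[-1., 1., 2.], [-3., -3., 1.]])
    w = np.array([[-2., -2., -2.], [0., -1., 0.]])
    b = np.array([0., 1.])
    print(x @ w.T + b)   # [[-4.  0.]
                         #  [10.  4.]]
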
@@ -3992,8 +3994,8 @@ def l1_loss(input, target, reduction='mean'):
  Examples:
  >>> from mindspore import Tensor, ops
  >>> from mindspore import dtype as mstype
- >>> x = ms.Tensor([[1, 2, 3], [4, 5, 6]], mstype.float32)
- >>> target = ms.Tensor([[6, 5, 4], [3, 2, 1]], mstype.float32)
+ >>> x = Tensor([[1, 2, 3], [4, 5, 6]], mstype.float32)
+ >>> target = Tensor([[6, 5, 4], [3, 2, 1]], mstype.float32)
  >>> output = ops.l1_loss(x, target, reduction="mean")
  >>> print(output)
  3.0
@@ -5342,7 +5344,7 @@ def conv1d(input, weight, bias=None, stride=1, pad_mode="valid", padding=0, dila
  >>> from mindspore import Tensor, ops
  >>> x = Tensor(np.arange(64).reshape((4, 4, 4)), mindspore.float32)
  >>> weight = Tensor(np.arange(8).reshape((2, 2, 2)), mindspore.float32)
- >>> bias = Tensor([-0.12345, 2.7683], ms.float32)
+ >>> bias = Tensor([-0.12345, 2.7683], mindspore.float32)
  >>> output = ops.conv1d(x, weight, pad_mode='pad', padding=(1,), bias=bias, groups=2)
  >>> print(output.shape)
  (4, 2, 5)
@@ -7444,8 +7446,8 @@ def max_pool2d(x, kernel_size, stride=None, padding=0, dilation=1, return_indice
  return out


- def prompt_flash_attention(query, key, value, padding_mask, attn_mask, actual_seq_lengths,
- actual_seq_lengths_kv, deq_scale1, quant_scale1,
+ def prompt_flash_attention(query, key, value, attn_mask, actual_seq_lengths,
+ actual_seq_lengths_kv, pse_shift, deq_scale1, quant_scale1,
  deq_scale2, quant_scale2, quant_offset2, num_heads, scale_value=1.0, pre_tokens=2147483547,
  next_tokens=0, input_layout='BSH',
  num_key_value_heads=0, sparse_mode=0):
@@ -7468,11 +7470,11 @@ def prompt_flash_attention(query, key, value, padding_mask, attn_mask, actual_se
  Input tensor of shape :math:`(B, S, H)` / `(B, N, S, D)`.
  value (Tensor) - The value tensor with data type of float16 or float32.
  Input tensor of shape :math:`(B, S, H)` / `(B, N, S, D)`.
- padding_mask (Tensor) - The padding mask tensor with data type of float16 or float32
  attn_mask (Tensor) - The attention mask tensor with data type of float16 or float32.
  For each element, 0 indicates retention and 1 indicates discard. Input tensor of shape :math:`(B, 1, S, S)`.
  actual_seq_lengths (list[int]): Describe actual sequence length of each input with data type of int.
  actual_seq_lengths_kv (list[int]): Describe actual sequence length of each input with data type of int.
+ pse_shift (Tensor) - The position encoding tensor with data type of float16 or float32.
  dep_scale1 (Tensor)
  quant_scale1 (Tensor)
  deq_scale2 (Tensor)
@@ -7516,7 +7518,7 @@ def prompt_flash_attention(query, key, value, padding_mask, attn_mask, actual_se

  pfa = _get_cache_prim(NN_OPS.PromptFlashAttention)(num_heads, scale_value, pre_tokens, next_tokens, input_layout,
  num_key_value_heads, sparse_mode)
- return pfa(query, key, value, padding_mask, attn_mask, actual_seq_lengths, actual_seq_lengths_kv, deq_scale1,
+ return pfa(query, key, value, attn_mask, actual_seq_lengths, actual_seq_lengths_kv, pse_shift, deq_scale1,
  quant_scale1, deq_scale2, quant_scale2, quant_offset2)
@@ -99,6 +99,7 @@ def assign_sub(variable, value):
  >>> import mindspore
  >>> import numpy as np
  >>> from mindspore import Tensor, ops
+ >>> from mindspore.common.initializer import initializer
  >>> variable = mindspore.Parameter(initializer(1, [1], mindspore.int32), name="global_step")
  >>> value = Tensor(np.ones([1]).astype(np.int32) * 100)
  >>> ops.assign_sub(variable, value)
@@ -149,6 +150,7 @@ def assign_add(variable, value):
  >>> import mindspore
  >>> import numpy as np
  >>> from mindspore import Tensor, ops
+ >>> from mindspore.common.initializer import initializer
  >>> variable = mindspore.Parameter(initializer(1, [1], mindspore.int32), name="global_step")
  >>> value = Tensor(np.ones([1]).astype(np.int32) * 100)
  >>> ops.assign_add(variable, value)
@@ -28,7 +28,7 @@ def csr_cos(x: CSRTensor) -> CSRTensor:
  out_i = \cos(x_i)

  .. warning::
- Currently support data types float16 and float32. If use Float64, there may be a problem of missing precision.
+ Currently support data types float16 and float32. If use float64, there may be a problem of missing precision.

  Args:
  x (CSRTensor): Input CSRTensor.
@@ -69,7 +69,7 @@ def coo_cos(x: COOTensor) -> COOTensor:
  out_i = \cos(x_i)

  .. warning::
- If use Float64, there may be a problem of missing precision.
+ If use float64, there may be a problem of missing precision.

  Args:
  x (COOTensor): Input COOTensor.
@@ -81,6 +81,7 @@ def vmap(fn, in_axes=0, out_axes=0):
  ``Ascend`` ``GPU`` ``CPU``

  Examples:
+ >>> import numpy as np
  >>> from mindspore import Tensor
  >>> from mindspore import vmap
  >>> def test_vmap(x, y, z): # ([a],[a],[a]) -> [a]
@@ -118,7 +118,7 @@ from .nn_ops import (LSTM, SGD, Adam, AdamWeightDecay, FusedSparseAdam, FusedSpa
  Dilation2D, DataFormatVecPermute, DeformableOffsets, Dense, FractionalAvgPool,
  FractionalMaxPool, FractionalMaxPool3DWithFixedKsize, FractionalMaxPoolWithFixedKsize,
  GridSampler2D, TripletMarginLoss, UpsampleNearest3D, UpsampleTrilinear3D, PadV3, ChannelShuffle,
- GLU, MaxUnpool3D, Pdist, RmsNorm)
+ GLU, MaxUnpool3D, Pdist, RmsNorm, PagedAttention, PagedAttentionMask, ReshapeAndCache)
  from .other_ops import (Assign, IOU, BoundingBoxDecode, BoundingBoxEncode,
  ConfusionMatrix, UpdateState, Load, StopGradient,
  CheckValid, Partial, Depend, Push, Pull, PyExecute, PyFunc, _DynamicLossScale,
@@ -692,7 +692,10 @@ __all__ = [
  "MaskedScatter",
  "Ormqr",
  "RandpermV2",
- "RmsNorm"
+ "RmsNorm",
+ "PagedAttention",
+ "PagedAttentionMask",
+ "ReshapeAndCache"
  ]

  __custom__ = [
@@ -65,7 +65,7 @@ class SubAndFilter(PrimitiveWithCheck):

  Inputs:
  - **input_x** (Tensor) - Input tensor.
- - **max_num** (Int) - The max value of element that after sub `offset`.
+ - **max_num** (int) - The max value of element that after sub `offset`.
  - **offset** (int) - Specifies the offset value of this `input_x`.

  Outputs:
@@ -3861,10 +3861,10 @@ class FlashAttentionScoreGrad(Primitive):
  validator.check_value_type('input_layout', input_layout, [str], self.name)
  if input_layout not in ["BSH", "BNSD"]:
  raise ValueError(f"Attribute 'input_layout' must be either 'BSH' or 'BNSD', but got {input_layout}")
- self.init_prim_io_names(inputs=['query', 'key', 'value', 'attn_mask', 'attention_in', 'softmax_max',
- 'softmax_sum', 'dy', 'drop_mask', 'real_shift', "padding_mask", 'softmax_out',
+ self.init_prim_io_names(inputs=['query', 'key', 'value', 'dy', 'pse_shift', 'drop_mask', "padding_mask",
+ 'attn_mask', 'softmax_max', 'softmax_sum', 'softmax_out', 'attention_in',
  'prefix'],
- outputs=['dq', 'dk', 'dv'])
+ outputs=['dq', 'dk', 'dv', 'dpse'])


  class RmsNormGrad(Primitive):
@@ -3882,4 +3882,3 @@ class RmsNormGrad(Primitive):
  """Initialize RmsNormGrad."""
  self.init_prim_io_names(inputs=["dy", "x", "rstd", "gamma"],
  outputs=["dx", "dgamma"])
-
@@ -16,6 +16,7 @@
  """Inner operators."""
  from types import FunctionType, MethodType
  from collections.abc import Iterable
+ import os
  import numpy as np

  from mindspore.common import Tensor
@@ -1008,7 +1009,7 @@ class Centralization(PrimitiveWithInfer):

  Inputs:
  - **input_x** (Tensor) - The input tensor. The data type mast be float16 or float32.
- - **axis** (Union[Int, Tuple(Int), List(Int)]) - The dimensions to reduce. Default: (), reduce all dimensions.
+ - **axis** (Union[int, Tuple(int), List(int)]) - The dimensions to reduce. Default: (), reduce all dimensions.
  Only constant value is allowed. Must be in the range [-rank(input_x), rank(input_x)).

  Outputs:
@@ -2858,6 +2859,60 @@ class DecoderKVCache(Primitive):
  self.add_prim_attr('side_effect_mem', True)


+ class _MirrorSilentCheck(PrimitiveWithInfer):
+ """
+ The operator _MirrorSilentCheck implements accuracy-sensitive detection on the tensor input in backpropagator.
+ Call _MirrorSilentCheck in method __call__ of derived class to implement accuracy-sensitive detection.
+
+ Inputs:
+ - **input** (Tensor) : The tensor used for detection.
+ Its data type must be mindspore.float16, mindspore.float32 or mindspore.bfloat16.
+ - **pre_val** (Parameter(Tensor)) : Support parameter in accuracy-sensitive detection.
+ Please only generated by method generate_params() of ASDBase.
+ - **min_val** (Parameter(Tensor)) : Support parameter in accuracy-sensitive detection.
+ Please only generated by method generate_params() of ASDBase.
+ - **max_val** (Parameter(Tensor)) : Support parameter in accuracy-sensitive detection.
+ Please only generated by method generate_params() of ASDBase.
+ - **cnt** (Parameter(Tensor)) : Support parameter in accuracy-sensitive detection.
+ Please only generated by method generate_params() of ASDBase.
+ After each invocation of _MirrorSilentCheck, increment the value of cnt by one.
+
+ Outputs:
+ - **output** (Tensor) - Same shape, type and value as `input`.
+ """
+ @prim_attr_register
+ def __init__(self, min_steps=8):
+ upper_thresh, sigma_thresh = self.get_thresh()
+ self.min_steps = min_steps
+ self.thresh_l1 = upper_thresh[0]
+ self.coeff_l1 = sigma_thresh[0]
+ self.thresh_l2 = upper_thresh[1]
+ self.coeff_l2 = sigma_thresh[1]
+ self.add_prim_attr('side_effect_mem', True)
+
+ def parse_thresh(self, env_var_name, default_value, min_value):
+ env_var = os.environ.get(env_var_name, default=default_value)
+ thresh = [value.strip() for value in env_var.split(",")]
+ if len(thresh) != 2 or not all(value.isdigit() for value in thresh):
+ thresh = default_value.split(",")
+ thresh = [float(max(int(value), min_value)) for value in thresh]
+ if thresh[0] <= thresh[1]:
+ thresh = [float(value) for value in default_value.split(",")]
+
+ return thresh
+
+ def get_thresh(self):
+ upper_thresh = self.parse_thresh("NPU_ASD_UPPER_THRESH", "1000000,10000", 3)
+ sigma_thresh = self.parse_thresh("NPU_ASD_SIGMA_THRESH", "100000,5000", 3)
+ return upper_thresh, sigma_thresh
+
+ def infer_shape(self, x_shape, pre_shape, min_shape, max_shape, n_step, loss_scale_shape):
+ return x_shape
+
+ def infer_dtype(self, x_dtype, pre_dtype, min_dtype, max_dtype, n_dtype, loss_scale_dtype):
+ return x_dtype
+
+
  class PromptKVCache(Primitive):
  r"""
  The PromptKVCache is used for prefill the KVCache of transformer network.
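
The _MirrorSilentCheck primitive added above reads its detection thresholds from the NPU_ASD_UPPER_THRESH and NPU_ASD_SIGMA_THRESH environment variables. A standalone sketch of that parsing logic, copied out of the primitive so it can be run directly (same algorithm as parse_thresh in the diff):

    import os

    def parse_thresh(env_var_name, default_value, min_value):
        env_var = os.environ.get(env_var_name, default=default_value)
        thresh = [value.strip() for value in env_var.split(",")]
        # fall back to the default when the value is not two comma-separated integers
        if len(thresh) != 2 or not all(value.isdigit() for value in thresh):
            thresh = default_value.split(",")
        thresh = [float(max(int(value), min_value)) for value in thresh]
        # the first threshold must stay strictly greater than the second one
        if thresh[0] <= thresh[1]:
            thresh = [float(value) for value in default_value.split(",")]
        return thresh

    os.environ["NPU_ASD_UPPER_THRESH"] = "500000,20000"
    print(parse_thresh("NPU_ASD_UPPER_THRESH", "1000000,10000", 3))   # [500000.0, 20000.0]
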
@@ -579,7 +579,7 @@ class FakeQuantWithMinMaxVars(PrimitiveWithInfer):
  range is [1, 2^num_bits-1]. Default: ``False``.

  Inputs:
- - **x** (Tensor) - Float32 tensor representing the shape of the output tensor.
+ - **x** (Tensor) - float32 tensor representing the shape of the output tensor.
  - **min** (Tensor) - Value of the min range of the input data x.
  - **max** (Tensor) - Value of the max range of the input data x.

@@ -638,7 +638,7 @@ class FakeQuantWithMinMaxVarsGradient(PrimitiveWithInfer):

  Inputs:
  - **gradients** (Tensor) - The gradient above the FakeQuantWithMinMaxVars.
- - **x** (Tensor) - Float32 tensor representing the shape of the output tensor.
+ - **x** (Tensor) - float32 tensor representing the shape of the output tensor.
  - **min** (Tensor) - Value of the min range of the input data x.
  - **max** (Tensor) - Value of the max range of the input data x.

@@ -702,7 +702,7 @@ class FakeQuantWithMinMaxVarsPerChannel(PrimitiveWithInfer):
  range is [1, 2^num_bits-1]. Default: ``False``.

  Inputs:
- - **x** (Tensor) - Float32 tensor representing the shape of the output tensor.
+ - **x** (Tensor) - float32 tensor representing the shape of the output tensor.
  - **min** (Tensor) - Value of the min range of the input data x.
  - **max** (Tensor) - Value of the max range of the input data x.

@@ -754,7 +754,7 @@ class FakeQuantWithMinMaxVarsPerChannelGradient(PrimitiveWithInfer):

  Inputs:
  - **gradients** (Tensor) - The gradient above the FakeQuantWithMinMaxVars.
- - **x** (Tensor) - Float32 tensor representing the shape of the output tensor.
+ - **x** (Tensor) - float32 tensor representing the shape of the output tensor.
  - **min** (Tensor) - Value of the min range of the input data x.
  - **max** (Tensor) - Value of the max range of the input data x.

@@ -341,7 +341,7 @@ class LSTMV2(Primitive):
  - **h** (Tensor) - Tensor of shape (num_directions * `num_layers`, batch_size, `hidden_size`).
  - **c** (Tensor) - Tensor of shape (num_directions * `num_layers`, batch_size, `hidden_size`).
  - **w** (Tensor) - The input tensor which states for weights.
- - **seq_lengths** (Tensor) - The Tensor[Int32] of shape (batch_size, ),
+ - **seq_lengths** (Tensor) - The Tensor[int32] of shape (batch_size, ),
  indicates the seq_length of each batch dim.

  Outputs: