mindspore 2.2.10__cp39-cp39-win_amd64.whl → 2.2.14__cp39-cp39-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mindspore might be problematic. Click here for more details.
- mindspore/.commit_id +1 -1
- mindspore/__init__.py +2 -1
- mindspore/_c_dataengine.cp39-win_amd64.pyd +0 -0
- mindspore/_c_expression.cp39-win_amd64.pyd +0 -0
- mindspore/_c_mindrecord.cp39-win_amd64.pyd +0 -0
- mindspore/_extends/parse/__init__.py +3 -2
- mindspore/_extends/parse/parser.py +6 -1
- mindspore/_extends/parse/standard_method.py +12 -2
- mindspore/common/_utils.py +16 -0
- mindspore/common/tensor.py +0 -2
- mindspore/communication/management.py +3 -0
- mindspore/context.py +34 -4
- mindspore/dataset/engine/cache_client.py +8 -5
- mindspore/dataset/engine/datasets.py +23 -0
- mindspore/dataset/engine/validators.py +1 -1
- mindspore/dataset/vision/py_transforms_util.py +2 -2
- mindspore/dnnl.dll +0 -0
- mindspore/experimental/optim/lr_scheduler.py +5 -6
- mindspore/jpeg62.dll +0 -0
- mindspore/mindrecord/tools/cifar100_to_mr.py +49 -57
- mindspore/mindrecord/tools/cifar10_to_mr.py +46 -55
- mindspore/mindrecord/tools/csv_to_mr.py +3 -8
- mindspore/mindrecord/tools/mnist_to_mr.py +4 -9
- mindspore/mindrecord/tools/tfrecord_to_mr.py +1 -4
- mindspore/mindspore_backend.dll +0 -0
- mindspore/mindspore_common.dll +0 -0
- mindspore/mindspore_core.dll +0 -0
- mindspore/mindspore_glog.dll +0 -0
- mindspore/mindspore_shared_lib.dll +0 -0
- mindspore/nn/layer/activation.py +1 -1
- mindspore/nn/layer/embedding.py +2 -2
- mindspore/nn/layer/flash_attention.py +48 -135
- mindspore/nn/loss/loss.py +1 -1
- mindspore/nn/optim/ada_grad.py +2 -2
- mindspore/nn/optim/sgd.py +3 -2
- mindspore/nn/wrap/__init__.py +4 -2
- mindspore/nn/wrap/cell_wrapper.py +6 -3
- mindspore/numpy/math_ops.py +1 -1
- mindspore/opencv_core452.dll +0 -0
- mindspore/opencv_imgcodecs452.dll +0 -0
- mindspore/opencv_imgproc452.dll +0 -0
- mindspore/ops/__init__.py +3 -0
- mindspore/ops/_grad_experimental/grad_array_ops.py +0 -31
- mindspore/ops/_grad_experimental/grad_comm_ops.py +4 -2
- mindspore/ops/_grad_experimental/grad_inner_ops.py +8 -0
- mindspore/ops/_grad_experimental/grad_math_ops.py +37 -17
- mindspore/ops/_op_impl/aicpu/__init__.py +1 -0
- mindspore/ops/_op_impl/aicpu/generate_eod_mask.py +38 -0
- mindspore/ops/_op_impl/aicpu/linear_sum_assignment.py +21 -2
- mindspore/ops/function/array_func.py +6 -5
- mindspore/ops/function/debug_func.py +1 -1
- mindspore/ops/function/linalg_func.py +21 -11
- mindspore/ops/function/math_func.py +3 -0
- mindspore/ops/function/nn_func.py +13 -11
- mindspore/ops/function/parameter_func.py +2 -0
- mindspore/ops/function/sparse_unary_func.py +2 -2
- mindspore/ops/function/vmap_func.py +1 -0
- mindspore/ops/operations/__init__.py +5 -2
- mindspore/ops/operations/_embedding_cache_ops.py +1 -1
- mindspore/ops/operations/_grad_ops.py +3 -4
- mindspore/ops/operations/_inner_ops.py +56 -1
- mindspore/ops/operations/_quant_ops.py +4 -4
- mindspore/ops/operations/_rl_inner_ops.py +1 -1
- mindspore/ops/operations/array_ops.py +15 -4
- mindspore/ops/operations/custom_ops.py +1 -1
- mindspore/ops/operations/debug_ops.py +1 -1
- mindspore/ops/operations/image_ops.py +3 -3
- mindspore/ops/operations/inner_ops.py +49 -0
- mindspore/ops/operations/math_ops.py +65 -3
- mindspore/ops/operations/nn_ops.py +95 -28
- mindspore/ops/operations/random_ops.py +2 -0
- mindspore/ops/operations/sparse_ops.py +4 -4
- mindspore/ops/silent_check.py +162 -0
- mindspore/parallel/__init__.py +3 -2
- mindspore/parallel/_auto_parallel_context.py +82 -3
- mindspore/parallel/_parallel_serialization.py +34 -2
- mindspore/parallel/_tensor.py +3 -1
- mindspore/parallel/_transformer/transformer.py +8 -8
- mindspore/parallel/checkpoint_transform.py +191 -45
- mindspore/profiler/parser/ascend_cluster_generator.py +111 -0
- mindspore/profiler/parser/ascend_communicate_generator.py +315 -0
- mindspore/profiler/parser/ascend_flops_generator.py +8 -2
- mindspore/profiler/parser/ascend_fpbp_generator.py +8 -2
- mindspore/profiler/parser/ascend_hccl_generator.py +2 -2
- mindspore/profiler/parser/ascend_msprof_exporter.py +30 -6
- mindspore/profiler/parser/ascend_msprof_generator.py +16 -5
- mindspore/profiler/parser/ascend_op_generator.py +15 -7
- mindspore/profiler/parser/ascend_timeline_generator.py +5 -2
- mindspore/profiler/parser/base_timeline_generator.py +11 -3
- mindspore/profiler/parser/cpu_gpu_timeline_generator.py +2 -1
- mindspore/profiler/parser/framework_parser.py +8 -2
- mindspore/profiler/parser/memory_usage_parser.py +8 -2
- mindspore/profiler/parser/minddata_analyzer.py +8 -2
- mindspore/profiler/parser/minddata_parser.py +1 -1
- mindspore/profiler/parser/msadvisor_analyzer.py +4 -2
- mindspore/profiler/parser/msadvisor_parser.py +9 -3
- mindspore/profiler/profiling.py +97 -25
- mindspore/rewrite/api/node.py +1 -1
- mindspore/rewrite/api/symbol_tree.py +2 -2
- mindspore/rewrite/parsers/for_parser.py +6 -6
- mindspore/rewrite/parsers/module_parser.py +4 -4
- mindspore/tinyxml2.dll +0 -0
- mindspore/train/callback/_checkpoint.py +8 -8
- mindspore/train/callback/_landscape.py +2 -3
- mindspore/train/callback/_summary_collector.py +6 -7
- mindspore/train/dataset_helper.py +6 -0
- mindspore/train/model.py +17 -5
- mindspore/train/serialization.py +6 -1
- mindspore/train/summary/_writer_pool.py +1 -1
- mindspore/train/summary/summary_record.py +5 -6
- mindspore/turbojpeg.dll +0 -0
- mindspore/version.py +1 -1
- {mindspore-2.2.10.dist-info → mindspore-2.2.14.dist-info}/METADATA +3 -2
- {mindspore-2.2.10.dist-info → mindspore-2.2.14.dist-info}/RECORD +117 -124
- mindspore/ops/_op_impl/_custom_op/flash_attention/__init__.py +0 -0
- mindspore/ops/_op_impl/_custom_op/flash_attention/attention.py +0 -406
- mindspore/ops/_op_impl/_custom_op/flash_attention/constants.py +0 -41
- mindspore/ops/_op_impl/_custom_op/flash_attention/flash_attention_bwd.py +0 -467
- mindspore/ops/_op_impl/_custom_op/flash_attention/flash_attention_fwd.py +0 -563
- mindspore/ops/_op_impl/_custom_op/flash_attention/flash_attention_impl.py +0 -193
- mindspore/ops/_op_impl/_custom_op/flash_attention/tik_ops_utils.py +0 -435
- mindspore/ops/_op_impl/_custom_op/flash_attention/tiling_strategy/__init__.py +0 -0
- mindspore/ops/_op_impl/_custom_op/flash_attention/tiling_strategy/sparse_tiling.py +0 -45
- mindspore/ops/_op_impl/_custom_op/flash_attention/tiling_strategy/strategy.py +0 -67
- mindspore/ops/_op_impl/_custom_op/flash_attention/tiling_strategy/wukong_tiling.py +0 -62
- {mindspore-2.2.10.dist-info → mindspore-2.2.14.dist-info}/WHEEL +0 -0
- {mindspore-2.2.10.dist-info → mindspore-2.2.14.dist-info}/entry_points.txt +0 -0
- {mindspore-2.2.10.dist-info → mindspore-2.2.14.dist-info}/top_level.txt +0 -0
|
@@ -18,11 +18,13 @@
|
|
|
18
18
|
import numpy as np
|
|
19
19
|
import mindspore.numpy as mnp
|
|
20
20
|
from mindspore.common import dtype as mstype
|
|
21
|
+
import mindspore.ops as ops
|
|
21
22
|
from mindspore.ops import functional as F
|
|
22
23
|
from mindspore.ops import operations as P
|
|
23
24
|
from mindspore import Tensor
|
|
24
25
|
from mindspore.ops.operations.math_ops import Real, Imag, Complex, Angle
|
|
25
|
-
from mindspore.ops.operations.math_ops import Polar
|
|
26
|
+
from mindspore.ops.operations.math_ops import Polar, SilentCheck
|
|
27
|
+
from mindspore.ops.operations._inner_ops import _MirrorSilentCheck
|
|
26
28
|
from mindspore.ops.operations import _grad_ops as G
|
|
27
29
|
from mindspore.ops.operations.math_ops import Lgamma
|
|
28
30
|
from mindspore.ops.operations.math_ops import Digamma
|
|
@@ -763,6 +765,7 @@ def get_bprop_fft_with_size(self):
|
|
|
763
765
|
to_tensor_op = P.ScalarToTensor()
|
|
764
766
|
type_op = P.DType()
|
|
765
767
|
concat_op = P.Concat()
|
|
768
|
+
concat_op_last = P.Concat(axis=-1)
|
|
766
769
|
ones_op = P.Ones()
|
|
767
770
|
zeros_op = P.Zeros()
|
|
768
771
|
real_op = P.Real()
|
|
@@ -794,8 +797,7 @@ def get_bprop_fft_with_size(self):
|
|
|
794
797
|
signal_sizes=offset_shape[-1:])
|
|
795
798
|
irfft2d_ = FFTWithSize(signal_ndim=2, inverse=True, real=True, norm="backward", onesided=onesided,
|
|
796
799
|
signal_sizes=offset_shape[-2:])
|
|
797
|
-
irfft3d_ = FFTWithSize(signal_ndim=3, inverse=True, real=
|
|
798
|
-
signal_sizes=offset_shape[-3:])
|
|
800
|
+
irfft3d_ = FFTWithSize(signal_ndim=3, inverse=True, real=False, norm="backward", onesided=onesided)
|
|
799
801
|
if inverse is False:
|
|
800
802
|
if onesided is True:
|
|
801
803
|
terms = 0
|
|
@@ -811,6 +813,7 @@ def get_bprop_fft_with_size(self):
|
|
|
811
813
|
vec_mask = complex_op(1 - 2 * (mnp.arange(0, input_shape[-1], 1, input_type) % 2),
|
|
812
814
|
zeros_op(input_shape[-1], input_type))
|
|
813
815
|
terms = real_op(dout_first) + is_even * real_op(dout_last * vec_mask)
|
|
816
|
+
dx = to_tensor_op(0.5, input_type) * (dx * rfft_offset_size + terms) * rfft_norm_offset
|
|
814
817
|
elif signal_ndim == 2:
|
|
815
818
|
dx = irfft2d_(dout)
|
|
816
819
|
arange_inner = mnp.arange(0, input_shape[-2], 1, input_type)
|
|
@@ -852,26 +855,27 @@ def get_bprop_fft_with_size(self):
|
|
|
852
855
|
dout_shape, [input_shape[-1]])))
|
|
853
856
|
dout_last_term = dout_last_term * vec_mask
|
|
854
857
|
terms = real_op(dout_first_term) + is_even * real_op(dout_last_term)
|
|
858
|
+
dx = to_tensor_op(0.5, input_type) * (dx * rfft_offset_size + terms) * rfft_norm_offset
|
|
855
859
|
elif signal_ndim == 3:
|
|
856
|
-
|
|
857
|
-
|
|
860
|
+
zeros_shape = offset_shape[:-1] + (offset_shape[-1] - dout_shape[-1],)
|
|
861
|
+
zeros_values = zeros_op(zeros_shape, input_type)
|
|
862
|
+
zeros_padding = complex_op(zeros_values, zeros_values)
|
|
863
|
+
dout = concat_op_last((dout, zeros_padding))
|
|
864
|
+
dx = real_op(irfft3d_(dout)) * real_op(offset_size)
|
|
858
865
|
else:
|
|
859
866
|
dx = irfft_fn(dout) * real_op(offset_size)
|
|
860
867
|
else:
|
|
861
868
|
dx = rfft_fn(dout)
|
|
862
869
|
if onesided is True:
|
|
863
|
-
|
|
864
|
-
|
|
865
|
-
|
|
866
|
-
|
|
867
|
-
|
|
868
|
-
|
|
869
|
-
|
|
870
|
-
|
|
871
|
-
|
|
872
|
-
dx = dx * complex_op(irfft_offset_size, zeros_op(1, output_type))
|
|
873
|
-
else:
|
|
874
|
-
dx = dx * complex_op(offset_size, zeros_op(1, output_type))
|
|
870
|
+
is_odd = dout_shape[-1] % 2
|
|
871
|
+
last_shape = offset_shape[-1]
|
|
872
|
+
mask = concat_op((ones_op(1, output_type), 2.0 * ones_op(
|
|
873
|
+
(last_shape - 2 + is_odd,), output_type), ones_op((1 - is_odd,), output_type)))
|
|
874
|
+
dx = dx * complex_op(mask, zeros_op(shape_op(mask), output_type))
|
|
875
|
+
irfft_offset_size = to_tensor_op(
|
|
876
|
+
_fft_with_size_back_norm(shape_op(dout), norm, inverse, signal_ndim),
|
|
877
|
+
output_type)
|
|
878
|
+
dx = dx * complex_op(irfft_offset_size, zeros_op(1, output_type))
|
|
875
879
|
else:
|
|
876
880
|
dx = dx * complex_op(offset_size, zeros_op(1, output_type))
|
|
877
881
|
return (dx,)
|
|
@@ -1017,3 +1021,19 @@ def get_bprop_tensor_add(self):
|
|
|
1017
1021
|
return binop_grad_common(x, y, dout, dout)
|
|
1018
1022
|
|
|
1019
1023
|
return bprop
|
|
1024
|
+
|
|
1025
|
+
|
|
1026
|
+
@bprop_getters.register(_MirrorSilentCheck)
|
|
1027
|
+
def get_bprop_mirror_silent_check(self):
|
|
1028
|
+
"""Grad definition for '_MirrorSilentCheck' op"""
|
|
1029
|
+
silent_check = SilentCheck(self.min_steps, self.thresh_l1, self.coeff_l1, self.thresh_l2, self.coeff_l2)
|
|
1030
|
+
out_tensor = Tensor([0.0], mstype.float32)
|
|
1031
|
+
|
|
1032
|
+
def bporp(x, pre_val, min_val, max_val, n_step, loss_scale, out, dout):
|
|
1033
|
+
if loss_scale is not None:
|
|
1034
|
+
dout = dout / loss_scale
|
|
1035
|
+
grad = ops.norm(dout)
|
|
1036
|
+
dx, _, _, _, _ = silent_check(grad, dout, pre_val, min_val, max_val, n_step)
|
|
1037
|
+
return (dx, out_tensor, out_tensor, out_tensor, out_tensor, out_tensor)
|
|
1038
|
+
|
|
1039
|
+
return bporp
|
|
@@ -60,6 +60,7 @@ from .init_data_set_queue import _init_data_set_queue_aicpu
|
|
|
60
60
|
from .embedding_lookup import _embedding_lookup_aicpu
|
|
61
61
|
from .padding import _padding_aicpu
|
|
62
62
|
from .gather import _gather_aicpu
|
|
63
|
+
from .generate_eod_mask import _generate_eod_mask_aicpu
|
|
63
64
|
from .gather_grad import _gather_grad_aicpu
|
|
64
65
|
from .gather_d_grad_v2 import _gather_d_grad_v2_aicpu
|
|
65
66
|
from .gather_d import _gather_d_aicpu
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
# Copyright 2023 Huawei Technologies Co., Ltd
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
# ============================================================================
|
|
15
|
+
|
|
16
|
+
"""GenerateEodMask op"""
|
|
17
|
+
from mindspore.ops.op_info_register import op_info_register, AiCPURegOp, DataType
|
|
18
|
+
|
|
19
|
+
generate_eod_mask_op_info = AiCPURegOp("GenerateEodMask") \
|
|
20
|
+
.fusion_type("OPAQUE") \
|
|
21
|
+
.attr("eod_token_id", "int") \
|
|
22
|
+
.attr("n_pos", "int") \
|
|
23
|
+
.attr("n_step", "listint") \
|
|
24
|
+
.attr("n_error_mode", "str") \
|
|
25
|
+
.input(0, "inputs_ids", "required") \
|
|
26
|
+
.output(0, "position_ids", "required") \
|
|
27
|
+
.dtype_format(DataType.U16_Default, DataType.U16_Default) \
|
|
28
|
+
.dtype_format(DataType.U32_Default, DataType.U32_Default) \
|
|
29
|
+
.dtype_format(DataType.U64_Default, DataType.U64_Default) \
|
|
30
|
+
.dtype_format(DataType.I32_Default, DataType.I32_Default) \
|
|
31
|
+
.dtype_format(DataType.I64_Default, DataType.I64_Default) \
|
|
32
|
+
.get_op_info()
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
@op_info_register(generate_eod_mask_op_info)
|
|
36
|
+
def _generate_eod_mask_aicpu():
|
|
37
|
+
"""GenerateEodMask AiCPU register"""
|
|
38
|
+
return
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# Copyright
|
|
1
|
+
# Copyright 2023 Huawei Technologies Co., Ltd
|
|
2
2
|
#
|
|
3
3
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
4
|
# you may not use this file except in compliance with the License.
|
|
@@ -24,11 +24,30 @@ lsap_op_info = AiCPURegOp("LinearSumAssignment") \
|
|
|
24
24
|
.input(2, 'maximize', "required") \
|
|
25
25
|
.output(0, "row_ind", "required") \
|
|
26
26
|
.output(1, "col_ind", "required") \
|
|
27
|
-
.attr("cust_aicpu", "str") \
|
|
28
27
|
.dtype_format(DataType.F64_Default, DataType.I64_Default,
|
|
29
28
|
DataType.BOOL_Default, DataType.I64_Default, DataType.I64_Default,) \
|
|
30
29
|
.dtype_format(DataType.F32_Default, DataType.I64_Default,
|
|
31
30
|
DataType.BOOL_Default, DataType.I64_Default, DataType.I64_Default,) \
|
|
31
|
+
.dtype_format(DataType.F16_Default, DataType.I64_Default,
|
|
32
|
+
DataType.BOOL_Default, DataType.I64_Default, DataType.I64_Default,) \
|
|
33
|
+
.dtype_format(DataType.BOOL_Default, DataType.I64_Default,
|
|
34
|
+
DataType.BOOL_Default, DataType.I64_Default, DataType.I64_Default,) \
|
|
35
|
+
.dtype_format(DataType.I16_Default, DataType.I64_Default,
|
|
36
|
+
DataType.BOOL_Default, DataType.I64_Default, DataType.I64_Default,) \
|
|
37
|
+
.dtype_format(DataType.I32_Default, DataType.I64_Default,
|
|
38
|
+
DataType.BOOL_Default, DataType.I64_Default, DataType.I64_Default,) \
|
|
39
|
+
.dtype_format(DataType.I64_Default, DataType.I64_Default,
|
|
40
|
+
DataType.BOOL_Default, DataType.I64_Default, DataType.I64_Default,) \
|
|
41
|
+
.dtype_format(DataType.I8_Default, DataType.I64_Default,
|
|
42
|
+
DataType.BOOL_Default, DataType.I64_Default, DataType.I64_Default,) \
|
|
43
|
+
.dtype_format(DataType.U16_Default, DataType.I64_Default,
|
|
44
|
+
DataType.BOOL_Default, DataType.I64_Default, DataType.I64_Default,) \
|
|
45
|
+
.dtype_format(DataType.U32_Default, DataType.I64_Default,
|
|
46
|
+
DataType.BOOL_Default, DataType.I64_Default, DataType.I64_Default,) \
|
|
47
|
+
.dtype_format(DataType.U64_Default, DataType.I64_Default,
|
|
48
|
+
DataType.BOOL_Default, DataType.I64_Default, DataType.I64_Default,) \
|
|
49
|
+
.dtype_format(DataType.U8_Default, DataType.I64_Default,
|
|
50
|
+
DataType.BOOL_Default, DataType.I64_Default, DataType.I64_Default,) \
|
|
32
51
|
.get_op_info()
|
|
33
52
|
|
|
34
53
|
|
|
@@ -268,7 +268,7 @@ def cat(tensors, axis=0):
|
|
|
268
268
|
|
|
269
269
|
Returns:
|
|
270
270
|
Tensor, the shape is :math:`(x_1, x_2, ..., \sum_{i=1}^Nx_{mi}, ..., x_R)`.
|
|
271
|
-
|
|
271
|
+
The data type is the same with `tensors`.
|
|
272
272
|
|
|
273
273
|
Raises:
|
|
274
274
|
TypeError: If `axis` is not an int.
|
|
@@ -660,7 +660,7 @@ def one_hot(indices, depth, on_value=1, off_value=0, axis=-1):
|
|
|
660
660
|
|
|
661
661
|
Note:
|
|
662
662
|
If the input indices is rank `N`, the output will have rank `N+1`. The new axis is created at dimension `axis`.
|
|
663
|
-
On Ascend, if `on_value` is
|
|
663
|
+
On Ascend, if `on_value` is int64 dtype, `indices` must be int64 dtype.
|
|
664
664
|
|
|
665
665
|
Args:
|
|
666
666
|
indices(Tensor): A tensor of indices. Tensor of shape :math:`(X_0, \ldots, X_n)`.
|
|
@@ -4228,6 +4228,7 @@ def space_to_batch_nd(input_x, block_size, paddings):
|
|
|
4228
4228
|
|
|
4229
4229
|
Examples:
|
|
4230
4230
|
>>> import numpy as np
|
|
4231
|
+
>>> import mindspore
|
|
4231
4232
|
>>> from mindspore import Tensor, ops
|
|
4232
4233
|
>>> block_size = [2, 2]
|
|
4233
4234
|
>>> paddings = [[0, 0], [0, 0]]
|
|
@@ -5395,8 +5396,8 @@ def masked_select(input, mask):
|
|
|
5395
5396
|
|
|
5396
5397
|
Examples:
|
|
5397
5398
|
>>> import numpy as np
|
|
5398
|
-
>>> import mindspore
|
|
5399
|
-
>>> from mindspore import Tensor
|
|
5399
|
+
>>> import mindspore
|
|
5400
|
+
>>> from mindspore import Tensor, ops
|
|
5400
5401
|
>>> x = Tensor(np.array([1, 2, 3, 4]), mindspore.int64)
|
|
5401
5402
|
>>> mask = Tensor(np.array([1, 0, 1, 0]), mindspore.bool_)
|
|
5402
5403
|
>>> output = ops.masked_select(x, mask)
|
|
@@ -6520,7 +6521,7 @@ def topk(input, k, dim=None, largest=True, sorted=True):
|
|
|
6520
6521
|
|
|
6521
6522
|
Args:
|
|
6522
6523
|
input (Tensor): Input to be computed, data type must be float16, float32 or int32.
|
|
6523
|
-
k (int): The number of top or bottom elements to be computed along the last dimension
|
|
6524
|
+
k (int): The number of top or bottom elements to be computed along the last dimension.
|
|
6524
6525
|
dim (int, optional): The dimension to sort along. Default: ``None`` .
|
|
6525
6526
|
largest (bool, optional): If largest is ``False`` then the k smallest elements are returned.
|
|
6526
6527
|
Default: ``True`` .
|
|
@@ -51,7 +51,7 @@ def print_(*input_x):
|
|
|
51
51
|
|
|
52
52
|
Examples:
|
|
53
53
|
>>> import numpy as np
|
|
54
|
-
>>> from mindspore import Tensor
|
|
54
|
+
>>> from mindspore import Tensor, ops
|
|
55
55
|
>>> x = Tensor(np.ones([2, 1]).astype(np.int32))
|
|
56
56
|
>>> y = Tensor(np.ones([2, 2]).astype(np.int32))
|
|
57
57
|
>>> result = ops.print_('Print Tensor x and Tensor y:', x, y)
|
|
@@ -59,6 +59,8 @@ def cond(A, p=None):
|
|
|
59
59
|
|
|
60
60
|
Args:
|
|
61
61
|
A (Tensor): Tensor of shape :math:`(*, n)` or :math:`(*, m, n)` where * is zero or more batch dimensions.
|
|
62
|
+
If `p` is one of Union[1, -1, inf, -inf, 'fro', 'nuc'], the function uses
|
|
63
|
+
:class:`mindspore.ops.MatrixInverse` , therefore, :math:`(*, m, n)` has to be square and ivertible.
|
|
62
64
|
p (Union[int, float, inf, -inf, 'fro', 'nuc'], optional): norm's mode. Refer to the table above for
|
|
63
65
|
behavior. Default: ``None``.
|
|
64
66
|
|
|
@@ -84,8 +86,8 @@ def cond(A, p=None):
|
|
|
84
86
|
matrix_inverse = _get_cache_prim(P.MatrixInverse)(adjoint=False)
|
|
85
87
|
if p is None:
|
|
86
88
|
p = 2
|
|
87
|
-
norm_a = F.
|
|
88
|
-
norm_inv_a = F.
|
|
89
|
+
norm_a = F.matrix_norm(A, p)
|
|
90
|
+
norm_inv_a = F.matrix_norm(matrix_inverse(A), p)
|
|
89
91
|
return norm_a * norm_inv_a
|
|
90
92
|
|
|
91
93
|
|
|
@@ -194,6 +196,8 @@ def geqrf(input):
|
|
|
194
196
|
``Ascend`` ``GPU`` ``CPU``
|
|
195
197
|
|
|
196
198
|
Examples:
|
|
199
|
+
>>> from mindspore import Tensor, ops
|
|
200
|
+
>>> import numpy as np
|
|
197
201
|
>>> input_x = Tensor(np.array([[-2.0, -1.0], [1.0, 2.0]]).astype(np.float32))
|
|
198
202
|
>>> y, tau = ops.geqrf(input_x)
|
|
199
203
|
>>> print(y)
|
|
@@ -266,6 +270,16 @@ def svd(input, full_matrices=False, compute_uv=True):
|
|
|
266
270
|
return s
|
|
267
271
|
|
|
268
272
|
|
|
273
|
+
def _check_pinv_shape(x):
|
|
274
|
+
if not isinstance(x, (Tensor, Tensor_)):
|
|
275
|
+
raise TypeError("The input x must be tensor")
|
|
276
|
+
if x.shape == ():
|
|
277
|
+
raise TypeError("For pinv, the 0-D input is not supported")
|
|
278
|
+
x_shape = F.shape(x)
|
|
279
|
+
if len(x_shape) < 2:
|
|
280
|
+
raise ValueError("input x should have 2 or more dimensions, " f"but got {len(x_shape)}.")
|
|
281
|
+
|
|
282
|
+
|
|
269
283
|
def pinv(x, *, atol=None, rtol=None, hermitian=False):
|
|
270
284
|
r"""
|
|
271
285
|
Computes the (Moore-Penrose) pseudo-inverse of a matrix.
|
|
@@ -318,19 +332,15 @@ def pinv(x, *, atol=None, rtol=None, hermitian=False):
|
|
|
318
332
|
``CPU``
|
|
319
333
|
|
|
320
334
|
Examples:
|
|
335
|
+
>>> import mindspore
|
|
336
|
+
>>> from mindspore import Tensor, ops
|
|
321
337
|
>>> x = Tensor([[4., 0.], [0., 5.]], mindspore.float32)
|
|
322
338
|
>>> output = ops.pinv(x)
|
|
323
339
|
>>> print(output)
|
|
324
|
-
[[0.25
|
|
325
|
-
|
|
340
|
+
[[0.25 0. ]
|
|
341
|
+
[0. 0.2 ]]
|
|
326
342
|
"""
|
|
327
|
-
|
|
328
|
-
raise TypeError("The input x must be tensor")
|
|
329
|
-
if x.shape == ():
|
|
330
|
-
raise TypeError("For pinv, the 0-D input is not supported")
|
|
331
|
-
x_shape = F.shape(x)
|
|
332
|
-
if len(x_shape) < 2:
|
|
333
|
-
raise ValueError("input x should have 2 or more dimensions, " f"but got {len(x_shape)}.")
|
|
343
|
+
_check_pinv_shape(x)
|
|
334
344
|
x_dtype = _get_cache_prim(P.DType)()(x)
|
|
335
345
|
_check_input_dtype("x", x_dtype, [mstype.float32, mstype.float64], "pinv")
|
|
336
346
|
_check_attr_dtype("hermitian", hermitian, [bool], "pinv")
|
|
@@ -343,6 +343,7 @@ def add(input, other):
|
|
|
343
343
|
|
|
344
344
|
Examples:
|
|
345
345
|
>>> import numpy as np
|
|
346
|
+
>>> import mindspore
|
|
346
347
|
>>> from mindspore import Tensor, ops
|
|
347
348
|
>>> # case 1: x and y are both Tensor.
|
|
348
349
|
>>> x = Tensor(np.array([1, 2, 3]).astype(np.float32))
|
|
@@ -12784,6 +12785,7 @@ def count_nonzero(x, axis=(), keep_dims=False, dtype=mstype.int32):
|
|
|
12784
12785
|
Examples:
|
|
12785
12786
|
>>> from mindspore import Tensor, ops
|
|
12786
12787
|
>>> import numpy as np
|
|
12788
|
+
>>> import mindspore
|
|
12787
12789
|
>>> # case 1: each value specified.
|
|
12788
12790
|
>>> x = Tensor(np.array([[0, 1, 0], [1, 1, 0]]).astype(np.float32))
|
|
12789
12791
|
>>> nonzero_num = ops.count_nonzero(x=x, axis=[0, 1], keep_dims=True, dtype=mindspore.int32)
|
|
@@ -13392,6 +13394,7 @@ def batch_dot(x1, x2, axes=None):
|
|
|
13392
13394
|
``Ascend`` ``GPU`` ``CPU``
|
|
13393
13395
|
|
|
13394
13396
|
Examples:
|
|
13397
|
+
>>> import mindspore
|
|
13395
13398
|
>>> from mindspore import Tensor, ops
|
|
13396
13399
|
>>> import numpy as np
|
|
13397
13400
|
>>> x1 = Tensor(np.ones(shape=[2, 2, 3]), mindspore.float32)
|
|
@@ -2748,6 +2748,7 @@ def softshrink(x, lambd=0.5):
|
|
|
2748
2748
|
``Ascend`` ``GPU`` ``CPU``
|
|
2749
2749
|
|
|
2750
2750
|
Examples:
|
|
2751
|
+
>>> import mindspore
|
|
2751
2752
|
>>> from mindspore import Tensor
|
|
2752
2753
|
>>> from mindspore import ops
|
|
2753
2754
|
>>> import numpy as np
|
|
@@ -3000,11 +3001,12 @@ def dense(input, weight, bias=None):
|
|
|
3000
3001
|
|
|
3001
3002
|
Examples:
|
|
3002
3003
|
>>> import numpy as np
|
|
3004
|
+
>>> import mindspore
|
|
3003
3005
|
>>> from mindspore import Tensor, ops
|
|
3004
|
-
>>> input =
|
|
3005
|
-
>>> weight =
|
|
3006
|
-
>>> bias =
|
|
3007
|
-
>>> output =
|
|
3006
|
+
>>> input = Tensor([[-1., 1., 2.], [-3., -3., 1.]], mindspore.float32)
|
|
3007
|
+
>>> weight = Tensor([[-2., -2., -2.], [0., -1., 0.]], mindspore.float32)
|
|
3008
|
+
>>> bias = Tensor([0., 1.], mindspore.float32)
|
|
3009
|
+
>>> output = ops.dense(input, weight, bias)
|
|
3008
3010
|
>>> print(output)
|
|
3009
3011
|
[[-4. 0.]
|
|
3010
3012
|
[10. 4.]]
|
|
@@ -3992,8 +3994,8 @@ def l1_loss(input, target, reduction='mean'):
|
|
|
3992
3994
|
Examples:
|
|
3993
3995
|
>>> from mindspore import Tensor, ops
|
|
3994
3996
|
>>> from mindspore import dtype as mstype
|
|
3995
|
-
>>> x =
|
|
3996
|
-
>>> target =
|
|
3997
|
+
>>> x = Tensor([[1, 2, 3], [4, 5, 6]], mstype.float32)
|
|
3998
|
+
>>> target = Tensor([[6, 5, 4], [3, 2, 1]], mstype.float32)
|
|
3997
3999
|
>>> output = ops.l1_loss(x, target, reduction="mean")
|
|
3998
4000
|
>>> print(output)
|
|
3999
4001
|
3.0
|
|
@@ -5342,7 +5344,7 @@ def conv1d(input, weight, bias=None, stride=1, pad_mode="valid", padding=0, dila
|
|
|
5342
5344
|
>>> from mindspore import Tensor, ops
|
|
5343
5345
|
>>> x = Tensor(np.arange(64).reshape((4, 4, 4)), mindspore.float32)
|
|
5344
5346
|
>>> weight = Tensor(np.arange(8).reshape((2, 2, 2)), mindspore.float32)
|
|
5345
|
-
>>> bias = Tensor([-0.12345, 2.7683],
|
|
5347
|
+
>>> bias = Tensor([-0.12345, 2.7683], mindspore.float32)
|
|
5346
5348
|
>>> output = ops.conv1d(x, weight, pad_mode='pad', padding=(1,), bias=bias, groups=2)
|
|
5347
5349
|
>>> print(output.shape)
|
|
5348
5350
|
(4, 2, 5)
|
|
@@ -7444,8 +7446,8 @@ def max_pool2d(x, kernel_size, stride=None, padding=0, dilation=1, return_indice
|
|
|
7444
7446
|
return out
|
|
7445
7447
|
|
|
7446
7448
|
|
|
7447
|
-
def prompt_flash_attention(query, key, value,
|
|
7448
|
-
actual_seq_lengths_kv, deq_scale1, quant_scale1,
|
|
7449
|
+
def prompt_flash_attention(query, key, value, attn_mask, actual_seq_lengths,
|
|
7450
|
+
actual_seq_lengths_kv, pse_shift, deq_scale1, quant_scale1,
|
|
7449
7451
|
deq_scale2, quant_scale2, quant_offset2, num_heads, scale_value=1.0, pre_tokens=2147483547,
|
|
7450
7452
|
next_tokens=0, input_layout='BSH',
|
|
7451
7453
|
num_key_value_heads=0, sparse_mode=0):
|
|
@@ -7468,11 +7470,11 @@ def prompt_flash_attention(query, key, value, padding_mask, attn_mask, actual_se
|
|
|
7468
7470
|
Input tensor of shape :math:`(B, S, H)` / `(B, N, S, D)`.
|
|
7469
7471
|
value (Tensor) - The value tensor with data type of float16 or float32.
|
|
7470
7472
|
Input tensor of shape :math:`(B, S, H)` / `(B, N, S, D)`.
|
|
7471
|
-
padding_mask (Tensor) - The padding mask tensor with data type of float16 or float32
|
|
7472
7473
|
attn_mask (Tensor) - The attention mask tensor with data type of float16 or float32.
|
|
7473
7474
|
For each element, 0 indicates retention and 1 indicates discard. Input tensor of shape :math:`(B, 1, S, S)`.
|
|
7474
7475
|
actual_seq_lengths (list[int]): Describe actual sequence length of each input with data type of int.
|
|
7475
7476
|
actual_seq_lengths_kv (list[int]): Describe actual sequence length of each input with data type of int.
|
|
7477
|
+
pse_shift (Tensor) - The position encoding tensor with data type of float16 or float32.
|
|
7476
7478
|
dep_scale1 (Tensor)
|
|
7477
7479
|
quant_scale1 (Tensor)
|
|
7478
7480
|
deq_scale2 (Tensor)
|
|
@@ -7516,7 +7518,7 @@ def prompt_flash_attention(query, key, value, padding_mask, attn_mask, actual_se
|
|
|
7516
7518
|
|
|
7517
7519
|
pfa = _get_cache_prim(NN_OPS.PromptFlashAttention)(num_heads, scale_value, pre_tokens, next_tokens, input_layout,
|
|
7518
7520
|
num_key_value_heads, sparse_mode)
|
|
7519
|
-
return pfa(query, key, value,
|
|
7521
|
+
return pfa(query, key, value, attn_mask, actual_seq_lengths, actual_seq_lengths_kv, pse_shift, deq_scale1,
|
|
7520
7522
|
quant_scale1, deq_scale2, quant_scale2, quant_offset2)
|
|
7521
7523
|
|
|
7522
7524
|
|
|
@@ -99,6 +99,7 @@ def assign_sub(variable, value):
|
|
|
99
99
|
>>> import mindspore
|
|
100
100
|
>>> import numpy as np
|
|
101
101
|
>>> from mindspore import Tensor, ops
|
|
102
|
+
>>> from mindspore.common.initializer import initializer
|
|
102
103
|
>>> variable = mindspore.Parameter(initializer(1, [1], mindspore.int32), name="global_step")
|
|
103
104
|
>>> value = Tensor(np.ones([1]).astype(np.int32) * 100)
|
|
104
105
|
>>> ops.assign_sub(variable, value)
|
|
@@ -149,6 +150,7 @@ def assign_add(variable, value):
|
|
|
149
150
|
>>> import mindspore
|
|
150
151
|
>>> import numpy as np
|
|
151
152
|
>>> from mindspore import Tensor, ops
|
|
153
|
+
>>> from mindspore.common.initializer import initializer
|
|
152
154
|
>>> variable = mindspore.Parameter(initializer(1, [1], mindspore.int32), name="global_step")
|
|
153
155
|
>>> value = Tensor(np.ones([1]).astype(np.int32) * 100)
|
|
154
156
|
>>> ops.assign_add(variable, value)
|
|
@@ -28,7 +28,7 @@ def csr_cos(x: CSRTensor) -> CSRTensor:
|
|
|
28
28
|
out_i = \cos(x_i)
|
|
29
29
|
|
|
30
30
|
.. warning::
|
|
31
|
-
Currently support data types float16 and float32. If use
|
|
31
|
+
Currently support data types float16 and float32. If use float64, there may be a problem of missing precision.
|
|
32
32
|
|
|
33
33
|
Args:
|
|
34
34
|
x (CSRTensor): Input CSRTensor.
|
|
@@ -69,7 +69,7 @@ def coo_cos(x: COOTensor) -> COOTensor:
|
|
|
69
69
|
out_i = \cos(x_i)
|
|
70
70
|
|
|
71
71
|
.. warning::
|
|
72
|
-
If use
|
|
72
|
+
If use float64, there may be a problem of missing precision.
|
|
73
73
|
|
|
74
74
|
Args:
|
|
75
75
|
x (COOTensor): Input COOTensor.
|
|
@@ -118,7 +118,7 @@ from .nn_ops import (LSTM, SGD, Adam, AdamWeightDecay, FusedSparseAdam, FusedSpa
|
|
|
118
118
|
Dilation2D, DataFormatVecPermute, DeformableOffsets, Dense, FractionalAvgPool,
|
|
119
119
|
FractionalMaxPool, FractionalMaxPool3DWithFixedKsize, FractionalMaxPoolWithFixedKsize,
|
|
120
120
|
GridSampler2D, TripletMarginLoss, UpsampleNearest3D, UpsampleTrilinear3D, PadV3, ChannelShuffle,
|
|
121
|
-
GLU, MaxUnpool3D, Pdist, RmsNorm)
|
|
121
|
+
GLU, MaxUnpool3D, Pdist, RmsNorm, PagedAttention, PagedAttentionMask, ReshapeAndCache)
|
|
122
122
|
from .other_ops import (Assign, IOU, BoundingBoxDecode, BoundingBoxEncode,
|
|
123
123
|
ConfusionMatrix, UpdateState, Load, StopGradient,
|
|
124
124
|
CheckValid, Partial, Depend, Push, Pull, PyExecute, PyFunc, _DynamicLossScale,
|
|
@@ -692,7 +692,10 @@ __all__ = [
|
|
|
692
692
|
"MaskedScatter",
|
|
693
693
|
"Ormqr",
|
|
694
694
|
"RandpermV2",
|
|
695
|
-
"RmsNorm"
|
|
695
|
+
"RmsNorm",
|
|
696
|
+
"PagedAttention",
|
|
697
|
+
"PagedAttentionMask",
|
|
698
|
+
"ReshapeAndCache"
|
|
696
699
|
]
|
|
697
700
|
|
|
698
701
|
__custom__ = [
|
|
@@ -65,7 +65,7 @@ class SubAndFilter(PrimitiveWithCheck):
|
|
|
65
65
|
|
|
66
66
|
Inputs:
|
|
67
67
|
- **input_x** (Tensor) - Input tensor.
|
|
68
|
-
- **max_num** (
|
|
68
|
+
- **max_num** (int) - The max value of element that after sub `offset`.
|
|
69
69
|
- **offset** (int) - Specifies the offset value of this `input_x`.
|
|
70
70
|
|
|
71
71
|
Outputs:
|
|
@@ -3861,10 +3861,10 @@ class FlashAttentionScoreGrad(Primitive):
|
|
|
3861
3861
|
validator.check_value_type('input_layout', input_layout, [str], self.name)
|
|
3862
3862
|
if input_layout not in ["BSH", "BNSD"]:
|
|
3863
3863
|
raise ValueError(f"Attribute 'input_layout' must be either 'BSH' or 'BNSD', but got {input_layout}")
|
|
3864
|
-
self.init_prim_io_names(inputs=['query', 'key', 'value', '
|
|
3865
|
-
'
|
|
3864
|
+
self.init_prim_io_names(inputs=['query', 'key', 'value', 'dy', 'pse_shift', 'drop_mask', "padding_mask",
|
|
3865
|
+
'attn_mask', 'softmax_max', 'softmax_sum', 'softmax_out', 'attention_in',
|
|
3866
3866
|
'prefix'],
|
|
3867
|
-
outputs=['dq', 'dk', 'dv'])
|
|
3867
|
+
outputs=['dq', 'dk', 'dv', 'dpse'])
|
|
3868
3868
|
|
|
3869
3869
|
|
|
3870
3870
|
class RmsNormGrad(Primitive):
|
|
@@ -3882,4 +3882,3 @@ class RmsNormGrad(Primitive):
|
|
|
3882
3882
|
"""Initialize RmsNormGrad."""
|
|
3883
3883
|
self.init_prim_io_names(inputs=["dy", "x", "rstd", "gamma"],
|
|
3884
3884
|
outputs=["dx", "dgamma"])
|
|
3885
|
-
|
|
@@ -16,6 +16,7 @@
|
|
|
16
16
|
"""Inner operators."""
|
|
17
17
|
from types import FunctionType, MethodType
|
|
18
18
|
from collections.abc import Iterable
|
|
19
|
+
import os
|
|
19
20
|
import numpy as np
|
|
20
21
|
|
|
21
22
|
from mindspore.common import Tensor
|
|
@@ -1008,7 +1009,7 @@ class Centralization(PrimitiveWithInfer):
|
|
|
1008
1009
|
|
|
1009
1010
|
Inputs:
|
|
1010
1011
|
- **input_x** (Tensor) - The input tensor. The data type mast be float16 or float32.
|
|
1011
|
-
- **axis** (Union[
|
|
1012
|
+
- **axis** (Union[int, Tuple(int), List(int)]) - The dimensions to reduce. Default: (), reduce all dimensions.
|
|
1012
1013
|
Only constant value is allowed. Must be in the range [-rank(input_x), rank(input_x)).
|
|
1013
1014
|
|
|
1014
1015
|
Outputs:
|
|
@@ -2858,6 +2859,60 @@ class DecoderKVCache(Primitive):
|
|
|
2858
2859
|
self.add_prim_attr('side_effect_mem', True)
|
|
2859
2860
|
|
|
2860
2861
|
|
|
2862
|
+
class _MirrorSilentCheck(PrimitiveWithInfer):
|
|
2863
|
+
"""
|
|
2864
|
+
The operator _MirrorSilentCheck implements accuracy-sensitive detection on the tensor input in backpropagator.
|
|
2865
|
+
Call _MirrorSilentCheck in method __call__ of derived class to implement accuracy-sensitive detection.
|
|
2866
|
+
|
|
2867
|
+
Inputs:
|
|
2868
|
+
- **input** (Tensor) : The tensor used for detection.
|
|
2869
|
+
Its data type must be mindspore.float16, mindspore.float32 or mindspore.bfloat16.
|
|
2870
|
+
- **pre_val** (Parameter(Tensor)) : Support parameter in accuracy-sensitive detection.
|
|
2871
|
+
Please only generated by method generate_params() of ASDBase.
|
|
2872
|
+
- **min_val** (Parameter(Tensor)) : Support parameter in accuracy-sensitive detection.
|
|
2873
|
+
Please only generated by method generate_params() of ASDBase.
|
|
2874
|
+
- **max_val** (Parameter(Tensor)) : Support parameter in accuracy-sensitive detection.
|
|
2875
|
+
Please only generated by method generate_params() of ASDBase.
|
|
2876
|
+
- **cnt** (Parameter(Tensor)) : Support parameter in accuracy-sensitive detection.
|
|
2877
|
+
Please only generated by method generate_params() of ASDBase.
|
|
2878
|
+
After each invocation of _MirrorSilentCheck, increment the value of cnt by one.
|
|
2879
|
+
|
|
2880
|
+
Outputs:
|
|
2881
|
+
- **output** (Tensor) - Same shape, type and value as `input`.
|
|
2882
|
+
"""
|
|
2883
|
+
@prim_attr_register
|
|
2884
|
+
def __init__(self, min_steps=8):
|
|
2885
|
+
upper_thresh, sigma_thresh = self.get_thresh()
|
|
2886
|
+
self.min_steps = min_steps
|
|
2887
|
+
self.thresh_l1 = upper_thresh[0]
|
|
2888
|
+
self.coeff_l1 = sigma_thresh[0]
|
|
2889
|
+
self.thresh_l2 = upper_thresh[1]
|
|
2890
|
+
self.coeff_l2 = sigma_thresh[1]
|
|
2891
|
+
self.add_prim_attr('side_effect_mem', True)
|
|
2892
|
+
|
|
2893
|
+
def parse_thresh(self, env_var_name, default_value, min_value):
|
|
2894
|
+
env_var = os.environ.get(env_var_name, default=default_value)
|
|
2895
|
+
thresh = [value.strip() for value in env_var.split(",")]
|
|
2896
|
+
if len(thresh) != 2 or not all(value.isdigit() for value in thresh):
|
|
2897
|
+
thresh = default_value.split(",")
|
|
2898
|
+
thresh = [float(max(int(value), min_value)) for value in thresh]
|
|
2899
|
+
if thresh[0] <= thresh[1]:
|
|
2900
|
+
thresh = [float(value) for value in default_value.split(",")]
|
|
2901
|
+
|
|
2902
|
+
return thresh
|
|
2903
|
+
|
|
2904
|
+
def get_thresh(self):
|
|
2905
|
+
upper_thresh = self.parse_thresh("NPU_ASD_UPPER_THRESH", "1000000,10000", 3)
|
|
2906
|
+
sigma_thresh = self.parse_thresh("NPU_ASD_SIGMA_THRESH", "100000,5000", 3)
|
|
2907
|
+
return upper_thresh, sigma_thresh
|
|
2908
|
+
|
|
2909
|
+
def infer_shape(self, x_shape, pre_shape, min_shape, max_shape, n_step, loss_scale_shape):
|
|
2910
|
+
return x_shape
|
|
2911
|
+
|
|
2912
|
+
def infer_dtype(self, x_dtype, pre_dtype, min_dtype, max_dtype, n_dtype, loss_scale_dtype):
|
|
2913
|
+
return x_dtype
|
|
2914
|
+
|
|
2915
|
+
|
|
2861
2916
|
class PromptKVCache(Primitive):
|
|
2862
2917
|
r"""
|
|
2863
2918
|
The PromptKVCache is used for prefill the KVCache of transformer network.
|
|
@@ -579,7 +579,7 @@ class FakeQuantWithMinMaxVars(PrimitiveWithInfer):
|
|
|
579
579
|
range is [1, 2^num_bits-1]. Default: ``False``.
|
|
580
580
|
|
|
581
581
|
Inputs:
|
|
582
|
-
- **x** (Tensor) -
|
|
582
|
+
- **x** (Tensor) - float32 tensor representing the shape of the output tensor.
|
|
583
583
|
- **min** (Tensor) - Value of the min range of the input data x.
|
|
584
584
|
- **max** (Tensor) - Value of the max range of the input data x.
|
|
585
585
|
|
|
@@ -638,7 +638,7 @@ class FakeQuantWithMinMaxVarsGradient(PrimitiveWithInfer):
|
|
|
638
638
|
|
|
639
639
|
Inputs:
|
|
640
640
|
- **gradients** (Tensor) - The gradient above the FakeQuantWithMinMaxVars.
|
|
641
|
-
- **x** (Tensor) -
|
|
641
|
+
- **x** (Tensor) - float32 tensor representing the shape of the output tensor.
|
|
642
642
|
- **min** (Tensor) - Value of the min range of the input data x.
|
|
643
643
|
- **max** (Tensor) - Value of the max range of the input data x.
|
|
644
644
|
|
|
@@ -702,7 +702,7 @@ class FakeQuantWithMinMaxVarsPerChannel(PrimitiveWithInfer):
|
|
|
702
702
|
range is [1, 2^num_bits-1]. Default: ``False``.
|
|
703
703
|
|
|
704
704
|
Inputs:
|
|
705
|
-
- **x** (Tensor) -
|
|
705
|
+
- **x** (Tensor) - float32 tensor representing the shape of the output tensor.
|
|
706
706
|
- **min** (Tensor) - Value of the min range of the input data x.
|
|
707
707
|
- **max** (Tensor) - Value of the max range of the input data x.
|
|
708
708
|
|
|
@@ -754,7 +754,7 @@ class FakeQuantWithMinMaxVarsPerChannelGradient(PrimitiveWithInfer):
|
|
|
754
754
|
|
|
755
755
|
Inputs:
|
|
756
756
|
- **gradients** (Tensor) - The gradient above the FakeQuantWithMinMaxVars.
|
|
757
|
-
- **x** (Tensor) -
|
|
757
|
+
- **x** (Tensor) - float32 tensor representing the shape of the output tensor.
|
|
758
758
|
- **min** (Tensor) - Value of the min range of the input data x.
|
|
759
759
|
- **max** (Tensor) - Value of the max range of the input data x.
|
|
760
760
|
|
|
@@ -341,7 +341,7 @@ class LSTMV2(Primitive):
|
|
|
341
341
|
- **h** (Tensor) - Tensor of shape (num_directions * `num_layers`, batch_size, `hidden_size`).
|
|
342
342
|
- **c** (Tensor) - Tensor of shape (num_directions * `num_layers`, batch_size, `hidden_size`).
|
|
343
343
|
- **w** (Tensor) - The input tensor which states for weights.
|
|
344
|
-
- **seq_lengths** (Tensor) - The Tensor[
|
|
344
|
+
- **seq_lengths** (Tensor) - The Tensor[int32] of shape (batch_size, ),
|
|
345
345
|
indicates the seq_length of each batch dim.
|
|
346
346
|
|
|
347
347
|
Outputs:
|