mindspore 2.2.0-cp38-cp38-win_amd64.whl → 2.2.11-cp38-cp38-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of mindspore might be problematic.
- mindspore/.commit_id +1 -1
- mindspore/_c_dataengine.cp38-win_amd64.pyd +0 -0
- mindspore/_c_expression.cp38-win_amd64.pyd +0 -0
- mindspore/_c_mindrecord.cp38-win_amd64.pyd +0 -0
- mindspore/_checkparam.py +3 -3
- mindspore/_extends/graph_kernel/model/graph_split.py +84 -76
- mindspore/_extends/graph_kernel/splitter.py +3 -2
- mindspore/_extends/parallel_compile/akg_compiler/build_tbe_kernel.py +83 -66
- mindspore/_extends/parallel_compile/akg_compiler/tbe_topi.py +4 -4
- mindspore/_extends/parallel_compile/akg_compiler/util.py +10 -7
- mindspore/_extends/parallel_compile/tbe_compiler/tbe_helper.py +2 -1
- mindspore/_extends/parse/__init__.py +3 -2
- mindspore/_extends/parse/parser.py +6 -1
- mindspore/_extends/parse/standard_method.py +14 -11
- mindspore/_extends/remote/kernel_build_server.py +2 -1
- mindspore/common/_utils.py +16 -0
- mindspore/common/api.py +1 -1
- mindspore/common/auto_dynamic_shape.py +81 -85
- mindspore/common/dump.py +1 -1
- mindspore/common/tensor.py +3 -20
- mindspore/config/op_info.config +1 -1
- mindspore/context.py +11 -4
- mindspore/dataset/engine/cache_client.py +8 -5
- mindspore/dataset/engine/datasets_standard_format.py +5 -0
- mindspore/dataset/vision/transforms.py +21 -21
- mindspore/experimental/optim/adam.py +1 -1
- mindspore/gen_ops.py +1 -1
- mindspore/include/api/model.h +17 -0
- mindspore/include/api/status.h +8 -3
- mindspore/mindspore_backend.dll +0 -0
- mindspore/mindspore_common.dll +0 -0
- mindspore/mindspore_core.dll +0 -0
- mindspore/mindspore_shared_lib.dll +0 -0
- mindspore/nn/cell.py +0 -3
- mindspore/nn/layer/activation.py +4 -5
- mindspore/nn/layer/conv.py +39 -23
- mindspore/nn/layer/flash_attention.py +54 -129
- mindspore/nn/layer/math.py +3 -7
- mindspore/nn/layer/rnn_cells.py +5 -5
- mindspore/nn/wrap/__init__.py +4 -2
- mindspore/nn/wrap/cell_wrapper.py +12 -3
- mindspore/numpy/utils_const.py +5 -5
- mindspore/opencv_core452.dll +0 -0
- mindspore/opencv_imgcodecs452.dll +0 -0
- mindspore/opencv_imgproc452.dll +0 -0
- mindspore/ops/_grad_experimental/grad_array_ops.py +1 -1
- mindspore/ops/_grad_experimental/grad_implementations.py +2 -2
- mindspore/ops/_grad_experimental/grad_math_ops.py +19 -18
- mindspore/ops/_grad_experimental/grad_sparse_ops.py +3 -3
- mindspore/ops/_op_impl/aicpu/add.py +3 -3
- mindspore/ops/_op_impl/aicpu/linear_sum_assignment.py +21 -2
- mindspore/ops/_utils/utils.py +2 -0
- mindspore/ops/composite/multitype_ops/_compile_utils.py +2 -1
- mindspore/ops/composite/multitype_ops/getitem_impl.py +2 -2
- mindspore/ops/function/array_func.py +10 -7
- mindspore/ops/function/grad/grad_func.py +0 -1
- mindspore/ops/function/nn_func.py +98 -9
- mindspore/ops/function/random_func.py +2 -1
- mindspore/ops/op_info_register.py +24 -21
- mindspore/ops/operations/__init__.py +6 -2
- mindspore/ops/operations/_grad_ops.py +25 -6
- mindspore/ops/operations/_inner_ops.py +155 -23
- mindspore/ops/operations/array_ops.py +9 -7
- mindspore/ops/operations/comm_ops.py +2 -2
- mindspore/ops/operations/custom_ops.py +85 -68
- mindspore/ops/operations/inner_ops.py +26 -3
- mindspore/ops/operations/math_ops.py +7 -6
- mindspore/ops/operations/nn_ops.py +193 -49
- mindspore/parallel/_parallel_serialization.py +10 -3
- mindspore/parallel/_tensor.py +4 -1
- mindspore/parallel/checkpoint_transform.py +13 -2
- mindspore/parallel/shard.py +17 -10
- mindspore/profiler/common/util.py +1 -0
- mindspore/profiler/parser/ascend_hccl_generator.py +232 -0
- mindspore/profiler/parser/ascend_msprof_exporter.py +86 -43
- mindspore/profiler/parser/ascend_msprof_generator.py +196 -9
- mindspore/profiler/parser/ascend_op_generator.py +1 -1
- mindspore/profiler/parser/ascend_timeline_generator.py +6 -182
- mindspore/profiler/parser/base_timeline_generator.py +1 -1
- mindspore/profiler/parser/cpu_gpu_timeline_generator.py +2 -2
- mindspore/profiler/parser/framework_parser.py +1 -1
- mindspore/profiler/parser/profiler_info.py +19 -0
- mindspore/profiler/profiling.py +46 -24
- mindspore/rewrite/api/pattern_engine.py +1 -1
- mindspore/rewrite/parsers/for_parser.py +7 -7
- mindspore/rewrite/parsers/module_parser.py +4 -4
- mindspore/rewrite/symbol_tree.py +1 -4
- mindspore/run_check/_check_version.py +5 -3
- mindspore/safeguard/rewrite_obfuscation.py +52 -28
- mindspore/train/callback/_summary_collector.py +1 -1
- mindspore/train/dataset_helper.py +1 -0
- mindspore/train/model.py +2 -2
- mindspore/train/serialization.py +97 -11
- mindspore/train/summary/_summary_adapter.py +1 -1
- mindspore/train/summary/summary_record.py +23 -7
- mindspore/version.py +1 -1
- {mindspore-2.2.0.dist-info → mindspore-2.2.11.dist-info}/METADATA +3 -2
- {mindspore-2.2.0.dist-info → mindspore-2.2.11.dist-info}/RECORD +101 -112
- mindspore/ops/_op_impl/_custom_op/flash_attention/__init__.py +0 -0
- mindspore/ops/_op_impl/_custom_op/flash_attention/attention.py +0 -406
- mindspore/ops/_op_impl/_custom_op/flash_attention/constants.py +0 -41
- mindspore/ops/_op_impl/_custom_op/flash_attention/flash_attention_bwd.py +0 -467
- mindspore/ops/_op_impl/_custom_op/flash_attention/flash_attention_fwd.py +0 -563
- mindspore/ops/_op_impl/_custom_op/flash_attention/flash_attention_impl.py +0 -193
- mindspore/ops/_op_impl/_custom_op/flash_attention/tik_ops_utils.py +0 -435
- mindspore/ops/_op_impl/_custom_op/flash_attention/tiling_strategy/__init__.py +0 -0
- mindspore/ops/_op_impl/_custom_op/flash_attention/tiling_strategy/sparse_tiling.py +0 -45
- mindspore/ops/_op_impl/_custom_op/flash_attention/tiling_strategy/strategy.py +0 -67
- mindspore/ops/_op_impl/_custom_op/flash_attention/tiling_strategy/wukong_tiling.py +0 -62
- {mindspore-2.2.0.dist-info → mindspore-2.2.11.dist-info}/WHEEL +0 -0
- {mindspore-2.2.0.dist-info → mindspore-2.2.11.dist-info}/entry_points.txt +0 -0
- {mindspore-2.2.0.dist-info → mindspore-2.2.11.dist-info}/top_level.txt +0 -0
mindspore/nn/layer/math.py
CHANGED

@@ -375,9 +375,6 @@ class DiGamma(Cell):
                              nan, real_result)


-eps_fp32 = Tensor(np.finfo(np.float32).eps, mstype.float32)
-
-
 def _while_helper_func(cond, body, vals):
     while cond(vals).any():
         vals = body(vals)
@@ -394,7 +391,7 @@ def _igamma_series(ax, x, a, enabled):
    select = P.Select()

    # If more data types are supported, this epsilon need to be selected.
-    epsilon =
+    epsilon = Tensor(np.finfo(np.float32).eps, mstype.float32)

    def cond(vals):
        enabled = vals[0]
@@ -443,7 +440,7 @@ def _igammac_continued_fraction(ax, x, a, enabled):
    select = P.Select()

    # If more data types are supported, this epsilon need to be selected.
-    epsilon =
+    epsilon = Tensor(np.finfo(np.float32).eps, mstype.float32)

    def cond(vals):
        enabled = vals[0]
@@ -620,8 +617,7 @@ class IGamma(Cell):
         x = F.broadcast_to(x, para_shape)
         a = F.broadcast_to(a, para_shape)
         x_is_zero = self.equal(x, 0)
-
-        underflow = self.less(ax, self.neg(log_maxfloat))
+        underflow = self.less(ax, self.neg(self.log_maxfloat32))
         ax = self.exp(ax)
         enabled = self.logicalnot(self.logicalor(self.logicalor(x_is_zero, domain_error), underflow))
         output = self.select(use_igammac,
mindspore/nn/layer/rnn_cells.py
CHANGED

@@ -83,7 +83,7 @@ def _check_lstmcell_init(func):


 def _rnn_tanh_cell(inputs, hidden, w_ih, w_hh, b_ih, b_hh):
-
+    """RNN cell function with tanh activation"""
     if b_ih is None:
         igates = P.MatMul(False, True)(inputs, w_ih)
         hgates = P.MatMul(False, True)(hidden, w_hh)
@@ -94,7 +94,7 @@ def _rnn_tanh_cell(inputs, hidden, w_ih, w_hh, b_ih, b_hh):


 def _rnn_relu_cell(inputs, hidden, w_ih, w_hh, b_ih, b_hh):
-
+    """RNN cell function with relu activation"""
     if b_ih is None:
         igates = P.MatMul(False, True)(inputs, w_ih)
         hgates = P.MatMul(False, True)(hidden, w_hh)
@@ -105,7 +105,7 @@ def _rnn_relu_cell(inputs, hidden, w_ih, w_hh, b_ih, b_hh):


 def _lstm_cell(inputs, hidden, w_ih, w_hh, b_ih, b_hh):
-
+    """LSTM cell function"""
     hx, cx = hidden
     if b_ih is None:
         gates = P.MatMul(False, True)(inputs, w_ih) + P.MatMul(False, True)(hx, w_hh)
@@ -125,7 +125,7 @@ def _lstm_cell(inputs, hidden, w_ih, w_hh, b_ih, b_hh):


 def _gru_cell(inputs, hidden, w_ih, w_hh, b_ih, b_hh):
-
+    """GRU cell function"""
     if b_ih is None:
         gi = P.MatMul(False, True)(inputs, w_ih)
         gh = P.MatMul(False, True)(hidden, w_hh)
@@ -144,7 +144,7 @@ def _gru_cell(inputs, hidden, w_ih, w_hh, b_ih, b_hh):


 class RNNCellBase(Cell):
-
+    """Basic class for RNN Cells"""
     def __init__(self, input_size: int, hidden_size: int, has_bias: bool, num_chunks: int,
                  dtype=mstype.float32):
         super().__init__()
mindspore/nn/wrap/__init__.py
CHANGED

@@ -20,7 +20,8 @@ Use the Wrapper to combine the loss or build the training steps.
 from __future__ import absolute_import

 from mindspore.nn.wrap.cell_wrapper import ForwardValueAndGrad, TrainOneStepCell, WithLossCell, WithGradCell, \
-    WithEvalCell, ParameterUpdate, GetNextSingleOp, VirtualDatasetCellTriple, MicroBatchInterleaved, PipelineCell
+    WithEvalCell, ParameterUpdate, GetNextSingleOp, VirtualDatasetCellTriple, MicroBatchInterleaved, PipelineCell, \
+    GradAccumulationCell
 from mindspore.nn.wrap.loss_scale import TrainOneStepWithLossScaleCell,\
     DynamicLossScaleUpdateCell, FixedLossScaleUpdateCell
 from mindspore.nn.wrap.grad_reducer import DistributedGradReducer
@@ -42,5 +43,6 @@ __all__ = [
     "ParameterUpdate",
     "DynamicLossScaleUpdateCell",
     "FixedLossScaleUpdateCell",
-    "VirtualDatasetCellTriple"
+    "VirtualDatasetCellTriple",
+    "GradAccumulationCell"
 ]

mindspore/nn/wrap/cell_wrapper.py
CHANGED

@@ -644,6 +644,9 @@ class PipelineCell(Cell):
         self.micro_inputs = nn.CellList()
         self.micro_size = micro_size
         self.add_list = []
+        if not isinstance(network, Cell):
+            raise TypeError("For 'PipelineCell', the argument 'network' must cell type, "
+                            "but got the type : {}.".format(type(network)))
         if not isinstance(micro_size, int):
             raise TypeError("For 'PipelineCell', the argument 'micro_size' must be integer, "
                             "but got the type : {}.".format(type(micro_size)))
@@ -670,7 +673,7 @@

 class GradAccumulationCell(Cell):
     """
-    Wrap the network with Micro Batch.
+    Wrap the network with Micro Batch to enable the grad accumulation in semi_auto_parallel/auto_parallel mode.

     Args:
         network (Cell): The target network to wrap.
@@ -680,8 +683,11 @@ class GradAccumulationCell(Cell):
         ``Ascend`` ``GPU``

     Examples:
-        >>>
-        >>>
+        >>> import mindspore.nn as nn
+        >>> # Define the network structure of LeNet5. Refer to
+        >>> # https://gitee.com/mindspore/docs/blob/r2.2/docs/mindspore/code/lenet.py
+        >>> net = LeNet5()
+        >>> net = nn.GradAccumulationCell(net, 4)
     """
     def __init__(self, network, micro_size):
         super(GradAccumulationCell, self).__init__(auto_prefix=False)
@@ -689,6 +695,9 @@ class GradAccumulationCell(Cell):
         self.micro_inputs = nn.CellList()
         self.micro_size = micro_size
         self.add_list = []
+        if not isinstance(network, Cell):
+            raise TypeError("For 'GradAccumulationCell', the argument 'network' must cell type, "
+                            "but got the type : {}.".format(type(network)))
         if not isinstance(micro_size, int):
             raise TypeError("For 'GradAccumulationCell', the argument 'micro_size' must be integer, "
                             "but got the type : {}.".format(type(micro_size)))
mindspore/numpy/utils_const.py
CHANGED

@@ -143,8 +143,8 @@ def _infer_out_shape(*shapes):
     shape_out = list()
     max_len = max([len(it) for it in shapes])
     for i in range(max_len):
-        items = [
-
+        items = [
+            it[i - max_len + len(it)] if i - max_len + len(it) >= 0 else 1 for it in shapes]
         max_size = 0 if 0 in items else max(items)
         _check()
         shape_out.append(max_size)
@@ -158,8 +158,8 @@ def _can_broadcast(*shapes):
     """
     max_len = max([len(it) for it in shapes])
     for i in range(max_len):
-        items = [
-
+        items = [
+            it[i - max_len + len(it)] if i - max_len + len(it) >= 0 else 1 for it in shapes]
         max_size = 0 if 0 in items else max(items)
         if any(item not in (1, max_size) for item in items):
             return False
@@ -399,7 +399,7 @@ def _broadcast_tuples(tup1, tup2):
     if not isinstance(tup1, (tuple, list)) or not isinstance(tup2, (tuple, list)):
         raise TypeError("input shift and axis must be tuple or list or int.")
     if len(tup1) == len(tup2) or len(tup1) == 1 or len(tup2) == 1:
-        return
+        return
     raise ValueError("shape mismatch: objects cannot be broadcast to a single shape")

     tup1 = (tup1,) if isinstance(tup1, int) else tup1
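For context on _infer_out_shape and _can_broadcast above: they follow NumPy-style broadcasting, aligning shapes on the right, treating missing leading dimensions as 1, and taking the maximum aligned size per output dimension. A small NumPy sketch (illustrative only, not part of the diff):

import numpy as np

# Shapes are aligned on the right; size-1 (or missing) dims stretch to the larger size.
print(np.broadcast_shapes((2, 1, 4), (3, 1)))         # (2, 3, 4)
print((np.ones((2, 1, 4)) + np.ones((3, 1))).shape)   # (2, 3, 4)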
mindspore/opencv_core452.dll
CHANGED

Binary file

mindspore/opencv_imgproc452.dll
CHANGED

Binary file
mindspore/ops/_grad_experimental/grad_array_ops.py
CHANGED

@@ -203,7 +203,7 @@ def get_bprop_index_put(self):
         if is_ascend:
             indices_ms = [convert_idx_positive(indices_ms[i], x1.shape[i]) for i in range(len(indices_ms))]
         indices_me = stack(indices_ms)
-        indices_grad = F.transpose(indices_me, F.make_range(F.rank(indices_me)-1, -1, -1))
+        indices_grad = F.transpose(indices_me, F.make_range(F.rank(indices_me) - 1, -1, -1))
         values_grad = gather_nd(dout, indices_grad)
         if equal(cast(x2.shape[0], mstype.int32), Tensor(1)):
             values_grad = values_grad.sum().reshape(1)
mindspore/ops/_grad_experimental/grad_implementations.py
CHANGED

@@ -19,7 +19,7 @@ from mindspore.ops import functional as F
 from mindspore.ops import operations as P
 from mindspore.ops.composite import multitype_ops as C
 from mindspore.ops.composite.multitype_ops.zeros_like_impl import zeros_like
-from mindspore.ops._grad_experimental.grad_base import bprops
+from mindspore.ops._grad_experimental.grad_base import bprops, bprop_getters
 from mindspore.common import dtype as mstype

 get_dtype = P.DType()
@@ -193,7 +193,7 @@ def bprop_tensor_move(x, out, dout):
     return (dout,)


-@
+@bprop_getters.register("DictInplaceSetItem")
 def get_bprop_dict_inplace_setitem(self):
     """Generate bprop for dict inplace pop"""
mindspore/ops/_grad_experimental/grad_math_ops.py
CHANGED

@@ -135,7 +135,7 @@ def get_bprop_matrix_triangular_solve(self):

     def bprop(matrix, rhs, out, dout):
         grad_rhs = matrix_triangular_solve_op(matrix, dout)
-        if matrix.dtype
+        if matrix.dtype in (mstype.complex64, mstype.complex128):
             grad_rhs_temp = _adjoint(grad_rhs)
             out_temp = _adjoint(out)
         else:
@@ -156,14 +156,14 @@ def get_bprop_matrix_triangular_solve(self):
         grad_matrix = mat_mul_op(grad_rhs, out_temp)
         grad_matrix = neg_op(grad_matrix)
         if lower_a:
-            if grad_matrix.dtype
+            if grad_matrix.dtype in (mstype.complex64, mstype.complex128):
                 grad_matrix_real = matrix_band_part_op(real_op(grad_matrix), -1, 0)
                 grad_matrix_imag = matrix_band_part_op(imag_op(grad_matrix), -1, 0)
                 grad_matrix = complex_op(grad_matrix_real, grad_matrix_imag)
             else:
                 grad_matrix = matrix_band_part_op(grad_matrix, -1, 0)
         else:
-            if grad_matrix.dtype
+            if grad_matrix.dtype in (mstype.complex64, mstype.complex128):
                 grad_matrix_real = matrix_band_part_op(real_op(grad_matrix), 0, -1)
                 grad_matrix_imag = matrix_band_part_op(imag_op(grad_matrix), 0, -1)
                 grad_matrix = complex_op(grad_matrix_real, grad_matrix_imag)
@@ -219,7 +219,7 @@ def get_bprop_matrix_solve(self):
 @_primexpr
 def _generate_perm_matrix_solve_ls(x_dim):
     perm = tuple(range(x_dim - 2))
-    perm = perm + (x_dim-1, x_dim-2)
+    perm = perm + (x_dim - 1, x_dim - 2)
     return perm


@@ -647,20 +647,21 @@ def _fft_rank_offset(norm_shape, rank):
 @_primexpr
 def _fft_with_size_back_norm(norm_shape, norm, inverse, rank):
     """generate reverse term for fft_with_size"""
+    norm_ = None
     if inverse is False:
         if norm == "forward":
-            norm_ = 1 / _fft_rank_offset(norm_shape, rank)
-
-            norm_ = 1 * _fft_rank_offset(norm_shape, rank)
-
-            norm_ = 1
-
+            norm_ = 1.0 / _fft_rank_offset(norm_shape, rank)
+        elif norm == "backward":
+            norm_ = 1.0 * _fft_rank_offset(norm_shape, rank)
+        elif norm == "ortho":
+            norm_ = 1.0
+    else:
         if norm == "forward":
-            norm_ = 1 * _fft_rank_offset(norm_shape, rank)
-
-            norm_ = 1 / _fft_rank_offset(norm_shape, rank)
-
-            norm_ = 1
+            norm_ = 1.0 * _fft_rank_offset(norm_shape, rank)
+        elif norm == "backward":
+            norm_ = 1.0 / _fft_rank_offset(norm_shape, rank)
+        elif norm == "ortho":
+            norm_ = 1.0
     return norm_


@@ -670,9 +671,9 @@ def _rfft_norm(norm_shape, norm, rank):
     norm_ = 1.0
     if norm == "forward":
         norm_ = 1 / _fft_rank_offset(norm_shape, rank)
-
-        norm_ = 1
-
+    elif norm == "backward":
+        norm_ = 1.0
+    elif norm == "ortho":
         norm_ = 1 / np.sqrt(_fft_rank_offset(norm_shape, rank))
     return norm_

mindspore/ops/_grad_experimental/grad_sparse_ops.py
CHANGED

@@ -358,10 +358,10 @@ def get_bprop_ragged_tensor_to_sparse(self):
             split.append(zeros_like(i))
             all_d = (split, ragged_values_grad)
             return all_d
-
+        split_ = ()
         for i in enumerate(rt_nested_splits):
-
-            all_d = (
+            split_ = split_ + (zeros_like(i),)
+        all_d = (split_, ragged_values_grad)
         return all_d

     return bprop
mindspore/ops/_op_impl/aicpu/add.py
CHANGED

@@ -29,9 +29,9 @@ add_op_info = AiCPURegOp("Add") \
     .dtype_format(DataType.I32_Default, DataType.I32_Default, DataType.I32_Default) \
     .dtype_format(DataType.I64_Default, DataType.I64_Default, DataType.I64_Default) \
     .dtype_format(DataType.U8_Default, DataType.U8_Default, DataType.U8_Default) \
-    .dtype_format(DataType.U16_Default, DataType.
-    .dtype_format(DataType.U32_Default, DataType.
-    .dtype_format(DataType.U64_Default, DataType.
+    .dtype_format(DataType.U16_Default, DataType.U16_Default, DataType.U16_Default) \
+    .dtype_format(DataType.U32_Default, DataType.U32_Default, DataType.U32_Default) \
+    .dtype_format(DataType.U64_Default, DataType.U64_Default, DataType.U64_Default) \
     .dtype_format(DataType.C64_Default, DataType.C64_Default, DataType.C64_Default) \
     .dtype_format(DataType.C128_Default, DataType.C128_Default, DataType.C128_Default) \
     .get_op_info()
mindspore/ops/_op_impl/aicpu/linear_sum_assignment.py
CHANGED

@@ -1,4 +1,4 @@
-# Copyright
+# Copyright 2023 Huawei Technologies Co., Ltd
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -24,11 +24,30 @@ lsap_op_info = AiCPURegOp("LinearSumAssignment") \
     .input(2, 'maximize', "required") \
     .output(0, "row_ind", "required") \
     .output(1, "col_ind", "required") \
-    .attr("cust_aicpu", "str") \
     .dtype_format(DataType.F64_Default, DataType.I64_Default,
                   DataType.BOOL_Default, DataType.I64_Default, DataType.I64_Default,) \
     .dtype_format(DataType.F32_Default, DataType.I64_Default,
                   DataType.BOOL_Default, DataType.I64_Default, DataType.I64_Default,) \
+    .dtype_format(DataType.F16_Default, DataType.I64_Default,
+                  DataType.BOOL_Default, DataType.I64_Default, DataType.I64_Default,) \
+    .dtype_format(DataType.BOOL_Default, DataType.I64_Default,
+                  DataType.BOOL_Default, DataType.I64_Default, DataType.I64_Default,) \
+    .dtype_format(DataType.I16_Default, DataType.I64_Default,
+                  DataType.BOOL_Default, DataType.I64_Default, DataType.I64_Default,) \
+    .dtype_format(DataType.I32_Default, DataType.I64_Default,
+                  DataType.BOOL_Default, DataType.I64_Default, DataType.I64_Default,) \
+    .dtype_format(DataType.I64_Default, DataType.I64_Default,
+                  DataType.BOOL_Default, DataType.I64_Default, DataType.I64_Default,) \
+    .dtype_format(DataType.I8_Default, DataType.I64_Default,
+                  DataType.BOOL_Default, DataType.I64_Default, DataType.I64_Default,) \
+    .dtype_format(DataType.U16_Default, DataType.I64_Default,
+                  DataType.BOOL_Default, DataType.I64_Default, DataType.I64_Default,) \
+    .dtype_format(DataType.U32_Default, DataType.I64_Default,
+                  DataType.BOOL_Default, DataType.I64_Default, DataType.I64_Default,) \
+    .dtype_format(DataType.U64_Default, DataType.I64_Default,
+                  DataType.BOOL_Default, DataType.I64_Default, DataType.I64_Default,) \
+    .dtype_format(DataType.U8_Default, DataType.I64_Default,
+                  DataType.BOOL_Default, DataType.I64_Default, DataType.I64_Default,) \
     .get_op_info()

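For orientation, the LinearSumAssignment kernel registered above solves the classic assignment problem; SciPy's linear_sum_assignment, which this interface presumably mirrors, gives a quick reference for the expected row_ind/col_ind outputs. A sketch (illustrative only, not part of the diff):

import numpy as np
from scipy.optimize import linear_sum_assignment

# Minimize the total cost of a one-to-one row/column assignment.
cost = np.array([[4.0, 1.0, 3.0],
                 [2.0, 0.0, 5.0],
                 [3.0, 2.0, 2.0]])
row_ind, col_ind = linear_sum_assignment(cost, maximize=False)
print(row_ind, col_ind)              # [0 1 2] [1 0 2]
print(cost[row_ind, col_ind].sum())  # 5.0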
mindspore/ops/_utils/utils.py
CHANGED

@@ -77,10 +77,12 @@ def get_broadcast_shape(x_shape, y_shape, prim_name, arg_name1="x", arg_name2="y
     broadcast_shape = list(broadcast_shape_front) + broadcast_shape_back
     return broadcast_shape

+
 def dim_not_equal(dim1, dim2):
     """Compare dim in shape"""
     return dim1 != dim2 and dim1 >= 0 and dim2 >= 0

+
 def get_concat_offset(x_shp, x_type, axis, prim_name):
     """for concat and concatoffset check args and compute offset"""
     validator.check_value_type("shape", x_shp, [tuple, list], prim_name)
mindspore/ops/composite/multitype_ops/_compile_utils.py
CHANGED

@@ -1255,7 +1255,8 @@ def _tensor_setitem_by_bool_tensor_with_tensor(data, index, value):
     index = index.reshape(const_utils.generate_padding_shape(index.shape, len(data.shape)))
     index = F.broadcast_to(index, data.shape)
     value = F.cast(value, F.dtype(data))
-
+    while value.ndim < data.ndim:
+        value = value.unsqueeze(-1)
     value = F.broadcast_to(value, data.shape)
     result = F.select(index, value, data)
     return result
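The while-loop added above pads the value with trailing axes until its rank matches the data before broadcasting. A small NumPy analogue, with expand_dims standing in for Tensor.unsqueeze (illustrative only, not part of the diff):

import numpy as np

data = np.zeros((2, 3, 4))
value = np.array([1.0, 2.0])   # lower rank than data

# Add trailing axes until the ranks match, then broadcast to data.shape.
while value.ndim < data.ndim:
    value = np.expand_dims(value, -1)
print(np.broadcast_to(value, data.shape).shape)  # (2, 3, 4)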
mindspore/ops/composite/multitype_ops/getitem_impl.py
CHANGED

@@ -161,7 +161,7 @@ def _tuple_getitem_by_slice(data, slice_index):
         if start is None:
             start = 0 if step >= 1 else -1
         if stop is None:
-            stop = (2**31-1) if step >= 1 else -(2**31-1)
+            stop = (2**31 - 1) if step >= 1 else -(2**31 - 1)
         return sequence_slice(data, start, stop, step)
     return _tuple_slice(data, slice_index)

@@ -236,7 +236,7 @@ def _list_getitem_by_slice(data, slice_index):
         if start is None:
             start = 0 if step >= 1 else -1
         if stop is None:
-            stop = (2**31-1) if step >= 1 else -(2**31-1)
+            stop = (2**31 - 1) if step >= 1 else -(2**31 - 1)
         return sequence_slice(data, start, stop, step)
     return _list_slice(data, slice_index)

mindspore/ops/function/array_func.py
CHANGED

@@ -660,14 +660,14 @@ def one_hot(indices, depth, on_value=1, off_value=0, axis=-1):

     Note:
         If the input indices is rank `N`, the output will have rank `N+1`. The new axis is created at dimension `axis`.
+        On Ascend, if `on_value` is Int64 dtype, `indices` must be Int64 dtype.

     Args:
         indices(Tensor): A tensor of indices. Tensor of shape :math:`(X_0, \ldots, X_n)`.
             Data type must be int32 or int64.
         depth(int): A scalar defining the depth of the one-hot dimension.
         on_value(Union[Tensor, int, float], optional): A value to fill in output when `indices[j] = i`.
-
-            bool, complex64, complex128. Default: ``1`` .
+            Data type must be int32, int64, float16 or float32. Default: ``1`` .
         off_value(Union[Tensor, int, float], optional): A value to fill in output when `indices[j] != i`.
             Has the same data type as `on_value`. Default: ``0`` .
         axis(int, optional): Position to insert the value. e.g. If shape of `self` is :math:`(N, C)`, and `axis` is -1,
@@ -676,7 +676,8 @@ def one_hot(indices, depth, on_value=1, off_value=0, axis=-1):
             Default: ``-1`` .

     Returns:
-        Tensor, one-hot tensor. Tensor of shape :math:`(X_0, \ldots, X_{axis}, \text{depth} ,X_{axis+1}, \ldots, X_n)
+        Tensor, one-hot tensor. Tensor of shape :math:`(X_0, \ldots, X_{axis}, \text{depth} ,X_{axis+1}, \ldots, X_n)`,
+        and it has the same data type as `on_value`.

     Raises:
         TypeError: If `axis` or `depth` is not an int.
@@ -1734,7 +1735,11 @@ def flatten(input, order='C', *, start_dim=1, end_dim=-1):
         raise TypeError(f"For 'flatten', both 'start_dim' and 'end_dim' must be int.")
     check_flatten_order_const(order)
     if order == 'F':
-
+        x_rank = rank_(input)
+        # If input is a 0-dimensional Tensor, a 1-dimensional Tensor will be returned.
+        if x_rank in (0, 1):
+            return reshape_(input, (-1,))
+        perm = ops.make_range(0, x_rank)
         new_order = ops.tuple_reversed(perm)
         input = _get_cache_prim(P.Transpose)()(input, new_order)

@@ -2161,8 +2166,6 @@ def concat(tensors, axis=0):

     Tutorial Examples:
         - `Tensor - Tensor Operation <https://mindspore.cn/tutorials/en/r2.2/beginner/tensor.html#tensor-operation>`_
-        - `FGSM Network Adversarial Attack - Implementing FGSM
-          <https://mindspore.cn/tutorials/application/en/r2.2/cv/fgsm.html#implementing-fgsm>`_
         - `Vision Transformer Image Classification - Building ViT as a whole
           <https://mindspore.cn/tutorials/application/en/r2.2/cv/vit.html#building-vit-as-a-whole>`_
         - `Sentiment Classification Implemented by RNN - Dense
@@ -6828,7 +6831,7 @@ def diagonal(input, offset=0, dim1=0, dim2=1):
     """
     x_ndim = input.ndim
     if x_ndim < 2:
-        raise ValueError(f"ops.diagonal
+        raise ValueError(f"For 'ops.diagonal', the original tensor requires at least two dimensions, but got {x_ndim}")
     _check_attr_dtype("dim1", dim1, [int], "diagonal")
     _check_attr_dtype("dim2", dim2, [int], "diagonal")
     dtype = input.dtype
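A hedged usage sketch of ops.one_hot consistent with the dtype notes added above (float32 on_value/off_value with int32 indices, sidestepping the Ascend int64 pairing rule); illustrative only, not part of the diff:

import numpy as np
import mindspore as ms
from mindspore import Tensor, ops

indices = Tensor(np.array([0, 2, 1]), ms.int32)
# on_value and off_value share a dtype; the output takes that dtype.
out = ops.one_hot(indices, 3, Tensor(1.0, ms.float32), Tensor(0.0, ms.float32), axis=-1)
print(out.shape)  # (3, 3)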
mindspore/ops/function/nn_func.py
CHANGED

@@ -4609,6 +4609,19 @@ def max_pool3d(x, kernel_size, stride=None, padding=0, dilation=1, ceil_mode=Fal

         - **output** (Tensor) - Maxpooling result, with shape :math:`(N_{out}, C_{out}, D_{out}, H_{out}, W_{out})`.
           It has the same data type as `x`.
+
+          .. math::
+              D_{out} = \left\lfloor\frac{D_{in} + 2 \times \text{padding}[0] - \text{dilation}[0] \times
+              (\text{kernel_size}[0] - 1) - 1}{\text{stride}[0]} + 1\right\rfloor
+
+          .. math::
+              H_{out} = \left\lfloor\frac{H_{in} + 2 \times \text{padding}[1] - \text{dilation}[1] \times
+              (\text{kernel_size}[1] - 1) - 1}{\text{stride}[1]} + 1\right\rfloor
+
+          .. math::
+              W_{out} = \left\lfloor\frac{W_{in} + 2 \times \text{padding}[2] - \text{dilation}[2] \times
+              (\text{kernel_size}[2] - 1) - 1}{\text{stride}[2]} + 1\right\rfloor
+
         - **argmax** (Tensor) - Index corresponding to the maximum value. Data type is int64. It will be return
           only when `return_indices` is ``True`` .

@@ -6037,20 +6050,20 @@ def conv3d(input, weight, bias=None, stride=1, pad_mode="valid", padding=0, dila

     .. math::
         \begin{array}{ll} \\
-            D_{out}
-            H_{out}
-            W_{out}
+            D_{out} = \left \lceil{\frac{D_{in}}{\text{stride[0]}}} \right \rceil \\
+            H_{out} = \left \lceil{\frac{H_{in}}{\text{stride[1]}}} \right \rceil \\
+            W_{out} = \left \lceil{\frac{W_{in}}{\text{stride[2]}}} \right \rceil \\
         \end{array}

     `pad_mode` is ``"valid"``:

     .. math::
         \begin{array}{ll} \\
-            D_{out}
+            D_{out} = \left \lfloor{\frac{D_{in} - \text{dilation[0]} \times (\text{kernel_size[0]} - 1) }
             {\text{stride[0]}} + 1} \right \rfloor \\
-            H_{out}
+            H_{out} = \left \lfloor{\frac{H_{in} - \text{dilation[1]} \times (\text{kernel_size[1]} - 1) }
             {\text{stride[1]}} + 1} \right \rfloor \\
-            W_{out}
+            W_{out} = \left \lfloor{\frac{W_{in} - \text{dilation[2]} \times (\text{kernel_size[2]} - 1) }
             {\text{stride[2]}} + 1} \right \rfloor \\
         \end{array}

@@ -6058,11 +6071,11 @@ def conv3d(input, weight, bias=None, stride=1, pad_mode="valid", padding=0, dila

     .. math::
         \begin{array}{ll} \\
-            D_{out}
+            D_{out} = \left \lfloor{\frac{D_{in} + padding[0] + padding[1] - (\text{dilation[0]} - 1) \times
             \text{kernel_size[0]} - 1 }{\text{stride[0]}} + 1} \right \rfloor \\
-            H_{out}
+            H_{out} = \left \lfloor{\frac{H_{in} + padding[2] + padding[3] - (\text{dilation[1]} - 1) \times
             \text{kernel_size[1]} - 1 }{\text{stride[1]}} + 1} \right \rfloor \\
-            W_{out}
+            W_{out} = \left \lfloor{\frac{W_{in} + padding[4] + padding[5] - (\text{dilation[2]} - 1) \times
             \text{kernel_size[2]} - 1 }{\text{stride[2]}} + 1} \right \rfloor \\
         \end{array}

@@ -7431,6 +7444,82 @@ def max_pool2d(x, kernel_size, stride=None, padding=0, dilation=1, return_indice
     return out


+def prompt_flash_attention(query, key, value, padding_mask, attn_mask, actual_seq_lengths,
+                           actual_seq_lengths_kv, deq_scale1, quant_scale1,
+                           deq_scale2, quant_scale2, quant_offset2, num_heads, scale_value=1.0, pre_tokens=2147483547,
+                           next_tokens=0, input_layout='BSH',
+                           num_key_value_heads=0, sparse_mode=0):
+    r"""
+    The interface for fully inference.
+    B -- Batch size
+    S -- Sequence length
+    H -- Hidden size
+
+    Note:
+        is only supported on ascend910B
+
+    .. warning::
+        This is an experimental API that is subject to change or deletion.
+
+    Inputs:
+        query (Tensor) - The query tensor with data type of float16 or float32.
+            Input tensor of shape :math:`(B, S, H)` / `(B, N, S, D)`.
+        key (Tensor) - The key tensor with data type of float16 or float32.
+            Input tensor of shape :math:`(B, S, H)` / `(B, N, S, D)`.
+        value (Tensor) - The value tensor with data type of float16 or float32.
+            Input tensor of shape :math:`(B, S, H)` / `(B, N, S, D)`.
+        padding_mask (Tensor) - The padding mask tensor with data type of float16 or float32
+        attn_mask (Tensor) - The attention mask tensor with data type of float16 or float32.
+            For each element, 0 indicates retention and 1 indicates discard. Input tensor of shape :math:`(B, 1, S, S)`.
+        actual_seq_lengths (list[int]): Describe actual sequence length of each input with data type of int.
+        actual_seq_lengths_kv (list[int]): Describe actual sequence length of each input with data type of int.
+        dep_scale1 (Tensor)
+        quant_scale1 (Tensor)
+        deq_scale2 (Tensor)
+        quant_scale2 (Tensor)
+        quant_offset2 (Tensor)
+        num_heads (int): The number of heads.
+        scale_value (float): The scale value indicating the scale coefficient, which is used as the scalar of
+            Muls in the calculation. Default: 1.0.
+        pre_tokens (int): Previous tokens. Default: 2147483547.
+        next_tokens (int): next tokens. Default: 0.
+            indicate the upper triangle, Indicate the number of data blocks involved in the calculation. The value 0
+            indicates that the data blocks in the upper triangle are not involved in the calculation
+        input_layout (str): the data layout of the input qkv, support `(BSH)` and `(BNSD)`, Default `BSH`.
+        num_key_value_heads (int): head numbers of key/value which are used in GQA algorithm.
+            The value o indicates if the key and value have the same head nums, use numHeads. Default: 0.
+        sparse_mode (int): Default: 0
+
+
+    Outputs:
+        attention_out (Tensor) - Input tensor of shape :math:`(B, S, H)` / `(B, N, S, D)`.
+
+    Supported Platforms:
+        ``Ascend``
+
+    Examples:
+        >>> from mindspore.ops.function.nn_func import prompt_flash_attention
+        >>> from mindspore import Tensor
+        >>> import numpy as np
+        >>> B = 1
+        >>> N = 16
+        >>> S = 256
+        >>> D = 16
+        >>> query = Tensor(np.ones((B, N, S, D), dtype=np.float16))
+        >>> key = Tensor(np.ones((B, N, S, D), dtype=np.float16))
+        >>> value = Tensor(np.ones((B, N, S, D), dtype=np.float16))
+        >>> out = ops.prompt_flash_attention(query, key, value, None, None, None, None, None, None, None, None,
+        ...                                  None, N, input_layout='BNSD')
+        >>> print(out[0].shape)
+        (1, 16, 256, 16)
+    """
+
+    pfa = _get_cache_prim(NN_OPS.PromptFlashAttention)(num_heads, scale_value, pre_tokens, next_tokens, input_layout,
+                                                       num_key_value_heads, sparse_mode)
+    return pfa(query, key, value, padding_mask, attn_mask, actual_seq_lengths, actual_seq_lengths_kv, deq_scale1,
+               quant_scale1, deq_scale2, quant_scale2, quant_offset2)
+
+
 __all__ = [
     'adaptive_avg_pool1d',
     'adaptive_avg_pool2d',
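The max_pool3d formulas added above compute each output spatial size with a floor expression; a small sketch evaluating one dimension (illustrative only, not part of the diff):

import math

def pooled_dim(d_in, padding, dilation, kernel_size, stride):
    # D_out = floor((D_in + 2*padding - dilation*(kernel_size - 1) - 1) / stride + 1)
    return math.floor((d_in + 2 * padding - dilation * (kernel_size - 1) - 1) / stride + 1)

print(pooled_dim(d_in=16, padding=1, dilation=1, kernel_size=3, stride=2))  # 8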
mindspore/ops/function/random_func.py
CHANGED

@@ -1180,7 +1180,8 @@ def randint_like(input, low, high, seed=None, *, dtype=None):
     cast_ = P.Cast()
     low_ = Tensor(low, mstype.int32)
     high_ = Tensor(high, mstype.int32)
-
+    size_ = Tensor(size, mstype.int32)
+    output = rand_op(size_, low_, high_)
     return cast_(output, dtype)
