mindspore 2.2.10__cp38-none-any.whl → 2.2.14__cp38-none-any.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between those versions as they appear in their respective public registries.

Potentially problematic release.


This version of mindspore might be problematic. Click here for more details.

Files changed (152)
  1. mindspore/.commit_id +1 -1
  2. mindspore/__init__.py +2 -1
  3. mindspore/_akg/akg/composite/build_module.py +95 -5
  4. mindspore/_akg/akg/topi/cpp/impl.py +1 -1
  5. mindspore/_akg/akg/tvm/_ffi/base.py +1 -1
  6. mindspore/_akg/akg/utils/composite_op_helper.py +7 -2
  7. mindspore/_akg/akg/utils/dump_ascend_meta.py +22 -3
  8. mindspore/_akg/akg/utils/util.py +18 -1
  9. mindspore/_c_dataengine.cpython-38-aarch64-linux-gnu.so +0 -0
  10. mindspore/_c_expression.cpython-38-aarch64-linux-gnu.so +0 -0
  11. mindspore/_c_mindrecord.cpython-38-aarch64-linux-gnu.so +0 -0
  12. mindspore/_extends/parse/__init__.py +3 -2
  13. mindspore/_extends/parse/parser.py +6 -1
  14. mindspore/_extends/parse/standard_method.py +12 -2
  15. mindspore/_mindspore_offline_debug.cpython-38-aarch64-linux-gnu.so +0 -0
  16. mindspore/bin/cache_admin +0 -0
  17. mindspore/bin/cache_server +0 -0
  18. mindspore/common/_utils.py +16 -0
  19. mindspore/common/tensor.py +0 -2
  20. mindspore/communication/management.py +3 -0
  21. mindspore/context.py +34 -4
  22. mindspore/dataset/engine/cache_client.py +8 -5
  23. mindspore/dataset/engine/datasets.py +23 -0
  24. mindspore/dataset/engine/validators.py +1 -1
  25. mindspore/dataset/vision/py_transforms_util.py +2 -2
  26. mindspore/experimental/optim/lr_scheduler.py +5 -6
  27. mindspore/lib/libdnnl.so.2 +0 -0
  28. mindspore/lib/libmindspore.so +0 -0
  29. mindspore/lib/libmindspore_backend.so +0 -0
  30. mindspore/lib/libmindspore_common.so +0 -0
  31. mindspore/lib/libmindspore_core.so +0 -0
  32. mindspore/lib/libmindspore_gpr.so.15 +0 -0
  33. mindspore/lib/libmindspore_grpc++.so.1 +0 -0
  34. mindspore/lib/libmindspore_grpc.so.15 +0 -0
  35. mindspore/lib/libmindspore_shared_lib.so +0 -0
  36. mindspore/lib/libopencv_core.so.4.5 +0 -0
  37. mindspore/lib/libopencv_imgcodecs.so.4.5 +0 -0
  38. mindspore/lib/libopencv_imgproc.so.4.5 +0 -0
  39. mindspore/lib/plugin/ascend/custom_aicpu_ops/op_impl/cpu/aicpu_kernel/impl/libcust_aicpu_kernels.so +0 -0
  40. mindspore/lib/plugin/ascend/custom_aicpu_ops/op_impl/cpu/aicpu_kernel/impl/libcust_cpu_kernels.so +0 -0
  41. mindspore/lib/plugin/ascend/custom_aicpu_ops/op_impl/cpu/config/cust_aicpu_kernel.json +118 -0
  42. mindspore/lib/plugin/ascend/custom_aicpu_ops/op_proto/libcust_op_proto.so +0 -0
  43. mindspore/lib/plugin/ascend/libakg.so +0 -0
  44. mindspore/lib/plugin/ascend/libascend_collective.so +0 -0
  45. mindspore/lib/plugin/ascend/libdvpp_utils.so +0 -0
  46. mindspore/lib/plugin/ascend/libmindspore_aicpu_kernels.so +0 -0
  47. mindspore/lib/plugin/ascend/libmindspore_cpu_kernels.so +0 -0
  48. mindspore/lib/plugin/cpu/libakg.so +0 -0
  49. mindspore/lib/plugin/libmindspore_ascend.so.1 +0 -0
  50. mindspore/mindrecord/tools/cifar100_to_mr.py +49 -57
  51. mindspore/mindrecord/tools/cifar10_to_mr.py +46 -55
  52. mindspore/mindrecord/tools/csv_to_mr.py +3 -8
  53. mindspore/mindrecord/tools/mnist_to_mr.py +4 -9
  54. mindspore/mindrecord/tools/tfrecord_to_mr.py +1 -4
  55. mindspore/nn/layer/activation.py +1 -1
  56. mindspore/nn/layer/embedding.py +2 -2
  57. mindspore/nn/layer/flash_attention.py +48 -135
  58. mindspore/nn/loss/loss.py +1 -1
  59. mindspore/nn/optim/ada_grad.py +2 -2
  60. mindspore/nn/optim/sgd.py +3 -2
  61. mindspore/nn/wrap/__init__.py +4 -2
  62. mindspore/nn/wrap/cell_wrapper.py +6 -3
  63. mindspore/numpy/math_ops.py +1 -1
  64. mindspore/ops/__init__.py +3 -0
  65. mindspore/ops/_grad_experimental/grad_array_ops.py +0 -31
  66. mindspore/ops/_grad_experimental/grad_comm_ops.py +4 -2
  67. mindspore/ops/_grad_experimental/grad_inner_ops.py +8 -0
  68. mindspore/ops/_grad_experimental/grad_math_ops.py +37 -17
  69. mindspore/ops/_op_impl/aicpu/__init__.py +1 -0
  70. mindspore/ops/_op_impl/aicpu/generate_eod_mask.py +38 -0
  71. mindspore/ops/_op_impl/aicpu/linear_sum_assignment.py +21 -2
  72. mindspore/ops/function/array_func.py +6 -5
  73. mindspore/ops/function/debug_func.py +1 -1
  74. mindspore/ops/function/linalg_func.py +21 -11
  75. mindspore/ops/function/math_func.py +3 -0
  76. mindspore/ops/function/nn_func.py +13 -11
  77. mindspore/ops/function/parameter_func.py +2 -0
  78. mindspore/ops/function/sparse_unary_func.py +2 -2
  79. mindspore/ops/function/vmap_func.py +1 -0
  80. mindspore/ops/operations/__init__.py +5 -2
  81. mindspore/ops/operations/_embedding_cache_ops.py +1 -1
  82. mindspore/ops/operations/_grad_ops.py +3 -4
  83. mindspore/ops/operations/_inner_ops.py +56 -1
  84. mindspore/ops/operations/_quant_ops.py +4 -4
  85. mindspore/ops/operations/_rl_inner_ops.py +1 -1
  86. mindspore/ops/operations/array_ops.py +15 -4
  87. mindspore/ops/operations/custom_ops.py +1 -1
  88. mindspore/ops/operations/debug_ops.py +1 -1
  89. mindspore/ops/operations/image_ops.py +3 -3
  90. mindspore/ops/operations/inner_ops.py +49 -0
  91. mindspore/ops/operations/math_ops.py +65 -3
  92. mindspore/ops/operations/nn_ops.py +95 -28
  93. mindspore/ops/operations/random_ops.py +2 -0
  94. mindspore/ops/operations/sparse_ops.py +4 -4
  95. mindspore/ops/silent_check.py +162 -0
  96. mindspore/parallel/__init__.py +3 -2
  97. mindspore/parallel/_auto_parallel_context.py +82 -3
  98. mindspore/parallel/_parallel_serialization.py +34 -2
  99. mindspore/parallel/_tensor.py +3 -1
  100. mindspore/parallel/_transformer/transformer.py +8 -8
  101. mindspore/parallel/checkpoint_transform.py +191 -45
  102. mindspore/profiler/parser/ascend_cluster_generator.py +111 -0
  103. mindspore/profiler/parser/ascend_communicate_generator.py +315 -0
  104. mindspore/profiler/parser/ascend_flops_generator.py +8 -2
  105. mindspore/profiler/parser/ascend_fpbp_generator.py +8 -2
  106. mindspore/profiler/parser/ascend_hccl_generator.py +2 -2
  107. mindspore/profiler/parser/ascend_msprof_exporter.py +30 -6
  108. mindspore/profiler/parser/ascend_msprof_generator.py +16 -5
  109. mindspore/profiler/parser/ascend_op_generator.py +15 -7
  110. mindspore/profiler/parser/ascend_timeline_generator.py +5 -2
  111. mindspore/profiler/parser/base_timeline_generator.py +11 -3
  112. mindspore/profiler/parser/cpu_gpu_timeline_generator.py +2 -1
  113. mindspore/profiler/parser/framework_parser.py +8 -2
  114. mindspore/profiler/parser/memory_usage_parser.py +8 -2
  115. mindspore/profiler/parser/minddata_analyzer.py +8 -2
  116. mindspore/profiler/parser/minddata_parser.py +1 -1
  117. mindspore/profiler/parser/msadvisor_analyzer.py +4 -2
  118. mindspore/profiler/parser/msadvisor_parser.py +9 -3
  119. mindspore/profiler/profiling.py +97 -25
  120. mindspore/rewrite/api/node.py +1 -1
  121. mindspore/rewrite/api/symbol_tree.py +2 -2
  122. mindspore/rewrite/parsers/for_parser.py +6 -6
  123. mindspore/rewrite/parsers/module_parser.py +4 -4
  124. mindspore/scipy/ops.py +55 -5
  125. mindspore/scipy/optimize/__init__.py +3 -2
  126. mindspore/scipy/optimize/linear_sum_assignment.py +38 -33
  127. mindspore/train/callback/_checkpoint.py +8 -8
  128. mindspore/train/callback/_landscape.py +2 -3
  129. mindspore/train/callback/_summary_collector.py +6 -7
  130. mindspore/train/dataset_helper.py +6 -0
  131. mindspore/train/model.py +17 -5
  132. mindspore/train/serialization.py +6 -1
  133. mindspore/train/summary/_writer_pool.py +1 -1
  134. mindspore/train/summary/summary_record.py +5 -6
  135. mindspore/version.py +1 -1
  136. {mindspore-2.2.10.dist-info → mindspore-2.2.14.dist-info}/METADATA +3 -2
  137. {mindspore-2.2.10.dist-info → mindspore-2.2.14.dist-info}/RECORD +140 -148
  138. mindspore/lib/plugin/libmindspore_ascend.so.2 +0 -0
  139. mindspore/ops/_op_impl/_custom_op/flash_attention/__init__.py +0 -0
  140. mindspore/ops/_op_impl/_custom_op/flash_attention/attention.py +0 -406
  141. mindspore/ops/_op_impl/_custom_op/flash_attention/constants.py +0 -41
  142. mindspore/ops/_op_impl/_custom_op/flash_attention/flash_attention_bwd.py +0 -467
  143. mindspore/ops/_op_impl/_custom_op/flash_attention/flash_attention_fwd.py +0 -563
  144. mindspore/ops/_op_impl/_custom_op/flash_attention/flash_attention_impl.py +0 -193
  145. mindspore/ops/_op_impl/_custom_op/flash_attention/tik_ops_utils.py +0 -435
  146. mindspore/ops/_op_impl/_custom_op/flash_attention/tiling_strategy/__init__.py +0 -0
  147. mindspore/ops/_op_impl/_custom_op/flash_attention/tiling_strategy/sparse_tiling.py +0 -45
  148. mindspore/ops/_op_impl/_custom_op/flash_attention/tiling_strategy/strategy.py +0 -67
  149. mindspore/ops/_op_impl/_custom_op/flash_attention/tiling_strategy/wukong_tiling.py +0 -62
  150. {mindspore-2.2.10.dist-info → mindspore-2.2.14.dist-info}/WHEEL +0 -0
  151. {mindspore-2.2.10.dist-info → mindspore-2.2.14.dist-info}/entry_points.txt +0 -0
  152. {mindspore-2.2.10.dist-info → mindspore-2.2.14.dist-info}/top_level.txt +0 -0
@@ -21,9 +21,7 @@ import mindspore.common.dtype as mstype
21
21
  from mindspore.common.tensor import Tensor
22
22
  from mindspore import ops
23
23
  from mindspore.nn.cell import Cell
24
- from mindspore.ops._op_impl._custom_op.flash_attention.flash_attention_impl import get_flash_attention
25
24
  from mindspore.ops.operations.nn_ops import FlashAttentionScore
26
- from mindspore._c_expression import MSContext
27
25
 
28
26
  __all__ = ['FlashAttention']
29
27
 
@@ -46,18 +44,17 @@ class FlashAttention(Cell):
46
44
  Default 65536.
47
45
  next_block_num(int): A integer to define the number of blocks to look behind for local block sparse attention.
48
46
  Default 65536.
49
- tiling_stgy_name(str): A str to define tiling strategy of flash attention.
50
47
  dp(int): data parallel.
51
48
  Default 1.
52
49
  mp(int): model parallel.
53
50
  Default 1.
54
- high_precision(bool): This mode has higher precision but some performance loss.
51
+ high_precision(bool): This mode has higher precision but some performance loss. Only take effect on Ascend910A.
55
52
  Default False.
56
53
  have_attention_mask_batch(bool): indicates whether attention_mask contains the batch dimension.
57
54
  Default True
58
55
  alibi(bool): This parameter indicates whether the flashattention supports the Alibi.
59
56
  Default: False
60
- use_mqa(bool): Using MHA if True, only take effect under 910B. Default: False.
57
+ use_mqa(bool): Using MQA if True, only take effect under 910B. Default: False.
61
58
 
62
59
 
63
60
  Inputs:
@@ -98,7 +95,6 @@ class FlashAttention(Cell):
98
95
  dropout_rate=0.0,
99
96
  prev_block_num=65536,
100
97
  next_block_num=65536,
101
- tiling_stgy_name="sparse",
102
98
  dp=1,
103
99
  mp=1,
104
100
  high_precision=False,
@@ -112,52 +108,36 @@ class FlashAttention(Cell):
112
108
  if scaling_constant == 0:
113
109
  raise ValueError("the scaling constant must not be 0.")
114
110
  self.dropout_rate = dropout_rate
115
- self.is_910A = MSContext.get_instance().get_ascend_soc_version() == "ascend910"
116
- if self.is_910A:
117
- self.scale_factor = Tensor([1. / math.sqrt(scaling_constant)], dtype=mstype.float16)
118
- self.scale_mul = ops.Mul().shard(((dp, mp, 1, 1), (1,)))
119
- self.ones = ops.Ones()
120
- self.dim_mask = Tensor([1 for _ in range(head_dim)], dtype=mstype.int8)
121
- self.have_attention_mask_batch = have_attention_mask_batch
122
- self.alibi = alibi
123
- self.flash_attention = get_flash_attention(
124
- prev_block_num=prev_block_num,
125
- next_block_num=next_block_num,
126
- tiling_stgy_name=tiling_stgy_name,
127
- high_precision=high_precision
128
- )
129
- self.flash_attention.add_prim_attr("primitive_target", "Ascend")
111
+ self.alibi = alibi
112
+ self.have_attention_mask_batch = have_attention_mask_batch
113
+
114
+ self.transpose_4d_pre = ops.Transpose().shard(((dp, mp, 1, 1),))
115
+ self.transpose_4d_post = ops.Transpose().shard(((dp, 1, mp, 1),))
116
+ self.reshape = ops.Reshape()
117
+ self.zeros_like = ops.ZerosLike().shard(((dp, mp, 1, 1),))
118
+ self.zeros = ops.Zeros()
119
+ self.attn_cast = ops.Cast()
120
+ if use_mqa:
121
+ fa_strategies = ((dp, mp, 1, 1),
122
+ (dp, 1, 1, 1),
123
+ (dp, 1, 1, 1))
124
+ else:
130
125
  fa_strategies = ((dp, mp, 1, 1),
131
126
  (dp, mp, 1, 1),
132
127
  (dp, mp, 1, 1))
133
- self.shard(fa_strategies)
134
- else:
135
- if alibi:
136
- raise ValueError(f"When soc_version is not Ascend910A, alibi must be False")
137
- self.transpose_4d_pre = ops.Transpose().shard(((dp, mp, 1, 1),))
138
- self.transpose_4d_post = ops.Transpose().shard(((dp, 1, mp, 1),))
139
- self.reshape = ops.Reshape()
140
- self.zeros_like = ops.ZerosLike().shard(((dp, mp, 1, 1),))
141
- self.zeros = ops.Zeros()
142
- self.attn_cast = ops.Cast()
143
- if use_mqa:
144
- fa_strategies = ((dp, mp, 1, 1),
145
- (dp, 1, 1, 1),
146
- (dp, 1, 1, 1),
147
- (dp, 1, 1, 1))
148
- else:
149
- fa_strategies = ((dp, mp, 1, 1),
150
- (dp, mp, 1, 1),
151
- (dp, mp, 1, 1),
152
- (dp, 1, 1, 1))
153
- if dropout_rate > 1e-5:
154
- fa_strategies += ((dp, mp, 1, 1),)
155
- self.flash_attention = FlashAttentionScore(head_num=head_num, pre_tokens=prev_block_num,
156
- next_tokens=next_block_num,
157
- keep_prob=1 - dropout_rate,
158
- scale_value=1. / scaling_constant,
159
- inner_precise=0 if high_precision else 1,
160
- input_layout="BNSD").shard(fa_strategies)
128
+ if self.alibi:
129
+ self.alibi_rescale_mul = ops.Mul().shard(((dp, mp, 1, 1), (1,)))
130
+ self.alibi_rescale_factor = Tensor([scaling_constant], dtype=mstype.float16)
131
+ fa_strategies += ((dp, mp, 1, 1),)
132
+ if dropout_rate > 1e-5:
133
+ fa_strategies += ((dp, mp, 1, 1),)
134
+ fa_strategies += ((dp, 1, 1, 1),)
135
+ self.flash_attention = FlashAttentionScore(head_num=head_num, pre_tokens=prev_block_num,
136
+ next_tokens=next_block_num,
137
+ keep_prob=1 - dropout_rate,
138
+ scale_value=1. / scaling_constant,
139
+ inner_precise=0,
140
+ input_layout="BNSD").shard(fa_strategies)
161
141
 
162
142
  self.dropout_rate = dropout_rate
163
143
  if self.dropout_rate > 1e-5:
@@ -175,49 +155,7 @@ class FlashAttention(Cell):
175
155
  such as MatMul. Default: None.
176
156
  :return:
177
157
  """
178
- if self.is_910A:
179
- if in_strategy is None:
180
- # default: dp=1, mp=1, construct inputs only contain query, key, value
181
- in_strategy = (
182
- (1, 1, 1, 1),
183
- (1, 1, 1, 1),
184
- (1, 1, 1, 1),
185
- )
186
- self.flash_attention.shard(in_strategy)
187
- dp = in_strategy[0][0]
188
- mp = in_strategy[0][1]
189
- self.flash_attention.add_prim_attr("dev_matrix_shape", [dp, mp, 1, 1])
190
- inputs_tensor_map = [
191
- [3, 2, 1, 0],
192
- [3, 2, 1, 0],
193
- [3, 2, 1, 0],
194
- ]
195
- if self.have_attention_mask_batch:
196
- inputs_tensor_map.append([3, 1, 0])
197
- else:
198
- inputs_tensor_map.append([-1, 1, 0])
199
-
200
- input_empty_args_num = 2
201
- # dropout_mask
202
- if self.dropout_rate > 1e-5:
203
- input_empty_args_num -= 1
204
- inputs_tensor_map.append([3, 2, 1, 0])
205
-
206
- if self.alibi:
207
- input_empty_args_num -= 1
208
- inputs_tensor_map.append([3, 2, 1, 0])
209
-
210
- self.flash_attention.add_prim_attr("inputs_tensor_map", inputs_tensor_map)
211
-
212
- self.flash_attention.add_prim_attr("outputs_tensor_map", [
213
- [3, 2, 1, 0], # O
214
- [3, 2, 1], # L
215
- [3, 2, 1] # M
216
- ])
217
- self.flash_attention.add_prim_attr("as_loss_divisor", 0)
218
- self.flash_attention.add_prim_attr("empty_mirror_ops", input_empty_args_num)
219
- else:
220
- self.flash_attention.shard(in_strategy)
158
+ self.flash_attention.shard(in_strategy)
221
159
 
222
160
  def construct(self, query, key, value, attn_mask=None, alibi_mask=None):
223
161
  """FlashAttention forward
@@ -228,49 +166,24 @@ class FlashAttention(Cell):
228
166
  :param alibi_mask: [bsz, head_num, 1, seq_len], if not None
229
167
  :return: output [bsz, head_num, seq_len, head_dim]
230
168
  """
231
- bsz, head_num, seq_len, head_dim = query.shape
232
- if self.is_910A:
233
- _, k_head_num, k_seq_len, _ = key.shape
234
- _, v_head_num, v_seq_len, _ = value.shape
235
- if head_num != k_head_num or head_num != v_head_num:
236
- raise ValueError(
237
- "the head_num of query, key and value must be the same, "
238
- "If different head_num are used, users need to change themselves to be same by tile.")
239
- if seq_len % 16 != 0 or k_seq_len % 16 != 0 or k_seq_len != v_seq_len:
240
- raise ValueError(
241
- "query, key, value seq_len must be a multiple of 16, "
242
- "and the seq_len between key and value must be equal.")
243
- # 910A -- FlashAttentionPrimtive
244
- if head_dim > 304:
245
- raise ValueError(
246
- "the head_dim must be less than 304, otherwise the ub would be OOM.")
247
- if self.dropout_rate > 1e-5:
248
- drop_mask_bits = self.drop_gen_mask((bsz, head_num, seq_len, seq_len), self.keep_prob)
249
- tensor_shape = Tensor((bsz, head_num, seq_len, seq_len), mstype.int32)
250
- ones = self.fill_v2(tensor_shape, self.tensor_one)
251
- ones = self.depend(ones, query)
252
- drop_mask = self.do_dropout(ones, drop_mask_bits, self.keep_prob)
253
- else:
254
- drop_mask = None
255
- query = self.scale_mul(query, self.scale_factor)
256
- key = self.scale_mul(key, self.scale_factor)
257
- attn_mask = self.cast(attn_mask, mstype.float16)
258
- output, _, _ = self.flash_attention(query, key, value, attn_mask, drop_mask, alibi_mask)
169
+ bsz, head_num, seq_len, _ = query.shape
170
+ # 910B -- FlashAttentionScore
171
+ if self.dropout_rate > 1e-5:
172
+ drop_mask_bits = self.reshape(self.drop_gen_mask((bsz, head_num, seq_len, seq_len), self.keep_prob),
173
+ (bsz, head_num, seq_len, seq_len // 8))
259
174
  else:
260
- # 910B -- FlashAttentionScore
261
- if self.dropout_rate > 1e-5:
262
- drop_mask_bits = self.reshape(self.drop_gen_mask((bsz, head_num, seq_len, seq_len), self.keep_prob),
263
- (bsz, head_num, seq_len, seq_len // 8))
264
- else:
265
- drop_mask_bits = None
266
- # (B, S, S) -> (B, 1, S, S)
175
+ drop_mask_bits = None
176
+ if self.alibi:
177
+ alibi_mask = self.alibi_rescale_mul(alibi_mask, self.cast(self.alibi_rescale_factor, alibi_mask.dtype))
178
+ # (B, S, S) -> (B, 1, S, S)
179
+ if self.have_attention_mask_batch:
267
180
  attn_mask = self.cast(self.reshape(attn_mask, (bsz, 1, seq_len, seq_len)), mstype.uint8)
268
- output, _, _ = self.flash_attention(query,
269
- key,
270
- value,
271
- attn_mask,
272
- drop_mask_bits,
273
- None,
274
- None,
275
- None)
181
+ _, _, _, output = self.flash_attention(query,
182
+ key,
183
+ value,
184
+ alibi_mask,
185
+ drop_mask_bits,
186
+ None,
187
+ attn_mask,
188
+ None)
276
189
  return output
mindspore/nn/loss/loss.py CHANGED
@@ -1996,7 +1996,7 @@ class FocalLoss(LossBase):
1996
1996
  >>> import mindspore.nn as nn
1997
1997
  >>> logits = ms.Tensor([[0.8, 1.4], [0.5, 0.9], [1.2, 0.9]], ms.float32)
1998
1998
  >>> labels = ms.Tensor([[1], [1], [0]], ms.int32)
1999
- >>> focalloss = nn.FocalLoss(weight=Tensor([1, 2]), gamma=2.0, reduction='mean')
1999
+ >>> focalloss = nn.FocalLoss(weight=ms.Tensor([1, 2]), gamma=2.0, reduction='mean')
2000
2000
  >>> output = focalloss(logits, labels)
2001
2001
  >>> print(output)
2002
2002
  0.12516622
@@ -162,7 +162,7 @@ class Adagrad(Optimizer):
162
162
  ``Ascend`` ``GPU`` ``CPU``
163
163
 
164
164
  Examples:
165
- >>> import mindspore
165
+ >>> from mindspore import train
166
166
  >>> import mindspore.nn as nn
167
167
  >>>
168
168
  >>> # Define the network structure of LeNet5. Refer to
@@ -185,7 +185,7 @@ class Adagrad(Optimizer):
185
185
  >>> # The final parameters order in which the optimizer will be followed is the value of 'order_params'.
186
186
  >>>
187
187
  >>> loss = nn.SoftmaxCrossEntropyWithLogits()
188
- >>> model = ms.Model(net, loss_fn=loss, optimizer=optim)
188
+ >>> model = train.Model(net, loss_fn=loss, optimizer=optim)
189
189
  """
190
190
 
191
191
  @opt_init_args_register
mindspore/nn/optim/sgd.py CHANGED
@@ -193,9 +193,9 @@ class SGD(Optimizer):
193
193
  "or 'weight_decay' set in grouped 'params' must be float or int type.")
194
194
 
195
195
  if hasattr(self, "group_weight_decay") and self.group_weight_decay:
196
- self.opt = tuple(P.SGD(dampening, wd, nesterov) for wd in self.group_weight_decay)
196
+ self.opt = tuple(P.SGD(dampening, 0.0, nesterov) for _ in self.group_weight_decay)
197
197
  else:
198
- self.opt = tuple([P.SGD(dampening, float(weight_decay), nesterov)] * len(self._parameters))
198
+ self.opt = tuple([P.SGD(dampening, 0.0, nesterov)] * len(self._parameters))
199
199
 
200
200
  self.momentum = Parameter(Tensor(momentum, mstype.float32), name="momentum")
201
201
 
@@ -220,6 +220,7 @@ class SGD(Optimizer):
220
220
  params = self._parameters
221
221
  accum = self.accum
222
222
  stat = self.stat
223
+ gradients = self.decay_weight(gradients)
223
224
  gradients = self.flatten_gradients(gradients)
224
225
  gradients = self.gradients_centralization(gradients)
225
226
  gradients = self.scale_grad(gradients)
@@ -20,7 +20,8 @@ Use the Wrapper to combine the loss or build the training steps.
20
20
  from __future__ import absolute_import
21
21
 
22
22
  from mindspore.nn.wrap.cell_wrapper import ForwardValueAndGrad, TrainOneStepCell, WithLossCell, WithGradCell, \
23
- WithEvalCell, ParameterUpdate, GetNextSingleOp, VirtualDatasetCellTriple, MicroBatchInterleaved, PipelineCell
23
+ WithEvalCell, ParameterUpdate, GetNextSingleOp, VirtualDatasetCellTriple, MicroBatchInterleaved, PipelineCell, \
24
+ GradAccumulationCell
24
25
  from mindspore.nn.wrap.loss_scale import TrainOneStepWithLossScaleCell,\
25
26
  DynamicLossScaleUpdateCell, FixedLossScaleUpdateCell
26
27
  from mindspore.nn.wrap.grad_reducer import DistributedGradReducer
@@ -42,5 +43,6 @@ __all__ = [
42
43
  "ParameterUpdate",
43
44
  "DynamicLossScaleUpdateCell",
44
45
  "FixedLossScaleUpdateCell",
45
- "VirtualDatasetCellTriple"
46
+ "VirtualDatasetCellTriple",
47
+ "GradAccumulationCell"
46
48
  ]
@@ -673,7 +673,7 @@ class PipelineCell(Cell):
673
673
 
674
674
  class GradAccumulationCell(Cell):
675
675
  """
676
- Wrap the network with Micro Batch.
676
+ Wrap the network with Micro Batch to enable the grad accumulation in semi_auto_parallel/auto_parallel mode.
677
677
 
678
678
  Args:
679
679
  network (Cell): The target network to wrap.
@@ -683,8 +683,11 @@ class GradAccumulationCell(Cell):
683
683
  ``Ascend`` ``GPU``
684
684
 
685
685
  Examples:
686
- >>> net = Net()
687
- >>> net = GradAccumulationCell(net, 4)
686
+ >>> import mindspore.nn as nn
687
+ >>> # Define the network structure of LeNet5. Refer to
688
+ >>> # https://gitee.com/mindspore/docs/blob/r2.2/docs/mindspore/code/lenet.py
689
+ >>> net = LeNet5()
690
+ >>> net = nn.GradAccumulationCell(net, 4)
688
691
  """
689
692
  def __init__(self, network, micro_size):
690
693
  super(GradAccumulationCell, self).__init__(auto_prefix=False)
@@ -4285,7 +4285,7 @@ def argmin(a, axis=None):
4285
4285
 
4286
4286
  Examples:
4287
4287
  >>> import mindspore.numpy as np
4288
- >>> a = np.arange(10, 16).reshape(2, 3)
4288
+ >>> a = np.arange(10, 16).reshape(2, 3).astype(np.float32)
4289
4289
  >>> print(np.argmin(a))
4290
4290
  0
4291
4291
  >>> print(np.argmin(a, axis=0))
mindspore/ops/__init__.py CHANGED
@@ -34,6 +34,7 @@ from mindspore.ops.composite import *
34
34
  from mindspore.ops.operations import *
35
35
  from mindspore.ops.function import *
36
36
  from mindspore.ops.functional import *
37
+ from mindspore.ops.silent_check import _silent_check
37
38
 
38
39
  __primitive__ = [
39
40
  "prim_attr_register", "Primitive", "PrimitiveWithInfer", "PrimitiveWithCheck", "signature"
@@ -48,3 +49,5 @@ __all__.extend(composite.__all__)
48
49
  __all__.extend(operations.__all__)
49
50
  __all__.extend(functional.__all__)
50
51
  __all__.extend(function.__all__)
52
+
53
+ _silent_check()
@@ -36,8 +36,6 @@ from mindspore.ops.operations.array_ops import ScatterAddWithAxis
36
36
  from mindspore.ops.operations.array_ops import Expand
37
37
  from mindspore.ops.operations.array_ops import SegmentMean
38
38
  from mindspore.ops.operations.array_ops import AffineGrid
39
- from mindspore.ops.operations.array_ops import Im2Col
40
- from mindspore.ops.operations.array_ops import Col2Im
41
39
  from mindspore.ops.operations.array_ops import MaskedScatter
42
40
  from mindspore.ops.operations.array_ops import MaskedSelect
43
41
  from mindspore.ops.operations.array_ops import CountNonZero
@@ -360,35 +358,6 @@ def get_bprop_resize_nearest_neighbor_v2(self):
360
358
  return bprop
361
359
 
362
360
 
363
- @bprop_getters.register(Im2Col)
364
- def get_bprop_im2col(self):
365
- """
366
- Generate bprop for Im2Col
367
-
368
- Im2Col, corresponding to torch's UnFold operator.
369
- The Unfold operator has no `padding_mode` attribute,
370
- and it's implementation corresponds to the mindspore
371
- implementation with `padding_mode=CALCULATED` .
372
- So, currently the bprop function of Im2Col only supports
373
- the CALCULATED mode.
374
- """
375
- kernel_size = self.ksizes
376
- dilation = self.dilations
377
- stride = self.strides
378
- padding = (self.pads[0], self.pads[-1])
379
- col2im = Col2Im(kernel_size=kernel_size,
380
- dilation=dilation,
381
- stride=stride,
382
- padding=padding)
383
-
384
- def bprop(x, out, dout):
385
- x_shape = P.TensorShape()(x)[2:]
386
- dx = col2im(dout, x_shape)
387
- return (dx,)
388
-
389
- return bprop
390
-
391
-
392
361
  @bprop_getters.register(P.ExtractVolumePatches)
393
362
  def get_bprop_extract_volume_patches(self):
394
363
  """Generate bprop for ExtractVolumePatches"""
@@ -92,7 +92,8 @@ def get_bprop_send(self):
92
92
  """Generate bprop for Send."""
93
93
  shape = self.get_attr_dict()["shape"]
94
94
  dtype = self.get_attr_dict()["dtype"]
95
- send_grad = Receive(self.sr_tag, self.rank, shape, dtype, self.group_back)
95
+ tag = self.get_attr_dict()["sr_tag"]
96
+ send_grad = Receive(tag, self.rank, shape, dtype, self.group_back)
96
97
  virtual_input = Tensor(0.0, dtype)
97
98
 
98
99
  def bprop(x, out, dout):
@@ -105,7 +106,8 @@ def get_bprop_send(self):
105
106
  @bprop_getters.register(Receive)
106
107
  def get_bprop_receive(self):
107
108
  """Generate bprop for Receive."""
108
- receive_grad = Send(self.tag, self.rank, self.group_back)
109
+ tag = self.get_attr_dict()["sr_tag"]
110
+ receive_grad = Send(tag, self.rank, self.group_back)
109
111
  depend = P.Depend()
110
112
  cast = P.Cast()
111
113
  out_tensor = Tensor(0.0, mstype.float16)
@@ -36,6 +36,14 @@ def get_bprop_parallel_resize_bilinear(self):
36
36
  return bprop
37
37
 
38
38
 
39
+ @bprop_getters.register(P.inner_ops.GenerateEodMask)
40
+ def get_bprop_generate_eod_mask(self):
41
+
42
+ def bprop(x, out, dout):
43
+ return dout, dout
44
+ return bprop
45
+
46
+
39
47
  @bprop_getters.register(inner.PsROIPooling)
40
48
  def get_bprop_ps_roi_pooling(self):
41
49
  """Grad definition for `PsROIPooling` operation."""
@@ -18,11 +18,13 @@
18
18
  import numpy as np
19
19
  import mindspore.numpy as mnp
20
20
  from mindspore.common import dtype as mstype
21
+ import mindspore.ops as ops
21
22
  from mindspore.ops import functional as F
22
23
  from mindspore.ops import operations as P
23
24
  from mindspore import Tensor
24
25
  from mindspore.ops.operations.math_ops import Real, Imag, Complex, Angle
25
- from mindspore.ops.operations.math_ops import Polar
26
+ from mindspore.ops.operations.math_ops import Polar, SilentCheck
27
+ from mindspore.ops.operations._inner_ops import _MirrorSilentCheck
26
28
  from mindspore.ops.operations import _grad_ops as G
27
29
  from mindspore.ops.operations.math_ops import Lgamma
28
30
  from mindspore.ops.operations.math_ops import Digamma
@@ -763,6 +765,7 @@ def get_bprop_fft_with_size(self):
763
765
  to_tensor_op = P.ScalarToTensor()
764
766
  type_op = P.DType()
765
767
  concat_op = P.Concat()
768
+ concat_op_last = P.Concat(axis=-1)
766
769
  ones_op = P.Ones()
767
770
  zeros_op = P.Zeros()
768
771
  real_op = P.Real()
@@ -794,8 +797,7 @@ def get_bprop_fft_with_size(self):
794
797
  signal_sizes=offset_shape[-1:])
795
798
  irfft2d_ = FFTWithSize(signal_ndim=2, inverse=True, real=True, norm="backward", onesided=onesided,
796
799
  signal_sizes=offset_shape[-2:])
797
- irfft3d_ = FFTWithSize(signal_ndim=3, inverse=True, real=True, norm="backward", onesided=onesided,
798
- signal_sizes=offset_shape[-3:])
800
+ irfft3d_ = FFTWithSize(signal_ndim=3, inverse=True, real=False, norm="backward", onesided=onesided)
799
801
  if inverse is False:
800
802
  if onesided is True:
801
803
  terms = 0
@@ -811,6 +813,7 @@ def get_bprop_fft_with_size(self):
811
813
  vec_mask = complex_op(1 - 2 * (mnp.arange(0, input_shape[-1], 1, input_type) % 2),
812
814
  zeros_op(input_shape[-1], input_type))
813
815
  terms = real_op(dout_first) + is_even * real_op(dout_last * vec_mask)
816
+ dx = to_tensor_op(0.5, input_type) * (dx * rfft_offset_size + terms) * rfft_norm_offset
814
817
  elif signal_ndim == 2:
815
818
  dx = irfft2d_(dout)
816
819
  arange_inner = mnp.arange(0, input_shape[-2], 1, input_type)
@@ -852,26 +855,27 @@ def get_bprop_fft_with_size(self):
852
855
  dout_shape, [input_shape[-1]])))
853
856
  dout_last_term = dout_last_term * vec_mask
854
857
  terms = real_op(dout_first_term) + is_even * real_op(dout_last_term)
858
+ dx = to_tensor_op(0.5, input_type) * (dx * rfft_offset_size + terms) * rfft_norm_offset
855
859
  elif signal_ndim == 3:
856
- dx = irfft3d_(dout) * real_op(offset_size)
857
- dx = to_tensor_op(0.5, input_type) * (dx * rfft_offset_size + terms) * rfft_norm_offset
860
+ zeros_shape = offset_shape[:-1] + (offset_shape[-1] - dout_shape[-1],)
861
+ zeros_values = zeros_op(zeros_shape, input_type)
862
+ zeros_padding = complex_op(zeros_values, zeros_values)
863
+ dout = concat_op_last((dout, zeros_padding))
864
+ dx = real_op(irfft3d_(dout)) * real_op(offset_size)
858
865
  else:
859
866
  dx = irfft_fn(dout) * real_op(offset_size)
860
867
  else:
861
868
  dx = rfft_fn(dout)
862
869
  if onesided is True:
863
- if signal_ndim != 3:
864
- is_odd = dout_shape[-1] % 2
865
- last_shape = offset_shape[-1]
866
- mask = concat_op((ones_op(1, output_type), 2.0 * ones_op(
867
- (last_shape - 2 + is_odd,), output_type), ones_op((1 - is_odd,), output_type)))
868
- dx = dx * complex_op(mask, zeros_op(shape_op(mask), output_type))
869
- irfft_offset_size = to_tensor_op(
870
- _fft_with_size_back_norm(shape_op(dout), norm, inverse, signal_ndim),
871
- output_type)
872
- dx = dx * complex_op(irfft_offset_size, zeros_op(1, output_type))
873
- else:
874
- dx = dx * complex_op(offset_size, zeros_op(1, output_type))
870
+ is_odd = dout_shape[-1] % 2
871
+ last_shape = offset_shape[-1]
872
+ mask = concat_op((ones_op(1, output_type), 2.0 * ones_op(
873
+ (last_shape - 2 + is_odd,), output_type), ones_op((1 - is_odd,), output_type)))
874
+ dx = dx * complex_op(mask, zeros_op(shape_op(mask), output_type))
875
+ irfft_offset_size = to_tensor_op(
876
+ _fft_with_size_back_norm(shape_op(dout), norm, inverse, signal_ndim),
877
+ output_type)
878
+ dx = dx * complex_op(irfft_offset_size, zeros_op(1, output_type))
875
879
  else:
876
880
  dx = dx * complex_op(offset_size, zeros_op(1, output_type))
877
881
  return (dx,)
@@ -1017,3 +1021,19 @@ def get_bprop_tensor_add(self):
1017
1021
  return binop_grad_common(x, y, dout, dout)
1018
1022
 
1019
1023
  return bprop
1024
+
1025
+
1026
+ @bprop_getters.register(_MirrorSilentCheck)
1027
+ def get_bprop_mirror_silent_check(self):
1028
+ """Grad definition for '_MirrorSilentCheck' op"""
1029
+ silent_check = SilentCheck(self.min_steps, self.thresh_l1, self.coeff_l1, self.thresh_l2, self.coeff_l2)
1030
+ out_tensor = Tensor([0.0], mstype.float32)
1031
+
1032
+ def bporp(x, pre_val, min_val, max_val, n_step, loss_scale, out, dout):
1033
+ if loss_scale is not None:
1034
+ dout = dout / loss_scale
1035
+ grad = ops.norm(dout)
1036
+ dx, _, _, _, _ = silent_check(grad, dout, pre_val, min_val, max_val, n_step)
1037
+ return (dx, out_tensor, out_tensor, out_tensor, out_tensor, out_tensor)
1038
+
1039
+ return bporp
@@ -60,6 +60,7 @@ from .init_data_set_queue import _init_data_set_queue_aicpu
60
60
  from .embedding_lookup import _embedding_lookup_aicpu
61
61
  from .padding import _padding_aicpu
62
62
  from .gather import _gather_aicpu
63
+ from .generate_eod_mask import _generate_eod_mask_aicpu
63
64
  from .gather_grad import _gather_grad_aicpu
64
65
  from .gather_d_grad_v2 import _gather_d_grad_v2_aicpu
65
66
  from .gather_d import _gather_d_aicpu
@@ -0,0 +1,38 @@
1
+ # Copyright 2023 Huawei Technologies Co., Ltd
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ # ============================================================================
15
+
16
+ """GenerateEodMask op"""
17
+ from mindspore.ops.op_info_register import op_info_register, AiCPURegOp, DataType
18
+
19
+ generate_eod_mask_op_info = AiCPURegOp("GenerateEodMask") \
20
+ .fusion_type("OPAQUE") \
21
+ .attr("eod_token_id", "int") \
22
+ .attr("n_pos", "int") \
23
+ .attr("n_step", "listint") \
24
+ .attr("n_error_mode", "str") \
25
+ .input(0, "inputs_ids", "required") \
26
+ .output(0, "position_ids", "required") \
27
+ .dtype_format(DataType.U16_Default, DataType.U16_Default) \
28
+ .dtype_format(DataType.U32_Default, DataType.U32_Default) \
29
+ .dtype_format(DataType.U64_Default, DataType.U64_Default) \
30
+ .dtype_format(DataType.I32_Default, DataType.I32_Default) \
31
+ .dtype_format(DataType.I64_Default, DataType.I64_Default) \
32
+ .get_op_info()
33
+
34
+
35
+ @op_info_register(generate_eod_mask_op_info)
36
+ def _generate_eod_mask_aicpu():
37
+ """GenerateEodMask AiCPU register"""
38
+ return
@@ -1,4 +1,4 @@
1
- # Copyright 2022 Huawei Technologies Co., Ltd
1
+ # Copyright 2023 Huawei Technologies Co., Ltd
2
2
  #
3
3
  # Licensed under the Apache License, Version 2.0 (the "License");
4
4
  # you may not use this file except in compliance with the License.
@@ -24,11 +24,30 @@ lsap_op_info = AiCPURegOp("LinearSumAssignment") \
24
24
  .input(2, 'maximize', "required") \
25
25
  .output(0, "row_ind", "required") \
26
26
  .output(1, "col_ind", "required") \
27
- .attr("cust_aicpu", "str") \
28
27
  .dtype_format(DataType.F64_Default, DataType.I64_Default,
29
28
  DataType.BOOL_Default, DataType.I64_Default, DataType.I64_Default,) \
30
29
  .dtype_format(DataType.F32_Default, DataType.I64_Default,
31
30
  DataType.BOOL_Default, DataType.I64_Default, DataType.I64_Default,) \
31
+ .dtype_format(DataType.F16_Default, DataType.I64_Default,
32
+ DataType.BOOL_Default, DataType.I64_Default, DataType.I64_Default,) \
33
+ .dtype_format(DataType.BOOL_Default, DataType.I64_Default,
34
+ DataType.BOOL_Default, DataType.I64_Default, DataType.I64_Default,) \
35
+ .dtype_format(DataType.I16_Default, DataType.I64_Default,
36
+ DataType.BOOL_Default, DataType.I64_Default, DataType.I64_Default,) \
37
+ .dtype_format(DataType.I32_Default, DataType.I64_Default,
38
+ DataType.BOOL_Default, DataType.I64_Default, DataType.I64_Default,) \
39
+ .dtype_format(DataType.I64_Default, DataType.I64_Default,
40
+ DataType.BOOL_Default, DataType.I64_Default, DataType.I64_Default,) \
41
+ .dtype_format(DataType.I8_Default, DataType.I64_Default,
42
+ DataType.BOOL_Default, DataType.I64_Default, DataType.I64_Default,) \
43
+ .dtype_format(DataType.U16_Default, DataType.I64_Default,
44
+ DataType.BOOL_Default, DataType.I64_Default, DataType.I64_Default,) \
45
+ .dtype_format(DataType.U32_Default, DataType.I64_Default,
46
+ DataType.BOOL_Default, DataType.I64_Default, DataType.I64_Default,) \
47
+ .dtype_format(DataType.U64_Default, DataType.I64_Default,
48
+ DataType.BOOL_Default, DataType.I64_Default, DataType.I64_Default,) \
49
+ .dtype_format(DataType.U8_Default, DataType.I64_Default,
50
+ DataType.BOOL_Default, DataType.I64_Default, DataType.I64_Default,) \
32
51
  .get_op_info()
33
52
 
34
53