mindspore-2.4.0-cp39-cp39-win_amd64.whl → mindspore-2.4.10-cp39-cp39-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mindspore might be problematic.

Files changed (87)
  1. mindspore/.commit_id +1 -1
  2. mindspore/_c_dataengine.cp39-win_amd64.pyd +0 -0
  3. mindspore/_c_expression.cp39-win_amd64.pyd +0 -0
  4. mindspore/_c_mindrecord.cp39-win_amd64.pyd +0 -0
  5. mindspore/avcodec-59.dll +0 -0
  6. mindspore/avdevice-59.dll +0 -0
  7. mindspore/avfilter-8.dll +0 -0
  8. mindspore/avformat-59.dll +0 -0
  9. mindspore/avutil-57.dll +0 -0
  10. mindspore/common/api.py +1 -4
  11. mindspore/common/file_system.py +2 -0
  12. mindspore/common/initializer.py +51 -15
  13. mindspore/common/parameter.py +6 -5
  14. mindspore/common/tensor.py +15 -49
  15. mindspore/communication/_comm_helper.py +5 -0
  16. mindspore/communication/comm_func.py +7 -7
  17. mindspore/context.py +16 -2
  18. mindspore/dataset/engine/datasets_standard_format.py +17 -0
  19. mindspore/dataset/engine/datasets_user_defined.py +27 -1
  20. mindspore/dnnl.dll +0 -0
  21. mindspore/experimental/llm_boost/__init__.py +2 -2
  22. mindspore/experimental/llm_boost/atb/boost_base.py +240 -64
  23. mindspore/experimental/llm_boost/atb/llama_boost.py +46 -29
  24. mindspore/experimental/llm_boost/atb/qwen_boost.py +47 -24
  25. mindspore/include/api/context.h +1 -1
  26. mindspore/include/dataset/constants.h +2 -2
  27. mindspore/jpeg62.dll +0 -0
  28. mindspore/mindspore_backend.dll +0 -0
  29. mindspore/mindspore_common.dll +0 -0
  30. mindspore/mindspore_core.dll +0 -0
  31. mindspore/mindspore_glog.dll +0 -0
  32. mindspore/mindspore_np_dtype.dll +0 -0
  33. mindspore/mindspore_ops.dll +0 -0
  34. mindspore/mint/__init__.py +490 -2
  35. mindspore/mint/nn/__init__.py +2 -2
  36. mindspore/mint/optim/adamw.py +6 -14
  37. mindspore/nn/__init__.py +2 -0
  38. mindspore/nn/cell.py +16 -4
  39. mindspore/nn/layer/basic.py +24 -7
  40. mindspore/nn/layer/conv.py +3 -0
  41. mindspore/nn/layer/embedding.py +31 -14
  42. mindspore/nn/layer/pooling.py +8 -10
  43. mindspore/nn/optim/tft_wrapper.py +12 -15
  44. mindspore/nn/utils/__init__.py +22 -0
  45. mindspore/nn/utils/init.py +71 -0
  46. mindspore/opencv_core452.dll +0 -0
  47. mindspore/opencv_imgcodecs452.dll +0 -0
  48. mindspore/opencv_imgproc452.dll +0 -0
  49. mindspore/ops/_grad_experimental/grad_array_ops.py +0 -11
  50. mindspore/ops/_grad_experimental/grad_comm_ops.py +45 -8
  51. mindspore/ops/auto_generate/cpp_create_prim_instance_helper.py +6 -0
  52. mindspore/ops/auto_generate/gen_extend_func.py +33 -0
  53. mindspore/ops/auto_generate/gen_ops_def.py +52 -3
  54. mindspore/ops/auto_generate/gen_ops_prim.py +158 -8
  55. mindspore/ops/function/array_func.py +2 -0
  56. mindspore/ops/function/math_func.py +12 -5
  57. mindspore/ops/function/random_func.py +221 -7
  58. mindspore/ops/operations/__init__.py +1 -1
  59. mindspore/ops/operations/array_ops.py +3 -1
  60. mindspore/ops/operations/comm_ops.py +25 -1
  61. mindspore/ops/operations/custom_ops.py +6 -4
  62. mindspore/ops/operations/manually_defined/ops_def.py +8 -10
  63. mindspore/ops/operations/nn_ops.py +7 -2
  64. mindspore/parallel/_auto_parallel_context.py +26 -5
  65. mindspore/parallel/_cell_wrapper.py +24 -3
  66. mindspore/parallel/_tensor.py +46 -2
  67. mindspore/parallel/_utils.py +39 -21
  68. mindspore/parallel/transform_safetensors.py +196 -43
  69. mindspore/profiler/profiling.py +5 -1
  70. mindspore/run_check/_check_version.py +20 -9
  71. mindspore/swresample-4.dll +0 -0
  72. mindspore/swscale-6.dll +0 -0
  73. mindspore/tinyxml2.dll +0 -0
  74. mindspore/train/_utils.py +92 -32
  75. mindspore/train/callback/_checkpoint.py +12 -9
  76. mindspore/train/callback/_on_request_exit.py +12 -1
  77. mindspore/train/callback/_tft_register.py +33 -9
  78. mindspore/train/dataset_helper.py +10 -2
  79. mindspore/train/model.py +21 -0
  80. mindspore/train/serialization.py +12 -19
  81. mindspore/turbojpeg.dll +0 -0
  82. mindspore/version.py +1 -1
  83. {mindspore-2.4.0.dist-info → mindspore-2.4.10.dist-info}/METADATA +9 -7
  84. {mindspore-2.4.0.dist-info → mindspore-2.4.10.dist-info}/RECORD +87 -85
  85. {mindspore-2.4.0.dist-info → mindspore-2.4.10.dist-info}/WHEEL +1 -1
  86. {mindspore-2.4.0.dist-info → mindspore-2.4.10.dist-info}/entry_points.txt +0 -0
  87. {mindspore-2.4.0.dist-info → mindspore-2.4.10.dist-info}/top_level.txt +0 -0
@@ -30,7 +30,8 @@ from mindspore.common.api import _function_forbid_reuse
  from mindspore.ops.auto_generate import randperm
  from mindspore.common.generator import default_generator
  from mindspore.ops.auto_generate import UniformExt, NormalTensorTensor, \
- NormalTensorFloat, NormalFloatTensor, NormalFloatFloat, RandExt, RandLikeExt, MultinomialExt
+ NormalTensorFloat, NormalFloatTensor, NormalFloatFloat, RandExt, RandLikeExt, MultinomialExt, \
+ Randn, RandnLike, RandInt, RandIntLike, RandpermExt

  normal_tensor_tensor_op = NormalTensorTensor()
  normal_tensor_float_op = NormalTensorFloat()
@@ -42,10 +43,15 @@ real_div_ = P.RealDiv()
  reshape_ = P.Reshape()
  shape_ = P.Shape()
  top_k_ = P.TopK()
+ randperm_ext_ = RandpermExt()
  uniform_ = UniformExt()
  rand_ext_ = RandExt()
  rand_like_ext_ = RandLikeExt()
  multinomial_ext_ = MultinomialExt()
+ randn_ = Randn()
+ randn_like_ = RandnLike()
+ randint_ = RandInt()
+ randint_like_ = RandIntLike()
  generator_step_ = Tensor(10, mstype.int64)


@@ -287,7 +293,8 @@ def uniform_ext(tensor, a, b, generator=None):
  """
  if generator is None:
  generator = default_generator
- seed, offset = generator._step(generator_step_) # pylint: disable=protected-access
+ seed, offset = generator._step( # pylint: disable=protected-access
+ generator_step_)
  return uniform_(tensor, a, b, seed, offset)


@@ -755,7 +762,8 @@ def normal_ext(mean=0.0, std=1.0, size=None, generator=None):
  """
  if generator is None:
  generator = default_generator
- seed, offset = generator._step(generator_step_) # pylint: disable=protected-access
+ seed, offset = generator._step( # pylint: disable=protected-access
+ generator_step_)

  is_mean_tensor = isinstance(mean, Tensor)
  is_std_tensor = isinstance(std, Tensor)
@@ -1129,7 +1137,8 @@ def rand_ext(*size, generator=None, dtype=None):
  """
  if not generator:
  generator = default_generator
- seed, offset = generator._step(generator_step_) # pylint: disable=protected-access
+ seed, offset = generator._step( # pylint: disable=protected-access
+ generator_step_)
  return rand_ext_(size, seed, offset, dtype)


@@ -1163,10 +1172,174 @@ def rand_like_ext(input, *, dtype=None):
  >>> print(ops.function.random_func.rand_like_ext(a, dtype=ms.float32).shape)
  (2, 3)
  """
- seed, offset = default_generator._step(generator_step_) # pylint: disable=protected-access
+ seed, offset = default_generator._step( # pylint: disable=protected-access
+ generator_step_)
  return rand_like_ext_(input, seed, offset, dtype)


+ @_function_forbid_reuse
+ def randn_ext(*size, generator=None, dtype=None):
+ r"""
+ Returns a new tensor filled with numbers from the normal distribution over an interval :math:`[0, 1)`
+ based on the given shape and dtype.
+
+ .. warning::
+ This is an experimental API that is subject to change or deletion.
+
+ Args:
+ size (Union[int, tuple(int), list(int)]): Shape of the new tensor, e.g. :math:`(2, 3)` or :math:`2`.
+
+ Keyword Args:
+ generator (:class:`mindspore.Generator`, optional): a pseudorandom number generator.
+ Default: ``None``, uses the default pseudorandom number generator.
+ dtype (:class:`mindspore.dtype`, optional): Designated tensor dtype, it must be float type. If None,
+ `mindspore.float32` will be applied. Default: ``None`` .
+
+ Returns:
+ Tensor, with the designated shape and dtype, filled with random numbers from the normal distribution on
+ the interval :math:`[0, 1)`.
+
+ Raises:
+ ValueError: If `dtype` is not a `mstype.float_type` type.
+
+ Supported Platforms:
+ ``Ascend``
+
+ Examples:
+ >>> from mindspore import ops
+ >>> print(ops.function.random_func.randn_ext(2, 3).shape)
+ (2, 3)
+ """
+ if not generator:
+ generator = default_generator
+ seed, offset = generator._step( # pylint: disable=protected-access
+ generator_step_)
+ return randn_(size, seed, offset, dtype)
+
+
+ @_function_forbid_reuse
+ def randn_like_ext(input, *, dtype=None):
+ r"""
+ Returns a new tensor filled with numbers from the normal distribution over an interval :math:`[0, 1)`
+ based on the given dtype and shape of the input tensor.
+
+ .. warning::
+ This is an experimental API that is subject to change or deletion.
+
+ Args:
+ input (Tensor): Input Tensor to specify the output shape and its default dtype.
+
+ Keyword Args:
+ dtype (:class:`mindspore.dtype`, optional): Designated tensor dtype, it must be float type. If None,
+ the same dtype of `input` will be applied. Default: ``None`` .
+
+ Returns:
+ Tensor, with the designated shape and dtype, filled with random numbers from the normal distribution on
+ the interval :math:`[0, 1)`.
+
+ Raises:
+ ValueError: If `dtype` is not a `mstype.float_type` type.
+
+ Supported Platforms:
+ ``Ascend``
+
+ Examples:
+ >>> import mindspore as ms
+ >>> from mindspore import Tensor, ops
+ >>> a = Tensor([[2, 3, 4], [1, 2, 3]])
+ >>> print(ops.function.random_func.randn_like_ext(a, dtype=ms.float32).shape)
+ (2, 3)
+ """
+ seed, offset = default_generator._step( # pylint: disable=protected-access
+ generator_step_)
+ return randn_like_(input, seed, offset, dtype)
+
+
+ @_function_forbid_reuse
+ def randint_ext(low, high, size, *, generator=None, dtype=None):
+ r"""
+ Returns a new tensor filled with integer numbers from the uniform distribution over an interval :math:`[low, high)`
+ based on the given shape and dtype.
+
+ .. warning::
+ This is an experimental API that is subject to change or deletion.
+
+ Args:
+ low (int): the lower bound of the generated random number
+ high (int): the upper bound of the generated random number
+ size (Union[tuple(int), list(int)]): Shape of the new tensor, e.g. :math:`(2, 3)`.
+
+ Keyword Args:
+ generator (:class:`mindspore.Generator`, optional): a pseudorandom number generator.
+ Default: ``None``, uses the default pseudorandom number generator.
+ dtype (:class:`mindspore.dtype`, optional): Designated tensor dtype. If None,
+ `mindspore.int64` will be applied. Default: ``None`` .
+
+ Returns:
+ Tensor, with the designated shape and dtype, filled with random numbers from the uniform distribution on
+ the interval :math:`[low, high)`.
+
+ Raises:
+ TypeError: If `size` is not a tuple.
+ TypeError: If `low` or `high` is not integer.
+
+ Supported Platforms:
+ ``Ascend``
+
+ Examples:
+ >>> from mindspore import ops
+ >>> print(ops.function.random_func.randint_ext(0, 5, (2, 3)).shape)
+ (2, 3)
+ """
+ if not generator:
+ generator = default_generator
+ seed, offset = generator._step( # pylint: disable=protected-access
+ generator_step_)
+ return randint_(low, high, size, seed, offset, dtype)
+
+
+ @_function_forbid_reuse
+ def randint_like_ext(input, low, high, *, dtype=None):
+ r"""
+ Returns a new tensor filled with integer numbers from the uniform distribution over an interval :math:`[low, high)`
+ based on the given dtype and shape of the input tensor.
+
+ .. warning::
+ This is an experimental API that is subject to change or deletion.
+
+ Args:
+ input (Tensor): Input Tensor to specify the output shape and its default dtype.
+ low (int): the lower bound of the generated random number
+ high (int): the upper bound of the generated random number
+
+ Keyword Args:
+ dtype (:class:`mindspore.dtype`, optional): Designated tensor dtype. If None,
+ the same dtype of `input` will be applied. Default: ``None`` .
+
+ Returns:
+ Tensor, with the designated shape and dtype, filled with random numbers from the uniform distribution on
+ the interval :math:`[low, high)`.
+
+ Raises:
+ TypeError: If `low` or `high` is not integer.
+
+ Supported Platforms:
+ ``Ascend``
+
+ Examples:
+ >>> import mindspore as ms
+ >>> from mindspore import Tensor, ops
+ >>> a = Tensor([[2, 3, 4], [1, 2, 3]])
+ >>> low = 0
+ >>> high = 5
+ >>> print(ops.function.random_func.randint_like_ext(a, low, high, dtype=ms.int32).shape)
+ (2, 3)
+ """
+ seed, offset = default_generator._step( # pylint: disable=protected-access
+ generator_step_)
+ return randint_like_(input, low, high, seed, offset, dtype)
+
+
  @_function_forbid_reuse
  def randn(*size, dtype=None, seed=None):
  r"""
@@ -1395,6 +1568,47 @@ def randint_like(input, low, high, seed=None, *, dtype=None):
  return cast_(output, dtype)


+ def randperm_ext(n, *, generator=None, dtype=mstype.int64):
+ r"""
+ Generates random permutation of integers from 0 to n-1.
+
+ .. warning::
+ - This is an experimental API that is subject to change or deletion.
+
+
+ Args:
+ n (Union[Tensor, int]): size of the permutation. int or Tensor with shape: () or (1,) and
+ data type int64. The value of `n` must be greater than zero.
+ generator (:class:`mindspore.Generator`, optional): a pseudorandom number generator.
+ Default: ``None``, uses the default pseudorandom number generator.
+ dtype (mindspore.dtype, optional): The type of output. Default: mstype.int64.
+
+ Returns:
+ Tensor with shape (n,) and type `dtype`.
+
+ Raises:
+ TypeError: If `dtype` is not supported.
+ ValueError: If `n` is a negative or 0 element.
+ ValueError: If `n` is larger than the maximal data of the set dtype.
+
+ Supported Platforms:
+ ``Ascend``
+
+ Examples:
+ >>> from mindspore import ops
+ >>> from mindspore import dtype as mstype
+ >>> n = 4
+ >>> output = ops.randperm_ext(n, dtype=mstype.int64)
+ >>> print(output.shape)
+ (4,)
+ """
+ if not generator:
+ generator = default_generator
+ seed, offset = generator._step( # pylint: disable=protected-access
+ generator_step_)
+ return randperm_ext_(n, seed, offset, dtype)
+
+
  @_function_forbid_reuse
  def poisson(shape, mean, seed=None):
  r"""
@@ -1675,10 +1889,10 @@ def multinomial_ext(input, num_samples, replacement=False, *, generator=None):
  >>> # [[0 0 0 0 0 0 0 0 1 0]
  >>> # [1 1 1 1 1 0 1 1 1 1]]
  """
-
  if generator is None:
  generator = default_generator
- seed, offset = generator._step(generator_step_) # pylint: disable=protected-access
+ seed, offset = generator._step( # pylint: disable=protected-access
+ generator_step_)
  return multinomial_ext_(input, num_samples, replacement, seed, offset)


@@ -55,7 +55,7 @@ from .comm_ops import (AllGather, AllReduce, Reduce, NeighborExchange, NeighborE
  _MirrorOperator, _MirrorMiniStepOperator, _MiniStepAllGather, ReduceOp, _VirtualDataset,
  _VirtualOutput, _VirtualDiv, _GetTensorSlice, _VirtualAdd, _VirtualAssignAdd, _VirtualAccuGrad,
  _HostAllGather, _HostReduceScatter, _MirrorMicroStepOperator, _MicroStepAllGather,
- _VirtualPipelineEnd, AlltoAllV, ReduceScatter)
+ _VirtualPipelineEnd, AlltoAllV, ReduceScatter, _VirtualAssignKvCache)
  from .control_ops import GeSwitch, Merge
  from .custom_ops import (Custom)
  from .debug_ops import (ImageSummary, InsertGradientOf, HookBackward, ScalarSummary,
@@ -771,12 +771,14 @@ class Padding(Primitive):
  class UniqueWithPad(Primitive):
  """
  'ops.UniqueWithPad' is deprecated from version 2.4 and will be removed in a future version.
+ Please use the :func:`mindspore.ops.unique` combined with :func:`mindspore.ops.pad` to realize
+ the same function.

  Supported Platforms:
  Deprecated
  """

- @deprecated("2.4", "ops.Unique and ops.PadV3", False)
+ @deprecated("2.4", "ops.unique and ops.pad", False)
  @prim_attr_register
  def __init__(self):
  """init UniqueWithPad"""
@@ -988,6 +988,9 @@ class NeighborExchangeV2(Primitive):
  in the same subnet, please check the `details \
  <https://www.mindspore.cn/docs/en/master/api_python/samples/ops/communicate_ops.html#notes>`_.

+ Users need to ensure that the length of the received data `recv_lens` is consistent with that of
+ the sent data `send_lens`.
+
  Args:
  send_rank_ids (list(int)): Ranks which the data is sent to. 8 rank_ids represents 8 directions, if one
  direction is not send to , set it -1.
@@ -1393,7 +1396,7 @@ class Send(PrimitiveWithInfer):
  >>> def __init__(self):
  >>> super(SendNet, self).__init__()
  >>> self.depend = ops.Depend()
- >>> self.send = ops.Send(st_tag=0, dest_rank=8, group="hccl_world_group")
+ >>> self.send = ops.Send(sr_tag=0, dest_rank=8, group="hccl_world_group")
  >>>
  >>> def construct(self, x):
  >>> out = self.depend(x, self.send(x))
@@ -1682,6 +1685,27 @@ class _VirtualAssignAdd(PrimitiveWithInfer):
  virtual_assign_add = _VirtualAssignAdd()


+ class _VirtualAssignKvCache(PrimitiveWithInfer):
+ """
+ Auto parallel virtual operator. Do nothing in forward, do Assign kv cache in backward. It is only for
+ internal use of parallel modules and cannot be called by users.
+
+ """
+
+ @prim_attr_register
+ def __init__(self):
+ """Initialize _VirtualAssignAdd."""
+ self.add_prim_attr('order_enforce_skip', True)
+ self.add_prim_attr('side_effect_backprop_mem', True)
+
+ def infer_shape(self, x_shape, y_shape, kv_equal_shape):
+ return x_shape
+
+ def infer_dtype(self, x_dtype, y_dtype, kv_equal_dtype):
+ return x_dtype
+ virtual_assign_kv_cache = _VirtualAssignKvCache()
+
+
  class _VirtualAccuGrad(PrimitiveWithInfer):
  """
  Auto parallel virtual operator. Do nothing in forward, return y in backward. It is only for
@@ -251,11 +251,13 @@ class Custom(ops.PrimitiveWithInfer):

  - "xxx.so" file generation:

- 1) GPU Platform: Given user defined "xxx.cu" file (ex. "{path}/add.cu"), use nvcc command to compile
- it.(ex. "nvcc --shared -Xcompiler -fPIC -o add.so add.cu")
+ 1) GPU Platform: Given user defined "xxx.cu" file (ex. "{path}/add.cu"),
+ use nvcc command to compile
+ it.(ex. :code:`nvcc --shared -Xcompiler -fPIC -o add.so add.cu`)

- 2) CPU Platform: Given user defined "xxx.cc" file (ex. "{path}/add.cc"), use g++/gcc command to
- compile it.(ex. "g++ --shared -fPIC -o add.so add.cc")
+ 2) CPU Platform: Given user defined "xxx.cc" file (ex. "{path}/add.cc"),
+ use g++/gcc command to
+ compile it.(ex. :code:`g++ --shared -fPIC -o add.so add.cc`)

  - Define a "xxx.cc"/"xxx.cu" file:

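The docstring change above only rewraps the compile-command examples into :code: roles. For context, a hedged sketch of how such a compiled library is typically registered through ops.Custom in AOT mode; the symbol name CustomAdd and the shape/dtype lambdas are illustrative assumptions, not part of this diff:

    import numpy as np
    import mindspore as ms
    from mindspore import ops, Tensor

    # Assumes add.cc was compiled as shown above: g++ --shared -fPIC -o add.so add.cc
    custom_add = ops.Custom("./add.so:CustomAdd",
                            out_shape=lambda x, y: x,   # output shape follows the first input
                            out_dtype=lambda x, y: x,   # output dtype follows the first input
                            func_type="aot")
    x = Tensor(np.ones((4,), np.float32))
    y = Tensor(np.ones((4,), np.float32))
    out = custom_add(x, y)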
@@ -1171,17 +1171,15 @@ class Cast(Primitive):
  self.init_prim_io_names(inputs=['x', 'dst_type'], outputs=['output'])

  def check_elim(self, x, dtype):
- if isinstance(x, (Tensor, numbers.Number, Parameter)):
- if isinstance(x, Parameter):
- data = x.data
- if data.dtype == dtype:
- return (True, x)
- if isinstance(x, Tensor) and x.dtype == dtype:
- x = Tensor(x)
- x.set_cast_dtype()
+ if isinstance(x, Parameter):
+ data = x.data
+ if data.dtype == dtype:
  return (True, x)
- if isinstance(x, numbers.Number):
- return (True, Tensor(x, dtype=dtype))
+ if isinstance(x, Tensor) and x.dtype == dtype:
+ x.set_cast_dtype()
+ return (True, x)
+ if isinstance(x, numbers.Number):
+ return (True, Tensor(x, dtype=dtype))
  return (False, None)

  def __call__(self, input_x, dtype):
@@ -1430,6 +1430,9 @@ class MaxPool3D(Primitive):
  \max_{l=0, \ldots, d_{ker}-1} \max_{m=0, \ldots, h_{ker}-1} \max_{n=0, \ldots, w_{ker}-1}
  \text{input}(N_i, C_j, s_0 \times d + l, s_1 \times h + m, s_2 \times w + n)

+ .. note::
+ For Atlas training series products, this primitive is not supported.
+
  Args:
  kernel_size (Union[int, tuple[int]]): The size of kernel used to take the maximum value,
  is an int number that represents depth, height and width of the kernel, or a tuple
@@ -4759,7 +4762,8 @@ class SparseApplyAdagradV2(Primitive):
  - **grad** (Tensor) - Gradients has the same shape as `var` and
  :math:`grad.shape[1:] = var.shape[1:]` if var.shape > 1.
  - **indices** (Tensor) - A vector of indices into the first dimension of `var` and `accum`.
- The type must be int32 and :math:`indices.shape[0] = grad.shape[0]`.
+ The type must be int32 and :math:`indices.shape[0] = grad.shape[0]`. The value of indices
+ must be unique. Otherwise, the result is unpredictable.

  Outputs:
  Tuple of 2 tensors, the updated parameters.
@@ -7158,7 +7162,8 @@ class Conv3DTranspose(Primitive):
  \times (\text{kernel_size}[2] - 1) + \text{output_padding}[2] + 1

  Note:
- In Ascend, only support :math:`group=1`.
+ - In Ascend, only support :math:`group=1`.
+ - For Atlas A2 training series products, `output_padding` is currently not supported.

  Args:
  in_channel (int): The channel of the input x.
@@ -76,6 +76,7 @@ class _PipelineConfig:
  class _PipelineScheduler:
  PIPELINE_1F1B = "1f1b"
  PIPELINE_GPIPE = "gpipe"
+ PIPELINE_SEQPIPE = "seqpipe"


  class _AutoParallelContext:
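The scheduler enum above gains a "seqpipe" value. A hedged sketch of selecting it through the public context API; the pipeline_config key names are assumptions inferred from the pp_interleave/pp_scheduler validation shown in a later hunk, and pipeline_interleave must be true for any scheduler other than "1f1b":

    import mindspore as ms

    ms.set_auto_parallel_context(parallel_mode="semi_auto_parallel",
                                 pipeline_stages=4,
                                 pipeline_config={"pipeline_interleave": True,
                                                  "pipeline_scheduler": "seqpipe"})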
@@ -186,6 +187,25 @@ class _AutoParallelContext:
  self.check_context_handle()
  return self._context_handle.get_dump_local_norm()

+ def set_dump_device_local_norm(self, dump_device_local_norm):
+ """
+ Set dump device local norm for auto parallel.
+
+ Args:
+ dump_device_local_norm (bool): User need to specify if he want to dump device local norm. Default: False
+
+ Raises:
+ ValueError: If the dump_device_local_norm in not a bool value.
+ """
+ self.check_context_handle()
+ self._context_handle.set_dump_device_local_norm(dump_device_local_norm)
+
+ def get_dump_device_local_norm(self):
+ """Get dump device local norm."""
+ self.check_context_handle()
+ return self._context_handle.get_dump_device_local_norm()
+
+
  def set_fusion_threshold_mb(self, fusion_threshold=64, comm_type="allreduce"):
  """
  Set fusion threshold (MB) for auto parallel.
@@ -914,7 +934,8 @@ class _AutoParallelContext:
  pipeline_config[pp_interleave])

  Validator.check_string(pipeline_config[pp_scheduler], [_PipelineScheduler.PIPELINE_1F1B,
- _PipelineScheduler.PIPELINE_GPIPE])
+ _PipelineScheduler.PIPELINE_GPIPE,
+ _PipelineScheduler.PIPELINE_SEQPIPE])
  if not pipeline_config[pp_interleave] and pipeline_config[pp_scheduler] != _PipelineScheduler.PIPELINE_1F1B:
  raise ValueError(f"When pipeline_interleave is False, {pp_scheduler} is not supported")

@@ -1285,7 +1306,8 @@ _set_auto_parallel_context_func_map = {
  "enable_alltoall": auto_parallel_context().set_enable_alltoall,
  "strategy_ckpt_config": auto_parallel_context().set_strategy_ckpt_config,
  "comm_fusion": auto_parallel_context().set_comm_fusion,
- "dump_local_norm": auto_parallel_context().set_dump_local_norm}
+ "dump_local_norm": auto_parallel_context().set_dump_local_norm,
+ "dump_device_local_norm": auto_parallel_context().set_dump_device_local_norm}

  _get_auto_parallel_context_func_map = {
  "device_num": auto_parallel_context().get_device_num,
@@ -1318,7 +1340,8 @@ _get_auto_parallel_context_func_map = {
  "comm_fusion": auto_parallel_context().get_comm_fusion,
  "strategy_ckpt_config": auto_parallel_context().get_strategy_ckpt_config,
  "full_batch_is_set": auto_parallel_context().get_full_batch_is_set,
- "dump_local_norm": auto_parallel_context().get_dump_local_norm}
+ "dump_local_norm": auto_parallel_context().get_dump_local_norm,
+ "dump_device_local_norm": auto_parallel_context().get_dump_device_local_norm}


  @args_type_check(device_num=int, global_rank=int, gradients_mean=bool, gradient_fp32_sync=bool,
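Because the new setter/getter pair is registered in both func maps above, the flag should be reachable through the public context helpers; a minimal sketch, assuming the standard top-level API:

    import mindspore as ms

    # Dump per-device local norms during auto-parallel training (new in 2.4.10).
    ms.set_auto_parallel_context(dump_device_local_norm=True)
    print(ms.get_auto_parallel_context("dump_device_local_norm"))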
@@ -1429,8 +1452,6 @@ def _set_auto_parallel_context(**kwargs):
  - reducescatter: If communication fusion type is `reducescatter`. The `mode` contains: `auto`
  and `size`. Config is same as `allgather`.

-
-
  Raises:
  ValueError: If input key is not attribute in auto parallel context.
  """
@@ -24,7 +24,8 @@ from mindspore.ops import operations as P
  from mindspore.ops.operations.comm_ops import AllGather
  from mindspore.communication import GlobalComm
  from mindspore.common import jit
- from mindspore.communication import create_group
+ from mindspore.communication import create_group, destroy_group
+ from mindspore.communication._comm_helper import _get_group_map
  from mindspore.train._utils import get_parameter_redundancy, remove_param_redundancy

  _ALLGATHER_CELL = None
@@ -126,9 +127,26 @@ def _restore_parallel_context(origin_parallel_mode, origin_dataset_strategy):
  if context.get_context("mode") == context.GRAPH_MODE:
  context.set_auto_parallel_context(parallel_mode=origin_parallel_mode)
  if origin_dataset_strategy != "data_parallel":
+ if origin_dataset_strategy is not None and isinstance(origin_dataset_strategy, list):
+ origin_dataset_strategy = tuple(tuple(ds_item) for ds_item in origin_dataset_strategy)
  context.set_auto_parallel_context(dataset_strategy=origin_dataset_strategy)


+ def _get_group_name(group_map, group):
+ """get group name"""
+ group_name = str(group)
+ is_manual_communication_group = True
+ if group_map:
+ for name, rank_list in group_map.items():
+ if list(group) == rank_list:
+ group_name = name
+ is_manual_communication_group = False
+ break
+ if is_manual_communication_group:
+ create_group(str(group), list(group))
+ return group_name, is_manual_communication_group
+
+
  def _single_parameter_broadcast(net, layout, cur_rank=0, initial_rank=0):
  """
  Broadcast single parameter to other rank in data parallel dimension.
@@ -156,8 +174,9 @@ def _single_parameter_broadcast(net, layout, cur_rank=0, initial_rank=0):
  return
  net_param_dict = net.parameters_dict()
  _chang_parallel_context(origin_dataset_strategy)
+ group_map = _get_group_map()
  for group, params in param_redundancy_reversed.items():
- create_group(str(group), list(group))
+ group_name, is_manual_communication_group = _get_group_name(group_map, group)
  allreduce_input = []
  for param in params:
  if param not in net_param_dict:
@@ -168,7 +187,9 @@ def _single_parameter_broadcast(net, layout, cur_rank=0, initial_rank=0):
  allreduce_input.append(real_param)
  if not allreduce_input:
  continue
- communicator = SingleCommunicator(str(group))
+ communicator = SingleCommunicator(group_name)
  for real_param in allreduce_input:
  real_param.set_data(communicator(real_param), real_param.sliced)
+ if is_manual_communication_group:
+ destroy_group(group_name)
  _restore_parallel_context(origin_parallel_mode, origin_dataset_strategy)
@@ -590,6 +590,8 @@ def _apply_operator(operator_name):
  Returns:
  The data of tensor after apply operator.
  """
+ if str(type(numpy_data)) == "<class 'builtins.PySafeSlice'>":
+ numpy_data = numpy_data[:]
  if not isinstance(numpy_data, np.ndarray):
  raise TypeError("The data should be a numpy.ndarray.")
  _check_operator(reshape_op)
@@ -629,8 +631,6 @@ def _apply_operator(operator_name):
  Returns:
  The data of tensor after apply operator.
  """
- if not isinstance(numpy_data, np.ndarray):
- raise TypeError("The data should be a numpy.ndarray.")
  _check_operator(slice_op)
  if len(slice_op[1]) % 3 != 0:
  raise ValueError("The slice operator information is wrong.")
@@ -701,6 +701,50 @@ def _load_tensor_shape(dev_mat, tensor_map, full_shape=None, rank_id=-1):
  return tuple(res)


+ def _count_tensor_shape(dev_mat, tensor_map, full_shape=None, rank_id=-1):
+ """get tensor shape"""
+ if rank_id == -1:
+ rank = get_rank()
+ else:
+ rank = rank_id
+ tensor_strategy = _get_tensor_strategy(dev_mat, tensor_map)
+ tensor_slice_index = _get_tensor_slice_index(dev_mat, tensor_strategy, tensor_map, rank)
+ np_tensor_list = _chunk_shape_by_strategy(full_shape, tensor_strategy)
+ np_tensor_slice_index = np_tensor_list[int(tensor_slice_index)]
+ res = []
+ for index in np_tensor_slice_index:
+ res.append(index[1] - index[0])
+ return res
+
+
+ def _load_tensor_shape_by_layout(tensor, layout, rank_id):
+ """get tensor shape by layout"""
+ if not isinstance(layout, tuple):
+ raise TypeError("The layout should be tuple! layout is {}".format(layout))
+ if len(layout) < 7:
+ raise ValueError("The length of layout must be larger than 6! layout is {}".format(layout))
+ slice_shape = layout[2]
+ if slice_shape:
+ return slice_shape
+ tensor_map = layout[1]
+ if not tensor_map:
+ return tensor.shape
+ dev_mat = layout[0]
+ uniform_split = layout[4]
+ group = layout[5]
+ full_shape = layout[6]
+ if not full_shape:
+ full_shape = tensor.shape
+ if uniform_split == 0:
+ raise RuntimeError("The load tensor only support uniform split now")
+ tensor_slice_shape = _count_tensor_shape(dev_mat, tensor_map, full_shape, rank_id)
+ if group:
+ # get a totally shard tensor slice for parallel optimizer
+ size = get_group_size(group)
+ tensor_slice_shape[0] //= size
+ return tensor_slice_shape
+
+
  def _chunk_shape_by_strategy(full_shape, strategy):
  """chunk shape by strategy"""
  shape = []
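For readers unfamiliar with the layout tuple unpacked above, a self-contained illustration of the slice-shape arithmetic that _count_tensor_shape performs under uniform split; the helper below is hypothetical and not part of MindSpore:

    # Each tensor dimension mapped to a device-matrix axis is split evenly across that axis;
    # -1 means the dimension is not sharded. Axes are counted from the right of dev_mat.
    def slice_shape(full_shape, dev_mat, tensor_map):
        shards = [1 if axis == -1 else dev_mat[len(dev_mat) - 1 - axis] for axis in tensor_map]
        return [dim // n for dim, n in zip(full_shape, shards)]

    # An (8, 4) tensor on a 2x4 device matrix, dim 0 split across the axis of size 2:
    print(slice_shape([8, 4], [2, 4], [1, -1]))   # -> [4, 4]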