mindspore 2.2.11__cp37-none-any.whl → 2.2.14__cp37-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mindspore might be problematic. Click here for more details.

Files changed (118) hide show
  1. mindspore/.commit_id +1 -1
  2. mindspore/__init__.py +2 -1
  3. mindspore/_akg/akg/topi/cpp/impl.py +1 -1
  4. mindspore/_akg/akg/tvm/_ffi/base.py +1 -1
  5. mindspore/_c_dataengine.cpython-37m-aarch64-linux-gnu.so +0 -0
  6. mindspore/_c_expression.cpython-37m-aarch64-linux-gnu.so +0 -0
  7. mindspore/_c_mindrecord.cpython-37m-aarch64-linux-gnu.so +0 -0
  8. mindspore/_mindspore_offline_debug.cpython-37m-aarch64-linux-gnu.so +0 -0
  9. mindspore/bin/cache_admin +0 -0
  10. mindspore/bin/cache_server +0 -0
  11. mindspore/common/tensor.py +0 -2
  12. mindspore/communication/management.py +3 -0
  13. mindspore/context.py +34 -4
  14. mindspore/dataset/engine/datasets.py +23 -0
  15. mindspore/dataset/engine/validators.py +1 -1
  16. mindspore/dataset/vision/py_transforms_util.py +2 -2
  17. mindspore/experimental/optim/lr_scheduler.py +5 -6
  18. mindspore/lib/libdnnl.so.2 +0 -0
  19. mindspore/lib/libmindspore.so +0 -0
  20. mindspore/lib/libmindspore_backend.so +0 -0
  21. mindspore/lib/libmindspore_common.so +0 -0
  22. mindspore/lib/libmindspore_core.so +0 -0
  23. mindspore/lib/libmindspore_glog.so.0 +0 -0
  24. mindspore/lib/libmindspore_gpr.so.15 +0 -0
  25. mindspore/lib/libmindspore_grpc.so.15 +0 -0
  26. mindspore/lib/libmindspore_shared_lib.so +0 -0
  27. mindspore/lib/libopencv_core.so.4.5 +0 -0
  28. mindspore/lib/plugin/ascend/custom_aicpu_ops/op_impl/cpu/aicpu_kernel/impl/libcust_aicpu_kernels.so +0 -0
  29. mindspore/lib/plugin/ascend/custom_aicpu_ops/op_impl/cpu/aicpu_kernel/impl/libcust_cpu_kernels.so +0 -0
  30. mindspore/lib/plugin/ascend/custom_aicpu_ops/op_impl/cpu/config/cust_aicpu_kernel.json +48 -0
  31. mindspore/lib/plugin/ascend/custom_aicpu_ops/op_proto/libcust_op_proto.so +0 -0
  32. mindspore/lib/plugin/ascend/libakg.so +0 -0
  33. mindspore/lib/plugin/ascend/libascend_collective.so +0 -0
  34. mindspore/lib/plugin/ascend/libdvpp_utils.so +0 -0
  35. mindspore/lib/plugin/ascend/libmindspore_aicpu_kernels.so +0 -0
  36. mindspore/lib/plugin/ascend/libmindspore_cpu_kernels.so +0 -0
  37. mindspore/lib/plugin/libmindspore_ascend.so.1 +0 -0
  38. mindspore/mindrecord/tools/cifar100_to_mr.py +49 -57
  39. mindspore/mindrecord/tools/cifar10_to_mr.py +46 -55
  40. mindspore/mindrecord/tools/csv_to_mr.py +3 -8
  41. mindspore/mindrecord/tools/mnist_to_mr.py +4 -9
  42. mindspore/mindrecord/tools/tfrecord_to_mr.py +1 -4
  43. mindspore/nn/layer/activation.py +1 -1
  44. mindspore/nn/layer/embedding.py +2 -2
  45. mindspore/nn/loss/loss.py +1 -1
  46. mindspore/nn/optim/ada_grad.py +2 -2
  47. mindspore/nn/optim/sgd.py +3 -2
  48. mindspore/numpy/math_ops.py +1 -1
  49. mindspore/ops/__init__.py +3 -0
  50. mindspore/ops/_grad_experimental/grad_array_ops.py +0 -31
  51. mindspore/ops/_grad_experimental/grad_comm_ops.py +4 -2
  52. mindspore/ops/_grad_experimental/grad_inner_ops.py +8 -0
  53. mindspore/ops/_grad_experimental/grad_math_ops.py +37 -17
  54. mindspore/ops/_op_impl/aicpu/__init__.py +1 -0
  55. mindspore/ops/_op_impl/aicpu/generate_eod_mask.py +38 -0
  56. mindspore/ops/function/array_func.py +6 -5
  57. mindspore/ops/function/debug_func.py +1 -1
  58. mindspore/ops/function/linalg_func.py +21 -11
  59. mindspore/ops/function/math_func.py +3 -0
  60. mindspore/ops/function/nn_func.py +13 -11
  61. mindspore/ops/function/parameter_func.py +2 -0
  62. mindspore/ops/function/sparse_unary_func.py +2 -2
  63. mindspore/ops/function/vmap_func.py +1 -0
  64. mindspore/ops/operations/_embedding_cache_ops.py +1 -1
  65. mindspore/ops/operations/_inner_ops.py +56 -1
  66. mindspore/ops/operations/_quant_ops.py +4 -4
  67. mindspore/ops/operations/_rl_inner_ops.py +1 -1
  68. mindspore/ops/operations/array_ops.py +15 -4
  69. mindspore/ops/operations/custom_ops.py +1 -1
  70. mindspore/ops/operations/debug_ops.py +1 -1
  71. mindspore/ops/operations/image_ops.py +3 -3
  72. mindspore/ops/operations/inner_ops.py +49 -0
  73. mindspore/ops/operations/math_ops.py +62 -0
  74. mindspore/ops/operations/nn_ops.py +7 -3
  75. mindspore/ops/operations/random_ops.py +2 -0
  76. mindspore/ops/operations/sparse_ops.py +4 -4
  77. mindspore/ops/silent_check.py +162 -0
  78. mindspore/parallel/__init__.py +3 -2
  79. mindspore/parallel/_auto_parallel_context.py +82 -3
  80. mindspore/parallel/_parallel_serialization.py +34 -2
  81. mindspore/parallel/_tensor.py +3 -1
  82. mindspore/parallel/_transformer/transformer.py +8 -8
  83. mindspore/parallel/checkpoint_transform.py +191 -45
  84. mindspore/profiler/parser/ascend_cluster_generator.py +111 -0
  85. mindspore/profiler/parser/ascend_communicate_generator.py +315 -0
  86. mindspore/profiler/parser/ascend_flops_generator.py +8 -2
  87. mindspore/profiler/parser/ascend_fpbp_generator.py +8 -2
  88. mindspore/profiler/parser/ascend_hccl_generator.py +2 -2
  89. mindspore/profiler/parser/ascend_msprof_exporter.py +30 -6
  90. mindspore/profiler/parser/ascend_msprof_generator.py +16 -5
  91. mindspore/profiler/parser/ascend_op_generator.py +15 -7
  92. mindspore/profiler/parser/ascend_timeline_generator.py +5 -2
  93. mindspore/profiler/parser/base_timeline_generator.py +11 -3
  94. mindspore/profiler/parser/cpu_gpu_timeline_generator.py +2 -1
  95. mindspore/profiler/parser/framework_parser.py +8 -2
  96. mindspore/profiler/parser/memory_usage_parser.py +8 -2
  97. mindspore/profiler/parser/minddata_analyzer.py +8 -2
  98. mindspore/profiler/parser/minddata_parser.py +1 -1
  99. mindspore/profiler/parser/msadvisor_analyzer.py +4 -2
  100. mindspore/profiler/parser/msadvisor_parser.py +9 -3
  101. mindspore/profiler/profiling.py +97 -25
  102. mindspore/rewrite/api/node.py +1 -1
  103. mindspore/rewrite/api/symbol_tree.py +2 -2
  104. mindspore/train/callback/_checkpoint.py +8 -8
  105. mindspore/train/callback/_landscape.py +2 -3
  106. mindspore/train/callback/_summary_collector.py +6 -7
  107. mindspore/train/dataset_helper.py +6 -0
  108. mindspore/train/model.py +17 -5
  109. mindspore/train/serialization.py +6 -1
  110. mindspore/train/summary/_writer_pool.py +1 -1
  111. mindspore/train/summary/summary_record.py +5 -6
  112. mindspore/version.py +1 -1
  113. {mindspore-2.2.11.dist-info → mindspore-2.2.14.dist-info}/METADATA +1 -1
  114. {mindspore-2.2.11.dist-info → mindspore-2.2.14.dist-info}/RECORD +117 -114
  115. mindspore/lib/plugin/libmindspore_ascend.so.2 +0 -0
  116. {mindspore-2.2.11.dist-info → mindspore-2.2.14.dist-info}/WHEEL +0 -0
  117. {mindspore-2.2.11.dist-info → mindspore-2.2.14.dist-info}/entry_points.txt +0 -0
  118. {mindspore-2.2.11.dist-info → mindspore-2.2.14.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,162 @@
1
+ # Copyright 2024 Huawei Technologies Co., Ltd
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ # ============================================================================
15
+ """Silent Check."""
16
+ import os
17
+
18
+ from mindspore.common.tensor import Tensor
19
+ from mindspore.common.parameter import Parameter
20
+ import mindspore.common.dtype as mstype
21
+
22
+ from . import operations
23
+ from .operations._inner_ops import _MirrorSilentCheck
24
+ from .operations import RmsNorm as OriginRmsNorm
25
+ from .operations import LayerNorm as OriginLayerNorm
26
+ from .primitive import Primitive
27
+
28
+
29
+ NPU_ASD_ENABLE = 'NPU_ASD_ENABLE'
30
+
31
+
32
class ASDBase:
    """
    ASDBase is the base class of operators with the accuracy-sensitive detection feature in python.

    An instance holds one instance of the original operator (``op``) and a ``_MirrorSilentCheck``
    primitive (``check_op``). Attributes that are not found on the wrapper itself are forwarded to the
    wrapped operator. ``enable_check`` is True only when the environment variable ``NPU_ASD_ENABLE``
    equals ``"1"`` at construction time.

    Args:
        cls (Primitive): Original operator requiring accuracy-sensitive detection feature.
        args (tuple): A variable parameter tuple passed to the original operator.
        kwargs (dict): A variable parameter dictionary passed to the original operator.

    Supported Platforms:
        ``Ascend``

    Examples:
        >>> from mindspore.ops.silent_check import ASDBase
        >>> from mindspore.ops import LayerNorm as OriginLayerNorm
        >>> class LayerNormASD(ASDBase):
        ...     def __init__(self, *args, **kwargs):
        ...         super().__init__(OriginLayerNorm, *args, **kwargs)
        ...         # init parameters for accuracy-sensitive detection by calling the base class method generate_params()
        ...         self.pre_val, self.min_val, self.max_val, self.cnt = self.generate_params()
        ...
        ...     def __call__(self, input_x, gamma, beta):
        ...         if self.enable_check:
        ...             # execute accuracy-sensitive detection by calling the check_op of base class
        ...             input_x = self.check_op(
        ...                 input_x, self.pre_val, self.min_val, self.max_val, self.cnt, None)
        ...             self.cnt += 1
        ...         # return the result of original operator
        ...         return self.op(input_x, gamma, beta)
    """
    # Class-wide counter; generate_params() embeds it in parameter names and then increments it.
    _index = 0
    __ms_class__ = True

    def __init__(self, cls, *args, **kwargs):
        self.op = cls(*args, **kwargs)
        self.check_op = _MirrorSilentCheck()
        self._suffix = "ASD_" + cls.__name__
        primitive_attr = dir(Primitive)
        # Public attribute names of Primitive (a set, despite the historical name). Callables among
        # them are wrapped in __getattr__ so that methods returning the operator return the wrapper.
        self._op_attr_dict = {
            name for name in primitive_attr if not name.startswith("_")}
        self.enable_check = os.environ.get(NPU_ASD_ENABLE) == "1"

    def __getattr__(self, name):
        """
        Forward attribute lookups that failed on the wrapper to the wrapped operator.

        Public ``Primitive`` callables are returned as a wrapper function which substitutes this
        object for the operator when the call returns the operator itself. Any other attribute
        present on the operator is returned as is.

        Raises:
            AttributeError: If the instance is not initialised yet, or if neither the wrapper nor the
                wrapped operator has an attribute called `name`.
        """
        # Read the instance state through __dict__: going through self.op / self._op_attr_dict here
        # would re-enter __getattr__ and recurse forever when __init__ has not run yet (for example
        # while copy.copy() probes a freshly created instance for '__setstate__').
        state = self.__dict__
        if "op" not in state or "_op_attr_dict" not in state:
            raise AttributeError(
                f"'{type(self).__name__}' object has no attribute '{name}'")
        wrapped_op = state["op"]

        def method_wrapper(*args, **kwargs):
            out = getattr(self.op, name)(*args, **kwargs)
            if out is self.op:
                return self
            return out

        if name in state["_op_attr_dict"]:
            if callable(getattr(wrapped_op, name)):
                return method_wrapper
        if hasattr(wrapped_op, name):
            return getattr(wrapped_op, name)
        # object defines no __getattr__, so there is nothing to delegate to; report the missing
        # attribute under the name the caller actually asked for.
        raise AttributeError(
            f"'{type(self).__name__}' object has no attribute '{name}'")

    def __repr__(self):
        """Return the representation of the wrapped operator."""
        return self.op.__repr__()

    def generate_params(self):
        """
        Generate support params for accuracy-sensitive detection.

        Returns:
            tuple consisting of four Parameters ``(pre_val, min_val, max_val, cnt)``, none of which
            requires grad. The first three are float32 tensors and `cnt` is an int32 tensor, all
            initialised to 0. The derived class initializes the parameters required for
            accuracy-sensitive detection by calling this function. Every call increments the
            class-wide index used in the parameter names.

        Examples:
            >>> from mindspore.ops.silent_check import ASDBase
            >>> from mindspore.ops import LayerNorm as OriginLayerNorm
            >>> class LayerNormASD(ASDBase):
            ...     def __init__(self, *args, **kwargs):
            ...         super().__init__(OriginLayerNorm, *args, **kwargs)
            ...         # init parameters for accuracy-sensitive detection by calling the base class function
            ...         self.pre_val, self.min_val, self.max_val, self.cnt = self.generate_params()
        """
        pre_val = Parameter(Tensor(0, mstype.float32),
                            name=f"{self._suffix}_pre_val_{self._index}",
                            requires_grad=False)
        min_val = Parameter(Tensor(0, mstype.float32),
                            name=f"{self._suffix}_min_val_{self._index}",
                            requires_grad=False)
        max_val = Parameter(Tensor(0, mstype.float32),
                            name=f"{self._suffix}_max_val_{self._index}",
                            requires_grad=False)
        cnt = Parameter(Tensor(0, mstype.int32),
                        name=f"{self._suffix}_cnt_{self._index}",
                        requires_grad=False)
        ASDBase._index += 1
        return pre_val, min_val, max_val, cnt
123
+
124
+
125
class RmsNormASD(ASDBase):
    """
    RmsNorm with ASD.

    Wraps the original RmsNorm operator; when checking is enabled the input is passed through the
    check operator before the original operator runs.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(OriginRmsNorm, *args, **kwargs)
        detection_params = self.generate_params()
        self.pre_val, self.min_val, self.max_val, self.cnt = detection_params

    def __call__(self, input_x, gamma):
        # Fast path: detection disabled, call the original operator directly.
        if not self.enable_check:
            return self.op(input_x, gamma)
        checked_x = self.check_op(
            input_x, self.pre_val, self.min_val, self.max_val, self.cnt, None)
        self.cnt += 1
        return self.op(checked_x, gamma)
140
+
141
+
142
class LayerNormASD(ASDBase):
    """
    LayerNorm with ASD.

    Wraps the original LayerNorm operator; when checking is enabled the input is passed through the
    check operator before the original operator runs.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(OriginLayerNorm, *args, **kwargs)
        detection_params = self.generate_params()
        self.pre_val, self.min_val, self.max_val, self.cnt = detection_params

    def __call__(self, input_x, gamma, beta):
        # Fast path: detection disabled, call the original operator directly.
        if not self.enable_check:
            return self.op(input_x, gamma, beta)
        checked_x = self.check_op(
            input_x, self.pre_val, self.min_val, self.max_val, self.cnt, None)
        self.cnt += 1
        return self.op(checked_x, gamma, beta)
157
+
158
+
159
def _silent_check():
    """Replace `operations.LayerNorm` and `operations.RmsNorm` with their ASD wrappers.

    Nothing is replaced unless the environment variable named by NPU_ASD_ENABLE is exactly "1".
    """
    asd_requested = os.environ.get(NPU_ASD_ENABLE) == "1"
    if not asd_requested:
        return
    operations.LayerNorm = LayerNormASD
    operations.RmsNorm = RmsNormASD
@@ -18,8 +18,9 @@ from __future__ import absolute_import
18
18
  from mindspore.parallel.algo_parameter_config import get_algo_parameters, reset_algo_parameters, \
19
19
  set_algo_parameters
20
20
  from mindspore.parallel.checkpoint_transform import rank_list_for_transform, transform_checkpoint_by_rank, \
21
- transform_checkpoints, merge_pipeline_strategys
21
+ transform_checkpoints, merge_pipeline_strategys, load_segmented_checkpoints
22
22
  from mindspore.parallel.shard import shard
23
23
 
24
24
  __all__ = ["set_algo_parameters", "reset_algo_parameters", "get_algo_parameters", "rank_list_for_transform",
25
- "transform_checkpoint_by_rank", "transform_checkpoints", "merge_pipeline_strategys", "shard"]
25
+ "transform_checkpoint_by_rank", "transform_checkpoints", "merge_pipeline_strategys", "shard",
26
+ "load_segmented_checkpoints"]
@@ -65,6 +65,19 @@ class _ParallelOptimizerConfig:
65
65
  OPTIMIZER_WEIGHT_SHARD_SIZE = "optimizer_weight_shard_size"
66
66
 
67
67
 
68
class _PipelineConfig:
    """
    The key of the Pipeline parallelism.

    These are the only keys accepted in the `pipeline_config` dict.
    """
    # Key whose value switches the interleaved pipeline scheduler on or off (a bool).
    PIPELINE_INTERLEAVE = "pipeline_interleave"
    # Key whose value names the pipeline scheduler (one of the _PipelineScheduler values).
    PIPELINE_SCHEDULER = "pipeline_scheduler"
74
+
75
+
76
class _PipelineScheduler:
    """
    The values accepted for the "pipeline_scheduler" key of the pipeline configuration.
    """
    # Scheduler name "1f1b".
    PIPELINE_1F1B = "1f1b"
    # Scheduler name "gpipe".
    PIPELINE_GPIPE = "gpipe"
79
+
80
+
68
81
  class _AutoParallelContext:
69
82
  """
70
83
  _AutoParallelContext is the environment in which operations are executed
@@ -105,11 +118,11 @@ class _AutoParallelContext:
105
118
  device_num (int): The device number.
106
119
 
107
120
  Raises:
108
- ValueError: If the device num is not in [1, 4096].
121
+ ValueError: If the device num is not a positive integer.
109
122
  """
110
123
  self.check_context_handle()
111
- if device_num < 1 or device_num > 4096:
112
- raise ValueError("The context configuration parameter 'device_num' must be in [1, 4096], "
124
+ if device_num < 1:
125
+ raise ValueError("The context configuration parameter 'device_num' must be a positive integer, "
113
126
  "but got the value of device_num : {}.".format(device_num))
114
127
  from mindspore.communication._comm_helper import _HCCL_TEST_AVAILABLE
115
128
  self._context_handle.set_hccl_test_avaible(_HCCL_TEST_AVAILABLE)
@@ -229,6 +242,16 @@ class _AutoParallelContext:
229
242
  self.check_context_handle()
230
243
  return self._context_handle.get_pipeline_stage_split_num()
231
244
 
245
def get_pipeline_interleave(self):
    """
    Get pipeline interleave flag.

    Returns:
        The value reported by the context handle's `get_pipeline_interleave()`.
    """
    # Validate the context handle before delegating to it.
    self.check_context_handle()
    return self._context_handle.get_pipeline_interleave()
249
+
250
def get_pipeline_scheduler(self):
    """
    Get pipeline scheduler.

    Returns:
        The value reported by the context handle's `get_pipeline_scheduler()`.
    """
    # Validate the context handle before delegating to it.
    self.check_context_handle()
    return self._context_handle.get_pipeline_scheduler()
254
+
232
255
  def set_pipeline_segments(self, segments):
233
256
  """Set the segments of the pipeline"""
234
257
  if isinstance(segments, bool) or not isinstance(segments, int):
@@ -782,6 +805,57 @@ class _AutoParallelContext:
782
805
  self.check_context_handle()
783
806
  return self._context_handle.get_enable_fold_pipeline()
784
807
 
808
def set_pipeline_config(self, pipeline_config):
    r"""
    Set the configuration for pipeline parallelism. The configuration provides more detailed behavior control about
    parallel training when pipeline parallelism is enabled.

    All values are validated before anything is written to the context handle, so a rejected
    configuration leaves the context unchanged.

    Args:
        pipeline_config (dict): The configuration for pipeline parallelism. It supports following keys:

            - pipeline_interleave(bool): Setting true enable interleave scheduler for pipeline parallelism. This
              scheduler requires more memory but less bubble. This key is required.
            - pipeline_scheduler(string): There are two choices, "1f1b" and "gpipe". default is "1f1b"

              - 1f1b: It requires less memory and bubble ratio, for it run backward pass when corresponding forward pass
                finished.
              - gpipe: It requires more memory and bubble ratio, for it run backward pass after all forward pass
                finished.

    Raises:
        TypeError: If the type of `pipeline_config` is not `dict`.
        ValueError: If the key in `pipeline_config` not in ["pipeline_interleave", "pipeline_scheduler"].
        ValueError: If pipeline interleave is False, pipeline scheduler is not `1f1b`.
        KeyError: If `pipeline_config` has no "pipeline_interleave" key.
    """
    self.check_context_handle()

    if not isinstance(pipeline_config, dict):
        raise TypeError("For 'set_pipeline_config', the argument 'pipeline_config' "
                        "must be dict, but got the type : {}.".format(type(pipeline_config)))

    pp_interleave = _PipelineConfig.PIPELINE_INTERLEAVE
    pp_scheduler = _PipelineConfig.PIPELINE_SCHEDULER

    # Collect every unsupported key in one pass so the error names all of them.
    unknown_config = [config_name for config_name in pipeline_config
                      if config_name not in (pp_interleave, pp_scheduler)]
    if unknown_config:
        raise ValueError("Unknown config: {}".format(unknown_config))

    interleave = pipeline_config[pp_interleave]
    # The scheduler is optional and falls back to the documented default "1f1b".
    scheduler = pipeline_config.get(pp_scheduler, _PipelineScheduler.PIPELINE_1F1B)

    Validator.check_bool(interleave, pp_interleave, pp_interleave)
    Validator.check_string(scheduler, [_PipelineScheduler.PIPELINE_1F1B,
                                       _PipelineScheduler.PIPELINE_GPIPE])
    if not interleave and scheduler != _PipelineScheduler.PIPELINE_1F1B:
        raise ValueError(f"When pipeline_interleave is False, {pp_scheduler} is not supported")

    # Everything is valid: only now touch the context handle.
    self._context_handle.set_pipeline_interleave(interleave)
    self._context_handle.set_pipeline_scheduler(scheduler)
858
+
785
859
  def get_enable_parallel_optimizer(self):
786
860
  """Get parallel optimizer flag."""
787
861
  self.check_context_handle()
@@ -1068,6 +1142,7 @@ class _AutoParallelContext:
1068
1142
  self.set_enable_all_gather_fusion(openstate)
1069
1143
  self.set_enable_reduce_scatter_fusion(openstate)
1070
1144
 
1145
+
1071
1146
  def _set_ops_strategy_json_config(type="SAVE", path="", mode="all"):
1072
1147
  """
1073
1148
  Set strategy json configuration.
@@ -1091,6 +1166,7 @@ def _set_ops_strategy_json_config(type="SAVE", path="", mode="all"):
1091
1166
  else:
1092
1167
  raise KeyError("Type must be 'SAVE' or 'LOAD' and mode must be 'all' or 'principal'")
1093
1168
 
1169
+
1094
1170
  _AUTO_PARALLEL_CONTEXT = None
1095
1171
 
1096
1172
 
@@ -1126,6 +1202,7 @@ _set_auto_parallel_context_func_map = {
1126
1202
  "dataset_strategy": auto_parallel_context().set_dataset_strategy,
1127
1203
  "enable_parallel_optimizer": auto_parallel_context().set_enable_parallel_optimizer,
1128
1204
  "parallel_optimizer_config": auto_parallel_context().set_parallel_optimizer_config,
1205
+ "pipeline_config": auto_parallel_context().set_pipeline_config,
1129
1206
  "grad_accumulation_step": auto_parallel_context().set_grad_accumulation_step,
1130
1207
  "all_reduce_fusion_config": auto_parallel_context().set_all_reduce_fusion_split_indices,
1131
1208
  "communi_parallel_mode": auto_parallel_context().set_communi_parallel_mode,
@@ -1143,6 +1220,8 @@ _get_auto_parallel_context_func_map = {
1143
1220
  "gradient_fp32_sync": auto_parallel_context().get_gradient_fp32_sync,
1144
1221
  "loss_repeated_mean": auto_parallel_context().get_loss_repeated_mean,
1145
1222
  "pipeline_stages": auto_parallel_context().get_pipeline_stages,
1223
+ "pipeline_interleave": auto_parallel_context().get_pipeline_interleave,
1224
+ "pipeline_scheduler": auto_parallel_context().get_pipeline_scheduler,
1146
1225
  "parallel_mode": auto_parallel_context().get_parallel_mode,
1147
1226
  "search_mode": auto_parallel_context().get_strategy_search_mode,
1148
1227
  "auto_parallel_search_mode": auto_parallel_context().get_auto_parallel_search_mode,
@@ -243,6 +243,33 @@ def _extract_pipeline_stage_num(strategy_file):
243
243
  return pipeline_stage_num
244
244
 
245
245
 
246
def _read_first_stage_id(strategy_file):
    """Return the stage recorded on the first strategy item of `strategy_file` (JSON or protobuf)."""
    if strategy_file.endswith(".json"):
        with open(strategy_file, 'r') as f:
            json_content = json.load(f)
        strategy_items = json_content.get("parallel_strategy_item")
        if not strategy_items:
            raise ValueError("The strategy file {} is empty.".format(strategy_file))
        return next(iter(strategy_items.values())).get('stage')

    parallel_strategy_map = _load_protobuf_strategy(strategy_file)
    strategy_items = parallel_strategy_map.parallel_strategy_item
    if not strategy_items:
        raise ValueError("The strategy file {} is empty.".format(strategy_file))
    return strategy_items[0].parallel_strategys.stage


def _extract_src_dst_layout_map_by_src(src_strategy_file=None, dst_strategy_file=None):
    """
    Extract strategy list by src strategy.

    The destination layout map is pruned in place so that it only keeps parameters that also appear
    in the source layout map.

    Args:
        src_strategy_file (str): Path of the source strategy file. A name ending in ".json" is read
            as JSON, anything else is loaded as a protobuf strategy. Default: ``None``.
        dst_strategy_file (str): Path of the destination strategy file. Default: ``None``.

    Returns:
        tuple ``(src_layout_map, dst_layout_map, stage_id)``. `stage_id` is the stage of the first
        strategy item in the source strategy file, or 0 when no source strategy file is given.
        The same three-element shape is returned whether or not a destination layout exists.

    Raises:
        ValueError: If the source strategy file contains no strategy item.
    """
    src_layout_map = _extract_layout_map(src_strategy_file)
    dst_layout_map = _extract_layout_map(dst_strategy_file)

    if dst_layout_map is not None:
        # Snapshot the names first: the map must not change size while it is being iterated.
        stale_names = [param_name for param_name in dst_layout_map
                       if param_name not in src_layout_map.keys()]
        for param_name in stale_names:
            dst_layout_map.pop(param_name)

    # NOTE(review): callers are assumed to unpack three values; previously the no-destination case
    # returned only two, which such callers cannot unpack - confirm against the call sites.
    stage_id = 0
    if src_strategy_file is not None:
        stage_id = _read_first_stage_id(src_strategy_file)
    return src_layout_map, dst_layout_map, stage_id
271
+
272
+
246
273
  def _extract_src_dst_layout_map(rank_id, src_strategy_file=None, dst_strategy_file=None):
247
274
  """Extract strategy list"""
248
275
  src_layout_map = _extract_layout_map(src_strategy_file)
@@ -341,6 +368,7 @@ def _transform_parallel_checkpoint(rank_id, param_total_dict, param_attr_dict, s
341
368
  Transform model parallel dimension for distributed checkpoint files.
342
369
  """
343
370
  transform_param_dict = {}
371
+ device_num = -1
344
372
  for param_name, _ in param_total_dict.items():
345
373
  tensor_shape = list(param_total_dict[param_name].values())[0].shape
346
374
  from_dev_matrix = [1]
@@ -394,14 +422,18 @@ def _transform_parallel_checkpoint(rank_id, param_total_dict, param_attr_dict, s
394
422
  to_info_tuple = (to_opt_shard_size, to_dev_matrix_origin, to_tensor_map_origin, origin_tensor_shape)
395
423
  _insert_opt_shard_reshape(param_rank_map, from_info_tuple, to_info_tuple)
396
424
  transform_operator_stack = _generate_transform_operator_stack(param_rank_map, rank_id)
397
- _apply_tensor_transform_operators(transform_operator_stack, param_total_dict[param_name], device_num)
398
- transform_tensor = ms.Tensor(param_total_dict[param_name][rank_id % device_num])
425
+ param_total_dict_copy = param_total_dict[param_name].copy()
426
+ _apply_tensor_transform_operators(transform_operator_stack, param_total_dict_copy, device_num)
427
+ transform_tensor = ms.Tensor(param_total_dict_copy[rank_id % device_num])
399
428
  requires_grad = param_attr_dict[param_name][rank_id % device_num][0]
400
429
  layerwise_parallel = param_attr_dict[param_name][rank_id % device_num][1]
401
430
  transform_para = ms.Parameter(transform_tensor, param_name, requires_grad, layerwise_parallel)
402
431
  if param_type_dict[param_name][rank_id % device_num] == "BFloat16":
403
432
  transform_para.set_dtype(ms.bfloat16)
404
433
  transform_param_dict[param_name] = transform_para
434
+ if device_num < 0:
435
+ raise ValueError("None of the parameters in checkpoint file are in either src strategy or "
436
+ "dst strategy. Please check correctness of strategy files.")
405
437
 
406
438
  # Handle those parameter like learning_rate, global_step which not in strategy_file.
407
439
  for param_name, _ in param_total_dict.items():
@@ -223,7 +223,9 @@ def _load_tensor(tensor, dev_mat, tensor_map, rank_id=-1):
223
223
  tensor_strategy = _get_tensor_strategy(dev_mat, tensor_map)
224
224
  tensor_slice_index = _get_tensor_slice_index(dev_mat, tensor_strategy, tensor_map, rank)
225
225
  if tensor.dtype == mstype.bfloat16:
226
- tensor = tensor.float()
226
+ from mindspore.ops.operations import Cast
227
+ cpu_cast = Cast().set_device("CPU")
228
+ tensor = cpu_cast(tensor, mstype.float32)
227
229
  np_tensor = tensor.asnumpy()
228
230
  np_tensor_list = _chunk_tensor_by_strategy(np_tensor, tensor_strategy)
229
231
  np_tensor_slice = np_tensor_list[int(tensor_slice_index)]
@@ -805,14 +805,14 @@ class MultiHeadAttention(Cell):
805
805
  - **attention_mask** (Tensor) - If the use_past is False or is_first_iteration=True, the attention mask
806
806
  matrix should ba (batch_size, src_seq_length, tgt_seq_length), or None. None means there will be no mask
807
807
  in softmax computation. Otherwise, the mask must be (batch_size, 1, tgt_seq_length)
808
- - **key_past** (Tensor) - Float16 tensor with shape (batch_size, num_heads, size_per_head, tgt_seq_length).
808
+ - **key_past** (Tensor) - float16 tensor with shape (batch_size, num_heads, size_per_head, tgt_seq_length).
809
809
  The past calculated key vector. Used for incremental prediction when the use_past is True.
810
810
  Default None.
811
- - **value_past** (Tensor) - Float16 tensor with shape
811
+ - **value_past** (Tensor) - float16 tensor with shape
812
812
  (batch_size, num_heads, tgt_seq_length, size_per_head).
813
813
  The past calculated value vector. Used for incremental prediction when the use_past is True.
814
814
  Default None.
815
- - **batch_valid_length** (Tensor) - Int32 tensor with shape (batch_size,) the past calculated the index.
815
+ - **batch_valid_length** (Tensor) - int32 tensor with shape (batch_size,) the past calculated the index.
816
816
  Used for incremental prediction when the use_past is True. Default None.
817
817
 
818
818
  Outputs:
@@ -1412,7 +1412,7 @@ class TransformerEncoderLayer(Cell):
1412
1412
  be no mask in softmax computation. Otherwise, should be [batch_size, 1, hidden_size]
1413
1413
  - **init_reset** (Tensor) - A bool tensor with shape [1], used to clear the past key parameter and
1414
1414
  past value parameter used in the incremental prediction. Only valid when use_past is True. Default True.
1415
- - **batch_valid_length** (Tensor) - Int32 tensor with shape [batch_size] the past calculated the index.
1415
+ - **batch_valid_length** (Tensor) - int32 tensor with shape [batch_size] the past calculated the index.
1416
1416
  Used for incremental prediction when the use_past is True. Default None.
1417
1417
 
1418
1418
  Outputs:
@@ -1824,7 +1824,7 @@ class TransformerDecoderLayer(Cell):
1824
1824
  means there will be no mask in softmax computation in cross attention. Default None.
1825
1825
  - **init_reset** (Tensor) - A bool tensor with shape [1], used to clear the past key parameter and
1826
1826
  past value parameter used in the incremental prediction. Only valid when use_past is True. Default True.
1827
- - **batch_valid_length** (Tensor) - Int32 tensor with shape [batch_size] the past calculated the index.
1827
+ - **batch_valid_length** (Tensor) - int32 tensor with shape [batch_size] the past calculated the index.
1828
1828
  Used for incremental prediction when the use_past is True. Default None.
1829
1829
 
1830
1830
  Outputs:
@@ -2333,7 +2333,7 @@ class TransformerEncoder(Cell):
2333
2333
  be no mask in softmax computation. Otherwise, should be [batch_size, 1, hidden_size]
2334
2334
  - **init_reset** (Tensor) - A bool tensor with shape [1], used to clear the past key parameter and
2335
2335
  past value parameter used in the incremental prediction. Only valid when use_past is True. Default True.
2336
- - **batch_valid_length** (Tensor) - Int32 tensor with shape [batch_size] the past calculated the index.
2336
+ - **batch_valid_length** (Tensor) - int32 tensor with shape [batch_size] the past calculated the index.
2337
2337
  Used for incremental prediction when the use_past is True. Default None.
2338
2338
 
2339
2339
  Outputs:
@@ -2589,7 +2589,7 @@ class TransformerDecoder(Cell):
2589
2589
  means there will be no mask in softmax computation in cross attention. Default None.
2590
2590
  - **init_reset** (Tensor) - A bool tensor with shape [1], used to clear the past key parameter and
2591
2591
  past value parameter used in the incremental prediction. Only valid when use_past is True. Default True.
2592
- - **batch_valid_length** (Tensor) - Int32 tensor with shape [batch_size] the past calculated the index.
2592
+ - **batch_valid_length** (Tensor) - int32 tensor with shape [batch_size] the past calculated the index.
2593
2593
  Used for incremental prediction when the use_past is True. Default None.
2594
2594
 
2595
2595
  Outputs:
@@ -2842,7 +2842,7 @@ class Transformer(Cell):
2842
2842
  seq_length, hidden_size], this should be none if the decoder layer is 0 or the user wants no mask.
2843
2843
  - **init_reset** (Tensor) - A bool tensor with shape [1], used to clear the past key parameter and
2844
2844
  past value parameter used in the incremental prediction. Only valid when use_past is True. Default True.
2845
- - **batch_valid_length** (Tensor) - Int32 tensor with shape [batch_size] the past calculated the index.
2845
+ - **batch_valid_length** (Tensor) - int32 tensor with shape [batch_size] the past calculated the index.
2846
2846
  Used for incremental prediction when the use_past is True. Default None.
2847
2847
 
2848
2848
  Outputs: