mindspore 2.3.0__cp310-cp310-win_amd64.whl → 2.4.1__cp310-cp310-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release. This version of mindspore might be problematic.
Files changed (275)
  1. mindspore/.commit_id +1 -1
  2. mindspore/__init__.py +3 -1
  3. mindspore/_c_dataengine.cp310-win_amd64.pyd +0 -0
  4. mindspore/_c_expression.cp310-win_amd64.pyd +0 -0
  5. mindspore/_c_mindrecord.cp310-win_amd64.pyd +0 -0
  6. mindspore/_checkparam.py +50 -9
  7. mindspore/_extends/parse/compile_config.py +41 -0
  8. mindspore/_extends/parse/parser.py +9 -7
  9. mindspore/_extends/parse/standard_method.py +52 -14
  10. mindspore/_extends/pijit/pijit_func_white_list.py +350 -24
  11. mindspore/amp.py +24 -10
  12. mindspore/common/__init__.py +6 -4
  13. mindspore/common/_pijit_context.py +190 -0
  14. mindspore/common/_register_for_tensor.py +2 -1
  15. mindspore/common/_tensor_overload.py +139 -0
  16. mindspore/common/api.py +102 -87
  17. mindspore/common/dump.py +5 -6
  18. mindspore/common/generator.py +1 -7
  19. mindspore/common/hook_handle.py +14 -26
  20. mindspore/common/initializer.py +51 -15
  21. mindspore/common/mindir_util.py +2 -2
  22. mindspore/common/parameter.py +62 -15
  23. mindspore/common/recompute.py +39 -9
  24. mindspore/common/sparse_tensor.py +7 -3
  25. mindspore/common/tensor.py +183 -37
  26. mindspore/communication/__init__.py +1 -1
  27. mindspore/communication/_comm_helper.py +38 -3
  28. mindspore/communication/comm_func.py +315 -60
  29. mindspore/communication/management.py +14 -14
  30. mindspore/context.py +132 -22
  31. mindspore/dataset/__init__.py +1 -1
  32. mindspore/dataset/audio/__init__.py +1 -1
  33. mindspore/dataset/core/config.py +7 -0
  34. mindspore/dataset/core/validator_helpers.py +7 -0
  35. mindspore/dataset/engine/cache_client.py +1 -1
  36. mindspore/dataset/engine/datasets.py +72 -44
  37. mindspore/dataset/engine/datasets_audio.py +7 -7
  38. mindspore/dataset/engine/datasets_standard_format.py +53 -3
  39. mindspore/dataset/engine/datasets_text.py +20 -20
  40. mindspore/dataset/engine/datasets_user_defined.py +174 -104
  41. mindspore/dataset/engine/datasets_vision.py +33 -33
  42. mindspore/dataset/engine/iterators.py +29 -0
  43. mindspore/dataset/engine/obs/util.py +7 -0
  44. mindspore/dataset/engine/queue.py +114 -60
  45. mindspore/dataset/engine/serializer_deserializer.py +2 -2
  46. mindspore/dataset/engine/validators.py +34 -14
  47. mindspore/dataset/text/__init__.py +1 -4
  48. mindspore/dataset/transforms/__init__.py +0 -3
  49. mindspore/dataset/utils/line_reader.py +2 -0
  50. mindspore/dataset/vision/__init__.py +1 -4
  51. mindspore/dataset/vision/utils.py +1 -1
  52. mindspore/dataset/vision/validators.py +2 -1
  53. mindspore/{nn/extend → experimental/es}/__init__.py +4 -11
  54. mindspore/experimental/es/embedding_service.py +883 -0
  55. mindspore/{nn/layer → experimental/es}/embedding_service_layer.py +218 -30
  56. mindspore/experimental/llm_boost/__init__.py +21 -0
  57. mindspore/{nn/extend/layer → experimental/llm_boost/atb}/__init__.py +4 -8
  58. mindspore/experimental/llm_boost/atb/boost_base.py +211 -0
  59. mindspore/experimental/llm_boost/atb/llama_boost.py +115 -0
  60. mindspore/experimental/llm_boost/atb/qwen_boost.py +101 -0
  61. mindspore/experimental/llm_boost/register.py +129 -0
  62. mindspore/experimental/llm_boost/utils.py +31 -0
  63. mindspore/experimental/optim/adamw.py +85 -0
  64. mindspore/experimental/optim/optimizer.py +3 -0
  65. mindspore/hal/__init__.py +3 -3
  66. mindspore/hal/contiguous_tensors_handle.py +175 -0
  67. mindspore/hal/stream.py +18 -0
  68. mindspore/include/api/model_group.h +13 -1
  69. mindspore/include/api/types.h +10 -10
  70. mindspore/include/dataset/config.h +2 -2
  71. mindspore/include/dataset/constants.h +2 -2
  72. mindspore/include/dataset/execute.h +2 -2
  73. mindspore/include/dataset/vision.h +4 -0
  74. mindspore/log.py +1 -1
  75. mindspore/mindrecord/filewriter.py +68 -51
  76. mindspore/mindspore_backend.dll +0 -0
  77. mindspore/mindspore_common.dll +0 -0
  78. mindspore/mindspore_core.dll +0 -0
  79. mindspore/mindspore_np_dtype.dll +0 -0
  80. mindspore/mindspore_ops.dll +0 -0
  81. mindspore/mint/__init__.py +983 -46
  82. mindspore/mint/distributed/__init__.py +31 -0
  83. mindspore/mint/distributed/distributed.py +254 -0
  84. mindspore/mint/nn/__init__.py +268 -23
  85. mindspore/mint/nn/functional.py +125 -19
  86. mindspore/mint/nn/layer/__init__.py +39 -0
  87. mindspore/mint/nn/layer/activation.py +133 -0
  88. mindspore/mint/nn/layer/normalization.py +477 -0
  89. mindspore/mint/nn/layer/pooling.py +110 -0
  90. mindspore/mint/optim/adamw.py +26 -13
  91. mindspore/mint/special/__init__.py +63 -0
  92. mindspore/multiprocessing/__init__.py +2 -1
  93. mindspore/nn/__init__.py +0 -1
  94. mindspore/nn/cell.py +276 -96
  95. mindspore/nn/layer/activation.py +211 -44
  96. mindspore/nn/layer/basic.py +137 -10
  97. mindspore/nn/layer/embedding.py +137 -2
  98. mindspore/nn/layer/normalization.py +101 -5
  99. mindspore/nn/layer/padding.py +34 -48
  100. mindspore/nn/layer/pooling.py +161 -7
  101. mindspore/nn/layer/transformer.py +3 -3
  102. mindspore/nn/loss/__init__.py +2 -2
  103. mindspore/nn/loss/loss.py +84 -6
  104. mindspore/nn/optim/__init__.py +2 -1
  105. mindspore/nn/optim/adadelta.py +1 -1
  106. mindspore/nn/optim/adam.py +1 -1
  107. mindspore/nn/optim/lamb.py +1 -1
  108. mindspore/nn/optim/tft_wrapper.py +124 -0
  109. mindspore/nn/wrap/cell_wrapper.py +12 -23
  110. mindspore/nn/wrap/grad_reducer.py +5 -5
  111. mindspore/nn/wrap/loss_scale.py +17 -3
  112. mindspore/numpy/__init__.py +1 -1
  113. mindspore/numpy/array_creations.py +65 -68
  114. mindspore/numpy/array_ops.py +64 -60
  115. mindspore/numpy/fft.py +610 -75
  116. mindspore/numpy/logic_ops.py +11 -10
  117. mindspore/numpy/math_ops.py +85 -84
  118. mindspore/numpy/utils_const.py +4 -4
  119. mindspore/opencv_core452.dll +0 -0
  120. mindspore/opencv_imgcodecs452.dll +0 -0
  121. mindspore/opencv_imgproc452.dll +0 -0
  122. mindspore/ops/__init__.py +6 -4
  123. mindspore/ops/_grad_experimental/grad_array_ops.py +0 -11
  124. mindspore/ops/_grad_experimental/grad_comm_ops.py +67 -4
  125. mindspore/ops/_grad_experimental/grad_math_ops.py +0 -22
  126. mindspore/ops/_vmap/vmap_array_ops.py +2 -4
  127. mindspore/ops/_vmap/vmap_math_ops.py +17 -1
  128. mindspore/ops/_vmap/vmap_nn_ops.py +43 -2
  129. mindspore/ops/auto_generate/cpp_create_prim_instance_helper.py +91 -7
  130. mindspore/ops/auto_generate/gen_arg_dtype_cast.py +2 -0
  131. mindspore/ops/auto_generate/gen_extend_func.py +767 -13
  132. mindspore/ops/auto_generate/gen_ops_def.py +2452 -364
  133. mindspore/ops/auto_generate/gen_ops_prim.py +5442 -1756
  134. mindspore/ops/auto_generate/pyboost_inner_prim.py +176 -56
  135. mindspore/ops/composite/base.py +85 -48
  136. mindspore/ops/composite/multitype_ops/_compile_utils.py +1 -0
  137. mindspore/ops/composite/multitype_ops/not_in_impl.py +2 -2
  138. mindspore/ops/function/__init__.py +22 -0
  139. mindspore/ops/function/array_func.py +492 -153
  140. mindspore/ops/function/debug_func.py +113 -1
  141. mindspore/ops/function/fft_func.py +15 -2
  142. mindspore/ops/function/grad/grad_func.py +3 -2
  143. mindspore/ops/function/math_func.py +564 -207
  144. mindspore/ops/function/nn_func.py +817 -383
  145. mindspore/ops/function/other_func.py +3 -2
  146. mindspore/ops/function/random_func.py +402 -12
  147. mindspore/ops/function/reshard_func.py +13 -11
  148. mindspore/ops/function/sparse_unary_func.py +1 -1
  149. mindspore/ops/function/vmap_func.py +3 -2
  150. mindspore/ops/functional.py +24 -14
  151. mindspore/ops/op_info_register.py +3 -3
  152. mindspore/ops/operations/__init__.py +7 -2
  153. mindspore/ops/operations/_grad_ops.py +2 -76
  154. mindspore/ops/operations/_infer_ops.py +1 -1
  155. mindspore/ops/operations/_inner_ops.py +71 -94
  156. mindspore/ops/operations/array_ops.py +14 -146
  157. mindspore/ops/operations/comm_ops.py +63 -53
  158. mindspore/ops/operations/custom_ops.py +83 -19
  159. mindspore/ops/operations/debug_ops.py +42 -10
  160. mindspore/ops/operations/manually_defined/_inner.py +12 -0
  161. mindspore/ops/operations/manually_defined/ops_def.py +273 -20
  162. mindspore/ops/operations/math_ops.py +12 -223
  163. mindspore/ops/operations/nn_ops.py +20 -114
  164. mindspore/ops/operations/other_ops.py +7 -4
  165. mindspore/ops/operations/random_ops.py +46 -1
  166. mindspore/ops/primitive.py +18 -6
  167. mindspore/ops_generate/arg_dtype_cast.py +2 -0
  168. mindspore/ops_generate/gen_aclnn_implement.py +11 -11
  169. mindspore/ops_generate/gen_constants.py +36 -0
  170. mindspore/ops_generate/gen_ops.py +67 -52
  171. mindspore/ops_generate/gen_ops_inner_prim.py +1 -1
  172. mindspore/ops_generate/gen_pyboost_func.py +131 -47
  173. mindspore/ops_generate/op_proto.py +10 -3
  174. mindspore/ops_generate/pyboost_utils.py +14 -1
  175. mindspore/ops_generate/template.py +43 -21
  176. mindspore/parallel/__init__.py +3 -1
  177. mindspore/parallel/_auto_parallel_context.py +31 -9
  178. mindspore/parallel/_cell_wrapper.py +85 -0
  179. mindspore/parallel/_parallel_serialization.py +47 -19
  180. mindspore/parallel/_tensor.py +127 -13
  181. mindspore/parallel/_utils.py +53 -22
  182. mindspore/parallel/algo_parameter_config.py +5 -5
  183. mindspore/parallel/checkpoint_transform.py +46 -39
  184. mindspore/parallel/cluster/process_entity/__init__.py +1 -1
  185. mindspore/parallel/cluster/process_entity/_api.py +31 -23
  186. mindspore/parallel/cluster/process_entity/_utils.py +2 -27
  187. mindspore/parallel/parameter_broadcast.py +3 -4
  188. mindspore/parallel/shard.py +162 -31
  189. mindspore/parallel/transform_safetensors.py +1146 -0
  190. mindspore/profiler/__init__.py +2 -1
  191. mindspore/profiler/common/constant.py +29 -0
  192. mindspore/profiler/common/registry.py +47 -0
  193. mindspore/profiler/common/util.py +28 -0
  194. mindspore/profiler/dynamic_profiler.py +694 -0
  195. mindspore/profiler/envprofiling.py +17 -19
  196. mindspore/profiler/parser/ascend_analysis/constant.py +18 -0
  197. mindspore/profiler/parser/ascend_analysis/file_manager.py +25 -4
  198. mindspore/profiler/parser/ascend_analysis/function_event.py +43 -19
  199. mindspore/profiler/parser/ascend_analysis/fwk_cann_parser.py +31 -26
  200. mindspore/profiler/parser/ascend_analysis/fwk_file_parser.py +56 -10
  201. mindspore/profiler/parser/ascend_analysis/msprof_timeline_parser.py +55 -8
  202. mindspore/profiler/parser/ascend_analysis/path_manager.py +313 -0
  203. mindspore/profiler/parser/ascend_analysis/profiler_info_parser.py +27 -20
  204. mindspore/profiler/parser/ascend_analysis/trace_event_manager.py +9 -2
  205. mindspore/profiler/parser/ascend_msprof_exporter.py +5 -4
  206. mindspore/profiler/parser/ascend_timeline_generator.py +27 -25
  207. mindspore/profiler/parser/base_timeline_generator.py +19 -25
  208. mindspore/profiler/parser/cpu_gpu_timeline_generator.py +25 -12
  209. mindspore/profiler/parser/framework_parser.py +1 -391
  210. mindspore/profiler/parser/gpu_analysis/__init__.py +14 -0
  211. mindspore/profiler/parser/gpu_analysis/function_event.py +44 -0
  212. mindspore/profiler/parser/gpu_analysis/fwk_file_parser.py +89 -0
  213. mindspore/profiler/parser/gpu_analysis/profiler_info_parser.py +72 -0
  214. mindspore/profiler/parser/memory_usage_parser.py +0 -154
  215. mindspore/profiler/parser/profiler_info.py +78 -6
  216. mindspore/profiler/profiler.py +153 -0
  217. mindspore/profiler/profiling.py +285 -413
  218. mindspore/rewrite/__init__.py +1 -2
  219. mindspore/rewrite/common/namespace.py +4 -4
  220. mindspore/rewrite/symbol_tree/symbol_tree.py +3 -3
  221. mindspore/run_check/_check_version.py +39 -104
  222. mindspore/safeguard/rewrite_obfuscation.py +591 -247
  223. mindspore/train/__init__.py +4 -3
  224. mindspore/train/_utils.py +105 -19
  225. mindspore/train/amp.py +171 -53
  226. mindspore/train/callback/__init__.py +2 -2
  227. mindspore/train/callback/_callback.py +4 -4
  228. mindspore/train/callback/_checkpoint.py +97 -31
  229. mindspore/train/callback/_cluster_monitor.py +1 -1
  230. mindspore/train/callback/_flops_collector.py +1 -0
  231. mindspore/train/callback/_loss_monitor.py +3 -3
  232. mindspore/train/callback/_on_request_exit.py +145 -31
  233. mindspore/train/callback/_summary_collector.py +5 -5
  234. mindspore/train/callback/_tft_register.py +375 -0
  235. mindspore/train/dataset_helper.py +15 -3
  236. mindspore/train/metrics/metric.py +3 -3
  237. mindspore/train/metrics/roc.py +4 -4
  238. mindspore/train/mind_ir_pb2.py +44 -39
  239. mindspore/train/model.py +154 -58
  240. mindspore/train/serialization.py +342 -128
  241. mindspore/utils/__init__.py +21 -0
  242. mindspore/utils/utils.py +60 -0
  243. mindspore/version.py +1 -1
  244. {mindspore-2.3.0.dist-info → mindspore-2.4.1.dist-info}/METADATA +13 -7
  245. {mindspore-2.3.0.dist-info → mindspore-2.4.1.dist-info}/RECORD +248 -242
  246. mindspore/include/c_api/ms/abstract.h +0 -67
  247. mindspore/include/c_api/ms/attribute.h +0 -197
  248. mindspore/include/c_api/ms/base/handle_types.h +0 -43
  249. mindspore/include/c_api/ms/base/macros.h +0 -32
  250. mindspore/include/c_api/ms/base/status.h +0 -33
  251. mindspore/include/c_api/ms/base/types.h +0 -283
  252. mindspore/include/c_api/ms/context.h +0 -102
  253. mindspore/include/c_api/ms/graph.h +0 -160
  254. mindspore/include/c_api/ms/node.h +0 -606
  255. mindspore/include/c_api/ms/tensor.h +0 -161
  256. mindspore/include/c_api/ms/value.h +0 -84
  257. mindspore/mindspore_shared_lib.dll +0 -0
  258. mindspore/nn/extend/basic.py +0 -140
  259. mindspore/nn/extend/embedding.py +0 -143
  260. mindspore/nn/extend/layer/normalization.py +0 -109
  261. mindspore/nn/extend/pooling.py +0 -117
  262. mindspore/nn/layer/embedding_service.py +0 -531
  263. mindspore/ops/_op_impl/aicpu/strided_slice_v2.py +0 -93
  264. mindspore/ops/_op_impl/aicpu/strided_slice_v2_grad.py +0 -66
  265. mindspore/ops/extend/__init__.py +0 -53
  266. mindspore/ops/extend/array_func.py +0 -218
  267. mindspore/ops/extend/math_func.py +0 -76
  268. mindspore/ops/extend/nn_func.py +0 -308
  269. mindspore/ops/silent_check.py +0 -162
  270. mindspore/profiler/parser/msadvisor_analyzer.py +0 -82
  271. mindspore/profiler/parser/msadvisor_parser.py +0 -240
  272. mindspore/train/callback/_mindio_ttp.py +0 -443
  273. {mindspore-2.3.0.dist-info → mindspore-2.4.1.dist-info}/WHEEL +0 -0
  274. {mindspore-2.3.0.dist-info → mindspore-2.4.1.dist-info}/entry_points.txt +0 -0
  275. {mindspore-2.3.0.dist-info → mindspore-2.4.1.dist-info}/top_level.txt +0 -0
@@ -102,7 +102,8 @@ def _set_envs():
     os.environ["RANK_ID"] = str(get_rank())
     if os.getenv("RANK_SIZE") is None:
         os.environ["RANK_SIZE"] = str(get_group_size())
-    os.environ["DEVICE_ID"] = str(get_local_rank())
+    if os.getenv("DEVICE_ID") is None:
+        os.environ["DEVICE_ID"] = str(get_local_rank())


 def init(backend_name=None):
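The change above means a DEVICE_ID exported by the user is no longer overwritten during environment setup. A minimal standalone sketch of the new guard (the helper name below is hypothetical, for illustration only):

    import os

    def _set_device_id_env(local_rank):
        # Mirrors the 2.4.1 logic: only set DEVICE_ID when it is not already defined.
        if os.getenv("DEVICE_ID") is None:
            os.environ["DEVICE_ID"] = str(local_rank)

    os.environ["DEVICE_ID"] = "3"      # value exported by the user (hypothetical)
    _set_device_id_env(local_rank=0)   # 2.3.0 overwrote this; 2.4.1 keeps "3"
    print(os.environ["DEVICE_ID"])     # -> 3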
@@ -140,7 +141,7 @@ def init(backend_name=None):
         For Ascend/GPU/CPU devices, it is recommended to use the msrun startup method
         without any third-party or configuration file dependencies.
         Please see the `msrun start up
-        <https://www.mindspore.cn/tutorials/experts/zh-CN/master/parallel/msrun_launcher.html>`_
+        <https://www.mindspore.cn/docs/zh-CN/master/model_train/parallel/msrun_launcher.html>`_
         for more details.

         >>> from mindspore.communication import init
@@ -165,6 +166,7 @@ def init(backend_name=None):
     if os.getenv("MS_ROLE") == "MS_SCHED":
         backend_name = "mccl"

+    _set_elegant_exit_handle()
     if backend_name == "hccl":
         if _is_ps_mode():
             # Use MindSpore cluster to build network for Parameter Server training.
@@ -173,7 +175,6 @@ def init(backend_name=None):
                 raise RuntimeError("Parameter server and scheduler should use 'CPU' as backend instead of 'Ascend'")
             if _get_ps_context("worker_num") == 1:
                 GlobalComm.INITED = True
-                _set_elegant_exit_handle()
                 return
         if device_target != "Ascend":
             raise RuntimeError("For 'init', the argument 'backend_name' should be '{}' to init '{}', "
@@ -203,7 +204,6 @@ def init(backend_name=None):
                            "but got 'backend_name' : {}".format(backend_name))

     GlobalComm.INITED = True
-    _set_elegant_exit_handle()
     _set_envs()


@@ -227,7 +227,7 @@ def release():
         For Ascend/GPU/CPU devices, it is recommended to use the msrun startup method
         without any third-party or configuration file dependencies.
         Please see the `msrun start up
-        <https://www.mindspore.cn/tutorials/experts/zh-CN/master/parallel/msrun_launcher.html>`_
+        <https://www.mindspore.cn/docs/zh-CN/master/model_train/parallel/msrun_launcher.html>`_
         for more details.

         >>> from mindspore.communication import init, release
@@ -266,7 +266,7 @@ def get_rank(group=GlobalComm.WORLD_COMM_GROUP):
         For Ascend/GPU/CPU devices, it is recommended to use the msrun startup method
         without any third-party or configuration file dependencies.
         Please see the `msrun start up
-        <https://www.mindspore.cn/tutorials/experts/zh-CN/master/parallel/msrun_launcher.html>`_
+        <https://www.mindspore.cn/docs/zh-CN/master/model_train/parallel/msrun_launcher.html>`_
         for more details.

         >>> from mindspore.communication import init, get_rank
@@ -311,7 +311,7 @@ def get_local_rank(group=GlobalComm.WORLD_COMM_GROUP):
         For Ascend/GPU/CPU devices, it is recommended to use the msrun startup method
         without any third-party or configuration file dependencies.
         Please see the `msrun start up
-        <https://www.mindspore.cn/tutorials/experts/zh-CN/master/parallel/msrun_launcher.html>`_
+        <https://www.mindspore.cn/docs/zh-CN/master/model_train/parallel/msrun_launcher.html>`_
         for more details.

         >>> import mindspore as ms
@@ -359,7 +359,7 @@ def get_group_size(group=GlobalComm.WORLD_COMM_GROUP):
         For Ascend/GPU/CPU devices, it is recommended to use the msrun startup method
         without any third-party or configuration file dependencies.
         Please see the `msrun start up
-        <https://www.mindspore.cn/tutorials/experts/zh-CN/master/parallel/msrun_launcher.html>`_
+        <https://www.mindspore.cn/docs/zh-CN/master/model_train/parallel/msrun_launcher.html>`_
         for more details.

         >>> import mindspore as ms
@@ -406,7 +406,7 @@ def get_local_rank_size(group=GlobalComm.WORLD_COMM_GROUP):
         For Ascend/GPU/CPU devices, it is recommended to use the msrun startup method
         without any third-party or configuration file dependencies.
         Please see the `msrun start up
-        <https://www.mindspore.cn/tutorials/experts/zh-CN/master/parallel/msrun_launcher.html>`_
+        <https://www.mindspore.cn/docs/zh-CN/master/model_train/parallel/msrun_launcher.html>`_
         for more details.

         >>> import mindspore as ms
@@ -456,7 +456,7 @@ def get_world_rank_from_group_rank(group, group_rank_id):
         For Ascend/GPU/CPU devices, it is recommended to use the msrun startup method
         without any third-party or configuration file dependencies.
         Please see the `msrun start up
-        <https://www.mindspore.cn/tutorials/experts/zh-CN/master/parallel/msrun_launcher.html>`_
+        <https://www.mindspore.cn/docs/zh-CN/master/model_train/parallel/msrun_launcher.html>`_
         for more details.

         >>> import mindspore as ms
@@ -510,7 +510,7 @@ def get_group_rank_from_world_rank(world_rank_id, group):
         For Ascend/GPU/CPU devices, it is recommended to use the msrun startup method
         without any third-party or configuration file dependencies.
         Please see the `msrun start up
-        <https://www.mindspore.cn/tutorials/experts/zh-CN/master/parallel/msrun_launcher.html>`_
+        <https://www.mindspore.cn/docs/zh-CN/master/model_train/parallel/msrun_launcher.html>`_
         for more details.

         >>> import mindspore as ms
@@ -561,7 +561,7 @@ def create_group(group, rank_ids):
         For Ascend/GPU/CPU devices, it is recommended to use the msrun startup method
         without any third-party or configuration file dependencies.
         Please see the `msrun start up
-        <https://www.mindspore.cn/tutorials/experts/zh-CN/master/parallel/msrun_launcher.html>`_
+        <https://www.mindspore.cn/docs/zh-CN/master/model_train/parallel/msrun_launcher.html>`_
         for more details.

         >>> import mindspore as ms
@@ -609,7 +609,7 @@ def destroy_group(group):
         For Ascend/GPU/CPU devices, it is recommended to use the msrun startup method
         without any third-party or configuration file dependencies.
         Please see the `msrun start up
-        <https://www.mindspore.cn/tutorials/experts/zh-CN/master/parallel/msrun_launcher.html>`_
+        <https://www.mindspore.cn/docs/zh-CN/master/model_train/parallel/msrun_launcher.html>`_
         for more details.

         >>> import mindspore as ms
@@ -656,7 +656,7 @@ def get_process_group_ranks(group=GlobalComm.WORLD_COMM_GROUP):
         without any third-party or configuration file dependencies.

         Please see the `msrun start up
-        <https://www.mindspore.cn/tutorials/experts/zh-CN/master/parallel/msrun_launcher.html>`_
+        <https://www.mindspore.cn/docs/zh-CN/master/model_train/parallel/msrun_launcher.html>`_
         for more details.

         This example should be run with 4 devices.
mindspore/context.py CHANGED
@@ -35,6 +35,7 @@ from mindspore.parallel._ps_context import _set_ps_context, _get_ps_context, _re
     _need_reset_device_target_for_ps
 from mindspore.parallel._offload_context import _set_offload_context, _get_offload_context
 from mindspore.hal.device import is_initialized
+from mindspore.common import api

 __all__ = ['GRAPH_MODE', 'PYNATIVE_MODE', 'STRICT', 'COMPATIBLE', 'LAX', 'set_context', 'get_context',
            'set_auto_parallel_context', 'get_auto_parallel_context', 'reset_auto_parallel_context', 'ParallelMode',
@@ -68,7 +69,7 @@ def _make_directory(path):
     if not os.path.exists(path):
         logger.debug("The directory(%s) doesn't exist, will create it", path)
         try:
-            os.makedirs(path)
+            os.makedirs(path, mode=0o700)
         except FileExistsError:
             logger.debug("The directory(%s) already exist.", path)
         except PermissionError as e:
@@ -168,7 +169,10 @@ class _Context:
         self._support_binary = False
         self.enable_compile_cache = None
         self._mode = PYNATIVE_MODE
-        self._jit_config = {}
+        self.aoe_config = {}
+        self.jit_config = {}
+        self.ascend_config = {}
+        self.gpu_config = {}

     def __getattribute__(self, attr):
         value = object.__getattribute__(self, attr)
@@ -188,7 +192,7 @@ class _Context:

     def get_jit_config(self):
         """Get current jit_config."""
-        return self._jit_config
+        return self.jit_config

     def set_mode(self, mode):
         """
@@ -248,6 +252,16 @@ class _Context:
         else:
             self.set_param(ms_ctx_param.memory_optimize_level, 1)

+    def set_exec_order(self, exec_order):
+        """
+        The execution order mode, support "bfs", "dfs", "gpto".
+        """
+        exec_order_modes = ["bfs", "dfs", "gpto"]
+        if exec_order not in exec_order_modes:
+            raise ValueError(f"For 'context.set_context', the argument 'exec_order' must be one of "
+                             f"{exec_order_modes}, but got {exec_order}.")
+        self.set_param(ms_ctx_param.exec_order, exec_order)
+
     def set_memory_offload(self, memory_offload):
         """
         Enable memory offload or not, support "ON", "OFF".
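Illustrative usage of the new option (a sketch only; `exec_order` is listed as Ascend-only in the set_context table later in this diff, so this assumes an Ascend build):

    import mindspore as ms

    # Pick the graph executor's operator ordering; valid values per the setter
    # above are "bfs" (default), "dfs" and "gpto".
    ms.set_context(mode=ms.GRAPH_MODE, exec_order="dfs")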
@@ -277,6 +291,29 @@ class _Context:
                              f"{deterministic_options}, but got {deterministic}.")
         self.set_param(ms_ctx_param.deterministic, deterministic)

+        hccl_deterministic = os.getenv("HCCL_DETERMINISTIC")
+        te_parallel_compiler = os.getenv("TE_PARALLEL_COMPILER")
+        if deterministic == "ON":
+            if hccl_deterministic and hccl_deterministic != "true":
+                logger.warning(f"Environment 'HCCL_DETERMINISTIC' should be 'true' when set deterministic='ON', but "
+                               f"got '{hccl_deterministic}'. 'HCCL_DETERMINISTIC' will be set to 'true'.")
+            if te_parallel_compiler and te_parallel_compiler != "1":
+                logger.warning(f"Environment 'TE_PARALLEL_COMPILER' should be '1' when set deterministic='ON', but "
+                               f"got '{te_parallel_compiler}'. 'TE_PARALLEL_COMPILER' will be set to '1'.")
+            os.environ["HCCL_DETERMINISTIC"] = "true"
+            os.environ["TE_PARALLEL_COMPILER"] = "1"
+        if deterministic == "OFF":
+            if hccl_deterministic and hccl_deterministic != "false":
+                logger.warning(f"Environment 'HCCL_DETERMINISTIC' should not be set or be 'false' when set "
+                               f"deterministic='OFF', but got '{hccl_deterministic}'. 'HCCL_DETERMINISTIC' "
+                               f"will be unset.")
+                del os.environ["HCCL_DETERMINISTIC"]
+            if te_parallel_compiler and te_parallel_compiler != "0":
+                logger.warning(f"Environment 'TE_PARALLEL_COMPILER' should not be set or be '0' when set "
+                               f"deterministic='OFF', but got '{te_parallel_compiler}'. 'TE_PARALLEL_COMPILER' "
+                               f"will be unset.")
+                del os.environ["TE_PARALLEL_COMPILER"]
+
     def set_ascend_config(self, ascend_config):
         """
         Enable ascend config.
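A small sketch of the effect of the new synchronization (assumes an Ascend-style environment where these variables are relevant):

    import os
    import mindspore as ms

    ms.set_context(deterministic="ON")
    # The 2.4.1 setter now aligns the related environment variables as a side effect:
    print(os.environ.get("HCCL_DETERMINISTIC"))    # expected "true"
    print(os.environ.get("TE_PARALLEL_COMPILER"))  # expected "1"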
@@ -298,6 +335,8 @@ class _Context:
             - parallel_speed_up_json_path(Union[str, None]): The path to the parallel speed up json file.
               If its value is None or '', it does not take effect. Default None.
             - host_scheduling_max_threshold(int): The host scheduling max threshold.
+            - hccl_watchdog (bool): Enable a thread to monitor the failure of collective communication.
+              Default: ``True`` .
         """
         ascend_cfg_modes = {
             'precision_mode': ["force_fp16", "allow_fp32_to_fp16", "allow_mix_precision", "must_keep_origin_dtype",
@@ -316,6 +355,7 @@ class _Context:
             'save_checkpoint_steps': (int,),
             'need_ckpt': (bool,),
             'last_triggered_step': (int,),
+            'hccl_watchdog': (bool,),
             'topo_order': (dict,),
             'op_debug_option': (str, None),
         }
@@ -335,6 +375,7 @@ class _Context:
             'save_checkpoint_steps': self._set_save_checkpoint_steps,
             'need_ckpt': self._set_need_ckpt,
             'last_triggered_step': self._set_last_triggered_step,
+            'hccl_watchdog': self._set_hccl_watchdog,
             'topo_order': self._set_topo_order
         }
         ascend_cfg_set = tuple(ascend_cfg_modes.keys())
@@ -351,6 +392,7 @@ class _Context:
                                 f"{supported_modes}, but got {type(ascend_value)}.")
             cfg_setter = ascend_cfg_setters.get(ascend_key)
             cfg_setter(ascend_value)
+        self.ascend_config = ascend_config

     def set_gpu_config(self, gpu_config):
         """
@@ -392,6 +434,7 @@ class _Context:
                 self.set_param(ms_ctx_param.conv_allow_tf32, gpu_config[gpu_key])
             if gpu_key == 'matmul_allow_tf32':
                 self.set_param(ms_ctx_param.matmul_allow_tf32, gpu_config[gpu_key])
+        self.gpu_config = gpu_config

     def set_jit_config(self, jit_config):
         """
@@ -410,12 +453,13 @@ class _Context:
                                  f"{jit_cfgs}, but got {jit_key}.")
             supported_value = jit_cfgs.get(jit_key)
             if jit_config[jit_key] not in supported_value:
-                raise ValueError(f"For 'jit_cfgs', the value of argument {jit_key} must be one of "
+                raise ValueError(f"For 'jit_config', the value of argument {jit_key} must be one of "
                                  f"{supported_value}, but got {jit_config[jit_key]}.")
-            self._jit_config = jit_config
             self.set_param(key_args_map[jit_key], jit_config[jit_key])
+        self.jit_config = jit_config

-        if 'infer_boost' in jit_config and jit_config['infer_boost'] == "on" and jit_config['jit_level'] != "O0":
+        jit_level = jit_config.get("jit_level", None)
+        if jit_config.get("infer_boost", None) == "on" and (jit_level == "O1" or jit_level == "O2"):
             raise ValueError(f"Only jit_level set O0 can set infer_boost to on.")

     def set_backend_policy(self, policy):
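A usage sketch of the tightened validation (illustrative only):

    import mindspore as ms

    # Allowed: infer_boost can only be combined with jit_level "O0".
    ms.set_context(jit_config={"jit_level": "O0", "infer_boost": "on"})

    # Rejected in 2.4.1: an explicit "O1"/"O2" jit_level together with infer_boost="on"
    # raises ValueError("Only jit_level set O0 can set infer_boost to on.").
    # Note the check now uses jit_config.get("jit_level"), so omitting jit_level
    # no longer triggers a KeyError as the 2.3.0 code could.
    # ms.set_context(jit_config={"jit_level": "O2", "infer_boost": "on"})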
@@ -488,6 +532,7 @@ class _Context:
                                  f"{supported_value}, but got {aoe_config[aoe_config_key]}.")
             if aoe_config_key == 'job_type':
                 self.set_param(ms_ctx_param.aoe_job_type, aoe_config[aoe_config_key])
+        self.aoe_config = aoe_config

     def set_device_id(self, device_id):
         if device_id < 0 or device_id > 4095:
@@ -626,6 +671,7 @@ class _Context:
             'inter_op_parallel_num': set_inter_op_parallel_num,
             'runtime_num_threads': set_runtime_num_threads,
             'memory_optimize_level': set_memory_optimize_level,
+            'exec_order': set_exec_order,
             'op_timeout': set_op_timeout,
             'memory_offload': set_memory_offload,
             'deterministic': set_deterministic,
@@ -744,6 +790,12 @@ class _Context:
             options_str = json.dumps(topo_order)
         self.set_param(ms_ctx_param.topo_order, options_str)

+    def _set_hccl_watchdog(self, flag):
+        """set hccl watchdog"""
+        if not isinstance(flag, bool):
+            raise TypeError(f"For 'ascend_config', the type of 'hccl_watchdog' must be bool, but got {type(flag)}.")
+        self.set_param(ms_ctx_param.hccl_watchdog, flag)
+
     def _set_need_ckpt(self, need_ckpt):
         """Set need ckpt flag"""
         if not isinstance(need_ckpt, bool):
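Illustrative use of the new switch (a sketch assuming an Ascend target, where ascend_config applies):

    import mindspore as ms

    # 'hccl_watchdog' must be a bool (validated by _set_hccl_watchdog above); it
    # defaults to True and can be disabled explicitly:
    ms.set_context(ascend_config={"hccl_watchdog": False})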
@@ -772,7 +824,7 @@ class _Context:
         """"Check and set speedup config for auto parallel."""
         if speedup_config_path is None or speedup_config_path == "":
             return
-        speedup_config_real_path = os.path.abspath(speedup_config_path)
+        speedup_config_real_path = os.path.realpath(speedup_config_path)
         if not os.path.exists(speedup_config_real_path):
             raise ValueError(f"For 'ascend_config', the path to parallel_speed_up_json: "
                              f"{speedup_config_real_path} does not exist, please check whether the "
@@ -790,10 +842,17 @@ class _Context:
             "enable_begin_end_inline_opt": (ms_ctx_param.enable_begin_end_inline_opt, bool),
             "enable_concat_eliminate_opt": (ms_ctx_param.enable_concat_eliminate_opt, bool),
             "interleaved_layernorm_comm": (ms_ctx_param.interleaved_layernorm_comm, bool),
+            "enable_allreduce_slice_to_reducescatter":
+                (ms_ctx_param.enable_allreduce_slice_to_reducescatter, bool),
+            "enable_interleave_split_concat_branch":
+                (ms_ctx_param.enable_interleave_split_concat_branch, bool),
+            "enable_offloading_packed_experts": (ms_ctx_param.enable_offloading_packed_experts, bool),
             "compute_communicate_fusion_level":
                 (ms_ctx_param.compute_communicate_fusion_level, int),
             "enable_flash_attention_load_balance":
-                (ms_ctx_param.enable_flash_attention_load_balance, bool)}
+                (ms_ctx_param.enable_flash_attention_load_balance, bool),
+            "dataset_broadcast_opt_level":
+                (ms_ctx_param.dataset_broadcast_opt_level, int)}
         with open(speedup_config_real_path, 'r') as f:
             speedup_config = json.load(f)
         for key, value in speedup_config.items():
@@ -876,6 +935,7 @@ def set_auto_parallel_context(**kwargs):
             \                           strategy_ckpt_config
             \                           group_ckpt_save_file
             \                           auto_pipeline
+            \                           dump_local_norm
             =========================== ===========================

    Args:
@@ -1027,6 +1087,9 @@ def set_auto_parallel_context(**kwargs):
        auto_pipeline (bool): Set the pipeline stage number to automatic. Its value will be selected between 1 and the
            parameter `pipeline_stages`. This option requires the `parallel_mode` to be ``auto_parallel``
            and the `search_mode` to be ``recursive_programming``. Default: ``False`` .
+       dump_local_norm (bool): Whether to dump local_norm value, when the `parallel_mode` is set to
+           ``semi_auto_parallel`` or ``auto_parallel``.
+           Default: ``False`` .

    Raises:
        ValueError: If input key is not attribute in auto parallel context.
@@ -1097,11 +1160,12 @@ def reset_auto_parallel_context():
    - strategy_ckpt_save_file: ''.
    - full_batch: False.
    - enable_parallel_optimizer: False.
-   - force_fp32_communication: False
+   - force_fp32_communication: False.
    - enable_alltoall: False.
    - pipeline_stages: 1.
    - pipeline_result_broadcast: False.
    - fusion_threshold: 64.
+   - dump_local_norm: False.
    - auto_pipeline: False.

    Examples:
@@ -1109,6 +1173,7 @@ def reset_auto_parallel_context():
        >>> ms.reset_auto_parallel_context()
    """
    _reset_auto_parallel_context()
+   api.ms_compile_cache.clear()


 @args_type_check(offload_config=dict)
@@ -1118,7 +1183,8 @@ def set_offload_context(offload_config):

    Note:
        The offload configuration is only used if the memory offload feature is enabled
-       via mindspore.set_context(memory_offload="ON").
+       via mindspore.set_context(memory_offload="ON"), and the memory_optimize_level must be set to O0. On the Ascend
+       hardware platform, the graph compilation level must be O0.

    Args:
        offload_config (dict): A dict contains the keys and values for setting the offload context
@@ -1311,6 +1377,8 @@ def set_context(**kwargs):
    |                         |  gpu_config                  |  GPU                       |
    |                         +------------------------------+----------------------------+
    |                         |  jit_config                  |  CPU/GPU/Ascend            |
+   |                         +------------------------------+----------------------------+
+   |                         |  exec_order                  |  Ascend                    |
    +-------------------------+------------------------------+----------------------------+

    Args:
@@ -1320,12 +1388,14 @@ def set_context(**kwargs):
        If device target is not set, the version of MindSpore package is used.
        max_device_memory (str): Set the maximum memory available for devices. The format is "xxGB".
            Default: ``"1024GB"`` . The actual used memory size is the minimum of the available memory of the device
-           and max_device_memory. 'max_device_memory' should be set before the program runs.
+           and max_device_memory. 'max_device_memory' should be set before the program runs. When virtual memory is
+           enabled, a too small 'max_device_memory' will cause frequent defragmentation, affecting performance.
        variable_memory_max_size (str): This parameter is deprecated, and will be removed in a future version.
            Please use parameter 'max_device_memory' instead.
-       mempool_block_size (str): Set the size of the memory pool block in PyNative mode or jit level is 'O0'/'O1'
-           for devices. The format is "xxGB". Default: ``"1GB"`` . Minimum size is "1G". The actual used memory block
-           size is the minimum of the available memory of the device and mempool_block_size.
+       mempool_block_size (str): It takes effect when virtual memory is turned off, set the size of the memory pool
+           block for devices. The format is "xxGB". Default: ``"1GB"`` . Minimum size is "1G". The actual used memory
+           block size is the minimum of the available memory of the device and mempool_block_size. When there is
+           enough memory, the memory will be expanded by this value.
        op_timeout (int): Set the maximum duration of executing an operator in seconds.
            If the execution time exceeds this value, system will terminate the task.
            0 means endless wait. The defaults for AI Core and AICPU operators vary on different hardware.
@@ -1413,7 +1483,7 @@ def set_context(**kwargs):
            If enable_graph_kernel is set to ``True`` , acceleration can be enabled.
            For details of graph kernel fusion, please check
            `Enabling Graph Kernel Fusion
-           <https://www.mindspore.cn/tutorials/experts/en/master/optimize/graph_fusion_engine.html>`_.
+           <https://www.mindspore.cn/docs/en/master/model_train/optimize/graph_fusion_engine.html>`_.
        graph_kernel_flags (str):
            Optimization options of graph kernel fusion, and the priority is higher when it conflicts
            with enable_graph_kernel. Only for experienced users.
@@ -1438,6 +1508,11 @@ def set_context(**kwargs):
              Be caution when using this level.

            - dump_as_text: dumps detail info as text files. Default: ``False`` .
+           - enable_cluster_ops: Add user-specified operator to the set of operators involved in fusion. For example,
+             by setting ``--enable_cluster_ops=MatMul``, MatMul operator can be included in the fusion process.
+           - enable_pass/disable_pass: Enable/disable user-specified custom fusion passes. See details in
+             `Custom Fusion Pass
+             <https://www.mindspore.cn/docs/en/master/model_train/custom_program/fusion_pass.html>`_.

        enable_reduce_precision (bool): Whether to enable precision reduction.
            If the operator does not support the user-specified precision, the precision will
@@ -1468,6 +1543,7 @@ def set_context(**kwargs):
            if enable_compile_cache is still set to ``True`` and the network scripts are not changed,
            the compile cache is loaded. Note that only limited automatic detection for the changes of
            python scripts is supported by now, which means that there is a correctness risk. Default: ``False`` .
+           Currently, do not support the graph which is larger than 2G after compiled.
            This is an experimental prototype that is subject to change and/or deletion.
        compile_cache_path (str): Path to save the compile cache. Default: ``"."``.
            If the specified directory does not exist, the system will automatically create the directory.
@@ -1477,7 +1553,8 @@ def set_context(**kwargs):
            which means use the default num.
        runtime_num_threads(int): The thread pool number of cpu kernel used in runtime,
            which must bigger than or equal to 0. Default value is ``30`` , if you run many processes at
-           the same time, you should set the value smaller to avoid thread contention.
+           the same time, you should set the value smaller to avoid thread contention. If set runtime_num_threads to 1,
+           the runtime asynchronous pipeline capability cannot be enabled, which may affect performance.
        disable_format_transform (bool): Whether to disable the automatic format transform function from NCHW to NHWC.
            When the network training performance of fp16 is worse than fp32, `disable_format_transform` can be set to
            ``True`` to try to improve training performance. Default: ``False`` .
@@ -1588,7 +1665,7 @@ def set_context(**kwargs):
            `LazyInline <https://www.mindspore.cn/docs/en/master/api_python/mindspore/mindspore.lazy_inline.html>`
            Default: False.
          - compute_communicate_fusion_level (int): Enable the fusion between compute and communicate.
-           Default: ``0``.
+           Default: ``0``. Note: This function must be used with Ascend Training Solution 24.0.RC2 or later.


            - 0: Disable fusion.
@@ -1597,8 +1674,27 @@ def set_context(**kwargs):
            - 2: Apply fusion to backward nodes.

            - 3: Apply fusion to all nodes.
+         - dataset_broadcast_opt_level (int): Optimize the scenario that the dataset repeated reading. Only
+           support O0/O1 jit level. It doesn't work in O2 mode. Default: ``0``.
+
+           - 0: Disable this optimize.
+
+           - 1: Optimize dataset reader between pipeline stage.
+
+           - 2: Optimize dataset reader within pipeline stage.
+
+           - 3: Optimize dataset reader with all scenes.
          - bias_add_comm_swap (bool): Enable node execution order swap communication operators and add operators
            if ``True``. Only 1-dimension bias node is supported. Default: ``False``.
+         - enable_allreduce_slice_to_reducescatter (bool): Enable allreduce optimization. In the scenario where
+           the batchmatmul model introduces allreduce in parallel, if the subsequent nodes are stridedslice
+           operator with model parallel, allreduce will be optimized as reducescatter according to the identified
+           patterns. Typical used in MoE module with groupwise alltoall. Default: ``False``.
+         - enable_interleave_split_concat_branch (bool): Enable communication computation parallel optimization
+           for branches formed by split and concat operators with ``enable_interleave`` attribute. It is typical
+           used in MoE parallel scenario. After splitting the input data, each slice of data is processed by the
+           MoE module, and then the branch results are concatenated. When the optimization is enable,
+           communication and computation will be executed in parallel between branches. Default: ``False``.
          - host_scheduling_max_threshold(int): The max threshold to control whether the dynamic shape process is
            used when run the static graph, the default value is 0. When the number of operations in the static graph
            is less than the max threshold, this graph will be executed in dynamic shape process. In large model
@@ -1698,12 +1794,13 @@ def set_context(**kwargs):

        - jit_level (str): Used to control the compilation optimization level. Default: ``""`` , The framework
          automatically selects the execution method based on product, Altas training product is O2, and all other
-         products are O0. The value range is as follows:
+         products are O0. In addition, The option of the dynamic shape must be O0 or O1, O2 is not supported.
+         The value range is as follows:

          - ``"O0"``: Except for optimizations that may affect functionality, all other optimizations are turned
            off, adopt KernelByKernel execution mode.
          - ``"O1"``: Using commonly used optimizations and automatic operator fusion optimizations,
-           adopt KernelByKernel execution mode.
+           adopt KernelByKernel execution mode. This optimization level is experimental and is being improved.
          - ``"O2"``: Ultimate performance optimization, adopt Sink execution mode.

        - infer_boost (str): Used to control the infer mode. Default: ``"off"`` . The value range is as follows:
@@ -1711,6 +1808,18 @@ def set_context(**kwargs):
          - ``"on"``: Enable infer mode, get better infer performance.
          - ``"off"``: Disable infer mode, use forward to infer, performance is not good.

+       exec_order (str): Set the sorting method for operator execution in GRAPH_MODE Currently, only three sorting
+           methods are supported: bfs and gpto, and the default method is bfs.
+
+           - ``"bfs"``: The default sorting method, breadth priority, good communication masking, relatively good
+             performance.
+           - ``"dfs"``: An optional sorting method, depth-first sorting. The performance is relatively worse than that
+             of bfs execution order, but it occupies less memory. It is recommended to try dfs in scenarios where other
+             execution orders run out of memory (OOM).
+           - ``"gpto"``: An optional sorting method. This method combines multiple execution orders and selects a
+             method with relatively good performance. There may be some performance gains in scenarios with multiple
+             replicas running in parallel.
+
    Raises:
        ValueError: If input key is not an attribute in context.

@@ -1753,6 +1862,7 @@ def set_context(**kwargs):
        >>> ms.set_context(gpu_config={"conv_fprop_algo": "performance", "conv_allow_tf32": True,
        ...                            "matmul_allow_tf32": True})
        >>> ms.set_context(jit_config={"jit_level": "O0"})
+       >>> ms.set_context(exec_order="gpto")
    """
    ctx = _context()
    # set device target first
@@ -1790,12 +1900,12 @@ def set_context(**kwargs):
             continue
         if not _check_target_specific_cfgs(device, key):
             continue
-        if hasattr(ctx, key):
-            setattr(ctx, key, value)
-            continue
         if key in ctx.setters:
             ctx.setters[key](ctx, value)
             continue
+        if hasattr(ctx, key):
+            setattr(ctx, key, value)
+            continue
         # enum variables beginning with '_' are for internal use
         if key in ms_ctx_param.__members__ and key[0] != '_':
             ctx.set_param(ms_ctx_param.__members__[key], value)
@@ -21,7 +21,7 @@ Besides, this module provides APIs to sample data while loading.

 We can enable cache in most of the dataset with its key arguments 'cache'. Please notice that cache is not supported
 on Windows platform yet. Do not use it while loading and processing data on Windows. More introductions and limitations
-can refer `Single-Node Tensor Cache <https://www.mindspore.cn/tutorials/experts/en/master/dataset/cache.html>`_ .
+can refer `Single-Node Tensor Cache <https://www.mindspore.cn/docs/en/master/model_train/dataset/cache.html>`_ .

 Common imported modules in corresponding API examples are as follows:

@@ -43,7 +43,7 @@ The data transform operation can be executed in the data processing pipeline or
   `introduction to data processing pipeline <https://www.mindspore.cn/docs/en/master/api_python/
   mindspore.dataset.html#introduction-to-data-processing-pipeline>`_ .
 - Eager mode is more like a function call to process data. Examples refer to
-  `Lightweight Data Processing <https://www.mindspore.cn/tutorials/en/master/advanced/dataset/eager.html>`_ .
+  `Lightweight Data Processing <https://www.mindspore.cn/docs/en/master/model_train/dataset/eager.html>`_ .
 """
 from __future__ import absolute_import

@@ -32,6 +32,8 @@ import mindspore._c_dataengine as cde
 from mindspore import log as logger
 from mindspore.dataset.core.validator_helpers import replace_none, type_check
 from mindspore.dataset.debug import DebugHook, PrintMetaDataHook
+from mindspore.dataset.core.validator_helpers import check_independent_mode
+

 __all__ = ['set_sending_batches', 'load', '_init_device_info',
            'set_seed', 'get_seed',
@@ -544,6 +546,8 @@ def set_enable_autotune(enable, filepath_prefix=None):
     if not isinstance(enable, bool):
         raise TypeError("enable must be of type bool.")

+    check_independent_mode("Dataset AutoTune", enable)
+
     save_autoconfig = bool(enable and filepath_prefix is not None)

     if filepath_prefix and not isinstance(filepath_prefix, str):
@@ -728,6 +732,9 @@ def set_auto_offload(offload):
     """
     if not isinstance(offload, bool):
         raise TypeError("offload must be a bool dtype")
+
+    check_independent_mode("Dataset Offload", offload)
+
     _config.set_auto_offload(offload)

@@ -766,6 +766,13 @@ def check_dict(data, key_type, value_type, param_name):
                         .format(key, param_name, value_type, type(value)))


+def check_independent_mode(feature_name, condition=True):
+    # todo in Dataset Independent mode
+    independent_process_env = os.getenv("MS_INDEPENDENT_DATASET", None)
+    if condition and independent_process_env and independent_process_env.strip() in ['True', 'true']:
+        raise RuntimeError(f"{feature_name} is not supported in Dataset Independent mode.")
+
+
 def check_feature_shape(data, shape, param_name):
     if isinstance(data, dict):
         for key, value in data.items():
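A sketch of how this new guard surfaces to users (illustrative only; it assumes the independent dataset process is toggled through the MS_INDEPENDENT_DATASET variable shown above):

    import os
    import mindspore.dataset as ds

    os.environ["MS_INDEPENDENT_DATASET"] = "true"
    try:
        ds.config.set_auto_offload(True)   # guarded by check_independent_mode("Dataset Offload", ...)
    except RuntimeError as err:
        print(err)  # Dataset Offload is not supported in Dataset Independent mode.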
@@ -27,7 +27,7 @@ class DatasetCache:
     A client to interface with tensor caching service.

     For details, please check
-    `Tutorial <https://www.mindspore.cn/tutorials/experts/en/master/dataset/cache.html>`_ .
+    `Tutorial <https://www.mindspore.cn/docs/en/master/model_train/dataset/cache.html>`_ .

     Args:
         session_id (int): A user assigned session id for the current pipeline.