mindspore-2.3.0-cp39-cp39-win_amd64.whl → mindspore-2.4.1-cp39-cp39-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of mindspore might be problematic.
- mindspore/.commit_id +1 -1
- mindspore/__init__.py +3 -1
- mindspore/_c_dataengine.cp39-win_amd64.pyd +0 -0
- mindspore/_c_expression.cp39-win_amd64.pyd +0 -0
- mindspore/_c_mindrecord.cp39-win_amd64.pyd +0 -0
- mindspore/_checkparam.py +50 -9
- mindspore/_extends/parse/compile_config.py +41 -0
- mindspore/_extends/parse/parser.py +9 -7
- mindspore/_extends/parse/standard_method.py +52 -14
- mindspore/_extends/pijit/pijit_func_white_list.py +350 -24
- mindspore/amp.py +24 -10
- mindspore/avcodec-59.dll +0 -0
- mindspore/avdevice-59.dll +0 -0
- mindspore/avfilter-8.dll +0 -0
- mindspore/avformat-59.dll +0 -0
- mindspore/avutil-57.dll +0 -0
- mindspore/common/__init__.py +6 -4
- mindspore/common/_pijit_context.py +190 -0
- mindspore/common/_register_for_tensor.py +2 -1
- mindspore/common/_tensor_overload.py +139 -0
- mindspore/common/api.py +102 -87
- mindspore/common/dump.py +5 -6
- mindspore/common/generator.py +1 -7
- mindspore/common/hook_handle.py +14 -26
- mindspore/common/initializer.py +51 -15
- mindspore/common/mindir_util.py +2 -2
- mindspore/common/parameter.py +62 -15
- mindspore/common/recompute.py +39 -9
- mindspore/common/sparse_tensor.py +7 -3
- mindspore/common/tensor.py +183 -37
- mindspore/communication/__init__.py +1 -1
- mindspore/communication/_comm_helper.py +38 -3
- mindspore/communication/comm_func.py +315 -60
- mindspore/communication/management.py +14 -14
- mindspore/context.py +132 -22
- mindspore/dataset/__init__.py +1 -1
- mindspore/dataset/audio/__init__.py +1 -1
- mindspore/dataset/core/config.py +7 -0
- mindspore/dataset/core/validator_helpers.py +7 -0
- mindspore/dataset/engine/cache_client.py +1 -1
- mindspore/dataset/engine/datasets.py +72 -44
- mindspore/dataset/engine/datasets_audio.py +7 -7
- mindspore/dataset/engine/datasets_standard_format.py +53 -3
- mindspore/dataset/engine/datasets_text.py +20 -20
- mindspore/dataset/engine/datasets_user_defined.py +174 -104
- mindspore/dataset/engine/datasets_vision.py +33 -33
- mindspore/dataset/engine/iterators.py +29 -0
- mindspore/dataset/engine/obs/util.py +7 -0
- mindspore/dataset/engine/queue.py +114 -60
- mindspore/dataset/engine/serializer_deserializer.py +2 -2
- mindspore/dataset/engine/validators.py +34 -14
- mindspore/dataset/text/__init__.py +1 -4
- mindspore/dataset/transforms/__init__.py +0 -3
- mindspore/dataset/utils/line_reader.py +2 -0
- mindspore/dataset/vision/__init__.py +1 -4
- mindspore/dataset/vision/utils.py +1 -1
- mindspore/dataset/vision/validators.py +2 -1
- mindspore/dnnl.dll +0 -0
- mindspore/{nn/extend → experimental/es}/__init__.py +4 -11
- mindspore/experimental/es/embedding_service.py +883 -0
- mindspore/{nn/layer → experimental/es}/embedding_service_layer.py +218 -30
- mindspore/experimental/llm_boost/__init__.py +21 -0
- mindspore/{nn/extend/layer → experimental/llm_boost/atb}/__init__.py +4 -8
- mindspore/experimental/llm_boost/atb/boost_base.py +211 -0
- mindspore/experimental/llm_boost/atb/llama_boost.py +115 -0
- mindspore/experimental/llm_boost/atb/qwen_boost.py +101 -0
- mindspore/experimental/llm_boost/register.py +129 -0
- mindspore/experimental/llm_boost/utils.py +31 -0
- mindspore/experimental/optim/adamw.py +85 -0
- mindspore/experimental/optim/optimizer.py +3 -0
- mindspore/hal/__init__.py +3 -3
- mindspore/hal/contiguous_tensors_handle.py +175 -0
- mindspore/hal/stream.py +18 -0
- mindspore/include/api/model_group.h +13 -1
- mindspore/include/api/types.h +10 -10
- mindspore/include/dataset/config.h +2 -2
- mindspore/include/dataset/constants.h +2 -2
- mindspore/include/dataset/execute.h +2 -2
- mindspore/include/dataset/vision.h +4 -0
- mindspore/jpeg62.dll +0 -0
- mindspore/log.py +1 -1
- mindspore/mindrecord/filewriter.py +68 -51
- mindspore/mindspore_backend.dll +0 -0
- mindspore/mindspore_common.dll +0 -0
- mindspore/mindspore_core.dll +0 -0
- mindspore/mindspore_glog.dll +0 -0
- mindspore/mindspore_np_dtype.dll +0 -0
- mindspore/mindspore_ops.dll +0 -0
- mindspore/mint/__init__.py +983 -46
- mindspore/mint/distributed/__init__.py +31 -0
- mindspore/mint/distributed/distributed.py +254 -0
- mindspore/mint/nn/__init__.py +268 -23
- mindspore/mint/nn/functional.py +125 -19
- mindspore/mint/nn/layer/__init__.py +39 -0
- mindspore/mint/nn/layer/activation.py +133 -0
- mindspore/mint/nn/layer/normalization.py +477 -0
- mindspore/mint/nn/layer/pooling.py +110 -0
- mindspore/mint/optim/adamw.py +26 -13
- mindspore/mint/special/__init__.py +63 -0
- mindspore/multiprocessing/__init__.py +2 -1
- mindspore/nn/__init__.py +0 -1
- mindspore/nn/cell.py +276 -96
- mindspore/nn/layer/activation.py +211 -44
- mindspore/nn/layer/basic.py +137 -10
- mindspore/nn/layer/embedding.py +137 -2
- mindspore/nn/layer/normalization.py +101 -5
- mindspore/nn/layer/padding.py +34 -48
- mindspore/nn/layer/pooling.py +161 -7
- mindspore/nn/layer/transformer.py +3 -3
- mindspore/nn/loss/__init__.py +2 -2
- mindspore/nn/loss/loss.py +84 -6
- mindspore/nn/optim/__init__.py +2 -1
- mindspore/nn/optim/adadelta.py +1 -1
- mindspore/nn/optim/adam.py +1 -1
- mindspore/nn/optim/lamb.py +1 -1
- mindspore/nn/optim/tft_wrapper.py +124 -0
- mindspore/nn/wrap/cell_wrapper.py +12 -23
- mindspore/nn/wrap/grad_reducer.py +5 -5
- mindspore/nn/wrap/loss_scale.py +17 -3
- mindspore/numpy/__init__.py +1 -1
- mindspore/numpy/array_creations.py +65 -68
- mindspore/numpy/array_ops.py +64 -60
- mindspore/numpy/fft.py +610 -75
- mindspore/numpy/logic_ops.py +11 -10
- mindspore/numpy/math_ops.py +85 -84
- mindspore/numpy/utils_const.py +4 -4
- mindspore/opencv_core452.dll +0 -0
- mindspore/opencv_imgcodecs452.dll +0 -0
- mindspore/opencv_imgproc452.dll +0 -0
- mindspore/ops/__init__.py +6 -4
- mindspore/ops/_grad_experimental/grad_array_ops.py +0 -11
- mindspore/ops/_grad_experimental/grad_comm_ops.py +67 -4
- mindspore/ops/_grad_experimental/grad_math_ops.py +0 -22
- mindspore/ops/_vmap/vmap_array_ops.py +2 -4
- mindspore/ops/_vmap/vmap_math_ops.py +17 -1
- mindspore/ops/_vmap/vmap_nn_ops.py +43 -2
- mindspore/ops/auto_generate/cpp_create_prim_instance_helper.py +91 -7
- mindspore/ops/auto_generate/gen_arg_dtype_cast.py +2 -0
- mindspore/ops/auto_generate/gen_extend_func.py +767 -13
- mindspore/ops/auto_generate/gen_ops_def.py +2452 -364
- mindspore/ops/auto_generate/gen_ops_prim.py +5442 -1756
- mindspore/ops/auto_generate/pyboost_inner_prim.py +176 -56
- mindspore/ops/composite/base.py +85 -48
- mindspore/ops/composite/multitype_ops/_compile_utils.py +1 -0
- mindspore/ops/composite/multitype_ops/not_in_impl.py +2 -2
- mindspore/ops/function/__init__.py +22 -0
- mindspore/ops/function/array_func.py +492 -153
- mindspore/ops/function/debug_func.py +113 -1
- mindspore/ops/function/fft_func.py +15 -2
- mindspore/ops/function/grad/grad_func.py +3 -2
- mindspore/ops/function/math_func.py +564 -207
- mindspore/ops/function/nn_func.py +817 -383
- mindspore/ops/function/other_func.py +3 -2
- mindspore/ops/function/random_func.py +402 -12
- mindspore/ops/function/reshard_func.py +13 -11
- mindspore/ops/function/sparse_unary_func.py +1 -1
- mindspore/ops/function/vmap_func.py +3 -2
- mindspore/ops/functional.py +24 -14
- mindspore/ops/op_info_register.py +3 -3
- mindspore/ops/operations/__init__.py +7 -2
- mindspore/ops/operations/_grad_ops.py +2 -76
- mindspore/ops/operations/_infer_ops.py +1 -1
- mindspore/ops/operations/_inner_ops.py +71 -94
- mindspore/ops/operations/array_ops.py +14 -146
- mindspore/ops/operations/comm_ops.py +63 -53
- mindspore/ops/operations/custom_ops.py +83 -19
- mindspore/ops/operations/debug_ops.py +42 -10
- mindspore/ops/operations/manually_defined/_inner.py +12 -0
- mindspore/ops/operations/manually_defined/ops_def.py +273 -20
- mindspore/ops/operations/math_ops.py +12 -223
- mindspore/ops/operations/nn_ops.py +20 -114
- mindspore/ops/operations/other_ops.py +7 -4
- mindspore/ops/operations/random_ops.py +46 -1
- mindspore/ops/primitive.py +18 -6
- mindspore/ops_generate/arg_dtype_cast.py +2 -0
- mindspore/ops_generate/gen_aclnn_implement.py +11 -11
- mindspore/ops_generate/gen_constants.py +36 -0
- mindspore/ops_generate/gen_ops.py +67 -52
- mindspore/ops_generate/gen_ops_inner_prim.py +1 -1
- mindspore/ops_generate/gen_pyboost_func.py +131 -47
- mindspore/ops_generate/op_proto.py +10 -3
- mindspore/ops_generate/pyboost_utils.py +14 -1
- mindspore/ops_generate/template.py +43 -21
- mindspore/parallel/__init__.py +3 -1
- mindspore/parallel/_auto_parallel_context.py +31 -9
- mindspore/parallel/_cell_wrapper.py +85 -0
- mindspore/parallel/_parallel_serialization.py +47 -19
- mindspore/parallel/_tensor.py +127 -13
- mindspore/parallel/_utils.py +53 -22
- mindspore/parallel/algo_parameter_config.py +5 -5
- mindspore/parallel/checkpoint_transform.py +46 -39
- mindspore/parallel/cluster/process_entity/__init__.py +1 -1
- mindspore/parallel/cluster/process_entity/_api.py +31 -23
- mindspore/parallel/cluster/process_entity/_utils.py +2 -27
- mindspore/parallel/parameter_broadcast.py +3 -4
- mindspore/parallel/shard.py +162 -31
- mindspore/parallel/transform_safetensors.py +1146 -0
- mindspore/profiler/__init__.py +2 -1
- mindspore/profiler/common/constant.py +29 -0
- mindspore/profiler/common/registry.py +47 -0
- mindspore/profiler/common/util.py +28 -0
- mindspore/profiler/dynamic_profiler.py +694 -0
- mindspore/profiler/envprofiling.py +17 -19
- mindspore/profiler/parser/ascend_analysis/constant.py +18 -0
- mindspore/profiler/parser/ascend_analysis/file_manager.py +25 -4
- mindspore/profiler/parser/ascend_analysis/function_event.py +43 -19
- mindspore/profiler/parser/ascend_analysis/fwk_cann_parser.py +31 -26
- mindspore/profiler/parser/ascend_analysis/fwk_file_parser.py +56 -10
- mindspore/profiler/parser/ascend_analysis/msprof_timeline_parser.py +55 -8
- mindspore/profiler/parser/ascend_analysis/path_manager.py +313 -0
- mindspore/profiler/parser/ascend_analysis/profiler_info_parser.py +27 -20
- mindspore/profiler/parser/ascend_analysis/trace_event_manager.py +9 -2
- mindspore/profiler/parser/ascend_msprof_exporter.py +5 -4
- mindspore/profiler/parser/ascend_timeline_generator.py +27 -25
- mindspore/profiler/parser/base_timeline_generator.py +19 -25
- mindspore/profiler/parser/cpu_gpu_timeline_generator.py +25 -12
- mindspore/profiler/parser/framework_parser.py +1 -391
- mindspore/profiler/parser/gpu_analysis/__init__.py +14 -0
- mindspore/profiler/parser/gpu_analysis/function_event.py +44 -0
- mindspore/profiler/parser/gpu_analysis/fwk_file_parser.py +89 -0
- mindspore/profiler/parser/gpu_analysis/profiler_info_parser.py +72 -0
- mindspore/profiler/parser/memory_usage_parser.py +0 -154
- mindspore/profiler/parser/profiler_info.py +78 -6
- mindspore/profiler/profiler.py +153 -0
- mindspore/profiler/profiling.py +285 -413
- mindspore/rewrite/__init__.py +1 -2
- mindspore/rewrite/common/namespace.py +4 -4
- mindspore/rewrite/symbol_tree/symbol_tree.py +3 -3
- mindspore/run_check/_check_version.py +39 -104
- mindspore/safeguard/rewrite_obfuscation.py +591 -247
- mindspore/swresample-4.dll +0 -0
- mindspore/swscale-6.dll +0 -0
- mindspore/tinyxml2.dll +0 -0
- mindspore/train/__init__.py +4 -3
- mindspore/train/_utils.py +105 -19
- mindspore/train/amp.py +171 -53
- mindspore/train/callback/__init__.py +2 -2
- mindspore/train/callback/_callback.py +4 -4
- mindspore/train/callback/_checkpoint.py +97 -31
- mindspore/train/callback/_cluster_monitor.py +1 -1
- mindspore/train/callback/_flops_collector.py +1 -0
- mindspore/train/callback/_loss_monitor.py +3 -3
- mindspore/train/callback/_on_request_exit.py +145 -31
- mindspore/train/callback/_summary_collector.py +5 -5
- mindspore/train/callback/_tft_register.py +375 -0
- mindspore/train/dataset_helper.py +15 -3
- mindspore/train/metrics/metric.py +3 -3
- mindspore/train/metrics/roc.py +4 -4
- mindspore/train/mind_ir_pb2.py +44 -39
- mindspore/train/model.py +154 -58
- mindspore/train/serialization.py +342 -128
- mindspore/turbojpeg.dll +0 -0
- mindspore/utils/__init__.py +21 -0
- mindspore/utils/utils.py +60 -0
- mindspore/version.py +1 -1
- {mindspore-2.3.0.dist-info → mindspore-2.4.1.dist-info}/METADATA +13 -7
- {mindspore-2.3.0.dist-info → mindspore-2.4.1.dist-info}/RECORD +260 -254
- {mindspore-2.3.0.dist-info → mindspore-2.4.1.dist-info}/WHEEL +1 -1
- mindspore/include/c_api/ms/abstract.h +0 -67
- mindspore/include/c_api/ms/attribute.h +0 -197
- mindspore/include/c_api/ms/base/handle_types.h +0 -43
- mindspore/include/c_api/ms/base/macros.h +0 -32
- mindspore/include/c_api/ms/base/status.h +0 -33
- mindspore/include/c_api/ms/base/types.h +0 -283
- mindspore/include/c_api/ms/context.h +0 -102
- mindspore/include/c_api/ms/graph.h +0 -160
- mindspore/include/c_api/ms/node.h +0 -606
- mindspore/include/c_api/ms/tensor.h +0 -161
- mindspore/include/c_api/ms/value.h +0 -84
- mindspore/mindspore_shared_lib.dll +0 -0
- mindspore/nn/extend/basic.py +0 -140
- mindspore/nn/extend/embedding.py +0 -143
- mindspore/nn/extend/layer/normalization.py +0 -109
- mindspore/nn/extend/pooling.py +0 -117
- mindspore/nn/layer/embedding_service.py +0 -531
- mindspore/ops/_op_impl/aicpu/strided_slice_v2.py +0 -93
- mindspore/ops/_op_impl/aicpu/strided_slice_v2_grad.py +0 -66
- mindspore/ops/extend/__init__.py +0 -53
- mindspore/ops/extend/array_func.py +0 -218
- mindspore/ops/extend/math_func.py +0 -76
- mindspore/ops/extend/nn_func.py +0 -308
- mindspore/ops/silent_check.py +0 -162
- mindspore/profiler/parser/msadvisor_analyzer.py +0 -82
- mindspore/profiler/parser/msadvisor_parser.py +0 -240
- mindspore/train/callback/_mindio_ttp.py +0 -443
- {mindspore-2.3.0.dist-info → mindspore-2.4.1.dist-info}/entry_points.txt +0 -0
- {mindspore-2.3.0.dist-info → mindspore-2.4.1.dist-info}/top_level.txt +0 -0
mindspore/communication/management.py
CHANGED

@@ -102,7 +102,8 @@ def _set_envs():
         os.environ["RANK_ID"] = str(get_rank())
     if os.getenv("RANK_SIZE") is None:
         os.environ["RANK_SIZE"] = str(get_group_size())
-    os.
+    if os.getenv("DEVICE_ID") is None:
+        os.environ["DEVICE_ID"] = str(get_local_rank())


 def init(backend_name=None):
@@ -140,7 +141,7 @@ def init(backend_name=None):
         For Ascend/GPU/CPU devices, it is recommended to use the msrun startup method
         without any third-party or configuration file dependencies.
         Please see the `msrun start up
-        <https://www.mindspore.cn/
+        <https://www.mindspore.cn/docs/zh-CN/master/model_train/parallel/msrun_launcher.html>`_
         for more details.

         >>> from mindspore.communication import init
@@ -165,6 +166,7 @@ def init(backend_name=None):
     if os.getenv("MS_ROLE") == "MS_SCHED":
         backend_name = "mccl"

+    _set_elegant_exit_handle()
     if backend_name == "hccl":
         if _is_ps_mode():
             # Use MindSpore cluster to build network for Parameter Server training.
@@ -173,7 +175,6 @@ def init(backend_name=None):
                 raise RuntimeError("Parameter server and scheduler should use 'CPU' as backend instead of 'Ascend'")
             if _get_ps_context("worker_num") == 1:
                 GlobalComm.INITED = True
-                _set_elegant_exit_handle()
                 return
     if device_target != "Ascend":
         raise RuntimeError("For 'init', the argument 'backend_name' should be '{}' to init '{}', "
@@ -203,7 +204,6 @@ def init(backend_name=None):
                            "but got 'backend_name' : {}".format(backend_name))

     GlobalComm.INITED = True
-    _set_elegant_exit_handle()
     _set_envs()


@@ -227,7 +227,7 @@ def release():
         For Ascend/GPU/CPU devices, it is recommended to use the msrun startup method
         without any third-party or configuration file dependencies.
         Please see the `msrun start up
-        <https://www.mindspore.cn/
+        <https://www.mindspore.cn/docs/zh-CN/master/model_train/parallel/msrun_launcher.html>`_
         for more details.

         >>> from mindspore.communication import init, release
@@ -266,7 +266,7 @@ def get_rank(group=GlobalComm.WORLD_COMM_GROUP):
         For Ascend/GPU/CPU devices, it is recommended to use the msrun startup method
         without any third-party or configuration file dependencies.
         Please see the `msrun start up
-        <https://www.mindspore.cn/
+        <https://www.mindspore.cn/docs/zh-CN/master/model_train/parallel/msrun_launcher.html>`_
         for more details.

         >>> from mindspore.communication import init, get_rank
@@ -311,7 +311,7 @@ def get_local_rank(group=GlobalComm.WORLD_COMM_GROUP):
         For Ascend/GPU/CPU devices, it is recommended to use the msrun startup method
         without any third-party or configuration file dependencies.
         Please see the `msrun start up
-        <https://www.mindspore.cn/
+        <https://www.mindspore.cn/docs/zh-CN/master/model_train/parallel/msrun_launcher.html>`_
         for more details.

         >>> import mindspore as ms
@@ -359,7 +359,7 @@ def get_group_size(group=GlobalComm.WORLD_COMM_GROUP):
         For Ascend/GPU/CPU devices, it is recommended to use the msrun startup method
         without any third-party or configuration file dependencies.
         Please see the `msrun start up
-        <https://www.mindspore.cn/
+        <https://www.mindspore.cn/docs/zh-CN/master/model_train/parallel/msrun_launcher.html>`_
         for more details.

         >>> import mindspore as ms
@@ -406,7 +406,7 @@ def get_local_rank_size(group=GlobalComm.WORLD_COMM_GROUP):
         For Ascend/GPU/CPU devices, it is recommended to use the msrun startup method
         without any third-party or configuration file dependencies.
         Please see the `msrun start up
-        <https://www.mindspore.cn/
+        <https://www.mindspore.cn/docs/zh-CN/master/model_train/parallel/msrun_launcher.html>`_
         for more details.

         >>> import mindspore as ms
@@ -456,7 +456,7 @@ def get_world_rank_from_group_rank(group, group_rank_id):
         For Ascend/GPU/CPU devices, it is recommended to use the msrun startup method
         without any third-party or configuration file dependencies.
         Please see the `msrun start up
-        <https://www.mindspore.cn/
+        <https://www.mindspore.cn/docs/zh-CN/master/model_train/parallel/msrun_launcher.html>`_
         for more details.

         >>> import mindspore as ms
@@ -510,7 +510,7 @@ def get_group_rank_from_world_rank(world_rank_id, group):
         For Ascend/GPU/CPU devices, it is recommended to use the msrun startup method
         without any third-party or configuration file dependencies.
         Please see the `msrun start up
-        <https://www.mindspore.cn/
+        <https://www.mindspore.cn/docs/zh-CN/master/model_train/parallel/msrun_launcher.html>`_
         for more details.

         >>> import mindspore as ms
@@ -561,7 +561,7 @@ def create_group(group, rank_ids):
         For Ascend/GPU/CPU devices, it is recommended to use the msrun startup method
         without any third-party or configuration file dependencies.
         Please see the `msrun start up
-        <https://www.mindspore.cn/
+        <https://www.mindspore.cn/docs/zh-CN/master/model_train/parallel/msrun_launcher.html>`_
         for more details.

         >>> import mindspore as ms
@@ -609,7 +609,7 @@ def destroy_group(group):
         For Ascend/GPU/CPU devices, it is recommended to use the msrun startup method
         without any third-party or configuration file dependencies.
         Please see the `msrun start up
-        <https://www.mindspore.cn/
+        <https://www.mindspore.cn/docs/zh-CN/master/model_train/parallel/msrun_launcher.html>`_
         for more details.

         >>> import mindspore as ms
@@ -656,7 +656,7 @@ def get_process_group_ranks(group=GlobalComm.WORLD_COMM_GROUP):
         without any third-party or configuration file dependencies.

         Please see the `msrun start up
-        <https://www.mindspore.cn/
+        <https://www.mindspore.cn/docs/zh-CN/master/model_train/parallel/msrun_launcher.html>`_
         for more details.

         This example should be run with 4 devices.
mindspore/context.py
CHANGED
@@ -35,6 +35,7 @@ from mindspore.parallel._ps_context import _set_ps_context, _get_ps_context, _re
     _need_reset_device_target_for_ps
 from mindspore.parallel._offload_context import _set_offload_context, _get_offload_context
 from mindspore.hal.device import is_initialized
+from mindspore.common import api

 __all__ = ['GRAPH_MODE', 'PYNATIVE_MODE', 'STRICT', 'COMPATIBLE', 'LAX', 'set_context', 'get_context',
            'set_auto_parallel_context', 'get_auto_parallel_context', 'reset_auto_parallel_context', 'ParallelMode',
@@ -68,7 +69,7 @@ def _make_directory(path):
     if not os.path.exists(path):
         logger.debug("The directory(%s) doesn't exist, will create it", path)
         try:
-            os.makedirs(path)
+            os.makedirs(path, mode=0o700)
         except FileExistsError:
             logger.debug("The directory(%s) already exist.", path)
         except PermissionError as e:
@@ -168,7 +169,10 @@ class _Context:
         self._support_binary = False
         self.enable_compile_cache = None
         self._mode = PYNATIVE_MODE
-        self.
+        self.aoe_config = {}
+        self.jit_config = {}
+        self.ascend_config = {}
+        self.gpu_config = {}

     def __getattribute__(self, attr):
         value = object.__getattribute__(self, attr)
@@ -188,7 +192,7 @@ class _Context:

     def get_jit_config(self):
         """Get current jit_config."""
-        return self.
+        return self.jit_config

     def set_mode(self, mode):
         """
@@ -248,6 +252,16 @@ class _Context:
         else:
             self.set_param(ms_ctx_param.memory_optimize_level, 1)

+    def set_exec_order(self, exec_order):
+        """
+        The execution order mode, support "bfs", "dfs", "gpto".
+        """
+        exec_order_modes = ["bfs", "dfs", "gpto"]
+        if exec_order not in exec_order_modes:
+            raise ValueError(f"For 'context.set_context', the argument 'exec_order' must be one of "
+                             f"{exec_order_modes}, but got {exec_order}.")
+        self.set_param(ms_ctx_param.exec_order, exec_order)
+
     def set_memory_offload(self, memory_offload):
         """
         Enable memory offload or not, support "ON", "OFF".
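The new set_exec_order setter is wired into set_context through the 'exec_order': set_exec_order entry added later in this file, and documented in the set_context docstring below. A short usage sketch, assuming graph mode on an Ascend build:

    import mindspore as ms

    # Valid values per set_exec_order: "bfs" (default), "dfs", "gpto".
    # "dfs" trades some performance for lower memory; worth trying on OOM.
    ms.set_context(mode=ms.GRAPH_MODE, exec_order="dfs")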
@@ -277,6 +291,29 @@ class _Context:
                              f"{deterministic_options}, but got {deterministic}.")
         self.set_param(ms_ctx_param.deterministic, deterministic)

+        hccl_deterministic = os.getenv("HCCL_DETERMINISTIC")
+        te_parallel_compiler = os.getenv("TE_PARALLEL_COMPILER")
+        if deterministic == "ON":
+            if hccl_deterministic and hccl_deterministic != "true":
+                logger.warning(f"Environment 'HCCL_DETERMINISTIC' should be 'true' when set deterministic='ON', but "
+                               f"got '{hccl_deterministic}'. 'HCCL_DETERMINISTIC' will be set to 'true'.")
+            if te_parallel_compiler and te_parallel_compiler != "1":
+                logger.warning(f"Environment 'TE_PARALLEL_COMPILER' should be '1' when set deterministic='ON', but "
+                               f"got '{te_parallel_compiler}'. 'TE_PARALLEL_COMPILER' will be set to '1'.")
+            os.environ["HCCL_DETERMINISTIC"] = "true"
+            os.environ["TE_PARALLEL_COMPILER"] = "1"
+        if deterministic == "OFF":
+            if hccl_deterministic and hccl_deterministic != "false":
+                logger.warning(f"Environment 'HCCL_DETERMINISTIC' should not be set or be 'false' when set "
+                               f"deterministic='OFF', but got '{hccl_deterministic}'. 'HCCL_DETERMINISTIC' "
+                               f"will be unset.")
+                del os.environ["HCCL_DETERMINISTIC"]
+            if te_parallel_compiler and te_parallel_compiler != "0":
+                logger.warning(f"Environment 'TE_PARALLEL_COMPILER' should not be set or be '0' when set "
+                               f"deterministic='OFF', but got '{te_parallel_compiler}'. 'TE_PARALLEL_COMPILER' "
+                               f"will be unset.")
+                del os.environ["TE_PARALLEL_COMPILER"]
+
     def set_ascend_config(self, ascend_config):
         """
         Enable ascend config.
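With this change, set_deterministic no longer just records the flag: it also forces the HCCL_DETERMINISTIC and TE_PARALLEL_COMPILER environment variables into a matching state, warning when they disagree. A minimal sketch of the observable side effect, assuming a build where the 'deterministic' context key is accepted:

    import os
    import mindspore as ms

    ms.set_context(deterministic="ON")
    # 2.4.1 pins the related environment variables as a side effect:
    assert os.environ.get("HCCL_DETERMINISTIC") == "true"
    assert os.environ.get("TE_PARALLEL_COMPILER") == "1"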
@@ -298,6 +335,8 @@ class _Context:
             - parallel_speed_up_json_path(Union[str, None]): The path to the parallel speed up json file.
               If its value is None or '', it does not take effect. Default None.
             - host_scheduling_max_threshold(int): The host scheduling max threshold.
+            - hccl_watchdog (bool): Enable a thread to monitor the failure of collective communication.
+              Default: ``True`` .
         """
         ascend_cfg_modes = {
             'precision_mode': ["force_fp16", "allow_fp32_to_fp16", "allow_mix_precision", "must_keep_origin_dtype",
@@ -316,6 +355,7 @@ class _Context:
             'save_checkpoint_steps': (int,),
             'need_ckpt': (bool,),
             'last_triggered_step': (int,),
+            'hccl_watchdog': (bool,),
             'topo_order': (dict,),
             'op_debug_option': (str, None),
         }
@@ -335,6 +375,7 @@ class _Context:
             'save_checkpoint_steps': self._set_save_checkpoint_steps,
             'need_ckpt': self._set_need_ckpt,
             'last_triggered_step': self._set_last_triggered_step,
+            'hccl_watchdog': self._set_hccl_watchdog,
             'topo_order': self._set_topo_order
         }
         ascend_cfg_set = tuple(ascend_cfg_modes.keys())
@@ -351,6 +392,7 @@ class _Context:
                                 f"{supported_modes}, but got {type(ascend_value)}.")
             cfg_setter = ascend_cfg_setters.get(ascend_key)
             cfg_setter(ascend_value)
+        self.ascend_config = ascend_config

     def set_gpu_config(self, gpu_config):
         """
@@ -392,6 +434,7 @@ class _Context:
                 self.set_param(ms_ctx_param.conv_allow_tf32, gpu_config[gpu_key])
             if gpu_key == 'matmul_allow_tf32':
                 self.set_param(ms_ctx_param.matmul_allow_tf32, gpu_config[gpu_key])
+        self.gpu_config = gpu_config

     def set_jit_config(self, jit_config):
         """
@@ -410,12 +453,13 @@ class _Context:
                                  f"{jit_cfgs}, but got {jit_key}.")
             supported_value = jit_cfgs.get(jit_key)
             if jit_config[jit_key] not in supported_value:
-                raise ValueError(f"For '
+                raise ValueError(f"For 'jit_config', the value of argument {jit_key} must be one of "
                                  f"{supported_value}, but got {jit_config[jit_key]}.")
-            self._jit_config = jit_config
             self.set_param(key_args_map[jit_key], jit_config[jit_key])
+        self.jit_config = jit_config

-
+        jit_level = jit_config.get("jit_level", None)
+        if jit_config.get("infer_boost", None) == "on" and (jit_level == "O1" or jit_level == "O2"):
             raise ValueError(f"Only jit_level set O0 can set infer_boost to on.")

     def set_backend_policy(self, policy):
@@ -488,6 +532,7 @@ class _Context:
                                  f"{supported_value}, but got {aoe_config[aoe_config_key]}.")
             if aoe_config_key == 'job_type':
                 self.set_param(ms_ctx_param.aoe_job_type, aoe_config[aoe_config_key])
+        self.aoe_config = aoe_config

     def set_device_id(self, device_id):
         if device_id < 0 or device_id > 4095:
@@ -626,6 +671,7 @@ class _Context:
             'inter_op_parallel_num': set_inter_op_parallel_num,
             'runtime_num_threads': set_runtime_num_threads,
             'memory_optimize_level': set_memory_optimize_level,
+            'exec_order': set_exec_order,
             'op_timeout': set_op_timeout,
             'memory_offload': set_memory_offload,
             'deterministic': set_deterministic,
@@ -744,6 +790,12 @@ class _Context:
         options_str = json.dumps(topo_order)
         self.set_param(ms_ctx_param.topo_order, options_str)

+    def _set_hccl_watchdog(self, flag):
+        """set hccl watchdog"""
+        if not isinstance(flag, bool):
+            raise TypeError(f"For 'ascend_config', the type of 'hccl_watchdog' must be bool, but got {type(flag)}.")
+        self.set_param(ms_ctx_param.hccl_watchdog, flag)
+
     def _set_need_ckpt(self, need_ckpt):
         """Set need ckpt flag"""
         if not isinstance(need_ckpt, bool):
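Together with the 'hccl_watchdog' entries added to ascend_cfg_modes and ascend_cfg_setters above, this validator makes the new option reachable from set_context. A usage sketch; the key is Ascend-only and must be a bool, per _set_hccl_watchdog:

    import mindspore as ms

    # Disable the thread that monitors collective-communication failures.
    ms.set_context(ascend_config={"hccl_watchdog": False})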
@@ -772,7 +824,7 @@ class _Context:
         """"Check and set speedup config for auto parallel."""
         if speedup_config_path is None or speedup_config_path == "":
             return
-        speedup_config_real_path = os.path.
+        speedup_config_real_path = os.path.realpath(speedup_config_path)
         if not os.path.exists(speedup_config_real_path):
             raise ValueError(f"For 'ascend_config', the path to parallel_speed_up_json: "
                              f"{speedup_config_real_path} does not exist, please check whether the "
@@ -790,10 +842,17 @@ class _Context:
             "enable_begin_end_inline_opt": (ms_ctx_param.enable_begin_end_inline_opt, bool),
             "enable_concat_eliminate_opt": (ms_ctx_param.enable_concat_eliminate_opt, bool),
             "interleaved_layernorm_comm": (ms_ctx_param.interleaved_layernorm_comm, bool),
+            "enable_allreduce_slice_to_reducescatter":
+                (ms_ctx_param.enable_allreduce_slice_to_reducescatter, bool),
+            "enable_interleave_split_concat_branch":
+                (ms_ctx_param.enable_interleave_split_concat_branch, bool),
+            "enable_offloading_packed_experts": (ms_ctx_param.enable_offloading_packed_experts, bool),
             "compute_communicate_fusion_level":
                 (ms_ctx_param.compute_communicate_fusion_level, int),
             "enable_flash_attention_load_balance":
-                (ms_ctx_param.enable_flash_attention_load_balance, bool)
+                (ms_ctx_param.enable_flash_attention_load_balance, bool),
+            "dataset_broadcast_opt_level":
+                (ms_ctx_param.dataset_broadcast_opt_level, int)}
         with open(speedup_config_real_path, 'r') as f:
             speedup_config = json.load(f)
             for key, value in speedup_config.items():
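The keys registered above are read from the parallel_speed_up JSON file referenced through ascend_config. A hypothetical file exercising the options that are new in 2.4.1 (file name and values are illustrative only):

    import json
    import mindspore as ms

    speedup = {
        "enable_allreduce_slice_to_reducescatter": True,
        "enable_interleave_split_concat_branch": False,
        "enable_offloading_packed_experts": False,
        "dataset_broadcast_opt_level": 1,
    }
    with open("parallel_speed_up.json", "w") as f:
        json.dump(speedup, f)
    ms.set_context(ascend_config={"parallel_speed_up_json_path": "parallel_speed_up.json"})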
@@ -876,6 +935,7 @@ def set_auto_parallel_context(**kwargs):
              \ strategy_ckpt_config
              \ group_ckpt_save_file
              \ auto_pipeline
+             \ dump_local_norm
     =========================== ===========================

     Args:
@@ -1027,6 +1087,9 @@ def set_auto_parallel_context(**kwargs):
         auto_pipeline (bool): Set the pipeline stage number to automatic. Its value will be selected between 1 and the
             parameter `pipeline_stages`. This option requires the `parallel_mode` to be ``auto_parallel``
             and the `search_mode` to be ``recursive_programming``. Default: ``False`` .
+        dump_local_norm (bool): Whether to dump local_norm value, when the `parallel_mode` is set to
+            ``semi_auto_parallel`` or ``auto_parallel``.
+            Default: ``False`` .

     Raises:
         ValueError: If input key is not attribute in auto parallel context.
@@ -1097,11 +1160,12 @@ def reset_auto_parallel_context():
     - strategy_ckpt_save_file: ''.
     - full_batch: False.
     - enable_parallel_optimizer: False.
-    - force_fp32_communication: False
+    - force_fp32_communication: False.
     - enable_alltoall: False.
     - pipeline_stages: 1.
     - pipeline_result_broadcast: False.
     - fusion_threshold: 64.
+    - dump_local_norm: False.
     - auto_pipeline: False.

     Examples:
@@ -1109,6 +1173,7 @@ def reset_auto_parallel_context():
     >>> ms.reset_auto_parallel_context()
     """
     _reset_auto_parallel_context()
+    api.ms_compile_cache.clear()


 @args_type_check(offload_config=dict)
@@ -1118,7 +1183,8 @@ def set_offload_context(offload_config):

     Note:
         The offload configuration is only used if the memory offload feature is enabled
-        via mindspore.set_context(memory_offload="ON").
+        via mindspore.set_context(memory_offload="ON"), and the memory_optimize_level must be set to O0. On the Ascend
+        hardware platform, the graph compilation level must be O0.

     Args:
         offload_config (dict): A dict contains the keys and values for setting the offload context
@@ -1311,6 +1377,8 @@ def set_context(**kwargs):
     |                         | gpu_config                   | GPU                        |
     |                         +------------------------------+----------------------------+
     |                         | jit_config                   | CPU/GPU/Ascend             |
+    |                         +------------------------------+----------------------------+
+    |                         | exec_order                   | Ascend                     |
     +-------------------------+------------------------------+----------------------------+

     Args:
@@ -1320,12 +1388,14 @@ def set_context(**kwargs):
             If device target is not set, the version of MindSpore package is used.
         max_device_memory (str): Set the maximum memory available for devices. The format is "xxGB".
             Default: ``"1024GB"`` . The actual used memory size is the minimum of the available memory of the device
-            and max_device_memory. 'max_device_memory' should be set before the program runs.
+            and max_device_memory. 'max_device_memory' should be set before the program runs. When virtual memory is
+            enabled, a too small 'max_device_memory' will cause frequent defragmentation, affecting performance.
         variable_memory_max_size (str): This parameter is deprecated, and will be removed in a future version.
             Please use parameter 'max_device_memory' instead.
-        mempool_block_size (str):
-            for devices. The format is "xxGB". Default: ``"1GB"`` . Minimum size is "1G". The actual used memory
-            size is the minimum of the available memory of the device and mempool_block_size.
+        mempool_block_size (str): It takes effect when virtual memory is turned off, set the size of the memory pool
+            block for devices. The format is "xxGB". Default: ``"1GB"`` . Minimum size is "1G". The actual used memory
+            block size is the minimum of the available memory of the device and mempool_block_size. When there is
+            enough memory, the memory will be expanded by this value.
         op_timeout (int): Set the maximum duration of executing an operator in seconds.
             If the execution time exceeds this value, system will terminate the task.
             0 means endless wait. The defaults for AI Core and AICPU operators vary on different hardware.
@@ -1413,7 +1483,7 @@ def set_context(**kwargs):
             If enable_graph_kernel is set to ``True`` , acceleration can be enabled.
             For details of graph kernel fusion, please check
             `Enabling Graph Kernel Fusion
-            <https://www.mindspore.cn/
+            <https://www.mindspore.cn/docs/en/master/model_train/optimize/graph_fusion_engine.html>`_.
         graph_kernel_flags (str):
             Optimization options of graph kernel fusion, and the priority is higher when it conflicts
             with enable_graph_kernel. Only for experienced users.
@@ -1438,6 +1508,11 @@ def set_context(**kwargs):
               Be caution when using this level.

             - dump_as_text: dumps detail info as text files. Default: ``False`` .
+            - enable_cluster_ops: Add user-specified operator to the set of operators involved in fusion. For example,
+              by setting ``--enable_cluster_ops=MatMul``, MatMul operator can be included in the fusion process.
+            - enable_pass/disable_pass: Enable/disable user-specified custom fusion passes. See details in
+              `Custom Fusion Pass
+              <https://www.mindspore.cn/docs/en/master/model_train/custom_program/fusion_pass.html>`_.

         enable_reduce_precision (bool): Whether to enable precision reduction.
             If the operator does not support the user-specified precision, the precision will
@@ -1468,6 +1543,7 @@ def set_context(**kwargs):
             if enable_compile_cache is still set to ``True`` and the network scripts are not changed,
             the compile cache is loaded. Note that only limited automatic detection for the changes of
             python scripts is supported by now, which means that there is a correctness risk. Default: ``False`` .
+            Currently, do not support the graph which is larger than 2G after compiled.
             This is an experimental prototype that is subject to change and/or deletion.
         compile_cache_path (str): Path to save the compile cache. Default: ``"."``.
             If the specified directory does not exist, the system will automatically create the directory.
@@ -1477,7 +1553,8 @@ def set_context(**kwargs):
             which means use the default num.
         runtime_num_threads(int): The thread pool number of cpu kernel used in runtime,
             which must bigger than or equal to 0. Default value is ``30`` , if you run many processes at
-            the same time, you should set the value smaller to avoid thread contention.
+            the same time, you should set the value smaller to avoid thread contention. If set runtime_num_threads to 1,
+            the runtime asynchronous pipeline capability cannot be enabled, which may affect performance.
         disable_format_transform (bool): Whether to disable the automatic format transform function from NCHW to NHWC.
             When the network training performance of fp16 is worse than fp32, `disable_format_transform` can be set to
             ``True`` to try to improve training performance. Default: ``False`` .
@@ -1588,7 +1665,7 @@ def set_context(**kwargs):
              `LazyInline <https://www.mindspore.cn/docs/en/master/api_python/mindspore/mindspore.lazy_inline.html>`
              Default: False.
            - compute_communicate_fusion_level (int): Enable the fusion between compute and communicate.
-             Default: ``0``.
+             Default: ``0``. Note: This function must be used with Ascend Training Solution 24.0.RC2 or later.

             - 0: Disable fusion.

@@ -1597,8 +1674,27 @@ def set_context(**kwargs):
             - 2: Apply fusion to backward nodes.

             - 3: Apply fusion to all nodes.
+           - dataset_broadcast_opt_level (int): Optimize the scenario that the dataset repeated reading. Only
+             support O0/O1 jit level. It doesn't work in O2 mode. Default: ``0``.
+
+             - 0: Disable this optimize.
+
+             - 1: Optimize dataset reader between pipeline stage.
+
+             - 2: Optimize dataset reader within pipeline stage.
+
+             - 3: Optimize dataset reader with all scenes.
            - bias_add_comm_swap (bool): Enable node execution order swap communication operators and add operators
              if ``True``. Only 1-dimension bias node is supported. Default: ``False``.
+           - enable_allreduce_slice_to_reducescatter (bool): Enable allreduce optimization. In the scenario where
+             the batchmatmul model introduces allreduce in parallel, if the subsequent nodes are stridedslice
+             operator with model parallel, allreduce will be optimized as reducescatter according to the identified
+             patterns. Typical used in MoE module with groupwise alltoall. Default: ``False``.
+           - enable_interleave_split_concat_branch (bool): Enable communication computation parallel optimization
+             for branches formed by split and concat operators with ``enable_interleave`` attribute. It is typical
+             used in MoE parallel scenario. After splitting the input data, each slice of data is processed by the
+             MoE module, and then the branch results are concatenated. When the optimization is enable,
+             communication and computation will be executed in parallel between branches. Default: ``False``.
            - host_scheduling_max_threshold(int): The max threshold to control whether the dynamic shape process is
              used when run the static graph, the default value is 0. When the number of operations in the static graph
              is less than the max threshold, this graph will be executed in dynamic shape process. In large model
@@ -1698,12 +1794,13 @@ def set_context(**kwargs):

            - jit_level (str): Used to control the compilation optimization level. Default: ``""`` , The framework
              automatically selects the execution method based on product, Altas training product is O2, and all other
-             products are O0. The
+             products are O0. In addition, The option of the dynamic shape must be O0 or O1, O2 is not supported.
+             The value range is as follows:

             - ``"O0"``: Except for optimizations that may affect functionality, all other optimizations are turned
               off, adopt KernelByKernel execution mode.
             - ``"O1"``: Using commonly used optimizations and automatic operator fusion optimizations,
-              adopt KernelByKernel execution mode.
+              adopt KernelByKernel execution mode. This optimization level is experimental and is being improved.
             - ``"O2"``: Ultimate performance optimization, adopt Sink execution mode.

            - infer_boost (str): Used to control the infer mode. Default: ``"off"`` . The value range is as follows:
@@ -1711,6 +1808,18 @@ def set_context(**kwargs):
             - ``"on"``: Enable infer mode, get better infer performance.
             - ``"off"``: Disable infer mode, use forward to infer, performance is not good.

+        exec_order (str): Set the sorting method for operator execution in GRAPH_MODE Currently, only three sorting
+            methods are supported: bfs and gpto, and the default method is bfs.
+
+            - ``"bfs"``: The default sorting method, breadth priority, good communication masking, relatively good
+              performance.
+            - ``"dfs"``: An optional sorting method, depth-first sorting. The performance is relatively worse than that
+              of bfs execution order, but it occupies less memory. It is recommended to try dfs in scenarios where other
+              execution orders run out of memory (OOM).
+            - ``"gpto"``: An optional sorting method. This method combines multiple execution orders and selects a
+              method with relatively good performance. There may be some performance gains in scenarios with multiple
+              replicas running in parallel.
+
     Raises:
         ValueError: If input key is not an attribute in context.

@@ -1753,6 +1862,7 @@ def set_context(**kwargs):
         >>> ms.set_context(gpu_config={"conv_fprop_algo": "performance", "conv_allow_tf32": True,
         ...                            "matmul_allow_tf32": True})
         >>> ms.set_context(jit_config={"jit_level": "O0"})
+        >>> ms.set_context(exec_order="gpto")
     """
     ctx = _context()
     # set device target first
@@ -1790,12 +1900,12 @@ def set_context(**kwargs):
             continue
         if not _check_target_specific_cfgs(device, key):
             continue
-        if hasattr(ctx, key):
-            setattr(ctx, key, value)
-            continue
         if key in ctx.setters:
             ctx.setters[key](ctx, value)
             continue
+        if hasattr(ctx, key):
+            setattr(ctx, key, value)
+            continue
         # enum variables beginning with '_' are for internal use
         if key in ms_ctx_param.__members__ and key[0] != '_':
             ctx.set_param(ms_ctx_param.__members__[key], value)
mindspore/dataset/__init__.py
CHANGED
@@ -21,7 +21,7 @@ Besides, this module provides APIs to sample data while loading.

 We can enable cache in most of the dataset with its key arguments 'cache'. Please notice that cache is not supported
 on Windows platform yet. Do not use it while loading and processing data on Windows. More introductions and limitations
-can refer `Single-Node Tensor Cache <https://www.mindspore.cn/
+can refer `Single-Node Tensor Cache <https://www.mindspore.cn/docs/en/master/model_train/dataset/cache.html>`_ .

 Common imported modules in corresponding API examples are as follows:

@@ -43,7 +43,7 @@ The data transform operation can be executed in the data processing pipeline or
 `introduction to data processing pipeline <https://www.mindspore.cn/docs/en/master/api_python/
 mindspore.dataset.html#introduction-to-data-processing-pipeline>`_ .
 - Eager mode is more like a function call to process data. Examples refer to
-  `Lightweight Data Processing <https://www.mindspore.cn/
+  `Lightweight Data Processing <https://www.mindspore.cn/docs/en/master/model_train/dataset/eager.html>`_ .
 """
 from __future__ import absolute_import

mindspore/dataset/core/config.py
CHANGED
@@ -32,6 +32,8 @@ import mindspore._c_dataengine as cde
 from mindspore import log as logger
 from mindspore.dataset.core.validator_helpers import replace_none, type_check
 from mindspore.dataset.debug import DebugHook, PrintMetaDataHook
+from mindspore.dataset.core.validator_helpers import check_independent_mode
+

 __all__ = ['set_sending_batches', 'load', '_init_device_info',
            'set_seed', 'get_seed',
@@ -544,6 +546,8 @@ def set_enable_autotune(enable, filepath_prefix=None):
     if not isinstance(enable, bool):
         raise TypeError("enable must be of type bool.")

+    check_independent_mode("Dataset AutoTune", enable)
+
     save_autoconfig = bool(enable and filepath_prefix is not None)

     if filepath_prefix and not isinstance(filepath_prefix, str):
@@ -728,6 +732,9 @@ def set_auto_offload(offload):
     """
     if not isinstance(offload, bool):
         raise TypeError("offload must be a bool dtype")
+
+    check_independent_mode("Dataset Offload", offload)
+
     _config.set_auto_offload(offload)

mindspore/dataset/core/validator_helpers.py
CHANGED

@@ -766,6 +766,13 @@ def check_dict(data, key_type, value_type, param_name):
                     .format(key, param_name, value_type, type(value)))


+def check_independent_mode(feature_name, condition=True):
+    # todo in Dataset Independent mode
+    independent_process_env = os.getenv("MS_INDEPENDENT_DATASET", None)
+    if condition and independent_process_env and independent_process_env.strip() in ['True', 'true']:
+        raise RuntimeError(f"{feature_name} is not supported in Dataset Independent mode.")
+
+
 def check_feature_shape(data, shape, param_name):
     if isinstance(data, dict):
         for key, value in data.items():
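This helper gates dataset features on the MS_INDEPENDENT_DATASET environment variable; config.py calls it from set_enable_autotune and set_auto_offload, as shown earlier. A sketch of the resulting behavior, assuming the independent dataset process mode is switched on:

    import os
    import mindspore.dataset as ds

    os.environ["MS_INDEPENDENT_DATASET"] = "true"
    # 2.4.1: raises RuntimeError("Dataset Offload is not supported in
    # Dataset Independent mode.")
    ds.config.set_auto_offload(True)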
mindspore/dataset/engine/cache_client.py
CHANGED

@@ -27,7 +27,7 @@ class DatasetCache:
     A client to interface with tensor caching service.

     For details, please check
-    `Tutorial <https://www.mindspore.cn/
+    `Tutorial <https://www.mindspore.cn/docs/en/master/model_train/dataset/cache.html>`_ .

     Args:
         session_id (int): A user assigned session id for the current pipeline.