mindspore-2.2.14-cp39-cp39-win_amd64.whl → mindspore-2.4.0-cp39-cp39-win_amd64.whl
This diff represents the changes between publicly available package versions as they appear in their respective public registries, and is provided for informational purposes only.
Potentially problematic release: the registry has flagged this version of mindspore as potentially problematic.
- mindspore/.commit_id +1 -1
- mindspore/Microsoft.VisualStudio.Telemetry.dll +0 -0
- mindspore/Newtonsoft.Json.dll +0 -0
- mindspore/__init__.py +8 -5
- mindspore/_c_dataengine.cp39-win_amd64.pyd +0 -0
- mindspore/_c_expression.cp39-win_amd64.pyd +0 -0
- mindspore/_c_mindrecord.cp39-win_amd64.pyd +0 -0
- mindspore/_checkparam.py +124 -25
- mindspore/_extends/builtin_operations.py +2 -1
- mindspore/_extends/graph_kernel/model/graph_parallel.py +16 -6
- mindspore/_extends/parallel_compile/akg_compiler/akg_process.py +3 -16
- mindspore/_extends/parallel_compile/akg_compiler/build_tbe_kernel.py +16 -4
- mindspore/_extends/parallel_compile/akg_compiler/compiler.py +1 -0
- mindspore/_extends/parallel_compile/akg_compiler/gen_custom_op_files.py +96 -0
- mindspore/_extends/parallel_compile/akg_compiler/tbe_topi.py +2 -1
- mindspore/_extends/parallel_compile/akg_compiler/util.py +5 -2
- mindspore/_extends/parse/__init__.py +18 -14
- mindspore/_extends/parse/compile_config.py +299 -0
- mindspore/_extends/parse/namespace.py +2 -2
- mindspore/_extends/parse/parser.py +182 -68
- mindspore/_extends/parse/resources.py +45 -14
- mindspore/_extends/parse/standard_method.py +192 -252
- mindspore/{ops/_op_impl/tbe/atomic_addr_clean.py → _extends/pijit/__init__.py} +6 -16
- mindspore/_extends/pijit/pijit_func_white_list.py +669 -0
- mindspore/_extends/remote/kernel_build_server.py +2 -0
- mindspore/_profiler.py +30 -0
- mindspore/amp.py +67 -26
- mindspore/atlprov.dll +0 -0
- mindspore/avcodec-59.dll +0 -0
- mindspore/avdevice-59.dll +0 -0
- mindspore/avfilter-8.dll +0 -0
- mindspore/avformat-59.dll +0 -0
- mindspore/avutil-57.dll +0 -0
- mindspore/boost/adasum.py +1 -1
- mindspore/boost/base.py +1 -1
- mindspore/boost/boost_cell_wrapper.py +2 -2
- mindspore/boost/grad_freeze.py +2 -2
- mindspore/boost/group_loss_scale_manager.py +1 -1
- mindspore/boost/less_batch_normalization.py +9 -6
- mindspore/c1.dll +0 -0
- mindspore/c1xx.dll +0 -0
- mindspore/c2.dll +0 -0
- mindspore/common/__init__.py +20 -7
- mindspore/common/_jit_fallback_utils.py +2 -3
- mindspore/common/_pijit_context.py +190 -0
- mindspore/common/_register_for_adapter.py +7 -0
- mindspore/common/_register_for_recompute.py +48 -0
- mindspore/common/_register_for_tensor.py +10 -10
- mindspore/common/_stub_tensor.py +7 -1
- mindspore/common/_tensor_overload.py +139 -0
- mindspore/common/_utils.py +5 -17
- mindspore/common/api.py +449 -129
- mindspore/common/auto_dynamic_shape.py +27 -14
- mindspore/common/dtype.py +17 -10
- mindspore/common/dump.py +8 -11
- mindspore/common/file_system.py +48 -0
- mindspore/common/generator.py +254 -0
- mindspore/common/hook_handle.py +65 -30
- mindspore/common/initializer.py +1 -1
- mindspore/common/jit_config.py +34 -14
- mindspore/common/lazy_inline.py +72 -19
- mindspore/common/mindir_util.py +12 -2
- mindspore/common/mutable.py +79 -14
- mindspore/common/no_inline.py +54 -0
- mindspore/common/np_dtype.py +25 -0
- mindspore/common/parameter.py +73 -21
- mindspore/common/recompute.py +292 -0
- mindspore/common/seed.py +9 -9
- mindspore/common/sparse_tensor.py +276 -24
- mindspore/common/symbol.py +122 -0
- mindspore/common/tensor.py +668 -514
- mindspore/communication/__init__.py +6 -11
- mindspore/communication/_comm_helper.py +43 -3
- mindspore/communication/comm_func.py +1395 -0
- mindspore/communication/management.py +117 -104
- mindspore/config/op_info.config +22 -54
- mindspore/context.py +455 -71
- mindspore/dataset/__init__.py +5 -5
- mindspore/dataset/audio/__init__.py +6 -6
- mindspore/dataset/audio/transforms.py +711 -158
- mindspore/dataset/callback/ds_callback.py +2 -2
- mindspore/dataset/core/config.py +7 -0
- mindspore/dataset/core/validator_helpers.py +7 -0
- mindspore/dataset/engine/cache_client.py +2 -2
- mindspore/dataset/engine/datasets.py +201 -116
- mindspore/dataset/engine/datasets_audio.py +14 -14
- mindspore/dataset/engine/datasets_standard_format.py +83 -3
- mindspore/dataset/engine/datasets_text.py +39 -39
- mindspore/dataset/engine/datasets_user_defined.py +230 -141
- mindspore/dataset/engine/datasets_vision.py +78 -74
- mindspore/dataset/engine/iterators.py +29 -0
- mindspore/dataset/engine/obs/util.py +7 -0
- mindspore/dataset/engine/offload.py +5 -7
- mindspore/dataset/engine/queue.py +138 -66
- mindspore/dataset/engine/serializer_deserializer.py +2 -2
- mindspore/dataset/engine/validators.py +41 -15
- mindspore/dataset/text/__init__.py +2 -5
- mindspore/dataset/text/transforms.py +408 -121
- mindspore/dataset/text/utils.py +9 -9
- mindspore/dataset/transforms/__init__.py +0 -3
- mindspore/dataset/transforms/transforms.py +261 -76
- mindspore/dataset/utils/browse_dataset.py +9 -9
- mindspore/dataset/utils/line_reader.py +2 -0
- mindspore/dataset/vision/__init__.py +7 -10
- mindspore/dataset/vision/c_transforms.py +10 -10
- mindspore/dataset/vision/py_transforms_util.py +1 -1
- mindspore/dataset/vision/transforms.py +2844 -549
- mindspore/dataset/vision/utils.py +161 -10
- mindspore/dataset/vision/validators.py +16 -3
- mindspore/dnnl.dll +0 -0
- mindspore/dpcmi.dll +0 -0
- mindspore/{rewrite/ast_creator_register.py → experimental/es/__init__.py} +5 -20
- mindspore/experimental/es/embedding_service.py +883 -0
- mindspore/experimental/es/embedding_service_layer.py +581 -0
- mindspore/experimental/llm_boost/__init__.py +21 -0
- mindspore/experimental/llm_boost/atb/__init__.py +23 -0
- mindspore/experimental/llm_boost/atb/boost_base.py +211 -0
- mindspore/experimental/llm_boost/atb/llama_boost.py +115 -0
- mindspore/experimental/llm_boost/atb/qwen_boost.py +101 -0
- mindspore/experimental/llm_boost/register.py +129 -0
- mindspore/experimental/llm_boost/utils.py +31 -0
- mindspore/experimental/optim/__init__.py +12 -2
- mindspore/experimental/optim/adadelta.py +161 -0
- mindspore/experimental/optim/adagrad.py +168 -0
- mindspore/experimental/optim/adam.py +35 -34
- mindspore/experimental/optim/adamax.py +170 -0
- mindspore/experimental/optim/adamw.py +124 -15
- mindspore/experimental/optim/asgd.py +153 -0
- mindspore/experimental/optim/lr_scheduler.py +66 -121
- mindspore/experimental/optim/nadam.py +157 -0
- mindspore/experimental/optim/optimizer.py +18 -8
- mindspore/experimental/optim/radam.py +194 -0
- mindspore/experimental/optim/rmsprop.py +154 -0
- mindspore/experimental/optim/rprop.py +164 -0
- mindspore/experimental/optim/sgd.py +28 -19
- mindspore/hal/__init__.py +40 -0
- mindspore/hal/_ascend.py +57 -0
- mindspore/hal/_base.py +57 -0
- mindspore/hal/_cpu.py +56 -0
- mindspore/hal/_gpu.py +57 -0
- mindspore/hal/contiguous_tensors_handle.py +175 -0
- mindspore/hal/device.py +356 -0
- mindspore/hal/event.py +179 -0
- mindspore/hal/memory.py +326 -0
- mindspore/hal/stream.py +357 -0
- mindspore/include/api/data_type.h +2 -2
- mindspore/include/api/dual_abi_helper.h +16 -3
- mindspore/include/api/model.h +4 -3
- mindspore/include/api/model_group.h +13 -1
- mindspore/include/api/status.h +14 -0
- mindspore/include/api/types.h +10 -10
- mindspore/include/c_api/model_c.h +173 -0
- mindspore/include/c_api/types_c.h +19 -0
- mindspore/include/dataset/config.h +2 -2
- mindspore/include/dataset/constants.h +2 -2
- mindspore/include/dataset/execute.h +3 -5
- mindspore/include/dataset/vision.h +58 -2
- mindspore/jpeg62.dll +0 -0
- mindspore/log.py +3 -3
- mindspore/mindrecord/__init__.py +5 -1
- mindspore/mindrecord/config.py +809 -0
- mindspore/mindrecord/filereader.py +25 -0
- mindspore/mindrecord/filewriter.py +138 -103
- mindspore/mindrecord/mindpage.py +40 -6
- mindspore/mindrecord/shardutils.py +3 -2
- mindspore/mindrecord/shardwriter.py +7 -0
- mindspore/mindrecord/tools/cifar100_to_mr.py +8 -13
- mindspore/mindrecord/tools/cifar10_to_mr.py +9 -15
- mindspore/mindrecord/tools/csv_to_mr.py +4 -9
- mindspore/mindrecord/tools/imagenet_to_mr.py +3 -8
- mindspore/mindrecord/tools/mnist_to_mr.py +7 -12
- mindspore/mindrecord/tools/tfrecord_to_mr.py +1 -6
- mindspore/mindspore_backend.dll +0 -0
- mindspore/mindspore_common.dll +0 -0
- mindspore/mindspore_core.dll +0 -0
- mindspore/mindspore_glog.dll +0 -0
- mindspore/mindspore_np_dtype.dll +0 -0
- mindspore/mindspore_ops.dll +0 -0
- mindspore/mint/__init__.py +1586 -0
- mindspore/mint/distributed/__init__.py +31 -0
- mindspore/mint/distributed/distributed.py +254 -0
- mindspore/{rewrite/ast_transformers → mint/linalg}/__init__.py +9 -4
- mindspore/mint/nn/__init__.py +757 -0
- mindspore/mint/nn/functional.py +679 -0
- mindspore/mint/nn/layer/__init__.py +39 -0
- mindspore/mint/nn/layer/activation.py +133 -0
- mindspore/mint/nn/layer/normalization.py +477 -0
- mindspore/mint/nn/layer/pooling.py +110 -0
- mindspore/mint/optim/__init__.py +24 -0
- mindspore/mint/optim/adamw.py +206 -0
- mindspore/mint/special/__init__.py +63 -0
- mindspore/msobj140.dll +0 -0
- mindspore/mspdb140.dll +0 -0
- mindspore/mspdbcore.dll +0 -0
- mindspore/mspdbst.dll +0 -0
- mindspore/mspft140.dll +0 -0
- mindspore/msvcdis140.dll +0 -0
- mindspore/msvcp140_1.dll +0 -0
- mindspore/msvcp140_2.dll +0 -0
- mindspore/msvcp140_atomic_wait.dll +0 -0
- mindspore/msvcp140_codecvt_ids.dll +0 -0
- mindspore/multiprocessing/__init__.py +73 -0
- mindspore/nn/cell.py +461 -323
- mindspore/nn/dynamic_lr.py +2 -2
- mindspore/nn/layer/activation.py +292 -135
- mindspore/nn/layer/basic.py +288 -83
- mindspore/nn/layer/channel_shuffle.py +3 -16
- mindspore/nn/layer/container.py +3 -3
- mindspore/nn/layer/conv.py +75 -66
- mindspore/nn/layer/embedding.py +221 -45
- mindspore/nn/layer/image.py +4 -7
- mindspore/nn/layer/math.py +1 -1
- mindspore/nn/layer/normalization.py +150 -68
- mindspore/nn/layer/padding.py +64 -87
- mindspore/nn/layer/pooling.py +175 -12
- mindspore/nn/layer/rnn_cells.py +6 -16
- mindspore/nn/layer/rnns.py +6 -5
- mindspore/nn/layer/thor_layer.py +1 -2
- mindspore/nn/layer/timedistributed.py +1 -1
- mindspore/nn/layer/transformer.py +55 -53
- mindspore/nn/learning_rate_schedule.py +6 -5
- mindspore/nn/loss/__init__.py +2 -2
- mindspore/nn/loss/loss.py +145 -88
- mindspore/nn/optim/__init__.py +2 -1
- mindspore/nn/optim/ada_grad.py +4 -2
- mindspore/nn/optim/adadelta.py +4 -2
- mindspore/nn/optim/adafactor.py +1 -1
- mindspore/nn/optim/adam.py +102 -181
- mindspore/nn/optim/adamax.py +4 -2
- mindspore/nn/optim/adasum.py +3 -3
- mindspore/nn/optim/asgd.py +4 -2
- mindspore/nn/optim/ftrl.py +31 -61
- mindspore/nn/optim/lamb.py +5 -3
- mindspore/nn/optim/lars.py +2 -2
- mindspore/nn/optim/lazyadam.py +6 -4
- mindspore/nn/optim/momentum.py +13 -25
- mindspore/nn/optim/optimizer.py +6 -3
- mindspore/nn/optim/proximal_ada_grad.py +4 -2
- mindspore/nn/optim/rmsprop.py +9 -3
- mindspore/nn/optim/rprop.py +4 -2
- mindspore/nn/optim/sgd.py +5 -3
- mindspore/nn/optim/tft_wrapper.py +127 -0
- mindspore/nn/optim/thor.py +2 -2
- mindspore/nn/probability/distribution/_utils/custom_ops.py +2 -2
- mindspore/nn/probability/distribution/beta.py +2 -2
- mindspore/nn/probability/distribution/categorical.py +4 -6
- mindspore/nn/probability/distribution/cauchy.py +2 -2
- mindspore/nn/probability/distribution/exponential.py +2 -2
- mindspore/nn/probability/distribution/geometric.py +1 -1
- mindspore/nn/probability/distribution/gumbel.py +2 -2
- mindspore/nn/probability/distribution/logistic.py +1 -1
- mindspore/nn/probability/distribution/poisson.py +2 -2
- mindspore/nn/probability/distribution/uniform.py +2 -2
- mindspore/nn/reinforcement/_tensors_queue.py +13 -1
- mindspore/nn/wrap/__init__.py +2 -1
- mindspore/nn/wrap/cell_wrapper.py +46 -12
- mindspore/nn/wrap/grad_reducer.py +148 -8
- mindspore/nn/wrap/loss_scale.py +44 -7
- mindspore/numpy/__init__.py +2 -0
- mindspore/numpy/array_creations.py +67 -68
- mindspore/numpy/array_ops.py +70 -66
- mindspore/numpy/dtypes.py +3 -3
- mindspore/numpy/fft.py +966 -0
- mindspore/numpy/logic_ops.py +11 -10
- mindspore/numpy/math_ops.py +147 -152
- mindspore/numpy/utils.py +3 -0
- mindspore/numpy/utils_const.py +4 -4
- mindspore/opencv_core452.dll +0 -0
- mindspore/opencv_imgcodecs452.dll +0 -0
- mindspore/opencv_imgproc452.dll +0 -0
- mindspore/ops/__init__.py +9 -6
- mindspore/ops/_grad_experimental/grad_array_ops.py +4 -129
- mindspore/ops/_grad_experimental/grad_comm_ops.py +135 -36
- mindspore/ops/_grad_experimental/grad_math_ops.py +61 -298
- mindspore/ops/_grad_experimental/grad_nn_ops.py +0 -53
- mindspore/ops/_grad_experimental/grad_quant_ops.py +3 -3
- mindspore/ops/_grad_experimental/grad_sparse.py +1 -1
- mindspore/ops/_grad_experimental/grad_sparse_ops.py +3 -3
- mindspore/ops/_op_impl/__init__.py +0 -1
- mindspore/ops/_op_impl/aicpu/gamma.py +2 -0
- mindspore/ops/_op_impl/aicpu/generate_eod_mask.py +1 -1
- mindspore/ops/_op_impl/aicpu/log_uniform_candidate_sampler.py +1 -3
- mindspore/ops/_op_impl/aicpu/poisson.py +2 -0
- mindspore/ops/_op_impl/cpu/__init__.py +1 -3
- mindspore/ops/_op_impl/cpu/adam.py +2 -2
- mindspore/ops/_op_impl/cpu/adam_weight_decay.py +3 -2
- mindspore/ops/_op_impl/cpu/maximum_grad.py +16 -14
- mindspore/ops/_op_impl/cpu/minimum_grad.py +8 -0
- mindspore/ops/_vmap/vmap_array_ops.py +162 -101
- mindspore/ops/_vmap/vmap_base.py +8 -1
- mindspore/ops/_vmap/vmap_grad_math_ops.py +95 -9
- mindspore/ops/_vmap/vmap_grad_nn_ops.py +143 -58
- mindspore/ops/_vmap/vmap_image_ops.py +70 -13
- mindspore/ops/_vmap/vmap_math_ops.py +147 -59
- mindspore/ops/_vmap/vmap_nn_ops.py +292 -117
- mindspore/ops/_vmap/vmap_other_ops.py +1 -1
- mindspore/ops/auto_generate/__init__.py +31 -0
- mindspore/ops/auto_generate/cpp_create_prim_instance_helper.py +309 -0
- mindspore/ops/auto_generate/gen_arg_dtype_cast.py +252 -0
- mindspore/ops/auto_generate/gen_arg_handler.py +197 -0
- mindspore/ops/auto_generate/gen_extend_func.py +1701 -0
- mindspore/ops/auto_generate/gen_ops_def.py +8482 -0
- mindspore/ops/auto_generate/gen_ops_prim.py +16704 -0
- mindspore/ops/auto_generate/pyboost_inner_prim.py +549 -0
- mindspore/ops/composite/__init__.py +5 -2
- mindspore/ops/composite/base.py +201 -66
- mindspore/ops/composite/math_ops.py +10 -49
- mindspore/ops/composite/multitype_ops/_compile_utils.py +192 -618
- mindspore/ops/composite/multitype_ops/_constexpr_utils.py +25 -134
- mindspore/ops/composite/multitype_ops/add_impl.py +6 -0
- mindspore/ops/composite/multitype_ops/bitwise_and_impl.py +6 -0
- mindspore/ops/composite/multitype_ops/bitwise_or_impl.py +6 -0
- mindspore/ops/composite/multitype_ops/bitwise_xor_impl.py +6 -0
- mindspore/ops/composite/multitype_ops/div_impl.py +8 -0
- mindspore/ops/composite/multitype_ops/equal_impl.py +6 -0
- mindspore/ops/composite/multitype_ops/floordiv_impl.py +8 -0
- mindspore/ops/composite/multitype_ops/getitem_impl.py +6 -0
- mindspore/ops/composite/multitype_ops/greater_equal_impl.py +6 -0
- mindspore/ops/composite/multitype_ops/greater_impl.py +6 -0
- mindspore/ops/composite/multitype_ops/in_impl.py +8 -2
- mindspore/ops/composite/multitype_ops/left_shift_impl.py +6 -0
- mindspore/ops/composite/multitype_ops/less_equal_impl.py +6 -0
- mindspore/ops/composite/multitype_ops/less_impl.py +6 -0
- mindspore/ops/composite/multitype_ops/logic_not_impl.py +6 -0
- mindspore/ops/composite/multitype_ops/logical_and_impl.py +6 -0
- mindspore/ops/composite/multitype_ops/logical_or_impl.py +6 -0
- mindspore/ops/composite/multitype_ops/mod_impl.py +6 -0
- mindspore/ops/composite/multitype_ops/mul_impl.py +6 -0
- mindspore/ops/composite/multitype_ops/negative_impl.py +9 -3
- mindspore/ops/composite/multitype_ops/not_equal_impl.py +6 -0
- mindspore/ops/composite/multitype_ops/not_in_impl.py +8 -3
- mindspore/ops/composite/multitype_ops/ones_like_impl.py +2 -2
- mindspore/ops/composite/multitype_ops/pow_impl.py +6 -0
- mindspore/ops/composite/multitype_ops/right_shift_impl.py +6 -0
- mindspore/ops/composite/multitype_ops/setitem_impl.py +32 -21
- mindspore/ops/composite/multitype_ops/sub_impl.py +6 -0
- mindspore/ops/composite/multitype_ops/zeros_like_impl.py +6 -3
- mindspore/ops/deprecated.py +14 -3
- mindspore/ops/function/__init__.py +53 -11
- mindspore/ops/function/array_func.py +1269 -1821
- mindspore/ops/function/clip_func.py +19 -31
- mindspore/ops/function/debug_func.py +114 -5
- mindspore/ops/function/fft_func.py +44 -0
- mindspore/ops/function/grad/grad_func.py +30 -22
- mindspore/ops/function/image_func.py +27 -21
- mindspore/ops/function/linalg_func.py +35 -68
- mindspore/ops/function/math_func.py +1170 -2697
- mindspore/ops/function/nn_func.py +2116 -1128
- mindspore/ops/function/other_func.py +8 -8
- mindspore/ops/function/parameter_func.py +5 -93
- mindspore/ops/function/random_func.py +435 -113
- mindspore/ops/function/reshard_func.py +104 -0
- mindspore/ops/function/sparse_func.py +4 -4
- mindspore/ops/function/sparse_unary_func.py +9 -16
- mindspore/ops/function/spectral_func.py +1 -1
- mindspore/ops/function/vmap_func.py +16 -15
- mindspore/ops/functional.py +355 -346
- mindspore/ops/op_info_register.py +18 -45
- mindspore/ops/operations/__init__.py +38 -24
- mindspore/ops/operations/_grad_ops.py +21 -927
- mindspore/ops/operations/_infer_ops.py +19 -0
- mindspore/ops/operations/_inner_ops.py +173 -607
- mindspore/ops/operations/_rl_inner_ops.py +2 -2
- mindspore/ops/operations/_scalar_ops.py +5 -480
- mindspore/ops/operations/_sequence_ops.py +6 -36
- mindspore/ops/operations/_tensor_array.py +8 -8
- mindspore/ops/operations/array_ops.py +106 -2837
- mindspore/ops/operations/comm_ops.py +799 -127
- mindspore/ops/operations/custom_ops.py +124 -119
- mindspore/ops/operations/debug_ops.py +142 -41
- mindspore/ops/operations/image_ops.py +1 -217
- mindspore/ops/operations/inner_ops.py +5 -40
- mindspore/ops/operations/linalg_ops.py +1 -49
- mindspore/ops/operations/manually_defined/__init__.py +24 -0
- mindspore/ops/operations/manually_defined/_inner.py +73 -0
- mindspore/ops/operations/manually_defined/ops_def.py +2271 -0
- mindspore/ops/operations/math_ops.py +666 -4972
- mindspore/ops/operations/nn_ops.py +205 -2213
- mindspore/ops/operations/other_ops.py +60 -49
- mindspore/ops/operations/random_ops.py +50 -54
- mindspore/ops/operations/reshard_ops.py +53 -0
- mindspore/ops/operations/sparse_ops.py +4 -4
- mindspore/ops/primitive.py +216 -103
- mindspore/ops_generate/__init__.py +27 -0
- mindspore/ops_generate/arg_dtype_cast.py +252 -0
- mindspore/ops_generate/arg_handler.py +197 -0
- mindspore/ops_generate/gen_aclnn_implement.py +263 -0
- mindspore/ops_generate/gen_constants.py +36 -0
- mindspore/ops_generate/gen_ops.py +1099 -0
- mindspore/ops_generate/gen_ops_inner_prim.py +131 -0
- mindspore/ops_generate/gen_pyboost_func.py +1052 -0
- mindspore/ops_generate/gen_utils.py +209 -0
- mindspore/ops_generate/op_proto.py +145 -0
- mindspore/ops_generate/pyboost_utils.py +367 -0
- mindspore/ops_generate/template.py +261 -0
- mindspore/parallel/__init__.py +8 -4
- mindspore/parallel/_auto_parallel_context.py +100 -10
- mindspore/parallel/_cell_wrapper.py +99 -9
- mindspore/parallel/_cost_model_context.py +1 -1
- mindspore/parallel/_dp_allreduce_fusion.py +159 -159
- mindspore/parallel/_parallel_serialization.py +67 -23
- mindspore/parallel/_ps_context.py +1 -1
- mindspore/parallel/_recovery_context.py +1 -1
- mindspore/parallel/_tensor.py +99 -22
- mindspore/parallel/_transformer/__init__.py +1 -1
- mindspore/parallel/_transformer/layers.py +1 -1
- mindspore/parallel/_transformer/loss.py +1 -1
- mindspore/parallel/_transformer/moe.py +1 -1
- mindspore/parallel/_transformer/op_parallel_config.py +1 -1
- mindspore/parallel/_transformer/transformer.py +2 -2
- mindspore/parallel/_utils.py +173 -6
- mindspore/parallel/algo_parameter_config.py +8 -10
- mindspore/parallel/checkpoint_transform.py +204 -38
- mindspore/parallel/cluster/__init__.py +15 -0
- mindspore/parallel/cluster/process_entity/__init__.py +18 -0
- mindspore/parallel/cluster/process_entity/_api.py +352 -0
- mindspore/parallel/cluster/process_entity/_utils.py +101 -0
- mindspore/parallel/cluster/run.py +136 -0
- mindspore/parallel/mpi/__init__.py +1 -1
- mindspore/parallel/mpi/_mpi_config.py +1 -1
- mindspore/parallel/parameter_broadcast.py +151 -0
- mindspore/parallel/shard.py +279 -37
- mindspore/parallel/transform_safetensors.py +993 -0
- mindspore/pgodb140.dll +0 -0
- mindspore/pgort140.dll +0 -0
- mindspore/profiler/__init__.py +4 -2
- mindspore/profiler/common/constant.py +29 -0
- mindspore/profiler/common/process_pool.py +41 -0
- mindspore/profiler/common/registry.py +47 -0
- mindspore/profiler/common/singleton.py +28 -0
- mindspore/profiler/common/util.py +153 -0
- mindspore/profiler/dynamic_profiler.py +694 -0
- mindspore/profiler/envprofiling.py +18 -20
- mindspore/{_extends/parallel_compile/tbe_compiler → profiler/parser/ascend_analysis}/__init__.py +1 -1
- mindspore/profiler/parser/ascend_analysis/constant.py +71 -0
- mindspore/profiler/parser/ascend_analysis/file_manager.py +180 -0
- mindspore/profiler/parser/ascend_analysis/function_event.py +185 -0
- mindspore/profiler/parser/ascend_analysis/fwk_cann_parser.py +136 -0
- mindspore/profiler/parser/ascend_analysis/fwk_file_parser.py +131 -0
- mindspore/profiler/parser/ascend_analysis/msprof_timeline_parser.py +104 -0
- mindspore/profiler/parser/ascend_analysis/path_manager.py +313 -0
- mindspore/profiler/parser/ascend_analysis/profiler_info_parser.py +123 -0
- mindspore/profiler/parser/ascend_analysis/tlv_decoder.py +86 -0
- mindspore/profiler/parser/ascend_analysis/trace_event_manager.py +75 -0
- mindspore/profiler/parser/ascend_cluster_generator.py +14 -9
- mindspore/profiler/parser/ascend_communicate_generator.py +0 -1
- mindspore/profiler/parser/ascend_flops_generator.py +20 -4
- mindspore/profiler/parser/ascend_hccl_generator.py +29 -278
- mindspore/profiler/parser/ascend_integrate_generator.py +42 -0
- mindspore/profiler/parser/ascend_memory_generator.py +185 -0
- mindspore/profiler/parser/ascend_msprof_exporter.py +148 -146
- mindspore/profiler/parser/ascend_msprof_generator.py +73 -283
- mindspore/profiler/parser/ascend_op_generator.py +92 -42
- mindspore/profiler/parser/ascend_timeline_generator.py +298 -133
- mindspore/profiler/parser/base_timeline_generator.py +25 -25
- mindspore/profiler/parser/cpu_gpu_timeline_generator.py +25 -12
- mindspore/profiler/parser/framework_parser.py +4 -393
- mindspore/profiler/parser/gpu_analysis/__init__.py +14 -0
- mindspore/profiler/parser/gpu_analysis/function_event.py +44 -0
- mindspore/profiler/parser/gpu_analysis/fwk_file_parser.py +89 -0
- mindspore/profiler/parser/gpu_analysis/profiler_info_parser.py +72 -0
- mindspore/profiler/parser/integrator.py +3 -1
- mindspore/profiler/parser/memory_usage_parser.py +0 -154
- mindspore/profiler/parser/minddata_parser.py +72 -3
- mindspore/profiler/parser/profiler_info.py +94 -7
- mindspore/profiler/profiler.py +153 -0
- mindspore/profiler/profiling.py +631 -508
- mindspore/rewrite/__init__.py +2 -14
- mindspore/rewrite/api/node.py +122 -36
- mindspore/rewrite/api/pattern_engine.py +2 -3
- mindspore/rewrite/api/scoped_value.py +16 -15
- mindspore/rewrite/api/symbol_tree.py +45 -29
- mindspore/rewrite/ast_helpers/__init__.py +3 -6
- mindspore/rewrite/ast_helpers/ast_converter.py +143 -0
- mindspore/rewrite/ast_helpers/ast_finder.py +48 -0
- mindspore/rewrite/ast_helpers/ast_flattener.py +268 -0
- mindspore/rewrite/ast_helpers/ast_modifier.py +160 -92
- mindspore/rewrite/common/__init__.py +1 -2
- mindspore/rewrite/common/config.py +24 -0
- mindspore/rewrite/common/{rewrite_elog.py → error_log.py} +39 -39
- mindspore/rewrite/{namer.py → common/namer.py} +63 -18
- mindspore/rewrite/common/namespace.py +118 -0
- mindspore/rewrite/node/__init__.py +5 -5
- mindspore/rewrite/node/call_function.py +23 -7
- mindspore/rewrite/node/cell_container.py +7 -3
- mindspore/rewrite/node/control_flow.py +53 -28
- mindspore/rewrite/node/node.py +212 -196
- mindspore/rewrite/node/node_manager.py +51 -22
- mindspore/rewrite/node/node_topological_manager.py +3 -23
- mindspore/rewrite/parsers/__init__.py +12 -0
- mindspore/rewrite/parsers/arguments_parser.py +8 -9
- mindspore/rewrite/parsers/assign_parser.py +637 -413
- mindspore/rewrite/parsers/attribute_parser.py +3 -4
- mindspore/rewrite/parsers/class_def_parser.py +115 -148
- mindspore/rewrite/parsers/constant_parser.py +5 -5
- mindspore/rewrite/parsers/container_parser.py +4 -6
- mindspore/rewrite/parsers/expr_parser.py +55 -0
- mindspore/rewrite/parsers/for_parser.py +31 -98
- mindspore/rewrite/parsers/function_def_parser.py +13 -5
- mindspore/rewrite/parsers/if_parser.py +28 -10
- mindspore/rewrite/parsers/module_parser.py +8 -182
- mindspore/rewrite/parsers/parser.py +1 -5
- mindspore/rewrite/parsers/parser_register.py +1 -1
- mindspore/rewrite/parsers/return_parser.py +5 -10
- mindspore/rewrite/parsers/while_parser.py +59 -0
- mindspore/rewrite/sparsify/utils.py +1 -1
- mindspore/rewrite/symbol_tree/__init__.py +20 -0
- mindspore/rewrite/{symbol_tree.py → symbol_tree/symbol_tree.py} +705 -186
- mindspore/rewrite/{symbol_tree_builder.py → symbol_tree/symbol_tree_builder.py} +8 -8
- mindspore/rewrite/{symbol_tree_dumper.py → symbol_tree/symbol_tree_dumper.py} +4 -4
- mindspore/run_check/_check_version.py +40 -115
- mindspore/run_check/run_check.py +1 -1
- mindspore/safeguard/rewrite_obfuscation.py +597 -263
- mindspore/swresample-4.dll +0 -0
- mindspore/swscale-6.dll +0 -0
- mindspore/tbbmalloc.dll +0 -0
- mindspore/tinyxml2.dll +0 -0
- mindspore/train/__init__.py +7 -5
- mindspore/train/_utils.py +204 -4
- mindspore/train/amp.py +335 -295
- mindspore/train/anf_ir_pb2.py +14 -2
- mindspore/train/callback/__init__.py +5 -2
- mindspore/train/callback/_backup_and_restore.py +5 -5
- mindspore/train/callback/_callback.py +4 -4
- mindspore/train/callback/_checkpoint.py +220 -43
- mindspore/train/callback/_cluster_monitor.py +201 -0
- mindspore/train/callback/_early_stop.py +2 -2
- mindspore/train/callback/_flops_collector.py +239 -0
- mindspore/train/callback/_landscape.py +15 -9
- mindspore/train/callback/_loss_monitor.py +5 -5
- mindspore/train/callback/_on_request_exit.py +136 -33
- mindspore/train/callback/_reduce_lr_on_plateau.py +2 -2
- mindspore/train/callback/_summary_collector.py +12 -12
- mindspore/train/callback/_tft_register.py +352 -0
- mindspore/train/callback/_time_monitor.py +3 -3
- mindspore/train/data_sink.py +6 -5
- mindspore/train/dataset_helper.py +66 -23
- mindspore/train/loss_scale_manager.py +2 -2
- mindspore/train/metrics/accuracy.py +7 -7
- mindspore/train/metrics/confusion_matrix.py +8 -6
- mindspore/train/metrics/cosine_similarity.py +6 -4
- mindspore/train/metrics/error.py +2 -2
- mindspore/train/metrics/metric.py +3 -3
- mindspore/train/metrics/perplexity.py +2 -1
- mindspore/train/metrics/roc.py +4 -4
- mindspore/train/metrics/topk.py +2 -2
- mindspore/train/mind_ir_pb2.py +116 -37
- mindspore/train/model.py +382 -76
- mindspore/train/serialization.py +787 -288
- mindspore/train/summary/_summary_adapter.py +1 -1
- mindspore/train/summary/summary_record.py +51 -28
- mindspore/train/train_thor/convert_utils.py +3 -3
- mindspore/turbojpeg.dll +0 -0
- mindspore/utils/__init__.py +21 -0
- mindspore/utils/utils.py +60 -0
- mindspore/vcmeta.dll +0 -0
- mindspore/vcruntime140.dll +0 -0
- mindspore/vcruntime140_1.dll +0 -0
- mindspore/version.py +1 -1
- {mindspore-2.2.14.dist-info → mindspore-2.4.0.dist-info}/METADATA +8 -4
- mindspore-2.4.0.dist-info/RECORD +1406 -0
- {mindspore-2.2.14.dist-info → mindspore-2.4.0.dist-info}/entry_points.txt +1 -0
- mindspore/_extends/parallel_compile/tbe_compiler/tbe_adapter.py +0 -662
- mindspore/_extends/parallel_compile/tbe_compiler/tbe_helper.py +0 -377
- mindspore/_extends/parallel_compile/tbe_compiler/tbe_job.py +0 -201
- mindspore/_extends/parallel_compile/tbe_compiler/tbe_job_manager.py +0 -515
- mindspore/gen_ops.py +0 -273
- mindspore/include/c_api/ms/abstract.h +0 -67
- mindspore/include/c_api/ms/attribute.h +0 -197
- mindspore/include/c_api/ms/base/handle_types.h +0 -43
- mindspore/include/c_api/ms/base/macros.h +0 -32
- mindspore/include/c_api/ms/base/status.h +0 -33
- mindspore/include/c_api/ms/base/types.h +0 -282
- mindspore/include/c_api/ms/context.h +0 -102
- mindspore/include/c_api/ms/graph.h +0 -160
- mindspore/include/c_api/ms/node.h +0 -606
- mindspore/include/c_api/ms/tensor.h +0 -161
- mindspore/include/c_api/ms/value.h +0 -84
- mindspore/mindspore_shared_lib.dll +0 -0
- mindspore/nn/layer/flash_attention.py +0 -189
- mindspore/ops/_op_impl/aicpu/strided_slice_v2.py +0 -93
- mindspore/ops/_op_impl/aicpu/strided_slice_v2_grad.py +0 -66
- mindspore/ops/_op_impl/cpu/concat.py +0 -39
- mindspore/ops/_op_impl/cpu/tensor_shape.py +0 -42
- mindspore/ops/_op_impl/tbe/__init__.py +0 -47
- mindspore/ops/_op_impl/tbe/abs.py +0 -38
- mindspore/ops/_op_impl/tbe/abs_ds.py +0 -39
- mindspore/ops/_op_impl/tbe/abs_grad.py +0 -43
- mindspore/ops/_op_impl/tbe/abs_grad_ds.py +0 -44
- mindspore/ops/_op_impl/tbe/accumulate_n_v2.py +0 -41
- mindspore/ops/_op_impl/tbe/accumulate_n_v2_ds.py +0 -42
- mindspore/ops/_op_impl/tbe/acos.py +0 -37
- mindspore/ops/_op_impl/tbe/acos_ds.py +0 -38
- mindspore/ops/_op_impl/tbe/acos_grad.py +0 -43
- mindspore/ops/_op_impl/tbe/acos_grad_ds.py +0 -44
- mindspore/ops/_op_impl/tbe/acosh.py +0 -37
- mindspore/ops/_op_impl/tbe/acosh_ds.py +0 -38
- mindspore/ops/_op_impl/tbe/acosh_grad.py +0 -43
- mindspore/ops/_op_impl/tbe/acosh_grad_ds.py +0 -44
- mindspore/ops/_op_impl/tbe/act_ulq_clamp_max_grad.py +0 -38
- mindspore/ops/_op_impl/tbe/act_ulq_clamp_min_grad.py +0 -38
- mindspore/ops/_op_impl/tbe/acts_ulq.py +0 -45
- mindspore/ops/_op_impl/tbe/acts_ulq_input_grad.py +0 -38
- mindspore/ops/_op_impl/tbe/adam_apply_one.py +0 -50
- mindspore/ops/_op_impl/tbe/adam_apply_one_assign.py +0 -53
- mindspore/ops/_op_impl/tbe/adam_apply_one_ds.py +0 -51
- mindspore/ops/_op_impl/tbe/adam_apply_one_with_decay.py +0 -54
- mindspore/ops/_op_impl/tbe/adam_apply_one_with_decay_assign.py +0 -54
- mindspore/ops/_op_impl/tbe/adam_apply_one_with_decay_ds.py +0 -55
- mindspore/ops/_op_impl/tbe/adaptive_max_pool2d.py +0 -37
- mindspore/ops/_op_impl/tbe/add.py +0 -42
- mindspore/ops/_op_impl/tbe/add_ds.py +0 -43
- mindspore/ops/_op_impl/tbe/add_n.py +0 -39
- mindspore/ops/_op_impl/tbe/add_n_ds.py +0 -40
- mindspore/ops/_op_impl/tbe/addcdiv.py +0 -41
- mindspore/ops/_op_impl/tbe/addcdiv_ds.py +0 -42
- mindspore/ops/_op_impl/tbe/addcmul.py +0 -43
- mindspore/ops/_op_impl/tbe/addcmul_ds.py +0 -44
- mindspore/ops/_op_impl/tbe/apply_ada_max.py +0 -68
- mindspore/ops/_op_impl/tbe/apply_ada_max_ds.py +0 -69
- mindspore/ops/_op_impl/tbe/apply_adadelta.py +0 -66
- mindspore/ops/_op_impl/tbe/apply_adadelta_ds.py +0 -67
- mindspore/ops/_op_impl/tbe/apply_adagrad.py +0 -55
- mindspore/ops/_op_impl/tbe/apply_adagrad_d_a.py +0 -67
- mindspore/ops/_op_impl/tbe/apply_adagrad_ds.py +0 -56
- mindspore/ops/_op_impl/tbe/apply_adagrad_v2.py +0 -48
- mindspore/ops/_op_impl/tbe/apply_adagrad_v2_ds.py +0 -49
- mindspore/ops/_op_impl/tbe/apply_adam.py +0 -79
- mindspore/ops/_op_impl/tbe/apply_adam_ds.py +0 -80
- mindspore/ops/_op_impl/tbe/apply_adam_with_amsgrad.py +0 -60
- mindspore/ops/_op_impl/tbe/apply_adam_with_amsgrad_ds.py +0 -61
- mindspore/ops/_op_impl/tbe/apply_add_sign.py +0 -65
- mindspore/ops/_op_impl/tbe/apply_add_sign_ds.py +0 -66
- mindspore/ops/_op_impl/tbe/apply_centered_rms_prop.py +0 -77
- mindspore/ops/_op_impl/tbe/apply_centered_rms_prop_ds.py +0 -78
- mindspore/ops/_op_impl/tbe/apply_ftrl.py +0 -67
- mindspore/ops/_op_impl/tbe/apply_ftrl_ds.py +0 -68
- mindspore/ops/_op_impl/tbe/apply_gradient_descent.py +0 -44
- mindspore/ops/_op_impl/tbe/apply_gradient_descent_ds.py +0 -45
- mindspore/ops/_op_impl/tbe/apply_keras_momentum.py +0 -49
- mindspore/ops/_op_impl/tbe/apply_momentum.py +0 -64
- mindspore/ops/_op_impl/tbe/apply_momentum_ds.py +0 -65
- mindspore/ops/_op_impl/tbe/apply_power_sign.py +0 -65
- mindspore/ops/_op_impl/tbe/apply_power_sign_ds.py +0 -66
- mindspore/ops/_op_impl/tbe/apply_proximal_adagrad.py +0 -57
- mindspore/ops/_op_impl/tbe/apply_proximal_adagrad_ds.py +0 -58
- mindspore/ops/_op_impl/tbe/apply_proximal_gradient_descent.py +0 -54
- mindspore/ops/_op_impl/tbe/apply_proximal_gradient_descent_ds.py +0 -55
- mindspore/ops/_op_impl/tbe/apply_rms_prop.py +0 -52
- mindspore/ops/_op_impl/tbe/approximate_equal.py +0 -39
- mindspore/ops/_op_impl/tbe/approximate_equal_ds.py +0 -40
- mindspore/ops/_op_impl/tbe/arg_max.py +0 -38
- mindspore/ops/_op_impl/tbe/arg_max_with_value.py +0 -38
- mindspore/ops/_op_impl/tbe/arg_max_with_value_ds.py +0 -39
- mindspore/ops/_op_impl/tbe/arg_min.py +0 -38
- mindspore/ops/_op_impl/tbe/arg_min_v2_ds.py +0 -40
- mindspore/ops/_op_impl/tbe/arg_min_with_value.py +0 -38
- mindspore/ops/_op_impl/tbe/arg_min_with_value_ds.py +0 -39
- mindspore/ops/_op_impl/tbe/asin.py +0 -37
- mindspore/ops/_op_impl/tbe/asin_ds.py +0 -38
- mindspore/ops/_op_impl/tbe/asin_grad.py +0 -43
- mindspore/ops/_op_impl/tbe/asin_grad_ds.py +0 -44
- mindspore/ops/_op_impl/tbe/asinh.py +0 -37
- mindspore/ops/_op_impl/tbe/asinh_ds.py +0 -38
- mindspore/ops/_op_impl/tbe/asinh_grad.py +0 -43
- mindspore/ops/_op_impl/tbe/asinh_grad_ds.py +0 -44
- mindspore/ops/_op_impl/tbe/assign.py +0 -79
- mindspore/ops/_op_impl/tbe/assign_add.py +0 -59
- mindspore/ops/_op_impl/tbe/assign_add_ds.py +0 -60
- mindspore/ops/_op_impl/tbe/assign_ds.py +0 -80
- mindspore/ops/_op_impl/tbe/assign_sub.py +0 -55
- mindspore/ops/_op_impl/tbe/assign_sub_ds.py +0 -56
- mindspore/ops/_op_impl/tbe/atan.py +0 -37
- mindspore/ops/_op_impl/tbe/atan2.py +0 -38
- mindspore/ops/_op_impl/tbe/atan2_ds.py +0 -39
- mindspore/ops/_op_impl/tbe/atan_ds.py +0 -38
- mindspore/ops/_op_impl/tbe/atan_grad.py +0 -43
- mindspore/ops/_op_impl/tbe/atan_grad_ds.py +0 -44
- mindspore/ops/_op_impl/tbe/atanh.py +0 -37
- mindspore/ops/_op_impl/tbe/atanh_ds.py +0 -38
- mindspore/ops/_op_impl/tbe/avg_pool.py +0 -43
- mindspore/ops/_op_impl/tbe/avg_pool_3d.py +0 -44
- mindspore/ops/_op_impl/tbe/avg_pool_3d_grad.py +0 -45
- mindspore/ops/_op_impl/tbe/avg_pool_ds.py +0 -44
- mindspore/ops/_op_impl/tbe/avg_pool_grad.py +0 -42
- mindspore/ops/_op_impl/tbe/avg_pool_grad_vm.py +0 -42
- mindspore/ops/_op_impl/tbe/basic_lstm_cell.py +0 -57
- mindspore/ops/_op_impl/tbe/basic_lstm_cell_c_state_grad.py +0 -50
- mindspore/ops/_op_impl/tbe/basic_lstm_cell_c_state_grad_v2.py +0 -51
- mindspore/ops/_op_impl/tbe/basic_lstm_cell_input_grad.py +0 -42
- mindspore/ops/_op_impl/tbe/basic_lstm_cell_weight_grad.py +0 -41
- mindspore/ops/_op_impl/tbe/batch_matmul.py +0 -42
- mindspore/ops/_op_impl/tbe/batch_matmul_ds.py +0 -41
- mindspore/ops/_op_impl/tbe/batch_matmul_v2.py +0 -47
- mindspore/ops/_op_impl/tbe/batch_to_space.py +0 -38
- mindspore/ops/_op_impl/tbe/batch_to_space_nd.py +0 -38
- mindspore/ops/_op_impl/tbe/batch_to_space_nd_ds.py +0 -39
- mindspore/ops/_op_impl/tbe/batch_to_space_nd_v2.py +0 -41
- mindspore/ops/_op_impl/tbe/batchnorm.py +0 -58
- mindspore/ops/_op_impl/tbe/batchnorm_grad.py +0 -58
- mindspore/ops/_op_impl/tbe/bce_with_logits_loss.py +0 -42
- mindspore/ops/_op_impl/tbe/bessel_i0e.py +0 -37
- mindspore/ops/_op_impl/tbe/bessel_i0e_ds.py +0 -38
- mindspore/ops/_op_impl/tbe/bessel_i1e.py +0 -37
- mindspore/ops/_op_impl/tbe/bessel_i1e_ds.py +0 -38
- mindspore/ops/_op_impl/tbe/bias_add.py +0 -38
- mindspore/ops/_op_impl/tbe/bias_add_ds.py +0 -39
- mindspore/ops/_op_impl/tbe/bias_add_grad.py +0 -53
- mindspore/ops/_op_impl/tbe/binary_cross_entropy.py +0 -39
- mindspore/ops/_op_impl/tbe/binary_cross_entropy_ds.py +0 -40
- mindspore/ops/_op_impl/tbe/binary_cross_entropy_grad.py +0 -44
- mindspore/ops/_op_impl/tbe/binary_cross_entropy_grad_ds.py +0 -45
- mindspore/ops/_op_impl/tbe/bitwise_and.py +0 -39
- mindspore/ops/_op_impl/tbe/bitwise_and_ds.py +0 -40
- mindspore/ops/_op_impl/tbe/bitwise_or.py +0 -39
- mindspore/ops/_op_impl/tbe/bitwise_or_ds.py +0 -40
- mindspore/ops/_op_impl/tbe/bitwise_xor.py +0 -39
- mindspore/ops/_op_impl/tbe/bitwise_xor_ds.py +0 -40
- mindspore/ops/_op_impl/tbe/bn_infer.py +0 -43
- mindspore/ops/_op_impl/tbe/bn_infer_ds.py +0 -45
- mindspore/ops/_op_impl/tbe/bn_infer_grad.py +0 -41
- mindspore/ops/_op_impl/tbe/bn_infer_grad_ds.py +0 -40
- mindspore/ops/_op_impl/tbe/bn_inference.py +0 -50
- mindspore/ops/_op_impl/tbe/bn_training_reduce.py +0 -38
- mindspore/ops/_op_impl/tbe/bn_training_reduce_ds.py +0 -39
- mindspore/ops/_op_impl/tbe/bn_training_reduce_grad.py +0 -46
- mindspore/ops/_op_impl/tbe/bn_training_reduce_grad_ds.py +0 -47
- mindspore/ops/_op_impl/tbe/bn_training_update.py +0 -52
- mindspore/ops/_op_impl/tbe/bn_training_update_ds.py +0 -53
- mindspore/ops/_op_impl/tbe/bn_training_update_grad.py +0 -44
- mindspore/ops/_op_impl/tbe/bn_training_update_grad_ds.py +0 -45
- mindspore/ops/_op_impl/tbe/bn_training_update_v2.py +0 -48
- mindspore/ops/_op_impl/tbe/bn_training_update_v3.py +0 -51
- mindspore/ops/_op_impl/tbe/bounding_box_decode.py +0 -41
- mindspore/ops/_op_impl/tbe/bounding_box_decode_ds.py +0 -42
- mindspore/ops/_op_impl/tbe/bounding_box_encode.py +0 -38
- mindspore/ops/_op_impl/tbe/broadcast_to.py +0 -40
- mindspore/ops/_op_impl/tbe/broadcast_to_ds.py +0 -44
- mindspore/ops/_op_impl/tbe/cast.py +0 -55
- mindspore/ops/_op_impl/tbe/cast_ds.py +0 -58
- mindspore/ops/_op_impl/tbe/cdist.py +0 -38
- mindspore/ops/_op_impl/tbe/cdist_grad.py +0 -42
- mindspore/ops/_op_impl/tbe/ceil.py +0 -37
- mindspore/ops/_op_impl/tbe/ceil_ds.py +0 -38
- mindspore/ops/_op_impl/tbe/celu.py +0 -39
- mindspore/ops/_op_impl/tbe/centralization.py +0 -39
- mindspore/ops/_op_impl/tbe/check_valid.py +0 -38
- mindspore/ops/_op_impl/tbe/check_valid_ds.py +0 -39
- mindspore/ops/_op_impl/tbe/clip_by_norm_no_div_sum.py +0 -41
- mindspore/ops/_op_impl/tbe/clip_by_norm_no_div_sum_ds.py +0 -42
- mindspore/ops/_op_impl/tbe/clip_by_value.py +0 -41
- mindspore/ops/_op_impl/tbe/clip_by_value_ds.py +0 -42
- mindspore/ops/_op_impl/tbe/concat.py +0 -40
- mindspore/ops/_op_impl/tbe/concat_ds.py +0 -38
- mindspore/ops/_op_impl/tbe/confusion_matrix.py +0 -63
- mindspore/ops/_op_impl/tbe/confusion_mul_grad.py +0 -40
- mindspore/ops/_op_impl/tbe/confusion_softmax_grad.py +0 -41
- mindspore/ops/_op_impl/tbe/confusion_transpose_d.py +0 -39
- mindspore/ops/_op_impl/tbe/conv2d.py +0 -47
- mindspore/ops/_op_impl/tbe/conv2d_backprop_filter.py +0 -42
- mindspore/ops/_op_impl/tbe/conv2d_backprop_filter_ds.py +0 -43
- mindspore/ops/_op_impl/tbe/conv2d_backprop_input.py +0 -42
- mindspore/ops/_op_impl/tbe/conv2d_backprop_input_ds.py +0 -44
- mindspore/ops/_op_impl/tbe/conv2d_ds.py +0 -47
- mindspore/ops/_op_impl/tbe/conv2d_transpose.py +0 -48
- mindspore/ops/_op_impl/tbe/conv3d.py +0 -45
- mindspore/ops/_op_impl/tbe/conv3d_backprop_filter.py +0 -42
- mindspore/ops/_op_impl/tbe/conv3d_backprop_input.py +0 -42
- mindspore/ops/_op_impl/tbe/conv3d_transpose.py +0 -47
- mindspore/ops/_op_impl/tbe/conv3d_transpose_ds.py +0 -48
- mindspore/ops/_op_impl/tbe/cos.py +0 -37
- mindspore/ops/_op_impl/tbe/cos_ds.py +0 -38
- mindspore/ops/_op_impl/tbe/cosh.py +0 -37
- mindspore/ops/_op_impl/tbe/cosh_ds.py +0 -38
- mindspore/ops/_op_impl/tbe/ctc_loss_v2.py +0 -42
- mindspore/ops/_op_impl/tbe/ctc_loss_v2_grad.py +0 -44
- mindspore/ops/_op_impl/tbe/cum_sum.py +0 -42
- mindspore/ops/_op_impl/tbe/cum_sum_ds.py +0 -44
- mindspore/ops/_op_impl/tbe/cummin.py +0 -41
- mindspore/ops/_op_impl/tbe/cumprod.py +0 -42
- mindspore/ops/_op_impl/tbe/data_format_dim_map.py +0 -38
- mindspore/ops/_op_impl/tbe/data_format_dim_map_ds.py +0 -40
- mindspore/ops/_op_impl/tbe/deformable_offsets.py +0 -45
- mindspore/ops/_op_impl/tbe/deformable_offsets_grad.py +0 -48
- mindspore/ops/_op_impl/tbe/depth_to_space_ds.py +0 -49
- mindspore/ops/_op_impl/tbe/depthwise_conv2d.py +0 -44
- mindspore/ops/_op_impl/tbe/depthwise_conv2d_backprop_filter.py +0 -41
- mindspore/ops/_op_impl/tbe/depthwise_conv2d_backprop_input.py +0 -41
- mindspore/ops/_op_impl/tbe/diag.py +0 -38
- mindspore/ops/_op_impl/tbe/diag_part.py +0 -38
- mindspore/ops/_op_impl/tbe/dilation.py +0 -40
- mindspore/ops/_op_impl/tbe/div.py +0 -41
- mindspore/ops/_op_impl/tbe/div_ds.py +0 -42
- mindspore/ops/_op_impl/tbe/div_no_nan.py +0 -41
- mindspore/ops/_op_impl/tbe/div_no_nan_ds.py +0 -42
- mindspore/ops/_op_impl/tbe/dropout_do_mask.py +0 -38
- mindspore/ops/_op_impl/tbe/dropout_do_mask_ds.py +0 -39
- mindspore/ops/_op_impl/tbe/dropout_do_mask_v3.py +0 -39
- mindspore/ops/_op_impl/tbe/dynamic_atomic_addr_clean.py +0 -34
- mindspore/ops/_op_impl/tbe/dynamic_gru_v2.py +0 -95
- mindspore/ops/_op_impl/tbe/dynamic_rnn.py +0 -82
- mindspore/ops/_op_impl/tbe/elu.py +0 -38
- mindspore/ops/_op_impl/tbe/elu_ds.py +0 -39
- mindspore/ops/_op_impl/tbe/elu_grad.py +0 -43
- mindspore/ops/_op_impl/tbe/elu_grad_ds.py +0 -44
- mindspore/ops/_op_impl/tbe/equal.py +0 -42
- mindspore/ops/_op_impl/tbe/equal_ds.py +0 -42
- mindspore/ops/_op_impl/tbe/erf.py +0 -37
- mindspore/ops/_op_impl/tbe/erf_ds.py +0 -38
- mindspore/ops/_op_impl/tbe/erfc.py +0 -37
- mindspore/ops/_op_impl/tbe/erfc_ds.py +0 -38
- mindspore/ops/_op_impl/tbe/erfinv.py +0 -36
- mindspore/ops/_op_impl/tbe/exp.py +0 -40
- mindspore/ops/_op_impl/tbe/exp_ds.py +0 -41
- mindspore/ops/_op_impl/tbe/expand_dims.py +0 -38
- mindspore/ops/_op_impl/tbe/expm1.py +0 -37
- mindspore/ops/_op_impl/tbe/expm1_ds.py +0 -38
- mindspore/ops/_op_impl/tbe/extract_image_patches.py +0 -41
- mindspore/ops/_op_impl/tbe/extract_volume_patches.py +0 -39
- mindspore/ops/_op_impl/tbe/fake_quant_with_min_max_vars.py +0 -39
- mindspore/ops/_op_impl/tbe/fake_quant_with_min_max_vars_gradient.py +0 -43
- mindspore/ops/_op_impl/tbe/fake_quant_with_min_max_vars_per_channel.py +0 -39
- mindspore/ops/_op_impl/tbe/fake_quant_with_min_max_vars_per_channel_gradient.py +0 -43
- mindspore/ops/_op_impl/tbe/fast_gelu.py +0 -37
- mindspore/ops/_op_impl/tbe/fast_gelu_ds.py +0 -38
- mindspore/ops/_op_impl/tbe/fast_gelu_grad.py +0 -41
- mindspore/ops/_op_impl/tbe/fast_gelu_grad_ds.py +0 -42
- mindspore/ops/_op_impl/tbe/fill.py +0 -56
- mindspore/ops/_op_impl/tbe/fill_ds.py +0 -42
- mindspore/ops/_op_impl/tbe/flatten.py +0 -48
- mindspore/ops/_op_impl/tbe/floor.py +0 -37
- mindspore/ops/_op_impl/tbe/floor_div.py +0 -41
- mindspore/ops/_op_impl/tbe/floor_div_ds.py +0 -42
- mindspore/ops/_op_impl/tbe/floor_ds.py +0 -38
- mindspore/ops/_op_impl/tbe/floor_mod.py +0 -39
- mindspore/ops/_op_impl/tbe/floor_mod_ds.py +0 -40
- mindspore/ops/_op_impl/tbe/fused_dbn_dw.py +0 -52
- mindspore/ops/_op_impl/tbe/fused_mul_add.py +0 -38
- mindspore/ops/_op_impl/tbe/fused_mul_add_n.py +0 -48
- mindspore/ops/_op_impl/tbe/fused_mul_add_n_l2loss.py +0 -53
- mindspore/ops/_op_impl/tbe/fused_mul_apply_momentum.py +0 -57
- mindspore/ops/_op_impl/tbe/fused_mul_apply_momentum_extern.py +0 -67
- mindspore/ops/_op_impl/tbe/gather_nd.py +0 -52
- mindspore/ops/_op_impl/tbe/gather_nd_ds.py +0 -48
- mindspore/ops/_op_impl/tbe/gather_v2.py +0 -56
- mindspore/ops/_op_impl/tbe/gather_v2_ds.py +0 -68
- mindspore/ops/_op_impl/tbe/gelu.py +0 -37
- mindspore/ops/_op_impl/tbe/gelu_ds.py +0 -38
- mindspore/ops/_op_impl/tbe/gelu_grad.py +0 -42
- mindspore/ops/_op_impl/tbe/gelu_grad_ds.py +0 -43
- mindspore/ops/_op_impl/tbe/ger.py +0 -43
- mindspore/ops/_op_impl/tbe/ger_ds.py +0 -44
- mindspore/ops/_op_impl/tbe/greater.py +0 -43
- mindspore/ops/_op_impl/tbe/greater_equal.py +0 -41
- mindspore/ops/_op_impl/tbe/greater_equal_ds.py +0 -42
- mindspore/ops/_op_impl/tbe/gru_v2_hidden_grad.py +0 -51
- mindspore/ops/_op_impl/tbe/gru_v2_hidden_grad_cell.py +0 -52
- mindspore/ops/_op_impl/tbe/hard_swish.py +0 -37
- mindspore/ops/_op_impl/tbe/hard_swish_ds.py +0 -38
- mindspore/ops/_op_impl/tbe/hard_swish_grad.py +0 -41
- mindspore/ops/_op_impl/tbe/hard_swish_grad_ds.py +0 -42
- mindspore/ops/_op_impl/tbe/histogram_fixed_width.py +0 -40
- mindspore/ops/_op_impl/tbe/hshrink.py +0 -33
- mindspore/ops/_op_impl/tbe/hshrink_grad.py +0 -37
- mindspore/ops/_op_impl/tbe/hsigmoid.py +0 -45
- mindspore/ops/_op_impl/tbe/hsigmoid_grad.py +0 -39
- mindspore/ops/_op_impl/tbe/ifmr.py +0 -47
- mindspore/ops/_op_impl/tbe/ifmr_ds.py +0 -48
- mindspore/ops/_op_impl/tbe/im2col.py +0 -42
- mindspore/ops/_op_impl/tbe/in_top_k.py +0 -37
- mindspore/ops/_op_impl/tbe/inplace_add.py +0 -39
- mindspore/ops/_op_impl/tbe/inplace_index_add.py +0 -46
- mindspore/ops/_op_impl/tbe/inplace_sub.py +0 -39
- mindspore/ops/_op_impl/tbe/inplace_update.py +0 -39
- mindspore/ops/_op_impl/tbe/inplace_update_ds.py +0 -40
- mindspore/ops/_op_impl/tbe/inv.py +0 -38
- mindspore/ops/_op_impl/tbe/inv_ds.py +0 -39
- mindspore/ops/_op_impl/tbe/inv_grad.py +0 -40
- mindspore/ops/_op_impl/tbe/inv_grad_ds.py +0 -41
- mindspore/ops/_op_impl/tbe/invert.py +0 -37
- mindspore/ops/_op_impl/tbe/invert_ds.py +0 -38
- mindspore/ops/_op_impl/tbe/iou.py +0 -38
- mindspore/ops/_op_impl/tbe/iou_ds.py +0 -39
- mindspore/ops/_op_impl/tbe/is_close.py +0 -40
- mindspore/ops/_op_impl/tbe/kl_div_loss.py +0 -38
- mindspore/ops/_op_impl/tbe/kl_div_loss_ds.py +0 -39
- mindspore/ops/_op_impl/tbe/kl_div_loss_grad.py +0 -40
- mindspore/ops/_op_impl/tbe/l2_loss.py +0 -36
- mindspore/ops/_op_impl/tbe/l2_loss_ds.py +0 -37
- mindspore/ops/_op_impl/tbe/l2_normalize.py +0 -38
- mindspore/ops/_op_impl/tbe/l2_normalize_grad.py +0 -40
- mindspore/ops/_op_impl/tbe/lamb_apply_optimizer_assign.py +0 -55
- mindspore/ops/_op_impl/tbe/lamb_apply_weight_assign.py +0 -42
- mindspore/ops/_op_impl/tbe/lamb_next_mv.py +0 -59
- mindspore/ops/_op_impl/tbe/lamb_next_mv_with_decay.py +0 -59
- mindspore/ops/_op_impl/tbe/lamb_next_right.py +0 -44
- mindspore/ops/_op_impl/tbe/lamb_update_with_lr.py +0 -48
- mindspore/ops/_op_impl/tbe/lamb_update_with_lr_v2.py +0 -44
- mindspore/ops/_op_impl/tbe/lars_update.py +0 -50
- mindspore/ops/_op_impl/tbe/lars_update_ds.py +0 -51
- mindspore/ops/_op_impl/tbe/layer_norm.py +0 -46
- mindspore/ops/_op_impl/tbe/layer_norm_beta_gamma_backprop.py +0 -44
- mindspore/ops/_op_impl/tbe/layer_norm_beta_gamma_backprop_ds.py +0 -45
- mindspore/ops/_op_impl/tbe/layer_norm_beta_gamma_backprop_v2.py +0 -40
- mindspore/ops/_op_impl/tbe/layer_norm_beta_gamma_backprop_v2_ds.py +0 -41
- mindspore/ops/_op_impl/tbe/layer_norm_ds.py +0 -47
- mindspore/ops/_op_impl/tbe/layer_norm_grad.py +0 -48
- mindspore/ops/_op_impl/tbe/layer_norm_x_backprop.py +0 -43
- mindspore/ops/_op_impl/tbe/layer_norm_x_backprop_ds.py +0 -44
- mindspore/ops/_op_impl/tbe/layer_norm_x_backprop_v2.py +0 -45
- mindspore/ops/_op_impl/tbe/layer_norm_x_backprop_v2_ds.py +0 -45
- mindspore/ops/_op_impl/tbe/lerp.py +0 -38
- mindspore/ops/_op_impl/tbe/less.py +0 -41
- mindspore/ops/_op_impl/tbe/less_ds.py +0 -42
- mindspore/ops/_op_impl/tbe/less_equal.py +0 -41
- mindspore/ops/_op_impl/tbe/less_equal_ds.py +0 -42
- mindspore/ops/_op_impl/tbe/log.py +0 -40
- mindspore/ops/_op_impl/tbe/log1p.py +0 -37
- mindspore/ops/_op_impl/tbe/log1p_ds.py +0 -38
- mindspore/ops/_op_impl/tbe/log_ds.py +0 -41
- mindspore/ops/_op_impl/tbe/logical_and.py +0 -37
- mindspore/ops/_op_impl/tbe/logical_and_ds.py +0 -38
- mindspore/ops/_op_impl/tbe/logical_not.py +0 -36
- mindspore/ops/_op_impl/tbe/logical_not_ds.py +0 -37
- mindspore/ops/_op_impl/tbe/logical_or.py +0 -37
- mindspore/ops/_op_impl/tbe/logical_or_ds.py +0 -38
- mindspore/ops/_op_impl/tbe/logsoftmax.py +0 -37
- mindspore/ops/_op_impl/tbe/logsoftmax_ds.py +0 -38
- mindspore/ops/_op_impl/tbe/logsoftmax_grad.py +0 -38
- mindspore/ops/_op_impl/tbe/logsoftmax_grad_ds.py +0 -39
- mindspore/ops/_op_impl/tbe/lp_norm.py +0 -40
- mindspore/ops/_op_impl/tbe/lp_norm_ds.py +0 -41
- mindspore/ops/_op_impl/tbe/lrn.py +0 -41
- mindspore/ops/_op_impl/tbe/lrn_grad.py +0 -42
- mindspore/ops/_op_impl/tbe/lstm_input_grad.py +0 -51
- mindspore/ops/_op_impl/tbe/masked_fill.py +0 -40
- mindspore/ops/_op_impl/tbe/masked_fill_ds.py +0 -41
- mindspore/ops/_op_impl/tbe/matmul.py +0 -53
- mindspore/ops/_op_impl/tbe/matmul_ds.py +0 -47
- mindspore/ops/_op_impl/tbe/matmul_v2.py +0 -50
- mindspore/ops/_op_impl/tbe/matrix_diag.py +0 -45
- mindspore/ops/_op_impl/tbe/matrix_diag_part.py +0 -45
- mindspore/ops/_op_impl/tbe/matrix_set_diag.py +0 -46
- mindspore/ops/_op_impl/tbe/max_pool.py +0 -39
- mindspore/ops/_op_impl/tbe/max_pool3d.py +0 -44
- mindspore/ops/_op_impl/tbe/max_pool3d_grad.py +0 -43
- mindspore/ops/_op_impl/tbe/max_pool3d_grad_grad.py +0 -44
- mindspore/ops/_op_impl/tbe/max_pool_ds.py +0 -40
- mindspore/ops/_op_impl/tbe/max_pool_grad.py +0 -43
- mindspore/ops/_op_impl/tbe/max_pool_grad_grad.py +0 -41
- mindspore/ops/_op_impl/tbe/max_pool_grad_grad_with_argmax.py +0 -41
- mindspore/ops/_op_impl/tbe/max_pool_grad_with_argmax.py +0 -42
- mindspore/ops/_op_impl/tbe/max_pool_with_argmax.py +0 -40
- mindspore/ops/_op_impl/tbe/maximum.py +0 -39
- mindspore/ops/_op_impl/tbe/maximum_ds.py +0 -40
- mindspore/ops/_op_impl/tbe/maximum_grad.py +0 -46
- mindspore/ops/_op_impl/tbe/maximum_grad_ds.py +0 -47
- mindspore/ops/_op_impl/tbe/mem_set.py +0 -38
- mindspore/ops/_op_impl/tbe/minimum.py +0 -40
- mindspore/ops/_op_impl/tbe/minimum_ds.py +0 -41
- mindspore/ops/_op_impl/tbe/minimum_grad.py +0 -46
- mindspore/ops/_op_impl/tbe/minimum_grad_ds.py +0 -47
- mindspore/ops/_op_impl/tbe/mish.py +0 -37
- mindspore/ops/_op_impl/tbe/mod.py +0 -41
- mindspore/ops/_op_impl/tbe/mod_ds.py +0 -42
- mindspore/ops/_op_impl/tbe/mul.py +0 -37
- mindspore/ops/_op_impl/tbe/mul_ds.py +0 -38
- mindspore/ops/_op_impl/tbe/mul_no_nan.py +0 -39
- mindspore/ops/_op_impl/tbe/mul_no_nan_ds.py +0 -40
- mindspore/ops/_op_impl/tbe/multilabel_margin_loss.py +0 -39
- mindspore/ops/_op_impl/tbe/neg.py +0 -39
- mindspore/ops/_op_impl/tbe/neg_ds.py +0 -40
- mindspore/ops/_op_impl/tbe/new_im2col.py +0 -40
- mindspore/ops/_op_impl/tbe/nll_loss.py +0 -41
- mindspore/ops/_op_impl/tbe/nll_loss_grad.py +0 -44
- mindspore/ops/_op_impl/tbe/nms_with_mask.py +0 -39
- mindspore/ops/_op_impl/tbe/not_equal.py +0 -41
- mindspore/ops/_op_impl/tbe/not_equal_ds.py +0 -42
- mindspore/ops/_op_impl/tbe/npu_alloc_float_status.py +0 -34
- mindspore/ops/_op_impl/tbe/npu_clear_float_status.py +0 -35
- mindspore/ops/_op_impl/tbe/npu_clear_float_status_v2.py +0 -35
- mindspore/ops/_op_impl/tbe/npu_get_float_status.py +0 -35
- mindspore/ops/_op_impl/tbe/npu_get_float_status_v2.py +0 -35
- mindspore/ops/_op_impl/tbe/one_hot.py +0 -48
- mindspore/ops/_op_impl/tbe/one_hot_ds.py +0 -45
- mindspore/ops/_op_impl/tbe/ones_like.py +0 -40
- mindspore/ops/_op_impl/tbe/ones_like_ds.py +0 -41
- mindspore/ops/_op_impl/tbe/p_s_r_o_i_pooling.py +0 -40
- mindspore/ops/_op_impl/tbe/p_s_r_o_i_pooling_grad.py +0 -40
- mindspore/ops/_op_impl/tbe/pack.py +0 -58
- mindspore/ops/_op_impl/tbe/pack_ds.py +0 -59
- mindspore/ops/_op_impl/tbe/pad_d.py +0 -40
- mindspore/ops/_op_impl/tbe/pad_d_ds.py +0 -41
- mindspore/ops/_op_impl/tbe/parallel_concat.py +0 -70
- mindspore/ops/_op_impl/tbe/parallel_resize_bilinear.py +0 -45
- mindspore/ops/_op_impl/tbe/parallel_resize_bilinear_grad.py +0 -44
- mindspore/ops/_op_impl/tbe/pdist.py +0 -36
- mindspore/ops/_op_impl/tbe/pooling.py +0 -46
- mindspore/ops/_op_impl/tbe/population_count.py +0 -38
- mindspore/ops/_op_impl/tbe/pow.py +0 -41
- mindspore/ops/_op_impl/tbe/pow_ds.py +0 -42
- mindspore/ops/_op_impl/tbe/prelu.py +0 -37
- mindspore/ops/_op_impl/tbe/prelu_ds.py +0 -38
- mindspore/ops/_op_impl/tbe/prelu_grad.py +0 -40
- mindspore/ops/_op_impl/tbe/range.py +0 -39
- mindspore/ops/_op_impl/tbe/real_div.py +0 -38
- mindspore/ops/_op_impl/tbe/real_div_ds.py +0 -39
- mindspore/ops/_op_impl/tbe/reciprocal.py +0 -36
- mindspore/ops/_op_impl/tbe/reciprocal_ds.py +0 -37
- mindspore/ops/_op_impl/tbe/reciprocal_grad.py +0 -38
- mindspore/ops/_op_impl/tbe/reciprocal_grad_ds.py +0 -39
- mindspore/ops/_op_impl/tbe/reduce_all.py +0 -38
- mindspore/ops/_op_impl/tbe/reduce_all_ds.py +0 -39
- mindspore/ops/_op_impl/tbe/reduce_any.py +0 -38
- mindspore/ops/_op_impl/tbe/reduce_any_ds.py +0 -39
- mindspore/ops/_op_impl/tbe/reduce_max.py +0 -43
- mindspore/ops/_op_impl/tbe/reduce_max_ds.py +0 -41
- mindspore/ops/_op_impl/tbe/reduce_mean.py +0 -40
- mindspore/ops/_op_impl/tbe/reduce_mean_ds.py +0 -42
- mindspore/ops/_op_impl/tbe/reduce_min.py +0 -41
- mindspore/ops/_op_impl/tbe/reduce_min_ds.py +0 -41
- mindspore/ops/_op_impl/tbe/reduce_prod.py +0 -42
- mindspore/ops/_op_impl/tbe/reduce_prod_ds.py +0 -41
- mindspore/ops/_op_impl/tbe/reduce_std.py +0 -44
- mindspore/ops/_op_impl/tbe/reduce_sum.py +0 -39
- mindspore/ops/_op_impl/tbe/reduce_sum_ds.py +0 -41
- mindspore/ops/_op_impl/tbe/relu.py +0 -39
- mindspore/ops/_op_impl/tbe/relu6.py +0 -38
- mindspore/ops/_op_impl/tbe/relu6_ds.py +0 -39
- mindspore/ops/_op_impl/tbe/relu6_grad.py +0 -43
- mindspore/ops/_op_impl/tbe/relu6_grad_ds.py +0 -44
- mindspore/ops/_op_impl/tbe/relu_ds.py +0 -40
- mindspore/ops/_op_impl/tbe/relu_grad.py +0 -41
- mindspore/ops/_op_impl/tbe/relu_grad_ds.py +0 -42
- mindspore/ops/_op_impl/tbe/relu_grad_v2.py +0 -40
- mindspore/ops/_op_impl/tbe/relu_grad_v2_ds.py +0 -41
- mindspore/ops/_op_impl/tbe/relu_v2.py +0 -40
- mindspore/ops/_op_impl/tbe/relu_v2_ds.py +0 -41
- mindspore/ops/_op_impl/tbe/renorm.py +0 -39
- mindspore/ops/_op_impl/tbe/resize_bilinear.py +0 -40
- mindspore/ops/_op_impl/tbe/resize_bilinear_grad.py +0 -41
- mindspore/ops/_op_impl/tbe/resize_bilinear_v2.py +0 -43
- mindspore/ops/_op_impl/tbe/resize_nearest_neighbor.py +0 -40
- mindspore/ops/_op_impl/tbe/resize_nearest_neighbor_ds.py +0 -40
- mindspore/ops/_op_impl/tbe/resize_nearest_neighbor_grad.py +0 -39
- mindspore/ops/_op_impl/tbe/resize_nearest_neighbor_grad_ds.py +0 -42
- mindspore/ops/_op_impl/tbe/reverse_v2_d.py +0 -37
- mindspore/ops/_op_impl/tbe/rint.py +0 -37
- mindspore/ops/_op_impl/tbe/rint_ds.py +0 -38
- mindspore/ops/_op_impl/tbe/roi_align.py +0 -43
- mindspore/ops/_op_impl/tbe/roi_align_ds.py +0 -44
- mindspore/ops/_op_impl/tbe/roi_align_grad.py +0 -43
- mindspore/ops/_op_impl/tbe/roi_align_grad_ds.py +0 -44
- mindspore/ops/_op_impl/tbe/roll.py +0 -42
- mindspore/ops/_op_impl/tbe/round.py +0 -38
- mindspore/ops/_op_impl/tbe/round_ds.py +0 -39
- mindspore/ops/_op_impl/tbe/rsqrt.py +0 -37
- mindspore/ops/_op_impl/tbe/rsqrt_ds.py +0 -38
- mindspore/ops/_op_impl/tbe/rsqrt_grad.py +0 -40
- mindspore/ops/_op_impl/tbe/rsqrt_grad_ds.py +0 -41
- mindspore/ops/_op_impl/tbe/scatter_add.py +0 -44
- mindspore/ops/_op_impl/tbe/scatter_div.py +0 -46
- mindspore/ops/_op_impl/tbe/scatter_max.py +0 -45
- mindspore/ops/_op_impl/tbe/scatter_min.py +0 -45
- mindspore/ops/_op_impl/tbe/scatter_mul.py +0 -44
- mindspore/ops/_op_impl/tbe/scatter_nd.py +0 -41
- mindspore/ops/_op_impl/tbe/scatter_nd_add.py +0 -45
- mindspore/ops/_op_impl/tbe/scatter_nd_d.py +0 -41
- mindspore/ops/_op_impl/tbe/scatter_nd_ds.py +0 -49
- mindspore/ops/_op_impl/tbe/scatter_nd_sub.py +0 -47
- mindspore/ops/_op_impl/tbe/scatter_nd_sub_ds.py +0 -48
- mindspore/ops/_op_impl/tbe/scatter_nd_update.py +0 -47
- mindspore/ops/_op_impl/tbe/scatter_nd_update_ds.py +0 -48
- mindspore/ops/_op_impl/tbe/scatter_non_aliasing_add.py +0 -39
- mindspore/ops/_op_impl/tbe/scatter_non_aliasing_add_ds.py +0 -40
- mindspore/ops/_op_impl/tbe/scatter_sub.py +0 -47
- mindspore/ops/_op_impl/tbe/scatter_sub_ds.py +0 -48
- mindspore/ops/_op_impl/tbe/scatter_update.py +0 -43
- mindspore/ops/_op_impl/tbe/select.py +0 -38
- mindspore/ops/_op_impl/tbe/select_ds.py +0 -39
- mindspore/ops/_op_impl/tbe/selu.py +0 -39
- mindspore/ops/_op_impl/tbe/selu_ds.py +0 -40
- mindspore/ops/_op_impl/tbe/sgd.py +0 -62
- mindspore/ops/_op_impl/tbe/sigmoid.py +0 -37
- mindspore/ops/_op_impl/tbe/sigmoid_cross_entropy_with_logits.py +0 -41
- mindspore/ops/_op_impl/tbe/sigmoid_cross_entropy_with_logits_ds.py +0 -42
- mindspore/ops/_op_impl/tbe/sigmoid_cross_entropy_with_logits_grad.py +0 -42
- mindspore/ops/_op_impl/tbe/sigmoid_cross_entropy_with_logits_grad_ds.py +0 -43
- mindspore/ops/_op_impl/tbe/sigmoid_ds.py +0 -38
- mindspore/ops/_op_impl/tbe/sigmoid_grad.py +0 -39
- mindspore/ops/_op_impl/tbe/sigmoid_grad_ds.py +0 -40
- mindspore/ops/_op_impl/tbe/sign.py +0 -38
- mindspore/ops/_op_impl/tbe/sign_ds.py +0 -39
- mindspore/ops/_op_impl/tbe/sin.py +0 -37
- mindspore/ops/_op_impl/tbe/sin_ds.py +0 -38
- mindspore/ops/_op_impl/tbe/sinh.py +0 -37
- mindspore/ops/_op_impl/tbe/sinh_ds.py +0 -38
- mindspore/ops/_op_impl/tbe/slice.py +0 -58
- mindspore/ops/_op_impl/tbe/smooth_l1_loss.py +0 -45
- mindspore/ops/_op_impl/tbe/smooth_l1_loss_ds.py +0 -46
- mindspore/ops/_op_impl/tbe/smooth_l1_loss_grad.py +0 -46
- mindspore/ops/_op_impl/tbe/smooth_l1_loss_grad_ds.py +0 -47
- mindspore/ops/_op_impl/tbe/soft_margin_loss.py +0 -38
- mindspore/ops/_op_impl/tbe/soft_margin_loss_grad.py +0 -39
- mindspore/ops/_op_impl/tbe/soft_shrink.py +0 -36
- mindspore/ops/_op_impl/tbe/soft_shrink_grad.py +0 -38
- mindspore/ops/_op_impl/tbe/softmax.py +0 -37
- mindspore/ops/_op_impl/tbe/softmax_cross_entropy_with_logits.py +0 -38
- mindspore/ops/_op_impl/tbe/softmax_cross_entropy_with_logits_ds.py +0 -39
- mindspore/ops/_op_impl/tbe/softmax_ds.py +0 -38
- mindspore/ops/_op_impl/tbe/softmax_grad_ext.py +0 -42
- mindspore/ops/_op_impl/tbe/softmax_v2_with_dropout_do_mask_v3.py +0 -39
- mindspore/ops/_op_impl/tbe/softplus.py +0 -37
- mindspore/ops/_op_impl/tbe/softplus_ds.py +0 -38
- mindspore/ops/_op_impl/tbe/softplus_grad.py +0 -38
- mindspore/ops/_op_impl/tbe/softplus_grad_ds.py +0 -38
- mindspore/ops/_op_impl/tbe/softsign.py +0 -37
- mindspore/ops/_op_impl/tbe/softsign_ds.py +0 -38
- mindspore/ops/_op_impl/tbe/sort.py +0 -38
- mindspore/ops/_op_impl/tbe/sort_ds.py +0 -39
- mindspore/ops/_op_impl/tbe/space_to_batch.py +0 -38
- mindspore/ops/_op_impl/tbe/space_to_batch_nd.py +0 -38
- mindspore/ops/_op_impl/tbe/space_to_depth.py +0 -47
- mindspore/ops/_op_impl/tbe/sparse_apply_adadelta.py +0 -56
- mindspore/ops/_op_impl/tbe/sparse_apply_adagrad.py +0 -45
- mindspore/ops/_op_impl/tbe/sparse_apply_adagrad_ds.py +0 -46
- mindspore/ops/_op_impl/tbe/sparse_apply_adagrad_v2.py +0 -46
- mindspore/ops/_op_impl/tbe/sparse_apply_adagrad_v2_ds.py +0 -47
- mindspore/ops/_op_impl/tbe/sparse_apply_ftrl_d.py +0 -53
- mindspore/ops/_op_impl/tbe/sparse_apply_ftrl_d_ds.py +0 -50
- mindspore/ops/_op_impl/tbe/sparse_apply_ftrl_v2.py +0 -50
- mindspore/ops/_op_impl/tbe/sparse_apply_proximal_adagrad.py +0 -66
- mindspore/ops/_op_impl/tbe/sparse_apply_proximal_adagrad_ds.py +0 -67
- mindspore/ops/_op_impl/tbe/sparse_apply_r_m_s_prop.py +0 -57
- mindspore/ops/_op_impl/tbe/sparse_apply_r_m_s_prop_ds.py +0 -58
- mindspore/ops/_op_impl/tbe/sparse_gather_v2.py +0 -56
- mindspore/ops/_op_impl/tbe/sparse_gather_v2_ds.py +0 -58
- mindspore/ops/_op_impl/tbe/split_d.py +0 -38
- mindspore/ops/_op_impl/tbe/split_d_ds.py +0 -39
- mindspore/ops/_op_impl/tbe/split_v.py +0 -39
- mindspore/ops/_op_impl/tbe/splitv.py +0 -39
- mindspore/ops/_op_impl/tbe/sqrt.py +0 -37
- mindspore/ops/_op_impl/tbe/sqrt_ds.py +0 -38
- mindspore/ops/_op_impl/tbe/sqrt_grad.py +0 -43
- mindspore/ops/_op_impl/tbe/sqrt_grad_ds.py +0 -44
- mindspore/ops/_op_impl/tbe/square.py +0 -38
- mindspore/ops/_op_impl/tbe/square_ds.py +0 -39
- mindspore/ops/_op_impl/tbe/square_sum_all.py +0 -40
- mindspore/ops/_op_impl/tbe/square_sum_all_ds.py +0 -41
- mindspore/ops/_op_impl/tbe/square_sum_v1.py +0 -38
- mindspore/ops/_op_impl/tbe/square_sum_v1_ds.py +0 -39
- mindspore/ops/_op_impl/tbe/square_sum_v2.py +0 -39
- mindspore/ops/_op_impl/tbe/squared_difference.py +0 -39
- mindspore/ops/_op_impl/tbe/squared_difference_ds.py +0 -41
- mindspore/ops/_op_impl/tbe/squeeze.py +0 -37
- mindspore/ops/_op_impl/tbe/strided_read.py +0 -38
- mindspore/ops/_op_impl/tbe/strided_slice_d.py +0 -44
- mindspore/ops/_op_impl/tbe/strided_slice_ds.py +0 -71
- mindspore/ops/_op_impl/tbe/strided_slice_grad_d.py +0 -51
- mindspore/ops/_op_impl/tbe/strided_slice_grad_ds.py +0 -57
- mindspore/ops/_op_impl/tbe/strided_write.py +0 -38
- mindspore/ops/_op_impl/tbe/sub.py +0 -39
- mindspore/ops/_op_impl/tbe/sub_ds.py +0 -40
- mindspore/ops/_op_impl/tbe/tan.py +0 -38
- mindspore/ops/_op_impl/tbe/tan_ds.py +0 -39
- mindspore/ops/_op_impl/tbe/tanh.py +0 -37
- mindspore/ops/_op_impl/tbe/tanh_ds.py +0 -38
- mindspore/ops/_op_impl/tbe/tanh_grad.py +0 -39
- mindspore/ops/_op_impl/tbe/tanh_grad_ds.py +0 -40
- mindspore/ops/_op_impl/tbe/tensor_move.py +0 -49
- mindspore/ops/_op_impl/tbe/tensor_move_ds.py +0 -50
- mindspore/ops/_op_impl/tbe/tensor_scatter_update.py +0 -41
- mindspore/ops/_op_impl/tbe/tile.py +0 -37
- mindspore/ops/_op_impl/tbe/tile_ds.py +0 -42
- mindspore/ops/_op_impl/tbe/top_k.py +0 -42
- mindspore/ops/_op_impl/tbe/top_k_ds.py +0 -43
- mindspore/ops/_op_impl/tbe/trans_data.py +0 -167
- mindspore/ops/_op_impl/tbe/trans_data_ds.py +0 -180
- mindspore/ops/_op_impl/tbe/trans_data_rnn.py +0 -44
- mindspore/ops/_op_impl/tbe/transpose.py +0 -60
- mindspore/ops/_op_impl/tbe/transpose_d.py +0 -47
- mindspore/ops/_op_impl/tbe/transpose_nod.py +0 -60
- mindspore/ops/_op_impl/tbe/trunc.py +0 -39
- mindspore/ops/_op_impl/tbe/truncate_div.py +0 -41
- mindspore/ops/_op_impl/tbe/truncate_div_ds.py +0 -42
- mindspore/ops/_op_impl/tbe/truncate_mod.py +0 -41
- mindspore/ops/_op_impl/tbe/truncate_mod_ds.py +0 -42
- mindspore/ops/_op_impl/tbe/unpack.py +0 -38
- mindspore/ops/_op_impl/tbe/unpack_ds.py +0 -39
- mindspore/ops/_op_impl/tbe/unsorted_segment_max.py +0 -49
- mindspore/ops/_op_impl/tbe/unsorted_segment_max_ds.py +0 -40
- mindspore/ops/_op_impl/tbe/unsorted_segment_min.py +0 -49
- mindspore/ops/_op_impl/tbe/unsorted_segment_min_ds.py +0 -40
- mindspore/ops/_op_impl/tbe/unsorted_segment_prod.py +0 -49
- mindspore/ops/_op_impl/tbe/unsorted_segment_prod_ds.py +0 -38
- mindspore/ops/_op_impl/tbe/unsorted_segment_sum.py +0 -38
- mindspore/ops/_op_impl/tbe/unsorted_segment_sum_ds.py +0 -41
- mindspore/ops/_op_impl/tbe/wts_arq.py +0 -40
- mindspore/ops/_op_impl/tbe/xdivy.py +0 -38
- mindspore/ops/_op_impl/tbe/xdivy_ds.py +0 -39
- mindspore/ops/_op_impl/tbe/xlogy.py +0 -38
- mindspore/ops/_op_impl/tbe/xlogy_ds.py +0 -39
- mindspore/ops/_op_impl/tbe/zeros_like.py +0 -41
- mindspore/ops/_op_impl/tbe/zeros_like_ds.py +0 -42
- mindspore/ops/_tracefunc.py +0 -241
- mindspore/ops/arg_dtype_cast.py +0 -54
- mindspore/ops/silent_check.py +0 -162
- mindspore/profiler/parser/msadvisor_analyzer.py +0 -82
- mindspore/profiler/parser/msadvisor_parser.py +0 -240
- mindspore/rewrite/api/tree_node_helper.py +0 -60
- mindspore/rewrite/ast_helpers/ast_creator.py +0 -115
- mindspore/rewrite/ast_transformers/flatten_recursive_stmt.py +0 -267
- mindspore/rewrite/ast_transformers/remove_return_out_of_if.py +0 -228
- mindspore/rewrite/namespace.py +0 -53
- mindspore-2.2.14.dist-info/RECORD +0 -1924
- {mindspore-2.2.14.dist-info → mindspore-2.4.0.dist-info}/WHEEL +0 -0
- {mindspore-2.2.14.dist-info → mindspore-2.4.0.dist-info}/top_level.txt +0 -0
mindspore/context.py
CHANGED
@@ -1,4 +1,4 @@
-# Copyright 2020-
+# Copyright 2020-2024 Huawei Technologies Co., Ltd
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -34,6 +34,8 @@ from mindspore.parallel._auto_parallel_context import _set_auto_parallel_context
 from mindspore.parallel._ps_context import _set_ps_context, _get_ps_context, _reset_ps_context, \
     _need_reset_device_target_for_ps
 from mindspore.parallel._offload_context import _set_offload_context, _get_offload_context
+from mindspore.hal.device import is_initialized
+from mindspore.common import api

 __all__ = ['GRAPH_MODE', 'PYNATIVE_MODE', 'STRICT', 'COMPATIBLE', 'LAX', 'set_context', 'get_context',
            'set_auto_parallel_context', 'get_auto_parallel_context', 'reset_auto_parallel_context', 'ParallelMode',
@@ -50,6 +52,10 @@ STRICT = 0
 COMPATIBLE = 1
 LAX = 2

+# Enumerate for the property 'debug_level'.
+RELEASE = 0
+DEBUG = 1
+

 def _make_directory(path):
     """Make directory."""
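The `RELEASE`/`DEBUG` constants added above back the new `debug_level` context property. A minimal usage sketch (my own illustration, not part of the diff; assumes a MindSpore 2.4.0 install):

    import mindspore as ms
    from mindspore import context

    # DEBUG records extra information while compiling graphs, which helps when
    # locating compile-time errors; RELEASE (the default) discards it for speed.
    ms.set_context(debug_level=context.DEBUG)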
@@ -63,7 +69,7 @@ def _make_directory(path):
     if not os.path.exists(path):
         logger.debug("The directory(%s) doesn't exist, will create it", path)
         try:
-            os.makedirs(path)
+            os.makedirs(path, mode=0o700)
         except FileExistsError:
             logger.debug("The directory(%s) already exist.", path)
         except PermissionError as e:
@@ -161,6 +167,12 @@ class _Context:
         self._context_switches = _ContextSwitchInfo(False)
         self._context_handle = MSContext.get_instance()
         self._support_binary = False
+        self.enable_compile_cache = None
+        self._mode = PYNATIVE_MODE
+        self.aoe_config = {}
+        self.jit_config = {}
+        self.ascend_config = {}
+        self.gpu_config = {}

     def __getattribute__(self, attr):
         value = object.__getattribute__(self, attr)
@@ -176,7 +188,11 @@ class _Context:

     def get_mode(self):
         """Get current mode."""
-        return self.
+        return self._mode
+
+    def get_jit_config(self):
+        """Get current jit_config."""
+        return self.jit_config

     def set_mode(self, mode):
         """
@@ -204,6 +220,7 @@ class _Context:
             raise ValueError(f"For 'context.set_context', the argument 'mode' should be context.GRAPH_MODE (0) "
                              f"or context.PYNATIVE_MODE (1), but got {mode}.")
         self.set_param(ms_ctx_param.mode, mode)
+        self._mode = mode

     def set_jit_syntax_level(self, level):
         """"Set the JIT syntax level for graph compiling"""
@@ -212,6 +229,13 @@ class _Context:
                              f"or context.LAX, but got {level}.")
         self.set_param(ms_ctx_param.jit_syntax_level, level)

+    def set_debug_level(self, level):
+        """"Set the debug level for graph compiling"""
+        if level != RELEASE and level != DEBUG:
+            raise ValueError(f"For 'context.set_debug_level', the argument 'level' should be context.RELEASE "
+                             f"or context.DEBUG, but got {level}.")
+        self.set_param(ms_ctx_param.debug_level, level)
+
     def set_memory_optimize_level(self, memory_optimize_level):
         """
         The memory optimize level, support "O0", "O1".
@@ -228,6 +252,16 @@ class _Context:
         else:
             self.set_param(ms_ctx_param.memory_optimize_level, 1)

+    def set_exec_order(self, exec_order):
+        """
+        The execution order mode, support "bfs", "dfs", "gpto".
+        """
+        exec_order_modes = ["bfs", "dfs", "gpto"]
+        if exec_order not in exec_order_modes:
+            raise ValueError(f"For 'context.set_context', the argument 'exec_order' must be one of "
+                             f"{exec_order_modes}, but got {exec_order}.")
+        self.set_param(ms_ctx_param.exec_order, exec_order)
+
     def set_memory_offload(self, memory_offload):
         """
         Enable memory offload or not, support "ON", "OFF".
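`set_exec_order` above is wired into `set_context` later in this diff (it is registered in the setter map and listed as an Ascend-only row in the `set_context` table). A usage sketch under those assumptions:

    import mindspore as ms

    # Only "bfs", "dfs" and "gpto" pass the validation in set_exec_order.
    ms.set_context(exec_order="dfs")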
@@ -257,6 +291,29 @@ class _Context:
                              f"{deterministic_options}, but got {deterministic}.")
         self.set_param(ms_ctx_param.deterministic, deterministic)

+        hccl_deterministic = os.getenv("HCCL_DETERMINISTIC")
+        te_parallel_compiler = os.getenv("TE_PARALLEL_COMPILER")
+        if deterministic == "ON":
+            if hccl_deterministic and hccl_deterministic != "true":
+                logger.warning(f"Environment 'HCCL_DETERMINISTIC' should be 'true' when set deterministic='ON', but "
+                               f"got '{hccl_deterministic}'. 'HCCL_DETERMINISTIC' will be set to 'true'.")
+            if te_parallel_compiler and te_parallel_compiler != "1":
+                logger.warning(f"Environment 'TE_PARALLEL_COMPILER' should be '1' when set deterministic='ON', but "
+                               f"got '{te_parallel_compiler}'. 'TE_PARALLEL_COMPILER' will be set to '1'.")
+            os.environ["HCCL_DETERMINISTIC"] = "true"
+            os.environ["TE_PARALLEL_COMPILER"] = "1"
+        if deterministic == "OFF":
+            if hccl_deterministic and hccl_deterministic != "false":
+                logger.warning(f"Environment 'HCCL_DETERMINISTIC' should not be set or be 'false' when set "
+                               f"deterministic='OFF', but got '{hccl_deterministic}'. 'HCCL_DETERMINISTIC' "
+                               f"will be unset.")
+                del os.environ["HCCL_DETERMINISTIC"]
+            if te_parallel_compiler and te_parallel_compiler != "0":
+                logger.warning(f"Environment 'TE_PARALLEL_COMPILER' should not be set or be '0' when set "
+                               f"deterministic='OFF', but got '{te_parallel_compiler}'. 'TE_PARALLEL_COMPILER' "
+                               f"will be unset.")
+                del os.environ["TE_PARALLEL_COMPILER"]
+
     def set_ascend_config(self, ascend_config):
         """
         Enable ascend config.
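A sketch of the observable effect of the deterministic hunk above (my own example): turning determinism on now exports the two environment variables on the caller's behalf.

    import os
    import mindspore as ms

    ms.set_context(deterministic="ON")
    # Synced by the setter shown above.
    assert os.environ["HCCL_DETERMINISTIC"] == "true"
    assert os.environ["TE_PARALLEL_COMPILER"] == "1"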
@@ -268,11 +325,18 @@ class _Context:
               "allow_mix_precision_fp16" and "allow_mix_precision_bf16".
             - jit_compile (bool): ``False`` and ``True``.
             - atomic_clean_policy (int): ``0`` and ``1``. Default: ``1`` .
+            - op_precision_mode (str): precision mode config file path.
+            - op_debug_option (str): Enable debugging options for Ascend operators,
+              default not enabled, only supports ``"oom"`` currently.
+              ``"oom"``: Detect memory out of bounds.
+            - ge_options (dict): Global or session CANN options.
             - exception_dump (str): Enable exception dump for Ascend operators. ``"0"`` , ``"1"`` and ``"2"``.
               Default: ``"2"`` .
-            - op_precision_mode (str): config file path.
             - parallel_speed_up_json_path(Union[str, None]): The path to the parallel speed up json file.
               If its value is None or '', it does not take effect. Default None.
+            - host_scheduling_max_threshold(int): The host scheduling max threshold.
+            - hccl_watchdog (bool): Enable a thread to monitor the failure of collective communication.
+              Default: ``True`` .
         """
         ascend_cfg_modes = {
             'precision_mode': ["force_fp16", "allow_fp32_to_fp16", "allow_mix_precision", "must_keep_origin_dtype",
@@ -284,8 +348,16 @@ class _Context:
             'conv_allow_hf32': [True, False],
             'exception_dump': ["0", "1", "2"],
             'op_precision_mode': (str,),
+            'ge_options': (dict,),
             'parallel_speed_up_json_path': (str, None),
-            '
+            'host_scheduling_max_threshold': (int,),
+            'cur_step_num': (int,),
+            'save_checkpoint_steps': (int,),
+            'need_ckpt': (bool,),
+            'last_triggered_step': (int,),
+            'hccl_watchdog': (bool,),
+            'topo_order': (dict,),
+            'op_debug_option': (str, None),
         }
         ascend_cfg_setters = {
             'precision_mode': self._get_ascend_config_setter('precision_mode'),
@@ -294,8 +366,16 @@ class _Context:
             'matmul_allow_hf32': self._get_ascend_config_setter('matmul_allow_hf32', lambda v: "1" if v else "0"),
             'conv_allow_hf32': self._get_ascend_config_setter('conv_allow_hf32', lambda v: "1" if v else "0"),
             'exception_dump': self._get_ascend_config_setter('exception_dump'),
+            'op_debug_option': self._set_op_debug_option,
             'op_precision_mode': self._set_op_precision_mode,
+            'ge_options': self._set_ge_options,
             'parallel_speed_up_json_path': self._set_speedup_config_path,
+            'host_scheduling_max_threshold': self._get_ascend_config_setter('host_scheduling_max_threshold', str),
+            'cur_step_num': self._set_cur_step_num,
+            'save_checkpoint_steps': self._set_save_checkpoint_steps,
+            'need_ckpt': self._set_need_ckpt,
+            'last_triggered_step': self._set_last_triggered_step,
+            'hccl_watchdog': self._set_hccl_watchdog,
             'topo_order': self._set_topo_order
         }
         ascend_cfg_set = tuple(ascend_cfg_modes.keys())
@@ -312,6 +392,7 @@ class _Context:
                                 f"{supported_modes}, but got {type(ascend_value)}.")
             cfg_setter = ascend_cfg_setters.get(ascend_key)
             cfg_setter(ascend_value)
+        self.ascend_config = ascend_config

     def set_gpu_config(self, gpu_config):
         """
@@ -353,6 +434,33 @@ class _Context:
                 self.set_param(ms_ctx_param.conv_allow_tf32, gpu_config[gpu_key])
             if gpu_key == 'matmul_allow_tf32':
                 self.set_param(ms_ctx_param.matmul_allow_tf32, gpu_config[gpu_key])
+        self.gpu_config = gpu_config
+
+    def set_jit_config(self, jit_config):
+        """
+        Enable jit config.
+
+        Args:
+            jit_config (dict):
+
+                - jit_level (str): "O0", "O1" or "O2" to control the compilation optimization level.
+        """
+        jit_cfgs = {'jit_level': ["O0", "O1", "O2"], 'infer_boost': ["on", "off"]}
+        key_args_map = {'jit_level': ms_ctx_param.jit_level, 'infer_boost': ms_ctx_param.infer_boost}
+        for jit_key in jit_config:
+            if jit_key not in jit_cfgs:
+                raise ValueError(f"For 'context.set_context', the key of argument 'jit_config' must be one of "
+                                 f"{jit_cfgs}, but got {jit_key}.")
+            supported_value = jit_cfgs.get(jit_key)
+            if jit_config[jit_key] not in supported_value:
+                raise ValueError(f"For 'jit_config', the value of argument {jit_key} must be one of "
+                                 f"{supported_value}, but got {jit_config[jit_key]}.")
+            self.set_param(key_args_map[jit_key], jit_config[jit_key])
+        self.jit_config = jit_config
+
+        jit_level = jit_config.get("jit_level", None)
+        if jit_config.get("infer_boost", None) == "on" and (jit_level == "O1" or jit_level == "O2"):
+            raise ValueError(f"Only jit_level set O0 can set infer_boost to on.")

     def set_backend_policy(self, policy):
         success = self._context_handle.set_backend_policy(policy)
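A sketch of the new `jit_config` validation above (my own example): only the keys `jit_level` and `infer_boost` are accepted, and `infer_boost="on"` is rejected unless `jit_level` is `"O0"`.

    import mindspore as ms

    ms.set_context(jit_config={"jit_level": "O0", "infer_boost": "on"})  # accepted
    # ms.set_context(jit_config={"jit_level": "O2", "infer_boost": "on"})  # ValueError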
@@ -424,6 +532,7 @@ class _Context:
                                 f"{supported_value}, but got {aoe_config[aoe_config_key]}.")
             if aoe_config_key == 'job_type':
                 self.set_param(ms_ctx_param.aoe_job_type, aoe_config[aoe_config_key])
+        self.aoe_config = aoe_config

     def set_device_id(self, device_id):
         if device_id < 0 or device_id > 4095:
@@ -475,9 +584,13 @@ class _Context:

     def set_mempool_block_size(self, mempool_block_size):
         """Set the block size of memory pool."""
-
+        global_jit_config = get_jit_config()
+        is_force_kbk = False
+        if global_jit_config:
+            is_force_kbk = global_jit_config.get('jit_level') == "O0" or global_jit_config.get('jit_level') == "O1"
+        if _get_mode() == GRAPH_MODE and not is_force_kbk:
             logger.warning("Graph mode doesn't support to set parameter 'mempool_block_size' of context currently, "
-                           "you can use context.set_context to set pynative mode.")
+                           "you can use context.set_context to set pynative mode or set jit_level=O0/O1.")
             return
         if not Validator.check_str_by_regular(mempool_block_size, _RE_PATTERN):
             raise ValueError("For 'context.set_context', the argument 'mempool_block_size' should be in "
@@ -558,13 +671,16 @@ class _Context:
             'inter_op_parallel_num': set_inter_op_parallel_num,
             'runtime_num_threads': set_runtime_num_threads,
             'memory_optimize_level': set_memory_optimize_level,
+            'exec_order': set_exec_order,
             'op_timeout': set_op_timeout,
             'memory_offload': set_memory_offload,
             'deterministic': set_deterministic,
             'ascend_config': set_ascend_config,
             'jit_syntax_level': set_jit_syntax_level,
+            'debug_level': set_debug_level,
             'gpu_config': set_gpu_config,
             'aoe_config': set_aoe_config,
+            'jit_config': set_jit_config,
         }

     @property
@@ -612,6 +728,16 @@ class _Context:
             trans_fn = lambda x: x
         return _config_setter

+    def _set_op_debug_option(self, option_value):
+        valid_order = {'oom'}
+        if not isinstance(option_value, str):
+            raise TypeError(f"For 'ascend_config', the type of 'op_debug_option' must be str, "
+                            f"but got {type(option_value)}.")
+        if option_value not in valid_order:
+            raise ValueError(f"For 'ascend_config', the 'op_debug_option' supports being set to 'oom' currently, "
+                             f"but got {option_value}.")
+        self.set_param(ms_ctx_param.op_debug_option, option_value)
+
     def _set_op_precision_mode(self, ascend_value):
         op_precision_path = ascend_value
         real_path = os.path.realpath(op_precision_path)
@@ -620,6 +746,28 @@ class _Context:
                              f"got '{op_precision_path}'.")
         self.set_param(ms_ctx_param.op_precision_mode, ascend_value)

+    def _set_ge_options(self, ge_options):
+        """Set ge options."""
+        for level, options in ge_options.items():
+            if level not in ['global', 'session']:
+                raise ValueError(f"For 'ascend_config', the key of ge_options must be one of "
+                                 f"('global', 'session'), but got {level}.")
+
+            if not isinstance(options, dict):
+                raise TypeError(f"For 'ge_options', the type of {level} options must be dict, "
+                                f"but got {type(options)}. The error options: {options}.")
+
+            for key, value in options.items():
+                if not isinstance(key, str):
+                    raise TypeError(f"For 'ge_options', the type of key and value must be str, "
+                                    f"but got {type(key)}. The error key is {key}.")
+                if not isinstance(value, str):
+                    raise TypeError(f"For 'ge_options', the type of key and value must be str, "
+                                    f"but got {type(value)}. The error value is {value}")
+
+        options_str = json.dumps(ge_options)
+        self.set_param(ms_ctx_param.ge_options, options_str)
+
     def _set_topo_order(self, topo_order):
         """
         Set topo order.
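A sketch of how `_set_ge_options` is reached through the public API (my own example; the CANN option name is illustrative, not taken from this diff): the outer keys must be 'global' or 'session', and every inner key/value must be a string, since the dict is serialized with json.dumps before being handed to the backend.

    import mindspore as ms

    ms.set_context(ascend_config={
        "ge_options": {
            "global": {"ge.exec.someOption": "1"},  # hypothetical option name
            "session": {},
        }
    })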
@@ -642,33 +790,81 @@ class _Context:
         options_str = json.dumps(topo_order)
         self.set_param(ms_ctx_param.topo_order, options_str)

+    def _set_hccl_watchdog(self, flag):
+        """set hccl watchdog"""
+        if not isinstance(flag, bool):
+            raise TypeError(f"For 'ascend_config', the type of 'hccl_watchdog' must be bool, but got {type(flag)}.")
+        self.set_param(ms_ctx_param.hccl_watchdog, flag)
+
+    def _set_need_ckpt(self, need_ckpt):
+        """Set need ckpt flag"""
+        if not isinstance(need_ckpt, bool):
+            raise TypeError(f"For step num, the value type should be int, but got {type(need_ckpt)}, {need_ckpt}")
+        self.set_param(ms_ctx_param.need_ckpt, need_ckpt)
+
+    def _set_cur_step_num(self, step_num):
+        """set current step num at every step begin"""
+        if not isinstance(step_num, int):
+            raise TypeError(f"For step num, the value type should be int, but got {type(step_num)}, {step_num}")
+        self.set_param(ms_ctx_param.cur_step_num, step_num)
+
+    def _set_save_checkpoint_steps(self, steps):
+        """set save checkpoint steps before run"""
+        if not isinstance(steps, int):
+            raise TypeError(f"For step num, the value type should be int, but got {type(steps)}, {steps}")
+        self.set_param(ms_ctx_param.save_checkpoint_steps, steps)
+
+    def _set_last_triggered_step(self, step):
+        """set last triggered save ckpt steps before run"""
+        if not isinstance(step, int):
+            raise TypeError(f"For step num, the value type should be int, but got {type(step)}, {step}")
+        self.set_param(ms_ctx_param.last_triggered_step, step)
+
     def _set_speedup_config_path(self, speedup_config_path):
         """"Check and set speedup config for auto parallel."""
         if speedup_config_path is None or speedup_config_path == "":
             return
-        speedup_config_real_path = os.path.
+        speedup_config_real_path = os.path.realpath(speedup_config_path)
         if not os.path.exists(speedup_config_real_path):
             raise ValueError(f"For 'ascend_config', the path to parallel_speed_up_json: "
                              f"{speedup_config_real_path} does not exist, please check whether the "
                              f"'parallel_speed_up_json_path' is correct.")
         try:
-            valid_option = {"recompute_comm_overlap": ms_ctx_param.recompute_comm_overlap,
-                            "matmul_grad_comm_overlap": ms_ctx_param.matmul_grad_comm_overlap,
-                            "enable_task_opt": ms_ctx_param.enable_task_opt,
-                            "enable_grad_comm_opt": ms_ctx_param.enable_grad_comm_opt,
-                            "
-
-                            "
+            valid_option = {"recompute_comm_overlap": (ms_ctx_param.recompute_comm_overlap, bool),
+                            "matmul_grad_comm_overlap": (ms_ctx_param.matmul_grad_comm_overlap, bool),
+                            "enable_task_opt": (ms_ctx_param.enable_task_opt, bool),
+                            "enable_grad_comm_opt": (ms_ctx_param.enable_grad_comm_opt, bool),
+                            "recompute_allgather_overlap_fagrad":
+                                (ms_ctx_param.recompute_allgather_overlap_fagrad, bool),
+                            "interleaved_matmul_comm": (ms_ctx_param.interleaved_matmul_comm, bool),
+                            "bias_add_comm_swap": (ms_ctx_param.bias_add_comm_swap, bool),
+                            "enable_opt_shard_comm_opt": (ms_ctx_param.enable_opt_shard_comm_opt, bool),
+                            "enable_begin_end_inline_opt": (ms_ctx_param.enable_begin_end_inline_opt, bool),
+                            "enable_concat_eliminate_opt": (ms_ctx_param.enable_concat_eliminate_opt, bool),
+                            "interleaved_layernorm_comm": (ms_ctx_param.interleaved_layernorm_comm, bool),
+                            "enable_allreduce_slice_to_reducescatter":
+                                (ms_ctx_param.enable_allreduce_slice_to_reducescatter, bool),
+                            "enable_interleave_split_concat_branch":
+                                (ms_ctx_param.enable_interleave_split_concat_branch, bool),
+                            "enable_offloading_packed_experts": (ms_ctx_param.enable_offloading_packed_experts, bool),
+                            "compute_communicate_fusion_level":
+                                (ms_ctx_param.compute_communicate_fusion_level, int),
+                            "enable_flash_attention_load_balance":
+                                (ms_ctx_param.enable_flash_attention_load_balance, bool),
+                            "dataset_broadcast_opt_level":
+                                (ms_ctx_param.dataset_broadcast_opt_level, int)}
             with open(speedup_config_real_path, 'r') as f:
                 speedup_config = json.load(f)
-            for
-            if not isinstance(
-            raise TypeError("key {} is not a str".format(
-            if
-            raise ValueError("key {} should be one of {}.".format(
-
-
-
+            for key, value in speedup_config.items():
+                if not isinstance(key, str):
+                    raise TypeError("key {} is not a str".format(key))
+                if key not in valid_option:
+                    raise ValueError("key {} should be one of {}.".format(key, valid_option.keys()))
+                set_func, valid_type = valid_option.get(key)
+                if not isinstance(value, valid_type):
+                    raise TypeError(f"The value type of {key} must be {valid_type}, "
+                                    f"but got value is {value} and type is {type(value)}.")
+                self.set_param(set_func, value)
         except (TypeError, ValueError) as exo:
             raise ValueError(str(exo) + "\nFor 'context.set_context', "
                              "open or load the 'speedup_config_path' file {} "
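A sketch of feeding `_set_speedup_config_path` through `ascend_config` (my own example): each JSON key must come from `valid_option` and must match its declared type (bool or int), otherwise the TypeError/ValueError above is raised.

    import json
    import mindspore as ms

    config = {
        "recompute_comm_overlap": True,         # bool-typed option
        "compute_communicate_fusion_level": 1,  # int-typed option
    }
    with open("parallel_speed_up.json", "w") as f:
        json.dump(config, f)

    ms.set_context(ascend_config={"parallel_speed_up_json_path": "parallel_speed_up.json"})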
@@ -705,8 +901,9 @@ def _context():
                  auto_parallel_search_mode=str, search_mode=str, parameter_broadcast=bool, strategy_ckpt_load_file=str,
                  strategy_ckpt_save_file=str, full_batch=bool, enable_parallel_optimizer=bool, enable_alltoall=bool,
                  all_reduce_fusion_config=list, pipeline_stages=int, pipeline_segments=int,
-
-
+                 pipeline_result_broadcast=bool, parallel_optimizer_config=dict,
+                 pipeline_config=dict,
+                 comm_fusion=dict, strategy_ckpt_config=dict, force_fp32_communication=bool)
 def set_auto_parallel_context(**kwargs):
     r"""
     Set auto parallel context, only data parallel supported on CPU.
@@ -733,8 +930,12 @@ def set_auto_parallel_context(**kwargs):
              parallel_optimizer_config    dataset_strategy
              enable_alltoall              pipeline_stages
              pipeline_config              auto_parallel_search_mode
+             force_fp32_communication     pipeline_result_broadcast
              \                            comm_fusion
              \                            strategy_ckpt_config
+             \                            group_ckpt_save_file
+             \                            auto_pipeline
+             \                            dump_local_norm
     ===========================  ===========================

     Args:
@@ -744,6 +945,8 @@ def set_auto_parallel_context(**kwargs):
                        "stand_alone" do not support gradients_mean. Default: ``False`` .
         gradient_fp32_sync (bool): Run allreduce of gradients in fp32. "stand_alone", "data_parallel"
                        and "hybrid_parallel" do not support gradient_fp32_sync. Default: ``True`` .
+        loss_repeated_mean (bool) - Indicates whether the mean operator is executed backwards when the
+                       calculation is repeated. Default: ``True`` .
         parallel_mode (str): There are five kinds of parallel modes, ``"stand_alone"`` , ``"data_parallel"`` ,
                      ``"hybrid_parallel"`` , ``"semi_auto_parallel"`` and ``"auto_parallel"`` . Note the pynative mode
                      only supports the ``"stand_alone"`` and ``"data_parallel"`` mode. Default: ``"stand_alone"`` .
@@ -758,15 +961,16 @@ def set_auto_parallel_context(**kwargs):

             - auto_parallel: Achieving parallelism automatically.
         search_mode (str): There are three kinds of shard strategy search modes: ``"recursive_programming"`` ,
-            ``"
+            ``"sharding_propagation"`` and ``"dynamic_programming"`` (Not recommended).
+            Default: ``"recursive_programming"`` .

             - recursive_programming: Recursive programming search mode. In order to obtain optimal performance,
               it is recommended that users set the batch size to be greater than or equal to the product of
               the number of devices and the number of multi-copy parallelism.

-            - dynamic_programming: Dynamic programming search mode.
-
             - sharding_propagation: Propagate shardings from configured ops to non-configured ops.
+
+            - dynamic_programming: Dynamic programming search mode.
         auto_parallel_search_mode (str): This is the old version of 'search_mode'. Here, remaining this attribute is
             for forward compatibility, and this attribute will be deleted in a future MindSpore version.
         parameter_broadcast (bool): Whether to broadcast parameters before training. Before training, in order to have
@@ -792,6 +996,9 @@ def set_auto_parallel_context(**kwargs):
                        data parallel training in the benefit of time and memory saving. Currently, auto and semi auto
                        parallel mode support all optimizers in both Ascend and GPU. Data parallel mode only supports
                        `Lamb` and `AdamWeightDecay` in Ascend . Default: ``False`` .
+        force_fp32_communication (bool): A switch that determines whether reduce operators (AllReduce, ReduceScatter)
+                       are forced to use the fp32 data type for communication during communication. True is the enable
+                       switch. Default: ``False`` .
         enable_alltoall (bool): A switch that allows AllToAll operators to be generated during communication. If its
                        value is ``False`` , there will be a combination of operators such as AllGather, Split and
                        Concat instead of AllToAll. Default: ``False`` .
@@ -801,6 +1008,8 @@ def set_auto_parallel_context(**kwargs):
                        distributed alone in the pipeline. The total devices will be divided into 'pipeline_stags'
                        stages.
                        Default: ``1`` .
+        pipeline_result_broadcast (bool): A switch that broadcast the last stage result to all other stage in pipeline
+                       parallel inference. Default: ``False`` .
         pipeline_config (dict): A dict contains the keys and values for setting the pipeline parallelism configuration.
                        It supports the following keys:

@@ -866,14 +1075,21 @@ def set_auto_parallel_context(**kwargs):
             - load_file (str): The path to load parallel strategy checkpoint. If the file name extension is
               `.json`, the file is loaded in JSON format. Otherwise, the file is loaded in ProtoBuf
               format.
-              Default: ''
+              Default: ``''``

             - save_file (str): The path to save parallel strategy checkpoint. If the file name extension is
               `.json`, the file is saved in JSON format. Otherwise, the file is saved in ProtoBuf format.
-              Default: ''
+              Default: ``''``

             - only_trainable_params (bool): Only save/load the strategy information for trainable parameter.
               Default: ``True`` .
+        group_ckpt_save_file (str): The path to save parallel group checkpoint.
+        auto_pipeline (bool): Set the pipeline stage number to automatic. Its value will be selected between 1 and the
+            parameter `pipeline_stages`. This option requires the `parallel_mode` to be ``auto_parallel``
+            and the `search_mode` to be ``recursive_programming``. Default: ``False`` .
+        dump_local_norm (bool): Whether to dump local_norm value, when the `parallel_mode` is set to
+            ``semi_auto_parallel`` or ``auto_parallel``.
+            Default: ``False`` .

     Raises:
         ValueError: If input key is not attribute in auto parallel context.
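A sketch that combines the newly documented keyword arguments (my own example; it assumes a launched multi-device Ascend job, and the file path is illustrative):

    import mindspore as ms

    ms.set_auto_parallel_context(
        parallel_mode="auto_parallel",
        search_mode="recursive_programming",   # required by auto_pipeline
        pipeline_stages=4,
        auto_pipeline=True,                    # pick a stage count in [1, pipeline_stages]
        force_fp32_communication=True,         # AllReduce/ReduceScatter in fp32
        dump_local_norm=True,
        group_ckpt_save_file="./group_info.pb",  # hypothetical path
    )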
@@ -885,8 +1101,8 @@ def set_auto_parallel_context(**kwargs):
         >>> ms.set_auto_parallel_context(gradients_mean=True)
         >>> ms.set_auto_parallel_context(gradient_fp32_sync=False)
         >>> ms.set_auto_parallel_context(parallel_mode="auto_parallel")
-        >>> ms.set_auto_parallel_context(search_mode="
-        >>> ms.set_auto_parallel_context(auto_parallel_search_mode="
+        >>> ms.set_auto_parallel_context(search_mode="recursive_programming")
+        >>> ms.set_auto_parallel_context(auto_parallel_search_mode="recursive_programming")
         >>> ms.set_auto_parallel_context(parameter_broadcast=False)
         >>> ms.set_auto_parallel_context(strategy_ckpt_load_file="./strategy_stage1.ckpt")
         >>> ms.set_auto_parallel_context(strategy_ckpt_save_file="./strategy_stage1.ckpt")
@@ -895,6 +1111,7 @@ def set_auto_parallel_context(**kwargs):
         >>> ms.set_auto_parallel_context(enable_alltoall=False)
         >>> ms.set_auto_parallel_context(all_reduce_fusion_config=[8, 160])
         >>> ms.set_auto_parallel_context(pipeline_stages=2)
+        >>> ms.set_auto_parallel_context(pipeline_stages=2, pipeline_result_broadcast=True)
         >>> parallel_config = {"gradient_accumulation_shard": True, "parallel_optimizer_threshold": 24,
         ...                    "optimizer_weight_shard_size": 2}
         >>> ms.set_auto_parallel_context(parallel_optimizer_config=parallel_config, enable_parallel_optimizer=True)
@@ -943,15 +1160,20 @@ def reset_auto_parallel_context():
     - strategy_ckpt_save_file: ''.
     - full_batch: False.
     - enable_parallel_optimizer: False.
+    - force_fp32_communication: False.
     - enable_alltoall: False.
     - pipeline_stages: 1.
+    - pipeline_result_broadcast: False.
     - fusion_threshold: 64.
+    - dump_local_norm: False.
+    - auto_pipeline: False.

     Examples:
         >>> import mindspore as ms
         >>> ms.reset_auto_parallel_context()
     """
     _reset_auto_parallel_context()
+    api.ms_compile_cache.clear()


 @args_type_check(offload_config=dict)
@@ -961,7 +1183,8 @@ def set_offload_context(offload_config):

     Note:
         The offload configuration is only used if the memory offload feature is enabled
-        via mindspore.set_context(memory_offload="ON").
+        via mindspore.set_context(memory_offload="ON"), and the memory_optimize_level must be set to O0. On the Ascend
+        hardware platform, the graph compilation level must be O0.

     Args:
         offload_config (dict): A dict contains the keys and values for setting the offload context
@@ -1035,6 +1258,23 @@ def _check_target_specific_cfgs(device, arg_key):
     return False


+def _check_ascend_device_context_initialized(device_target, settings):
+    if device_target == 'Ascend' and is_initialized(device_target):
+        for key, _ in settings.items():
+            if key in ('ascend_config', 'deterministic', 'jit_compile', 'exception_dump', 'device_id'):
+                logger.warning(f"For 'context.set_context' in Ascend backend, the backend is already initialized, "
+                               "please set it before the definition of any Tensor and Parameter, and the "
+                               "instantiation and execution of any operation and net, otherwise the settings may not "
+                               "take effect. ")
+                break
+
+
+def _check_key(key):
+    if key in ('precision_mode', 'jit_compile', 'atomic_clean_policy', 'matmul_allow_hf32', 'conv_allow_hf32',
+               'op_precision_mode', 'host_scheduling_max_threshold', 'ge_options', 'op_debug_option'):
+        raise ValueError(f"Please set '{key}' through parameter ascend_config")
+
+
 @args_type_check(mode=int, precompile_only=bool, device_target=str, device_id=int, save_graphs=(bool, int),
                  save_graphs_path=str, enable_dump=bool, aoe_tune_mode=str, aoe_config=dict,
                  save_dump_path=str, enable_reduce_precision=bool, variable_memory_max_size=str,
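A sketch of the ordering that `_check_ascend_device_context_initialized` enforces (my own example): on Ascend, backend-level settings should be applied before any Tensor or Parameter is created, otherwise the warning above fires and the settings may be ignored.

    import mindspore as ms

    # Correct order: configure first ...
    ms.set_context(device_target="Ascend", device_id=0, deterministic="ON")
    # ... then create tensors / build the network.
    x = ms.Tensor([1.0, 2.0])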
@@ -1043,8 +1283,8 @@ def _check_target_specific_cfgs(device, arg_key):
                  max_device_memory=str, print_file_path=str, max_call_depth=int, env_config_path=str,
                  graph_kernel_flags=str, save_compile_cache=bool, runtime_num_threads=int, load_compile_cache=bool,
                  grad_for_scalar=bool, pynative_synchronize=bool, mempool_block_size=str, disable_format_transform=bool,
-                 op_timeout=int, deterministic=str, ascend_config=dict, jit_syntax_level=int,
-                 jit_enable_inplace_ops=bool, gpu_config=dict)
+                 op_timeout=int, deterministic=str, ascend_config=dict, jit_syntax_level=int, debug_level=int,
+                 jit_enable_inplace_ops=bool, gpu_config=dict, jit_config=dict, enable_compile_cache=bool)
 def set_context(**kwargs):
     """
     Set context for running environment.
@@ -1093,6 +1333,8 @@ def set_context(**kwargs):
     |                         |  reserve_class_name_in_scope |  CPU/GPU/Ascend            |
     |                         +------------------------------+----------------------------+
     |                         |  pynative_synchronize       |  CPU/GPU/Ascend            |
+    |                         +------------------------------+----------------------------+
+    |                         |  debug_level                 |  CPU/GPU/Ascend            |
     +-------------------------+------------------------------+----------------------------+
     | Executive Control       |  mode                        |  CPU/GPU/Ascend            |
     |                         +------------------------------+----------------------------+
@@ -1133,6 +1375,10 @@ def set_context(**kwargs):
     |                         |  jit_syntax_level            |  CPU/GPU/Ascend            |
     |                         +------------------------------+----------------------------+
     |                         |  gpu_config                  |  GPU                       |
+    |                         +------------------------------+----------------------------+
+    |                         |  jit_config                  |  CPU/GPU/Ascend            |
+    |                         +------------------------------+----------------------------+
+    |                         |  exec_order                  |  Ascend                    |
     +-------------------------+------------------------------+----------------------------+

     Args:
@@ -1142,15 +1388,21 @@ def set_context(**kwargs):
             If device target is not set, the version of MindSpore package is used.
         max_device_memory (str): Set the maximum memory available for devices. The format is "xxGB".
             Default: ``"1024GB"`` . The actual used memory size is the minimum of the available memory of the device
-            and max_device_memory. 'max_device_memory' should be set before the program runs.
+            and max_device_memory. 'max_device_memory' should be set before the program runs. When virtual memory is
+            enabled, a too small 'max_device_memory' will cause frequent defragmentation, affecting performance.
         variable_memory_max_size (str): This parameter is deprecated, and will be removed in a future version.
             Please use parameter 'max_device_memory' instead.
-        mempool_block_size (str):
-            The format is "xxGB". Default: ``"1GB"`` . Minimum size is "1G". The actual used memory
-            minimum of the available memory of the device and mempool_block_size.
+        mempool_block_size (str): It takes effect when virtual memory is turned off, set the size of the memory pool
+            block for devices. The format is "xxGB". Default: ``"1GB"`` . Minimum size is "1G". The actual used memory
+            block size is the minimum of the available memory of the device and mempool_block_size. When there is
+            enough memory, the memory will be expanded by this value.
         op_timeout (int): Set the maximum duration of executing an operator in seconds.
-            If the execution time exceeds this value, system will terminate the task.
-
+            If the execution time exceeds this value, system will terminate the task.
+            0 means endless wait. The defaults for AI Core and AICPU operators vary on different hardware.
+            For more information,
+            please refer to `Ascend Community document about aclrtSetOpExecuteTimeOut
+            <https://www.hiascend.com/document/detail/en/CANNCommunityEdition/600alphaX/infacldevg/aclcppdevg/aclcppdevg_03_0069.html>`_.
+            Default: ``900`` .
         save_graphs (bool or int): Whether to save intermediate compilation graphs. Default: ``0`` .
             Available values are:

@@ -1159,10 +1411,13 @@ def set_context(**kwargs):
             - True or 2: Generate more ir files related to backend process.
             - 3: Generate visualization computing graphs and detailed frontend ir graphs.

+            When the network structure is complex, setting `save_graphs` attribute to ``2`` or ``3`` may take too long.
+            If you need quick problem locating, you can switch to ``1`` first.
+
             When the `save_graphs` attribute is set as ``True`` , ``1`` , ``2`` or ``3`` , attribute of
             `save_graphs_path` is used to set the intermediate compilation graph storage path. By default, the graphs
             are saved in the current directory.
-        save_graphs_path (str): Path to save graphs. Default: "."
+        save_graphs_path (str): Path to save graphs. Default: ``"."``.
             If the specified directory does not exist, the system will automatically create the directory.
             During distributed training, graphs will be saved to the directory of
             `save_graphs_path/rank_${rank_id}/`. `rank_id` is the ID of the current device in the cluster.
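A sketch of the save_graphs guidance above (my own example): start with level ``1`` for quick problem locating, and move to ``2``/``3`` only when the extra backend/visualization detail is worth the longer dump time.

    import mindspore as ms

    # Frontend IR only; files land under ./ir_dump (rank_${rank_id}/ when distributed).
    ms.set_context(save_graphs=1, save_graphs_path="./ir_dump")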
@@ -1182,6 +1437,8 @@ def set_context(**kwargs):
             If the saved file already exists, the timestamp suffix will be added to the file. Saving data to a file
             solves the problem of data loss in screen printing when a large amount of data is generated.
             If it is not set, an error will be reported: prompt to set the upper absolute path.
+            When print data to file, the total output bytes of single print must be less then 2GB(limited by
+            protobuf).
         env_config_path (str): Config path for DFX.
             Through mindspore.set_context(env_config_path="./mindspore_config.json")

@@ -1226,7 +1483,7 @@ def set_context(**kwargs):
             If enable_graph_kernel is set to ``True`` , acceleration can be enabled.
             For details of graph kernel fusion, please check
             `Enabling Graph Kernel Fusion
-            <https://www.mindspore.cn/
+            <https://www.mindspore.cn/docs/en/master/model_train/optimize/graph_fusion_engine.html>`_.
         graph_kernel_flags (str):
             Optimization options of graph kernel fusion, and the priority is higher when it conflicts
             with enable_graph_kernel. Only for experienced users.
@@ -1251,6 +1508,11 @@ def set_context(**kwargs):
               Be caution when using this level.

             - dump_as_text: dumps detail info as text files. Default: ``False`` .
+            - enable_cluster_ops: Add user-specified operator to the set of operators involved in fusion. For example,
+              by setting ``--enable_cluster_ops=MatMul``, MatMul operator can be included in the fusion process.
+            - enable_pass/disable_pass: Enable/disable user-specified custom fusion passes. See details in
+              `Custom Fusion Pass
+              <https://www.mindspore.cn/docs/en/master/model_train/custom_program/fusion_pass.html>`_.

         enable_reduce_precision (bool): Whether to enable precision reduction.
             If the operator does not support the user-specified precision, the precision will
@@ -1281,8 +1543,9 @@ def set_context(**kwargs):
             if enable_compile_cache is still set to ``True`` and the network scripts are not changed,
             the compile cache is loaded. Note that only limited automatic detection for the changes of
             python scripts is supported by now, which means that there is a correctness risk. Default: ``False`` .
+            Currently, do not support the graph which is larger than 2G after compiled.
             This is an experimental prototype that is subject to change and/or deletion.
-        compile_cache_path (str): Path to save the compile cache. Default: "."
+        compile_cache_path (str): Path to save the compile cache. Default: ``"."``.
             If the specified directory does not exist, the system will automatically create the directory.
             The cache will be saved to the directory of `compile_cache_path/rank_${rank_id}/`. The `rank_id` is
             the ID of the current device in the cluster.
@@ -1290,7 +1553,8 @@ def set_context(**kwargs):
             which means use the default num.
         runtime_num_threads(int): The thread pool number of cpu kernel used in runtime,
             which must bigger than or equal to 0. Default value is ``30`` , if you run many processes at
-            the same time, you should set the value smaller to avoid thread contention.
+            the same time, you should set the value smaller to avoid thread contention. If set runtime_num_threads to 1,
+            the runtime asynchronous pipeline capability cannot be enabled, which may affect performance.
         disable_format_transform (bool): Whether to disable the automatic format transform function from NCHW to NHWC.
             When the network training performance of fp16 is worse than fp32, `disable_format_transform` can be set to
             ``True`` to try to improve training performance. Default: ``False`` .
@@ -1299,16 +1563,18 @@ def set_context(**kwargs):
             of the interfaces would be compiled by MindSpore to the interfaces definition .py file that should be
             guaranteed to be writable. Then compile the .py file to the .pyc or .so file, and could run in Graph mode.
         memory_optimize_level (str): The memory optimize level.
-
+            On Ascend hardware platform, default: ``O1``, on other hardware platforms, default: ``O0``.
+            The value must be in ['O0', 'O1'].

-            - O0: priority performance option, disable SOMAS (Safe Optimized Memory Allocation Solver)
-
+            - O0: priority performance option, disable SOMAS (Safe Optimized Memory Allocation Solver)
+              and some other memory optimizations.
+            - O1: priority memory option, enable SOMAS and some other memory optimizations.
         memory_offload (str): Whether to enable the memory offload function. When it is enabled, the idle data will be
             temporarily copied to the host side in the case of insufficient device memory. The value must be in the
             range of ['ON', 'OFF'], and the default value is ``'OFF'`` .

             - ON: Enable the memory Offload function. On Ascend hardware platform, this parameter does not take effect
-              when the
+              when the graph compilation level is not 'O0'; This parameter does not take effect when
              memory_optimize_level is set 'O1'.
             - OFF: Turn off the memory Offload function.
         ascend_config (dict): Set the parameters specific to Ascend hardware platform. It is not set by default.
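A sketch combining the memory knobs documented above (my own example; sizes are illustrative): memory offload requires `memory_optimize_level="O0"`, and on Ascend the graph compilation level must also be O0.

    import mindspore as ms

    ms.set_context(
        max_device_memory="28GB",     # upper bound actually usable on the device
        mempool_block_size="4GB",     # pool growth granularity when virtual memory is off
        memory_optimize_level="O0",   # required for memory_offload
        memory_offload="ON",
    )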
@@ -1319,22 +1585,27 @@ def set_context(**kwargs):
|
|
|
1319
1585
|
is ``force_fp16`` . The value range is as follows:
|
|
1320
1586
|
|
|
1321
1587
|
- force_fp16: When the operator supports both float16 and float32, select float16 directly.
|
|
1322
|
-
- allow_fp32_to_fp16:
|
|
1323
|
-
the
|
|
1588
|
+
- allow_fp32_to_fp16: For cube operators, use the float16. For vector operators,
|
|
1589
|
+
prefer to keep the origin dtype, if the operator in model can support float32,
|
|
1590
|
+
it will keep original dtype, otherwise it will reduce to float16.
|
|
1324
1591
|
- allow_mix_precision: Automatic mixing precision, facing the whole network operator, according
|
|
1325
1592
|
to the built-in optimization strategy, automatically reduces the precision of some operators
|
|
1326
1593
|
to float16 or bfloat16.
|
|
1327
1594
|
- must_keep_origin_dtype: Keep the accuracy of the original drawing.
|
|
1328
1595
|
- force_fp32: When the input of the matrix calculation operator is float16 and the output supports
|
|
1329
1596
|
float16 and float32, output is forced to float32.
|
|
1330
|
-
- allow_fp32_to_bf16:
|
|
1331
|
-
the
|
|
1597
|
+
- allow_fp32_to_bf16: For cube operators, use the bfloat16. For vector operators,
|
|
1598
|
+
prefer to keep the origin dtype, if the operator in model can support float32,
|
|
1599
|
+
it will keep original dtype, otherwise it will reduce to bfloat16.
|
|
1332
1600
|
- allow_mix_precision_fp16: Automatic mixing precision, facing the whole network operator, automatically
|
|
1333
1601
|
reduces the precision of some operators to float16 according to the built-in optimization strategy.
|
|
1334
1602
|
- allow_mix_precision_bf16: Automatic mixing precision, facing the whole network operator, according to
|
|
1335
1603
|
the built-in optimization strategy, automatically reduces the precision of some operators to bfloat16.
|
|
1336
1604
|
|
|
1337
|
-
- jit_compile (bool): Whether to select online compilation.
|
|
1605
|
+
- jit_compile (bool): Whether to select online compilation. When set to 'True', online compilation is
|
|
1606
|
+
prioritized. When set to 'False', compiled operator binary files are prioritized to improve compilation
|
|
1607
|
+
performance. The default settings are online compilation for static shape, and compiled operator binary
|
|
1608
|
+
files for dynamic shape.
|
|
1338
1609
|
- atomic_clean_policy (int): The policy for cleaning memory occupied by atomic operators in the network.
|
|
1339
1610
|
Default: ``1`` .
|
|
1340
1611
|
|
|
@@ -1350,24 +1621,76 @@ def set_context(**kwargs):
|
|
|
1350
1621
|
For detailed information, please refer to `Ascend community <https://www.hiascend.com/>`_ .
|
|
1351
1622
|
- exception_dump (str): Enable exception dump for Ascend operators, providing the input and output data for
|
|
1352
1623
|
failing Ascend operators. The value can be ``"0"`` , ``"1"`` and ``"2"``. For ``"0"`` , exception dump is
|
|
1353
|
-
turned off; for ``"1"``, all inputs and outputs will be dumped for AICore
|
|
1354
|
-
for ``"2"``, inputs will be dumped for AICore exception operators
|
|
1624
|
+
turned off; for ``"1"``, all inputs and outputs will be dumped for AICore exception operators;
|
|
1625
|
+
for ``"2"``, inputs will be dumped for AICore exception operators, reducing the saved information
|
|
1626
|
+
but improving performance. Default: ``"2"`` .
|
|
1355
1627
|
- op_precision_mode (str): Path to config file of op precision mode. For detailed information, please refer
|
|
1356
1628
|
to `Ascend community <https://www.hiascend.com/>`_ .
|
|
1629
|
+
- op_debug_option (str): Enable debugging options for Ascend operators, default not enabled.
|
|
1630
|
+
The value currently only supports being set to ``"oom"``.
|
|
1631
|
+
|
|
1632
|
+
- ``"oom"``: When there is a memory out of bounds during the execution of an operator,
|
|
1633
|
+
AscendCL will return an error code of ``EZ9999``.
|
|
1634
|
+
|
|
1635
|
+
- ge_options (dict): Set options for CANN. The options are divided into two categories: global and session.
|
|
1636
|
+
This is an experimental prototype that is subject to change and/or deletion.
|
|
1637
|
+
For detailed information, please refer to `Ascend community <https://www.hiascend.com/document/detail/zh/canncommercial/70RC1/inferapplicationdev/graphdevg/atlasgeapi_07_0119.html>`_ .
|
|
1638
|
+
The configuration options in `ge_options` may be duplicated with the options in `ascend_config`. If the
|
|
1639
|
+
same configuration options are set in both `ascend_config` and `ge_options`, the one set in `ge_options`
|
|
1640
|
+
shall prevail.
|
|
1641
|
+
|
|
1642
|
+
- global (dict): Set global options.
|
|
1643
|
+
- session (dict): Set session options.
|
|
1644
|
+
|
|
             - parallel_speed_up_json_path(Union[str, None]): The path to the parallel speed up json file; the configuration
               can refer to `parallel_speed_up.json
-              <https://gitee.com/mindspore/mindspore/blob/
+              <https://gitee.com/mindspore/mindspore/blob/master/config/parallel_speed_up.json>`_ .
               If its value is None or '', it does not take effect. Default: None.

                 - recompute_comm_overlap (bool): Enable overlap between recompute ops and communication ops if True.
                   Default: False.
-                - matmul_grad_comm_overlap (bool): Enable overlap between
+                - matmul_grad_comm_overlap (bool): Enable overlap between dw matmul and
+                  tensor parallel communication ops if True. Default: False.
+                - recompute_allgather_overlap_fagrad (bool): Enable overlap between the allgather operators duplicated
+                  by recompute in sequence parallel and flashattentionscoregrad ops if True. Default: False.
+                - enable_task_opt (bool): Enable communication fusion to optimize the number of communication operator
+                  tasks if True.
                   Default: False.
-                -
+                - enable_grad_comm_opt (bool): Enable overlap between dx ops and data parallel communication ops if True.
+                  Currently, does not support
+                  `LazyInline <https://www.mindspore.cn/docs/en/master/api_python/mindspore/mindspore.lazy_inline.html>`_ .
                   Default: False.
-                -
-
+                - enable_opt_shard_comm_opt (bool): Enable overlap between forward ops
+                  and optimizer parallel allgather communication if True. Currently, does not support
+                  `LazyInline <https://www.mindspore.cn/docs/en/master/api_python/mindspore/mindspore.lazy_inline.html>`_ .
                   Default: False.
+                - compute_communicate_fusion_level (int): Enable fusion between computation and communication.
+                  Default: ``0``. Note: this feature must be used with Ascend Training Solution 24.0.RC2 or later.
+
+                    - 0: Disable fusion.
+
+                    - 1: Apply fusion to forward nodes.
+
+                    - 2: Apply fusion to backward nodes.
+
+                    - 3: Apply fusion to all nodes.
+                - dataset_broadcast_opt_level (int): Optimize the scenario where the dataset is read repeatedly. Only
+                  the O0/O1 jit level is supported; it does not work in O2 mode. Default: ``0``.
+
+                    - 0: Disable this optimization.
+
+                    - 1: Optimize dataset reading between pipeline stages.
+
+                    - 2: Optimize dataset reading within a pipeline stage.
+
+                    - 3: Optimize dataset reading in all scenarios.
+                - bias_add_comm_swap (bool): Enable swapping the execution order of communication operators and add
+                  operators if ``True``. Only 1-dimensional bias nodes are supported. Default: ``False``.
+                - host_scheduling_max_threshold(int): The max threshold to control whether the dynamic shape process is
+                  used when running a static graph; the default value is 0. When the number of operations in the static
+                  graph is less than the max threshold, the graph will be executed in the dynamic shape process. In large
+                  model scenarios, this approach can save stream resources. If the number of operations in the static
+                  graph is greater than the max threshold, the graph will be executed in the original static process.
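Because these sub-options live in the json file rather than in `set_context` kwargs, a sketch of the wiring may help. The json keys below are an assumption based on the bullet names above; the linked parallel_speed_up.json template is the authoritative schema:

```python
import json
import mindspore as ms

# Hypothetical parallel_speed_up.json built from the option names documented
# above (assumed keys; verify against the linked template).
speed_up_conf = {
    "recompute_comm_overlap": False,
    "matmul_grad_comm_overlap": True,
    "compute_communicate_fusion_level": 1,
    "dataset_broadcast_opt_level": 1,
}
with open("parallel_speed_up.json", "w", encoding="utf-8") as f:
    json.dump(speed_up_conf, f)

ms.set_context(ascend_config={"parallel_speed_up_json_path": "./parallel_speed_up.json"})
```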

         jit_syntax_level (int): Set JIT syntax level for graph compiling, triggered by GRAPH_MODE and @jit decorator.
             The value must be ``STRICT`` or ``LAX``. Default: ``LAX``. All levels support all backends.
@@ -1378,6 +1701,12 @@ def set_context(**kwargs):
             affected and not optimal. Cannot be used for MindIR load and export due to some syntax that may not be
             able to be exported.

+        debug_level (int): Set config for debugging. Default value: ``RELEASE``.
+
+            - ``RELEASE``: Used for normal runs; some debug information is discarded to obtain better
+              compiling performance.
+            - ``DEBUG``: Used for debugging when errors occur; more information is recorded during compilation.
+
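The Examples hunk below exercises this option; in practice one would flip it on only while diagnosing a compile-time failure (sketch):

```python
import mindspore as ms

# DEBUG records extra information during compilation at the cost of compile
# speed; RELEASE (the default) discards it for better compiling performance.
ms.set_context(debug_level=ms.context.DEBUG)
```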
         gpu_config (dict): Set the parameters specific to gpu hardware platform. It is not set by default.
             Currently, only setting `conv_fprop_algo`, `conv_dgrad_algo`, `conv_wgrad_algo`, `conv_allow_tf32`,
             and `matmul_allow_tf32` is supported on the GPU hardware platform.
@@ -1449,6 +1778,39 @@
             - matmul_allow_tf32 (bool): This flag controls whether to allow Tensor Core TF32 computation on CUBLAS;
               the default value is ``False``.

+        jit_config (dict): Set the global jit config for compilation; it takes effect for networks defined with
+            Cell or jit decorators. It is not set by default.
+            The setting in context is the global jit config, while JitConfig is the local network's jit config.
+            When both exist simultaneously, the global jit config will not overwrite the local network's jit config.
+
+            - jit_level (str): Used to control the compilation optimization level. Default: ``""``. The framework
+              automatically selects the optimization level based on the product: Atlas training products use O2,
+              and all other products use O0. In addition, dynamic shape requires O0 or O1; O2 is not supported.
+              The value range is as follows:
+
+                - ``"O0"``: Except for optimizations that may affect functionality, all other optimizations are
+                  turned off; adopts KernelByKernel execution mode.
+                - ``"O1"``: Uses commonly applied optimizations and automatic operator fusion optimizations;
+                  adopts KernelByKernel execution mode. This optimization level is experimental and is being improved.
+                - ``"O2"``: Ultimate performance optimization; adopts Sink execution mode.
+
+            - infer_boost (str): Used to control the infer mode. Default: ``"off"``. The value range is as follows:
+
+                - ``"on"``: Enable infer mode to get better inference performance.
+                - ``"off"``: Disable infer mode and use the forward pass to infer; performance is not good.
+
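A hedged sketch of the precedence rule described above, using the `JitConfig` class the docstring refers to (attaching the local config to a particular network is omitted here):

```python
import mindspore as ms
from mindspore import JitConfig

# Global default for every network compiled in this process.
ms.set_context(jit_config={"jit_level": "O1"})

# A network-local JitConfig, where one is set, is not overwritten by the
# global setting above.
local_cfg = JitConfig(jit_level="O0")
```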
+        exec_order (str): Set the sorting method for operator execution in GRAPH_MODE. Currently, three sorting
+            methods are supported: bfs, dfs, and gpto; the default method is bfs.
+
+            - ``"bfs"``: The default sorting method: breadth-first, good communication masking, relatively good
+              performance.
+            - ``"dfs"``: An optional sorting method: depth-first sorting. The performance is relatively worse than
+              that of the bfs execution order, but it occupies less memory. It is recommended to try dfs in
+              scenarios where other execution orders run out of memory (OOM).
+            - ``"gpto"``: An optional sorting method. This method combines multiple execution orders and selects
+              one with relatively good performance. There may be some performance gains in scenarios with multiple
+              replicas running in parallel.
+
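For instance, a memory-constrained run can switch execution order as the dfs bullet suggests (a sketch; the best choice depends on the model):

```python
import mindspore as ms

ms.set_context(mode=ms.GRAPH_MODE)
# The default "bfs" order ran out of device memory for this model, so try
# the memory-friendlier depth-first order.
ms.set_context(exec_order="dfs")
```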
     Raises:
         ValueError: If input key is not an attribute in context.

@@ -1482,16 +1844,24 @@ def set_context(**kwargs):
         >>> ms.set_context(memory_offload='ON')
         >>> ms.set_context(deterministic='ON')
         >>> ms.set_context(ascend_config={"precision_mode": "force_fp16", "jit_compile": True,
-        ...                               "atomic_clean_policy": 1, "op_precision_mode": "./op_precision_config_file"
+        ...                               "atomic_clean_policy": 1, "op_precision_mode": "./op_precision_config_file",
+        ...                               "op_debug_option": "oom",
+        ...                               "ge_options": {"global": {"ge.opSelectImplmode": "high_precision"},
+        ...                                              "session": {"ge.exec.atomicCleanPolicy": "0"}}})
         >>> ms.set_context(jit_syntax_level=ms.STRICT)
+        >>> ms.set_context(debug_level=ms.context.DEBUG)
         >>> ms.set_context(gpu_config={"conv_fprop_algo": "performance", "conv_allow_tf32": True,
         ...                            "matmul_allow_tf32": True})
+        >>> ms.set_context(jit_config={"jit_level": "O0"})
+        >>> ms.set_context(exec_order="gpto")
     """
     ctx = _context()
     # set device target first
     if 'device_target' in kwargs:
         ctx.set_device_target(kwargs['device_target'])
     device = ctx.get_param(ms_ctx_param.device_target)
+    _check_ascend_device_context_initialized(device, kwargs)
+
     for key, value in kwargs.items():
         if key in ('enable_sparse', 'auto_tune_mode'):
             logger.warning(f"For 'context.set_context', '{key}' parameter is deprecated, "
@@ -1501,9 +1871,7 @@ def set_context(**kwargs):
             logger.warning(f"For 'context.set_context', '{key}' parameter is deprecated. "
                            "For details, please see the interface parameter API comments")
             continue
-
-                   'op_precision_mode'):
-            raise ValueError(f"Please set '{key}' through parameter ascend_config")
+        _check_key(key)
         if key == 'save_graphs':
             if value is True:
                 value = 2
@@ -1514,14 +1882,21 @@ def set_context(**kwargs):
         if key == 'jit_syntax_level' and value not in (STRICT, COMPATIBLE, LAX):
             raise ValueError(f"For 'jit_syntax_level', the value should be context.STRICT"
                              f" or context.LAX, but got {value}.")
-        if not
-
-
+        if key == 'debug_level' and value not in (RELEASE, DEBUG):
+            raise ValueError(f"For 'debug_level', the value should be context.DEBUG"
+                             f" or context.RELEASE, but got {value}.")
+        if key == 'enable_compile_cache':
             setattr(ctx, key, value)
+            ctx.set_param(ms_ctx_param.__members__[key], int(value))
+            continue
+        if not _check_target_specific_cfgs(device, key):
             continue
         if key in ctx.setters:
             ctx.setters[key](ctx, value)
             continue
+        if hasattr(ctx, key):
+            setattr(ctx, key, value)
+            continue
         # enum variables beginning with '_' are for internal use
         if key in ms_ctx_param.__members__ and key[0] != '_':
             ctx.set_param(ms_ctx_param.__members__[key], value)
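The rewritten loop gives each keyword a fixed dispatch order. A condensed illustration of that order (derived from the diff above and simplified; not the actual implementation):

```python
# Simplified view of how the new set_context body routes each kwarg.
def _dispatch(ctx, device, key, value):
    _check_key(key)                                   # reject keys that must go via ascend_config
    if key == 'enable_compile_cache':                 # special case: attribute plus raw param
        setattr(ctx, key, value)
        ctx.set_param(ms_ctx_param.__members__[key], int(value))
        return
    if not _check_target_specific_cfgs(device, key):  # skip keys foreign to this backend
        return
    if key in ctx.setters:                            # 1) dedicated setter
        ctx.setters[key](ctx, value)
    elif hasattr(ctx, key):                           # 2) plain attribute (new in 2.4.0)
        setattr(ctx, key, value)
    elif key in ms_ctx_param.__members__ and key[0] != '_':
        ctx.set_param(ms_ctx_param.__members__[key], value)  # 3) raw context param
```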
@@ -1571,6 +1946,17 @@ def _get_mode():
     return ctx.get_mode()


+def get_jit_config():
+    """
+    Get the global jit config.
+
+    Returns:
+        Object: The value of the jit config.
+    """
+    ctx = _context()
+    return ctx.get_jit_config()
+
+
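A short usage sketch for the new helper, assuming it is exposed as `mindspore.context.get_jit_config` (it is defined in this module):

```python
from mindspore import context

context.set_context(jit_config={"jit_level": "O0"})
cfg = context.get_jit_config()   # returns the global jit config set above
print(cfg)
```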
 class ParallelMode:
     """
     Parallel mode options.
@@ -1668,9 +2054,7 @@ def get_ps_context(attr_key):

 def reset_ps_context():
     """
-    Reset parameter server training mode context attributes to the default values
-
-    - enable_ps: False.
+    Reset parameter server training mode context attributes to the default values.

     For the meaning of each field and its default value, refer to :func:`mindspore.set_ps_context`.