mindspore-2.2.14-cp39-cp39-win_amd64.whl → mindspore-2.3.0-cp39-cp39-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of mindspore has been flagged as potentially problematic.
- mindspore/.commit_id +1 -1
- mindspore/Microsoft.VisualStudio.Telemetry.dll +0 -0
- mindspore/Newtonsoft.Json.dll +0 -0
- mindspore/__init__.py +6 -5
- mindspore/_c_dataengine.cp39-win_amd64.pyd +0 -0
- mindspore/_c_expression.cp39-win_amd64.pyd +0 -0
- mindspore/_c_mindrecord.cp39-win_amd64.pyd +0 -0
- mindspore/_checkparam.py +76 -18
- mindspore/_extends/builtin_operations.py +2 -1
- mindspore/_extends/graph_kernel/model/graph_parallel.py +16 -6
- mindspore/_extends/parallel_compile/akg_compiler/akg_process.py +3 -16
- mindspore/_extends/parallel_compile/akg_compiler/build_tbe_kernel.py +16 -4
- mindspore/_extends/parallel_compile/akg_compiler/compiler.py +1 -0
- mindspore/_extends/parallel_compile/akg_compiler/gen_custom_op_files.py +96 -0
- mindspore/_extends/parallel_compile/akg_compiler/tbe_topi.py +2 -1
- mindspore/_extends/parallel_compile/akg_compiler/util.py +5 -2
- mindspore/_extends/parse/__init__.py +18 -14
- mindspore/_extends/parse/compile_config.py +258 -0
- mindspore/_extends/parse/namespace.py +2 -2
- mindspore/_extends/parse/parser.py +174 -62
- mindspore/_extends/parse/resources.py +45 -14
- mindspore/_extends/parse/standard_method.py +142 -240
- mindspore/{ops/_op_impl/tbe/atomic_addr_clean.py → _extends/pijit/__init__.py} +6 -16
- mindspore/_extends/pijit/pijit_func_white_list.py +343 -0
- mindspore/_extends/remote/kernel_build_server.py +2 -0
- mindspore/_profiler.py +30 -0
- mindspore/amp.py +51 -24
- mindspore/atlprov.dll +0 -0
- mindspore/avcodec-59.dll +0 -0
- mindspore/avdevice-59.dll +0 -0
- mindspore/avfilter-8.dll +0 -0
- mindspore/avformat-59.dll +0 -0
- mindspore/avutil-57.dll +0 -0
- mindspore/boost/adasum.py +1 -1
- mindspore/boost/base.py +1 -1
- mindspore/boost/boost_cell_wrapper.py +2 -2
- mindspore/boost/grad_freeze.py +2 -2
- mindspore/boost/group_loss_scale_manager.py +1 -1
- mindspore/boost/less_batch_normalization.py +9 -6
- mindspore/c1.dll +0 -0
- mindspore/c1xx.dll +0 -0
- mindspore/c2.dll +0 -0
- mindspore/common/__init__.py +15 -4
- mindspore/common/_jit_fallback_utils.py +2 -3
- mindspore/common/_register_for_adapter.py +7 -0
- mindspore/common/_register_for_recompute.py +48 -0
- mindspore/common/_register_for_tensor.py +8 -9
- mindspore/common/_stub_tensor.py +7 -1
- mindspore/common/_utils.py +5 -17
- mindspore/common/api.py +411 -106
- mindspore/common/auto_dynamic_shape.py +27 -14
- mindspore/common/dtype.py +17 -10
- mindspore/common/dump.py +6 -8
- mindspore/common/file_system.py +48 -0
- mindspore/common/generator.py +260 -0
- mindspore/common/hook_handle.py +51 -4
- mindspore/common/initializer.py +1 -1
- mindspore/common/jit_config.py +34 -14
- mindspore/common/lazy_inline.py +72 -19
- mindspore/common/mindir_util.py +12 -2
- mindspore/common/mutable.py +79 -14
- mindspore/common/no_inline.py +54 -0
- mindspore/common/np_dtype.py +25 -0
- mindspore/common/parameter.py +30 -11
- mindspore/common/recompute.py +262 -0
- mindspore/common/seed.py +9 -9
- mindspore/common/sparse_tensor.py +272 -24
- mindspore/common/symbol.py +122 -0
- mindspore/common/tensor.py +468 -494
- mindspore/communication/__init__.py +6 -11
- mindspore/communication/_comm_helper.py +5 -0
- mindspore/communication/comm_func.py +1140 -0
- mindspore/communication/management.py +115 -102
- mindspore/config/op_info.config +22 -54
- mindspore/context.py +346 -63
- mindspore/dataset/__init__.py +5 -5
- mindspore/dataset/audio/__init__.py +6 -6
- mindspore/dataset/audio/transforms.py +711 -158
- mindspore/dataset/callback/ds_callback.py +2 -2
- mindspore/dataset/engine/cache_client.py +2 -2
- mindspore/dataset/engine/datasets.py +140 -83
- mindspore/dataset/engine/datasets_audio.py +14 -14
- mindspore/dataset/engine/datasets_standard_format.py +33 -3
- mindspore/dataset/engine/datasets_text.py +38 -38
- mindspore/dataset/engine/datasets_user_defined.py +78 -59
- mindspore/dataset/engine/datasets_vision.py +77 -73
- mindspore/dataset/engine/offload.py +5 -7
- mindspore/dataset/engine/queue.py +56 -38
- mindspore/dataset/engine/validators.py +11 -5
- mindspore/dataset/text/__init__.py +3 -3
- mindspore/dataset/text/transforms.py +408 -121
- mindspore/dataset/text/utils.py +9 -9
- mindspore/dataset/transforms/__init__.py +1 -1
- mindspore/dataset/transforms/transforms.py +261 -76
- mindspore/dataset/utils/browse_dataset.py +9 -9
- mindspore/dataset/vision/__init__.py +8 -8
- mindspore/dataset/vision/c_transforms.py +10 -10
- mindspore/dataset/vision/py_transforms_util.py +1 -1
- mindspore/dataset/vision/transforms.py +2844 -549
- mindspore/dataset/vision/utils.py +161 -10
- mindspore/dataset/vision/validators.py +14 -2
- mindspore/dnnl.dll +0 -0
- mindspore/dpcmi.dll +0 -0
- mindspore/experimental/optim/__init__.py +12 -2
- mindspore/experimental/optim/adadelta.py +161 -0
- mindspore/experimental/optim/adagrad.py +168 -0
- mindspore/experimental/optim/adam.py +35 -34
- mindspore/experimental/optim/adamax.py +170 -0
- mindspore/experimental/optim/adamw.py +40 -16
- mindspore/experimental/optim/asgd.py +153 -0
- mindspore/experimental/optim/lr_scheduler.py +66 -121
- mindspore/experimental/optim/nadam.py +157 -0
- mindspore/experimental/optim/optimizer.py +15 -8
- mindspore/experimental/optim/radam.py +194 -0
- mindspore/experimental/optim/rmsprop.py +154 -0
- mindspore/experimental/optim/rprop.py +164 -0
- mindspore/experimental/optim/sgd.py +28 -19
- mindspore/hal/__init__.py +40 -0
- mindspore/hal/_ascend.py +57 -0
- mindspore/hal/_base.py +57 -0
- mindspore/hal/_cpu.py +56 -0
- mindspore/hal/_gpu.py +57 -0
- mindspore/hal/device.py +356 -0
- mindspore/hal/event.py +179 -0
- mindspore/hal/memory.py +326 -0
- mindspore/hal/stream.py +339 -0
- mindspore/include/api/data_type.h +2 -2
- mindspore/include/api/dual_abi_helper.h +16 -3
- mindspore/include/api/model.h +4 -3
- mindspore/include/api/status.h +14 -0
- mindspore/include/c_api/model_c.h +173 -0
- mindspore/include/c_api/ms/base/types.h +1 -0
- mindspore/include/c_api/types_c.h +19 -0
- mindspore/include/dataset/execute.h +1 -3
- mindspore/include/dataset/vision.h +54 -2
- mindspore/jpeg62.dll +0 -0
- mindspore/log.py +2 -2
- mindspore/mindrecord/__init__.py +5 -1
- mindspore/mindrecord/config.py +809 -0
- mindspore/mindrecord/filereader.py +25 -0
- mindspore/mindrecord/filewriter.py +76 -58
- mindspore/mindrecord/mindpage.py +40 -6
- mindspore/mindrecord/shardutils.py +3 -2
- mindspore/mindrecord/shardwriter.py +7 -0
- mindspore/mindrecord/tools/cifar100_to_mr.py +8 -13
- mindspore/mindrecord/tools/cifar10_to_mr.py +9 -15
- mindspore/mindrecord/tools/csv_to_mr.py +4 -9
- mindspore/mindrecord/tools/imagenet_to_mr.py +3 -8
- mindspore/mindrecord/tools/mnist_to_mr.py +7 -12
- mindspore/mindrecord/tools/tfrecord_to_mr.py +1 -6
- mindspore/mindspore_backend.dll +0 -0
- mindspore/mindspore_common.dll +0 -0
- mindspore/mindspore_core.dll +0 -0
- mindspore/mindspore_glog.dll +0 -0
- mindspore/mindspore_np_dtype.dll +0 -0
- mindspore/mindspore_shared_lib.dll +0 -0
- mindspore/mint/__init__.py +1137 -0
- mindspore/{rewrite/ast_transformers → mint/linalg}/__init__.py +9 -4
- mindspore/mint/nn/__init__.py +512 -0
- mindspore/mint/nn/functional.py +573 -0
- mindspore/mint/optim/__init__.py +24 -0
- mindspore/mint/optim/adamw.py +185 -0
- mindspore/msobj140.dll +0 -0
- mindspore/mspdb140.dll +0 -0
- mindspore/mspdbcore.dll +0 -0
- mindspore/mspdbst.dll +0 -0
- mindspore/mspft140.dll +0 -0
- mindspore/msvcdis140.dll +0 -0
- mindspore/msvcp140_1.dll +0 -0
- mindspore/msvcp140_2.dll +0 -0
- mindspore/msvcp140_atomic_wait.dll +0 -0
- mindspore/msvcp140_codecvt_ids.dll +0 -0
- mindspore/multiprocessing/__init__.py +72 -0
- mindspore/nn/__init__.py +1 -0
- mindspore/nn/cell.py +213 -257
- mindspore/nn/dynamic_lr.py +2 -2
- mindspore/nn/extend/__init__.py +29 -0
- mindspore/nn/extend/basic.py +140 -0
- mindspore/nn/extend/embedding.py +143 -0
- mindspore/{rewrite/ast_creator_register.py → nn/extend/layer/__init__.py} +9 -19
- mindspore/nn/extend/layer/normalization.py +109 -0
- mindspore/nn/extend/pooling.py +117 -0
- mindspore/nn/layer/activation.py +83 -93
- mindspore/nn/layer/basic.py +177 -82
- mindspore/nn/layer/channel_shuffle.py +3 -16
- mindspore/nn/layer/container.py +3 -3
- mindspore/nn/layer/conv.py +75 -66
- mindspore/nn/layer/embedding.py +101 -43
- mindspore/nn/layer/embedding_service.py +531 -0
- mindspore/nn/layer/embedding_service_layer.py +393 -0
- mindspore/nn/layer/image.py +4 -7
- mindspore/nn/layer/math.py +1 -1
- mindspore/nn/layer/normalization.py +52 -66
- mindspore/nn/layer/padding.py +30 -39
- mindspore/nn/layer/pooling.py +18 -9
- mindspore/nn/layer/rnn_cells.py +6 -16
- mindspore/nn/layer/rnns.py +6 -5
- mindspore/nn/layer/thor_layer.py +1 -2
- mindspore/nn/layer/timedistributed.py +1 -1
- mindspore/nn/layer/transformer.py +52 -50
- mindspore/nn/learning_rate_schedule.py +6 -5
- mindspore/nn/loss/loss.py +62 -83
- mindspore/nn/optim/ada_grad.py +4 -2
- mindspore/nn/optim/adadelta.py +3 -1
- mindspore/nn/optim/adafactor.py +1 -1
- mindspore/nn/optim/adam.py +102 -181
- mindspore/nn/optim/adamax.py +4 -2
- mindspore/nn/optim/adasum.py +3 -3
- mindspore/nn/optim/asgd.py +4 -2
- mindspore/nn/optim/ftrl.py +31 -61
- mindspore/nn/optim/lamb.py +5 -3
- mindspore/nn/optim/lars.py +2 -2
- mindspore/nn/optim/lazyadam.py +6 -4
- mindspore/nn/optim/momentum.py +13 -25
- mindspore/nn/optim/optimizer.py +6 -3
- mindspore/nn/optim/proximal_ada_grad.py +4 -2
- mindspore/nn/optim/rmsprop.py +9 -3
- mindspore/nn/optim/rprop.py +4 -2
- mindspore/nn/optim/sgd.py +5 -3
- mindspore/nn/optim/thor.py +2 -2
- mindspore/nn/probability/distribution/_utils/custom_ops.py +2 -2
- mindspore/nn/probability/distribution/beta.py +2 -2
- mindspore/nn/probability/distribution/categorical.py +4 -6
- mindspore/nn/probability/distribution/cauchy.py +2 -2
- mindspore/nn/probability/distribution/exponential.py +2 -2
- mindspore/nn/probability/distribution/geometric.py +1 -1
- mindspore/nn/probability/distribution/gumbel.py +2 -2
- mindspore/nn/probability/distribution/logistic.py +1 -1
- mindspore/nn/probability/distribution/poisson.py +2 -2
- mindspore/nn/probability/distribution/uniform.py +2 -2
- mindspore/nn/reinforcement/_tensors_queue.py +13 -1
- mindspore/nn/wrap/__init__.py +2 -1
- mindspore/nn/wrap/cell_wrapper.py +58 -13
- mindspore/nn/wrap/grad_reducer.py +148 -8
- mindspore/nn/wrap/loss_scale.py +32 -9
- mindspore/numpy/__init__.py +2 -0
- mindspore/numpy/array_creations.py +2 -0
- mindspore/numpy/array_ops.py +6 -6
- mindspore/numpy/dtypes.py +3 -3
- mindspore/numpy/fft.py +431 -0
- mindspore/numpy/math_ops.py +62 -68
- mindspore/numpy/utils.py +3 -0
- mindspore/opencv_core452.dll +0 -0
- mindspore/opencv_imgcodecs452.dll +0 -0
- mindspore/opencv_imgproc452.dll +0 -0
- mindspore/ops/__init__.py +6 -5
- mindspore/ops/_grad_experimental/grad_array_ops.py +4 -129
- mindspore/ops/_grad_experimental/grad_comm_ops.py +89 -34
- mindspore/ops/_grad_experimental/grad_math_ops.py +68 -283
- mindspore/ops/_grad_experimental/grad_nn_ops.py +0 -53
- mindspore/ops/_grad_experimental/grad_quant_ops.py +3 -3
- mindspore/ops/_grad_experimental/grad_sparse.py +1 -1
- mindspore/ops/_grad_experimental/grad_sparse_ops.py +3 -3
- mindspore/ops/_op_impl/__init__.py +0 -1
- mindspore/ops/_op_impl/aicpu/gamma.py +2 -0
- mindspore/ops/_op_impl/aicpu/generate_eod_mask.py +1 -1
- mindspore/ops/_op_impl/aicpu/log_uniform_candidate_sampler.py +1 -3
- mindspore/ops/_op_impl/aicpu/poisson.py +2 -0
- mindspore/ops/_op_impl/cpu/__init__.py +1 -3
- mindspore/ops/_op_impl/cpu/adam.py +2 -2
- mindspore/ops/_op_impl/cpu/adam_weight_decay.py +3 -2
- mindspore/ops/_op_impl/cpu/maximum_grad.py +16 -14
- mindspore/ops/_op_impl/cpu/minimum_grad.py +8 -0
- mindspore/ops/_vmap/vmap_array_ops.py +164 -101
- mindspore/ops/_vmap/vmap_base.py +8 -1
- mindspore/ops/_vmap/vmap_grad_math_ops.py +95 -9
- mindspore/ops/_vmap/vmap_grad_nn_ops.py +143 -58
- mindspore/ops/_vmap/vmap_image_ops.py +70 -13
- mindspore/ops/_vmap/vmap_math_ops.py +130 -58
- mindspore/ops/_vmap/vmap_nn_ops.py +249 -115
- mindspore/ops/_vmap/vmap_other_ops.py +1 -1
- mindspore/ops/auto_generate/__init__.py +31 -0
- mindspore/ops/auto_generate/cpp_create_prim_instance_helper.py +231 -0
- mindspore/ops/auto_generate/gen_arg_dtype_cast.py +250 -0
- mindspore/ops/auto_generate/gen_arg_handler.py +197 -0
- mindspore/ops/auto_generate/gen_extend_func.py +980 -0
- mindspore/ops/auto_generate/gen_ops_def.py +6443 -0
- mindspore/ops/auto_generate/gen_ops_prim.py +13167 -0
- mindspore/ops/auto_generate/pyboost_inner_prim.py +429 -0
- mindspore/ops/composite/__init__.py +5 -2
- mindspore/ops/composite/base.py +121 -23
- mindspore/ops/composite/math_ops.py +10 -49
- mindspore/ops/composite/multitype_ops/_compile_utils.py +191 -618
- mindspore/ops/composite/multitype_ops/_constexpr_utils.py +25 -134
- mindspore/ops/composite/multitype_ops/add_impl.py +6 -0
- mindspore/ops/composite/multitype_ops/bitwise_and_impl.py +6 -0
- mindspore/ops/composite/multitype_ops/bitwise_or_impl.py +6 -0
- mindspore/ops/composite/multitype_ops/bitwise_xor_impl.py +6 -0
- mindspore/ops/composite/multitype_ops/div_impl.py +8 -0
- mindspore/ops/composite/multitype_ops/equal_impl.py +6 -0
- mindspore/ops/composite/multitype_ops/floordiv_impl.py +8 -0
- mindspore/ops/composite/multitype_ops/getitem_impl.py +6 -0
- mindspore/ops/composite/multitype_ops/greater_equal_impl.py +6 -0
- mindspore/ops/composite/multitype_ops/greater_impl.py +6 -0
- mindspore/ops/composite/multitype_ops/in_impl.py +8 -2
- mindspore/ops/composite/multitype_ops/left_shift_impl.py +6 -0
- mindspore/ops/composite/multitype_ops/less_equal_impl.py +6 -0
- mindspore/ops/composite/multitype_ops/less_impl.py +6 -0
- mindspore/ops/composite/multitype_ops/logic_not_impl.py +6 -0
- mindspore/ops/composite/multitype_ops/logical_and_impl.py +6 -0
- mindspore/ops/composite/multitype_ops/logical_or_impl.py +6 -0
- mindspore/ops/composite/multitype_ops/mod_impl.py +6 -0
- mindspore/ops/composite/multitype_ops/mul_impl.py +6 -0
- mindspore/ops/composite/multitype_ops/negative_impl.py +9 -3
- mindspore/ops/composite/multitype_ops/not_equal_impl.py +6 -0
- mindspore/ops/composite/multitype_ops/not_in_impl.py +6 -1
- mindspore/ops/composite/multitype_ops/ones_like_impl.py +2 -2
- mindspore/ops/composite/multitype_ops/pow_impl.py +6 -0
- mindspore/ops/composite/multitype_ops/right_shift_impl.py +6 -0
- mindspore/ops/composite/multitype_ops/setitem_impl.py +32 -21
- mindspore/ops/composite/multitype_ops/sub_impl.py +6 -0
- mindspore/ops/composite/multitype_ops/zeros_like_impl.py +6 -3
- mindspore/ops/deprecated.py +14 -3
- mindspore/ops/extend/__init__.py +53 -0
- mindspore/ops/extend/array_func.py +218 -0
- mindspore/ops/extend/math_func.py +76 -0
- mindspore/ops/extend/nn_func.py +308 -0
- mindspore/ops/function/__init__.py +31 -11
- mindspore/ops/function/array_func.py +846 -1735
- mindspore/ops/function/clip_func.py +19 -31
- mindspore/ops/function/debug_func.py +1 -4
- mindspore/ops/function/fft_func.py +31 -0
- mindspore/ops/function/grad/grad_func.py +27 -20
- mindspore/ops/function/image_func.py +27 -21
- mindspore/ops/function/linalg_func.py +35 -68
- mindspore/ops/function/math_func.py +913 -2791
- mindspore/ops/function/nn_func.py +1439 -885
- mindspore/ops/function/other_func.py +6 -7
- mindspore/ops/function/parameter_func.py +5 -93
- mindspore/ops/function/random_func.py +254 -108
- mindspore/ops/function/reshard_func.py +102 -0
- mindspore/ops/function/sparse_func.py +4 -4
- mindspore/ops/function/sparse_unary_func.py +9 -16
- mindspore/ops/function/spectral_func.py +1 -1
- mindspore/ops/function/vmap_func.py +14 -14
- mindspore/ops/functional.py +342 -343
- mindspore/ops/op_info_register.py +16 -43
- mindspore/ops/operations/__init__.py +32 -23
- mindspore/ops/operations/_grad_ops.py +21 -853
- mindspore/ops/operations/_infer_ops.py +19 -0
- mindspore/ops/operations/_inner_ops.py +107 -518
- mindspore/ops/operations/_rl_inner_ops.py +2 -2
- mindspore/ops/operations/_scalar_ops.py +5 -480
- mindspore/ops/operations/_sequence_ops.py +6 -36
- mindspore/ops/operations/_tensor_array.py +8 -8
- mindspore/ops/operations/array_ops.py +108 -2705
- mindspore/ops/operations/comm_ops.py +801 -118
- mindspore/ops/operations/custom_ops.py +61 -120
- mindspore/ops/operations/debug_ops.py +104 -35
- mindspore/ops/operations/image_ops.py +1 -217
- mindspore/ops/operations/inner_ops.py +5 -40
- mindspore/ops/operations/linalg_ops.py +1 -49
- mindspore/ops/operations/manually_defined/__init__.py +24 -0
- mindspore/ops/operations/manually_defined/_inner.py +61 -0
- mindspore/ops/operations/manually_defined/ops_def.py +2016 -0
- mindspore/ops/operations/math_ops.py +572 -4667
- mindspore/ops/operations/nn_ops.py +248 -2162
- mindspore/ops/operations/other_ops.py +53 -45
- mindspore/ops/operations/random_ops.py +4 -53
- mindspore/ops/operations/reshard_ops.py +53 -0
- mindspore/ops/operations/sparse_ops.py +4 -4
- mindspore/ops/primitive.py +204 -103
- mindspore/ops/silent_check.py +5 -5
- mindspore/ops_generate/__init__.py +27 -0
- mindspore/ops_generate/arg_dtype_cast.py +250 -0
- mindspore/ops_generate/arg_handler.py +197 -0
- mindspore/ops_generate/gen_aclnn_implement.py +263 -0
- mindspore/ops_generate/gen_ops.py +1084 -0
- mindspore/ops_generate/gen_ops_inner_prim.py +131 -0
- mindspore/ops_generate/gen_pyboost_func.py +968 -0
- mindspore/ops_generate/gen_utils.py +209 -0
- mindspore/ops_generate/op_proto.py +138 -0
- mindspore/ops_generate/pyboost_utils.py +354 -0
- mindspore/ops_generate/template.py +239 -0
- mindspore/parallel/__init__.py +6 -4
- mindspore/parallel/_auto_parallel_context.py +73 -3
- mindspore/parallel/_cell_wrapper.py +16 -9
- mindspore/parallel/_cost_model_context.py +1 -1
- mindspore/parallel/_dp_allreduce_fusion.py +159 -159
- mindspore/parallel/_parallel_serialization.py +29 -13
- mindspore/parallel/_ps_context.py +1 -1
- mindspore/parallel/_recovery_context.py +1 -1
- mindspore/parallel/_tensor.py +18 -11
- mindspore/parallel/_transformer/__init__.py +1 -1
- mindspore/parallel/_transformer/layers.py +1 -1
- mindspore/parallel/_transformer/loss.py +1 -1
- mindspore/parallel/_transformer/moe.py +1 -1
- mindspore/parallel/_transformer/op_parallel_config.py +1 -1
- mindspore/parallel/_transformer/transformer.py +2 -2
- mindspore/parallel/_utils.py +161 -6
- mindspore/parallel/algo_parameter_config.py +6 -8
- mindspore/parallel/checkpoint_transform.py +191 -32
- mindspore/parallel/cluster/__init__.py +15 -0
- mindspore/parallel/cluster/process_entity/__init__.py +18 -0
- mindspore/parallel/cluster/process_entity/_api.py +344 -0
- mindspore/parallel/cluster/process_entity/_utils.py +126 -0
- mindspore/parallel/cluster/run.py +136 -0
- mindspore/parallel/mpi/__init__.py +1 -1
- mindspore/parallel/mpi/_mpi_config.py +1 -1
- mindspore/parallel/parameter_broadcast.py +152 -0
- mindspore/parallel/shard.py +128 -17
- mindspore/pgodb140.dll +0 -0
- mindspore/pgort140.dll +0 -0
- mindspore/profiler/__init__.py +3 -2
- mindspore/profiler/common/process_pool.py +41 -0
- mindspore/profiler/common/singleton.py +28 -0
- mindspore/profiler/common/util.py +125 -0
- mindspore/profiler/envprofiling.py +2 -2
- mindspore/{_extends/parallel_compile/tbe_compiler → profiler/parser/ascend_analysis}/__init__.py +1 -1
- mindspore/profiler/parser/ascend_analysis/constant.py +53 -0
- mindspore/profiler/parser/ascend_analysis/file_manager.py +159 -0
- mindspore/profiler/parser/ascend_analysis/function_event.py +161 -0
- mindspore/profiler/parser/ascend_analysis/fwk_cann_parser.py +131 -0
- mindspore/profiler/parser/ascend_analysis/fwk_file_parser.py +85 -0
- mindspore/profiler/parser/ascend_analysis/msprof_timeline_parser.py +57 -0
- mindspore/profiler/parser/ascend_analysis/profiler_info_parser.py +116 -0
- mindspore/profiler/parser/ascend_analysis/tlv_decoder.py +86 -0
- mindspore/profiler/parser/ascend_analysis/trace_event_manager.py +68 -0
- mindspore/profiler/parser/ascend_cluster_generator.py +14 -9
- mindspore/profiler/parser/ascend_communicate_generator.py +0 -1
- mindspore/profiler/parser/ascend_flops_generator.py +20 -4
- mindspore/profiler/parser/ascend_hccl_generator.py +29 -278
- mindspore/profiler/parser/ascend_integrate_generator.py +42 -0
- mindspore/profiler/parser/ascend_memory_generator.py +185 -0
- mindspore/profiler/parser/ascend_msprof_exporter.py +147 -146
- mindspore/profiler/parser/ascend_msprof_generator.py +73 -283
- mindspore/profiler/parser/ascend_op_generator.py +92 -42
- mindspore/profiler/parser/ascend_timeline_generator.py +296 -133
- mindspore/profiler/parser/base_timeline_generator.py +6 -0
- mindspore/profiler/parser/framework_parser.py +3 -2
- mindspore/profiler/parser/integrator.py +3 -1
- mindspore/profiler/parser/minddata_parser.py +72 -3
- mindspore/profiler/parser/msadvisor_analyzer.py +1 -1
- mindspore/profiler/parser/msadvisor_parser.py +1 -1
- mindspore/profiler/parser/profiler_info.py +16 -1
- mindspore/profiler/profiling.py +445 -190
- mindspore/rewrite/__init__.py +2 -13
- mindspore/rewrite/api/node.py +122 -36
- mindspore/rewrite/api/pattern_engine.py +2 -3
- mindspore/rewrite/api/scoped_value.py +16 -15
- mindspore/rewrite/api/symbol_tree.py +45 -29
- mindspore/rewrite/ast_helpers/__init__.py +3 -6
- mindspore/rewrite/ast_helpers/ast_converter.py +143 -0
- mindspore/rewrite/ast_helpers/ast_finder.py +48 -0
- mindspore/rewrite/ast_helpers/ast_flattener.py +268 -0
- mindspore/rewrite/ast_helpers/ast_modifier.py +160 -92
- mindspore/rewrite/common/__init__.py +1 -2
- mindspore/rewrite/common/config.py +24 -0
- mindspore/rewrite/common/{rewrite_elog.py → error_log.py} +39 -39
- mindspore/rewrite/{namer.py → common/namer.py} +63 -18
- mindspore/rewrite/common/namespace.py +118 -0
- mindspore/rewrite/node/__init__.py +5 -5
- mindspore/rewrite/node/call_function.py +23 -7
- mindspore/rewrite/node/cell_container.py +7 -3
- mindspore/rewrite/node/control_flow.py +53 -28
- mindspore/rewrite/node/node.py +212 -196
- mindspore/rewrite/node/node_manager.py +51 -22
- mindspore/rewrite/node/node_topological_manager.py +3 -23
- mindspore/rewrite/parsers/__init__.py +12 -0
- mindspore/rewrite/parsers/arguments_parser.py +8 -9
- mindspore/rewrite/parsers/assign_parser.py +637 -413
- mindspore/rewrite/parsers/attribute_parser.py +3 -4
- mindspore/rewrite/parsers/class_def_parser.py +115 -148
- mindspore/rewrite/parsers/constant_parser.py +5 -5
- mindspore/rewrite/parsers/container_parser.py +4 -6
- mindspore/rewrite/parsers/expr_parser.py +55 -0
- mindspore/rewrite/parsers/for_parser.py +31 -98
- mindspore/rewrite/parsers/function_def_parser.py +13 -5
- mindspore/rewrite/parsers/if_parser.py +28 -10
- mindspore/rewrite/parsers/module_parser.py +8 -182
- mindspore/rewrite/parsers/parser.py +1 -5
- mindspore/rewrite/parsers/parser_register.py +1 -1
- mindspore/rewrite/parsers/return_parser.py +5 -10
- mindspore/rewrite/parsers/while_parser.py +59 -0
- mindspore/rewrite/sparsify/utils.py +1 -1
- mindspore/rewrite/symbol_tree/__init__.py +20 -0
- mindspore/rewrite/{symbol_tree.py → symbol_tree/symbol_tree.py} +704 -185
- mindspore/rewrite/{symbol_tree_builder.py → symbol_tree/symbol_tree_builder.py} +8 -8
- mindspore/rewrite/{symbol_tree_dumper.py → symbol_tree/symbol_tree_dumper.py} +4 -4
- mindspore/run_check/_check_version.py +6 -14
- mindspore/run_check/run_check.py +1 -1
- mindspore/safeguard/rewrite_obfuscation.py +9 -19
- mindspore/swresample-4.dll +0 -0
- mindspore/swscale-6.dll +0 -0
- mindspore/tbbmalloc.dll +0 -0
- mindspore/tinyxml2.dll +0 -0
- mindspore/train/__init__.py +6 -5
- mindspore/train/_utils.py +178 -4
- mindspore/train/amp.py +167 -245
- mindspore/train/anf_ir_pb2.py +14 -2
- mindspore/train/callback/__init__.py +5 -2
- mindspore/train/callback/_backup_and_restore.py +5 -5
- mindspore/train/callback/_callback.py +4 -4
- mindspore/train/callback/_checkpoint.py +143 -29
- mindspore/train/callback/_cluster_monitor.py +201 -0
- mindspore/train/callback/_early_stop.py +2 -2
- mindspore/train/callback/_flops_collector.py +238 -0
- mindspore/train/callback/_landscape.py +15 -9
- mindspore/train/callback/_loss_monitor.py +2 -2
- mindspore/train/callback/_mindio_ttp.py +443 -0
- mindspore/train/callback/_on_request_exit.py +2 -2
- mindspore/train/callback/_reduce_lr_on_plateau.py +2 -2
- mindspore/train/callback/_summary_collector.py +7 -7
- mindspore/train/callback/_time_monitor.py +3 -3
- mindspore/train/data_sink.py +6 -5
- mindspore/train/dataset_helper.py +60 -21
- mindspore/train/loss_scale_manager.py +2 -2
- mindspore/train/metrics/accuracy.py +7 -7
- mindspore/train/metrics/confusion_matrix.py +8 -6
- mindspore/train/metrics/cosine_similarity.py +6 -4
- mindspore/train/metrics/error.py +2 -2
- mindspore/train/metrics/metric.py +3 -3
- mindspore/train/metrics/perplexity.py +2 -1
- mindspore/train/metrics/topk.py +2 -2
- mindspore/train/mind_ir_pb2.py +89 -15
- mindspore/train/model.py +290 -60
- mindspore/train/serialization.py +495 -220
- mindspore/train/summary/_summary_adapter.py +1 -1
- mindspore/train/summary/summary_record.py +51 -28
- mindspore/train/train_thor/convert_utils.py +3 -3
- mindspore/turbojpeg.dll +0 -0
- mindspore/vcmeta.dll +0 -0
- mindspore/vcruntime140.dll +0 -0
- mindspore/vcruntime140_1.dll +0 -0
- mindspore/version.py +1 -1
- {mindspore-2.2.14.dist-info → mindspore-2.3.0.dist-info}/METADATA +3 -3
- mindspore-2.3.0.dist-info/RECORD +1400 -0
- {mindspore-2.2.14.dist-info → mindspore-2.3.0.dist-info}/entry_points.txt +1 -0
- mindspore/_extends/parallel_compile/tbe_compiler/tbe_adapter.py +0 -662
- mindspore/_extends/parallel_compile/tbe_compiler/tbe_helper.py +0 -377
- mindspore/_extends/parallel_compile/tbe_compiler/tbe_job.py +0 -201
- mindspore/_extends/parallel_compile/tbe_compiler/tbe_job_manager.py +0 -515
- mindspore/gen_ops.py +0 -273
- mindspore/nn/layer/flash_attention.py +0 -189
- mindspore/ops/_op_impl/cpu/concat.py +0 -39
- mindspore/ops/_op_impl/cpu/tensor_shape.py +0 -42
- mindspore/ops/_op_impl/tbe/__init__.py +0 -47
- mindspore/ops/_op_impl/tbe/abs.py +0 -38
- mindspore/ops/_op_impl/tbe/abs_ds.py +0 -39
- mindspore/ops/_op_impl/tbe/abs_grad.py +0 -43
- mindspore/ops/_op_impl/tbe/abs_grad_ds.py +0 -44
- mindspore/ops/_op_impl/tbe/accumulate_n_v2.py +0 -41
- mindspore/ops/_op_impl/tbe/accumulate_n_v2_ds.py +0 -42
- mindspore/ops/_op_impl/tbe/acos.py +0 -37
- mindspore/ops/_op_impl/tbe/acos_ds.py +0 -38
- mindspore/ops/_op_impl/tbe/acos_grad.py +0 -43
- mindspore/ops/_op_impl/tbe/acos_grad_ds.py +0 -44
- mindspore/ops/_op_impl/tbe/acosh.py +0 -37
- mindspore/ops/_op_impl/tbe/acosh_ds.py +0 -38
- mindspore/ops/_op_impl/tbe/acosh_grad.py +0 -43
- mindspore/ops/_op_impl/tbe/acosh_grad_ds.py +0 -44
- mindspore/ops/_op_impl/tbe/act_ulq_clamp_max_grad.py +0 -38
- mindspore/ops/_op_impl/tbe/act_ulq_clamp_min_grad.py +0 -38
- mindspore/ops/_op_impl/tbe/acts_ulq.py +0 -45
- mindspore/ops/_op_impl/tbe/acts_ulq_input_grad.py +0 -38
- mindspore/ops/_op_impl/tbe/adam_apply_one.py +0 -50
- mindspore/ops/_op_impl/tbe/adam_apply_one_assign.py +0 -53
- mindspore/ops/_op_impl/tbe/adam_apply_one_ds.py +0 -51
- mindspore/ops/_op_impl/tbe/adam_apply_one_with_decay.py +0 -54
- mindspore/ops/_op_impl/tbe/adam_apply_one_with_decay_assign.py +0 -54
- mindspore/ops/_op_impl/tbe/adam_apply_one_with_decay_ds.py +0 -55
- mindspore/ops/_op_impl/tbe/adaptive_max_pool2d.py +0 -37
- mindspore/ops/_op_impl/tbe/add.py +0 -42
- mindspore/ops/_op_impl/tbe/add_ds.py +0 -43
- mindspore/ops/_op_impl/tbe/add_n.py +0 -39
- mindspore/ops/_op_impl/tbe/add_n_ds.py +0 -40
- mindspore/ops/_op_impl/tbe/addcdiv.py +0 -41
- mindspore/ops/_op_impl/tbe/addcdiv_ds.py +0 -42
- mindspore/ops/_op_impl/tbe/addcmul.py +0 -43
- mindspore/ops/_op_impl/tbe/addcmul_ds.py +0 -44
- mindspore/ops/_op_impl/tbe/apply_ada_max.py +0 -68
- mindspore/ops/_op_impl/tbe/apply_ada_max_ds.py +0 -69
- mindspore/ops/_op_impl/tbe/apply_adadelta.py +0 -66
- mindspore/ops/_op_impl/tbe/apply_adadelta_ds.py +0 -67
- mindspore/ops/_op_impl/tbe/apply_adagrad.py +0 -55
- mindspore/ops/_op_impl/tbe/apply_adagrad_d_a.py +0 -67
- mindspore/ops/_op_impl/tbe/apply_adagrad_ds.py +0 -56
- mindspore/ops/_op_impl/tbe/apply_adagrad_v2.py +0 -48
- mindspore/ops/_op_impl/tbe/apply_adagrad_v2_ds.py +0 -49
- mindspore/ops/_op_impl/tbe/apply_adam.py +0 -79
- mindspore/ops/_op_impl/tbe/apply_adam_ds.py +0 -80
- mindspore/ops/_op_impl/tbe/apply_adam_with_amsgrad.py +0 -60
- mindspore/ops/_op_impl/tbe/apply_adam_with_amsgrad_ds.py +0 -61
- mindspore/ops/_op_impl/tbe/apply_add_sign.py +0 -65
- mindspore/ops/_op_impl/tbe/apply_add_sign_ds.py +0 -66
- mindspore/ops/_op_impl/tbe/apply_centered_rms_prop.py +0 -77
- mindspore/ops/_op_impl/tbe/apply_centered_rms_prop_ds.py +0 -78
- mindspore/ops/_op_impl/tbe/apply_ftrl.py +0 -67
- mindspore/ops/_op_impl/tbe/apply_ftrl_ds.py +0 -68
- mindspore/ops/_op_impl/tbe/apply_gradient_descent.py +0 -44
- mindspore/ops/_op_impl/tbe/apply_gradient_descent_ds.py +0 -45
- mindspore/ops/_op_impl/tbe/apply_keras_momentum.py +0 -49
- mindspore/ops/_op_impl/tbe/apply_momentum.py +0 -64
- mindspore/ops/_op_impl/tbe/apply_momentum_ds.py +0 -65
- mindspore/ops/_op_impl/tbe/apply_power_sign.py +0 -65
- mindspore/ops/_op_impl/tbe/apply_power_sign_ds.py +0 -66
- mindspore/ops/_op_impl/tbe/apply_proximal_adagrad.py +0 -57
- mindspore/ops/_op_impl/tbe/apply_proximal_adagrad_ds.py +0 -58
- mindspore/ops/_op_impl/tbe/apply_proximal_gradient_descent.py +0 -54
- mindspore/ops/_op_impl/tbe/apply_proximal_gradient_descent_ds.py +0 -55
- mindspore/ops/_op_impl/tbe/apply_rms_prop.py +0 -52
- mindspore/ops/_op_impl/tbe/approximate_equal.py +0 -39
- mindspore/ops/_op_impl/tbe/approximate_equal_ds.py +0 -40
- mindspore/ops/_op_impl/tbe/arg_max.py +0 -38
- mindspore/ops/_op_impl/tbe/arg_max_with_value.py +0 -38
- mindspore/ops/_op_impl/tbe/arg_max_with_value_ds.py +0 -39
- mindspore/ops/_op_impl/tbe/arg_min.py +0 -38
- mindspore/ops/_op_impl/tbe/arg_min_v2_ds.py +0 -40
- mindspore/ops/_op_impl/tbe/arg_min_with_value.py +0 -38
- mindspore/ops/_op_impl/tbe/arg_min_with_value_ds.py +0 -39
- mindspore/ops/_op_impl/tbe/asin.py +0 -37
- mindspore/ops/_op_impl/tbe/asin_ds.py +0 -38
- mindspore/ops/_op_impl/tbe/asin_grad.py +0 -43
- mindspore/ops/_op_impl/tbe/asin_grad_ds.py +0 -44
- mindspore/ops/_op_impl/tbe/asinh.py +0 -37
- mindspore/ops/_op_impl/tbe/asinh_ds.py +0 -38
- mindspore/ops/_op_impl/tbe/asinh_grad.py +0 -43
- mindspore/ops/_op_impl/tbe/asinh_grad_ds.py +0 -44
- mindspore/ops/_op_impl/tbe/assign.py +0 -79
- mindspore/ops/_op_impl/tbe/assign_add.py +0 -59
- mindspore/ops/_op_impl/tbe/assign_add_ds.py +0 -60
- mindspore/ops/_op_impl/tbe/assign_ds.py +0 -80
- mindspore/ops/_op_impl/tbe/assign_sub.py +0 -55
- mindspore/ops/_op_impl/tbe/assign_sub_ds.py +0 -56
- mindspore/ops/_op_impl/tbe/atan.py +0 -37
- mindspore/ops/_op_impl/tbe/atan2.py +0 -38
- mindspore/ops/_op_impl/tbe/atan2_ds.py +0 -39
- mindspore/ops/_op_impl/tbe/atan_ds.py +0 -38
- mindspore/ops/_op_impl/tbe/atan_grad.py +0 -43
- mindspore/ops/_op_impl/tbe/atan_grad_ds.py +0 -44
- mindspore/ops/_op_impl/tbe/atanh.py +0 -37
- mindspore/ops/_op_impl/tbe/atanh_ds.py +0 -38
- mindspore/ops/_op_impl/tbe/avg_pool.py +0 -43
- mindspore/ops/_op_impl/tbe/avg_pool_3d.py +0 -44
- mindspore/ops/_op_impl/tbe/avg_pool_3d_grad.py +0 -45
- mindspore/ops/_op_impl/tbe/avg_pool_ds.py +0 -44
- mindspore/ops/_op_impl/tbe/avg_pool_grad.py +0 -42
- mindspore/ops/_op_impl/tbe/avg_pool_grad_vm.py +0 -42
- mindspore/ops/_op_impl/tbe/basic_lstm_cell.py +0 -57
- mindspore/ops/_op_impl/tbe/basic_lstm_cell_c_state_grad.py +0 -50
- mindspore/ops/_op_impl/tbe/basic_lstm_cell_c_state_grad_v2.py +0 -51
- mindspore/ops/_op_impl/tbe/basic_lstm_cell_input_grad.py +0 -42
- mindspore/ops/_op_impl/tbe/basic_lstm_cell_weight_grad.py +0 -41
- mindspore/ops/_op_impl/tbe/batch_matmul.py +0 -42
- mindspore/ops/_op_impl/tbe/batch_matmul_ds.py +0 -41
- mindspore/ops/_op_impl/tbe/batch_matmul_v2.py +0 -47
- mindspore/ops/_op_impl/tbe/batch_to_space.py +0 -38
- mindspore/ops/_op_impl/tbe/batch_to_space_nd.py +0 -38
- mindspore/ops/_op_impl/tbe/batch_to_space_nd_ds.py +0 -39
- mindspore/ops/_op_impl/tbe/batch_to_space_nd_v2.py +0 -41
- mindspore/ops/_op_impl/tbe/batchnorm.py +0 -58
- mindspore/ops/_op_impl/tbe/batchnorm_grad.py +0 -58
- mindspore/ops/_op_impl/tbe/bce_with_logits_loss.py +0 -42
- mindspore/ops/_op_impl/tbe/bessel_i0e.py +0 -37
- mindspore/ops/_op_impl/tbe/bessel_i0e_ds.py +0 -38
- mindspore/ops/_op_impl/tbe/bessel_i1e.py +0 -37
- mindspore/ops/_op_impl/tbe/bessel_i1e_ds.py +0 -38
- mindspore/ops/_op_impl/tbe/bias_add.py +0 -38
- mindspore/ops/_op_impl/tbe/bias_add_ds.py +0 -39
- mindspore/ops/_op_impl/tbe/bias_add_grad.py +0 -53
- mindspore/ops/_op_impl/tbe/binary_cross_entropy.py +0 -39
- mindspore/ops/_op_impl/tbe/binary_cross_entropy_ds.py +0 -40
- mindspore/ops/_op_impl/tbe/binary_cross_entropy_grad.py +0 -44
- mindspore/ops/_op_impl/tbe/binary_cross_entropy_grad_ds.py +0 -45
- mindspore/ops/_op_impl/tbe/bitwise_and.py +0 -39
- mindspore/ops/_op_impl/tbe/bitwise_and_ds.py +0 -40
- mindspore/ops/_op_impl/tbe/bitwise_or.py +0 -39
- mindspore/ops/_op_impl/tbe/bitwise_or_ds.py +0 -40
- mindspore/ops/_op_impl/tbe/bitwise_xor.py +0 -39
- mindspore/ops/_op_impl/tbe/bitwise_xor_ds.py +0 -40
- mindspore/ops/_op_impl/tbe/bn_infer.py +0 -43
- mindspore/ops/_op_impl/tbe/bn_infer_ds.py +0 -45
- mindspore/ops/_op_impl/tbe/bn_infer_grad.py +0 -41
- mindspore/ops/_op_impl/tbe/bn_infer_grad_ds.py +0 -40
- mindspore/ops/_op_impl/tbe/bn_inference.py +0 -50
- mindspore/ops/_op_impl/tbe/bn_training_reduce.py +0 -38
- mindspore/ops/_op_impl/tbe/bn_training_reduce_ds.py +0 -39
- mindspore/ops/_op_impl/tbe/bn_training_reduce_grad.py +0 -46
- mindspore/ops/_op_impl/tbe/bn_training_reduce_grad_ds.py +0 -47
- mindspore/ops/_op_impl/tbe/bn_training_update.py +0 -52
- mindspore/ops/_op_impl/tbe/bn_training_update_ds.py +0 -53
- mindspore/ops/_op_impl/tbe/bn_training_update_grad.py +0 -44
- mindspore/ops/_op_impl/tbe/bn_training_update_grad_ds.py +0 -45
- mindspore/ops/_op_impl/tbe/bn_training_update_v2.py +0 -48
- mindspore/ops/_op_impl/tbe/bn_training_update_v3.py +0 -51
- mindspore/ops/_op_impl/tbe/bounding_box_decode.py +0 -41
- mindspore/ops/_op_impl/tbe/bounding_box_decode_ds.py +0 -42
- mindspore/ops/_op_impl/tbe/bounding_box_encode.py +0 -38
- mindspore/ops/_op_impl/tbe/broadcast_to.py +0 -40
- mindspore/ops/_op_impl/tbe/broadcast_to_ds.py +0 -44
- mindspore/ops/_op_impl/tbe/cast.py +0 -55
- mindspore/ops/_op_impl/tbe/cast_ds.py +0 -58
- mindspore/ops/_op_impl/tbe/cdist.py +0 -38
- mindspore/ops/_op_impl/tbe/cdist_grad.py +0 -42
- mindspore/ops/_op_impl/tbe/ceil.py +0 -37
- mindspore/ops/_op_impl/tbe/ceil_ds.py +0 -38
- mindspore/ops/_op_impl/tbe/celu.py +0 -39
- mindspore/ops/_op_impl/tbe/centralization.py +0 -39
- mindspore/ops/_op_impl/tbe/check_valid.py +0 -38
- mindspore/ops/_op_impl/tbe/check_valid_ds.py +0 -39
- mindspore/ops/_op_impl/tbe/clip_by_norm_no_div_sum.py +0 -41
- mindspore/ops/_op_impl/tbe/clip_by_norm_no_div_sum_ds.py +0 -42
- mindspore/ops/_op_impl/tbe/clip_by_value.py +0 -41
- mindspore/ops/_op_impl/tbe/clip_by_value_ds.py +0 -42
- mindspore/ops/_op_impl/tbe/concat.py +0 -40
- mindspore/ops/_op_impl/tbe/concat_ds.py +0 -38
- mindspore/ops/_op_impl/tbe/confusion_matrix.py +0 -63
- mindspore/ops/_op_impl/tbe/confusion_mul_grad.py +0 -40
- mindspore/ops/_op_impl/tbe/confusion_softmax_grad.py +0 -41
- mindspore/ops/_op_impl/tbe/confusion_transpose_d.py +0 -39
- mindspore/ops/_op_impl/tbe/conv2d.py +0 -47
- mindspore/ops/_op_impl/tbe/conv2d_backprop_filter.py +0 -42
- mindspore/ops/_op_impl/tbe/conv2d_backprop_filter_ds.py +0 -43
- mindspore/ops/_op_impl/tbe/conv2d_backprop_input.py +0 -42
- mindspore/ops/_op_impl/tbe/conv2d_backprop_input_ds.py +0 -44
- mindspore/ops/_op_impl/tbe/conv2d_ds.py +0 -47
- mindspore/ops/_op_impl/tbe/conv2d_transpose.py +0 -48
- mindspore/ops/_op_impl/tbe/conv3d.py +0 -45
- mindspore/ops/_op_impl/tbe/conv3d_backprop_filter.py +0 -42
- mindspore/ops/_op_impl/tbe/conv3d_backprop_input.py +0 -42
- mindspore/ops/_op_impl/tbe/conv3d_transpose.py +0 -47
- mindspore/ops/_op_impl/tbe/conv3d_transpose_ds.py +0 -48
- mindspore/ops/_op_impl/tbe/cos.py +0 -37
- mindspore/ops/_op_impl/tbe/cos_ds.py +0 -38
- mindspore/ops/_op_impl/tbe/cosh.py +0 -37
- mindspore/ops/_op_impl/tbe/cosh_ds.py +0 -38
- mindspore/ops/_op_impl/tbe/ctc_loss_v2.py +0 -42
- mindspore/ops/_op_impl/tbe/ctc_loss_v2_grad.py +0 -44
- mindspore/ops/_op_impl/tbe/cum_sum.py +0 -42
- mindspore/ops/_op_impl/tbe/cum_sum_ds.py +0 -44
- mindspore/ops/_op_impl/tbe/cummin.py +0 -41
- mindspore/ops/_op_impl/tbe/cumprod.py +0 -42
- mindspore/ops/_op_impl/tbe/data_format_dim_map.py +0 -38
- mindspore/ops/_op_impl/tbe/data_format_dim_map_ds.py +0 -40
- mindspore/ops/_op_impl/tbe/deformable_offsets.py +0 -45
- mindspore/ops/_op_impl/tbe/deformable_offsets_grad.py +0 -48
- mindspore/ops/_op_impl/tbe/depth_to_space_ds.py +0 -49
- mindspore/ops/_op_impl/tbe/depthwise_conv2d.py +0 -44
- mindspore/ops/_op_impl/tbe/depthwise_conv2d_backprop_filter.py +0 -41
- mindspore/ops/_op_impl/tbe/depthwise_conv2d_backprop_input.py +0 -41
- mindspore/ops/_op_impl/tbe/diag.py +0 -38
- mindspore/ops/_op_impl/tbe/diag_part.py +0 -38
- mindspore/ops/_op_impl/tbe/dilation.py +0 -40
- mindspore/ops/_op_impl/tbe/div.py +0 -41
- mindspore/ops/_op_impl/tbe/div_ds.py +0 -42
- mindspore/ops/_op_impl/tbe/div_no_nan.py +0 -41
- mindspore/ops/_op_impl/tbe/div_no_nan_ds.py +0 -42
- mindspore/ops/_op_impl/tbe/dropout_do_mask.py +0 -38
- mindspore/ops/_op_impl/tbe/dropout_do_mask_ds.py +0 -39
- mindspore/ops/_op_impl/tbe/dropout_do_mask_v3.py +0 -39
- mindspore/ops/_op_impl/tbe/dynamic_atomic_addr_clean.py +0 -34
- mindspore/ops/_op_impl/tbe/dynamic_gru_v2.py +0 -95
- mindspore/ops/_op_impl/tbe/dynamic_rnn.py +0 -82
- mindspore/ops/_op_impl/tbe/elu.py +0 -38
- mindspore/ops/_op_impl/tbe/elu_ds.py +0 -39
- mindspore/ops/_op_impl/tbe/elu_grad.py +0 -43
- mindspore/ops/_op_impl/tbe/elu_grad_ds.py +0 -44
- mindspore/ops/_op_impl/tbe/equal.py +0 -42
- mindspore/ops/_op_impl/tbe/equal_ds.py +0 -42
- mindspore/ops/_op_impl/tbe/erf.py +0 -37
- mindspore/ops/_op_impl/tbe/erf_ds.py +0 -38
- mindspore/ops/_op_impl/tbe/erfc.py +0 -37
- mindspore/ops/_op_impl/tbe/erfc_ds.py +0 -38
- mindspore/ops/_op_impl/tbe/erfinv.py +0 -36
- mindspore/ops/_op_impl/tbe/exp.py +0 -40
- mindspore/ops/_op_impl/tbe/exp_ds.py +0 -41
- mindspore/ops/_op_impl/tbe/expand_dims.py +0 -38
- mindspore/ops/_op_impl/tbe/expm1.py +0 -37
- mindspore/ops/_op_impl/tbe/expm1_ds.py +0 -38
- mindspore/ops/_op_impl/tbe/extract_image_patches.py +0 -41
- mindspore/ops/_op_impl/tbe/extract_volume_patches.py +0 -39
- mindspore/ops/_op_impl/tbe/fake_quant_with_min_max_vars.py +0 -39
- mindspore/ops/_op_impl/tbe/fake_quant_with_min_max_vars_gradient.py +0 -43
- mindspore/ops/_op_impl/tbe/fake_quant_with_min_max_vars_per_channel.py +0 -39
- mindspore/ops/_op_impl/tbe/fake_quant_with_min_max_vars_per_channel_gradient.py +0 -43
- mindspore/ops/_op_impl/tbe/fast_gelu.py +0 -37
- mindspore/ops/_op_impl/tbe/fast_gelu_ds.py +0 -38
- mindspore/ops/_op_impl/tbe/fast_gelu_grad.py +0 -41
- mindspore/ops/_op_impl/tbe/fast_gelu_grad_ds.py +0 -42
- mindspore/ops/_op_impl/tbe/fill.py +0 -56
- mindspore/ops/_op_impl/tbe/fill_ds.py +0 -42
- mindspore/ops/_op_impl/tbe/flatten.py +0 -48
- mindspore/ops/_op_impl/tbe/floor.py +0 -37
- mindspore/ops/_op_impl/tbe/floor_div.py +0 -41
- mindspore/ops/_op_impl/tbe/floor_div_ds.py +0 -42
- mindspore/ops/_op_impl/tbe/floor_ds.py +0 -38
- mindspore/ops/_op_impl/tbe/floor_mod.py +0 -39
- mindspore/ops/_op_impl/tbe/floor_mod_ds.py +0 -40
- mindspore/ops/_op_impl/tbe/fused_dbn_dw.py +0 -52
- mindspore/ops/_op_impl/tbe/fused_mul_add.py +0 -38
- mindspore/ops/_op_impl/tbe/fused_mul_add_n.py +0 -48
- mindspore/ops/_op_impl/tbe/fused_mul_add_n_l2loss.py +0 -53
- mindspore/ops/_op_impl/tbe/fused_mul_apply_momentum.py +0 -57
- mindspore/ops/_op_impl/tbe/fused_mul_apply_momentum_extern.py +0 -67
- mindspore/ops/_op_impl/tbe/gather_nd.py +0 -52
- mindspore/ops/_op_impl/tbe/gather_nd_ds.py +0 -48
- mindspore/ops/_op_impl/tbe/gather_v2.py +0 -56
- mindspore/ops/_op_impl/tbe/gather_v2_ds.py +0 -68
- mindspore/ops/_op_impl/tbe/gelu.py +0 -37
- mindspore/ops/_op_impl/tbe/gelu_ds.py +0 -38
- mindspore/ops/_op_impl/tbe/gelu_grad.py +0 -42
- mindspore/ops/_op_impl/tbe/gelu_grad_ds.py +0 -43
- mindspore/ops/_op_impl/tbe/ger.py +0 -43
- mindspore/ops/_op_impl/tbe/ger_ds.py +0 -44
- mindspore/ops/_op_impl/tbe/greater.py +0 -43
- mindspore/ops/_op_impl/tbe/greater_equal.py +0 -41
- mindspore/ops/_op_impl/tbe/greater_equal_ds.py +0 -42
- mindspore/ops/_op_impl/tbe/gru_v2_hidden_grad.py +0 -51
- mindspore/ops/_op_impl/tbe/gru_v2_hidden_grad_cell.py +0 -52
- mindspore/ops/_op_impl/tbe/hard_swish.py +0 -37
- mindspore/ops/_op_impl/tbe/hard_swish_ds.py +0 -38
- mindspore/ops/_op_impl/tbe/hard_swish_grad.py +0 -41
- mindspore/ops/_op_impl/tbe/hard_swish_grad_ds.py +0 -42
- mindspore/ops/_op_impl/tbe/histogram_fixed_width.py +0 -40
- mindspore/ops/_op_impl/tbe/hshrink.py +0 -33
- mindspore/ops/_op_impl/tbe/hshrink_grad.py +0 -37
- mindspore/ops/_op_impl/tbe/hsigmoid.py +0 -45
- mindspore/ops/_op_impl/tbe/hsigmoid_grad.py +0 -39
- mindspore/ops/_op_impl/tbe/ifmr.py +0 -47
- mindspore/ops/_op_impl/tbe/ifmr_ds.py +0 -48
- mindspore/ops/_op_impl/tbe/im2col.py +0 -42
- mindspore/ops/_op_impl/tbe/in_top_k.py +0 -37
- mindspore/ops/_op_impl/tbe/inplace_add.py +0 -39
- mindspore/ops/_op_impl/tbe/inplace_index_add.py +0 -46
- mindspore/ops/_op_impl/tbe/inplace_sub.py +0 -39
- mindspore/ops/_op_impl/tbe/inplace_update.py +0 -39
- mindspore/ops/_op_impl/tbe/inplace_update_ds.py +0 -40
- mindspore/ops/_op_impl/tbe/inv.py +0 -38
- mindspore/ops/_op_impl/tbe/inv_ds.py +0 -39
- mindspore/ops/_op_impl/tbe/inv_grad.py +0 -40
- mindspore/ops/_op_impl/tbe/inv_grad_ds.py +0 -41
- mindspore/ops/_op_impl/tbe/invert.py +0 -37
- mindspore/ops/_op_impl/tbe/invert_ds.py +0 -38
- mindspore/ops/_op_impl/tbe/iou.py +0 -38
- mindspore/ops/_op_impl/tbe/iou_ds.py +0 -39
- mindspore/ops/_op_impl/tbe/is_close.py +0 -40
- mindspore/ops/_op_impl/tbe/kl_div_loss.py +0 -38
- mindspore/ops/_op_impl/tbe/kl_div_loss_ds.py +0 -39
- mindspore/ops/_op_impl/tbe/kl_div_loss_grad.py +0 -40
- mindspore/ops/_op_impl/tbe/l2_loss.py +0 -36
- mindspore/ops/_op_impl/tbe/l2_loss_ds.py +0 -37
- mindspore/ops/_op_impl/tbe/l2_normalize.py +0 -38
- mindspore/ops/_op_impl/tbe/l2_normalize_grad.py +0 -40
- mindspore/ops/_op_impl/tbe/lamb_apply_optimizer_assign.py +0 -55
- mindspore/ops/_op_impl/tbe/lamb_apply_weight_assign.py +0 -42
- mindspore/ops/_op_impl/tbe/lamb_next_mv.py +0 -59
- mindspore/ops/_op_impl/tbe/lamb_next_mv_with_decay.py +0 -59
- mindspore/ops/_op_impl/tbe/lamb_next_right.py +0 -44
- mindspore/ops/_op_impl/tbe/lamb_update_with_lr.py +0 -48
- mindspore/ops/_op_impl/tbe/lamb_update_with_lr_v2.py +0 -44
- mindspore/ops/_op_impl/tbe/lars_update.py +0 -50
- mindspore/ops/_op_impl/tbe/lars_update_ds.py +0 -51
- mindspore/ops/_op_impl/tbe/layer_norm.py +0 -46
- mindspore/ops/_op_impl/tbe/layer_norm_beta_gamma_backprop.py +0 -44
- mindspore/ops/_op_impl/tbe/layer_norm_beta_gamma_backprop_ds.py +0 -45
- mindspore/ops/_op_impl/tbe/layer_norm_beta_gamma_backprop_v2.py +0 -40
- mindspore/ops/_op_impl/tbe/layer_norm_beta_gamma_backprop_v2_ds.py +0 -41
- mindspore/ops/_op_impl/tbe/layer_norm_ds.py +0 -47
- mindspore/ops/_op_impl/tbe/layer_norm_grad.py +0 -48
- mindspore/ops/_op_impl/tbe/layer_norm_x_backprop.py +0 -43
- mindspore/ops/_op_impl/tbe/layer_norm_x_backprop_ds.py +0 -44
- mindspore/ops/_op_impl/tbe/layer_norm_x_backprop_v2.py +0 -45
- mindspore/ops/_op_impl/tbe/layer_norm_x_backprop_v2_ds.py +0 -45
- mindspore/ops/_op_impl/tbe/lerp.py +0 -38
- mindspore/ops/_op_impl/tbe/less.py +0 -41
- mindspore/ops/_op_impl/tbe/less_ds.py +0 -42
- mindspore/ops/_op_impl/tbe/less_equal.py +0 -41
- mindspore/ops/_op_impl/tbe/less_equal_ds.py +0 -42
- mindspore/ops/_op_impl/tbe/log.py +0 -40
- mindspore/ops/_op_impl/tbe/log1p.py +0 -37
- mindspore/ops/_op_impl/tbe/log1p_ds.py +0 -38
- mindspore/ops/_op_impl/tbe/log_ds.py +0 -41
- mindspore/ops/_op_impl/tbe/logical_and.py +0 -37
- mindspore/ops/_op_impl/tbe/logical_and_ds.py +0 -38
- mindspore/ops/_op_impl/tbe/logical_not.py +0 -36
- mindspore/ops/_op_impl/tbe/logical_not_ds.py +0 -37
- mindspore/ops/_op_impl/tbe/logical_or.py +0 -37
- mindspore/ops/_op_impl/tbe/logical_or_ds.py +0 -38
- mindspore/ops/_op_impl/tbe/logsoftmax.py +0 -37
- mindspore/ops/_op_impl/tbe/logsoftmax_ds.py +0 -38
- mindspore/ops/_op_impl/tbe/logsoftmax_grad.py +0 -38
- mindspore/ops/_op_impl/tbe/logsoftmax_grad_ds.py +0 -39
- mindspore/ops/_op_impl/tbe/lp_norm.py +0 -40
- mindspore/ops/_op_impl/tbe/lp_norm_ds.py +0 -41
- mindspore/ops/_op_impl/tbe/lrn.py +0 -41
- mindspore/ops/_op_impl/tbe/lrn_grad.py +0 -42
- mindspore/ops/_op_impl/tbe/lstm_input_grad.py +0 -51
- mindspore/ops/_op_impl/tbe/masked_fill.py +0 -40
- mindspore/ops/_op_impl/tbe/masked_fill_ds.py +0 -41
- mindspore/ops/_op_impl/tbe/matmul.py +0 -53
- mindspore/ops/_op_impl/tbe/matmul_ds.py +0 -47
- mindspore/ops/_op_impl/tbe/matmul_v2.py +0 -50
- mindspore/ops/_op_impl/tbe/matrix_diag.py +0 -45
- mindspore/ops/_op_impl/tbe/matrix_diag_part.py +0 -45
- mindspore/ops/_op_impl/tbe/matrix_set_diag.py +0 -46
- mindspore/ops/_op_impl/tbe/max_pool.py +0 -39
- mindspore/ops/_op_impl/tbe/max_pool3d.py +0 -44
- mindspore/ops/_op_impl/tbe/max_pool3d_grad.py +0 -43
- mindspore/ops/_op_impl/tbe/max_pool3d_grad_grad.py +0 -44
- mindspore/ops/_op_impl/tbe/max_pool_ds.py +0 -40
- mindspore/ops/_op_impl/tbe/max_pool_grad.py +0 -43
- mindspore/ops/_op_impl/tbe/max_pool_grad_grad.py +0 -41
- mindspore/ops/_op_impl/tbe/max_pool_grad_grad_with_argmax.py +0 -41
- mindspore/ops/_op_impl/tbe/max_pool_grad_with_argmax.py +0 -42
- mindspore/ops/_op_impl/tbe/max_pool_with_argmax.py +0 -40
- mindspore/ops/_op_impl/tbe/maximum.py +0 -39
- mindspore/ops/_op_impl/tbe/maximum_ds.py +0 -40
- mindspore/ops/_op_impl/tbe/maximum_grad.py +0 -46
- mindspore/ops/_op_impl/tbe/maximum_grad_ds.py +0 -47
- mindspore/ops/_op_impl/tbe/mem_set.py +0 -38
- mindspore/ops/_op_impl/tbe/minimum.py +0 -40
- mindspore/ops/_op_impl/tbe/minimum_ds.py +0 -41
- mindspore/ops/_op_impl/tbe/minimum_grad.py +0 -46
- mindspore/ops/_op_impl/tbe/minimum_grad_ds.py +0 -47
- mindspore/ops/_op_impl/tbe/mish.py +0 -37
- mindspore/ops/_op_impl/tbe/mod.py +0 -41
- mindspore/ops/_op_impl/tbe/mod_ds.py +0 -42
- mindspore/ops/_op_impl/tbe/mul.py +0 -37
- mindspore/ops/_op_impl/tbe/mul_ds.py +0 -38
- mindspore/ops/_op_impl/tbe/mul_no_nan.py +0 -39
- mindspore/ops/_op_impl/tbe/mul_no_nan_ds.py +0 -40
- mindspore/ops/_op_impl/tbe/multilabel_margin_loss.py +0 -39
- mindspore/ops/_op_impl/tbe/neg.py +0 -39
- mindspore/ops/_op_impl/tbe/neg_ds.py +0 -40
- mindspore/ops/_op_impl/tbe/new_im2col.py +0 -40
- mindspore/ops/_op_impl/tbe/nll_loss.py +0 -41
- mindspore/ops/_op_impl/tbe/nll_loss_grad.py +0 -44
- mindspore/ops/_op_impl/tbe/nms_with_mask.py +0 -39
- mindspore/ops/_op_impl/tbe/not_equal.py +0 -41
- mindspore/ops/_op_impl/tbe/not_equal_ds.py +0 -42
- mindspore/ops/_op_impl/tbe/npu_alloc_float_status.py +0 -34
- mindspore/ops/_op_impl/tbe/npu_clear_float_status.py +0 -35
- mindspore/ops/_op_impl/tbe/npu_clear_float_status_v2.py +0 -35
- mindspore/ops/_op_impl/tbe/npu_get_float_status.py +0 -35
- mindspore/ops/_op_impl/tbe/npu_get_float_status_v2.py +0 -35
- mindspore/ops/_op_impl/tbe/one_hot.py +0 -48
- mindspore/ops/_op_impl/tbe/one_hot_ds.py +0 -45
- mindspore/ops/_op_impl/tbe/ones_like.py +0 -40
- mindspore/ops/_op_impl/tbe/ones_like_ds.py +0 -41
- mindspore/ops/_op_impl/tbe/p_s_r_o_i_pooling.py +0 -40
- mindspore/ops/_op_impl/tbe/p_s_r_o_i_pooling_grad.py +0 -40
- mindspore/ops/_op_impl/tbe/pack.py +0 -58
- mindspore/ops/_op_impl/tbe/pack_ds.py +0 -59
- mindspore/ops/_op_impl/tbe/pad_d.py +0 -40
- mindspore/ops/_op_impl/tbe/pad_d_ds.py +0 -41
- mindspore/ops/_op_impl/tbe/parallel_concat.py +0 -70
- mindspore/ops/_op_impl/tbe/parallel_resize_bilinear.py +0 -45
- mindspore/ops/_op_impl/tbe/parallel_resize_bilinear_grad.py +0 -44
- mindspore/ops/_op_impl/tbe/pdist.py +0 -36
- mindspore/ops/_op_impl/tbe/pooling.py +0 -46
- mindspore/ops/_op_impl/tbe/population_count.py +0 -38
- mindspore/ops/_op_impl/tbe/pow.py +0 -41
- mindspore/ops/_op_impl/tbe/pow_ds.py +0 -42
- mindspore/ops/_op_impl/tbe/prelu.py +0 -37
- mindspore/ops/_op_impl/tbe/prelu_ds.py +0 -38
- mindspore/ops/_op_impl/tbe/prelu_grad.py +0 -40
- mindspore/ops/_op_impl/tbe/range.py +0 -39
- mindspore/ops/_op_impl/tbe/real_div.py +0 -38
- mindspore/ops/_op_impl/tbe/real_div_ds.py +0 -39
- mindspore/ops/_op_impl/tbe/reciprocal.py +0 -36
- mindspore/ops/_op_impl/tbe/reciprocal_ds.py +0 -37
- mindspore/ops/_op_impl/tbe/reciprocal_grad.py +0 -38
- mindspore/ops/_op_impl/tbe/reciprocal_grad_ds.py +0 -39
- mindspore/ops/_op_impl/tbe/reduce_all.py +0 -38
- mindspore/ops/_op_impl/tbe/reduce_all_ds.py +0 -39
- mindspore/ops/_op_impl/tbe/reduce_any.py +0 -38
- mindspore/ops/_op_impl/tbe/reduce_any_ds.py +0 -39
- mindspore/ops/_op_impl/tbe/reduce_max.py +0 -43
- mindspore/ops/_op_impl/tbe/reduce_max_ds.py +0 -41
- mindspore/ops/_op_impl/tbe/reduce_mean.py +0 -40
- mindspore/ops/_op_impl/tbe/reduce_mean_ds.py +0 -42
- mindspore/ops/_op_impl/tbe/reduce_min.py +0 -41
- mindspore/ops/_op_impl/tbe/reduce_min_ds.py +0 -41
- mindspore/ops/_op_impl/tbe/reduce_prod.py +0 -42
- mindspore/ops/_op_impl/tbe/reduce_prod_ds.py +0 -41
- mindspore/ops/_op_impl/tbe/reduce_std.py +0 -44
- mindspore/ops/_op_impl/tbe/reduce_sum.py +0 -39
- mindspore/ops/_op_impl/tbe/reduce_sum_ds.py +0 -41
- mindspore/ops/_op_impl/tbe/relu.py +0 -39
- mindspore/ops/_op_impl/tbe/relu6.py +0 -38
- mindspore/ops/_op_impl/tbe/relu6_ds.py +0 -39
- mindspore/ops/_op_impl/tbe/relu6_grad.py +0 -43
- mindspore/ops/_op_impl/tbe/relu6_grad_ds.py +0 -44
- mindspore/ops/_op_impl/tbe/relu_ds.py +0 -40
- mindspore/ops/_op_impl/tbe/relu_grad.py +0 -41
- mindspore/ops/_op_impl/tbe/relu_grad_ds.py +0 -42
- mindspore/ops/_op_impl/tbe/relu_grad_v2.py +0 -40
- mindspore/ops/_op_impl/tbe/relu_grad_v2_ds.py +0 -41
- mindspore/ops/_op_impl/tbe/relu_v2.py +0 -40
- mindspore/ops/_op_impl/tbe/relu_v2_ds.py +0 -41
- mindspore/ops/_op_impl/tbe/renorm.py +0 -39
- mindspore/ops/_op_impl/tbe/resize_bilinear.py +0 -40
- mindspore/ops/_op_impl/tbe/resize_bilinear_grad.py +0 -41
- mindspore/ops/_op_impl/tbe/resize_bilinear_v2.py +0 -43
- mindspore/ops/_op_impl/tbe/resize_nearest_neighbor.py +0 -40
- mindspore/ops/_op_impl/tbe/resize_nearest_neighbor_ds.py +0 -40
- mindspore/ops/_op_impl/tbe/resize_nearest_neighbor_grad.py +0 -39
- mindspore/ops/_op_impl/tbe/resize_nearest_neighbor_grad_ds.py +0 -42
- mindspore/ops/_op_impl/tbe/reverse_v2_d.py +0 -37
- mindspore/ops/_op_impl/tbe/rint.py +0 -37
- mindspore/ops/_op_impl/tbe/rint_ds.py +0 -38
- mindspore/ops/_op_impl/tbe/roi_align.py +0 -43
- mindspore/ops/_op_impl/tbe/roi_align_ds.py +0 -44
- mindspore/ops/_op_impl/tbe/roi_align_grad.py +0 -43
- mindspore/ops/_op_impl/tbe/roi_align_grad_ds.py +0 -44
- mindspore/ops/_op_impl/tbe/roll.py +0 -42
- mindspore/ops/_op_impl/tbe/round.py +0 -38
- mindspore/ops/_op_impl/tbe/round_ds.py +0 -39
- mindspore/ops/_op_impl/tbe/rsqrt.py +0 -37
- mindspore/ops/_op_impl/tbe/rsqrt_ds.py +0 -38
- mindspore/ops/_op_impl/tbe/rsqrt_grad.py +0 -40
- mindspore/ops/_op_impl/tbe/rsqrt_grad_ds.py +0 -41
- mindspore/ops/_op_impl/tbe/scatter_add.py +0 -44
- mindspore/ops/_op_impl/tbe/scatter_div.py +0 -46
- mindspore/ops/_op_impl/tbe/scatter_max.py +0 -45
- mindspore/ops/_op_impl/tbe/scatter_min.py +0 -45
- mindspore/ops/_op_impl/tbe/scatter_mul.py +0 -44
- mindspore/ops/_op_impl/tbe/scatter_nd.py +0 -41
- mindspore/ops/_op_impl/tbe/scatter_nd_add.py +0 -45
- mindspore/ops/_op_impl/tbe/scatter_nd_d.py +0 -41
- mindspore/ops/_op_impl/tbe/scatter_nd_ds.py +0 -49
- mindspore/ops/_op_impl/tbe/scatter_nd_sub.py +0 -47
- mindspore/ops/_op_impl/tbe/scatter_nd_sub_ds.py +0 -48
- mindspore/ops/_op_impl/tbe/scatter_nd_update.py +0 -47
- mindspore/ops/_op_impl/tbe/scatter_nd_update_ds.py +0 -48
- mindspore/ops/_op_impl/tbe/scatter_non_aliasing_add.py +0 -39
- mindspore/ops/_op_impl/tbe/scatter_non_aliasing_add_ds.py +0 -40
- mindspore/ops/_op_impl/tbe/scatter_sub.py +0 -47
- mindspore/ops/_op_impl/tbe/scatter_sub_ds.py +0 -48
- mindspore/ops/_op_impl/tbe/scatter_update.py +0 -43
- mindspore/ops/_op_impl/tbe/select.py +0 -38
- mindspore/ops/_op_impl/tbe/select_ds.py +0 -39
- mindspore/ops/_op_impl/tbe/selu.py +0 -39
- mindspore/ops/_op_impl/tbe/selu_ds.py +0 -40
- mindspore/ops/_op_impl/tbe/sgd.py +0 -62
- mindspore/ops/_op_impl/tbe/sigmoid.py +0 -37
- mindspore/ops/_op_impl/tbe/sigmoid_cross_entropy_with_logits.py +0 -41
- mindspore/ops/_op_impl/tbe/sigmoid_cross_entropy_with_logits_ds.py +0 -42
- mindspore/ops/_op_impl/tbe/sigmoid_cross_entropy_with_logits_grad.py +0 -42
- mindspore/ops/_op_impl/tbe/sigmoid_cross_entropy_with_logits_grad_ds.py +0 -43
- mindspore/ops/_op_impl/tbe/sigmoid_ds.py +0 -38
- mindspore/ops/_op_impl/tbe/sigmoid_grad.py +0 -39
- mindspore/ops/_op_impl/tbe/sigmoid_grad_ds.py +0 -40
- mindspore/ops/_op_impl/tbe/sign.py +0 -38
- mindspore/ops/_op_impl/tbe/sign_ds.py +0 -39
- mindspore/ops/_op_impl/tbe/sin.py +0 -37
- mindspore/ops/_op_impl/tbe/sin_ds.py +0 -38
- mindspore/ops/_op_impl/tbe/sinh.py +0 -37
- mindspore/ops/_op_impl/tbe/sinh_ds.py +0 -38
- mindspore/ops/_op_impl/tbe/slice.py +0 -58
- mindspore/ops/_op_impl/tbe/smooth_l1_loss.py +0 -45
- mindspore/ops/_op_impl/tbe/smooth_l1_loss_ds.py +0 -46
- mindspore/ops/_op_impl/tbe/smooth_l1_loss_grad.py +0 -46
- mindspore/ops/_op_impl/tbe/smooth_l1_loss_grad_ds.py +0 -47
- mindspore/ops/_op_impl/tbe/soft_margin_loss.py +0 -38
- mindspore/ops/_op_impl/tbe/soft_margin_loss_grad.py +0 -39
- mindspore/ops/_op_impl/tbe/soft_shrink.py +0 -36
- mindspore/ops/_op_impl/tbe/soft_shrink_grad.py +0 -38
- mindspore/ops/_op_impl/tbe/softmax.py +0 -37
- mindspore/ops/_op_impl/tbe/softmax_cross_entropy_with_logits.py +0 -38
- mindspore/ops/_op_impl/tbe/softmax_cross_entropy_with_logits_ds.py +0 -39
- mindspore/ops/_op_impl/tbe/softmax_ds.py +0 -38
- mindspore/ops/_op_impl/tbe/softmax_grad_ext.py +0 -42
- mindspore/ops/_op_impl/tbe/softmax_v2_with_dropout_do_mask_v3.py +0 -39
- mindspore/ops/_op_impl/tbe/softplus.py +0 -37
- mindspore/ops/_op_impl/tbe/softplus_ds.py +0 -38
- mindspore/ops/_op_impl/tbe/softplus_grad.py +0 -38
- mindspore/ops/_op_impl/tbe/softplus_grad_ds.py +0 -38
- mindspore/ops/_op_impl/tbe/softsign.py +0 -37
- mindspore/ops/_op_impl/tbe/softsign_ds.py +0 -38
- mindspore/ops/_op_impl/tbe/sort.py +0 -38
- mindspore/ops/_op_impl/tbe/sort_ds.py +0 -39
- mindspore/ops/_op_impl/tbe/space_to_batch.py +0 -38
- mindspore/ops/_op_impl/tbe/space_to_batch_nd.py +0 -38
- mindspore/ops/_op_impl/tbe/space_to_depth.py +0 -47
- mindspore/ops/_op_impl/tbe/sparse_apply_adadelta.py +0 -56
- mindspore/ops/_op_impl/tbe/sparse_apply_adagrad.py +0 -45
- mindspore/ops/_op_impl/tbe/sparse_apply_adagrad_ds.py +0 -46
- mindspore/ops/_op_impl/tbe/sparse_apply_adagrad_v2.py +0 -46
- mindspore/ops/_op_impl/tbe/sparse_apply_adagrad_v2_ds.py +0 -47
- mindspore/ops/_op_impl/tbe/sparse_apply_ftrl_d.py +0 -53
- mindspore/ops/_op_impl/tbe/sparse_apply_ftrl_d_ds.py +0 -50
- mindspore/ops/_op_impl/tbe/sparse_apply_ftrl_v2.py +0 -50
- mindspore/ops/_op_impl/tbe/sparse_apply_proximal_adagrad.py +0 -66
- mindspore/ops/_op_impl/tbe/sparse_apply_proximal_adagrad_ds.py +0 -67
- mindspore/ops/_op_impl/tbe/sparse_apply_r_m_s_prop.py +0 -57
- mindspore/ops/_op_impl/tbe/sparse_apply_r_m_s_prop_ds.py +0 -58
- mindspore/ops/_op_impl/tbe/sparse_gather_v2.py +0 -56
- mindspore/ops/_op_impl/tbe/sparse_gather_v2_ds.py +0 -58
- mindspore/ops/_op_impl/tbe/split_d.py +0 -38
- mindspore/ops/_op_impl/tbe/split_d_ds.py +0 -39
- mindspore/ops/_op_impl/tbe/split_v.py +0 -39
- mindspore/ops/_op_impl/tbe/splitv.py +0 -39
- mindspore/ops/_op_impl/tbe/sqrt.py +0 -37
- mindspore/ops/_op_impl/tbe/sqrt_ds.py +0 -38
- mindspore/ops/_op_impl/tbe/sqrt_grad.py +0 -43
- mindspore/ops/_op_impl/tbe/sqrt_grad_ds.py +0 -44
- mindspore/ops/_op_impl/tbe/square.py +0 -38
- mindspore/ops/_op_impl/tbe/square_ds.py +0 -39
- mindspore/ops/_op_impl/tbe/square_sum_all.py +0 -40
- mindspore/ops/_op_impl/tbe/square_sum_all_ds.py +0 -41
- mindspore/ops/_op_impl/tbe/square_sum_v1.py +0 -38
- mindspore/ops/_op_impl/tbe/square_sum_v1_ds.py +0 -39
- mindspore/ops/_op_impl/tbe/square_sum_v2.py +0 -39
- mindspore/ops/_op_impl/tbe/squared_difference.py +0 -39
- mindspore/ops/_op_impl/tbe/squared_difference_ds.py +0 -41
- mindspore/ops/_op_impl/tbe/squeeze.py +0 -37
- mindspore/ops/_op_impl/tbe/strided_read.py +0 -38
- mindspore/ops/_op_impl/tbe/strided_slice_d.py +0 -44
- mindspore/ops/_op_impl/tbe/strided_slice_ds.py +0 -71
- mindspore/ops/_op_impl/tbe/strided_slice_grad_d.py +0 -51
- mindspore/ops/_op_impl/tbe/strided_slice_grad_ds.py +0 -57
- mindspore/ops/_op_impl/tbe/strided_write.py +0 -38
- mindspore/ops/_op_impl/tbe/sub.py +0 -39
- mindspore/ops/_op_impl/tbe/sub_ds.py +0 -40
- mindspore/ops/_op_impl/tbe/tan.py +0 -38
- mindspore/ops/_op_impl/tbe/tan_ds.py +0 -39
- mindspore/ops/_op_impl/tbe/tanh.py +0 -37
- mindspore/ops/_op_impl/tbe/tanh_ds.py +0 -38
- mindspore/ops/_op_impl/tbe/tanh_grad.py +0 -39
- mindspore/ops/_op_impl/tbe/tanh_grad_ds.py +0 -40
- mindspore/ops/_op_impl/tbe/tensor_move.py +0 -49
- mindspore/ops/_op_impl/tbe/tensor_move_ds.py +0 -50
- mindspore/ops/_op_impl/tbe/tensor_scatter_update.py +0 -41
- mindspore/ops/_op_impl/tbe/tile.py +0 -37
- mindspore/ops/_op_impl/tbe/tile_ds.py +0 -42
- mindspore/ops/_op_impl/tbe/top_k.py +0 -42
- mindspore/ops/_op_impl/tbe/top_k_ds.py +0 -43
- mindspore/ops/_op_impl/tbe/trans_data.py +0 -167
- mindspore/ops/_op_impl/tbe/trans_data_ds.py +0 -180
- mindspore/ops/_op_impl/tbe/trans_data_rnn.py +0 -44
- mindspore/ops/_op_impl/tbe/transpose.py +0 -60
- mindspore/ops/_op_impl/tbe/transpose_d.py +0 -47
- mindspore/ops/_op_impl/tbe/transpose_nod.py +0 -60
- mindspore/ops/_op_impl/tbe/trunc.py +0 -39
- mindspore/ops/_op_impl/tbe/truncate_div.py +0 -41
- mindspore/ops/_op_impl/tbe/truncate_div_ds.py +0 -42
- mindspore/ops/_op_impl/tbe/truncate_mod.py +0 -41
- mindspore/ops/_op_impl/tbe/truncate_mod_ds.py +0 -42
- mindspore/ops/_op_impl/tbe/unpack.py +0 -38
- mindspore/ops/_op_impl/tbe/unpack_ds.py +0 -39
- mindspore/ops/_op_impl/tbe/unsorted_segment_max.py +0 -49
- mindspore/ops/_op_impl/tbe/unsorted_segment_max_ds.py +0 -40
- mindspore/ops/_op_impl/tbe/unsorted_segment_min.py +0 -49
- mindspore/ops/_op_impl/tbe/unsorted_segment_min_ds.py +0 -40
- mindspore/ops/_op_impl/tbe/unsorted_segment_prod.py +0 -49
- mindspore/ops/_op_impl/tbe/unsorted_segment_prod_ds.py +0 -38
- mindspore/ops/_op_impl/tbe/unsorted_segment_sum.py +0 -38
- mindspore/ops/_op_impl/tbe/unsorted_segment_sum_ds.py +0 -41
- mindspore/ops/_op_impl/tbe/wts_arq.py +0 -40
- mindspore/ops/_op_impl/tbe/xdivy.py +0 -38
- mindspore/ops/_op_impl/tbe/xdivy_ds.py +0 -39
- mindspore/ops/_op_impl/tbe/xlogy.py +0 -38
- mindspore/ops/_op_impl/tbe/xlogy_ds.py +0 -39
- mindspore/ops/_op_impl/tbe/zeros_like.py +0 -41
- mindspore/ops/_op_impl/tbe/zeros_like_ds.py +0 -42
- mindspore/ops/_tracefunc.py +0 -241
- mindspore/ops/arg_dtype_cast.py +0 -54
- mindspore/rewrite/api/tree_node_helper.py +0 -60
- mindspore/rewrite/ast_helpers/ast_creator.py +0 -115
- mindspore/rewrite/ast_transformers/flatten_recursive_stmt.py +0 -267
- mindspore/rewrite/ast_transformers/remove_return_out_of_if.py +0 -228
- mindspore/rewrite/namespace.py +0 -53
- mindspore-2.2.14.dist-info/RECORD +0 -1924
- {mindspore-2.2.14.dist-info → mindspore-2.3.0.dist-info}/WHEEL +0 -0
- {mindspore-2.2.14.dist-info → mindspore-2.3.0.dist-info}/top_level.txt +0 -0
mindspore/profiler/profiling.py
CHANGED
@@ -14,15 +14,19 @@
 # ============================================================================
 """Profiling api file."""
 import os
+import re
+import shutil
 import stat
 import time
 import json
+from json import JSONDecodeError
 import glob
 import subprocess
 import csv
 import socket
-import shutil
 from enum import Enum
+from multiprocessing import Process
+from typing import List
 import numpy as np
 
 from mindspore import log as logger, context
@@ -30,14 +34,17 @@ from mindspore.context import get_auto_parallel_context
 from mindspore.communication.management import GlobalComm, get_rank, get_group_size, get_local_rank
 import mindspore._c_expression as c_expression
 import mindspore._c_dataengine as cde
+from mindspore._c_expression import _framework_profiler_enable_mi
 from mindspore.profiler.common.exceptions.exceptions import ProfilerFileNotFoundException, \
-    ProfilerIOException, ProfilerException, ProfilerRawFileException
+    ProfilerIOException, ProfilerException, ProfilerRawFileException, ProfilerParamTypeErrorException
 from mindspore.profiler.common.exceptions.exceptions import ProfilerPathErrorException
 from mindspore.profiler.common.exceptions.exceptions import ProfilerDirNotFoundException
-from mindspore.profiler.common.util import get_file_path
+from mindspore.profiler.common.util import get_file_path, ProfilerPathManager
+from mindspore.profiler.common.process_pool import MultiProcessPool
 from mindspore.profiler.common.validator.validate_path import validate_and_normalize_path
 from mindspore.profiler.parser.framework_parser import GpuFrameWorkParser, DynamicFrameWorkParser
 from mindspore.profiler.parser.integrator import Integrator, DeviceTarget
+from mindspore.profiler.parser.ascend_analysis.function_event import CANNEvent
 from mindspore.profiler.parser.cpu_gpu_timeline_generator import GpuTimelineGenerator, CpuTimelineGenerator
 from mindspore.profiler.parser.ascend_timeline_generator import AscendTimelineGenerator
 from mindspore.profiler.parser.memory_usage_parser import MemoryUsageParser
@@ -50,14 +57,16 @@ from mindspore.profiler.parser.msadvisor_analyzer import Msadvisor
 from mindspore.profiler.parser.profiler_info import ProfilerInfo
 from mindspore.common.api import _pynative_executor
 from mindspore.profiler.parser.ascend_msprof_exporter import AscendMsprofExporter
-from mindspore.profiler.parser.ascend_msprof_generator import AscendMsprofDataGenerator
+from mindspore.profiler.parser.ascend_msprof_generator import AscendMsprofDataGenerator
 from mindspore.profiler.parser.ascend_fpbp_generator import AscendFPBPGenerator
 from mindspore.profiler.parser.ascend_op_generator import AscendOPGenerator
 from mindspore.profiler.parser.ascend_steptrace_generator import AscendStepTraceGenerator
 from mindspore.profiler.parser.ascend_flops_generator import AscendFlopsGenerator
 from mindspore.profiler.parser.ascend_cluster_generator import AscendClusterGenerator
-from mindspore.profiler.parser.ascend_hccl_generator import AscendHCCLGenerator
+from mindspore.profiler.parser.ascend_hccl_generator import AscendHCCLGenerator
 from mindspore.profiler.parser.ascend_communicate_generator import AscendCommunicationGenerator
+from mindspore.profiler.parser.ascend_memory_generator import AscendMemoryGenerator
+from mindspore.profiler.parser.ascend_integrate_generator import AscendIntegrateGenerator
 
 INIT_OP_NAME = 'Default/InitDataSetQueue'
 
@@ -68,10 +77,24 @@ AICORE_METRICS_DICT = {
     3: "MemoryL0",
     4: "ResourceConflictRatio",
     5: "MemoryUB",
+    6: "L2Cache",
     -1: "None"
 }
 
 
+class ModelTraingMode(Enum):
+    PYNATIVE = 0
+    GRAPH = 1
+    KERNEL_BY_KERNEL = 2
+    UNKNOWN = 3
+
+
+class ProfilerLevel(Enum):
+    Level0 = "Level0"
+    Level1 = "Level1"
+    Level2 = "Level2"
+
+
 class DeviceSupportParam(Enum):
     """The device target enum."""
     CPU = ['start', 'start_profile', 'output_path', 'timeline_limit', 'profile_framework', 'op_time']
@@ -81,17 +104,22 @@ class DeviceSupportParam(Enum):
     ]
     ASCEND = [
         'start', 'start_profile', 'output_path', 'data_process', 'timeline_limit', 'profile_memory',
-        'parallel_strategy', 'profile_communication', 'aicore_metrics', 'l2_cache', '
-        'profile_framework'
+        'parallel_strategy', 'profile_communication', 'aicore_metrics', 'l2_cache', 'hbm_ddr', 'pcie', 'op_time',
+        'ascend_job_id', 'profile_framework', 'host_stack', 'profiler_level', 'data_simplification'
     ]
 
 
 ALWAYS_VALID_PARAM = [
     'start', 'start_profile', 'output_path', 'data_process', 'parallel_strategy', 'l2_cache',
-    'ascend_job_id', 'op_time', 'profile_framework'
+    'hbm_ddr', 'pcie', 'ascend_job_id', 'op_time', 'profile_framework', 'profiler_level'
 ]
 
 
+ANALYSIS_ASYNC_MODE = 'async'
+ANALYSIS_SYNC_MODE = 'sync'
+DEFAULT_MODEL_ID = 4294967295
+
+
 def _environment_check():
     if c_expression.security.enable_security():
         raise RuntimeError("Profiler is not supported when MindSpore is compiled with \'-s on\'.")
@@ -277,41 +305,36 @@ def _parse_host_info(input_file, output_timeline_file, output_memory_file, is_de
         logger.warning("No valid time_stamp is record in file: %s", input_file)
 
 
-def _ascend_graph_msprof_generator(
+def _ascend_graph_msprof_generator(mindstudio_profiler_output, model_iteration_dict):
    """Executing the msprof export mode."""
    try:
        ProfilerInfo.set_export_start_time(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))
-        msprof_exporter = AscendMsprofExporter(
+        msprof_exporter = AscendMsprofExporter(mindstudio_profiler_output)
        flag = msprof_exporter.export(model_iteration_dict)
        ProfilerInfo.set_export_end_time(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))
        return flag
-
-
-        logger.warning(err.message)
+    except (ProfilerException, TimeoutError, FileNotFoundError, RuntimeError) as err:
+        logger.warning(str(err))
        return False
 
 
-def _ascend_graph_msprof_analyse(
+def _ascend_graph_msprof_analyse(mindstudio_profiler_output):
    """
    Ascend graph model msprof data analyse.
 
    Returns:
-        list[obj]: The list is : df_op_summary, df_op_statistic, df_step_trace
+        list[obj]: The list is : df_op_summary, df_op_statistic, df_step_trace, df_step_trace_model
    """
-
-    df_op_statistic = []
-    df_step_trace = []
+    res = ([], [], [], [])
    try:
-
-
-
-        msprof_analyser = AscendMsprofDataGeneratorOld(os.path.join(source_path, 'summary'))
-        df_op_summary, df_op_statistic, df_step_trace = msprof_analyser.parse()
+        msprof_analyser = AscendMsprofDataGenerator(mindstudio_profiler_output)
+        res = msprof_analyser.parse()
+        return res
    except ProfilerException as err:
        logger.warning(err.message)
    finally:
        pass
-    return
+    return res
 
 
 class Profiler:
@@ -320,15 +343,23 @@ class Profiler:
    MindSpore users can import the mindspore.Profiler, initialize the Profiler object to start profiling,
    and use Profiler.analyse() to stop profiling and analyse the results.
    Users can visualize the results using the `MindSpore Insight
-    <https://www.mindspore.cn/mindinsight/docs/en/
+    <https://www.mindspore.cn/mindinsight/docs/en/master/index.html>`_ tool.
    Now, Profiler supports AICORE operator, AICPU operator, HostCPU operator, memory,
    correspondence, cluster, etc data analysis.
 
    Args:
        output_path (str, optional): Output data path. Default: ``"./data"`` .
+        profiler_level (ProfilerLevel, optional): (Ascend only) The level of profiling. Default: ``None``.
+
+            - Profiler.Level0: Leanest level of profiling data collection, collects information about the elapsed
+              time of the computational operators on the NPU and communication large operator information.
+            - Profiler.Level1: Collect more CANN layer AscendCL data and AICore performance metrics and communication
+              mini operator information based on Level0.
+            - Profiler.Level2: Collect GE and Runtime information in CANN layer on top of Level1
+
        op_time (bool, optional): (Ascend/GPU) Whether to collect operators performance data. Default value: ``True``.
        profile_communication (bool, optional): (Ascend only) Whether to collect communication performance data in
-            a multi devices training,collect when True. Setting this parameter has no effect during single
+            a multi devices training,collect when True. Setting this parameter has no effect during single card
            training. When using this parameter, `op_time` must be set to ``True`` . Default: ``False`` .
        profile_memory (bool, optional): (Ascend only) Whether to collect tensor memory data, collect when ``True`` .
            When using this parameter, `op_time` must be set to True. Default: ``False`` .
@@ -337,7 +368,7 @@ class Profiler:
        start_profile (bool, optional): The start_profile parameter controls whether to enable or disable performance
            data collection based on conditions. Default: ``True`` .
        aicore_metrics (int, optional): (Ascend only) Types of AICORE performance data collected, when using this
-            parameter, `op_time` must be set to ``True`` , and the value must be in [-1, 0, 1, 2, 3, 4, 5],
+            parameter, `op_time` must be set to ``True`` , and the value must be in [-1, 0, 1, 2, 3, 4, 5, 6],
            Default: ``0`` , the data items contained in each metric are as follows:
 
            - -1: Does not collect AICORE data.
@@ -348,9 +379,14 @@ class Profiler:
            - 3: MemoryL0 contains l0a_read/write_bw, l0b_read/write_bw, l0c_read/write_bw etc.
            - 4: ResourceConflictRatio contains vec_bankgroup/bank/resc_cflt_ratio etc.
            - 5: MemoryUB contains ub_read/write_bw_mte, ub_read/write_bw_vector, ub\_/write_bw_scalar etc.
+            - 6: L2Cache contains write_cache_hit, write_cache_miss_allocate, r0_read_cache_hit, r1_read_cache_hit etc.
 
        l2_cache (bool, optional): (Ascend only) Whether to collect l2 cache data, collect when True.
            Default: ``False`` .
+        hbm_ddr (bool, optional): (Ascend only) Whether to collect HBM/DDR read and write rate data, collect when True.
+            Default: ``False`` .
+        pcie (bool, optional): (Ascend only) Whether to collect PCIe bandwidth data, collect when True.
+            Default: ``False`` .
        sync_enable (bool, optional): (GPU only) Whether the profiler collects operators in a synchronous way.
            Default: ``True`` .
 
@@ -372,6 +408,12 @@ class Profiler:
            - "time": Only record host timestamp.
            - "memory": Only record host memory usage.
            - None: Not record host information.
+        data_simplification (bool, optional): (Ascend only) Whether to remove FRAMEWORK data and other redundant data.
+            If set to True, only the delivery of profiler and the original performance data in the PROF_XXX
+            directory are retained to save disk space.
+            Default value: ``True`` .
+        host_stack (bool, optional): (Ascend) Whether to collect frame host call stack data.
+            Default value: ``True`` .
 
    Raises:
        RuntimeError: When the version of CANN does not match the version of MindSpore,
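The hunks above introduce several new collection switches (profiler_level, the L2Cache metric id 6, hbm_ddr, pcie, data_simplification, host_stack). The following is a minimal usage sketch of how these documented parameters could be combined; the `ProfilerLevel` import path and the `train()` helper are assumptions for illustration, not taken from this diff.

    # Hedged sketch, assuming an Ascend device and a user-defined train() loop.
    import mindspore as ms
    from mindspore import Profiler
    from mindspore.profiler import ProfilerLevel  # import location assumed

    ms.set_context(device_target="Ascend")
    profiler = Profiler(
        output_path="./profiler_data",
        profiler_level=ProfilerLevel.Level1,  # new in 2.3.0
        aicore_metrics=6,                     # 6 = L2Cache, new metric id
        hbm_ddr=True,                         # new: HBM/DDR read/write rate
        pcie=True,                            # new: PCIe bandwidth
        data_simplification=True,             # new: drop redundant FRAMEWORK data after analysis
        host_stack=True)                      # new: host call stack collection
    train()                                   # hypothetical training loop
    profiler.analyse()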
@@ -428,8 +470,13 @@ class Profiler:
    _has_initialized = False
    _ascend_profiling_options = ""
    _ascend_job_id = ""
+    ENABLE_STATUS = "on"
+    DISABLE_STATUS = "off"
 
    def __init__(self, **kwargs):
+        if os.getenv("PROFILING_MODE"):
+            raise RuntimeError("Profiling is already enabled by PROFILING_MODE env.")
+
        self._dev_id = None
        self._cpu_profiler = None
        self._gpu_profiler = None
@@ -448,10 +495,13 @@ class Profiler:
        self._timeline_size_limit_byte = 500 * 1024 * 1024  # 500MB
        self._parallel_strategy = True
        self._model_iteration_dict = None
+        self._analyse_mode = ANALYSIS_SYNC_MODE
        _environment_check()
        # default aicore_metrics type is ArithmeticUtilization
        self._aicore_metrics_id = 0
-        self._l2_cache =
+        self._l2_cache = self.DISABLE_STATUS
+        self._hbm_ddr = self.DISABLE_STATUS
+        self._pcie = self.DISABLE_STATUS
        self._data_process = True
        self._op_time = True
        self._profile_communication = False
@@ -464,23 +514,45 @@ class Profiler:
        self._dynamic_status = False
        self._profile_framework = "all"
        self._msprof_enable = os.getenv("PROFILER_SAMPLECONFIG")
+        self.profiler_level = None
        self._pretty_json = False
+        self._analyse_only = kwargs.get("analyse_only", False)
+        self._data_simplification = kwargs.get("data_simplification", True)
+        self._host_stack = True
        if self._msprof_enable:
            return
-        self._start_time = int(time.time() *
+        self._start_time = int(time.time() * 1e6)  # us
+        self._monotonic_time = int(time.monotonic() * 1e6)  # us
        logger.info("Profiling: start time: %d", self._start_time)
        if kwargs.get("env_enable"):
            self._profiler_init(kwargs)
            return
-
        Profiler._has_initialized = True
        # get device_id and device_target
-        self.
-
-
-
-        self.
+        if self._analyse_only:
+            self._device_target = DeviceTarget.ASCEND.value
+            self._rank_id = kwargs.get("rank_id", 0)
+        else:
+            self._get_devid_rankid_and_devtarget()
+            self._parser_kwargs(kwargs)
+            self._get_output_path(kwargs)
+            self._decide_device_target(kwargs)
+        if self.start_profile:
+            self.start()
+
+    @staticmethod
+    def _get_prof_rank(prof_path: str):
+        """get rank id."""
+        sub_dirs = os.listdir(os.path.realpath(prof_path))
+        info_json_path = ""
+        for sub_dir in sub_dirs:
+            if sub_dir.startswith("device_"):
+                device_id = sub_dir.split("_")[-1]
+                info_json_path = os.path.join(prof_path, sub_dir, f"info.json.{device_id}")
+        if not os.path.exists(info_json_path):
+            return -1
+        rank_id, _ = Profiler._parse_info_json(info_json_path)
+        return rank_id
 
    @staticmethod
    def _check_output_path(output_path):
@@ -496,9 +568,9 @@ class Profiler:
        return output_path
 
    @staticmethod
-    def
+    def _parse_job_start_time(prof_dir):
        """
-
+        Get the start time of the job.
 
        Args:
            input_file (str): The file path of the host start log file.
@@ -506,12 +578,29 @@ class Profiler:
        Returns:
            str, job start time.
        """
-
-
-
-
-
-
+        try:
+            AscendMsprofExporter.check_msprof_env()
+            script_path = AscendMsprofExporter.get_msprof_info_path()
+            if not script_path:
+                logger.warning("Can`t find get_msprof_info.py path, use single-export mode instead.")
+                return None
+            logger.info("get_msprof_info.py path is : %s", script_path)
+            host_dir = os.path.join(prof_dir, 'host')
+            cmd = ['python', script_path, '-dir', host_dir]
+            outs, _ = AscendMsprofExporter.run_cmd(cmd)
+            if not outs:
+                logger.warning('Can`t find the msprof info result')
+                return None
+            result = json.loads(outs)
+            if result.get('status', 1) == 1:
+                return None
+            jor_start_time = result.get('data', {}).get('collection_info', {}).get('Collection start time', None)
+            if jor_start_time is not None:
+                return float(jor_start_time.strip())
+            return None
+        except (RuntimeError, JSONDecodeError, AttributeError, TimeoutError, FileNotFoundError) as err:
+            logger.warning('Get the drvVersion error, use single-export mode instead. detail : %s', err)
+            return None
 
    @staticmethod
    def _parse_info_json(info_file):
@@ -535,6 +624,47 @@ class Profiler:
 
        return str(rank_id), str(dev_id)
 
+    @classmethod
+    def offline_analyse(cls, path: str, pretty=False, step_list=None):
+        """
+        Analyze training performance data offline, which is invoked after performance data collection is completed.
+
+        Args:
+            path (str): The profiling data path which need to be analyzed offline.
+                There needs to be a profiler directory in this path.
+            pretty (bool, optional): Whether to pretty json files. Default: ``False``.
+            step_list (list, optional): A list of steps that need to be analyzed. Default: ``None``.
+                By default, all steps will be analyzed.
+
+        Examples:
+            >>> from mindspore import Profiler
+            >>> Profiler.offline_analyse("./profiling_path")
+        """
+        profiler_path = os.path.join(path, "profiler")
+        if not os.path.exists(profiler_path):
+            raise ProfilerPathErrorException(f'There must be a profiler folder in the data path: {path}.')
+
+        rank_set = set()
+        sub_dirs = os.listdir(os.path.realpath(profiler_path))
+        for sub_dir in sub_dirs:
+            sub_path = os.path.join(profiler_path, sub_dir)
+            if os.path.isdir(sub_path) and re.match(r"^PROF_\d+_\d+_[a-zA-Z0-9]+", sub_dir):
+                rank = cls._get_prof_rank(sub_path)
+                rank_set.add(rank)
+        if not rank_set:
+            return
+
+        process_list = []
+        for rank_id in rank_set:
+            profiler = cls(analyse_only=True, rank_id=rank_id)
+            process = Process(target=profiler.analyse,
+                              args=(path, pretty, step_list))
+            process.start()
+            process_list.append(process)
+
+        for process in process_list:
+            process.join()
+
    def op_analyse(self, op_name, device_id=None):
        """
        Profiler users can use this interface to obtain operator performance data.
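The newly added offline_analyse classmethod spawns one analysis process per rank it finds under the profiler directory. A short hedged sketch of how it could be invoked with the new step_list filter follows; the on-disk layout (a profiler/ folder containing PROF_* subfolders) is what the method itself checks for, and the path used here is illustrative.

    # Hedged sketch: offline analysis after a run that exited before analyse() could be called.
    from mindspore import Profiler

    # Analyse only steps 3..5; one subprocess is started per rank found under ./profiling_path/profiler.
    Profiler.offline_analyse("./profiling_path", pretty=False, step_list=[3, 4, 5])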
@@ -560,12 +690,12 @@ class Profiler:
            >>> # Profiler init.
            >>> profiler = Profiler()
            >>> # Train Model or eval Model, taking LeNet5 as an example.
-            >>> # Refer to https://gitee.com/mindspore/docs/blob/
+            >>> # Refer to https://gitee.com/mindspore/docs/blob/master/docs/mindspore/code/lenet.py
            >>> net = LeNet5()
            >>> optimizer = nn.Momentum(net.trainable_params(), learning_rate=0.1, momentum=0.9)
            >>> loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True)
            >>> # Create the dataset taking MNIST as an example.
-            >>> # Refer to https://gitee.com/mindspore/docs/blob/
+            >>> # Refer to https://gitee.com/mindspore/docs/blob/master/docs/mindspore/code/mnist.py
            >>> dataloader = create_dataset()
            >>> model = Model(net, loss, optimizer)
            >>> model.train(5, dataloader, dataset_sink_mode=False)
@@ -600,20 +730,49 @@ class Profiler:
            return message
        return op_info
 
-    def analyse(self, offline_path=None, pretty=False):
+    def analyse(self, offline_path=None, pretty=False, step_list=None, mode="sync"):
        """
        Collect and analyze training performance data, support calls during and after training. The example shows above.
 
        Args:
-            offline_path (Union[str, None], optional): The data path which need to be
+            offline_path (Union[str, None], optional): The data path which need to be analyzed with offline mode.
                Offline mode isused in abnormal exit scenario. This parameter should be set to ``None``
                for online mode. Default: ``None``.
            pretty (bool, optional): Whether to pretty json files. Default: ``False``.
-
-
-
+            step_list (list, optional): A list of steps that need to be analyzed. Default: ``None``.
+                By default, all steps will be analyzed.
+            mode (str, optional): Analysis mode, it must be one of ["sync", "async"]. Default: ``sync``.
+
+                - sync: analyse data in current process, it will block the current process.
+                - async: analyse data in subprocess, it will not the current process.Since the parsing process
+                  will take up extra CPU resources, please enable this mode according to the actual resource situation.
 
-
+        """
+        try:
+            if isinstance(pretty, bool):
+                self._pretty_json = pretty
+            if mode not in [ANALYSIS_SYNC_MODE, ANALYSIS_ASYNC_MODE]:
+                logger.warning("For analyse, the parameter mode must be one of ['sync', 'async'], "
+                               "it will be set to 'sync'.")
+                mode = ANALYSIS_SYNC_MODE
+            model_iteration_dict = {}
+            if step_list is not None and not isinstance(step_list, list):
+                raise ProfilerParamTypeErrorException("Parameter step_list must be a list.")
+            if step_list:
+                if not all(isinstance(step_id, int) for step_id in step_list):
+                    raise ProfilerParamTypeErrorException("The elements of the parameter step_list must be integers.")
+                step_list.sort()
+                if step_list[-1] - step_list[0] != len(step_list) - 1:
+                    err_msg = "The elements of the parameter step_list must be continuous integers."
+                    raise ProfilerParamTypeErrorException(err_msg)
+                model_iteration_dict[DEFAULT_MODEL_ID] = step_list
+            if offline_path is not None and not isinstance(offline_path, str):
+                raise ProfilerParamTypeErrorException("For analyse, the type of parameter offline_path must be str.")
+            self._analyse(offline_path=offline_path, model_iteration_dict=model_iteration_dict, mode=mode)
+        except (ProfilerException, RuntimeError, OSError, TypeError, NameError) as err:
+            logger.error("Profiler analyse failed: %s", str(err))
+
+    def _analyse(self, offline_path=None, model_iteration_dict=None, mode=ANALYSIS_SYNC_MODE):
        """
        Collect and analyze training performance data, support calls during and after training. The example shows above.
 
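Per the validation added above, step_list must be a list of consecutive integers and an unrecognized mode falls back to "sync" with a warning. A minimal hedged sketch of the reworked analyse() call; the train() helper is a placeholder, not part of this diff.

    # Hedged sketch of the new analyse() signature (parameter names taken from the diff above).
    from mindspore import Profiler

    profiler = Profiler(output_path="./profiler_data")
    train()  # hypothetical training loop
    # "async" hands parsing off to a subprocess; non-consecutive step ids raise
    # ProfilerParamTypeErrorException, per the checks shown above.
    profiler.analyse(step_list=[1, 2, 3], mode="async")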
@@ -622,23 +781,22 @@ class Profiler:
                Offline mode isused in abnormal exit scenario. This parameter should be set to ``None``
                for online mode. Default: ``None``.
            model_iteration_dict: Dictionary with model id as the key and iteration id as the value, Default: ``None``.
+            mode (str, optional): Analysis mode. Whether to analyse data in subprocess. Default: ``sync``.
+                By default, analyse data in current process.
        """
        self._model_iteration_dict = model_iteration_dict
-
        self._init_profiler_info()
        self._is_support_step_info_collect()
+        self._analyse_mode = mode
        parallel_mode = get_auto_parallel_context("parallel_mode")
        stage_num = get_auto_parallel_context("pipeline_stages")
 
        ProfilerInfo.set_parallel_info(parallel_mode, stage_num)
-        ProfilerInfo.set_rank_size(self._rank_size)
-        ProfilerInfo.set_heterogeneous(self._is_heterogeneous)
        if offline_path:
-
-
-
-
-            ProfilerInfo.save(self._output_path)
+            ProfilerInfo.set_analyse_start_time(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))
+            self._ascend_graph_analyse(offline_path=offline_path)
+            ProfilerInfo.set_analyse_end_time(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))
+            ProfilerInfo.save(self._output_path)
            _offline_parse(offline_path)
            return
        if self._msprof_enable:
@@ -654,6 +812,8 @@ class Profiler:
        cpu_op_file = glob.glob(os.path.join(self._output_path, 'cpu_op_type_info_*'))
        if self._device_target and self._device_target != DeviceTarget.CPU.value and cpu_op_file:
            self._is_heterogeneous = True
+
+        ProfilerInfo.set_heterogeneous(self._is_heterogeneous)
        ProfilerInfo.set_analyse_start_time(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))
        if self._device_target and self._device_target == DeviceTarget.CPU.value:
            self._cpu_analyse()
@@ -715,13 +875,11 @@ class Profiler:
        else:
            raise RuntimeError("The profiler has already started. Do not turn on again in the open state.")
 
-        # No need to start anything if parse profiling data offline
-        if self._is_offline_parser():
-            return
-
        self._cpu_profiler.step_profiling_enable(True)
        if self._op_time:
            self._cpu_profiler.enable_op_time()
+        if self._profile_memory:
+            self._cpu_profiler.enable_profile_memory()
 
        if self._device_target and self._device_target == DeviceTarget.GPU.value:
            if self._data_process:
@@ -736,6 +894,9 @@ class Profiler:
                self._md_profiler.start()
            self._ascend_graph_start()
        ProfilerInfo.set_profiling_start_time(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))
+        ProfilerInfo.set_system_cnt(c_expression.get_clock_syscnt())
+        ProfilerInfo.set_system_time(int(c_expression.get_clock_time() * 1e3))  # cast us to ns
+        _framework_profiler_enable_mi()
 
    def stop(self):
        """
@@ -778,10 +939,6 @@ class Profiler:
            raise RuntimeError("The profiler has not started, so can not stop. Please call the start() method "
                               "before calling the stop() method.")
 
-        # No need to stop anything if parse profiling data offline
-        if self._is_offline_parser():
-            return
-
        # Stop data collection after all operators are executed.
        _pynative_executor.sync()
 
@@ -798,9 +955,21 @@ class Profiler:
        self._stop_time = int(time.time() * 10000000)
        ProfilerInfo.set_profiling_stop_time(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))
        self._init_profiler_info()
+        ProfilerInfo.set_diff_time(self._start_time - self._monotonic_time)
        ProfilerInfo.save(self._output_path)
        logger.info("Profiling: stop time: %d", self._stop_time)
 
+    def _set_ascend_job_id(self, ascend_job_id):
+        """Set output_path for offline parsing performance data."""
+        if not ascend_job_id:
+            return
+        self._ascend_job_id = validate_and_normalize_path(ascend_job_id)
+        if not os.path.exists(self._ascend_job_id):
+            msg = f"Invalid ascend_job_id: {self._ascend_job_id}, Please pass the absolute path of the JOB dir"
+            logger.critical(msg)
+            raise ValueError(msg)
+        self._output_path, _ = os.path.split(self._ascend_job_id)
+
    def _profiler_init(self, kwargs):
        """Initialize variables when profiler is enabled by environment variables."""
        options = kwargs.get("env_enable")
@@ -919,16 +1088,20 @@ class Profiler:
            "output": self._output_path,
            "fp_point": fp_point,
            "bp_point": bp_point,
-            "training_trace":
-            "task_trace":
+            "training_trace": self.ENABLE_STATUS if self._op_time else self.DISABLE_STATUS,
+            "task_trace": self.ENABLE_STATUS if self._op_time else self.DISABLE_STATUS,
            "aic_metrics": AICORE_METRICS_DICT.get(self._aicore_metrics_id, "ArithmeticUtilization"),
-            "aicpu":
-            "profile_memory":
-            "hccl":
+            "aicpu": self.ENABLE_STATUS if self._data_process or self._op_time else self.DISABLE_STATUS,
+            "profile_memory": self.ENABLE_STATUS if self._op_time and self._profile_memory else self.DISABLE_STATUS,
+            "hccl": self.ENABLE_STATUS if self._op_time and self._profile_communication else self.DISABLE_STATUS,
            "l2_cache": self._l2_cache,
-            "
-            "
-            "
+            "hbm_ddr": self._hbm_ddr,
+            "pcie": self._pcie,
+            "parallel_strategy": self.ENABLE_STATUS if self._parallel_strategy else self.DISABLE_STATUS,
+            "op_time": self.ENABLE_STATUS if self._op_time else self.DISABLE_STATUS,
+            "profile_framework": self._profile_framework,
+            "profiler_level": self.profiler_level.value if self.profiler_level else self.DISABLE_STATUS,
+            "host_stack": "on" if self._host_stack else "off"
        }
 
        return profiling_options
@@ -961,7 +1134,7 @@ class Profiler:
            self._profile_communication = False
 
        if self._profile_communication:
-            hccl_option = {"output": self._output_path, "task_trace":
+            hccl_option = {"output": self._output_path, "task_trace": self.ENABLE_STATUS}
            os.environ['PROFILING_OPTIONS'] = json.dumps(hccl_option)
 
        self._profile_memory = kwargs.pop("profile_memory", False)
@@ -978,7 +1151,7 @@ class Profiler:
 
        if self._aicore_metrics_id not in AICORE_METRICS_DICT:
            logger.warning(f"For '{self.__class__.__name__}', the parameter aicore_metrics must be in "
-                           f"[-1, 0, 1, 2, 3, 4, 5], but got {self._aicore_metrics_id}, it will be set to 0.")
+                           f"[-1, 0, 1, 2, 3, 4, 5, 6], but got {self._aicore_metrics_id}, it will be set to 0.")
            self._aicore_metrics_id = 0
 
        l2_cache_enable = kwargs.pop("l2_cache", False)
@@ -986,10 +1159,21 @@ class Profiler:
            logger.warning(f"For '{self.__class__.__name__}', the parameter l2_cache must be bool, "
                           f"but got type {type(l2_cache_enable)}, it will be set to False.")
            l2_cache_enable = False
-        if l2_cache_enable
-
-
-
+        self._l2_cache = self.ENABLE_STATUS if l2_cache_enable else self.DISABLE_STATUS
+
+        hbm_ddr_enable = kwargs.pop("hbm_ddr", False)
+        if not isinstance(hbm_ddr_enable, bool):
+            logger.warning(f"For '{self.__class__.__name__}', the parameter hbm_ddr must be bool, "
+                           f"but got type {type(hbm_ddr_enable)}, it will be set to False.")
+            hbm_ddr_enable = False
+        self._hbm_ddr = self.ENABLE_STATUS if hbm_ddr_enable else self.DISABLE_STATUS
+
+        pcie_enable = kwargs.pop("pcie", False)
+        if not isinstance(pcie_enable, bool):
+            logger.warning(f"For '{self.__class__.__name__}', the parameter pcie must be bool, "
+                           f"but got type {type(pcie_enable)}, it will be set to False.")
+            pcie_enable = False
+        self._pcie = self.ENABLE_STATUS if pcie_enable else self.DISABLE_STATUS
 
        self._parallel_strategy = kwargs.pop("parallel_strategy", True)
        if not isinstance(self._parallel_strategy, bool):
@@ -997,27 +1181,21 @@ class Profiler:
                           f"but got type {type(self._parallel_strategy)}, it will be set to True.")
            self._parallel_strategy = True
 
-
-        if
-            logger.warning(f"For '{self.__class__.__name__}',
-                           f"
-
-
-
-
-
-
-
-
-
-
-
-
-    def _is_offline_parser(self):
-        """Return whether offline parser or online parser."""
-        if self._device_target and self._device_target == DeviceTarget.ASCEND.value:
-            return bool(self._ascend_job_id)
-        return False
+        self.profiler_level = kwargs.pop("profiler_level", None)
+        if self.profiler_level and not isinstance(self.profiler_level, ProfilerLevel):
+            logger.warning(f"For '{self.__class__.__name__}', the parameter profiler_level must be one of "
+                           f"[ProfilerLevel.Level0, ProfilerLevel.Level1, ProfilerLevel.Level2], but got type "
+                           f"{type(self.profiler_level)}, it will be set to ProfilerLevel.Level0.")
+            self.profiler_level = ProfilerLevel.Level0
+        elif self.profiler_level == ProfilerLevel.Level0:
+            self._data_process = False
+            self._aicore_metrics_id = -1
+            logger.warning(f"For '{self.__class__.__name__}', when profiler_level set Level0, data_process will be set "
+                           f"to False and aicore_metrics set to -1.")
+        elif self.profiler_level == ProfilerLevel.Level1:
+            self._data_process = False
+            logger.warning(f"For '{self.__class__.__name__}', when profiler_level set Level1, data_process will be set "
+                           f"to False.")
 
    def _ascend_analyse(self):
        """Collect and analyse ascend performance data."""
@@ -1029,23 +1207,21 @@ class Profiler:
            self._rank_size = get_group_size()
        else:
            self._rank_size = int(os.getenv('RANK_SIZE', '1'))
+        ProfilerInfo.set_rank_size(self._rank_size)
 
        if self._has_started:
            self.stop()
        else:
            logger.info("No need to stop profiler because profiler has been stopped.")
+        self._ascend_profiler.finalize()
        # export op data before analyse
        self._ascend_graph_analyse()
 
-    def _minddata_analyse(self
+    def _minddata_analyse(self):
        """Analyse mindadata for ascend graph model."""
        if not self._data_process:
            return
        store_id = self._rank_id if self._device_target == DeviceTarget.ASCEND.value else self._dev_id
-        # Parsing minddata AICPU profiling
-        if self._device_target == DeviceTarget.ASCEND.value:
-            logger.info("Profiling: analyzing the minddata AICPU data.")
-            MinddataParser.execute(source_path, self._output_path, store_id)
 
        # parse minddata pipeline operator and queue
        try:
@@ -1065,6 +1241,16 @@ class Profiler:
        finally:
            pass
 
+    def _minddata_aicpu_analyse(self, source_path, job_id):
+        """Analyse minddata aicpu after ascend."""
+        if not self._data_process:
+            return
+        store_id = self._rank_id if self._device_target == DeviceTarget.ASCEND.value else self._dev_id
+        # Parsing minddata AICPU profiling
+        if self._device_target == DeviceTarget.ASCEND.value:
+            logger.info("Profiling: analyzing the minddata AICPU data.")
+            MinddataParser.execute(source_path, self._output_path, job_id, store_id)
+
    def _ascend_fpbp_analyse(self, op_summary, steptrace):
        """
        Ascned graph model op analyse.
@@ -1088,7 +1274,7 @@ class Profiler:
            pass
        return points
 
-    def _ascend_op_analyse(self, op_summary, op_statistic, dynamic_status):
+    def _ascend_op_analyse(self, op_summary, op_statistic, dynamic_status, launch_ops: List):
        """
        Ascend graph model hwts analyse.
 
@@ -1115,12 +1301,12 @@ class Profiler:
            else:
                output_timeline_data_path = None
 
-            op_analyser = AscendOPGenerator(op_summary, op_statistic, dynamic_status)
+            op_analyser = AscendOPGenerator(op_summary, op_statistic, dynamic_status, launch_ops)
            op_analyser.parse()
            op_analyser.write(op_intermediate_detail_path, op_intermediate_type_path,
                              aicpu_intermediate_detail_path, framework_raw_path, output_timeline_data_path)
-        except ProfilerException as err:
-            logger.warning(err
+        except (ProfilerException, RuntimeError) as err:
+            logger.warning(str(err))
        finally:
            pass
 
@@ -1142,19 +1328,22 @@ class Profiler:
        finally:
            pass
 
-    def _ascend_timeline_analyse(self, op_summary, steptrace):
+    def _ascend_timeline_analyse(self, op_summary, steptrace, source_path, mindstudio_profiler_output) -> List:
        """Analyse timeline info."""
        try:
            logger.info("Profiling: analyzing the timeline data")
-            timeline_analyser = AscendTimelineGenerator(self._output_path,
-                context.get_context('mode')
-
-            timeline_analyser.
+            timeline_analyser = AscendTimelineGenerator(self._output_path, source_path, mindstudio_profiler_output,
+                                                        self._rank_id, self._rank_size, context.get_context('mode'),
+                                                        self._model_iteration_dict.get(DEFAULT_MODEL_ID))
+            timeline_analyser.parse_cluster_data(op_summary, steptrace)
+            timeline_analyser.parse_timeline_data(pretty=self._pretty_json)
+            timeline_analyser.write_timeline_display()
            timeline_analyser.write_timeline_summary()
        except (ProfilerIOException, ProfilerFileNotFoundException, RuntimeError) as err:
            logger.warning('Fail to write timeline data: %s', err)
        finally:
            pass
+        return timeline_analyser.get_kernel_event_list()
 
    def _ascend_dynamic_net_analyse(self, op_summary):
        """Analyse dynamic shape network info."""
@@ -1168,7 +1357,7 @@ class Profiler:
        dynamic_parser = DynamicFrameWorkParser(self._output_path, self._rank_id, pretty=self._pretty_json)
        dynamic_parser.write_dynamic_shape_data(op_summary)
 
-    def _ascend_flops_analyse(self, op_summary):
+    def _ascend_flops_analyse(self, op_summary, launch_ops):
        """Get op FLOPs from op_summary, write output_op_flops_x.csv."""
        if 'vector_fops' not in op_summary.dtype.names and 'cube_fops' not in op_summary.dtype.names:
            logger.warning("[Profiler] Can not found cube fops and vector fops data in the op summary.")
@@ -1183,12 +1372,12 @@ class Profiler:
            flops_path = validate_and_normalize_path(flops_path)
            flops_summary_path = validate_and_normalize_path(flops_summary_path)
 
-            flops_analyser = AscendFlopsGenerator(op_summary, pretty=self._pretty_json)
+            flops_analyser = AscendFlopsGenerator(op_summary, launch_ops, pretty=self._pretty_json)
            flops_analyser.parse()
            flops_analyser.write(flops_path, flops_summary_path)
 
-        except ProfilerException as err:
-            logger.warning(err
+        except (ProfilerException, RuntimeError) as err:
+            logger.warning(str(err))
        finally:
            pass
 
@@ -1209,30 +1398,43 @@ class Profiler:
 
    def _ascend_ms_analyze(self, source_path):
        """Ascend ms generate"""
-
+
+        timestamp = time.strftime("%Y%m%d%H%M%S", time.localtime(time.time()))
        if self._rank_id:
-            ascend_ms_path = f"rank-{self._rank_id}_{
+            ascend_ms_path = f"rank-{self._rank_id}_{timestamp}_ascend_ms"
        else:
-            ascend_ms_path = f"{socket.gethostname()}--{os.getpid()}_{
-
-        if not os.path.exists(self._ascend_ms_path):
-            os.makedirs(self._ascend_ms_path, exist_ok=True)
-            os.chmod(self._ascend_ms_path, stat.S_IRUSR | stat.S_IWUSR | stat.S_IXUSR)
+            ascend_ms_path = f"{socket.gethostname()}--{os.getpid()}_{timestamp}_ascend_ms"
+        ascend_ms_path = os.path.join(self._output_path, ascend_ms_path)
 
        dev_id = self._rank_id if self._device_target == DeviceTarget.ASCEND.value else self._dev_id
-        ascend_profiler_output_path = os.path.join(
+        ascend_profiler_output_path = os.path.join(ascend_ms_path, 'ASCEND_PROFILER_OUTPUT')
        os.makedirs(ascend_profiler_output_path, exist_ok=True)
 
        source_profiler_info_path = os.path.join(self._output_path, f"profiler_info_{dev_id}.json")
-        target_profiler_info_path = os.path.join(
+        target_profiler_info_path = os.path.join(ascend_ms_path, f"profiler_info_{dev_id}.json")
        shutil.copy(source_profiler_info_path, target_profiler_info_path)
 
        source_timeline_path = os.path.join(self._output_path, f"ascend_timeline_display_{dev_id}.json")
        target_timeline_path = os.path.join(ascend_profiler_output_path, f"trace_view.json")
        shutil.copy(source_timeline_path, target_timeline_path)
 
+        src_op_mem_file = os.path.join(self._output_path, f"operator_memory_{dev_id}.csv")
+        if os.path.exists(src_op_mem_file):
+            dst_op_mem_file = os.path.join(ascend_profiler_output_path, f"operator_memory.csv")
+            shutil.copy(src_op_mem_file, dst_op_mem_file)
+
+        ms_output_path = os.path.abspath(
+            os.path.join(source_path, os.path.pardir, 'mindstudio_profiler_output'))
+        static_op_mem_path = os.path.join(ms_output_path, f"static_op_mem_*.csv")
+        src_static_op_mem_path = glob.glob(static_op_mem_path)
+        if src_static_op_mem_path:
+            dst_static_op_mem_file = os.path.join(ascend_profiler_output_path, f"static_op_mem.csv")
+            shutil.copy(src_static_op_mem_path[0], dst_static_op_mem_file)
+
        self._ascend_graph_cluster_analyse(source_path, ascend_profiler_output_path)
        self._ascend_graph_communicate_analyse(source_path, ascend_profiler_output_path)
+        AscendIntegrateGenerator(source_path, ascend_profiler_output_path).parse()
+        AscendMemoryGenerator(self._output_path, self._rank_id, source_path, ascend_profiler_output_path).parse()
 
    def _ascend_graph_cluster_analyse(self, source_path, ascend_profiler_output_path):
        """Analyse step trace time info"""
@@ -1243,7 +1445,7 @@ class Profiler:
            step_trace_time_path = os.path.join(ascend_profiler_output_path, f'step_trace_time.csv')
            step_trace_time_path = validate_and_normalize_path(step_trace_time_path)
 
-            cluster_analyse = AscendClusterGenerator(
+            cluster_analyse = AscendClusterGenerator(source_path)
            cluster_analyse.parse()
            cluster_analyse.write(step_trace_time_path)
        except (ProfilerIOException, ProfilerFileNotFoundException, ProfilerRawFileException) as err:
@@ -1262,10 +1464,11 @@ class Profiler:
            communication_file_path = os.path.join(ascend_profiler_output_path, f'communication.json')
            communication_file_path = validate_and_normalize_path(communication_file_path)
 
-            communication_matrix_file_path = os.path.join(ascend_profiler_output_path,
+            communication_matrix_file_path = os.path.join(ascend_profiler_output_path,
                                                           f"communication_matrix.json")
            communication_matrix_file_path = validate_and_normalize_path(communication_matrix_file_path)
 
-            analyze_path = os.path.
+            analyze_path = os.path.abspath(os.path.join(source_path, os.path.pardir, 'analyze'))
            communicate_analyser = AscendCommunicationGenerator(analyze_path)
            communicate_analyser.parse()
            communicate_analyser.write(communication_file_path, communication_matrix_file_path)
@@ -1274,7 +1477,7 @@ class Profiler:
        finally:
            pass
 
-    def _ascend_graph_hccl_analyse(self,
+    def _ascend_graph_hccl_analyse(self, mindstudio_profiler_output, steptrace):
        """Analyse hccl profiler info."""
        if not self._profile_communication:
            return
@@ -1288,10 +1491,7 @@ class Profiler:
 
        hccl_raw_path = os.path.join(self._output_path, f'hccl_raw_{dev_id}.csv')
        hccl_raw_path = validate_and_normalize_path(hccl_raw_path)
-
-            hccl_analyse = AscendHCCLGenerator(os.path.join(source_path, 'timeline'), steptrace)
-        else:
-            hccl_analyse = AscendHCCLGeneratorOld(os.path.join(source_path, 'timeline'))
+        hccl_analyse = AscendHCCLGenerator(mindstudio_profiler_output, steptrace)
        hccl_analyse.parse()
        hccl_analyse.write(hccl_raw_path)
 
@@ -1320,42 +1520,85 @@ class Profiler:
        if context.get_context("mode") == context.PYNATIVE_MODE:
            logger.warning("Pynative mode does not support MSAdvisor analyzer currently.")
 
-    def
-        """
-
+    def _get_kernel_op_map(self, op_summary, kernels: List[CANNEvent]) -> List:
+        """Get the mapping between framework operator and device kernel."""
+        if not kernels:
+            return []
+        kernel_map = {}
+        for kernel in kernels:
+            key = kernel.name if kernel.name.startswith('hcom_') else (kernel.name, str(kernel.ts))
+            kernel_map[key] = kernel.parent
+        launch_ops = [None] * len(op_summary)
+        for index, summary in enumerate(op_summary):
+            ts = str(summary['Task Start Time(us)']).strip("\t")
+            name = summary['Op Name']
+            key = name if name.startswith("hcom_") else (name, ts)
+            launch_op = kernel_map.get(key)
+            if not launch_op:
+                if context.get_context("mode") == context.GRAPH_MODE or not name.startswith("aclnn"):
+                    logger.warning(f"Failed to get launch operator for {name}!")
+                continue
+            launch_ops[index] = launch_op.name
+        return launch_ops
 
-
+    def _ascend_graph_analyse(self, offline_path=None):
+        if offline_path or self._analyse_mode == ANALYSIS_SYNC_MODE:
+            self._ascend_graph_analyse_inner(offline_path)
+        else:
+            MultiProcessPool().add_async_job(self._ascend_graph_analyse_inner)
+
+    def _ascend_graph_analyse_inner(self, offline_path=None):
+        """Ascend graph mode analyse."""
+        job_id = self._get_profiling_job_id(offline_path)
        if not job_id:
            return
        logger.info("Profiling: job id is %s ", job_id)
 
        self._check_output_path(output_path=self._output_path)
        source_path = os.path.join(self._output_path, job_id)
-        self._minddata_analyse(
+        self._minddata_analyse()
        if self._op_time:
-
+            mindstudio_profiler_output = os.path.abspath(
+                os.path.join(source_path, os.path.pardir, 'mindstudio_profiler_output'))
+            flag = _ascend_graph_msprof_generator(mindstudio_profiler_output, self._model_iteration_dict)
            if not flag:
                logger.warning('Current driver package not support all export mode, use single export mode, '
                               'this may lead to performance degradation. Suggest upgrading the driver package.')
            ProfilerInfo.set_export_flag(flag)
-            op_summary, op_statistic, steptrace
-
-
+            op_summary, op_statistic, steptrace, steptrace_model \
+                = _ascend_graph_msprof_analyse(mindstudio_profiler_output)
+            if isinstance(op_statistic, np.ndarray) and op_statistic.shape[0] == 0 or \
+                    not isinstance(op_statistic, np.ndarray) and not op_statistic:
+                return
+            kernels = self._ascend_timeline_analyse(op_summary, steptrace, source_path, mindstudio_profiler_output)
+            launch_ops = self._get_kernel_op_map(op_summary, kernels)
+            self._ascend_op_analyse(op_summary, op_statistic, self._dynamic_status, launch_ops)
            graph_ids = np.unique(op_summary['Model ID']).tolist()
            points = self._ascend_fpbp_analyse(op_summary, steptrace)
            if len(graph_ids) == 1:
                self._ascend_step_trace_analyse(steptrace)
+            else:
+                self._ascend_step_trace_analyse(steptrace_model)
            if self._dynamic_status:
                self._ascend_dynamic_net_analyse(op_summary)
-            self._ascend_flops_analyse(op_summary)
+            self._ascend_flops_analyse(op_summary, launch_ops)
            self._ascend_graph_memory_analyse(points)
-            self._ascend_ms_analyze(
-            self._ascend_graph_hccl_analyse(
+            self._ascend_ms_analyze(mindstudio_profiler_output)
+            self._ascend_graph_hccl_analyse(mindstudio_profiler_output, steptrace)
            self._ascend_graph_msadvisor_analyse(job_id)
+            self._minddata_aicpu_analyse(self._output_path, job_id)
            ProfilerInfo.set_graph_ids(graph_ids)
+        try:
+            ProfilerPathManager.simplify_data(self._output_path, self._data_simplification)
+        except RuntimeError as err:
+            logger.error('Profilier simplify data failed, %s', str(err))
 
    def _ascend_graph_start(self):
        """Ascend graph mode start profiling."""
+        op_range_file = os.path.join(self._framework_path, "op_range_" + str(self._rank_id))
+        if os.path.exists(op_range_file):
+            os.remove(op_range_file)
+            logger.info("Clear old op range filer.")
        self._ascend_profiler.start()
 
    def _gpu_analyse(self):
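The new _get_kernel_op_map pairs each device kernel with the framework operator that launched it, keying communication kernels (hcom_*) by name alone and everything else by (name, start timestamp). The following is a standalone sketch of that matching idea only; the field names and the helper are illustrative, not MindSpore internals.

    # Standalone sketch of the kernel-to-launch-operator matching used above (illustrative names).
    def map_kernels_to_ops(op_rows, kernel_events):
        """op_rows: iterable of dicts with 'name'/'start_us'; kernel_events: objects with .name, .ts, .parent."""
        kernel_map = {}
        for k in kernel_events:
            # Communication kernels (hcom_*) are keyed by name only, others by (name, start time).
            key = k.name if k.name.startswith("hcom_") else (k.name, str(k.ts))
            kernel_map[key] = k.parent
        launch_ops = []
        for row in op_rows:
            key = row["name"] if row["name"].startswith("hcom_") else (row["name"], str(row["start_us"]))
            parent = kernel_map.get(key)
            launch_ops.append(parent.name if parent else None)
        return launch_ops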
@@ -1370,12 +1613,14 @@ class Profiler:
1370 1613    else:
1371 1614    self._rank_size = int(os.getenv('RANK_SIZE', '1'))
1372 1615
     1616 +  ProfilerInfo.set_rank_size(self._rank_size)
     1617 +
1373 1618    if self._has_started:
1374 1619    self.stop()
1375 1620    else:
1376 1621    logger.info("No need to stop profiler because profiler has been stopped.")
1377 1622
1378      -  self._minddata_analyse(
     1623 +  self._minddata_analyse()
1379 1624
1380 1625    try:
1381 1626    self._analyse_step_relation_info()
@@ -1438,7 +1683,8 @@ class Profiler:
1438 1683    if self._has_started:
1439 1684    self.stop()
1440 1685    else:
1441      -  logger.info("No need to stop profiler because profiler has been stopped
     1686 +  logger.info("No need to stop profiler because profiler has been stopped.")
     1687 +
1442 1688    if not self._op_time:
1443 1689    return
1444 1690    try:
@@ -1537,23 +1783,15 @@ class Profiler:
1537 1783    memory_parser.init_memory_usage_info(aicore_detail_data, points)
1538 1784    memory_parser.write_memory_files()
1539 1785
1540      -  def _get_profiling_job_id(self):
     1786 +  def _get_profiling_job_id(self, offline_path):
1541 1787    """Get profiling job id, which was generated by ada service.
1542 1788
1543 1789    Returns:
1544 1790    str, profiling job id.
1545 1791    """
1546 1792
1547      -  if
1548      -
1549      -  job_id = self._ascend_job_id.rstrip('/').split('/')[-1]
1550      -  if job_id.startswith('PROF'):
1551      -  device_dir = [dir for dir in os.listdir(self._ascend_job_id) if dir.startswith('device')]
1552      -  info_file_path = get_file_path(os.path.join(self._ascend_job_id, device_dir[0]), "info.json")
1553      -  training_rank_id, _ = self._parse_info_json(info_file_path)
1554      -  self._rank_id = int(training_rank_id)
1555      -  return os.path.join(job_id, device_dir[0])
1556      -  return job_id
     1793 +  if offline_path:
     1794 +  self._output_path = os.path.join(offline_path, 'profiler')
1557 1795
1558 1796    job_id = ""
1559 1797    job_dirs = filter(lambda item: item.startswith('JOB') or item.startswith('PROF') and os.path.isdir(
@@ -1562,16 +1800,12 @@ class Profiler:
1562 1800    job_dirs, key=lambda x: os.path.getmtime(os.path.join(self._output_path, x)), reverse=True)
1563 1801
1564 1802    for dir_name in sorted_job_dirs:
1565      -
1566      -
1567      -
1568      -
1569      -  job_dir = os.path.join(self._output_path, dir_name, device_dir[0])
1570      -  else:
1571      -  job_dir = os.path.join(self._output_path, dir_name)
     1803 +  prof_dir = os.path.join(self._output_path, dir_name)
     1804 +  device_dir = [dir for dir in os.listdir(prof_dir) \
     1805 +  if dir.startswith('device') and os.path.isdir(os.path.join(prof_dir, dir))]
     1806 +  job_dir = os.path.join(self._output_path, dir_name, device_dir[0])
1572 1807
1573      -
1574      -  if start_file_path is None:
     1808 +  if get_file_path(job_dir, "start_info") is None:
1575 1809    logger.warning("Find profiling job path %s, but host_start.log not exist, "
1576 1810    "profiler will ignore this job dir.", job_dir)
1577 1811    continue
@@ -1582,25 +1816,27 @@ class Profiler:
1582 1816    "profiler will ignore this job dir.", job_dir)
1583 1817    continue
1584 1818
1585      -
1586      -
     1819 +  prof_rank_id, prof_device_id = self._parse_info_json(info_file_path)
     1820 +  job_start_time = self._parse_job_start_time(prof_dir)
1587 1821
1588      -  if
1589      -
1590      -
1591      -
1592      -
     1822 +  if offline_path:
     1823 +  if self._rank_id != prof_rank_id:
     1824 +  continue
     1825 +  self._start_time = int(job_start_time)
     1826 +  else:
     1827 +  if self._dev_id != prof_device_id and self._rank_id != prof_rank_id:
     1828 +  logger.debug("Find profiling find job path %s, but not current training device id. "
     1829 +  "Current training rank id %s, but job path rank id: %s, "
     1830 +  "profiler will ignore this job dir.", job_dir, self._rank_id, prof_rank_id)
     1831 +  continue
1593 1832
1594      -
1595      -
1596      -
1597      -
1598      -
     1833 +  if job_start_time < self._start_time:
     1834 +  logger.warning("Find profiling job path %s, but start_time(%d) is earlier than this training "
     1835 +  "start_time(%d), profiler will ignore this job dir.",
     1836 +  job_dir, job_start_time, self._start_time)
     1837 +  continue
1599 1838
1600      -
1601      -  job_id = os.path.join(dir_name, device_dir[0])
1602      -  else:
1603      -  job_id = dir_name
     1839 +  job_id = os.path.join(dir_name, device_dir[0])
1604 1840    break
1605 1841
1606 1842    if not job_id:
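Across the hunks above, `_get_profiling_job_id(offline_path)` now scans the output directory itself instead of trusting `self._ascend_job_id`: JOB*/PROF* directories are visited newest-first, the first `device*` subdirectory is taken, and candidates are dropped when `start_info` is missing, when the rank/device ids do not match, or when the job started before this training run. A simplified, self-contained sketch of that selection flow, assuming the hypothetical names `pick_latest_prof_job` and `accept` (the real method also parses info.json and the job start time via `self._parse_info_json` and `self._parse_job_start_time`):

import os

def pick_latest_prof_job(output_path, accept):
    # Candidate job dirs, newest first (mirrors the sorted_job_dirs loop).
    candidates = [d for d in os.listdir(output_path)
                  if (d.startswith('JOB') or d.startswith('PROF'))
                  and os.path.isdir(os.path.join(output_path, d))]
    candidates.sort(key=lambda d: os.path.getmtime(os.path.join(output_path, d)),
                    reverse=True)
    for dir_name in candidates:
        prof_dir = os.path.join(output_path, dir_name)
        device_dirs = [d for d in os.listdir(prof_dir)
                       if d.startswith('device') and os.path.isdir(os.path.join(prof_dir, d))]
        if not device_dirs:
            continue
        job_dir = os.path.join(prof_dir, device_dirs[0])
        # `accept` stands in for the start_info / rank-id / start-time checks.
        if not accept(job_dir):
            continue
        return os.path.join(dir_name, device_dirs[0])
    return ""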
@@ -1709,6 +1945,10 @@ class Profiler:
1709 1945    else:
1710 1946    logger.warning("The target dir already exists. "
1711 1947    "There may be some old profiling data, and they will be rewritten in the end.")
     1948 +  self._framework_path = os.path.join(self._output_path, "FRAMEWORK")
     1949 +  if not os.path.exists(self._framework_path):
     1950 +  os.makedirs(self._framework_path, exist_ok=True)
     1951 +  os.chmod(self._framework_path, stat.S_IRUSR | stat.S_IWUSR | stat.S_IXUSR)
1712 1952
1713 1953    def _parser_kwargs(self, kwargs):
1714 1954    """Parse kwargs vale."""
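The FRAMEWORK directory added at new lines 1948-1951 is created with owner-only permissions; `stat.S_IRUSR | stat.S_IWUSR | stat.S_IXUSR` equals 0o700. Indentation is not preserved in this diff view, so placing the chmod inside the exists-check is an assumption in the sketch below, and `ensure_private_dir` is a hypothetical helper:

import os
import stat

def ensure_private_dir(path):
    # Create the directory if needed and restrict it to the owner (0o700).
    if not os.path.exists(path):
        os.makedirs(path, exist_ok=True)
        os.chmod(path, stat.S_IRUSR | stat.S_IWUSR | stat.S_IXUSR)

ensure_private_dir(os.path.join("profiler_output", "FRAMEWORK"))  # hypothetical output path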
@@ -1750,6 +1990,21 @@ class Profiler:
1750 1990    logger.warning(f"For '{self.__class__.__name__}', the parameter profile_framework must be one of ['memory',"
1751 1991    f" 'time', 'all', None], but got {self._profile_framework}, it will be set to 'all'.")
1752 1992    self._profile_framework = "all"
     1993 +  if not isinstance(self._data_simplification, bool):
     1994 +  logger.warning(f"For '{self.__class__.__name__}', the parameter data_simplification must be bool, "
     1995 +  f"but got type {type(self._data_simplification)}, it will be set to True.")
     1996 +  self._data_simplification = True
     1997 +
     1998 +  if not isinstance(self._data_simplification, bool):
     1999 +  logger.warning(f"For '{self.__class__.__name__}', the parameter data_simplification must be bool, "
     2000 +  f"but got type {type(self._data_simplification)}, it will be set to True.")
     2001 +  self._data_simplification = True
     2002 +
     2003 +  self._host_stack = kwargs.pop("host_stack", True)
     2004 +  if not isinstance(self._host_stack, bool):
     2005 +  logger.warning(f"For '{self.__class__.__name__}', the parameter host_stack must be bool, but got "
     2006 +  f"type {type(self._host_stack)}, it will be set to True.")
     2007 +  self._host_stack = True
1753 2008
1754 2009    def _host_info_analyse(self):
1755 2010    """
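The new `_parser_kwargs` checks at new lines 1993-2007 all follow the same shape: read or pop the keyword, warn when it is not a bool, and fall back to the default. A generic sketch of that pattern, with the hypothetical helper `pop_bool_kwarg` standing in for the repeated inline checks:

import logging

logger = logging.getLogger(__name__)

def pop_bool_kwarg(kwargs, name, default=True):
    # Pop the keyword; warn and fall back to the default when it is not a bool.
    value = kwargs.pop(name, default)
    if not isinstance(value, bool):
        logger.warning("The parameter %s must be bool, but got type %s, "
                       "it will be set to %s.", name, type(value), default)
        value = default
    return value

# Mirrors the host_stack handling at new lines 2003-2007.
host_stack = pop_bool_kwarg({"host_stack": "yes"}, "host_stack", True)  # -> True, with a warning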
@@ -1759,7 +2014,7 @@ class Profiler:
1759 2014    host_dir = os.path.join(self._output_path, 'host_info')
1760 2015    host_dir = validate_and_normalize_path(host_dir)
1761 2016    if not os.path.exists(host_dir):
1762      -  logger.
     2017 +  logger.warning("Host info directory: %s not exist.", host_dir)
1763 2018    return
1764 2019    csv_file_name = 'host_info_' + str(self._rank_id) + '.csv'
1765 2020    json_file_name = 'timeline_' + str(self._rank_id) + '.json'
@@ -1780,7 +2035,7 @@ def _offline_parse(offline_path):
1780 2035    host_dir = os.path.join(offline_path, 'profiler', 'host_info')
1781 2036    host_dir = validate_and_normalize_path(host_dir)
1782 2037    if not os.path.exists(host_dir):
1783      -  logger.
     2038 +  logger.warning("Host info directory: %s not exist.", host_dir)
1784 2039    return
1785 2040    files = os.listdir(host_dir)
1786 2041    for file in files:
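Both host_info hunks replace a truncated logging call with `logger.warning("Host info directory: %s not exist.", host_dir)`, keeping the lazy %s-style formatting used elsewhere in the file. A small sketch of the shared guard, assuming a hypothetical `host_info_dir_or_none` helper; the real code also runs the path through `validate_and_normalize_path`, which is omitted here:

import os
import logging

logger = logging.getLogger(__name__)

def host_info_dir_or_none(base_path):
    # Build the host_info directory; warn and bail out when it is missing.
    host_dir = os.path.join(base_path, 'host_info')
    if not os.path.exists(host_dir):
        logger.warning("Host info directory: %s not exist.", host_dir)
        return None
    return host_dir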