mindspore 2.2.11__cp39-cp39-win_amd64.whl → 2.3.0__cp39-cp39-win_amd64.whl
This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mindspore might be problematic. Click here for more details.
- mindspore/.commit_id +1 -1
- mindspore/__init__.py +7 -5
- mindspore/_c_dataengine.cp39-win_amd64.pyd +0 -0
- mindspore/_c_expression.cp39-win_amd64.pyd +0 -0
- mindspore/_c_mindrecord.cp39-win_amd64.pyd +0 -0
- mindspore/_checkparam.py +76 -18
- mindspore/_extends/builtin_operations.py +2 -1
- mindspore/_extends/graph_kernel/model/graph_parallel.py +16 -6
- mindspore/_extends/parallel_compile/akg_compiler/akg_process.py +3 -16
- mindspore/_extends/parallel_compile/akg_compiler/build_tbe_kernel.py +16 -4
- mindspore/_extends/parallel_compile/akg_compiler/compiler.py +1 -0
- mindspore/_extends/parallel_compile/akg_compiler/gen_custom_op_files.py +96 -0
- mindspore/_extends/parallel_compile/akg_compiler/tbe_topi.py +2 -1
- mindspore/_extends/parallel_compile/akg_compiler/util.py +5 -2
- mindspore/_extends/parse/__init__.py +18 -14
- mindspore/_extends/parse/compile_config.py +258 -0
- mindspore/_extends/parse/namespace.py +2 -2
- mindspore/_extends/parse/parser.py +174 -62
- mindspore/_extends/parse/resources.py +45 -14
- mindspore/_extends/parse/standard_method.py +142 -240
- mindspore/{ops/_op_impl/tbe/atomic_addr_clean.py → _extends/pijit/__init__.py} +6 -16
- mindspore/_extends/pijit/pijit_func_white_list.py +343 -0
- mindspore/_extends/remote/kernel_build_server.py +2 -0
- mindspore/_profiler.py +30 -0
- mindspore/amp.py +51 -24
- mindspore/avcodec-59.dll +0 -0
- mindspore/avdevice-59.dll +0 -0
- mindspore/avfilter-8.dll +0 -0
- mindspore/avformat-59.dll +0 -0
- mindspore/avutil-57.dll +0 -0
- mindspore/boost/adasum.py +1 -1
- mindspore/boost/base.py +1 -1
- mindspore/boost/boost_cell_wrapper.py +2 -2
- mindspore/boost/grad_freeze.py +2 -2
- mindspore/boost/group_loss_scale_manager.py +1 -1
- mindspore/boost/less_batch_normalization.py +9 -6
- mindspore/common/__init__.py +15 -4
- mindspore/common/_jit_fallback_utils.py +2 -3
- mindspore/common/_register_for_adapter.py +7 -0
- mindspore/common/_register_for_recompute.py +48 -0
- mindspore/common/_register_for_tensor.py +8 -9
- mindspore/common/_stub_tensor.py +7 -1
- mindspore/common/_utils.py +5 -17
- mindspore/common/api.py +411 -106
- mindspore/common/auto_dynamic_shape.py +27 -14
- mindspore/common/dtype.py +17 -10
- mindspore/common/dump.py +6 -8
- mindspore/common/file_system.py +48 -0
- mindspore/common/generator.py +260 -0
- mindspore/common/hook_handle.py +51 -4
- mindspore/common/initializer.py +1 -1
- mindspore/common/jit_config.py +34 -14
- mindspore/common/lazy_inline.py +72 -19
- mindspore/common/mindir_util.py +12 -2
- mindspore/common/mutable.py +79 -14
- mindspore/common/no_inline.py +54 -0
- mindspore/common/np_dtype.py +25 -0
- mindspore/common/parameter.py +30 -11
- mindspore/common/recompute.py +262 -0
- mindspore/common/seed.py +9 -9
- mindspore/common/sparse_tensor.py +272 -24
- mindspore/common/symbol.py +122 -0
- mindspore/common/tensor.py +468 -496
- mindspore/communication/__init__.py +6 -11
- mindspore/communication/_comm_helper.py +5 -0
- mindspore/communication/comm_func.py +1140 -0
- mindspore/communication/management.py +118 -102
- mindspore/config/op_info.config +22 -54
- mindspore/context.py +378 -65
- mindspore/dataset/__init__.py +5 -5
- mindspore/dataset/audio/__init__.py +6 -6
- mindspore/dataset/audio/transforms.py +711 -158
- mindspore/dataset/callback/ds_callback.py +2 -2
- mindspore/dataset/engine/cache_client.py +2 -2
- mindspore/dataset/engine/datasets.py +163 -83
- mindspore/dataset/engine/datasets_audio.py +14 -14
- mindspore/dataset/engine/datasets_standard_format.py +33 -3
- mindspore/dataset/engine/datasets_text.py +38 -38
- mindspore/dataset/engine/datasets_user_defined.py +78 -59
- mindspore/dataset/engine/datasets_vision.py +77 -73
- mindspore/dataset/engine/offload.py +5 -7
- mindspore/dataset/engine/queue.py +56 -38
- mindspore/dataset/engine/validators.py +11 -5
- mindspore/dataset/text/__init__.py +3 -3
- mindspore/dataset/text/transforms.py +408 -121
- mindspore/dataset/text/utils.py +9 -9
- mindspore/dataset/transforms/__init__.py +1 -1
- mindspore/dataset/transforms/transforms.py +261 -76
- mindspore/dataset/utils/browse_dataset.py +9 -9
- mindspore/dataset/vision/__init__.py +8 -8
- mindspore/dataset/vision/c_transforms.py +10 -10
- mindspore/dataset/vision/py_transforms_util.py +3 -3
- mindspore/dataset/vision/transforms.py +2844 -549
- mindspore/dataset/vision/utils.py +161 -10
- mindspore/dataset/vision/validators.py +14 -2
- mindspore/dnnl.dll +0 -0
- mindspore/experimental/optim/__init__.py +12 -2
- mindspore/experimental/optim/adadelta.py +161 -0
- mindspore/experimental/optim/adagrad.py +168 -0
- mindspore/experimental/optim/adam.py +35 -34
- mindspore/experimental/optim/adamax.py +170 -0
- mindspore/experimental/optim/adamw.py +40 -16
- mindspore/experimental/optim/asgd.py +153 -0
- mindspore/experimental/optim/lr_scheduler.py +71 -127
- mindspore/experimental/optim/nadam.py +157 -0
- mindspore/experimental/optim/optimizer.py +15 -8
- mindspore/experimental/optim/radam.py +194 -0
- mindspore/experimental/optim/rmsprop.py +154 -0
- mindspore/experimental/optim/rprop.py +164 -0
- mindspore/experimental/optim/sgd.py +28 -19
- mindspore/hal/__init__.py +40 -0
- mindspore/hal/_ascend.py +57 -0
- mindspore/hal/_base.py +57 -0
- mindspore/hal/_cpu.py +56 -0
- mindspore/hal/_gpu.py +57 -0
- mindspore/hal/device.py +356 -0
- mindspore/hal/event.py +179 -0
- mindspore/hal/memory.py +326 -0
- mindspore/hal/stream.py +339 -0
- mindspore/include/api/data_type.h +2 -2
- mindspore/include/api/dual_abi_helper.h +16 -3
- mindspore/include/api/model.h +4 -3
- mindspore/include/api/status.h +14 -0
- mindspore/include/c_api/model_c.h +173 -0
- mindspore/include/c_api/ms/base/types.h +1 -0
- mindspore/include/c_api/types_c.h +19 -0
- mindspore/include/dataset/execute.h +1 -3
- mindspore/include/dataset/vision.h +54 -2
- mindspore/jpeg62.dll +0 -0
- mindspore/log.py +2 -2
- mindspore/mindrecord/__init__.py +5 -1
- mindspore/mindrecord/config.py +809 -0
- mindspore/mindrecord/filereader.py +25 -0
- mindspore/mindrecord/filewriter.py +76 -58
- mindspore/mindrecord/mindpage.py +40 -6
- mindspore/mindrecord/shardutils.py +3 -2
- mindspore/mindrecord/shardwriter.py +7 -0
- mindspore/mindrecord/tools/cifar100_to_mr.py +53 -66
- mindspore/mindrecord/tools/cifar10_to_mr.py +48 -63
- mindspore/mindrecord/tools/csv_to_mr.py +7 -17
- mindspore/mindrecord/tools/imagenet_to_mr.py +3 -8
- mindspore/mindrecord/tools/mnist_to_mr.py +11 -21
- mindspore/mindrecord/tools/tfrecord_to_mr.py +2 -10
- mindspore/mindspore_backend.dll +0 -0
- mindspore/mindspore_common.dll +0 -0
- mindspore/mindspore_core.dll +0 -0
- mindspore/mindspore_glog.dll +0 -0
- mindspore/mindspore_np_dtype.dll +0 -0
- mindspore/mindspore_shared_lib.dll +0 -0
- mindspore/mint/__init__.py +1137 -0
- mindspore/{rewrite/ast_transformers → mint/linalg}/__init__.py +9 -4
- mindspore/mint/nn/__init__.py +512 -0
- mindspore/mint/nn/functional.py +573 -0
- mindspore/mint/optim/__init__.py +24 -0
- mindspore/mint/optim/adamw.py +185 -0
- mindspore/multiprocessing/__init__.py +72 -0
- mindspore/nn/__init__.py +1 -0
- mindspore/nn/cell.py +213 -257
- mindspore/nn/dynamic_lr.py +2 -2
- mindspore/nn/extend/__init__.py +29 -0
- mindspore/nn/extend/basic.py +140 -0
- mindspore/nn/extend/embedding.py +143 -0
- mindspore/{rewrite/ast_creator_register.py → nn/extend/layer/__init__.py} +9 -19
- mindspore/nn/extend/layer/normalization.py +109 -0
- mindspore/nn/extend/pooling.py +117 -0
- mindspore/nn/layer/activation.py +84 -94
- mindspore/nn/layer/basic.py +177 -82
- mindspore/nn/layer/channel_shuffle.py +3 -16
- mindspore/nn/layer/container.py +3 -3
- mindspore/nn/layer/conv.py +75 -66
- mindspore/nn/layer/embedding.py +103 -45
- mindspore/nn/layer/embedding_service.py +531 -0
- mindspore/nn/layer/embedding_service_layer.py +393 -0
- mindspore/nn/layer/image.py +4 -7
- mindspore/nn/layer/math.py +1 -1
- mindspore/nn/layer/normalization.py +52 -66
- mindspore/nn/layer/padding.py +30 -39
- mindspore/nn/layer/pooling.py +18 -9
- mindspore/nn/layer/rnn_cells.py +6 -16
- mindspore/nn/layer/rnns.py +6 -5
- mindspore/nn/layer/thor_layer.py +1 -2
- mindspore/nn/layer/timedistributed.py +1 -1
- mindspore/nn/layer/transformer.py +52 -50
- mindspore/nn/learning_rate_schedule.py +6 -5
- mindspore/nn/loss/loss.py +63 -84
- mindspore/nn/optim/ada_grad.py +6 -4
- mindspore/nn/optim/adadelta.py +3 -1
- mindspore/nn/optim/adafactor.py +1 -1
- mindspore/nn/optim/adam.py +102 -181
- mindspore/nn/optim/adamax.py +4 -2
- mindspore/nn/optim/adasum.py +3 -3
- mindspore/nn/optim/asgd.py +4 -2
- mindspore/nn/optim/ftrl.py +31 -61
- mindspore/nn/optim/lamb.py +5 -3
- mindspore/nn/optim/lars.py +2 -2
- mindspore/nn/optim/lazyadam.py +6 -4
- mindspore/nn/optim/momentum.py +13 -25
- mindspore/nn/optim/optimizer.py +6 -3
- mindspore/nn/optim/proximal_ada_grad.py +4 -2
- mindspore/nn/optim/rmsprop.py +9 -3
- mindspore/nn/optim/rprop.py +4 -2
- mindspore/nn/optim/sgd.py +7 -4
- mindspore/nn/optim/thor.py +2 -2
- mindspore/nn/probability/distribution/_utils/custom_ops.py +2 -2
- mindspore/nn/probability/distribution/beta.py +2 -2
- mindspore/nn/probability/distribution/categorical.py +4 -6
- mindspore/nn/probability/distribution/cauchy.py +2 -2
- mindspore/nn/probability/distribution/exponential.py +2 -2
- mindspore/nn/probability/distribution/geometric.py +1 -1
- mindspore/nn/probability/distribution/gumbel.py +2 -2
- mindspore/nn/probability/distribution/logistic.py +1 -1
- mindspore/nn/probability/distribution/poisson.py +2 -2
- mindspore/nn/probability/distribution/uniform.py +2 -2
- mindspore/nn/reinforcement/_tensors_queue.py +13 -1
- mindspore/nn/wrap/__init__.py +2 -1
- mindspore/nn/wrap/cell_wrapper.py +58 -13
- mindspore/nn/wrap/grad_reducer.py +148 -8
- mindspore/nn/wrap/loss_scale.py +32 -9
- mindspore/numpy/__init__.py +2 -0
- mindspore/numpy/array_creations.py +2 -0
- mindspore/numpy/array_ops.py +6 -6
- mindspore/numpy/dtypes.py +3 -3
- mindspore/numpy/fft.py +431 -0
- mindspore/numpy/math_ops.py +61 -67
- mindspore/numpy/utils.py +3 -0
- mindspore/opencv_core452.dll +0 -0
- mindspore/opencv_imgcodecs452.dll +0 -0
- mindspore/opencv_imgproc452.dll +0 -0
- mindspore/ops/__init__.py +8 -4
- mindspore/ops/_grad_experimental/grad_array_ops.py +4 -160
- mindspore/ops/_grad_experimental/grad_comm_ops.py +93 -36
- mindspore/ops/_grad_experimental/grad_inner_ops.py +8 -0
- mindspore/ops/_grad_experimental/grad_math_ops.py +92 -287
- mindspore/ops/_grad_experimental/grad_nn_ops.py +0 -53
- mindspore/ops/_grad_experimental/grad_quant_ops.py +3 -3
- mindspore/ops/_grad_experimental/grad_sparse.py +1 -1
- mindspore/ops/_grad_experimental/grad_sparse_ops.py +3 -3
- mindspore/ops/_op_impl/__init__.py +0 -1
- mindspore/ops/_op_impl/aicpu/__init__.py +1 -0
- mindspore/ops/_op_impl/aicpu/gamma.py +2 -0
- mindspore/ops/_op_impl/{cpu/concat.py → aicpu/generate_eod_mask.py} +16 -17
- mindspore/ops/_op_impl/aicpu/log_uniform_candidate_sampler.py +1 -3
- mindspore/ops/_op_impl/aicpu/poisson.py +2 -0
- mindspore/ops/_op_impl/cpu/__init__.py +1 -3
- mindspore/ops/_op_impl/cpu/adam.py +2 -2
- mindspore/ops/_op_impl/cpu/adam_weight_decay.py +3 -2
- mindspore/ops/_op_impl/cpu/maximum_grad.py +16 -14
- mindspore/ops/_op_impl/cpu/minimum_grad.py +8 -0
- mindspore/ops/_vmap/vmap_array_ops.py +164 -101
- mindspore/ops/_vmap/vmap_base.py +8 -1
- mindspore/ops/_vmap/vmap_grad_math_ops.py +95 -9
- mindspore/ops/_vmap/vmap_grad_nn_ops.py +143 -58
- mindspore/ops/_vmap/vmap_image_ops.py +70 -13
- mindspore/ops/_vmap/vmap_math_ops.py +130 -58
- mindspore/ops/_vmap/vmap_nn_ops.py +249 -115
- mindspore/ops/_vmap/vmap_other_ops.py +1 -1
- mindspore/ops/auto_generate/__init__.py +31 -0
- mindspore/ops/auto_generate/cpp_create_prim_instance_helper.py +231 -0
- mindspore/ops/auto_generate/gen_arg_dtype_cast.py +250 -0
- mindspore/ops/auto_generate/gen_arg_handler.py +197 -0
- mindspore/ops/auto_generate/gen_extend_func.py +980 -0
- mindspore/ops/auto_generate/gen_ops_def.py +6443 -0
- mindspore/ops/auto_generate/gen_ops_prim.py +13167 -0
- mindspore/ops/auto_generate/pyboost_inner_prim.py +429 -0
- mindspore/ops/composite/__init__.py +5 -2
- mindspore/ops/composite/base.py +121 -23
- mindspore/ops/composite/math_ops.py +10 -49
- mindspore/ops/composite/multitype_ops/_compile_utils.py +191 -618
- mindspore/ops/composite/multitype_ops/_constexpr_utils.py +25 -134
- mindspore/ops/composite/multitype_ops/add_impl.py +6 -0
- mindspore/ops/composite/multitype_ops/bitwise_and_impl.py +6 -0
- mindspore/ops/composite/multitype_ops/bitwise_or_impl.py +6 -0
- mindspore/ops/composite/multitype_ops/bitwise_xor_impl.py +6 -0
- mindspore/ops/composite/multitype_ops/div_impl.py +8 -0
- mindspore/ops/composite/multitype_ops/equal_impl.py +6 -0
- mindspore/ops/composite/multitype_ops/floordiv_impl.py +8 -0
- mindspore/ops/composite/multitype_ops/getitem_impl.py +6 -0
- mindspore/ops/composite/multitype_ops/greater_equal_impl.py +6 -0
- mindspore/ops/composite/multitype_ops/greater_impl.py +6 -0
- mindspore/ops/composite/multitype_ops/in_impl.py +8 -2
- mindspore/ops/composite/multitype_ops/left_shift_impl.py +6 -0
- mindspore/ops/composite/multitype_ops/less_equal_impl.py +6 -0
- mindspore/ops/composite/multitype_ops/less_impl.py +6 -0
- mindspore/ops/composite/multitype_ops/logic_not_impl.py +6 -0
- mindspore/ops/composite/multitype_ops/logical_and_impl.py +6 -0
- mindspore/ops/composite/multitype_ops/logical_or_impl.py +6 -0
- mindspore/ops/composite/multitype_ops/mod_impl.py +6 -0
- mindspore/ops/composite/multitype_ops/mul_impl.py +6 -0
- mindspore/ops/composite/multitype_ops/negative_impl.py +9 -3
- mindspore/ops/composite/multitype_ops/not_equal_impl.py +6 -0
- mindspore/ops/composite/multitype_ops/not_in_impl.py +6 -1
- mindspore/ops/composite/multitype_ops/ones_like_impl.py +2 -2
- mindspore/ops/composite/multitype_ops/pow_impl.py +6 -0
- mindspore/ops/composite/multitype_ops/right_shift_impl.py +6 -0
- mindspore/ops/composite/multitype_ops/setitem_impl.py +32 -21
- mindspore/ops/composite/multitype_ops/sub_impl.py +6 -0
- mindspore/ops/composite/multitype_ops/zeros_like_impl.py +6 -3
- mindspore/ops/deprecated.py +14 -3
- mindspore/ops/extend/__init__.py +53 -0
- mindspore/ops/extend/array_func.py +218 -0
- mindspore/ops/extend/math_func.py +76 -0
- mindspore/ops/extend/nn_func.py +308 -0
- mindspore/ops/function/__init__.py +31 -11
- mindspore/ops/function/array_func.py +848 -1736
- mindspore/ops/function/clip_func.py +19 -31
- mindspore/ops/function/debug_func.py +2 -5
- mindspore/ops/function/fft_func.py +31 -0
- mindspore/ops/function/grad/grad_func.py +27 -20
- mindspore/ops/function/image_func.py +27 -21
- mindspore/ops/function/linalg_func.py +30 -53
- mindspore/ops/function/math_func.py +916 -2791
- mindspore/ops/function/nn_func.py +1445 -889
- mindspore/ops/function/other_func.py +6 -7
- mindspore/ops/function/parameter_func.py +6 -92
- mindspore/ops/function/random_func.py +254 -108
- mindspore/ops/function/reshard_func.py +102 -0
- mindspore/ops/function/sparse_func.py +4 -4
- mindspore/ops/function/sparse_unary_func.py +11 -18
- mindspore/ops/function/spectral_func.py +1 -1
- mindspore/ops/function/vmap_func.py +15 -14
- mindspore/ops/functional.py +342 -343
- mindspore/ops/op_info_register.py +16 -43
- mindspore/ops/operations/__init__.py +32 -23
- mindspore/ops/operations/_embedding_cache_ops.py +1 -1
- mindspore/ops/operations/_grad_ops.py +21 -853
- mindspore/ops/operations/_infer_ops.py +19 -0
- mindspore/ops/operations/_inner_ops.py +155 -511
- mindspore/ops/operations/_quant_ops.py +4 -4
- mindspore/ops/operations/_rl_inner_ops.py +3 -3
- mindspore/ops/operations/_scalar_ops.py +5 -480
- mindspore/ops/operations/_sequence_ops.py +6 -36
- mindspore/ops/operations/_tensor_array.py +8 -8
- mindspore/ops/operations/array_ops.py +112 -2698
- mindspore/ops/operations/comm_ops.py +801 -118
- mindspore/ops/operations/custom_ops.py +62 -121
- mindspore/ops/operations/debug_ops.py +105 -36
- mindspore/ops/operations/image_ops.py +3 -219
- mindspore/ops/operations/inner_ops.py +54 -40
- mindspore/ops/operations/linalg_ops.py +1 -49
- mindspore/ops/operations/manually_defined/__init__.py +24 -0
- mindspore/ops/operations/manually_defined/_inner.py +61 -0
- mindspore/ops/operations/manually_defined/ops_def.py +2016 -0
- mindspore/ops/operations/math_ops.py +621 -4654
- mindspore/ops/operations/nn_ops.py +316 -2226
- mindspore/ops/operations/other_ops.py +53 -45
- mindspore/ops/operations/random_ops.py +4 -51
- mindspore/ops/operations/reshard_ops.py +53 -0
- mindspore/ops/operations/sparse_ops.py +8 -8
- mindspore/ops/primitive.py +204 -103
- mindspore/ops/silent_check.py +162 -0
- mindspore/ops_generate/__init__.py +27 -0
- mindspore/ops_generate/arg_dtype_cast.py +250 -0
- mindspore/ops_generate/arg_handler.py +197 -0
- mindspore/ops_generate/gen_aclnn_implement.py +263 -0
- mindspore/ops_generate/gen_ops.py +1084 -0
- mindspore/ops_generate/gen_ops_inner_prim.py +131 -0
- mindspore/ops_generate/gen_pyboost_func.py +968 -0
- mindspore/ops_generate/gen_utils.py +209 -0
- mindspore/ops_generate/op_proto.py +138 -0
- mindspore/ops_generate/pyboost_utils.py +354 -0
- mindspore/ops_generate/template.py +239 -0
- mindspore/parallel/__init__.py +7 -4
- mindspore/parallel/_auto_parallel_context.py +155 -6
- mindspore/parallel/_cell_wrapper.py +16 -9
- mindspore/parallel/_cost_model_context.py +1 -1
- mindspore/parallel/_dp_allreduce_fusion.py +159 -159
- mindspore/parallel/_parallel_serialization.py +62 -14
- mindspore/parallel/_ps_context.py +1 -1
- mindspore/parallel/_recovery_context.py +1 -1
- mindspore/parallel/_tensor.py +18 -9
- mindspore/parallel/_transformer/__init__.py +1 -1
- mindspore/parallel/_transformer/layers.py +1 -1
- mindspore/parallel/_transformer/loss.py +1 -1
- mindspore/parallel/_transformer/moe.py +1 -1
- mindspore/parallel/_transformer/op_parallel_config.py +1 -1
- mindspore/parallel/_transformer/transformer.py +10 -10
- mindspore/parallel/_utils.py +161 -6
- mindspore/parallel/algo_parameter_config.py +6 -8
- mindspore/parallel/checkpoint_transform.py +369 -64
- mindspore/parallel/cluster/__init__.py +15 -0
- mindspore/parallel/cluster/process_entity/__init__.py +18 -0
- mindspore/parallel/cluster/process_entity/_api.py +344 -0
- mindspore/parallel/cluster/process_entity/_utils.py +126 -0
- mindspore/parallel/cluster/run.py +136 -0
- mindspore/parallel/mpi/__init__.py +1 -1
- mindspore/parallel/mpi/_mpi_config.py +1 -1
- mindspore/parallel/parameter_broadcast.py +152 -0
- mindspore/parallel/shard.py +128 -17
- mindspore/profiler/__init__.py +3 -2
- mindspore/profiler/common/process_pool.py +41 -0
- mindspore/profiler/common/singleton.py +28 -0
- mindspore/profiler/common/util.py +125 -0
- mindspore/profiler/envprofiling.py +2 -2
- mindspore/{_extends/parallel_compile/tbe_compiler → profiler/parser/ascend_analysis}/__init__.py +1 -1
- mindspore/profiler/parser/ascend_analysis/constant.py +53 -0
- mindspore/profiler/parser/ascend_analysis/file_manager.py +159 -0
- mindspore/profiler/parser/ascend_analysis/function_event.py +161 -0
- mindspore/profiler/parser/ascend_analysis/fwk_cann_parser.py +131 -0
- mindspore/profiler/parser/ascend_analysis/fwk_file_parser.py +85 -0
- mindspore/profiler/parser/ascend_analysis/msprof_timeline_parser.py +57 -0
- mindspore/profiler/parser/ascend_analysis/profiler_info_parser.py +116 -0
- mindspore/profiler/parser/ascend_analysis/tlv_decoder.py +86 -0
- mindspore/profiler/parser/ascend_analysis/trace_event_manager.py +68 -0
- mindspore/profiler/parser/ascend_cluster_generator.py +116 -0
- mindspore/profiler/parser/ascend_communicate_generator.py +314 -0
- mindspore/profiler/parser/ascend_flops_generator.py +27 -5
- mindspore/profiler/parser/ascend_fpbp_generator.py +8 -2
- mindspore/profiler/parser/ascend_hccl_generator.py +31 -280
- mindspore/profiler/parser/ascend_integrate_generator.py +42 -0
- mindspore/profiler/parser/ascend_memory_generator.py +185 -0
- mindspore/profiler/parser/ascend_msprof_exporter.py +151 -126
- mindspore/profiler/parser/ascend_msprof_generator.py +75 -274
- mindspore/profiler/parser/ascend_op_generator.py +94 -36
- mindspore/profiler/parser/ascend_timeline_generator.py +297 -131
- mindspore/profiler/parser/base_timeline_generator.py +17 -3
- mindspore/profiler/parser/cpu_gpu_timeline_generator.py +2 -1
- mindspore/profiler/parser/framework_parser.py +11 -4
- mindspore/profiler/parser/integrator.py +3 -1
- mindspore/profiler/parser/memory_usage_parser.py +8 -2
- mindspore/profiler/parser/minddata_analyzer.py +8 -2
- mindspore/profiler/parser/minddata_parser.py +73 -4
- mindspore/profiler/parser/msadvisor_analyzer.py +5 -3
- mindspore/profiler/parser/msadvisor_parser.py +10 -4
- mindspore/profiler/parser/profiler_info.py +16 -1
- mindspore/profiler/profiling.py +522 -195
- mindspore/rewrite/__init__.py +2 -13
- mindspore/rewrite/api/node.py +123 -37
- mindspore/rewrite/api/pattern_engine.py +2 -3
- mindspore/rewrite/api/scoped_value.py +16 -15
- mindspore/rewrite/api/symbol_tree.py +46 -30
- mindspore/rewrite/ast_helpers/__init__.py +3 -6
- mindspore/rewrite/ast_helpers/ast_converter.py +143 -0
- mindspore/rewrite/ast_helpers/ast_finder.py +48 -0
- mindspore/rewrite/ast_helpers/ast_flattener.py +268 -0
- mindspore/rewrite/ast_helpers/ast_modifier.py +160 -92
- mindspore/rewrite/common/__init__.py +1 -2
- mindspore/rewrite/common/config.py +24 -0
- mindspore/rewrite/common/{rewrite_elog.py → error_log.py} +39 -39
- mindspore/rewrite/{namer.py → common/namer.py} +63 -18
- mindspore/rewrite/common/namespace.py +118 -0
- mindspore/rewrite/node/__init__.py +5 -5
- mindspore/rewrite/node/call_function.py +23 -7
- mindspore/rewrite/node/cell_container.py +7 -3
- mindspore/rewrite/node/control_flow.py +53 -28
- mindspore/rewrite/node/node.py +212 -196
- mindspore/rewrite/node/node_manager.py +51 -22
- mindspore/rewrite/node/node_topological_manager.py +3 -23
- mindspore/rewrite/parsers/__init__.py +12 -0
- mindspore/rewrite/parsers/arguments_parser.py +8 -9
- mindspore/rewrite/parsers/assign_parser.py +637 -413
- mindspore/rewrite/parsers/attribute_parser.py +3 -4
- mindspore/rewrite/parsers/class_def_parser.py +115 -148
- mindspore/rewrite/parsers/constant_parser.py +5 -5
- mindspore/rewrite/parsers/container_parser.py +4 -6
- mindspore/rewrite/parsers/expr_parser.py +55 -0
- mindspore/rewrite/parsers/for_parser.py +31 -98
- mindspore/rewrite/parsers/function_def_parser.py +13 -5
- mindspore/rewrite/parsers/if_parser.py +28 -10
- mindspore/rewrite/parsers/module_parser.py +8 -182
- mindspore/rewrite/parsers/parser.py +1 -5
- mindspore/rewrite/parsers/parser_register.py +1 -1
- mindspore/rewrite/parsers/return_parser.py +5 -10
- mindspore/rewrite/parsers/while_parser.py +59 -0
- mindspore/rewrite/sparsify/utils.py +1 -1
- mindspore/rewrite/symbol_tree/__init__.py +20 -0
- mindspore/rewrite/{symbol_tree.py → symbol_tree/symbol_tree.py} +704 -185
- mindspore/rewrite/{symbol_tree_builder.py → symbol_tree/symbol_tree_builder.py} +8 -8
- mindspore/rewrite/{symbol_tree_dumper.py → symbol_tree/symbol_tree_dumper.py} +4 -4
- mindspore/run_check/_check_version.py +6 -14
- mindspore/run_check/run_check.py +1 -1
- mindspore/safeguard/rewrite_obfuscation.py +9 -19
- mindspore/swresample-4.dll +0 -0
- mindspore/swscale-6.dll +0 -0
- mindspore/tinyxml2.dll +0 -0
- mindspore/train/__init__.py +6 -5
- mindspore/train/_utils.py +178 -4
- mindspore/train/amp.py +167 -245
- mindspore/train/anf_ir_pb2.py +14 -2
- mindspore/train/callback/__init__.py +5 -2
- mindspore/train/callback/_backup_and_restore.py +5 -5
- mindspore/train/callback/_callback.py +4 -4
- mindspore/train/callback/_checkpoint.py +151 -37
- mindspore/train/callback/_cluster_monitor.py +201 -0
- mindspore/train/callback/_early_stop.py +2 -2
- mindspore/train/callback/_flops_collector.py +238 -0
- mindspore/train/callback/_landscape.py +16 -11
- mindspore/train/callback/_loss_monitor.py +2 -2
- mindspore/train/callback/_mindio_ttp.py +443 -0
- mindspore/train/callback/_on_request_exit.py +2 -2
- mindspore/train/callback/_reduce_lr_on_plateau.py +2 -2
- mindspore/train/callback/_summary_collector.py +13 -14
- mindspore/train/callback/_time_monitor.py +3 -3
- mindspore/train/data_sink.py +6 -5
- mindspore/train/dataset_helper.py +66 -21
- mindspore/train/loss_scale_manager.py +2 -2
- mindspore/train/metrics/accuracy.py +7 -7
- mindspore/train/metrics/confusion_matrix.py +8 -6
- mindspore/train/metrics/cosine_similarity.py +6 -4
- mindspore/train/metrics/error.py +2 -2
- mindspore/train/metrics/metric.py +3 -3
- mindspore/train/metrics/perplexity.py +2 -1
- mindspore/train/metrics/topk.py +2 -2
- mindspore/train/mind_ir_pb2.py +89 -15
- mindspore/train/model.py +298 -56
- mindspore/train/serialization.py +501 -221
- mindspore/train/summary/_summary_adapter.py +1 -1
- mindspore/train/summary/_writer_pool.py +1 -1
- mindspore/train/summary/summary_record.py +56 -34
- mindspore/train/train_thor/convert_utils.py +3 -3
- mindspore/turbojpeg.dll +0 -0
- mindspore/version.py +1 -1
- {mindspore-2.2.11.dist-info → mindspore-2.3.0.dist-info}/METADATA +3 -3
- mindspore-2.3.0.dist-info/RECORD +1400 -0
- {mindspore-2.2.11.dist-info → mindspore-2.3.0.dist-info}/entry_points.txt +1 -0
- mindspore/_extends/parallel_compile/tbe_compiler/tbe_adapter.py +0 -662
- mindspore/_extends/parallel_compile/tbe_compiler/tbe_helper.py +0 -377
- mindspore/_extends/parallel_compile/tbe_compiler/tbe_job.py +0 -201
- mindspore/_extends/parallel_compile/tbe_compiler/tbe_job_manager.py +0 -515
- mindspore/gen_ops.py +0 -273
- mindspore/nn/layer/flash_attention.py +0 -189
- mindspore/ops/_op_impl/cpu/tensor_shape.py +0 -42
- mindspore/ops/_op_impl/tbe/__init__.py +0 -47
- mindspore/ops/_op_impl/tbe/abs.py +0 -38
- mindspore/ops/_op_impl/tbe/abs_ds.py +0 -39
- mindspore/ops/_op_impl/tbe/abs_grad.py +0 -43
- mindspore/ops/_op_impl/tbe/abs_grad_ds.py +0 -44
- mindspore/ops/_op_impl/tbe/accumulate_n_v2.py +0 -41
- mindspore/ops/_op_impl/tbe/accumulate_n_v2_ds.py +0 -42
- mindspore/ops/_op_impl/tbe/acos.py +0 -37
- mindspore/ops/_op_impl/tbe/acos_ds.py +0 -38
- mindspore/ops/_op_impl/tbe/acos_grad.py +0 -43
- mindspore/ops/_op_impl/tbe/acos_grad_ds.py +0 -44
- mindspore/ops/_op_impl/tbe/acosh.py +0 -37
- mindspore/ops/_op_impl/tbe/acosh_ds.py +0 -38
- mindspore/ops/_op_impl/tbe/acosh_grad.py +0 -43
- mindspore/ops/_op_impl/tbe/acosh_grad_ds.py +0 -44
- mindspore/ops/_op_impl/tbe/act_ulq_clamp_max_grad.py +0 -38
- mindspore/ops/_op_impl/tbe/act_ulq_clamp_min_grad.py +0 -38
- mindspore/ops/_op_impl/tbe/acts_ulq.py +0 -45
- mindspore/ops/_op_impl/tbe/acts_ulq_input_grad.py +0 -38
- mindspore/ops/_op_impl/tbe/adam_apply_one.py +0 -50
- mindspore/ops/_op_impl/tbe/adam_apply_one_assign.py +0 -53
- mindspore/ops/_op_impl/tbe/adam_apply_one_ds.py +0 -51
- mindspore/ops/_op_impl/tbe/adam_apply_one_with_decay.py +0 -54
- mindspore/ops/_op_impl/tbe/adam_apply_one_with_decay_assign.py +0 -54
- mindspore/ops/_op_impl/tbe/adam_apply_one_with_decay_ds.py +0 -55
- mindspore/ops/_op_impl/tbe/adaptive_max_pool2d.py +0 -37
- mindspore/ops/_op_impl/tbe/add.py +0 -42
- mindspore/ops/_op_impl/tbe/add_ds.py +0 -43
- mindspore/ops/_op_impl/tbe/add_n.py +0 -39
- mindspore/ops/_op_impl/tbe/add_n_ds.py +0 -40
- mindspore/ops/_op_impl/tbe/addcdiv.py +0 -41
- mindspore/ops/_op_impl/tbe/addcdiv_ds.py +0 -42
- mindspore/ops/_op_impl/tbe/addcmul.py +0 -43
- mindspore/ops/_op_impl/tbe/addcmul_ds.py +0 -44
- mindspore/ops/_op_impl/tbe/apply_ada_max.py +0 -68
- mindspore/ops/_op_impl/tbe/apply_ada_max_ds.py +0 -69
- mindspore/ops/_op_impl/tbe/apply_adadelta.py +0 -66
- mindspore/ops/_op_impl/tbe/apply_adadelta_ds.py +0 -67
- mindspore/ops/_op_impl/tbe/apply_adagrad.py +0 -55
- mindspore/ops/_op_impl/tbe/apply_adagrad_d_a.py +0 -67
- mindspore/ops/_op_impl/tbe/apply_adagrad_ds.py +0 -56
- mindspore/ops/_op_impl/tbe/apply_adagrad_v2.py +0 -48
- mindspore/ops/_op_impl/tbe/apply_adagrad_v2_ds.py +0 -49
- mindspore/ops/_op_impl/tbe/apply_adam.py +0 -79
- mindspore/ops/_op_impl/tbe/apply_adam_ds.py +0 -80
- mindspore/ops/_op_impl/tbe/apply_adam_with_amsgrad.py +0 -60
- mindspore/ops/_op_impl/tbe/apply_adam_with_amsgrad_ds.py +0 -61
- mindspore/ops/_op_impl/tbe/apply_add_sign.py +0 -65
- mindspore/ops/_op_impl/tbe/apply_add_sign_ds.py +0 -66
- mindspore/ops/_op_impl/tbe/apply_centered_rms_prop.py +0 -77
- mindspore/ops/_op_impl/tbe/apply_centered_rms_prop_ds.py +0 -78
- mindspore/ops/_op_impl/tbe/apply_ftrl.py +0 -67
- mindspore/ops/_op_impl/tbe/apply_ftrl_ds.py +0 -68
- mindspore/ops/_op_impl/tbe/apply_gradient_descent.py +0 -44
- mindspore/ops/_op_impl/tbe/apply_gradient_descent_ds.py +0 -45
- mindspore/ops/_op_impl/tbe/apply_keras_momentum.py +0 -49
- mindspore/ops/_op_impl/tbe/apply_momentum.py +0 -64
- mindspore/ops/_op_impl/tbe/apply_momentum_ds.py +0 -65
- mindspore/ops/_op_impl/tbe/apply_power_sign.py +0 -65
- mindspore/ops/_op_impl/tbe/apply_power_sign_ds.py +0 -66
- mindspore/ops/_op_impl/tbe/apply_proximal_adagrad.py +0 -57
- mindspore/ops/_op_impl/tbe/apply_proximal_adagrad_ds.py +0 -58
- mindspore/ops/_op_impl/tbe/apply_proximal_gradient_descent.py +0 -54
- mindspore/ops/_op_impl/tbe/apply_proximal_gradient_descent_ds.py +0 -55
- mindspore/ops/_op_impl/tbe/apply_rms_prop.py +0 -52
- mindspore/ops/_op_impl/tbe/approximate_equal.py +0 -39
- mindspore/ops/_op_impl/tbe/approximate_equal_ds.py +0 -40
- mindspore/ops/_op_impl/tbe/arg_max.py +0 -38
- mindspore/ops/_op_impl/tbe/arg_max_with_value.py +0 -38
- mindspore/ops/_op_impl/tbe/arg_max_with_value_ds.py +0 -39
- mindspore/ops/_op_impl/tbe/arg_min.py +0 -38
- mindspore/ops/_op_impl/tbe/arg_min_v2_ds.py +0 -40
- mindspore/ops/_op_impl/tbe/arg_min_with_value.py +0 -38
- mindspore/ops/_op_impl/tbe/arg_min_with_value_ds.py +0 -39
- mindspore/ops/_op_impl/tbe/asin.py +0 -37
- mindspore/ops/_op_impl/tbe/asin_ds.py +0 -38
- mindspore/ops/_op_impl/tbe/asin_grad.py +0 -43
- mindspore/ops/_op_impl/tbe/asin_grad_ds.py +0 -44
- mindspore/ops/_op_impl/tbe/asinh.py +0 -37
- mindspore/ops/_op_impl/tbe/asinh_ds.py +0 -38
- mindspore/ops/_op_impl/tbe/asinh_grad.py +0 -43
- mindspore/ops/_op_impl/tbe/asinh_grad_ds.py +0 -44
- mindspore/ops/_op_impl/tbe/assign.py +0 -79
- mindspore/ops/_op_impl/tbe/assign_add.py +0 -59
- mindspore/ops/_op_impl/tbe/assign_add_ds.py +0 -60
- mindspore/ops/_op_impl/tbe/assign_ds.py +0 -80
- mindspore/ops/_op_impl/tbe/assign_sub.py +0 -55
- mindspore/ops/_op_impl/tbe/assign_sub_ds.py +0 -56
- mindspore/ops/_op_impl/tbe/atan.py +0 -37
- mindspore/ops/_op_impl/tbe/atan2.py +0 -38
- mindspore/ops/_op_impl/tbe/atan2_ds.py +0 -39
- mindspore/ops/_op_impl/tbe/atan_ds.py +0 -38
- mindspore/ops/_op_impl/tbe/atan_grad.py +0 -43
- mindspore/ops/_op_impl/tbe/atan_grad_ds.py +0 -44
- mindspore/ops/_op_impl/tbe/atanh.py +0 -37
- mindspore/ops/_op_impl/tbe/atanh_ds.py +0 -38
- mindspore/ops/_op_impl/tbe/avg_pool.py +0 -43
- mindspore/ops/_op_impl/tbe/avg_pool_3d.py +0 -44
- mindspore/ops/_op_impl/tbe/avg_pool_3d_grad.py +0 -45
- mindspore/ops/_op_impl/tbe/avg_pool_ds.py +0 -44
- mindspore/ops/_op_impl/tbe/avg_pool_grad.py +0 -42
- mindspore/ops/_op_impl/tbe/avg_pool_grad_vm.py +0 -42
- mindspore/ops/_op_impl/tbe/basic_lstm_cell.py +0 -57
- mindspore/ops/_op_impl/tbe/basic_lstm_cell_c_state_grad.py +0 -50
- mindspore/ops/_op_impl/tbe/basic_lstm_cell_c_state_grad_v2.py +0 -51
- mindspore/ops/_op_impl/tbe/basic_lstm_cell_input_grad.py +0 -42
- mindspore/ops/_op_impl/tbe/basic_lstm_cell_weight_grad.py +0 -41
- mindspore/ops/_op_impl/tbe/batch_matmul.py +0 -42
- mindspore/ops/_op_impl/tbe/batch_matmul_ds.py +0 -41
- mindspore/ops/_op_impl/tbe/batch_matmul_v2.py +0 -47
- mindspore/ops/_op_impl/tbe/batch_to_space.py +0 -38
- mindspore/ops/_op_impl/tbe/batch_to_space_nd.py +0 -38
- mindspore/ops/_op_impl/tbe/batch_to_space_nd_ds.py +0 -39
- mindspore/ops/_op_impl/tbe/batch_to_space_nd_v2.py +0 -41
- mindspore/ops/_op_impl/tbe/batchnorm.py +0 -58
- mindspore/ops/_op_impl/tbe/batchnorm_grad.py +0 -58
- mindspore/ops/_op_impl/tbe/bce_with_logits_loss.py +0 -42
- mindspore/ops/_op_impl/tbe/bessel_i0e.py +0 -37
- mindspore/ops/_op_impl/tbe/bessel_i0e_ds.py +0 -38
- mindspore/ops/_op_impl/tbe/bessel_i1e.py +0 -37
- mindspore/ops/_op_impl/tbe/bessel_i1e_ds.py +0 -38
- mindspore/ops/_op_impl/tbe/bias_add.py +0 -38
- mindspore/ops/_op_impl/tbe/bias_add_ds.py +0 -39
- mindspore/ops/_op_impl/tbe/bias_add_grad.py +0 -53
- mindspore/ops/_op_impl/tbe/binary_cross_entropy.py +0 -39
- mindspore/ops/_op_impl/tbe/binary_cross_entropy_ds.py +0 -40
- mindspore/ops/_op_impl/tbe/binary_cross_entropy_grad.py +0 -44
- mindspore/ops/_op_impl/tbe/binary_cross_entropy_grad_ds.py +0 -45
- mindspore/ops/_op_impl/tbe/bitwise_and.py +0 -39
- mindspore/ops/_op_impl/tbe/bitwise_and_ds.py +0 -40
- mindspore/ops/_op_impl/tbe/bitwise_or.py +0 -39
- mindspore/ops/_op_impl/tbe/bitwise_or_ds.py +0 -40
- mindspore/ops/_op_impl/tbe/bitwise_xor.py +0 -39
- mindspore/ops/_op_impl/tbe/bitwise_xor_ds.py +0 -40
- mindspore/ops/_op_impl/tbe/bn_infer.py +0 -43
- mindspore/ops/_op_impl/tbe/bn_infer_ds.py +0 -45
- mindspore/ops/_op_impl/tbe/bn_infer_grad.py +0 -41
- mindspore/ops/_op_impl/tbe/bn_infer_grad_ds.py +0 -40
- mindspore/ops/_op_impl/tbe/bn_inference.py +0 -50
- mindspore/ops/_op_impl/tbe/bn_training_reduce.py +0 -38
- mindspore/ops/_op_impl/tbe/bn_training_reduce_ds.py +0 -39
- mindspore/ops/_op_impl/tbe/bn_training_reduce_grad.py +0 -46
- mindspore/ops/_op_impl/tbe/bn_training_reduce_grad_ds.py +0 -47
- mindspore/ops/_op_impl/tbe/bn_training_update.py +0 -52
- mindspore/ops/_op_impl/tbe/bn_training_update_ds.py +0 -53
- mindspore/ops/_op_impl/tbe/bn_training_update_grad.py +0 -44
- mindspore/ops/_op_impl/tbe/bn_training_update_grad_ds.py +0 -45
- mindspore/ops/_op_impl/tbe/bn_training_update_v2.py +0 -48
- mindspore/ops/_op_impl/tbe/bn_training_update_v3.py +0 -51
- mindspore/ops/_op_impl/tbe/bounding_box_decode.py +0 -41
- mindspore/ops/_op_impl/tbe/bounding_box_decode_ds.py +0 -42
- mindspore/ops/_op_impl/tbe/bounding_box_encode.py +0 -38
- mindspore/ops/_op_impl/tbe/broadcast_to.py +0 -40
- mindspore/ops/_op_impl/tbe/broadcast_to_ds.py +0 -44
- mindspore/ops/_op_impl/tbe/cast.py +0 -55
- mindspore/ops/_op_impl/tbe/cast_ds.py +0 -58
- mindspore/ops/_op_impl/tbe/cdist.py +0 -38
- mindspore/ops/_op_impl/tbe/cdist_grad.py +0 -42
- mindspore/ops/_op_impl/tbe/ceil.py +0 -37
- mindspore/ops/_op_impl/tbe/ceil_ds.py +0 -38
- mindspore/ops/_op_impl/tbe/celu.py +0 -39
- mindspore/ops/_op_impl/tbe/centralization.py +0 -39
- mindspore/ops/_op_impl/tbe/check_valid.py +0 -38
- mindspore/ops/_op_impl/tbe/check_valid_ds.py +0 -39
- mindspore/ops/_op_impl/tbe/clip_by_norm_no_div_sum.py +0 -41
- mindspore/ops/_op_impl/tbe/clip_by_norm_no_div_sum_ds.py +0 -42
- mindspore/ops/_op_impl/tbe/clip_by_value.py +0 -41
- mindspore/ops/_op_impl/tbe/clip_by_value_ds.py +0 -42
- mindspore/ops/_op_impl/tbe/concat.py +0 -40
- mindspore/ops/_op_impl/tbe/concat_ds.py +0 -38
- mindspore/ops/_op_impl/tbe/confusion_matrix.py +0 -63
- mindspore/ops/_op_impl/tbe/confusion_mul_grad.py +0 -40
- mindspore/ops/_op_impl/tbe/confusion_softmax_grad.py +0 -41
- mindspore/ops/_op_impl/tbe/confusion_transpose_d.py +0 -39
- mindspore/ops/_op_impl/tbe/conv2d.py +0 -47
- mindspore/ops/_op_impl/tbe/conv2d_backprop_filter.py +0 -42
- mindspore/ops/_op_impl/tbe/conv2d_backprop_filter_ds.py +0 -43
- mindspore/ops/_op_impl/tbe/conv2d_backprop_input.py +0 -42
- mindspore/ops/_op_impl/tbe/conv2d_backprop_input_ds.py +0 -44
- mindspore/ops/_op_impl/tbe/conv2d_ds.py +0 -47
- mindspore/ops/_op_impl/tbe/conv2d_transpose.py +0 -48
- mindspore/ops/_op_impl/tbe/conv3d.py +0 -45
- mindspore/ops/_op_impl/tbe/conv3d_backprop_filter.py +0 -42
- mindspore/ops/_op_impl/tbe/conv3d_backprop_input.py +0 -42
- mindspore/ops/_op_impl/tbe/conv3d_transpose.py +0 -47
- mindspore/ops/_op_impl/tbe/conv3d_transpose_ds.py +0 -48
- mindspore/ops/_op_impl/tbe/cos.py +0 -37
- mindspore/ops/_op_impl/tbe/cos_ds.py +0 -38
- mindspore/ops/_op_impl/tbe/cosh.py +0 -37
- mindspore/ops/_op_impl/tbe/cosh_ds.py +0 -38
- mindspore/ops/_op_impl/tbe/ctc_loss_v2.py +0 -42
- mindspore/ops/_op_impl/tbe/ctc_loss_v2_grad.py +0 -44
- mindspore/ops/_op_impl/tbe/cum_sum.py +0 -42
- mindspore/ops/_op_impl/tbe/cum_sum_ds.py +0 -44
- mindspore/ops/_op_impl/tbe/cummin.py +0 -41
- mindspore/ops/_op_impl/tbe/cumprod.py +0 -42
- mindspore/ops/_op_impl/tbe/data_format_dim_map.py +0 -38
- mindspore/ops/_op_impl/tbe/data_format_dim_map_ds.py +0 -40
- mindspore/ops/_op_impl/tbe/deformable_offsets.py +0 -45
- mindspore/ops/_op_impl/tbe/deformable_offsets_grad.py +0 -48
- mindspore/ops/_op_impl/tbe/depth_to_space_ds.py +0 -49
- mindspore/ops/_op_impl/tbe/depthwise_conv2d.py +0 -44
- mindspore/ops/_op_impl/tbe/depthwise_conv2d_backprop_filter.py +0 -41
- mindspore/ops/_op_impl/tbe/depthwise_conv2d_backprop_input.py +0 -41
- mindspore/ops/_op_impl/tbe/diag.py +0 -38
- mindspore/ops/_op_impl/tbe/diag_part.py +0 -38
- mindspore/ops/_op_impl/tbe/dilation.py +0 -40
- mindspore/ops/_op_impl/tbe/div.py +0 -41
- mindspore/ops/_op_impl/tbe/div_ds.py +0 -42
- mindspore/ops/_op_impl/tbe/div_no_nan.py +0 -41
- mindspore/ops/_op_impl/tbe/div_no_nan_ds.py +0 -42
- mindspore/ops/_op_impl/tbe/dropout_do_mask.py +0 -38
- mindspore/ops/_op_impl/tbe/dropout_do_mask_ds.py +0 -39
- mindspore/ops/_op_impl/tbe/dropout_do_mask_v3.py +0 -39
- mindspore/ops/_op_impl/tbe/dynamic_atomic_addr_clean.py +0 -34
- mindspore/ops/_op_impl/tbe/dynamic_gru_v2.py +0 -95
- mindspore/ops/_op_impl/tbe/dynamic_rnn.py +0 -82
- mindspore/ops/_op_impl/tbe/elu.py +0 -38
- mindspore/ops/_op_impl/tbe/elu_ds.py +0 -39
- mindspore/ops/_op_impl/tbe/elu_grad.py +0 -43
- mindspore/ops/_op_impl/tbe/elu_grad_ds.py +0 -44
- mindspore/ops/_op_impl/tbe/equal.py +0 -42
- mindspore/ops/_op_impl/tbe/equal_ds.py +0 -42
- mindspore/ops/_op_impl/tbe/erf.py +0 -37
- mindspore/ops/_op_impl/tbe/erf_ds.py +0 -38
- mindspore/ops/_op_impl/tbe/erfc.py +0 -37
- mindspore/ops/_op_impl/tbe/erfc_ds.py +0 -38
- mindspore/ops/_op_impl/tbe/erfinv.py +0 -36
- mindspore/ops/_op_impl/tbe/exp.py +0 -40
- mindspore/ops/_op_impl/tbe/exp_ds.py +0 -41
- mindspore/ops/_op_impl/tbe/expand_dims.py +0 -38
- mindspore/ops/_op_impl/tbe/expm1.py +0 -37
- mindspore/ops/_op_impl/tbe/expm1_ds.py +0 -38
- mindspore/ops/_op_impl/tbe/extract_image_patches.py +0 -41
- mindspore/ops/_op_impl/tbe/extract_volume_patches.py +0 -39
- mindspore/ops/_op_impl/tbe/fake_quant_with_min_max_vars.py +0 -39
- mindspore/ops/_op_impl/tbe/fake_quant_with_min_max_vars_gradient.py +0 -43
- mindspore/ops/_op_impl/tbe/fake_quant_with_min_max_vars_per_channel.py +0 -39
- mindspore/ops/_op_impl/tbe/fake_quant_with_min_max_vars_per_channel_gradient.py +0 -43
- mindspore/ops/_op_impl/tbe/fast_gelu.py +0 -37
- mindspore/ops/_op_impl/tbe/fast_gelu_ds.py +0 -38
- mindspore/ops/_op_impl/tbe/fast_gelu_grad.py +0 -41
- mindspore/ops/_op_impl/tbe/fast_gelu_grad_ds.py +0 -42
- mindspore/ops/_op_impl/tbe/fill.py +0 -56
- mindspore/ops/_op_impl/tbe/fill_ds.py +0 -42
- mindspore/ops/_op_impl/tbe/flatten.py +0 -48
- mindspore/ops/_op_impl/tbe/floor.py +0 -37
- mindspore/ops/_op_impl/tbe/floor_div.py +0 -41
- mindspore/ops/_op_impl/tbe/floor_div_ds.py +0 -42
- mindspore/ops/_op_impl/tbe/floor_ds.py +0 -38
- mindspore/ops/_op_impl/tbe/floor_mod.py +0 -39
- mindspore/ops/_op_impl/tbe/floor_mod_ds.py +0 -40
- mindspore/ops/_op_impl/tbe/fused_dbn_dw.py +0 -52
- mindspore/ops/_op_impl/tbe/fused_mul_add.py +0 -38
- mindspore/ops/_op_impl/tbe/fused_mul_add_n.py +0 -48
- mindspore/ops/_op_impl/tbe/fused_mul_add_n_l2loss.py +0 -53
- mindspore/ops/_op_impl/tbe/fused_mul_apply_momentum.py +0 -57
- mindspore/ops/_op_impl/tbe/fused_mul_apply_momentum_extern.py +0 -67
- mindspore/ops/_op_impl/tbe/gather_nd.py +0 -52
- mindspore/ops/_op_impl/tbe/gather_nd_ds.py +0 -48
- mindspore/ops/_op_impl/tbe/gather_v2.py +0 -56
- mindspore/ops/_op_impl/tbe/gather_v2_ds.py +0 -68
- mindspore/ops/_op_impl/tbe/gelu.py +0 -37
- mindspore/ops/_op_impl/tbe/gelu_ds.py +0 -38
- mindspore/ops/_op_impl/tbe/gelu_grad.py +0 -42
- mindspore/ops/_op_impl/tbe/gelu_grad_ds.py +0 -43
- mindspore/ops/_op_impl/tbe/ger.py +0 -43
- mindspore/ops/_op_impl/tbe/ger_ds.py +0 -44
- mindspore/ops/_op_impl/tbe/greater.py +0 -43
- mindspore/ops/_op_impl/tbe/greater_equal.py +0 -41
- mindspore/ops/_op_impl/tbe/greater_equal_ds.py +0 -42
- mindspore/ops/_op_impl/tbe/gru_v2_hidden_grad.py +0 -51
- mindspore/ops/_op_impl/tbe/gru_v2_hidden_grad_cell.py +0 -52
- mindspore/ops/_op_impl/tbe/hard_swish.py +0 -37
- mindspore/ops/_op_impl/tbe/hard_swish_ds.py +0 -38
- mindspore/ops/_op_impl/tbe/hard_swish_grad.py +0 -41
- mindspore/ops/_op_impl/tbe/hard_swish_grad_ds.py +0 -42
- mindspore/ops/_op_impl/tbe/histogram_fixed_width.py +0 -40
- mindspore/ops/_op_impl/tbe/hshrink.py +0 -33
- mindspore/ops/_op_impl/tbe/hshrink_grad.py +0 -37
- mindspore/ops/_op_impl/tbe/hsigmoid.py +0 -45
- mindspore/ops/_op_impl/tbe/hsigmoid_grad.py +0 -39
- mindspore/ops/_op_impl/tbe/ifmr.py +0 -47
- mindspore/ops/_op_impl/tbe/ifmr_ds.py +0 -48
- mindspore/ops/_op_impl/tbe/im2col.py +0 -42
- mindspore/ops/_op_impl/tbe/in_top_k.py +0 -37
- mindspore/ops/_op_impl/tbe/inplace_add.py +0 -39
- mindspore/ops/_op_impl/tbe/inplace_index_add.py +0 -46
- mindspore/ops/_op_impl/tbe/inplace_sub.py +0 -39
- mindspore/ops/_op_impl/tbe/inplace_update.py +0 -39
- mindspore/ops/_op_impl/tbe/inplace_update_ds.py +0 -40
- mindspore/ops/_op_impl/tbe/inv.py +0 -38
- mindspore/ops/_op_impl/tbe/inv_ds.py +0 -39
- mindspore/ops/_op_impl/tbe/inv_grad.py +0 -40
- mindspore/ops/_op_impl/tbe/inv_grad_ds.py +0 -41
- mindspore/ops/_op_impl/tbe/invert.py +0 -37
- mindspore/ops/_op_impl/tbe/invert_ds.py +0 -38
- mindspore/ops/_op_impl/tbe/iou.py +0 -38
- mindspore/ops/_op_impl/tbe/iou_ds.py +0 -39
- mindspore/ops/_op_impl/tbe/is_close.py +0 -40
- mindspore/ops/_op_impl/tbe/kl_div_loss.py +0 -38
- mindspore/ops/_op_impl/tbe/kl_div_loss_ds.py +0 -39
- mindspore/ops/_op_impl/tbe/kl_div_loss_grad.py +0 -40
- mindspore/ops/_op_impl/tbe/l2_loss.py +0 -36
- mindspore/ops/_op_impl/tbe/l2_loss_ds.py +0 -37
- mindspore/ops/_op_impl/tbe/l2_normalize.py +0 -38
- mindspore/ops/_op_impl/tbe/l2_normalize_grad.py +0 -40
- mindspore/ops/_op_impl/tbe/lamb_apply_optimizer_assign.py +0 -55
- mindspore/ops/_op_impl/tbe/lamb_apply_weight_assign.py +0 -42
- mindspore/ops/_op_impl/tbe/lamb_next_mv.py +0 -59
- mindspore/ops/_op_impl/tbe/lamb_next_mv_with_decay.py +0 -59
- mindspore/ops/_op_impl/tbe/lamb_next_right.py +0 -44
- mindspore/ops/_op_impl/tbe/lamb_update_with_lr.py +0 -48
- mindspore/ops/_op_impl/tbe/lamb_update_with_lr_v2.py +0 -44
- mindspore/ops/_op_impl/tbe/lars_update.py +0 -50
- mindspore/ops/_op_impl/tbe/lars_update_ds.py +0 -51
- mindspore/ops/_op_impl/tbe/layer_norm.py +0 -46
- mindspore/ops/_op_impl/tbe/layer_norm_beta_gamma_backprop.py +0 -44
- mindspore/ops/_op_impl/tbe/layer_norm_beta_gamma_backprop_ds.py +0 -45
- mindspore/ops/_op_impl/tbe/layer_norm_beta_gamma_backprop_v2.py +0 -40
- mindspore/ops/_op_impl/tbe/layer_norm_beta_gamma_backprop_v2_ds.py +0 -41
- mindspore/ops/_op_impl/tbe/layer_norm_ds.py +0 -47
- mindspore/ops/_op_impl/tbe/layer_norm_grad.py +0 -48
- mindspore/ops/_op_impl/tbe/layer_norm_x_backprop.py +0 -43
- mindspore/ops/_op_impl/tbe/layer_norm_x_backprop_ds.py +0 -44
- mindspore/ops/_op_impl/tbe/layer_norm_x_backprop_v2.py +0 -45
- mindspore/ops/_op_impl/tbe/layer_norm_x_backprop_v2_ds.py +0 -45
- mindspore/ops/_op_impl/tbe/lerp.py +0 -38
- mindspore/ops/_op_impl/tbe/less.py +0 -41
- mindspore/ops/_op_impl/tbe/less_ds.py +0 -42
- mindspore/ops/_op_impl/tbe/less_equal.py +0 -41
- mindspore/ops/_op_impl/tbe/less_equal_ds.py +0 -42
- mindspore/ops/_op_impl/tbe/log.py +0 -40
- mindspore/ops/_op_impl/tbe/log1p.py +0 -37
- mindspore/ops/_op_impl/tbe/log1p_ds.py +0 -38
- mindspore/ops/_op_impl/tbe/log_ds.py +0 -41
- mindspore/ops/_op_impl/tbe/logical_and.py +0 -37
- mindspore/ops/_op_impl/tbe/logical_and_ds.py +0 -38
- mindspore/ops/_op_impl/tbe/logical_not.py +0 -36
- mindspore/ops/_op_impl/tbe/logical_not_ds.py +0 -37
- mindspore/ops/_op_impl/tbe/logical_or.py +0 -37
- mindspore/ops/_op_impl/tbe/logical_or_ds.py +0 -38
- mindspore/ops/_op_impl/tbe/logsoftmax.py +0 -37
- mindspore/ops/_op_impl/tbe/logsoftmax_ds.py +0 -38
- mindspore/ops/_op_impl/tbe/logsoftmax_grad.py +0 -38
- mindspore/ops/_op_impl/tbe/logsoftmax_grad_ds.py +0 -39
- mindspore/ops/_op_impl/tbe/lp_norm.py +0 -40
- mindspore/ops/_op_impl/tbe/lp_norm_ds.py +0 -41
- mindspore/ops/_op_impl/tbe/lrn.py +0 -41
- mindspore/ops/_op_impl/tbe/lrn_grad.py +0 -42
- mindspore/ops/_op_impl/tbe/lstm_input_grad.py +0 -51
- mindspore/ops/_op_impl/tbe/masked_fill.py +0 -40
- mindspore/ops/_op_impl/tbe/masked_fill_ds.py +0 -41
- mindspore/ops/_op_impl/tbe/matmul.py +0 -53
- mindspore/ops/_op_impl/tbe/matmul_ds.py +0 -47
- mindspore/ops/_op_impl/tbe/matmul_v2.py +0 -50
- mindspore/ops/_op_impl/tbe/matrix_diag.py +0 -45
- mindspore/ops/_op_impl/tbe/matrix_diag_part.py +0 -45
- mindspore/ops/_op_impl/tbe/matrix_set_diag.py +0 -46
- mindspore/ops/_op_impl/tbe/max_pool.py +0 -39
- mindspore/ops/_op_impl/tbe/max_pool3d.py +0 -44
- mindspore/ops/_op_impl/tbe/max_pool3d_grad.py +0 -43
- mindspore/ops/_op_impl/tbe/max_pool3d_grad_grad.py +0 -44
- mindspore/ops/_op_impl/tbe/max_pool_ds.py +0 -40
- mindspore/ops/_op_impl/tbe/max_pool_grad.py +0 -43
- mindspore/ops/_op_impl/tbe/max_pool_grad_grad.py +0 -41
- mindspore/ops/_op_impl/tbe/max_pool_grad_grad_with_argmax.py +0 -41
- mindspore/ops/_op_impl/tbe/max_pool_grad_with_argmax.py +0 -42
- mindspore/ops/_op_impl/tbe/max_pool_with_argmax.py +0 -40
- mindspore/ops/_op_impl/tbe/maximum.py +0 -39
- mindspore/ops/_op_impl/tbe/maximum_ds.py +0 -40
- mindspore/ops/_op_impl/tbe/maximum_grad.py +0 -46
- mindspore/ops/_op_impl/tbe/maximum_grad_ds.py +0 -47
- mindspore/ops/_op_impl/tbe/mem_set.py +0 -38
- mindspore/ops/_op_impl/tbe/minimum.py +0 -40
- mindspore/ops/_op_impl/tbe/minimum_ds.py +0 -41
- mindspore/ops/_op_impl/tbe/minimum_grad.py +0 -46
- mindspore/ops/_op_impl/tbe/minimum_grad_ds.py +0 -47
- mindspore/ops/_op_impl/tbe/mish.py +0 -37
- mindspore/ops/_op_impl/tbe/mod.py +0 -41
- mindspore/ops/_op_impl/tbe/mod_ds.py +0 -42
- mindspore/ops/_op_impl/tbe/mul.py +0 -37
- mindspore/ops/_op_impl/tbe/mul_ds.py +0 -38
- mindspore/ops/_op_impl/tbe/mul_no_nan.py +0 -39
- mindspore/ops/_op_impl/tbe/mul_no_nan_ds.py +0 -40
- mindspore/ops/_op_impl/tbe/multilabel_margin_loss.py +0 -39
- mindspore/ops/_op_impl/tbe/neg.py +0 -39
- mindspore/ops/_op_impl/tbe/neg_ds.py +0 -40
- mindspore/ops/_op_impl/tbe/new_im2col.py +0 -40
- mindspore/ops/_op_impl/tbe/nll_loss.py +0 -41
- mindspore/ops/_op_impl/tbe/nll_loss_grad.py +0 -44
- mindspore/ops/_op_impl/tbe/nms_with_mask.py +0 -39
- mindspore/ops/_op_impl/tbe/not_equal.py +0 -41
- mindspore/ops/_op_impl/tbe/not_equal_ds.py +0 -42
- mindspore/ops/_op_impl/tbe/npu_alloc_float_status.py +0 -34
- mindspore/ops/_op_impl/tbe/npu_clear_float_status.py +0 -35
- mindspore/ops/_op_impl/tbe/npu_clear_float_status_v2.py +0 -35
- mindspore/ops/_op_impl/tbe/npu_get_float_status.py +0 -35
- mindspore/ops/_op_impl/tbe/npu_get_float_status_v2.py +0 -35
- mindspore/ops/_op_impl/tbe/one_hot.py +0 -48
- mindspore/ops/_op_impl/tbe/one_hot_ds.py +0 -45
- mindspore/ops/_op_impl/tbe/ones_like.py +0 -40
- mindspore/ops/_op_impl/tbe/ones_like_ds.py +0 -41
- mindspore/ops/_op_impl/tbe/p_s_r_o_i_pooling.py +0 -40
- mindspore/ops/_op_impl/tbe/p_s_r_o_i_pooling_grad.py +0 -40
- mindspore/ops/_op_impl/tbe/pack.py +0 -58
- mindspore/ops/_op_impl/tbe/pack_ds.py +0 -59
- mindspore/ops/_op_impl/tbe/pad_d.py +0 -40
- mindspore/ops/_op_impl/tbe/pad_d_ds.py +0 -41
- mindspore/ops/_op_impl/tbe/parallel_concat.py +0 -70
- mindspore/ops/_op_impl/tbe/parallel_resize_bilinear.py +0 -45
- mindspore/ops/_op_impl/tbe/parallel_resize_bilinear_grad.py +0 -44
- mindspore/ops/_op_impl/tbe/pdist.py +0 -36
- mindspore/ops/_op_impl/tbe/pooling.py +0 -46
- mindspore/ops/_op_impl/tbe/population_count.py +0 -38
- mindspore/ops/_op_impl/tbe/pow.py +0 -41
- mindspore/ops/_op_impl/tbe/pow_ds.py +0 -42
- mindspore/ops/_op_impl/tbe/prelu.py +0 -37
- mindspore/ops/_op_impl/tbe/prelu_ds.py +0 -38
- mindspore/ops/_op_impl/tbe/prelu_grad.py +0 -40
- mindspore/ops/_op_impl/tbe/range.py +0 -39
- mindspore/ops/_op_impl/tbe/real_div.py +0 -38
- mindspore/ops/_op_impl/tbe/real_div_ds.py +0 -39
- mindspore/ops/_op_impl/tbe/reciprocal.py +0 -36
- mindspore/ops/_op_impl/tbe/reciprocal_ds.py +0 -37
- mindspore/ops/_op_impl/tbe/reciprocal_grad.py +0 -38
- mindspore/ops/_op_impl/tbe/reciprocal_grad_ds.py +0 -39
- mindspore/ops/_op_impl/tbe/reduce_all.py +0 -38
- mindspore/ops/_op_impl/tbe/reduce_all_ds.py +0 -39
- mindspore/ops/_op_impl/tbe/reduce_any.py +0 -38
- mindspore/ops/_op_impl/tbe/reduce_any_ds.py +0 -39
- mindspore/ops/_op_impl/tbe/reduce_max.py +0 -43
- mindspore/ops/_op_impl/tbe/reduce_max_ds.py +0 -41
- mindspore/ops/_op_impl/tbe/reduce_mean.py +0 -40
- mindspore/ops/_op_impl/tbe/reduce_mean_ds.py +0 -42
- mindspore/ops/_op_impl/tbe/reduce_min.py +0 -41
- mindspore/ops/_op_impl/tbe/reduce_min_ds.py +0 -41
- mindspore/ops/_op_impl/tbe/reduce_prod.py +0 -42
- mindspore/ops/_op_impl/tbe/reduce_prod_ds.py +0 -41
- mindspore/ops/_op_impl/tbe/reduce_std.py +0 -44
- mindspore/ops/_op_impl/tbe/reduce_sum.py +0 -39
- mindspore/ops/_op_impl/tbe/reduce_sum_ds.py +0 -41
- mindspore/ops/_op_impl/tbe/relu.py +0 -39
- mindspore/ops/_op_impl/tbe/relu6.py +0 -38
- mindspore/ops/_op_impl/tbe/relu6_ds.py +0 -39
- mindspore/ops/_op_impl/tbe/relu6_grad.py +0 -43
- mindspore/ops/_op_impl/tbe/relu6_grad_ds.py +0 -44
- mindspore/ops/_op_impl/tbe/relu_ds.py +0 -40
- mindspore/ops/_op_impl/tbe/relu_grad.py +0 -41
- mindspore/ops/_op_impl/tbe/relu_grad_ds.py +0 -42
- mindspore/ops/_op_impl/tbe/relu_grad_v2.py +0 -40
- mindspore/ops/_op_impl/tbe/relu_grad_v2_ds.py +0 -41
- mindspore/ops/_op_impl/tbe/relu_v2.py +0 -40
- mindspore/ops/_op_impl/tbe/relu_v2_ds.py +0 -41
- mindspore/ops/_op_impl/tbe/renorm.py +0 -39
- mindspore/ops/_op_impl/tbe/resize_bilinear.py +0 -40
- mindspore/ops/_op_impl/tbe/resize_bilinear_grad.py +0 -41
- mindspore/ops/_op_impl/tbe/resize_bilinear_v2.py +0 -43
- mindspore/ops/_op_impl/tbe/resize_nearest_neighbor.py +0 -40
- mindspore/ops/_op_impl/tbe/resize_nearest_neighbor_ds.py +0 -40
- mindspore/ops/_op_impl/tbe/resize_nearest_neighbor_grad.py +0 -39
- mindspore/ops/_op_impl/tbe/resize_nearest_neighbor_grad_ds.py +0 -42
- mindspore/ops/_op_impl/tbe/reverse_v2_d.py +0 -37
- mindspore/ops/_op_impl/tbe/rint.py +0 -37
- mindspore/ops/_op_impl/tbe/rint_ds.py +0 -38
- mindspore/ops/_op_impl/tbe/roi_align.py +0 -43
- mindspore/ops/_op_impl/tbe/roi_align_ds.py +0 -44
- mindspore/ops/_op_impl/tbe/roi_align_grad.py +0 -43
- mindspore/ops/_op_impl/tbe/roi_align_grad_ds.py +0 -44
- mindspore/ops/_op_impl/tbe/roll.py +0 -42
- mindspore/ops/_op_impl/tbe/round.py +0 -38
- mindspore/ops/_op_impl/tbe/round_ds.py +0 -39
- mindspore/ops/_op_impl/tbe/rsqrt.py +0 -37
- mindspore/ops/_op_impl/tbe/rsqrt_ds.py +0 -38
- mindspore/ops/_op_impl/tbe/rsqrt_grad.py +0 -40
- mindspore/ops/_op_impl/tbe/rsqrt_grad_ds.py +0 -41
- mindspore/ops/_op_impl/tbe/scatter_add.py +0 -44
- mindspore/ops/_op_impl/tbe/scatter_div.py +0 -46
- mindspore/ops/_op_impl/tbe/scatter_max.py +0 -45
- mindspore/ops/_op_impl/tbe/scatter_min.py +0 -45
- mindspore/ops/_op_impl/tbe/scatter_mul.py +0 -44
- mindspore/ops/_op_impl/tbe/scatter_nd.py +0 -41
- mindspore/ops/_op_impl/tbe/scatter_nd_add.py +0 -45
- mindspore/ops/_op_impl/tbe/scatter_nd_d.py +0 -41
- mindspore/ops/_op_impl/tbe/scatter_nd_ds.py +0 -49
- mindspore/ops/_op_impl/tbe/scatter_nd_sub.py +0 -47
- mindspore/ops/_op_impl/tbe/scatter_nd_sub_ds.py +0 -48
- mindspore/ops/_op_impl/tbe/scatter_nd_update.py +0 -47
- mindspore/ops/_op_impl/tbe/scatter_nd_update_ds.py +0 -48
- mindspore/ops/_op_impl/tbe/scatter_non_aliasing_add.py +0 -39
- mindspore/ops/_op_impl/tbe/scatter_non_aliasing_add_ds.py +0 -40
- mindspore/ops/_op_impl/tbe/scatter_sub.py +0 -47
- mindspore/ops/_op_impl/tbe/scatter_sub_ds.py +0 -48
- mindspore/ops/_op_impl/tbe/scatter_update.py +0 -43
- mindspore/ops/_op_impl/tbe/select.py +0 -38
- mindspore/ops/_op_impl/tbe/select_ds.py +0 -39
- mindspore/ops/_op_impl/tbe/selu.py +0 -39
- mindspore/ops/_op_impl/tbe/selu_ds.py +0 -40
- mindspore/ops/_op_impl/tbe/sgd.py +0 -62
- mindspore/ops/_op_impl/tbe/sigmoid.py +0 -37
- mindspore/ops/_op_impl/tbe/sigmoid_cross_entropy_with_logits.py +0 -41
- mindspore/ops/_op_impl/tbe/sigmoid_cross_entropy_with_logits_ds.py +0 -42
- mindspore/ops/_op_impl/tbe/sigmoid_cross_entropy_with_logits_grad.py +0 -42
- mindspore/ops/_op_impl/tbe/sigmoid_cross_entropy_with_logits_grad_ds.py +0 -43
- mindspore/ops/_op_impl/tbe/sigmoid_ds.py +0 -38
- mindspore/ops/_op_impl/tbe/sigmoid_grad.py +0 -39
- mindspore/ops/_op_impl/tbe/sigmoid_grad_ds.py +0 -40
- mindspore/ops/_op_impl/tbe/sign.py +0 -38
- mindspore/ops/_op_impl/tbe/sign_ds.py +0 -39
- mindspore/ops/_op_impl/tbe/sin.py +0 -37
- mindspore/ops/_op_impl/tbe/sin_ds.py +0 -38
- mindspore/ops/_op_impl/tbe/sinh.py +0 -37
- mindspore/ops/_op_impl/tbe/sinh_ds.py +0 -38
- mindspore/ops/_op_impl/tbe/slice.py +0 -58
- mindspore/ops/_op_impl/tbe/smooth_l1_loss.py +0 -45
- mindspore/ops/_op_impl/tbe/smooth_l1_loss_ds.py +0 -46
- mindspore/ops/_op_impl/tbe/smooth_l1_loss_grad.py +0 -46
- mindspore/ops/_op_impl/tbe/smooth_l1_loss_grad_ds.py +0 -47
- mindspore/ops/_op_impl/tbe/soft_margin_loss.py +0 -38
- mindspore/ops/_op_impl/tbe/soft_margin_loss_grad.py +0 -39
- mindspore/ops/_op_impl/tbe/soft_shrink.py +0 -36
- mindspore/ops/_op_impl/tbe/soft_shrink_grad.py +0 -38
- mindspore/ops/_op_impl/tbe/softmax.py +0 -37
- mindspore/ops/_op_impl/tbe/softmax_cross_entropy_with_logits.py +0 -38
- mindspore/ops/_op_impl/tbe/softmax_cross_entropy_with_logits_ds.py +0 -39
- mindspore/ops/_op_impl/tbe/softmax_ds.py +0 -38
- mindspore/ops/_op_impl/tbe/softmax_grad_ext.py +0 -42
- mindspore/ops/_op_impl/tbe/softmax_v2_with_dropout_do_mask_v3.py +0 -39
- mindspore/ops/_op_impl/tbe/softplus.py +0 -37
- mindspore/ops/_op_impl/tbe/softplus_ds.py +0 -38
- mindspore/ops/_op_impl/tbe/softplus_grad.py +0 -38
- mindspore/ops/_op_impl/tbe/softplus_grad_ds.py +0 -38
- mindspore/ops/_op_impl/tbe/softsign.py +0 -37
- mindspore/ops/_op_impl/tbe/softsign_ds.py +0 -38
- mindspore/ops/_op_impl/tbe/sort.py +0 -38
- mindspore/ops/_op_impl/tbe/sort_ds.py +0 -39
- mindspore/ops/_op_impl/tbe/space_to_batch.py +0 -38
- mindspore/ops/_op_impl/tbe/space_to_batch_nd.py +0 -38
- mindspore/ops/_op_impl/tbe/space_to_depth.py +0 -47
- mindspore/ops/_op_impl/tbe/sparse_apply_adadelta.py +0 -56
- mindspore/ops/_op_impl/tbe/sparse_apply_adagrad.py +0 -45
- mindspore/ops/_op_impl/tbe/sparse_apply_adagrad_ds.py +0 -46
- mindspore/ops/_op_impl/tbe/sparse_apply_adagrad_v2.py +0 -46
- mindspore/ops/_op_impl/tbe/sparse_apply_adagrad_v2_ds.py +0 -47
- mindspore/ops/_op_impl/tbe/sparse_apply_ftrl_d.py +0 -53
- mindspore/ops/_op_impl/tbe/sparse_apply_ftrl_d_ds.py +0 -50
- mindspore/ops/_op_impl/tbe/sparse_apply_ftrl_v2.py +0 -50
- mindspore/ops/_op_impl/tbe/sparse_apply_proximal_adagrad.py +0 -66
- mindspore/ops/_op_impl/tbe/sparse_apply_proximal_adagrad_ds.py +0 -67
- mindspore/ops/_op_impl/tbe/sparse_apply_r_m_s_prop.py +0 -57
- mindspore/ops/_op_impl/tbe/sparse_apply_r_m_s_prop_ds.py +0 -58
- mindspore/ops/_op_impl/tbe/sparse_gather_v2.py +0 -56
- mindspore/ops/_op_impl/tbe/sparse_gather_v2_ds.py +0 -58
- mindspore/ops/_op_impl/tbe/split_d.py +0 -38
- mindspore/ops/_op_impl/tbe/split_d_ds.py +0 -39
- mindspore/ops/_op_impl/tbe/split_v.py +0 -39
- mindspore/ops/_op_impl/tbe/splitv.py +0 -39
- mindspore/ops/_op_impl/tbe/sqrt.py +0 -37
- mindspore/ops/_op_impl/tbe/sqrt_ds.py +0 -38
- mindspore/ops/_op_impl/tbe/sqrt_grad.py +0 -43
- mindspore/ops/_op_impl/tbe/sqrt_grad_ds.py +0 -44
- mindspore/ops/_op_impl/tbe/square.py +0 -38
- mindspore/ops/_op_impl/tbe/square_ds.py +0 -39
- mindspore/ops/_op_impl/tbe/square_sum_all.py +0 -40
- mindspore/ops/_op_impl/tbe/square_sum_all_ds.py +0 -41
- mindspore/ops/_op_impl/tbe/square_sum_v1.py +0 -38
- mindspore/ops/_op_impl/tbe/square_sum_v1_ds.py +0 -39
- mindspore/ops/_op_impl/tbe/square_sum_v2.py +0 -39
- mindspore/ops/_op_impl/tbe/squared_difference.py +0 -39
- mindspore/ops/_op_impl/tbe/squared_difference_ds.py +0 -41
- mindspore/ops/_op_impl/tbe/squeeze.py +0 -37
- mindspore/ops/_op_impl/tbe/strided_read.py +0 -38
- mindspore/ops/_op_impl/tbe/strided_slice_d.py +0 -44
- mindspore/ops/_op_impl/tbe/strided_slice_ds.py +0 -71
- mindspore/ops/_op_impl/tbe/strided_slice_grad_d.py +0 -51
- mindspore/ops/_op_impl/tbe/strided_slice_grad_ds.py +0 -57
- mindspore/ops/_op_impl/tbe/strided_write.py +0 -38
- mindspore/ops/_op_impl/tbe/sub.py +0 -39
- mindspore/ops/_op_impl/tbe/sub_ds.py +0 -40
- mindspore/ops/_op_impl/tbe/tan.py +0 -38
- mindspore/ops/_op_impl/tbe/tan_ds.py +0 -39
- mindspore/ops/_op_impl/tbe/tanh.py +0 -37
- mindspore/ops/_op_impl/tbe/tanh_ds.py +0 -38
- mindspore/ops/_op_impl/tbe/tanh_grad.py +0 -39
- mindspore/ops/_op_impl/tbe/tanh_grad_ds.py +0 -40
- mindspore/ops/_op_impl/tbe/tensor_move.py +0 -49
- mindspore/ops/_op_impl/tbe/tensor_move_ds.py +0 -50
- mindspore/ops/_op_impl/tbe/tensor_scatter_update.py +0 -41
- mindspore/ops/_op_impl/tbe/tile.py +0 -37
- mindspore/ops/_op_impl/tbe/tile_ds.py +0 -42
- mindspore/ops/_op_impl/tbe/top_k.py +0 -42
- mindspore/ops/_op_impl/tbe/top_k_ds.py +0 -43
- mindspore/ops/_op_impl/tbe/trans_data.py +0 -167
- mindspore/ops/_op_impl/tbe/trans_data_ds.py +0 -180
- mindspore/ops/_op_impl/tbe/trans_data_rnn.py +0 -44
- mindspore/ops/_op_impl/tbe/transpose.py +0 -60
- mindspore/ops/_op_impl/tbe/transpose_d.py +0 -47
- mindspore/ops/_op_impl/tbe/transpose_nod.py +0 -60
- mindspore/ops/_op_impl/tbe/trunc.py +0 -39
- mindspore/ops/_op_impl/tbe/truncate_div.py +0 -41
- mindspore/ops/_op_impl/tbe/truncate_div_ds.py +0 -42
- mindspore/ops/_op_impl/tbe/truncate_mod.py +0 -41
- mindspore/ops/_op_impl/tbe/truncate_mod_ds.py +0 -42
- mindspore/ops/_op_impl/tbe/unpack.py +0 -38
- mindspore/ops/_op_impl/tbe/unpack_ds.py +0 -39
- mindspore/ops/_op_impl/tbe/unsorted_segment_max.py +0 -49
- mindspore/ops/_op_impl/tbe/unsorted_segment_max_ds.py +0 -40
- mindspore/ops/_op_impl/tbe/unsorted_segment_min.py +0 -49
- mindspore/ops/_op_impl/tbe/unsorted_segment_min_ds.py +0 -40
- mindspore/ops/_op_impl/tbe/unsorted_segment_prod.py +0 -49
- mindspore/ops/_op_impl/tbe/unsorted_segment_prod_ds.py +0 -38
- mindspore/ops/_op_impl/tbe/unsorted_segment_sum.py +0 -38
- mindspore/ops/_op_impl/tbe/unsorted_segment_sum_ds.py +0 -41
- mindspore/ops/_op_impl/tbe/wts_arq.py +0 -40
- mindspore/ops/_op_impl/tbe/xdivy.py +0 -38
- mindspore/ops/_op_impl/tbe/xdivy_ds.py +0 -39
- mindspore/ops/_op_impl/tbe/xlogy.py +0 -38
- mindspore/ops/_op_impl/tbe/xlogy_ds.py +0 -39
- mindspore/ops/_op_impl/tbe/zeros_like.py +0 -41
- mindspore/ops/_op_impl/tbe/zeros_like_ds.py +0 -42
- mindspore/ops/_tracefunc.py +0 -241
- mindspore/ops/arg_dtype_cast.py +0 -54
- mindspore/rewrite/api/tree_node_helper.py +0 -60
- mindspore/rewrite/ast_helpers/ast_creator.py +0 -115
- mindspore/rewrite/ast_transformers/flatten_recursive_stmt.py +0 -267
- mindspore/rewrite/ast_transformers/remove_return_out_of_if.py +0 -228
- mindspore/rewrite/namespace.py +0 -53
- mindspore-2.2.11.dist-info/RECORD +0 -1920
- {mindspore-2.2.11.dist-info → mindspore-2.3.0.dist-info}/WHEEL +0 -0
- {mindspore-2.2.11.dist-info → mindspore-2.3.0.dist-info}/top_level.txt +0 -0
mindspore/profiler/profiling.py
CHANGED
|
@@ -14,13 +14,19 @@
|
|
|
14
14
|
# ============================================================================
|
|
15
15
|
"""Profiling api file."""
|
|
16
16
|
import os
|
|
17
|
+
import re
|
|
18
|
+
import shutil
|
|
17
19
|
import stat
|
|
18
20
|
import time
|
|
19
21
|
import json
|
|
22
|
+
from json import JSONDecodeError
|
|
20
23
|
import glob
|
|
21
24
|
import subprocess
|
|
22
25
|
import csv
|
|
26
|
+
import socket
|
|
23
27
|
from enum import Enum
|
|
28
|
+
from multiprocessing import Process
|
|
29
|
+
from typing import List
|
|
24
30
|
import numpy as np
|
|
25
31
|
|
|
26
32
|
from mindspore import log as logger, context
|
|
@@ -28,14 +34,17 @@ from mindspore.context import get_auto_parallel_context
|
|
|
28
34
|
from mindspore.communication.management import GlobalComm, get_rank, get_group_size, get_local_rank
|
|
29
35
|
import mindspore._c_expression as c_expression
|
|
30
36
|
import mindspore._c_dataengine as cde
|
|
37
|
+
from mindspore._c_expression import _framework_profiler_enable_mi
|
|
31
38
|
from mindspore.profiler.common.exceptions.exceptions import ProfilerFileNotFoundException, \
|
|
32
|
-
ProfilerIOException, ProfilerException, ProfilerRawFileException
|
|
39
|
+
ProfilerIOException, ProfilerException, ProfilerRawFileException, ProfilerParamTypeErrorException
|
|
33
40
|
from mindspore.profiler.common.exceptions.exceptions import ProfilerPathErrorException
|
|
34
41
|
from mindspore.profiler.common.exceptions.exceptions import ProfilerDirNotFoundException
|
|
35
|
-
from mindspore.profiler.common.util import get_file_path
|
|
42
|
+
from mindspore.profiler.common.util import get_file_path, ProfilerPathManager
|
|
43
|
+
from mindspore.profiler.common.process_pool import MultiProcessPool
|
|
36
44
|
from mindspore.profiler.common.validator.validate_path import validate_and_normalize_path
|
|
37
45
|
from mindspore.profiler.parser.framework_parser import GpuFrameWorkParser, DynamicFrameWorkParser
|
|
38
46
|
from mindspore.profiler.parser.integrator import Integrator, DeviceTarget
|
|
47
|
+
from mindspore.profiler.parser.ascend_analysis.function_event import CANNEvent
|
|
39
48
|
from mindspore.profiler.parser.cpu_gpu_timeline_generator import GpuTimelineGenerator, CpuTimelineGenerator
|
|
40
49
|
from mindspore.profiler.parser.ascend_timeline_generator import AscendTimelineGenerator
|
|
41
50
|
from mindspore.profiler.parser.memory_usage_parser import MemoryUsageParser
|
|
@@ -48,12 +57,16 @@ from mindspore.profiler.parser.msadvisor_analyzer import Msadvisor
|
|
|
48
57
|
from mindspore.profiler.parser.profiler_info import ProfilerInfo
|
|
49
58
|
from mindspore.common.api import _pynative_executor
|
|
50
59
|
from mindspore.profiler.parser.ascend_msprof_exporter import AscendMsprofExporter
|
|
51
|
-
from mindspore.profiler.parser.ascend_msprof_generator import AscendMsprofDataGenerator, AscendMsprofDataGeneratorOld
|
|
60
|
+
from mindspore.profiler.parser.ascend_msprof_generator import AscendMsprofDataGenerator
|
|
52
61
|
from mindspore.profiler.parser.ascend_fpbp_generator import AscendFPBPGenerator
|
|
53
62
|
from mindspore.profiler.parser.ascend_op_generator import AscendOPGenerator
|
|
54
63
|
from mindspore.profiler.parser.ascend_steptrace_generator import AscendStepTraceGenerator
|
|
55
64
|
from mindspore.profiler.parser.ascend_flops_generator import AscendFlopsGenerator
|
|
56
|
-
from mindspore.profiler.parser.ascend_hccl_generator import AscendHCCLGenerator, AscendHCCLGeneratorOld
|
|
65
|
+
from mindspore.profiler.parser.ascend_cluster_generator import AscendClusterGenerator
|
|
66
|
+
from mindspore.profiler.parser.ascend_hccl_generator import AscendHCCLGenerator
|
|
67
|
+
from mindspore.profiler.parser.ascend_communicate_generator import AscendCommunicationGenerator
|
|
68
|
+
from mindspore.profiler.parser.ascend_memory_generator import AscendMemoryGenerator
|
|
69
|
+
from mindspore.profiler.parser.ascend_integrate_generator import AscendIntegrateGenerator
|
|
57
70
|
|
|
58
71
|
INIT_OP_NAME = 'Default/InitDataSetQueue'
|
|
59
72
|
|
|
@@ -64,10 +77,24 @@ AICORE_METRICS_DICT = {
|
|
|
64
77
|
3: "MemoryL0",
|
|
65
78
|
4: "ResourceConflictRatio",
|
|
66
79
|
5: "MemoryUB",
|
|
80
|
+
6: "L2Cache",
|
|
67
81
|
-1: "None"
|
|
68
82
|
}
|
|
69
83
|
|
|
70
84
|
|
|
85
|
+
class ModelTraingMode(Enum):
|
|
86
|
+
PYNATIVE = 0
|
|
87
|
+
GRAPH = 1
|
|
88
|
+
KERNEL_BY_KERNEL = 2
|
|
89
|
+
UNKNOWN = 3
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
class ProfilerLevel(Enum):
|
|
93
|
+
Level0 = "Level0"
|
|
94
|
+
Level1 = "Level1"
|
|
95
|
+
Level2 = "Level2"
|
|
96
|
+
|
|
97
|
+
|
|
71
98
|
class DeviceSupportParam(Enum):
|
|
72
99
|
"""The device target enum."""
|
|
73
100
|
CPU = ['start', 'start_profile', 'output_path', 'timeline_limit', 'profile_framework', 'op_time']
|
|
@@ -77,17 +104,22 @@ class DeviceSupportParam(Enum):
|
|
|
77
104
|
]
|
|
78
105
|
ASCEND = [
|
|
79
106
|
'start', 'start_profile', 'output_path', 'data_process', 'timeline_limit', 'profile_memory',
|
|
80
|
-
'parallel_strategy', 'profile_communication', 'aicore_metrics', 'l2_cache', '
|
|
81
|
-
'profile_framework'
|
|
107
|
+
'parallel_strategy', 'profile_communication', 'aicore_metrics', 'l2_cache', 'hbm_ddr', 'pcie', 'op_time',
|
|
108
|
+
'ascend_job_id', 'profile_framework', 'host_stack', 'profiler_level', 'data_simplification'
|
|
82
109
|
]
|
|
83
110
|
|
|
84
111
|
|
|
85
112
|
ALWAYS_VALID_PARAM = [
|
|
86
113
|
'start', 'start_profile', 'output_path', 'data_process', 'parallel_strategy', 'l2_cache',
|
|
87
|
-
'ascend_job_id', 'op_time', 'profile_framework'
|
|
114
|
+
'hbm_ddr', 'pcie', 'ascend_job_id', 'op_time', 'profile_framework', 'profiler_level'
|
|
88
115
|
]
|
|
89
116
|
|
|
90
117
|
|
|
118
|
+
ANALYSIS_ASYNC_MODE = 'async'
|
|
119
|
+
ANALYSIS_SYNC_MODE = 'sync'
|
|
120
|
+
DEFAULT_MODEL_ID = 4294967295
|
|
121
|
+
|
|
122
|
+
|
|
91
123
|
def _environment_check():
|
|
92
124
|
if c_expression.security.enable_security():
|
|
93
125
|
raise RuntimeError("Profiler is not supported when MindSpore is compiled with \'-s on\'.")
|
|
@@ -273,41 +305,36 @@ def _parse_host_info(input_file, output_timeline_file, output_memory_file, is_de
|
|
|
273
305
|
logger.warning("No valid time_stamp is record in file: %s", input_file)
|
|
274
306
|
|
|
275
307
|
|
|
276
|
-
def _ascend_graph_msprof_generator(
|
|
308
|
+
def _ascend_graph_msprof_generator(mindstudio_profiler_output, model_iteration_dict):
|
|
277
309
|
"""Executing the msprof export mode."""
|
|
278
310
|
try:
|
|
279
311
|
ProfilerInfo.set_export_start_time(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))
|
|
280
|
-
msprof_exporter = AscendMsprofExporter(
|
|
312
|
+
msprof_exporter = AscendMsprofExporter(mindstudio_profiler_output)
|
|
281
313
|
flag = msprof_exporter.export(model_iteration_dict)
|
|
282
314
|
ProfilerInfo.set_export_end_time(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))
|
|
283
315
|
return flag
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
logger.warning(err.message)
|
|
316
|
+
except (ProfilerException, TimeoutError, FileNotFoundError, RuntimeError) as err:
|
|
317
|
+
logger.warning(str(err))
|
|
287
318
|
return False
|
|
288
319
|
|
|
289
320
|
|
|
290
|
-
def _ascend_graph_msprof_analyse(
|
|
321
|
+
def _ascend_graph_msprof_analyse(mindstudio_profiler_output):
|
|
291
322
|
"""
|
|
292
323
|
Ascend graph model msprof data analyse.
|
|
293
324
|
|
|
294
325
|
Returns:
|
|
295
|
-
list[obj]: The list is : df_op_summary, df_op_statistic, df_step_trace
|
|
326
|
+
list[obj]: The list is : df_op_summary, df_op_statistic, df_step_trace, df_step_trace_model
|
|
296
327
|
"""
|
|
297
|
-
|
|
298
|
-
df_op_statistic = []
|
|
299
|
-
df_step_trace = []
|
|
328
|
+
res = ([], [], [], [])
|
|
300
329
|
try:
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
msprof_analyser = AscendMsprofDataGeneratorOld(os.path.join(source_path, 'summary'))
|
|
305
|
-
df_op_summary, df_op_statistic, df_step_trace = msprof_analyser.parse()
|
|
330
|
+
msprof_analyser = AscendMsprofDataGenerator(mindstudio_profiler_output)
|
|
331
|
+
res = msprof_analyser.parse()
|
|
332
|
+
return res
|
|
306
333
|
except ProfilerException as err:
|
|
307
334
|
logger.warning(err.message)
|
|
308
335
|
finally:
|
|
309
336
|
pass
|
|
310
|
-
return
|
|
337
|
+
return res
|
|
311
338
|
|
|
312
339
|
|
|
313
340
|
class Profiler:
|
|
@@ -316,15 +343,23 @@ class Profiler:
|
|
|
316
343
|
MindSpore users can import the mindspore.Profiler, initialize the Profiler object to start profiling,
|
|
317
344
|
and use Profiler.analyse() to stop profiling and analyse the results.
|
|
318
345
|
Users can visualize the results using the `MindSpore Insight
|
|
319
|
-
<https://www.mindspore.cn/mindinsight/docs/en/
|
|
346
|
+
<https://www.mindspore.cn/mindinsight/docs/en/master/index.html>`_ tool.
|
|
320
347
|
Now, Profiler supports AICORE operator, AICPU operator, HostCPU operator, memory,
|
|
321
348
|
correspondence, cluster, etc data analysis.
|
|
322
349
|
|
|
323
350
|
Args:
|
|
324
351
|
output_path (str, optional): Output data path. Default: ``"./data"`` .
|
|
352
|
+
profiler_level (ProfilerLevel, optional): (Ascend only) The level of profiling. Default: ``None``.
|
|
353
|
+
|
|
354
|
+
- Profiler.Level0: Leanest level of profiling data collection, collects information about the elapsed
|
|
355
|
+
time of the computational operators on the NPU and communication large operator information.
|
|
356
|
+
- Profiler.Level1: Collect more CANN layer AscendCL data and AICore performance metrics and communication
|
|
357
|
+
mini operator information based on Level0.
|
|
358
|
+
- Profiler.Level2: Collect GE and Runtime information in CANN layer on top of Level1
|
|
359
|
+
|
|
325
360
|
op_time (bool, optional): (Ascend/GPU) Whether to collect operators performance data. Default value: ``True``.
|
|
326
361
|
profile_communication (bool, optional): (Ascend only) Whether to collect communication performance data in
|
|
327
|
-
a multi devices training,collect when True. Setting this parameter has no effect during single
|
|
362
|
+
a multi devices training,collect when True. Setting this parameter has no effect during single card
|
|
328
363
|
training. When using this parameter, `op_time` must be set to ``True`` . Default: ``False`` .
|
|
329
364
|
profile_memory (bool, optional): (Ascend only) Whether to collect tensor memory data, collect when ``True`` .
|
|
330
365
|
When using this parameter, `op_time` must be set to True. Default: ``False`` .
|
|
@@ -333,7 +368,7 @@ class Profiler:
|
|
|
333
368
|
start_profile (bool, optional): The start_profile parameter controls whether to enable or disable performance
|
|
334
369
|
data collection based on conditions. Default: ``True`` .
|
|
335
370
|
aicore_metrics (int, optional): (Ascend only) Types of AICORE performance data collected, when using this
|
|
336
|
-
parameter, `op_time` must be set to ``True`` , and the value must be in [-1, 0, 1, 2, 3, 4, 5],
|
|
371
|
+
parameter, `op_time` must be set to ``True`` , and the value must be in [-1, 0, 1, 2, 3, 4, 5, 6],
|
|
337
372
|
Default: ``0`` , the data items contained in each metric are as follows:
|
|
338
373
|
|
|
339
374
|
- -1: Does not collect AICORE data.
|
|
@@ -344,9 +379,14 @@ class Profiler:
|
|
|
344
379
|
- 3: MemoryL0 contains l0a_read/write_bw, l0b_read/write_bw, l0c_read/write_bw etc.
|
|
345
380
|
- 4: ResourceConflictRatio contains vec_bankgroup/bank/resc_cflt_ratio etc.
|
|
346
381
|
- 5: MemoryUB contains ub_read/write_bw_mte, ub_read/write_bw_vector, ub\_/write_bw_scalar etc.
|
|
382
|
+
- 6: L2Cache contains write_cache_hit, write_cache_miss_allocate, r0_read_cache_hit, r1_read_cache_hit etc.
|
|
347
383
|
|
|
348
384
|
l2_cache (bool, optional): (Ascend only) Whether to collect l2 cache data, collect when True.
|
|
349
385
|
Default: ``False`` .
|
|
386
|
+
hbm_ddr (bool, optional): (Ascend only) Whether to collect HBM/DDR read and write rate data, collect when True.
|
|
387
|
+
Default: ``False`` .
|
|
388
|
+
pcie (bool, optional): (Ascend only) Whether to collect PCIe bandwidth data, collect when True.
|
|
389
|
+
Default: ``False`` .
|
|
350
390
|
sync_enable (bool, optional): (GPU only) Whether the profiler collects operators in a synchronous way.
|
|
351
391
|
Default: ``True`` .
|
|
352
392
|
|
|
@@ -368,6 +408,12 @@ class Profiler:
|
|
|
368
408
|
- "time": Only record host timestamp.
|
|
369
409
|
- "memory": Only record host memory usage.
|
|
370
410
|
- None: Not record host information.
|
|
411
|
+
data_simplification (bool, optional): (Ascend only) Whether to remove FRAMEWORK data and other redundant data.
|
|
412
|
+
If set to True, only the delivery of profiler and the original performance data in the PROF_XXX
|
|
413
|
+
directory are retained to save disk space.
|
|
414
|
+
Default value: ``True`` .
|
|
415
|
+
host_stack (bool, optional): (Ascend) Whether to collect frame host call stack data.
|
|
416
|
+
Default value: ``True`` .
|
|
371
417
|
|
|
372
418
|
Raises:
|
|
373
419
|
RuntimeError: When the version of CANN does not match the version of MindSpore,
|
|
@@ -424,8 +470,13 @@ class Profiler:
|
|
|
424
470
|
_has_initialized = False
|
|
425
471
|
_ascend_profiling_options = ""
|
|
426
472
|
_ascend_job_id = ""
|
|
473
|
+
ENABLE_STATUS = "on"
|
|
474
|
+
DISABLE_STATUS = "off"
|
|
427
475
|
|
|
428
476
|
def __init__(self, **kwargs):
|
|
477
|
+
if os.getenv("PROFILING_MODE"):
|
|
478
|
+
raise RuntimeError("Profiling is already enabled by PROFILING_MODE env.")
|
|
479
|
+
|
|
429
480
|
self._dev_id = None
|
|
430
481
|
self._cpu_profiler = None
|
|
431
482
|
self._gpu_profiler = None
|
|
@@ -444,10 +495,13 @@ class Profiler:
|
|
|
444
495
|
self._timeline_size_limit_byte = 500 * 1024 * 1024 # 500MB
|
|
445
496
|
self._parallel_strategy = True
|
|
446
497
|
self._model_iteration_dict = None
|
|
498
|
+
self._analyse_mode = ANALYSIS_SYNC_MODE
|
|
447
499
|
_environment_check()
|
|
448
500
|
# default aicore_metrics type is ArithmeticUtilization
|
|
449
501
|
self._aicore_metrics_id = 0
|
|
450
|
-
self._l2_cache =
|
|
502
|
+
self._l2_cache = self.DISABLE_STATUS
|
|
503
|
+
self._hbm_ddr = self.DISABLE_STATUS
|
|
504
|
+
self._pcie = self.DISABLE_STATUS
|
|
451
505
|
self._data_process = True
|
|
452
506
|
self._op_time = True
|
|
453
507
|
self._profile_communication = False
|
|
@@ -460,24 +514,45 @@ class Profiler:
|
|
|
460
514
|
self._dynamic_status = False
|
|
461
515
|
self._profile_framework = "all"
|
|
462
516
|
self._msprof_enable = os.getenv("PROFILER_SAMPLECONFIG")
|
|
517
|
+
self.profiler_level = None
|
|
518
|
+
self._pretty_json = False
|
|
519
|
+
self._analyse_only = kwargs.get("analyse_only", False)
|
|
520
|
+
self._data_simplification = kwargs.get("data_simplification", True)
|
|
521
|
+
self._host_stack = True
|
|
463
522
|
if self._msprof_enable:
|
|
464
523
|
return
|
|
465
|
-
self._start_time = int(time.time() *
|
|
524
|
+
self._start_time = int(time.time() * 1e6) # us
|
|
525
|
+
self._monotonic_time = int(time.monotonic() * 1e6) # us
|
|
466
526
|
logger.info("Profiling: start time: %d", self._start_time)
|
|
467
527
|
if kwargs.get("env_enable"):
|
|
468
528
|
self._profiler_init(kwargs)
|
|
469
529
|
return
|
|
470
|
-
if Profiler._has_initialized:
|
|
471
|
-
msg = "Do not init twice in the profiler."
|
|
472
|
-
raise RuntimeError(msg)
|
|
473
530
|
Profiler._has_initialized = True
|
|
474
531
|
# get device_id and device_target
|
|
475
|
-
self.
|
|
476
|
-
|
|
477
|
-
|
|
478
|
-
|
|
479
|
-
|
|
480
|
-
self.
|
|
532
|
+
if self._analyse_only:
|
|
533
|
+
self._device_target = DeviceTarget.ASCEND.value
|
|
534
|
+
self._rank_id = kwargs.get("rank_id", 0)
|
|
535
|
+
else:
|
|
536
|
+
self._get_devid_rankid_and_devtarget()
|
|
537
|
+
self._parser_kwargs(kwargs)
|
|
538
|
+
self._get_output_path(kwargs)
|
|
539
|
+
self._decide_device_target(kwargs)
|
|
540
|
+
if self.start_profile:
|
|
541
|
+
self.start()
|
|
542
|
+
|
|
543
|
+
@staticmethod
|
|
544
|
+
def _get_prof_rank(prof_path: str):
|
|
545
|
+
"""get rank id."""
|
|
546
|
+
sub_dirs = os.listdir(os.path.realpath(prof_path))
|
|
547
|
+
info_json_path = ""
|
|
548
|
+
for sub_dir in sub_dirs:
|
|
549
|
+
if sub_dir.startswith("device_"):
|
|
550
|
+
device_id = sub_dir.split("_")[-1]
|
|
551
|
+
info_json_path = os.path.join(prof_path, sub_dir, f"info.json.{device_id}")
|
|
552
|
+
if not os.path.exists(info_json_path):
|
|
553
|
+
return -1
|
|
554
|
+
rank_id, _ = Profiler._parse_info_json(info_json_path)
|
|
555
|
+
return rank_id
|
|
481
556
|
|
|
482
557
|
@staticmethod
|
|
483
558
|
def _check_output_path(output_path):
|
|
@@ -493,9 +568,9 @@ class Profiler:
|
|
|
493
568
|
return output_path
|
|
494
569
|
|
|
495
570
|
@staticmethod
|
|
496
|
-
def
|
|
571
|
+
def _parse_job_start_time(prof_dir):
|
|
497
572
|
"""
|
|
498
|
-
|
|
573
|
+
Get the start time of the job.
|
|
499
574
|
|
|
500
575
|
Args:
|
|
501
576
|
input_file (str): The file path of the host start log file.
|
|
@@ -503,12 +578,29 @@ class Profiler:
|
|
|
503
578
|
Returns:
|
|
504
579
|
str, job start time.
|
|
505
580
|
"""
|
|
506
|
-
|
|
507
|
-
|
|
508
|
-
|
|
509
|
-
|
|
510
|
-
|
|
511
|
-
|
|
581
|
+
try:
|
|
582
|
+
AscendMsprofExporter.check_msprof_env()
|
|
583
|
+
script_path = AscendMsprofExporter.get_msprof_info_path()
|
|
584
|
+
if not script_path:
|
|
585
|
+
logger.warning("Can`t find get_msprof_info.py path, use single-export mode instead.")
|
|
586
|
+
return None
|
|
587
|
+
logger.info("get_msprof_info.py path is : %s", script_path)
|
|
588
|
+
host_dir = os.path.join(prof_dir, 'host')
|
|
589
|
+
cmd = ['python', script_path, '-dir', host_dir]
|
|
590
|
+
outs, _ = AscendMsprofExporter.run_cmd(cmd)
|
|
591
|
+
if not outs:
|
|
592
|
+
logger.warning('Can`t find the msprof info result')
|
|
593
|
+
return None
|
|
594
|
+
result = json.loads(outs)
|
|
595
|
+
if result.get('status', 1) == 1:
|
|
596
|
+
return None
|
|
597
|
+
jor_start_time = result.get('data', {}).get('collection_info', {}).get('Collection start time', None)
|
|
598
|
+
if jor_start_time is not None:
|
|
599
|
+
return float(jor_start_time.strip())
|
|
600
|
+
return None
|
|
601
|
+
except (RuntimeError, JSONDecodeError, AttributeError, TimeoutError, FileNotFoundError) as err:
|
|
602
|
+
logger.warning('Get the drvVersion error, use single-export mode instead. detail : %s', err)
|
|
603
|
+
return None
|
|
512
604
|
|
|
513
605
|
@staticmethod
|
|
514
606
|
def _parse_info_json(info_file):
|
|
@@ -527,8 +619,52 @@ class Profiler:
|
|
|
527
619
|
dev_info = info_dict.get("DeviceInfo", [])
|
|
528
620
|
dev_id = dev_info[0].get("id", -1)
|
|
529
621
|
|
|
622
|
+
if int(rank_id) < 0:
|
|
623
|
+
rank_id = 0
|
|
624
|
+
|
|
530
625
|
return str(rank_id), str(dev_id)
|
|
531
626
|
|
|
627
|
+
@classmethod
|
|
628
|
+
def offline_analyse(cls, path: str, pretty=False, step_list=None):
|
|
629
|
+
"""
|
|
630
|
+
Analyze training performance data offline, which is invoked after performance data collection is completed.
|
|
631
|
+
|
|
632
|
+
Args:
|
|
633
|
+
path (str): The profiling data path which need to be analyzed offline.
|
|
634
|
+
There needs to be a profiler directory in this path.
|
|
635
|
+
pretty (bool, optional): Whether to pretty json files. Default: ``False``.
|
|
636
|
+
step_list (list, optional): A list of steps that need to be analyzed. Default: ``None``.
|
|
637
|
+
By default, all steps will be analyzed.
|
|
638
|
+
|
|
639
|
+
Examples:
|
|
640
|
+
>>> from mindspore import Profiler
|
|
641
|
+
>>> Profiler.offline_analyse("./profiling_path")
|
|
642
|
+
"""
|
|
643
|
+
profiler_path = os.path.join(path, "profiler")
|
|
644
|
+
if not os.path.exists(profiler_path):
|
|
645
|
+
raise ProfilerPathErrorException(f'There must be a profiler folder in the data path: {path}.')
|
|
646
|
+
|
|
647
|
+
rank_set = set()
|
|
648
|
+
sub_dirs = os.listdir(os.path.realpath(profiler_path))
|
|
649
|
+
for sub_dir in sub_dirs:
|
|
650
|
+
sub_path = os.path.join(profiler_path, sub_dir)
|
|
651
|
+
if os.path.isdir(sub_path) and re.match(r"^PROF_\d+_\d+_[a-zA-Z0-9]+", sub_dir):
|
|
652
|
+
rank = cls._get_prof_rank(sub_path)
|
|
653
|
+
rank_set.add(rank)
|
|
654
|
+
if not rank_set:
|
|
655
|
+
return
|
|
656
|
+
|
|
657
|
+
process_list = []
|
|
658
|
+
for rank_id in rank_set:
|
|
659
|
+
profiler = cls(analyse_only=True, rank_id=rank_id)
|
|
660
|
+
process = Process(target=profiler.analyse,
|
|
661
|
+
args=(path, pretty, step_list))
|
|
662
|
+
process.start()
|
|
663
|
+
process_list.append(process)
|
|
664
|
+
|
|
665
|
+
for process in process_list:
|
|
666
|
+
process.join()
|
|
667
|
+
|
|
532
668
|
def op_analyse(self, op_name, device_id=None):
|
|
533
669
|
"""
|
|
534
670
|
Profiler users can use this interface to obtain operator performance data.
|
|
@@ -554,12 +690,12 @@ class Profiler:
|
|
|
554
690
|
>>> # Profiler init.
|
|
555
691
|
>>> profiler = Profiler()
|
|
556
692
|
>>> # Train Model or eval Model, taking LeNet5 as an example.
|
|
557
|
-
>>> # Refer to https://gitee.com/mindspore/docs/blob/
|
|
693
|
+
>>> # Refer to https://gitee.com/mindspore/docs/blob/master/docs/mindspore/code/lenet.py
|
|
558
694
|
>>> net = LeNet5()
|
|
559
695
|
>>> optimizer = nn.Momentum(net.trainable_params(), learning_rate=0.1, momentum=0.9)
|
|
560
696
|
>>> loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True)
|
|
561
697
|
>>> # Create the dataset taking MNIST as an example.
|
|
562
|
-
>>> # Refer to https://gitee.com/mindspore/docs/blob/
|
|
698
|
+
>>> # Refer to https://gitee.com/mindspore/docs/blob/master/docs/mindspore/code/mnist.py
|
|
563
699
|
>>> dataloader = create_dataset()
|
|
564
700
|
>>> model = Model(net, loss, optimizer)
|
|
565
701
|
>>> model.train(5, dataloader, dataset_sink_mode=False)
|
|
@@ -594,18 +730,49 @@ class Profiler:
|
|
|
594
730
|
return message
|
|
595
731
|
return op_info
|
|
596
732
|
|
|
597
|
-
def analyse(self, offline_path=None):
|
|
733
|
+
def analyse(self, offline_path=None, pretty=False, step_list=None, mode="sync"):
|
|
598
734
|
"""
|
|
599
735
|
Collect and analyze training performance data, support calls during and after training. The example shows above.
|
|
600
736
|
|
|
601
737
|
Args:
|
|
602
|
-
offline_path (Union[str, None], optional): The data path which need to be
|
|
738
|
+
offline_path (Union[str, None], optional): The data path which need to be analyzed with offline mode.
|
|
603
739
|
Offline mode isused in abnormal exit scenario. This parameter should be set to ``None``
|
|
604
740
|
for online mode. Default: ``None``.
|
|
605
|
-
|
|
606
|
-
|
|
741
|
+
pretty (bool, optional): Whether to pretty json files. Default: ``False``.
|
|
742
|
+
step_list (list, optional): A list of steps that need to be analyzed. Default: ``None``.
|
|
743
|
+
By default, all steps will be analyzed.
|
|
744
|
+
mode (str, optional): Analysis mode, it must be one of ["sync", "async"]. Default: ``sync``.
|
|
745
|
+
|
|
746
|
+
- sync: analyse data in current process, it will block the current process.
|
|
747
|
+
- async: analyse data in subprocess, it will not the current process.Since the parsing process
|
|
748
|
+
will take up extra CPU resources, please enable this mode according to the actual resource situation.
|
|
607
749
|
|
|
608
|
-
|
|
750
|
+
"""
|
|
751
|
+
try:
|
|
752
|
+
if isinstance(pretty, bool):
|
|
753
|
+
self._pretty_json = pretty
|
|
754
|
+
if mode not in [ANALYSIS_SYNC_MODE, ANALYSIS_ASYNC_MODE]:
|
|
755
|
+
logger.warning("For analyse, the parameter mode must be one of ['sync', 'async'], "
|
|
756
|
+
"it will be set to 'sync'.")
|
|
757
|
+
mode = ANALYSIS_SYNC_MODE
|
|
758
|
+
model_iteration_dict = {}
|
|
759
|
+
if step_list is not None and not isinstance(step_list, list):
|
|
760
|
+
raise ProfilerParamTypeErrorException("Parameter step_list must be a list.")
|
|
761
|
+
if step_list:
|
|
762
|
+
if not all(isinstance(step_id, int) for step_id in step_list):
|
|
763
|
+
raise ProfilerParamTypeErrorException("The elements of the parameter step_list must be integers.")
|
|
764
|
+
step_list.sort()
|
|
765
|
+
if step_list[-1] - step_list[0] != len(step_list) - 1:
|
|
766
|
+
err_msg = "The elements of the parameter step_list must be continuous integers."
|
|
767
|
+
raise ProfilerParamTypeErrorException(err_msg)
|
|
768
|
+
model_iteration_dict[DEFAULT_MODEL_ID] = step_list
|
|
769
|
+
if offline_path is not None and not isinstance(offline_path, str):
|
|
770
|
+
raise ProfilerParamTypeErrorException("For analyse, the type of parameter offline_path must be str.")
|
|
771
|
+
self._analyse(offline_path=offline_path, model_iteration_dict=model_iteration_dict, mode=mode)
|
|
772
|
+
except (ProfilerException, RuntimeError, OSError, TypeError, NameError) as err:
|
|
773
|
+
logger.error("Profiler analyse failed: %s", str(err))
|
|
774
|
+
|
|
775
|
+
def _analyse(self, offline_path=None, model_iteration_dict=None, mode=ANALYSIS_SYNC_MODE):
|
|
609
776
|
"""
|
|
610
777
|
Collect and analyze training performance data, support calls during and after training. The example shows above.
|
|
611
778
|
|
|
@@ -614,23 +781,22 @@ class Profiler:
|
|
|
614
781
|
Offline mode isused in abnormal exit scenario. This parameter should be set to ``None``
|
|
615
782
|
for online mode. Default: ``None``.
|
|
616
783
|
model_iteration_dict: Dictionary with model id as the key and iteration id as the value, Default: ``None``.
|
|
784
|
+
mode (str, optional): Analysis mode. Whether to analyse data in subprocess. Default: ``sync``.
|
|
785
|
+
By default, analyse data in current process.
|
|
617
786
|
"""
|
|
618
787
|
self._model_iteration_dict = model_iteration_dict
|
|
619
|
-
|
|
620
788
|
self._init_profiler_info()
|
|
621
789
|
self._is_support_step_info_collect()
|
|
790
|
+
self._analyse_mode = mode
|
|
622
791
|
parallel_mode = get_auto_parallel_context("parallel_mode")
|
|
623
792
|
stage_num = get_auto_parallel_context("pipeline_stages")
|
|
624
793
|
|
|
625
794
|
ProfilerInfo.set_parallel_info(parallel_mode, stage_num)
|
|
626
|
-
ProfilerInfo.set_rank_size(self._rank_size)
|
|
627
|
-
ProfilerInfo.set_heterogeneous(self._is_heterogeneous)
|
|
628
795
|
if offline_path:
|
|
629
|
-
|
|
630
|
-
|
|
631
|
-
|
|
632
|
-
|
|
633
|
-
ProfilerInfo.save(self._output_path)
|
|
796
|
+
ProfilerInfo.set_analyse_start_time(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))
|
|
797
|
+
self._ascend_graph_analyse(offline_path=offline_path)
|
|
798
|
+
ProfilerInfo.set_analyse_end_time(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))
|
|
799
|
+
ProfilerInfo.save(self._output_path)
|
|
634
800
|
_offline_parse(offline_path)
|
|
635
801
|
return
|
|
636
802
|
if self._msprof_enable:
|
|
@@ -643,11 +809,11 @@ class Profiler:
|
|
|
643
809
|
self._dynamic_status = self._profiler_manager.dynamic_status()
|
|
644
810
|
_environment_check()
|
|
645
811
|
|
|
646
|
-
self._cpu_profiler.stop()
|
|
647
|
-
|
|
648
812
|
cpu_op_file = glob.glob(os.path.join(self._output_path, 'cpu_op_type_info_*'))
|
|
649
813
|
if self._device_target and self._device_target != DeviceTarget.CPU.value and cpu_op_file:
|
|
650
814
|
self._is_heterogeneous = True
|
|
815
|
+
|
|
816
|
+
ProfilerInfo.set_heterogeneous(self._is_heterogeneous)
|
|
651
817
|
ProfilerInfo.set_analyse_start_time(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))
|
|
652
818
|
if self._device_target and self._device_target == DeviceTarget.CPU.value:
|
|
653
819
|
self._cpu_analyse()
|
|
@@ -673,7 +839,6 @@ class Profiler:
|
|
|
673
839
|
|
|
674
840
|
Raises:
|
|
675
841
|
RuntimeError: If the profiler has already started.
|
|
676
|
-
RuntimeError: If MD profiling has stopped, repeated start action is not supported.
|
|
677
842
|
RuntimeError: If the `start_profile` parameter is not set or is set to ``True``.
|
|
678
843
|
|
|
679
844
|
Examples:
|
|
@@ -707,21 +872,14 @@ class Profiler:
|
|
|
707
872
|
if not self._has_started:
|
|
708
873
|
if not self._has_started_twice:
|
|
709
874
|
self._has_started = True
|
|
710
|
-
self._has_started_twice = True
|
|
711
|
-
else:
|
|
712
|
-
raise RuntimeError("MindSpore Profiling has finished, repeated start and stop actions are not "
|
|
713
|
-
"supported.")
|
|
714
875
|
else:
|
|
715
|
-
raise RuntimeError("The profiler has already started.
|
|
716
|
-
"is set to False.")
|
|
717
|
-
|
|
718
|
-
# No need to start anything if parse profiling data offline
|
|
719
|
-
if self._is_offline_parser():
|
|
720
|
-
return
|
|
876
|
+
raise RuntimeError("The profiler has already started. Do not turn on again in the open state.")
|
|
721
877
|
|
|
722
878
|
self._cpu_profiler.step_profiling_enable(True)
|
|
723
879
|
if self._op_time:
|
|
724
880
|
self._cpu_profiler.enable_op_time()
|
|
881
|
+
if self._profile_memory:
|
|
882
|
+
self._cpu_profiler.enable_profile_memory()
|
|
725
883
|
|
|
726
884
|
if self._device_target and self._device_target == DeviceTarget.GPU.value:
|
|
727
885
|
if self._data_process:
|
|
@@ -736,6 +894,9 @@ class Profiler:
|
|
|
736
894
|
self._md_profiler.start()
|
|
737
895
|
self._ascend_graph_start()
|
|
738
896
|
ProfilerInfo.set_profiling_start_time(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))
|
|
897
|
+
ProfilerInfo.set_system_cnt(c_expression.get_clock_syscnt())
|
|
898
|
+
ProfilerInfo.set_system_time(int(c_expression.get_clock_time() * 1e3)) # cast us to ns
|
|
899
|
+
_framework_profiler_enable_mi()
|
|
739
900
|
|
|
740
901
|
def stop(self):
|
|
741
902
|
"""
|
|
@@ -778,14 +939,11 @@ class Profiler:
|
|
|
778
939
|
raise RuntimeError("The profiler has not started, so can not stop. Please call the start() method "
|
|
779
940
|
"before calling the stop() method.")
|
|
780
941
|
|
|
781
|
-
# No need to stop anything if parse profiling data offline
|
|
782
|
-
if self._is_offline_parser():
|
|
783
|
-
return
|
|
784
|
-
|
|
785
942
|
# Stop data collection after all operators are executed.
|
|
786
943
|
_pynative_executor.sync()
|
|
787
944
|
|
|
788
|
-
|
|
945
|
+
self._cpu_profiler.stop()
|
|
946
|
+
if self._data_process and self._md_profiler is not None:
|
|
789
947
|
self._md_profiler.stop()
|
|
790
948
|
self._md_profiler.save(self._output_path)
|
|
791
949
|
|
|
@@ -797,9 +955,21 @@ class Profiler:
|
|
|
797
955
|
self._stop_time = int(time.time() * 10000000)
|
|
798
956
|
ProfilerInfo.set_profiling_stop_time(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))
|
|
799
957
|
self._init_profiler_info()
|
|
958
|
+
ProfilerInfo.set_diff_time(self._start_time - self._monotonic_time)
|
|
800
959
|
ProfilerInfo.save(self._output_path)
|
|
801
960
|
logger.info("Profiling: stop time: %d", self._stop_time)
|
|
802
961
|
|
|
962
|
+
def _set_ascend_job_id(self, ascend_job_id):
|
|
963
|
+
"""Set output_path for offline parsing performance data."""
|
|
964
|
+
if not ascend_job_id:
|
|
965
|
+
return
|
|
966
|
+
self._ascend_job_id = validate_and_normalize_path(ascend_job_id)
|
|
967
|
+
if not os.path.exists(self._ascend_job_id):
|
|
968
|
+
msg = f"Invalid ascend_job_id: {self._ascend_job_id}, Please pass the absolute path of the JOB dir"
|
|
969
|
+
logger.critical(msg)
|
|
970
|
+
raise ValueError(msg)
|
|
971
|
+
self._output_path, _ = os.path.split(self._ascend_job_id)
|
|
972
|
+
|
|
803
973
|
def _profiler_init(self, kwargs):
|
|
804
974
|
"""Initialize variables when profiler is enabled by environment variables."""
|
|
805
975
|
options = kwargs.get("env_enable")
|
|
@@ -918,16 +1088,20 @@ class Profiler:
|
|
|
918
1088
|
"output": self._output_path,
|
|
919
1089
|
"fp_point": fp_point,
|
|
920
1090
|
"bp_point": bp_point,
|
|
921
|
-
"training_trace":
|
|
922
|
-
"task_trace":
|
|
1091
|
+
"training_trace": self.ENABLE_STATUS if self._op_time else self.DISABLE_STATUS,
|
|
1092
|
+
"task_trace": self.ENABLE_STATUS if self._op_time else self.DISABLE_STATUS,
|
|
923
1093
|
"aic_metrics": AICORE_METRICS_DICT.get(self._aicore_metrics_id, "ArithmeticUtilization"),
|
|
924
|
-
"aicpu":
|
|
925
|
-
"profile_memory":
|
|
926
|
-
"hccl":
|
|
1094
|
+
"aicpu": self.ENABLE_STATUS if self._data_process or self._op_time else self.DISABLE_STATUS,
|
|
1095
|
+
"profile_memory": self.ENABLE_STATUS if self._op_time and self._profile_memory else self.DISABLE_STATUS,
|
|
1096
|
+
"hccl": self.ENABLE_STATUS if self._op_time and self._profile_communication else self.DISABLE_STATUS,
|
|
927
1097
|
"l2_cache": self._l2_cache,
|
|
928
|
-
"
|
|
929
|
-
"
|
|
930
|
-
"
|
|
1098
|
+
"hbm_ddr": self._hbm_ddr,
|
|
1099
|
+
"pcie": self._pcie,
|
|
1100
|
+
"parallel_strategy": self.ENABLE_STATUS if self._parallel_strategy else self.DISABLE_STATUS,
|
|
1101
|
+
"op_time": self.ENABLE_STATUS if self._op_time else self.DISABLE_STATUS,
|
|
1102
|
+
"profile_framework": self._profile_framework,
|
|
1103
|
+
"profiler_level": self.profiler_level.value if self.profiler_level else self.DISABLE_STATUS,
|
|
1104
|
+
"host_stack": "on" if self._host_stack else "off"
|
|
931
1105
|
}
|
|
932
1106
|
|
|
933
1107
|
return profiling_options
|
|
@@ -960,11 +1134,8 @@ class Profiler:
|
|
|
960
1134
|
self._profile_communication = False
|
|
961
1135
|
|
|
962
1136
|
if self._profile_communication:
|
|
963
|
-
hccl_option = {"output": self._output_path, "task_trace":
|
|
1137
|
+
hccl_option = {"output": self._output_path, "task_trace": self.ENABLE_STATUS}
|
|
964
1138
|
os.environ['PROFILING_OPTIONS'] = json.dumps(hccl_option)
|
|
965
|
-
if not self.start_profile:
|
|
966
|
-
raise RuntimeError(f"For '{self.__class__.__name__}', the parameter profile_communication can "
|
|
967
|
-
f"not be True while starting profiler in the process of training.")
|
|
968
1139
|
|
|
969
1140
|
self._profile_memory = kwargs.pop("profile_memory", False)
|
|
970
1141
|
if not isinstance(self._profile_memory, bool):
|
|
@@ -980,7 +1151,7 @@ class Profiler:
|
|
|
980
1151
|
|
|
981
1152
|
if self._aicore_metrics_id not in AICORE_METRICS_DICT:
|
|
982
1153
|
logger.warning(f"For '{self.__class__.__name__}', the parameter aicore_metrics must be in "
|
|
983
|
-
f"[-1, 0, 1, 2, 3, 4, 5], but got {self._aicore_metrics_id}, it will be set to 0.")
|
|
1154
|
+
f"[-1, 0, 1, 2, 3, 4, 5, 6], but got {self._aicore_metrics_id}, it will be set to 0.")
|
|
984
1155
|
self._aicore_metrics_id = 0
|
|
985
1156
|
|
|
986
1157
|
l2_cache_enable = kwargs.pop("l2_cache", False)
|
|
@@ -988,10 +1159,21 @@ class Profiler:
|
|
|
988
1159
|
logger.warning(f"For '{self.__class__.__name__}', the parameter l2_cache must be bool, "
|
|
989
1160
|
f"but got type {type(l2_cache_enable)}, it will be set to False.")
|
|
990
1161
|
l2_cache_enable = False
|
|
991
|
-
if l2_cache_enable
|
|
992
|
-
|
|
993
|
-
|
|
994
|
-
|
|
1162
|
+
self._l2_cache = self.ENABLE_STATUS if l2_cache_enable else self.DISABLE_STATUS
|
|
1163
|
+
|
|
1164
|
+
hbm_ddr_enable = kwargs.pop("hbm_ddr", False)
|
|
1165
|
+
if not isinstance(hbm_ddr_enable, bool):
|
|
1166
|
+
logger.warning(f"For '{self.__class__.__name__}', the parameter hbm_ddr must be bool, "
|
|
1167
|
+
f"but got type {type(hbm_ddr_enable)}, it will be set to False.")
|
|
1168
|
+
hbm_ddr_enable = False
|
|
1169
|
+
self._hbm_ddr = self.ENABLE_STATUS if hbm_ddr_enable else self.DISABLE_STATUS
|
|
1170
|
+
|
|
1171
|
+
pcie_enable = kwargs.pop("pcie", False)
|
|
1172
|
+
if not isinstance(pcie_enable, bool):
|
|
1173
|
+
logger.warning(f"For '{self.__class__.__name__}', the parameter pcie must be bool, "
|
|
1174
|
+
f"but got type {type(pcie_enable)}, it will be set to False.")
|
|
1175
|
+
pcie_enable = False
|
|
1176
|
+
self._pcie = self.ENABLE_STATUS if pcie_enable else self.DISABLE_STATUS
|
|
995
1177
|
|
|
996
1178
|
self._parallel_strategy = kwargs.pop("parallel_strategy", True)
|
|
997
1179
|
if not isinstance(self._parallel_strategy, bool):
|
|
@@ -999,27 +1181,21 @@ class Profiler:
|
|
|
999
1181
|
f"but got type {type(self._parallel_strategy)}, it will be set to True.")
|
|
1000
1182
|
self._parallel_strategy = True
|
|
1001
1183
|
|
|
1002
|
-
|
|
1003
|
-
if
|
|
1004
|
-
logger.warning(f"For '{self.__class__.__name__}',
|
|
1005
|
-
f"
|
|
1006
|
-
|
|
1007
|
-
|
|
1008
|
-
|
|
1009
|
-
|
|
1010
|
-
|
|
1011
|
-
|
|
1012
|
-
|
|
1013
|
-
|
|
1014
|
-
|
|
1015
|
-
|
|
1016
|
-
|
|
1017
|
-
|
|
1018
|
-
def _is_offline_parser(self):
|
|
1019
|
-
"""Return whether offline parser or online parser."""
|
|
1020
|
-
if self._device_target and self._device_target == DeviceTarget.ASCEND.value:
|
|
1021
|
-
return bool(self._ascend_job_id)
|
|
1022
|
-
return False
|
|
1184
|
+
self.profiler_level = kwargs.pop("profiler_level", None)
|
|
1185
|
+
if self.profiler_level and not isinstance(self.profiler_level, ProfilerLevel):
|
|
1186
|
+
logger.warning(f"For '{self.__class__.__name__}', the parameter profiler_level must be one of "
|
|
1187
|
+
f"[ProfilerLevel.Level0, ProfilerLevel.Level1, ProfilerLevel.Level2], but got type "
|
|
1188
|
+
f"{type(self.profiler_level)}, it will be set to ProfilerLevel.Level0.")
|
|
1189
|
+
self.profiler_level = ProfilerLevel.Level0
|
|
1190
|
+
elif self.profiler_level == ProfilerLevel.Level0:
|
|
1191
|
+
self._data_process = False
|
|
1192
|
+
self._aicore_metrics_id = -1
|
|
1193
|
+
logger.warning(f"For '{self.__class__.__name__}', when profiler_level set Level0, data_process will be set "
|
|
1194
|
+
f"to False and aicore_metrics set to -1.")
|
|
1195
|
+
elif self.profiler_level == ProfilerLevel.Level1:
|
|
1196
|
+
self._data_process = False
|
|
1197
|
+
logger.warning(f"For '{self.__class__.__name__}', when profiler_level set Level1, data_process will be set "
|
|
1198
|
+
f"to False.")
|
|
1023
1199
|
|
|
1024
1200
|
def _ascend_analyse(self):
|
|
1025
1201
|
"""Collect and analyse ascend performance data."""
|
|
@@ -1031,23 +1207,21 @@ class Profiler:
|
|
|
1031
1207
|
self._rank_size = get_group_size()
|
|
1032
1208
|
else:
|
|
1033
1209
|
self._rank_size = int(os.getenv('RANK_SIZE', '1'))
|
|
1210
|
+
ProfilerInfo.set_rank_size(self._rank_size)
|
|
1034
1211
|
|
|
1035
1212
|
if self._has_started:
|
|
1036
1213
|
self.stop()
|
|
1037
1214
|
else:
|
|
1038
1215
|
logger.info("No need to stop profiler because profiler has been stopped.")
|
|
1216
|
+
self._ascend_profiler.finalize()
|
|
1039
1217
|
# export op data before analyse
|
|
1040
1218
|
self._ascend_graph_analyse()
|
|
1041
1219
|
|
|
1042
|
-
def _minddata_analyse(self
|
|
1220
|
+
def _minddata_analyse(self):
|
|
1043
1221
|
"""Analyse mindadata for ascend graph model."""
|
|
1044
1222
|
if not self._data_process:
|
|
1045
1223
|
return
|
|
1046
1224
|
store_id = self._rank_id if self._device_target == DeviceTarget.ASCEND.value else self._dev_id
|
|
1047
|
-
# Parsing minddata AICPU profiling
|
|
1048
|
-
if self._device_target == DeviceTarget.ASCEND.value:
|
|
1049
|
-
logger.info("Profiling: analyzing the minddata AICPU data.")
|
|
1050
|
-
MinddataParser.execute(source_path, self._output_path, store_id)
|
|
1051
1225
|
|
|
1052
1226
|
# parse minddata pipeline operator and queue
|
|
1053
1227
|
try:
|
|
@@ -1060,12 +1234,23 @@ class Profiler:
|
|
|
1060
1234
|
# Analyze minddata information
|
|
1061
1235
|
logger.info("Profiling: analyzing the minddata information.")
|
|
1062
1236
|
try:
|
|
1063
|
-
MinddataProfilingAnalyzer(self._output_path, store_id,
|
|
1237
|
+
MinddataProfilingAnalyzer(self._output_path, store_id,
|
|
1238
|
+
self._output_path, pretty=self._pretty_json).analyze()
|
|
1064
1239
|
except ProfilerException as err:
|
|
1065
1240
|
logger.warning(err.message)
|
|
1066
1241
|
finally:
|
|
1067
1242
|
pass
|
|
1068
1243
|
|
|
1244
|
+
def _minddata_aicpu_analyse(self, source_path, job_id):
|
|
1245
|
+
"""Analyse minddata aicpu after ascend."""
|
|
1246
|
+
if not self._data_process:
|
|
1247
|
+
return
|
|
1248
|
+
store_id = self._rank_id if self._device_target == DeviceTarget.ASCEND.value else self._dev_id
|
|
1249
|
+
# Parsing minddata AICPU profiling
|
|
1250
|
+
if self._device_target == DeviceTarget.ASCEND.value:
|
|
1251
|
+
logger.info("Profiling: analyzing the minddata AICPU data.")
|
|
1252
|
+
MinddataParser.execute(source_path, self._output_path, job_id, store_id)
|
|
1253
|
+
|
|
1069
1254
|
def _ascend_fpbp_analyse(self, op_summary, steptrace):
|
|
1070
1255
|
"""
|
|
1071
1256
|
Ascned graph model op analyse.
|
|
@@ -1080,7 +1265,7 @@ class Profiler:
|
|
|
1080
1265
|
|
|
1081
1266
|
step_trace_point_info_path = validate_and_normalize_path(step_trace_point_info_path)
|
|
1082
1267
|
|
|
1083
|
-
fpbp_analyse = AscendFPBPGenerator(op_summary, steptrace)
|
|
1268
|
+
fpbp_analyse = AscendFPBPGenerator(op_summary, steptrace, pretty=self._pretty_json)
|
|
1084
1269
|
points, _ = fpbp_analyse.parse()
|
|
1085
1270
|
fpbp_analyse.write(step_trace_point_info_path)
|
|
1086
1271
|
except ProfilerException as err:
|
|
@@ -1089,7 +1274,7 @@ class Profiler:
|
|
|
1089
1274
|
pass
|
|
1090
1275
|
return points
|
|
1091
1276
|
|
|
1092
|
-
def _ascend_op_analyse(self, op_summary, op_statistic, dynamic_status):
|
|
1277
|
+
def _ascend_op_analyse(self, op_summary, op_statistic, dynamic_status, launch_ops: List):
|
|
1093
1278
|
"""
|
|
1094
1279
|
Ascend graph model hwts analyse.
|
|
1095
1280
|
|
|
@@ -1116,12 +1301,12 @@ class Profiler:
|
|
|
1116
1301
|
else:
|
|
1117
1302
|
output_timeline_data_path = None
|
|
1118
1303
|
|
|
1119
|
-
op_analyser = AscendOPGenerator(op_summary, op_statistic, dynamic_status)
|
|
1304
|
+
op_analyser = AscendOPGenerator(op_summary, op_statistic, dynamic_status, launch_ops)
|
|
1120
1305
|
op_analyser.parse()
|
|
1121
1306
|
op_analyser.write(op_intermediate_detail_path, op_intermediate_type_path,
|
|
1122
1307
|
aicpu_intermediate_detail_path, framework_raw_path, output_timeline_data_path)
|
|
1123
|
-
except ProfilerException as err:
|
|
1124
|
-
logger.warning(err
|
|
1308
|
+
except (ProfilerException, RuntimeError) as err:
|
|
1309
|
+
logger.warning(str(err))
|
|
1125
1310
|
finally:
|
|
1126
1311
|
pass
|
|
1127
1312
|
|
|
@@ -1143,19 +1328,22 @@ class Profiler:
|
|
|
1143
1328
|
finally:
|
|
1144
1329
|
pass
|
|
1145
1330
|
|
|
1146
|
-
def _ascend_timeline_analyse(self, op_summary, steptrace):
|
|
1331
|
+
def _ascend_timeline_analyse(self, op_summary, steptrace, source_path, mindstudio_profiler_output) -> List:
|
|
1147
1332
|
"""Analyse timeline info."""
|
|
1148
1333
|
try:
|
|
1149
1334
|
logger.info("Profiling: analyzing the timeline data")
|
|
1150
|
-
timeline_analyser = AscendTimelineGenerator(self._output_path,
|
|
1151
|
-
context.get_context('mode')
|
|
1152
|
-
|
|
1153
|
-
timeline_analyser.
|
|
1335
|
+
timeline_analyser = AscendTimelineGenerator(self._output_path, source_path, mindstudio_profiler_output,
|
|
1336
|
+
self._rank_id, self._rank_size, context.get_context('mode'),
|
|
1337
|
+
self._model_iteration_dict.get(DEFAULT_MODEL_ID))
|
|
1338
|
+
timeline_analyser.parse_cluster_data(op_summary, steptrace)
|
|
1339
|
+
timeline_analyser.parse_timeline_data(pretty=self._pretty_json)
|
|
1340
|
+
timeline_analyser.write_timeline_display()
|
|
1154
1341
|
timeline_analyser.write_timeline_summary()
|
|
1155
1342
|
except (ProfilerIOException, ProfilerFileNotFoundException, RuntimeError) as err:
|
|
1156
1343
|
logger.warning('Fail to write timeline data: %s', err)
|
|
1157
1344
|
finally:
|
|
1158
1345
|
pass
|
|
1346
|
+
return timeline_analyser.get_kernel_event_list()
|
|
1159
1347
|
|
|
1160
1348
|
def _ascend_dynamic_net_analyse(self, op_summary):
|
|
1161
1349
|
"""Analyse dynamic shape network info."""
|
|
@@ -1166,10 +1354,10 @@ class Profiler:
|
|
|
1166
1354
|
logger.warning("The profile_memory parameter cannot be set on the dynamic shape network.")
|
|
1167
1355
|
logger.warning(
|
|
1168
1356
|
"[Profiler]Dynamic Shape network does not support collecting step trace performance data currently.")
|
|
1169
|
-
dynamic_parser = DynamicFrameWorkParser(self._output_path, self._rank_id)
|
|
1357
|
+
dynamic_parser = DynamicFrameWorkParser(self._output_path, self._rank_id, pretty=self._pretty_json)
|
|
1170
1358
|
dynamic_parser.write_dynamic_shape_data(op_summary)
|
|
1171
1359
|
|
|
1172
|
-
def _ascend_flops_analyse(self, op_summary):
|
|
1360
|
+
def _ascend_flops_analyse(self, op_summary, launch_ops):
|
|
1173
1361
|
"""Get op FLOPs from op_summary, write output_op_flops_x.csv."""
|
|
1174
1362
|
if 'vector_fops' not in op_summary.dtype.names and 'cube_fops' not in op_summary.dtype.names:
|
|
1175
1363
|
logger.warning("[Profiler] Can not found cube fops and vector fops data in the op summary.")
|
|
@@ -1184,12 +1372,12 @@ class Profiler:
|
|
|
1184
1372
|
flops_path = validate_and_normalize_path(flops_path)
|
|
1185
1373
|
flops_summary_path = validate_and_normalize_path(flops_summary_path)
|
|
1186
1374
|
|
|
1187
|
-
flops_analyser = AscendFlopsGenerator(op_summary)
|
|
1375
|
+
flops_analyser = AscendFlopsGenerator(op_summary, launch_ops, pretty=self._pretty_json)
|
|
1188
1376
|
flops_analyser.parse()
|
|
1189
1377
|
flops_analyser.write(flops_path, flops_summary_path)
|
|
1190
1378
|
|
|
1191
|
-
except ProfilerException as err:
|
|
1192
|
-
logger.warning(err
|
|
1379
|
+
except (ProfilerException, RuntimeError) as err:
|
|
1380
|
+
logger.warning(str(err))
|
|
1193
1381
|
finally:
|
|
1194
1382
|
pass
|
|
1195
1383
|
|
|
@@ -1208,7 +1396,88 @@ class Profiler:
|
|
|
1208
1396
|
finally:
|
|
1209
1397
|
pass
|
|
1210
1398
|
|
|
1211
|
-
def
|
|
1399
|
+
def _ascend_ms_analyze(self, source_path):
|
|
1400
|
+
"""Ascend ms generate"""
|
|
1401
|
+
|
|
1402
|
+
timestamp = time.strftime("%Y%m%d%H%M%S", time.localtime(time.time()))
|
|
1403
|
+
if self._rank_id:
|
|
1404
|
+
ascend_ms_path = f"rank-{self._rank_id}_{timestamp}_ascend_ms"
|
|
1405
|
+
else:
|
|
1406
|
+
ascend_ms_path = f"{socket.gethostname()}--{os.getpid()}_{timestamp}_ascend_ms"
|
|
1407
|
+
ascend_ms_path = os.path.join(self._output_path, ascend_ms_path)
|
|
1408
|
+
|
|
1409
|
+
dev_id = self._rank_id if self._device_target == DeviceTarget.ASCEND.value else self._dev_id
|
|
1410
|
+
ascend_profiler_output_path = os.path.join(ascend_ms_path, 'ASCEND_PROFILER_OUTPUT')
|
|
1411
|
+
os.makedirs(ascend_profiler_output_path, exist_ok=True)
|
|
1412
|
+
|
|
1413
|
+
source_profiler_info_path = os.path.join(self._output_path, f"profiler_info_{dev_id}.json")
|
|
1414
|
+
target_profiler_info_path = os.path.join(ascend_ms_path, f"profiler_info_{dev_id}.json")
|
|
1415
|
+
shutil.copy(source_profiler_info_path, target_profiler_info_path)
|
|
1416
|
+
|
|
1417
|
+
source_timeline_path = os.path.join(self._output_path, f"ascend_timeline_display_{dev_id}.json")
|
|
1418
|
+
target_timeline_path = os.path.join(ascend_profiler_output_path, f"trace_view.json")
|
|
1419
|
+
shutil.copy(source_timeline_path, target_timeline_path)
|
|
1420
|
+
|
|
1421
|
+
src_op_mem_file = os.path.join(self._output_path, f"operator_memory_{dev_id}.csv")
|
|
1422
|
+
if os.path.exists(src_op_mem_file):
|
|
1423
|
+
dst_op_mem_file = os.path.join(ascend_profiler_output_path, f"operator_memory.csv")
|
|
1424
|
+
shutil.copy(src_op_mem_file, dst_op_mem_file)
|
|
1425
|
+
|
|
1426
|
+
ms_output_path = os.path.abspath(
|
|
1427
|
+
os.path.join(source_path, os.path.pardir, 'mindstudio_profiler_output'))
|
|
1428
|
+
static_op_mem_path = os.path.join(ms_output_path, f"static_op_mem_*.csv")
|
|
1429
|
+
src_static_op_mem_path = glob.glob(static_op_mem_path)
|
|
1430
|
+
if src_static_op_mem_path:
|
|
1431
|
+
dst_static_op_mem_file = os.path.join(ascend_profiler_output_path, f"static_op_mem.csv")
|
|
1432
|
+
shutil.copy(src_static_op_mem_path[0], dst_static_op_mem_file)
|
|
1433
|
+
|
|
1434
|
+
self._ascend_graph_cluster_analyse(source_path, ascend_profiler_output_path)
|
|
1435
|
+
self._ascend_graph_communicate_analyse(source_path, ascend_profiler_output_path)
|
|
1436
|
+
AscendIntegrateGenerator(source_path, ascend_profiler_output_path).parse()
|
|
1437
|
+
AscendMemoryGenerator(self._output_path, self._rank_id, source_path, ascend_profiler_output_path).parse()
|
|
1438
|
+
|
|
1439
|
+
def _ascend_graph_cluster_analyse(self, source_path, ascend_profiler_output_path):
|
|
1440
|
+
"""Analyse step trace time info"""
|
|
1441
|
+
|
|
1442
|
+
try:
|
|
1443
|
+
logger.info("Profiling: analyzing the step trace time profiler info.")
|
|
1444
|
+
|
|
1445
|
+
step_trace_time_path = os.path.join(ascend_profiler_output_path, f'step_trace_time.csv')
|
|
1446
|
+
step_trace_time_path = validate_and_normalize_path(step_trace_time_path)
|
|
1447
|
+
|
|
1448
|
+
cluster_analyse = AscendClusterGenerator(source_path)
|
|
1449
|
+
cluster_analyse.parse()
|
|
1450
|
+
cluster_analyse.write(step_trace_time_path)
|
|
1451
|
+
except (ProfilerIOException, ProfilerFileNotFoundException, ProfilerRawFileException) as err:
|
|
1452
|
+
logger.warning(err.message)
|
|
1453
|
+
finally:
|
|
1454
|
+
pass
|
|
1455
|
+
|
|
1456
|
+
def _ascend_graph_communicate_analyse(self, source_path, ascend_profiler_output_path):
|
|
1457
|
+
"""Analyse communicate info"""
|
|
1458
|
+
if not self._profile_communication:
|
|
1459
|
+
return
|
|
1460
|
+
|
|
1461
|
+
try:
|
|
1462
|
+
logger.info("Profiling: analyzing the communicate and communicate_matrix profiler info.")
|
|
1463
|
+
|
|
1464
|
+
communication_file_path = os.path.join(ascend_profiler_output_path, f'communication.json')
|
|
1465
|
+
communication_file_path = validate_and_normalize_path(communication_file_path)
|
|
1466
|
+
|
|
1467
|
+
communication_matrix_file_path = os.path.join(ascend_profiler_output_path,
|
|
1468
|
+
f"communication_matrix.json")
|
|
1469
|
+
communication_matrix_file_path = validate_and_normalize_path(communication_matrix_file_path)
|
|
1470
|
+
|
|
1471
|
+
analyze_path = os.path.abspath(os.path.join(source_path, os.path.pardir, 'analyze'))
|
|
1472
|
+
communicate_analyser = AscendCommunicationGenerator(analyze_path)
|
|
1473
|
+
communicate_analyser.parse()
|
|
1474
|
+
communicate_analyser.write(communication_file_path, communication_matrix_file_path)
|
|
1475
|
+
except (ProfilerIOException, ProfilerFileNotFoundException, ProfilerRawFileException) as err:
|
|
1476
|
+
logger.warning(err.message)
|
|
1477
|
+
finally:
|
|
1478
|
+
pass
|
|
1479
|
+
|
|
1480
|
+
def _ascend_graph_hccl_analyse(self, mindstudio_profiler_output, steptrace):
|
|
1212
1481
|
"""Analyse hccl profiler info."""
|
|
1213
1482
|
if not self._profile_communication:
|
|
1214
1483
|
return
|
|
@@ -1222,10 +1491,7 @@ class Profiler:
|
|
|
1222
1491
|
|
|
1223
1492
|
hccl_raw_path = os.path.join(self._output_path, f'hccl_raw_{dev_id}.csv')
|
|
1224
1493
|
hccl_raw_path = validate_and_normalize_path(hccl_raw_path)
|
|
1225
|
-
|
|
1226
|
-
hccl_analyse = AscendHCCLGenerator(os.path.join(source_path, 'timeline'), steptrace)
|
|
1227
|
-
else:
|
|
1228
|
-
hccl_analyse = AscendHCCLGeneratorOld(os.path.join(source_path, 'timeline'))
|
|
1494
|
+
hccl_analyse = AscendHCCLGenerator(mindstudio_profiler_output, steptrace)
|
|
1229
1495
|
hccl_analyse.parse()
|
|
1230
1496
|
hccl_analyse.write(hccl_raw_path)
|
|
1231
1497
|
|
|
@@ -1237,7 +1503,7 @@ class Profiler:
|
|
|
1237
1503
|
def _ascend_graph_msadvisor_analyse(self, job_id):
|
|
1238
1504
|
"""Call MSAdvisor function."""
|
|
1239
1505
|
logger.info("MSAdvisor starts running.")
|
|
1240
|
-
msadvisor = Msadvisor(job_id, self._rank_id, self._output_path)
|
|
1506
|
+
msadvisor = Msadvisor(job_id, self._rank_id, self._output_path, pretty=self._pretty_json)
|
|
1241
1507
|
try:
|
|
1242
1508
|
msadvisor.analyse()
|
|
1243
1509
|
except FileNotFoundError as err:
|
|
@@ -1254,41 +1520,85 @@ class Profiler:
|
|
|
1254
1520
|
if context.get_context("mode") == context.PYNATIVE_MODE:
|
|
1255
1521
|
logger.warning("Pynative mode does not support MSAdvisor analyzer currently.")
|
|
1256
1522
|
|
|
1257
|
-
def
|
|
1258
|
-
"""
|
|
1259
|
-
|
|
1523
|
+
def _get_kernel_op_map(self, op_summary, kernels: List[CANNEvent]) -> List:
|
|
1524
|
+
"""Get the mapping between framework operator and device kernel."""
|
|
1525
|
+
if not kernels:
|
|
1526
|
+
return []
|
|
1527
|
+
kernel_map = {}
|
|
1528
|
+
for kernel in kernels:
|
|
1529
|
+
key = kernel.name if kernel.name.startswith('hcom_') else (kernel.name, str(kernel.ts))
|
|
1530
|
+
kernel_map[key] = kernel.parent
|
|
1531
|
+
launch_ops = [None] * len(op_summary)
|
|
1532
|
+
for index, summary in enumerate(op_summary):
|
|
1533
|
+
ts = str(summary['Task Start Time(us)']).strip("\t")
|
|
1534
|
+
name = summary['Op Name']
|
|
1535
|
+
key = name if name.startswith("hcom_") else (name, ts)
|
|
1536
|
+
launch_op = kernel_map.get(key)
|
|
1537
|
+
if not launch_op:
|
|
1538
|
+
if context.get_context("mode") == context.GRAPH_MODE or not name.startswith("aclnn"):
|
|
1539
|
+
logger.warning(f"Failed to get launch operator for {name}!")
|
|
1540
|
+
continue
|
|
1541
|
+
launch_ops[index] = launch_op.name
|
|
1542
|
+
return launch_ops
|
|
1260
1543
|
|
|
1261
|
-
|
|
1544
|
+
def _ascend_graph_analyse(self, offline_path=None):
|
|
1545
|
+
if offline_path or self._analyse_mode == ANALYSIS_SYNC_MODE:
|
|
1546
|
+
self._ascend_graph_analyse_inner(offline_path)
|
|
1547
|
+
else:
|
|
1548
|
+
MultiProcessPool().add_async_job(self._ascend_graph_analyse_inner)
|
|
1549
|
+
|
|
1550
|
+
def _ascend_graph_analyse_inner(self, offline_path=None):
|
|
1551
|
+
"""Ascend graph mode analyse."""
|
|
1552
|
+
job_id = self._get_profiling_job_id(offline_path)
|
|
1262
1553
|
if not job_id:
|
|
1263
1554
|
return
|
|
1264
1555
|
logger.info("Profiling: job id is %s ", job_id)
|
|
1265
1556
|
|
|
1266
1557
|
self._check_output_path(output_path=self._output_path)
|
|
1267
1558
|
source_path = os.path.join(self._output_path, job_id)
|
|
1268
|
-
self._minddata_analyse(
|
|
1559
|
+
self._minddata_analyse()
|
|
1269
1560
|
if self._op_time:
|
|
1270
|
-
|
|
1561
|
+
mindstudio_profiler_output = os.path.abspath(
|
|
1562
|
+
os.path.join(source_path, os.path.pardir, 'mindstudio_profiler_output'))
|
|
1563
|
+
flag = _ascend_graph_msprof_generator(mindstudio_profiler_output, self._model_iteration_dict)
|
|
1271
1564
|
if not flag:
|
|
1272
1565
|
logger.warning('Current driver package not support all export mode, use single export mode, '
|
|
1273
1566
|
'this may lead to performance degradation. Suggest upgrading the driver package.')
|
|
1274
1567
|
ProfilerInfo.set_export_flag(flag)
|
|
1275
|
-
op_summary, op_statistic, steptrace
|
|
1276
|
-
|
|
1277
|
-
|
|
1568
|
+
op_summary, op_statistic, steptrace, steptrace_model \
|
|
1569
|
+
= _ascend_graph_msprof_analyse(mindstudio_profiler_output)
|
|
1570
|
+
if isinstance(op_statistic, np.ndarray) and op_statistic.shape[0] == 0 or \
|
|
1571
|
+
not isinstance(op_statistic, np.ndarray) and not op_statistic:
|
|
1572
|
+
return
|
|
1573
|
+
kernels = self._ascend_timeline_analyse(op_summary, steptrace, source_path, mindstudio_profiler_output)
|
|
1574
|
+
launch_ops = self._get_kernel_op_map(op_summary, kernels)
|
|
1575
|
+
self._ascend_op_analyse(op_summary, op_statistic, self._dynamic_status, launch_ops)
|
|
1278
1576
|
graph_ids = np.unique(op_summary['Model ID']).tolist()
|
|
1279
1577
|
points = self._ascend_fpbp_analyse(op_summary, steptrace)
|
|
1280
1578
|
if len(graph_ids) == 1:
|
|
1281
1579
|
self._ascend_step_trace_analyse(steptrace)
|
|
1580
|
+
else:
|
|
1581
|
+
self._ascend_step_trace_analyse(steptrace_model)
|
|
1282
1582
|
if self._dynamic_status:
|
|
1283
1583
|
self._ascend_dynamic_net_analyse(op_summary)
|
|
1284
|
-
self._ascend_flops_analyse(op_summary)
|
|
1584
|
+
self._ascend_flops_analyse(op_summary, launch_ops)
|
|
1285
1585
|
self._ascend_graph_memory_analyse(points)
|
|
1286
|
-
self.
|
|
1586
|
+
self._ascend_ms_analyze(mindstudio_profiler_output)
|
|
1587
|
+
self._ascend_graph_hccl_analyse(mindstudio_profiler_output, steptrace)
|
|
1287
1588
|
self._ascend_graph_msadvisor_analyse(job_id)
|
|
1589
|
+
self._minddata_aicpu_analyse(self._output_path, job_id)
|
|
1288
1590
|
ProfilerInfo.set_graph_ids(graph_ids)
|
|
1591
|
+
try:
|
|
1592
|
+
ProfilerPathManager.simplify_data(self._output_path, self._data_simplification)
|
|
1593
|
+
except RuntimeError as err:
|
|
1594
|
+
logger.error('Profilier simplify data failed, %s', str(err))
|
|
1289
1595
|
|
|
1290
1596
|
def _ascend_graph_start(self):
|
|
1291
1597
|
"""Ascend graph mode start profiling."""
|
|
1598
|
+
op_range_file = os.path.join(self._framework_path, "op_range_" + str(self._rank_id))
|
|
1599
|
+
if os.path.exists(op_range_file):
|
|
1600
|
+
os.remove(op_range_file)
|
|
1601
|
+
logger.info("Clear old op range filer.")
|
|
1292
1602
|
self._ascend_profiler.start()
|
|
1293
1603
|
|
|
1294
1604
|
def _gpu_analyse(self):
|
|
@@ -1303,12 +1613,14 @@ class Profiler:
|
|
|
1303
1613
|
else:
|
|
1304
1614
|
self._rank_size = int(os.getenv('RANK_SIZE', '1'))
|
|
1305
1615
|
|
|
1616
|
+
ProfilerInfo.set_rank_size(self._rank_size)
|
|
1617
|
+
|
|
1306
1618
|
if self._has_started:
|
|
1307
1619
|
self.stop()
|
|
1308
1620
|
else:
|
|
1309
1621
|
logger.info("No need to stop profiler because profiler has been stopped.")
|
|
1310
1622
|
|
|
1311
|
-
self._minddata_analyse(
|
|
1623
|
+
self._minddata_analyse()
|
|
1312
1624
|
|
|
1313
1625
|
try:
|
|
1314
1626
|
self._analyse_step_relation_info()
|
|
@@ -1368,11 +1680,16 @@ class Profiler:
|
|
|
1368
1680
|
|
|
1369
1681
|
def _cpu_analyse(self):
|
|
1370
1682
|
"""Collect and analyse cpu performance data."""
|
|
1683
|
+
if self._has_started:
|
|
1684
|
+
self.stop()
|
|
1685
|
+
else:
|
|
1686
|
+
logger.info("No need to stop profiler because profiler has been stopped.")
|
|
1687
|
+
|
|
1371
1688
|
if not self._op_time:
|
|
1372
1689
|
return
|
|
1373
1690
|
try:
|
|
1374
1691
|
timeline_generator = CpuTimelineGenerator(self._output_path, self._rank_id, context.get_context("mode"))
|
|
1375
|
-
timeline_generator.init_timeline()
|
|
1692
|
+
timeline_generator.init_timeline(pretty=self._pretty_json)
|
|
1376
1693
|
timeline_generator.write_timeline(self._timeline_size_limit_byte)
|
|
1377
1694
|
timeline_generator.write_timeline_summary()
|
|
1378
1695
|
except (ProfilerIOException, ProfilerFileNotFoundException, RuntimeError) as err:
|
|
@@ -1462,27 +1779,19 @@ class Profiler:
|
|
|
1462
1779
|
"""Analyse memory usage data."""
|
|
1463
1780
|
integrator = Integrator(self._output_path, self._rank_id)
|
|
1464
1781
|
aicore_detail_data = integrator.get_aicore_detail_data()
|
|
1465
|
-
memory_parser = MemoryUsageParser(self._output_path, self._rank_id)
|
|
1782
|
+
memory_parser = MemoryUsageParser(self._output_path, self._rank_id, pretty=self._pretty_json)
|
|
1466
1783
|
memory_parser.init_memory_usage_info(aicore_detail_data, points)
|
|
1467
1784
|
memory_parser.write_memory_files()
|
|
1468
1785
|
|
|
1469
|
-
def _get_profiling_job_id(self):
|
|
1786
|
+
def _get_profiling_job_id(self, offline_path):
|
|
1470
1787
|
"""Get profiling job id, which was generated by ada service.
|
|
1471
1788
|
|
|
1472
1789
|
Returns:
|
|
1473
1790
|
str, profiling job id.
|
|
1474
1791
|
"""
|
|
1475
1792
|
|
|
1476
|
-
if
|
|
1477
|
-
|
|
1478
|
-
job_id = self._ascend_job_id.rstrip('/').split('/')[-1]
|
|
1479
|
-
if job_id.startswith('PROF'):
|
|
1480
|
-
device_dir = [dir for dir in os.listdir(self._ascend_job_id) if dir.startswith('device')]
|
|
1481
|
-
info_file_path = get_file_path(os.path.join(self._ascend_job_id, device_dir[0]), "info.json")
|
|
1482
|
-
training_rank_id, _ = self._parse_info_json(info_file_path)
|
|
1483
|
-
self._rank_id = int(training_rank_id)
|
|
1484
|
-
return os.path.join(job_id, device_dir[0])
|
|
1485
|
-
return job_id
|
|
1793
|
+
if offline_path:
|
|
1794
|
+
self._output_path = os.path.join(offline_path, 'profiler')
|
|
1486
1795
|
|
|
1487
1796
|
job_id = ""
|
|
1488
1797
|
job_dirs = filter(lambda item: item.startswith('JOB') or item.startswith('PROF') and os.path.isdir(
|
|
@@ -1491,16 +1800,12 @@ class Profiler:
|
|
|
1491
1800
|
job_dirs, key=lambda x: os.path.getmtime(os.path.join(self._output_path, x)), reverse=True)
|
|
1492
1801
|
|
|
1493
1802
|
for dir_name in sorted_job_dirs:
|
|
1494
|
-
|
|
1495
|
-
|
|
1496
|
-
|
|
1497
|
-
|
|
1498
|
-
job_dir = os.path.join(self._output_path, dir_name, device_dir[0])
|
|
1499
|
-
else:
|
|
1500
|
-
job_dir = os.path.join(self._output_path, dir_name)
|
|
1803
|
+
prof_dir = os.path.join(self._output_path, dir_name)
|
|
1804
|
+
device_dir = [dir for dir in os.listdir(prof_dir) \
|
|
1805
|
+
if dir.startswith('device') and os.path.isdir(os.path.join(prof_dir, dir))]
|
|
1806
|
+
job_dir = os.path.join(self._output_path, dir_name, device_dir[0])
|
|
1501
1807
|
|
|
1502
|
-
|
|
1503
|
-
if start_file_path is None:
|
|
1808
|
+
if get_file_path(job_dir, "start_info") is None:
|
|
1504
1809
|
logger.warning("Find profiling job path %s, but host_start.log not exist, "
|
|
1505
1810
|
"profiler will ignore this job dir.", job_dir)
|
|
1506
1811
|
continue
|
|
@@ -1511,25 +1816,27 @@ class Profiler:
|
|
|
1511
1816
|
"profiler will ignore this job dir.", job_dir)
|
|
1512
1817
|
continue
|
|
1513
1818
|
|
|
1514
|
-
|
|
1515
|
-
|
|
1819
|
+
prof_rank_id, prof_device_id = self._parse_info_json(info_file_path)
|
|
1820
|
+
job_start_time = self._parse_job_start_time(prof_dir)
|
|
1516
1821
|
|
|
1517
|
-
if
|
|
1518
|
-
|
|
1519
|
-
|
|
1520
|
-
|
|
1521
|
-
|
|
1822
|
+
if offline_path:
|
|
1823
|
+
if self._rank_id != prof_rank_id:
|
|
1824
|
+
continue
|
|
1825
|
+
self._start_time = int(job_start_time)
|
|
1826
|
+
else:
|
|
1827
|
+
if self._dev_id != prof_device_id and self._rank_id != prof_rank_id:
|
|
1828
|
+
logger.debug("Find profiling find job path %s, but not current training device id. "
|
|
1829
|
+
"Current training rank id %s, but job path rank id: %s, "
|
|
1830
|
+
"profiler will ignore this job dir.", job_dir, self._rank_id, prof_rank_id)
|
|
1831
|
+
continue
|
|
1522
1832
|
|
|
1523
|
-
|
|
1524
|
-
|
|
1525
|
-
|
|
1526
|
-
|
|
1527
|
-
|
|
1833
|
+
if job_start_time < self._start_time:
|
|
1834
|
+
logger.warning("Find profiling job path %s, but start_time(%d) is earlier than this training "
|
|
1835
|
+
"start_time(%d), profiler will ignore this job dir.",
|
|
1836
|
+
job_dir, job_start_time, self._start_time)
|
|
1837
|
+
continue
|
|
1528
1838
|
|
|
1529
|
-
|
|
1530
|
-
job_id = os.path.join(dir_name, device_dir[0])
|
|
1531
|
-
else:
|
|
1532
|
-
job_id = dir_name
|
|
1839
|
+
job_id = os.path.join(dir_name, device_dir[0])
|
|
1533
1840
|
break
|
|
1534
1841
|
|
|
1535
1842
|
if not job_id:
|
|
@@ -1630,6 +1937,7 @@ class Profiler:
|
|
|
1630
1937
|
else:
|
|
1631
1938
|
output_path = kwargs.pop("output_path")
|
|
1632
1939
|
self._output_path = validate_and_normalize_path(output_path)
|
|
1940
|
+
|
|
1633
1941
|
self._output_path = os.path.join(self._output_path, "profiler")
|
|
1634
1942
|
if not os.path.exists(self._output_path):
|
|
1635
1943
|
os.makedirs(self._output_path, exist_ok=True)
|
|
@@ -1637,6 +1945,10 @@ class Profiler:
|
|
|
1637
1945
|
else:
|
|
1638
1946
|
logger.warning("The target dir already exists. "
|
|
1639
1947
|
"There may be some old profiling data, and they will be rewritten in the end.")
|
|
1948
|
+
self._framework_path = os.path.join(self._output_path, "FRAMEWORK")
|
|
1949
|
+
if not os.path.exists(self._framework_path):
|
|
1950
|
+
os.makedirs(self._framework_path, exist_ok=True)
|
|
1951
|
+
os.chmod(self._framework_path, stat.S_IRUSR | stat.S_IWUSR | stat.S_IXUSR)
|
|
1640
1952
|
|
|
1641
1953
|
def _parser_kwargs(self, kwargs):
|
|
1642
1954
|
"""Parse kwargs vale."""
|
|
@@ -1678,6 +1990,21 @@ class Profiler:
|
|
|
1678
1990
|
logger.warning(f"For '{self.__class__.__name__}', the parameter profile_framework must be one of ['memory',"
|
|
1679
1991
|
f" 'time', 'all', None], but got {self._profile_framework}, it will be set to 'all'.")
|
|
1680
1992
|
self._profile_framework = "all"
|
|
1993
|
+
if not isinstance(self._data_simplification, bool):
|
|
1994
|
+
logger.warning(f"For '{self.__class__.__name__}', the parameter data_simplification must be bool, "
|
|
1995
|
+
f"but got type {type(self._data_simplification)}, it will be set to True.")
|
|
1996
|
+
self._data_simplification = True
|
|
1997
|
+
|
|
1998
|
+
if not isinstance(self._data_simplification, bool):
|
|
1999
|
+
logger.warning(f"For '{self.__class__.__name__}', the parameter data_simplification must be bool, "
|
|
2000
|
+
f"but got type {type(self._data_simplification)}, it will be set to True.")
|
|
2001
|
+
self._data_simplification = True
|
|
2002
|
+
|
|
2003
|
+
self._host_stack = kwargs.pop("host_stack", True)
|
|
2004
|
+
if not isinstance(self._host_stack, bool):
|
|
2005
|
+
logger.warning(f"For '{self.__class__.__name__}', the parameter host_stack must be bool, but got "
|
|
2006
|
+
f"type {type(self._host_stack)}, it will be set to True.")
|
|
2007
|
+
self._host_stack = True
|
|
1681
2008
|
|
|
1682
2009
|
def _host_info_analyse(self):
|
|
1683
2010
|
"""
|
|
@@ -1687,7 +2014,7 @@ class Profiler:
|
|
|
1687
2014
|
host_dir = os.path.join(self._output_path, 'host_info')
|
|
1688
2015
|
host_dir = validate_and_normalize_path(host_dir)
|
|
1689
2016
|
if not os.path.exists(host_dir):
|
|
1690
|
-
logger.
|
|
2017
|
+
logger.warning("Host info directory: %s not exist.", host_dir)
|
|
1691
2018
|
return
|
|
1692
2019
|
csv_file_name = 'host_info_' + str(self._rank_id) + '.csv'
|
|
1693
2020
|
json_file_name = 'timeline_' + str(self._rank_id) + '.json'
|
|
@@ -1708,7 +2035,7 @@ def _offline_parse(offline_path):
|
|
|
1708
2035
|
host_dir = os.path.join(offline_path, 'profiler', 'host_info')
|
|
1709
2036
|
host_dir = validate_and_normalize_path(host_dir)
|
|
1710
2037
|
if not os.path.exists(host_dir):
|
|
1711
|
-
logger.
|
|
2038
|
+
logger.warning("Host info directory: %s not exist.", host_dir)
|
|
1712
2039
|
return
|
|
1713
2040
|
files = os.listdir(host_dir)
|
|
1714
2041
|
for file in files:
|