mindspore 2.2.14__cp39-cp39-win_amd64.whl → 2.4.0__cp39-cp39-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mindspore might be problematic. Click here for more details.
- mindspore/.commit_id +1 -1
- mindspore/Microsoft.VisualStudio.Telemetry.dll +0 -0
- mindspore/Newtonsoft.Json.dll +0 -0
- mindspore/__init__.py +8 -5
- mindspore/_c_dataengine.cp39-win_amd64.pyd +0 -0
- mindspore/_c_expression.cp39-win_amd64.pyd +0 -0
- mindspore/_c_mindrecord.cp39-win_amd64.pyd +0 -0
- mindspore/_checkparam.py +124 -25
- mindspore/_extends/builtin_operations.py +2 -1
- mindspore/_extends/graph_kernel/model/graph_parallel.py +16 -6
- mindspore/_extends/parallel_compile/akg_compiler/akg_process.py +3 -16
- mindspore/_extends/parallel_compile/akg_compiler/build_tbe_kernel.py +16 -4
- mindspore/_extends/parallel_compile/akg_compiler/compiler.py +1 -0
- mindspore/_extends/parallel_compile/akg_compiler/gen_custom_op_files.py +96 -0
- mindspore/_extends/parallel_compile/akg_compiler/tbe_topi.py +2 -1
- mindspore/_extends/parallel_compile/akg_compiler/util.py +5 -2
- mindspore/_extends/parse/__init__.py +18 -14
- mindspore/_extends/parse/compile_config.py +299 -0
- mindspore/_extends/parse/namespace.py +2 -2
- mindspore/_extends/parse/parser.py +182 -68
- mindspore/_extends/parse/resources.py +45 -14
- mindspore/_extends/parse/standard_method.py +192 -252
- mindspore/{ops/_op_impl/tbe/atomic_addr_clean.py → _extends/pijit/__init__.py} +6 -16
- mindspore/_extends/pijit/pijit_func_white_list.py +669 -0
- mindspore/_extends/remote/kernel_build_server.py +2 -0
- mindspore/_profiler.py +30 -0
- mindspore/amp.py +67 -26
- mindspore/atlprov.dll +0 -0
- mindspore/avcodec-59.dll +0 -0
- mindspore/avdevice-59.dll +0 -0
- mindspore/avfilter-8.dll +0 -0
- mindspore/avformat-59.dll +0 -0
- mindspore/avutil-57.dll +0 -0
- mindspore/boost/adasum.py +1 -1
- mindspore/boost/base.py +1 -1
- mindspore/boost/boost_cell_wrapper.py +2 -2
- mindspore/boost/grad_freeze.py +2 -2
- mindspore/boost/group_loss_scale_manager.py +1 -1
- mindspore/boost/less_batch_normalization.py +9 -6
- mindspore/c1.dll +0 -0
- mindspore/c1xx.dll +0 -0
- mindspore/c2.dll +0 -0
- mindspore/common/__init__.py +20 -7
- mindspore/common/_jit_fallback_utils.py +2 -3
- mindspore/common/_pijit_context.py +190 -0
- mindspore/common/_register_for_adapter.py +7 -0
- mindspore/common/_register_for_recompute.py +48 -0
- mindspore/common/_register_for_tensor.py +10 -10
- mindspore/common/_stub_tensor.py +7 -1
- mindspore/common/_tensor_overload.py +139 -0
- mindspore/common/_utils.py +5 -17
- mindspore/common/api.py +449 -129
- mindspore/common/auto_dynamic_shape.py +27 -14
- mindspore/common/dtype.py +17 -10
- mindspore/common/dump.py +8 -11
- mindspore/common/file_system.py +48 -0
- mindspore/common/generator.py +254 -0
- mindspore/common/hook_handle.py +65 -30
- mindspore/common/initializer.py +1 -1
- mindspore/common/jit_config.py +34 -14
- mindspore/common/lazy_inline.py +72 -19
- mindspore/common/mindir_util.py +12 -2
- mindspore/common/mutable.py +79 -14
- mindspore/common/no_inline.py +54 -0
- mindspore/common/np_dtype.py +25 -0
- mindspore/common/parameter.py +73 -21
- mindspore/common/recompute.py +292 -0
- mindspore/common/seed.py +9 -9
- mindspore/common/sparse_tensor.py +276 -24
- mindspore/common/symbol.py +122 -0
- mindspore/common/tensor.py +668 -514
- mindspore/communication/__init__.py +6 -11
- mindspore/communication/_comm_helper.py +43 -3
- mindspore/communication/comm_func.py +1395 -0
- mindspore/communication/management.py +117 -104
- mindspore/config/op_info.config +22 -54
- mindspore/context.py +455 -71
- mindspore/dataset/__init__.py +5 -5
- mindspore/dataset/audio/__init__.py +6 -6
- mindspore/dataset/audio/transforms.py +711 -158
- mindspore/dataset/callback/ds_callback.py +2 -2
- mindspore/dataset/core/config.py +7 -0
- mindspore/dataset/core/validator_helpers.py +7 -0
- mindspore/dataset/engine/cache_client.py +2 -2
- mindspore/dataset/engine/datasets.py +201 -116
- mindspore/dataset/engine/datasets_audio.py +14 -14
- mindspore/dataset/engine/datasets_standard_format.py +83 -3
- mindspore/dataset/engine/datasets_text.py +39 -39
- mindspore/dataset/engine/datasets_user_defined.py +230 -141
- mindspore/dataset/engine/datasets_vision.py +78 -74
- mindspore/dataset/engine/iterators.py +29 -0
- mindspore/dataset/engine/obs/util.py +7 -0
- mindspore/dataset/engine/offload.py +5 -7
- mindspore/dataset/engine/queue.py +138 -66
- mindspore/dataset/engine/serializer_deserializer.py +2 -2
- mindspore/dataset/engine/validators.py +41 -15
- mindspore/dataset/text/__init__.py +2 -5
- mindspore/dataset/text/transforms.py +408 -121
- mindspore/dataset/text/utils.py +9 -9
- mindspore/dataset/transforms/__init__.py +0 -3
- mindspore/dataset/transforms/transforms.py +261 -76
- mindspore/dataset/utils/browse_dataset.py +9 -9
- mindspore/dataset/utils/line_reader.py +2 -0
- mindspore/dataset/vision/__init__.py +7 -10
- mindspore/dataset/vision/c_transforms.py +10 -10
- mindspore/dataset/vision/py_transforms_util.py +1 -1
- mindspore/dataset/vision/transforms.py +2844 -549
- mindspore/dataset/vision/utils.py +161 -10
- mindspore/dataset/vision/validators.py +16 -3
- mindspore/dnnl.dll +0 -0
- mindspore/dpcmi.dll +0 -0
- mindspore/{rewrite/ast_creator_register.py → experimental/es/__init__.py} +5 -20
- mindspore/experimental/es/embedding_service.py +883 -0
- mindspore/experimental/es/embedding_service_layer.py +581 -0
- mindspore/experimental/llm_boost/__init__.py +21 -0
- mindspore/experimental/llm_boost/atb/__init__.py +23 -0
- mindspore/experimental/llm_boost/atb/boost_base.py +211 -0
- mindspore/experimental/llm_boost/atb/llama_boost.py +115 -0
- mindspore/experimental/llm_boost/atb/qwen_boost.py +101 -0
- mindspore/experimental/llm_boost/register.py +129 -0
- mindspore/experimental/llm_boost/utils.py +31 -0
- mindspore/experimental/optim/__init__.py +12 -2
- mindspore/experimental/optim/adadelta.py +161 -0
- mindspore/experimental/optim/adagrad.py +168 -0
- mindspore/experimental/optim/adam.py +35 -34
- mindspore/experimental/optim/adamax.py +170 -0
- mindspore/experimental/optim/adamw.py +124 -15
- mindspore/experimental/optim/asgd.py +153 -0
- mindspore/experimental/optim/lr_scheduler.py +66 -121
- mindspore/experimental/optim/nadam.py +157 -0
- mindspore/experimental/optim/optimizer.py +18 -8
- mindspore/experimental/optim/radam.py +194 -0
- mindspore/experimental/optim/rmsprop.py +154 -0
- mindspore/experimental/optim/rprop.py +164 -0
- mindspore/experimental/optim/sgd.py +28 -19
- mindspore/hal/__init__.py +40 -0
- mindspore/hal/_ascend.py +57 -0
- mindspore/hal/_base.py +57 -0
- mindspore/hal/_cpu.py +56 -0
- mindspore/hal/_gpu.py +57 -0
- mindspore/hal/contiguous_tensors_handle.py +175 -0
- mindspore/hal/device.py +356 -0
- mindspore/hal/event.py +179 -0
- mindspore/hal/memory.py +326 -0
- mindspore/hal/stream.py +357 -0
- mindspore/include/api/data_type.h +2 -2
- mindspore/include/api/dual_abi_helper.h +16 -3
- mindspore/include/api/model.h +4 -3
- mindspore/include/api/model_group.h +13 -1
- mindspore/include/api/status.h +14 -0
- mindspore/include/api/types.h +10 -10
- mindspore/include/c_api/model_c.h +173 -0
- mindspore/include/c_api/types_c.h +19 -0
- mindspore/include/dataset/config.h +2 -2
- mindspore/include/dataset/constants.h +2 -2
- mindspore/include/dataset/execute.h +3 -5
- mindspore/include/dataset/vision.h +58 -2
- mindspore/jpeg62.dll +0 -0
- mindspore/log.py +3 -3
- mindspore/mindrecord/__init__.py +5 -1
- mindspore/mindrecord/config.py +809 -0
- mindspore/mindrecord/filereader.py +25 -0
- mindspore/mindrecord/filewriter.py +138 -103
- mindspore/mindrecord/mindpage.py +40 -6
- mindspore/mindrecord/shardutils.py +3 -2
- mindspore/mindrecord/shardwriter.py +7 -0
- mindspore/mindrecord/tools/cifar100_to_mr.py +8 -13
- mindspore/mindrecord/tools/cifar10_to_mr.py +9 -15
- mindspore/mindrecord/tools/csv_to_mr.py +4 -9
- mindspore/mindrecord/tools/imagenet_to_mr.py +3 -8
- mindspore/mindrecord/tools/mnist_to_mr.py +7 -12
- mindspore/mindrecord/tools/tfrecord_to_mr.py +1 -6
- mindspore/mindspore_backend.dll +0 -0
- mindspore/mindspore_common.dll +0 -0
- mindspore/mindspore_core.dll +0 -0
- mindspore/mindspore_glog.dll +0 -0
- mindspore/mindspore_np_dtype.dll +0 -0
- mindspore/mindspore_ops.dll +0 -0
- mindspore/mint/__init__.py +1586 -0
- mindspore/mint/distributed/__init__.py +31 -0
- mindspore/mint/distributed/distributed.py +254 -0
- mindspore/{rewrite/ast_transformers → mint/linalg}/__init__.py +9 -4
- mindspore/mint/nn/__init__.py +757 -0
- mindspore/mint/nn/functional.py +679 -0
- mindspore/mint/nn/layer/__init__.py +39 -0
- mindspore/mint/nn/layer/activation.py +133 -0
- mindspore/mint/nn/layer/normalization.py +477 -0
- mindspore/mint/nn/layer/pooling.py +110 -0
- mindspore/mint/optim/__init__.py +24 -0
- mindspore/mint/optim/adamw.py +206 -0
- mindspore/mint/special/__init__.py +63 -0
- mindspore/msobj140.dll +0 -0
- mindspore/mspdb140.dll +0 -0
- mindspore/mspdbcore.dll +0 -0
- mindspore/mspdbst.dll +0 -0
- mindspore/mspft140.dll +0 -0
- mindspore/msvcdis140.dll +0 -0
- mindspore/msvcp140_1.dll +0 -0
- mindspore/msvcp140_2.dll +0 -0
- mindspore/msvcp140_atomic_wait.dll +0 -0
- mindspore/msvcp140_codecvt_ids.dll +0 -0
- mindspore/multiprocessing/__init__.py +73 -0
- mindspore/nn/cell.py +461 -323
- mindspore/nn/dynamic_lr.py +2 -2
- mindspore/nn/layer/activation.py +292 -135
- mindspore/nn/layer/basic.py +288 -83
- mindspore/nn/layer/channel_shuffle.py +3 -16
- mindspore/nn/layer/container.py +3 -3
- mindspore/nn/layer/conv.py +75 -66
- mindspore/nn/layer/embedding.py +221 -45
- mindspore/nn/layer/image.py +4 -7
- mindspore/nn/layer/math.py +1 -1
- mindspore/nn/layer/normalization.py +150 -68
- mindspore/nn/layer/padding.py +64 -87
- mindspore/nn/layer/pooling.py +175 -12
- mindspore/nn/layer/rnn_cells.py +6 -16
- mindspore/nn/layer/rnns.py +6 -5
- mindspore/nn/layer/thor_layer.py +1 -2
- mindspore/nn/layer/timedistributed.py +1 -1
- mindspore/nn/layer/transformer.py +55 -53
- mindspore/nn/learning_rate_schedule.py +6 -5
- mindspore/nn/loss/__init__.py +2 -2
- mindspore/nn/loss/loss.py +145 -88
- mindspore/nn/optim/__init__.py +2 -1
- mindspore/nn/optim/ada_grad.py +4 -2
- mindspore/nn/optim/adadelta.py +4 -2
- mindspore/nn/optim/adafactor.py +1 -1
- mindspore/nn/optim/adam.py +102 -181
- mindspore/nn/optim/adamax.py +4 -2
- mindspore/nn/optim/adasum.py +3 -3
- mindspore/nn/optim/asgd.py +4 -2
- mindspore/nn/optim/ftrl.py +31 -61
- mindspore/nn/optim/lamb.py +5 -3
- mindspore/nn/optim/lars.py +2 -2
- mindspore/nn/optim/lazyadam.py +6 -4
- mindspore/nn/optim/momentum.py +13 -25
- mindspore/nn/optim/optimizer.py +6 -3
- mindspore/nn/optim/proximal_ada_grad.py +4 -2
- mindspore/nn/optim/rmsprop.py +9 -3
- mindspore/nn/optim/rprop.py +4 -2
- mindspore/nn/optim/sgd.py +5 -3
- mindspore/nn/optim/tft_wrapper.py +127 -0
- mindspore/nn/optim/thor.py +2 -2
- mindspore/nn/probability/distribution/_utils/custom_ops.py +2 -2
- mindspore/nn/probability/distribution/beta.py +2 -2
- mindspore/nn/probability/distribution/categorical.py +4 -6
- mindspore/nn/probability/distribution/cauchy.py +2 -2
- mindspore/nn/probability/distribution/exponential.py +2 -2
- mindspore/nn/probability/distribution/geometric.py +1 -1
- mindspore/nn/probability/distribution/gumbel.py +2 -2
- mindspore/nn/probability/distribution/logistic.py +1 -1
- mindspore/nn/probability/distribution/poisson.py +2 -2
- mindspore/nn/probability/distribution/uniform.py +2 -2
- mindspore/nn/reinforcement/_tensors_queue.py +13 -1
- mindspore/nn/wrap/__init__.py +2 -1
- mindspore/nn/wrap/cell_wrapper.py +46 -12
- mindspore/nn/wrap/grad_reducer.py +148 -8
- mindspore/nn/wrap/loss_scale.py +44 -7
- mindspore/numpy/__init__.py +2 -0
- mindspore/numpy/array_creations.py +67 -68
- mindspore/numpy/array_ops.py +70 -66
- mindspore/numpy/dtypes.py +3 -3
- mindspore/numpy/fft.py +966 -0
- mindspore/numpy/logic_ops.py +11 -10
- mindspore/numpy/math_ops.py +147 -152
- mindspore/numpy/utils.py +3 -0
- mindspore/numpy/utils_const.py +4 -4
- mindspore/opencv_core452.dll +0 -0
- mindspore/opencv_imgcodecs452.dll +0 -0
- mindspore/opencv_imgproc452.dll +0 -0
- mindspore/ops/__init__.py +9 -6
- mindspore/ops/_grad_experimental/grad_array_ops.py +4 -129
- mindspore/ops/_grad_experimental/grad_comm_ops.py +135 -36
- mindspore/ops/_grad_experimental/grad_math_ops.py +61 -298
- mindspore/ops/_grad_experimental/grad_nn_ops.py +0 -53
- mindspore/ops/_grad_experimental/grad_quant_ops.py +3 -3
- mindspore/ops/_grad_experimental/grad_sparse.py +1 -1
- mindspore/ops/_grad_experimental/grad_sparse_ops.py +3 -3
- mindspore/ops/_op_impl/__init__.py +0 -1
- mindspore/ops/_op_impl/aicpu/gamma.py +2 -0
- mindspore/ops/_op_impl/aicpu/generate_eod_mask.py +1 -1
- mindspore/ops/_op_impl/aicpu/log_uniform_candidate_sampler.py +1 -3
- mindspore/ops/_op_impl/aicpu/poisson.py +2 -0
- mindspore/ops/_op_impl/cpu/__init__.py +1 -3
- mindspore/ops/_op_impl/cpu/adam.py +2 -2
- mindspore/ops/_op_impl/cpu/adam_weight_decay.py +3 -2
- mindspore/ops/_op_impl/cpu/maximum_grad.py +16 -14
- mindspore/ops/_op_impl/cpu/minimum_grad.py +8 -0
- mindspore/ops/_vmap/vmap_array_ops.py +162 -101
- mindspore/ops/_vmap/vmap_base.py +8 -1
- mindspore/ops/_vmap/vmap_grad_math_ops.py +95 -9
- mindspore/ops/_vmap/vmap_grad_nn_ops.py +143 -58
- mindspore/ops/_vmap/vmap_image_ops.py +70 -13
- mindspore/ops/_vmap/vmap_math_ops.py +147 -59
- mindspore/ops/_vmap/vmap_nn_ops.py +292 -117
- mindspore/ops/_vmap/vmap_other_ops.py +1 -1
- mindspore/ops/auto_generate/__init__.py +31 -0
- mindspore/ops/auto_generate/cpp_create_prim_instance_helper.py +309 -0
- mindspore/ops/auto_generate/gen_arg_dtype_cast.py +252 -0
- mindspore/ops/auto_generate/gen_arg_handler.py +197 -0
- mindspore/ops/auto_generate/gen_extend_func.py +1701 -0
- mindspore/ops/auto_generate/gen_ops_def.py +8482 -0
- mindspore/ops/auto_generate/gen_ops_prim.py +16704 -0
- mindspore/ops/auto_generate/pyboost_inner_prim.py +549 -0
- mindspore/ops/composite/__init__.py +5 -2
- mindspore/ops/composite/base.py +201 -66
- mindspore/ops/composite/math_ops.py +10 -49
- mindspore/ops/composite/multitype_ops/_compile_utils.py +192 -618
- mindspore/ops/composite/multitype_ops/_constexpr_utils.py +25 -134
- mindspore/ops/composite/multitype_ops/add_impl.py +6 -0
- mindspore/ops/composite/multitype_ops/bitwise_and_impl.py +6 -0
- mindspore/ops/composite/multitype_ops/bitwise_or_impl.py +6 -0
- mindspore/ops/composite/multitype_ops/bitwise_xor_impl.py +6 -0
- mindspore/ops/composite/multitype_ops/div_impl.py +8 -0
- mindspore/ops/composite/multitype_ops/equal_impl.py +6 -0
- mindspore/ops/composite/multitype_ops/floordiv_impl.py +8 -0
- mindspore/ops/composite/multitype_ops/getitem_impl.py +6 -0
- mindspore/ops/composite/multitype_ops/greater_equal_impl.py +6 -0
- mindspore/ops/composite/multitype_ops/greater_impl.py +6 -0
- mindspore/ops/composite/multitype_ops/in_impl.py +8 -2
- mindspore/ops/composite/multitype_ops/left_shift_impl.py +6 -0
- mindspore/ops/composite/multitype_ops/less_equal_impl.py +6 -0
- mindspore/ops/composite/multitype_ops/less_impl.py +6 -0
- mindspore/ops/composite/multitype_ops/logic_not_impl.py +6 -0
- mindspore/ops/composite/multitype_ops/logical_and_impl.py +6 -0
- mindspore/ops/composite/multitype_ops/logical_or_impl.py +6 -0
- mindspore/ops/composite/multitype_ops/mod_impl.py +6 -0
- mindspore/ops/composite/multitype_ops/mul_impl.py +6 -0
- mindspore/ops/composite/multitype_ops/negative_impl.py +9 -3
- mindspore/ops/composite/multitype_ops/not_equal_impl.py +6 -0
- mindspore/ops/composite/multitype_ops/not_in_impl.py +8 -3
- mindspore/ops/composite/multitype_ops/ones_like_impl.py +2 -2
- mindspore/ops/composite/multitype_ops/pow_impl.py +6 -0
- mindspore/ops/composite/multitype_ops/right_shift_impl.py +6 -0
- mindspore/ops/composite/multitype_ops/setitem_impl.py +32 -21
- mindspore/ops/composite/multitype_ops/sub_impl.py +6 -0
- mindspore/ops/composite/multitype_ops/zeros_like_impl.py +6 -3
- mindspore/ops/deprecated.py +14 -3
- mindspore/ops/function/__init__.py +53 -11
- mindspore/ops/function/array_func.py +1269 -1821
- mindspore/ops/function/clip_func.py +19 -31
- mindspore/ops/function/debug_func.py +114 -5
- mindspore/ops/function/fft_func.py +44 -0
- mindspore/ops/function/grad/grad_func.py +30 -22
- mindspore/ops/function/image_func.py +27 -21
- mindspore/ops/function/linalg_func.py +35 -68
- mindspore/ops/function/math_func.py +1170 -2697
- mindspore/ops/function/nn_func.py +2116 -1128
- mindspore/ops/function/other_func.py +8 -8
- mindspore/ops/function/parameter_func.py +5 -93
- mindspore/ops/function/random_func.py +435 -113
- mindspore/ops/function/reshard_func.py +104 -0
- mindspore/ops/function/sparse_func.py +4 -4
- mindspore/ops/function/sparse_unary_func.py +9 -16
- mindspore/ops/function/spectral_func.py +1 -1
- mindspore/ops/function/vmap_func.py +16 -15
- mindspore/ops/functional.py +355 -346
- mindspore/ops/op_info_register.py +18 -45
- mindspore/ops/operations/__init__.py +38 -24
- mindspore/ops/operations/_grad_ops.py +21 -927
- mindspore/ops/operations/_infer_ops.py +19 -0
- mindspore/ops/operations/_inner_ops.py +173 -607
- mindspore/ops/operations/_rl_inner_ops.py +2 -2
- mindspore/ops/operations/_scalar_ops.py +5 -480
- mindspore/ops/operations/_sequence_ops.py +6 -36
- mindspore/ops/operations/_tensor_array.py +8 -8
- mindspore/ops/operations/array_ops.py +106 -2837
- mindspore/ops/operations/comm_ops.py +799 -127
- mindspore/ops/operations/custom_ops.py +124 -119
- mindspore/ops/operations/debug_ops.py +142 -41
- mindspore/ops/operations/image_ops.py +1 -217
- mindspore/ops/operations/inner_ops.py +5 -40
- mindspore/ops/operations/linalg_ops.py +1 -49
- mindspore/ops/operations/manually_defined/__init__.py +24 -0
- mindspore/ops/operations/manually_defined/_inner.py +73 -0
- mindspore/ops/operations/manually_defined/ops_def.py +2271 -0
- mindspore/ops/operations/math_ops.py +666 -4972
- mindspore/ops/operations/nn_ops.py +205 -2213
- mindspore/ops/operations/other_ops.py +60 -49
- mindspore/ops/operations/random_ops.py +50 -54
- mindspore/ops/operations/reshard_ops.py +53 -0
- mindspore/ops/operations/sparse_ops.py +4 -4
- mindspore/ops/primitive.py +216 -103
- mindspore/ops_generate/__init__.py +27 -0
- mindspore/ops_generate/arg_dtype_cast.py +252 -0
- mindspore/ops_generate/arg_handler.py +197 -0
- mindspore/ops_generate/gen_aclnn_implement.py +263 -0
- mindspore/ops_generate/gen_constants.py +36 -0
- mindspore/ops_generate/gen_ops.py +1099 -0
- mindspore/ops_generate/gen_ops_inner_prim.py +131 -0
- mindspore/ops_generate/gen_pyboost_func.py +1052 -0
- mindspore/ops_generate/gen_utils.py +209 -0
- mindspore/ops_generate/op_proto.py +145 -0
- mindspore/ops_generate/pyboost_utils.py +367 -0
- mindspore/ops_generate/template.py +261 -0
- mindspore/parallel/__init__.py +8 -4
- mindspore/parallel/_auto_parallel_context.py +100 -10
- mindspore/parallel/_cell_wrapper.py +99 -9
- mindspore/parallel/_cost_model_context.py +1 -1
- mindspore/parallel/_dp_allreduce_fusion.py +159 -159
- mindspore/parallel/_parallel_serialization.py +67 -23
- mindspore/parallel/_ps_context.py +1 -1
- mindspore/parallel/_recovery_context.py +1 -1
- mindspore/parallel/_tensor.py +99 -22
- mindspore/parallel/_transformer/__init__.py +1 -1
- mindspore/parallel/_transformer/layers.py +1 -1
- mindspore/parallel/_transformer/loss.py +1 -1
- mindspore/parallel/_transformer/moe.py +1 -1
- mindspore/parallel/_transformer/op_parallel_config.py +1 -1
- mindspore/parallel/_transformer/transformer.py +2 -2
- mindspore/parallel/_utils.py +173 -6
- mindspore/parallel/algo_parameter_config.py +8 -10
- mindspore/parallel/checkpoint_transform.py +204 -38
- mindspore/parallel/cluster/__init__.py +15 -0
- mindspore/parallel/cluster/process_entity/__init__.py +18 -0
- mindspore/parallel/cluster/process_entity/_api.py +352 -0
- mindspore/parallel/cluster/process_entity/_utils.py +101 -0
- mindspore/parallel/cluster/run.py +136 -0
- mindspore/parallel/mpi/__init__.py +1 -1
- mindspore/parallel/mpi/_mpi_config.py +1 -1
- mindspore/parallel/parameter_broadcast.py +151 -0
- mindspore/parallel/shard.py +279 -37
- mindspore/parallel/transform_safetensors.py +993 -0
- mindspore/pgodb140.dll +0 -0
- mindspore/pgort140.dll +0 -0
- mindspore/profiler/__init__.py +4 -2
- mindspore/profiler/common/constant.py +29 -0
- mindspore/profiler/common/process_pool.py +41 -0
- mindspore/profiler/common/registry.py +47 -0
- mindspore/profiler/common/singleton.py +28 -0
- mindspore/profiler/common/util.py +153 -0
- mindspore/profiler/dynamic_profiler.py +694 -0
- mindspore/profiler/envprofiling.py +18 -20
- mindspore/{_extends/parallel_compile/tbe_compiler → profiler/parser/ascend_analysis}/__init__.py +1 -1
- mindspore/profiler/parser/ascend_analysis/constant.py +71 -0
- mindspore/profiler/parser/ascend_analysis/file_manager.py +180 -0
- mindspore/profiler/parser/ascend_analysis/function_event.py +185 -0
- mindspore/profiler/parser/ascend_analysis/fwk_cann_parser.py +136 -0
- mindspore/profiler/parser/ascend_analysis/fwk_file_parser.py +131 -0
- mindspore/profiler/parser/ascend_analysis/msprof_timeline_parser.py +104 -0
- mindspore/profiler/parser/ascend_analysis/path_manager.py +313 -0
- mindspore/profiler/parser/ascend_analysis/profiler_info_parser.py +123 -0
- mindspore/profiler/parser/ascend_analysis/tlv_decoder.py +86 -0
- mindspore/profiler/parser/ascend_analysis/trace_event_manager.py +75 -0
- mindspore/profiler/parser/ascend_cluster_generator.py +14 -9
- mindspore/profiler/parser/ascend_communicate_generator.py +0 -1
- mindspore/profiler/parser/ascend_flops_generator.py +20 -4
- mindspore/profiler/parser/ascend_hccl_generator.py +29 -278
- mindspore/profiler/parser/ascend_integrate_generator.py +42 -0
- mindspore/profiler/parser/ascend_memory_generator.py +185 -0
- mindspore/profiler/parser/ascend_msprof_exporter.py +148 -146
- mindspore/profiler/parser/ascend_msprof_generator.py +73 -283
- mindspore/profiler/parser/ascend_op_generator.py +92 -42
- mindspore/profiler/parser/ascend_timeline_generator.py +298 -133
- mindspore/profiler/parser/base_timeline_generator.py +25 -25
- mindspore/profiler/parser/cpu_gpu_timeline_generator.py +25 -12
- mindspore/profiler/parser/framework_parser.py +4 -393
- mindspore/profiler/parser/gpu_analysis/__init__.py +14 -0
- mindspore/profiler/parser/gpu_analysis/function_event.py +44 -0
- mindspore/profiler/parser/gpu_analysis/fwk_file_parser.py +89 -0
- mindspore/profiler/parser/gpu_analysis/profiler_info_parser.py +72 -0
- mindspore/profiler/parser/integrator.py +3 -1
- mindspore/profiler/parser/memory_usage_parser.py +0 -154
- mindspore/profiler/parser/minddata_parser.py +72 -3
- mindspore/profiler/parser/profiler_info.py +94 -7
- mindspore/profiler/profiler.py +153 -0
- mindspore/profiler/profiling.py +631 -508
- mindspore/rewrite/__init__.py +2 -14
- mindspore/rewrite/api/node.py +122 -36
- mindspore/rewrite/api/pattern_engine.py +2 -3
- mindspore/rewrite/api/scoped_value.py +16 -15
- mindspore/rewrite/api/symbol_tree.py +45 -29
- mindspore/rewrite/ast_helpers/__init__.py +3 -6
- mindspore/rewrite/ast_helpers/ast_converter.py +143 -0
- mindspore/rewrite/ast_helpers/ast_finder.py +48 -0
- mindspore/rewrite/ast_helpers/ast_flattener.py +268 -0
- mindspore/rewrite/ast_helpers/ast_modifier.py +160 -92
- mindspore/rewrite/common/__init__.py +1 -2
- mindspore/rewrite/common/config.py +24 -0
- mindspore/rewrite/common/{rewrite_elog.py → error_log.py} +39 -39
- mindspore/rewrite/{namer.py → common/namer.py} +63 -18
- mindspore/rewrite/common/namespace.py +118 -0
- mindspore/rewrite/node/__init__.py +5 -5
- mindspore/rewrite/node/call_function.py +23 -7
- mindspore/rewrite/node/cell_container.py +7 -3
- mindspore/rewrite/node/control_flow.py +53 -28
- mindspore/rewrite/node/node.py +212 -196
- mindspore/rewrite/node/node_manager.py +51 -22
- mindspore/rewrite/node/node_topological_manager.py +3 -23
- mindspore/rewrite/parsers/__init__.py +12 -0
- mindspore/rewrite/parsers/arguments_parser.py +8 -9
- mindspore/rewrite/parsers/assign_parser.py +637 -413
- mindspore/rewrite/parsers/attribute_parser.py +3 -4
- mindspore/rewrite/parsers/class_def_parser.py +115 -148
- mindspore/rewrite/parsers/constant_parser.py +5 -5
- mindspore/rewrite/parsers/container_parser.py +4 -6
- mindspore/rewrite/parsers/expr_parser.py +55 -0
- mindspore/rewrite/parsers/for_parser.py +31 -98
- mindspore/rewrite/parsers/function_def_parser.py +13 -5
- mindspore/rewrite/parsers/if_parser.py +28 -10
- mindspore/rewrite/parsers/module_parser.py +8 -182
- mindspore/rewrite/parsers/parser.py +1 -5
- mindspore/rewrite/parsers/parser_register.py +1 -1
- mindspore/rewrite/parsers/return_parser.py +5 -10
- mindspore/rewrite/parsers/while_parser.py +59 -0
- mindspore/rewrite/sparsify/utils.py +1 -1
- mindspore/rewrite/symbol_tree/__init__.py +20 -0
- mindspore/rewrite/{symbol_tree.py → symbol_tree/symbol_tree.py} +705 -186
- mindspore/rewrite/{symbol_tree_builder.py → symbol_tree/symbol_tree_builder.py} +8 -8
- mindspore/rewrite/{symbol_tree_dumper.py → symbol_tree/symbol_tree_dumper.py} +4 -4
- mindspore/run_check/_check_version.py +40 -115
- mindspore/run_check/run_check.py +1 -1
- mindspore/safeguard/rewrite_obfuscation.py +597 -263
- mindspore/swresample-4.dll +0 -0
- mindspore/swscale-6.dll +0 -0
- mindspore/tbbmalloc.dll +0 -0
- mindspore/tinyxml2.dll +0 -0
- mindspore/train/__init__.py +7 -5
- mindspore/train/_utils.py +204 -4
- mindspore/train/amp.py +335 -295
- mindspore/train/anf_ir_pb2.py +14 -2
- mindspore/train/callback/__init__.py +5 -2
- mindspore/train/callback/_backup_and_restore.py +5 -5
- mindspore/train/callback/_callback.py +4 -4
- mindspore/train/callback/_checkpoint.py +220 -43
- mindspore/train/callback/_cluster_monitor.py +201 -0
- mindspore/train/callback/_early_stop.py +2 -2
- mindspore/train/callback/_flops_collector.py +239 -0
- mindspore/train/callback/_landscape.py +15 -9
- mindspore/train/callback/_loss_monitor.py +5 -5
- mindspore/train/callback/_on_request_exit.py +136 -33
- mindspore/train/callback/_reduce_lr_on_plateau.py +2 -2
- mindspore/train/callback/_summary_collector.py +12 -12
- mindspore/train/callback/_tft_register.py +352 -0
- mindspore/train/callback/_time_monitor.py +3 -3
- mindspore/train/data_sink.py +6 -5
- mindspore/train/dataset_helper.py +66 -23
- mindspore/train/loss_scale_manager.py +2 -2
- mindspore/train/metrics/accuracy.py +7 -7
- mindspore/train/metrics/confusion_matrix.py +8 -6
- mindspore/train/metrics/cosine_similarity.py +6 -4
- mindspore/train/metrics/error.py +2 -2
- mindspore/train/metrics/metric.py +3 -3
- mindspore/train/metrics/perplexity.py +2 -1
- mindspore/train/metrics/roc.py +4 -4
- mindspore/train/metrics/topk.py +2 -2
- mindspore/train/mind_ir_pb2.py +116 -37
- mindspore/train/model.py +382 -76
- mindspore/train/serialization.py +787 -288
- mindspore/train/summary/_summary_adapter.py +1 -1
- mindspore/train/summary/summary_record.py +51 -28
- mindspore/train/train_thor/convert_utils.py +3 -3
- mindspore/turbojpeg.dll +0 -0
- mindspore/utils/__init__.py +21 -0
- mindspore/utils/utils.py +60 -0
- mindspore/vcmeta.dll +0 -0
- mindspore/vcruntime140.dll +0 -0
- mindspore/vcruntime140_1.dll +0 -0
- mindspore/version.py +1 -1
- {mindspore-2.2.14.dist-info → mindspore-2.4.0.dist-info}/METADATA +8 -4
- mindspore-2.4.0.dist-info/RECORD +1406 -0
- {mindspore-2.2.14.dist-info → mindspore-2.4.0.dist-info}/entry_points.txt +1 -0
- mindspore/_extends/parallel_compile/tbe_compiler/tbe_adapter.py +0 -662
- mindspore/_extends/parallel_compile/tbe_compiler/tbe_helper.py +0 -377
- mindspore/_extends/parallel_compile/tbe_compiler/tbe_job.py +0 -201
- mindspore/_extends/parallel_compile/tbe_compiler/tbe_job_manager.py +0 -515
- mindspore/gen_ops.py +0 -273
- mindspore/include/c_api/ms/abstract.h +0 -67
- mindspore/include/c_api/ms/attribute.h +0 -197
- mindspore/include/c_api/ms/base/handle_types.h +0 -43
- mindspore/include/c_api/ms/base/macros.h +0 -32
- mindspore/include/c_api/ms/base/status.h +0 -33
- mindspore/include/c_api/ms/base/types.h +0 -282
- mindspore/include/c_api/ms/context.h +0 -102
- mindspore/include/c_api/ms/graph.h +0 -160
- mindspore/include/c_api/ms/node.h +0 -606
- mindspore/include/c_api/ms/tensor.h +0 -161
- mindspore/include/c_api/ms/value.h +0 -84
- mindspore/mindspore_shared_lib.dll +0 -0
- mindspore/nn/layer/flash_attention.py +0 -189
- mindspore/ops/_op_impl/aicpu/strided_slice_v2.py +0 -93
- mindspore/ops/_op_impl/aicpu/strided_slice_v2_grad.py +0 -66
- mindspore/ops/_op_impl/cpu/concat.py +0 -39
- mindspore/ops/_op_impl/cpu/tensor_shape.py +0 -42
- mindspore/ops/_op_impl/tbe/__init__.py +0 -47
- mindspore/ops/_op_impl/tbe/abs.py +0 -38
- mindspore/ops/_op_impl/tbe/abs_ds.py +0 -39
- mindspore/ops/_op_impl/tbe/abs_grad.py +0 -43
- mindspore/ops/_op_impl/tbe/abs_grad_ds.py +0 -44
- mindspore/ops/_op_impl/tbe/accumulate_n_v2.py +0 -41
- mindspore/ops/_op_impl/tbe/accumulate_n_v2_ds.py +0 -42
- mindspore/ops/_op_impl/tbe/acos.py +0 -37
- mindspore/ops/_op_impl/tbe/acos_ds.py +0 -38
- mindspore/ops/_op_impl/tbe/acos_grad.py +0 -43
- mindspore/ops/_op_impl/tbe/acos_grad_ds.py +0 -44
- mindspore/ops/_op_impl/tbe/acosh.py +0 -37
- mindspore/ops/_op_impl/tbe/acosh_ds.py +0 -38
- mindspore/ops/_op_impl/tbe/acosh_grad.py +0 -43
- mindspore/ops/_op_impl/tbe/acosh_grad_ds.py +0 -44
- mindspore/ops/_op_impl/tbe/act_ulq_clamp_max_grad.py +0 -38
- mindspore/ops/_op_impl/tbe/act_ulq_clamp_min_grad.py +0 -38
- mindspore/ops/_op_impl/tbe/acts_ulq.py +0 -45
- mindspore/ops/_op_impl/tbe/acts_ulq_input_grad.py +0 -38
- mindspore/ops/_op_impl/tbe/adam_apply_one.py +0 -50
- mindspore/ops/_op_impl/tbe/adam_apply_one_assign.py +0 -53
- mindspore/ops/_op_impl/tbe/adam_apply_one_ds.py +0 -51
- mindspore/ops/_op_impl/tbe/adam_apply_one_with_decay.py +0 -54
- mindspore/ops/_op_impl/tbe/adam_apply_one_with_decay_assign.py +0 -54
- mindspore/ops/_op_impl/tbe/adam_apply_one_with_decay_ds.py +0 -55
- mindspore/ops/_op_impl/tbe/adaptive_max_pool2d.py +0 -37
- mindspore/ops/_op_impl/tbe/add.py +0 -42
- mindspore/ops/_op_impl/tbe/add_ds.py +0 -43
- mindspore/ops/_op_impl/tbe/add_n.py +0 -39
- mindspore/ops/_op_impl/tbe/add_n_ds.py +0 -40
- mindspore/ops/_op_impl/tbe/addcdiv.py +0 -41
- mindspore/ops/_op_impl/tbe/addcdiv_ds.py +0 -42
- mindspore/ops/_op_impl/tbe/addcmul.py +0 -43
- mindspore/ops/_op_impl/tbe/addcmul_ds.py +0 -44
- mindspore/ops/_op_impl/tbe/apply_ada_max.py +0 -68
- mindspore/ops/_op_impl/tbe/apply_ada_max_ds.py +0 -69
- mindspore/ops/_op_impl/tbe/apply_adadelta.py +0 -66
- mindspore/ops/_op_impl/tbe/apply_adadelta_ds.py +0 -67
- mindspore/ops/_op_impl/tbe/apply_adagrad.py +0 -55
- mindspore/ops/_op_impl/tbe/apply_adagrad_d_a.py +0 -67
- mindspore/ops/_op_impl/tbe/apply_adagrad_ds.py +0 -56
- mindspore/ops/_op_impl/tbe/apply_adagrad_v2.py +0 -48
- mindspore/ops/_op_impl/tbe/apply_adagrad_v2_ds.py +0 -49
- mindspore/ops/_op_impl/tbe/apply_adam.py +0 -79
- mindspore/ops/_op_impl/tbe/apply_adam_ds.py +0 -80
- mindspore/ops/_op_impl/tbe/apply_adam_with_amsgrad.py +0 -60
- mindspore/ops/_op_impl/tbe/apply_adam_with_amsgrad_ds.py +0 -61
- mindspore/ops/_op_impl/tbe/apply_add_sign.py +0 -65
- mindspore/ops/_op_impl/tbe/apply_add_sign_ds.py +0 -66
- mindspore/ops/_op_impl/tbe/apply_centered_rms_prop.py +0 -77
- mindspore/ops/_op_impl/tbe/apply_centered_rms_prop_ds.py +0 -78
- mindspore/ops/_op_impl/tbe/apply_ftrl.py +0 -67
- mindspore/ops/_op_impl/tbe/apply_ftrl_ds.py +0 -68
- mindspore/ops/_op_impl/tbe/apply_gradient_descent.py +0 -44
- mindspore/ops/_op_impl/tbe/apply_gradient_descent_ds.py +0 -45
- mindspore/ops/_op_impl/tbe/apply_keras_momentum.py +0 -49
- mindspore/ops/_op_impl/tbe/apply_momentum.py +0 -64
- mindspore/ops/_op_impl/tbe/apply_momentum_ds.py +0 -65
- mindspore/ops/_op_impl/tbe/apply_power_sign.py +0 -65
- mindspore/ops/_op_impl/tbe/apply_power_sign_ds.py +0 -66
- mindspore/ops/_op_impl/tbe/apply_proximal_adagrad.py +0 -57
- mindspore/ops/_op_impl/tbe/apply_proximal_adagrad_ds.py +0 -58
- mindspore/ops/_op_impl/tbe/apply_proximal_gradient_descent.py +0 -54
- mindspore/ops/_op_impl/tbe/apply_proximal_gradient_descent_ds.py +0 -55
- mindspore/ops/_op_impl/tbe/apply_rms_prop.py +0 -52
- mindspore/ops/_op_impl/tbe/approximate_equal.py +0 -39
- mindspore/ops/_op_impl/tbe/approximate_equal_ds.py +0 -40
- mindspore/ops/_op_impl/tbe/arg_max.py +0 -38
- mindspore/ops/_op_impl/tbe/arg_max_with_value.py +0 -38
- mindspore/ops/_op_impl/tbe/arg_max_with_value_ds.py +0 -39
- mindspore/ops/_op_impl/tbe/arg_min.py +0 -38
- mindspore/ops/_op_impl/tbe/arg_min_v2_ds.py +0 -40
- mindspore/ops/_op_impl/tbe/arg_min_with_value.py +0 -38
- mindspore/ops/_op_impl/tbe/arg_min_with_value_ds.py +0 -39
- mindspore/ops/_op_impl/tbe/asin.py +0 -37
- mindspore/ops/_op_impl/tbe/asin_ds.py +0 -38
- mindspore/ops/_op_impl/tbe/asin_grad.py +0 -43
- mindspore/ops/_op_impl/tbe/asin_grad_ds.py +0 -44
- mindspore/ops/_op_impl/tbe/asinh.py +0 -37
- mindspore/ops/_op_impl/tbe/asinh_ds.py +0 -38
- mindspore/ops/_op_impl/tbe/asinh_grad.py +0 -43
- mindspore/ops/_op_impl/tbe/asinh_grad_ds.py +0 -44
- mindspore/ops/_op_impl/tbe/assign.py +0 -79
- mindspore/ops/_op_impl/tbe/assign_add.py +0 -59
- mindspore/ops/_op_impl/tbe/assign_add_ds.py +0 -60
- mindspore/ops/_op_impl/tbe/assign_ds.py +0 -80
- mindspore/ops/_op_impl/tbe/assign_sub.py +0 -55
- mindspore/ops/_op_impl/tbe/assign_sub_ds.py +0 -56
- mindspore/ops/_op_impl/tbe/atan.py +0 -37
- mindspore/ops/_op_impl/tbe/atan2.py +0 -38
- mindspore/ops/_op_impl/tbe/atan2_ds.py +0 -39
- mindspore/ops/_op_impl/tbe/atan_ds.py +0 -38
- mindspore/ops/_op_impl/tbe/atan_grad.py +0 -43
- mindspore/ops/_op_impl/tbe/atan_grad_ds.py +0 -44
- mindspore/ops/_op_impl/tbe/atanh.py +0 -37
- mindspore/ops/_op_impl/tbe/atanh_ds.py +0 -38
- mindspore/ops/_op_impl/tbe/avg_pool.py +0 -43
- mindspore/ops/_op_impl/tbe/avg_pool_3d.py +0 -44
- mindspore/ops/_op_impl/tbe/avg_pool_3d_grad.py +0 -45
- mindspore/ops/_op_impl/tbe/avg_pool_ds.py +0 -44
- mindspore/ops/_op_impl/tbe/avg_pool_grad.py +0 -42
- mindspore/ops/_op_impl/tbe/avg_pool_grad_vm.py +0 -42
- mindspore/ops/_op_impl/tbe/basic_lstm_cell.py +0 -57
- mindspore/ops/_op_impl/tbe/basic_lstm_cell_c_state_grad.py +0 -50
- mindspore/ops/_op_impl/tbe/basic_lstm_cell_c_state_grad_v2.py +0 -51
- mindspore/ops/_op_impl/tbe/basic_lstm_cell_input_grad.py +0 -42
- mindspore/ops/_op_impl/tbe/basic_lstm_cell_weight_grad.py +0 -41
- mindspore/ops/_op_impl/tbe/batch_matmul.py +0 -42
- mindspore/ops/_op_impl/tbe/batch_matmul_ds.py +0 -41
- mindspore/ops/_op_impl/tbe/batch_matmul_v2.py +0 -47
- mindspore/ops/_op_impl/tbe/batch_to_space.py +0 -38
- mindspore/ops/_op_impl/tbe/batch_to_space_nd.py +0 -38
- mindspore/ops/_op_impl/tbe/batch_to_space_nd_ds.py +0 -39
- mindspore/ops/_op_impl/tbe/batch_to_space_nd_v2.py +0 -41
- mindspore/ops/_op_impl/tbe/batchnorm.py +0 -58
- mindspore/ops/_op_impl/tbe/batchnorm_grad.py +0 -58
- mindspore/ops/_op_impl/tbe/bce_with_logits_loss.py +0 -42
- mindspore/ops/_op_impl/tbe/bessel_i0e.py +0 -37
- mindspore/ops/_op_impl/tbe/bessel_i0e_ds.py +0 -38
- mindspore/ops/_op_impl/tbe/bessel_i1e.py +0 -37
- mindspore/ops/_op_impl/tbe/bessel_i1e_ds.py +0 -38
- mindspore/ops/_op_impl/tbe/bias_add.py +0 -38
- mindspore/ops/_op_impl/tbe/bias_add_ds.py +0 -39
- mindspore/ops/_op_impl/tbe/bias_add_grad.py +0 -53
- mindspore/ops/_op_impl/tbe/binary_cross_entropy.py +0 -39
- mindspore/ops/_op_impl/tbe/binary_cross_entropy_ds.py +0 -40
- mindspore/ops/_op_impl/tbe/binary_cross_entropy_grad.py +0 -44
- mindspore/ops/_op_impl/tbe/binary_cross_entropy_grad_ds.py +0 -45
- mindspore/ops/_op_impl/tbe/bitwise_and.py +0 -39
- mindspore/ops/_op_impl/tbe/bitwise_and_ds.py +0 -40
- mindspore/ops/_op_impl/tbe/bitwise_or.py +0 -39
- mindspore/ops/_op_impl/tbe/bitwise_or_ds.py +0 -40
- mindspore/ops/_op_impl/tbe/bitwise_xor.py +0 -39
- mindspore/ops/_op_impl/tbe/bitwise_xor_ds.py +0 -40
- mindspore/ops/_op_impl/tbe/bn_infer.py +0 -43
- mindspore/ops/_op_impl/tbe/bn_infer_ds.py +0 -45
- mindspore/ops/_op_impl/tbe/bn_infer_grad.py +0 -41
- mindspore/ops/_op_impl/tbe/bn_infer_grad_ds.py +0 -40
- mindspore/ops/_op_impl/tbe/bn_inference.py +0 -50
- mindspore/ops/_op_impl/tbe/bn_training_reduce.py +0 -38
- mindspore/ops/_op_impl/tbe/bn_training_reduce_ds.py +0 -39
- mindspore/ops/_op_impl/tbe/bn_training_reduce_grad.py +0 -46
- mindspore/ops/_op_impl/tbe/bn_training_reduce_grad_ds.py +0 -47
- mindspore/ops/_op_impl/tbe/bn_training_update.py +0 -52
- mindspore/ops/_op_impl/tbe/bn_training_update_ds.py +0 -53
- mindspore/ops/_op_impl/tbe/bn_training_update_grad.py +0 -44
- mindspore/ops/_op_impl/tbe/bn_training_update_grad_ds.py +0 -45
- mindspore/ops/_op_impl/tbe/bn_training_update_v2.py +0 -48
- mindspore/ops/_op_impl/tbe/bn_training_update_v3.py +0 -51
- mindspore/ops/_op_impl/tbe/bounding_box_decode.py +0 -41
- mindspore/ops/_op_impl/tbe/bounding_box_decode_ds.py +0 -42
- mindspore/ops/_op_impl/tbe/bounding_box_encode.py +0 -38
- mindspore/ops/_op_impl/tbe/broadcast_to.py +0 -40
- mindspore/ops/_op_impl/tbe/broadcast_to_ds.py +0 -44
- mindspore/ops/_op_impl/tbe/cast.py +0 -55
- mindspore/ops/_op_impl/tbe/cast_ds.py +0 -58
- mindspore/ops/_op_impl/tbe/cdist.py +0 -38
- mindspore/ops/_op_impl/tbe/cdist_grad.py +0 -42
- mindspore/ops/_op_impl/tbe/ceil.py +0 -37
- mindspore/ops/_op_impl/tbe/ceil_ds.py +0 -38
- mindspore/ops/_op_impl/tbe/celu.py +0 -39
- mindspore/ops/_op_impl/tbe/centralization.py +0 -39
- mindspore/ops/_op_impl/tbe/check_valid.py +0 -38
- mindspore/ops/_op_impl/tbe/check_valid_ds.py +0 -39
- mindspore/ops/_op_impl/tbe/clip_by_norm_no_div_sum.py +0 -41
- mindspore/ops/_op_impl/tbe/clip_by_norm_no_div_sum_ds.py +0 -42
- mindspore/ops/_op_impl/tbe/clip_by_value.py +0 -41
- mindspore/ops/_op_impl/tbe/clip_by_value_ds.py +0 -42
- mindspore/ops/_op_impl/tbe/concat.py +0 -40
- mindspore/ops/_op_impl/tbe/concat_ds.py +0 -38
- mindspore/ops/_op_impl/tbe/confusion_matrix.py +0 -63
- mindspore/ops/_op_impl/tbe/confusion_mul_grad.py +0 -40
- mindspore/ops/_op_impl/tbe/confusion_softmax_grad.py +0 -41
- mindspore/ops/_op_impl/tbe/confusion_transpose_d.py +0 -39
- mindspore/ops/_op_impl/tbe/conv2d.py +0 -47
- mindspore/ops/_op_impl/tbe/conv2d_backprop_filter.py +0 -42
- mindspore/ops/_op_impl/tbe/conv2d_backprop_filter_ds.py +0 -43
- mindspore/ops/_op_impl/tbe/conv2d_backprop_input.py +0 -42
- mindspore/ops/_op_impl/tbe/conv2d_backprop_input_ds.py +0 -44
- mindspore/ops/_op_impl/tbe/conv2d_ds.py +0 -47
- mindspore/ops/_op_impl/tbe/conv2d_transpose.py +0 -48
- mindspore/ops/_op_impl/tbe/conv3d.py +0 -45
- mindspore/ops/_op_impl/tbe/conv3d_backprop_filter.py +0 -42
- mindspore/ops/_op_impl/tbe/conv3d_backprop_input.py +0 -42
- mindspore/ops/_op_impl/tbe/conv3d_transpose.py +0 -47
- mindspore/ops/_op_impl/tbe/conv3d_transpose_ds.py +0 -48
- mindspore/ops/_op_impl/tbe/cos.py +0 -37
- mindspore/ops/_op_impl/tbe/cos_ds.py +0 -38
- mindspore/ops/_op_impl/tbe/cosh.py +0 -37
- mindspore/ops/_op_impl/tbe/cosh_ds.py +0 -38
- mindspore/ops/_op_impl/tbe/ctc_loss_v2.py +0 -42
- mindspore/ops/_op_impl/tbe/ctc_loss_v2_grad.py +0 -44
- mindspore/ops/_op_impl/tbe/cum_sum.py +0 -42
- mindspore/ops/_op_impl/tbe/cum_sum_ds.py +0 -44
- mindspore/ops/_op_impl/tbe/cummin.py +0 -41
- mindspore/ops/_op_impl/tbe/cumprod.py +0 -42
- mindspore/ops/_op_impl/tbe/data_format_dim_map.py +0 -38
- mindspore/ops/_op_impl/tbe/data_format_dim_map_ds.py +0 -40
- mindspore/ops/_op_impl/tbe/deformable_offsets.py +0 -45
- mindspore/ops/_op_impl/tbe/deformable_offsets_grad.py +0 -48
- mindspore/ops/_op_impl/tbe/depth_to_space_ds.py +0 -49
- mindspore/ops/_op_impl/tbe/depthwise_conv2d.py +0 -44
- mindspore/ops/_op_impl/tbe/depthwise_conv2d_backprop_filter.py +0 -41
- mindspore/ops/_op_impl/tbe/depthwise_conv2d_backprop_input.py +0 -41
- mindspore/ops/_op_impl/tbe/diag.py +0 -38
- mindspore/ops/_op_impl/tbe/diag_part.py +0 -38
- mindspore/ops/_op_impl/tbe/dilation.py +0 -40
- mindspore/ops/_op_impl/tbe/div.py +0 -41
- mindspore/ops/_op_impl/tbe/div_ds.py +0 -42
- mindspore/ops/_op_impl/tbe/div_no_nan.py +0 -41
- mindspore/ops/_op_impl/tbe/div_no_nan_ds.py +0 -42
- mindspore/ops/_op_impl/tbe/dropout_do_mask.py +0 -38
- mindspore/ops/_op_impl/tbe/dropout_do_mask_ds.py +0 -39
- mindspore/ops/_op_impl/tbe/dropout_do_mask_v3.py +0 -39
- mindspore/ops/_op_impl/tbe/dynamic_atomic_addr_clean.py +0 -34
- mindspore/ops/_op_impl/tbe/dynamic_gru_v2.py +0 -95
- mindspore/ops/_op_impl/tbe/dynamic_rnn.py +0 -82
- mindspore/ops/_op_impl/tbe/elu.py +0 -38
- mindspore/ops/_op_impl/tbe/elu_ds.py +0 -39
- mindspore/ops/_op_impl/tbe/elu_grad.py +0 -43
- mindspore/ops/_op_impl/tbe/elu_grad_ds.py +0 -44
- mindspore/ops/_op_impl/tbe/equal.py +0 -42
- mindspore/ops/_op_impl/tbe/equal_ds.py +0 -42
- mindspore/ops/_op_impl/tbe/erf.py +0 -37
- mindspore/ops/_op_impl/tbe/erf_ds.py +0 -38
- mindspore/ops/_op_impl/tbe/erfc.py +0 -37
- mindspore/ops/_op_impl/tbe/erfc_ds.py +0 -38
- mindspore/ops/_op_impl/tbe/erfinv.py +0 -36
- mindspore/ops/_op_impl/tbe/exp.py +0 -40
- mindspore/ops/_op_impl/tbe/exp_ds.py +0 -41
- mindspore/ops/_op_impl/tbe/expand_dims.py +0 -38
- mindspore/ops/_op_impl/tbe/expm1.py +0 -37
- mindspore/ops/_op_impl/tbe/expm1_ds.py +0 -38
- mindspore/ops/_op_impl/tbe/extract_image_patches.py +0 -41
- mindspore/ops/_op_impl/tbe/extract_volume_patches.py +0 -39
- mindspore/ops/_op_impl/tbe/fake_quant_with_min_max_vars.py +0 -39
- mindspore/ops/_op_impl/tbe/fake_quant_with_min_max_vars_gradient.py +0 -43
- mindspore/ops/_op_impl/tbe/fake_quant_with_min_max_vars_per_channel.py +0 -39
- mindspore/ops/_op_impl/tbe/fake_quant_with_min_max_vars_per_channel_gradient.py +0 -43
- mindspore/ops/_op_impl/tbe/fast_gelu.py +0 -37
- mindspore/ops/_op_impl/tbe/fast_gelu_ds.py +0 -38
- mindspore/ops/_op_impl/tbe/fast_gelu_grad.py +0 -41
- mindspore/ops/_op_impl/tbe/fast_gelu_grad_ds.py +0 -42
- mindspore/ops/_op_impl/tbe/fill.py +0 -56
- mindspore/ops/_op_impl/tbe/fill_ds.py +0 -42
- mindspore/ops/_op_impl/tbe/flatten.py +0 -48
- mindspore/ops/_op_impl/tbe/floor.py +0 -37
- mindspore/ops/_op_impl/tbe/floor_div.py +0 -41
- mindspore/ops/_op_impl/tbe/floor_div_ds.py +0 -42
- mindspore/ops/_op_impl/tbe/floor_ds.py +0 -38
- mindspore/ops/_op_impl/tbe/floor_mod.py +0 -39
- mindspore/ops/_op_impl/tbe/floor_mod_ds.py +0 -40
- mindspore/ops/_op_impl/tbe/fused_dbn_dw.py +0 -52
- mindspore/ops/_op_impl/tbe/fused_mul_add.py +0 -38
- mindspore/ops/_op_impl/tbe/fused_mul_add_n.py +0 -48
- mindspore/ops/_op_impl/tbe/fused_mul_add_n_l2loss.py +0 -53
- mindspore/ops/_op_impl/tbe/fused_mul_apply_momentum.py +0 -57
- mindspore/ops/_op_impl/tbe/fused_mul_apply_momentum_extern.py +0 -67
- mindspore/ops/_op_impl/tbe/gather_nd.py +0 -52
- mindspore/ops/_op_impl/tbe/gather_nd_ds.py +0 -48
- mindspore/ops/_op_impl/tbe/gather_v2.py +0 -56
- mindspore/ops/_op_impl/tbe/gather_v2_ds.py +0 -68
- mindspore/ops/_op_impl/tbe/gelu.py +0 -37
- mindspore/ops/_op_impl/tbe/gelu_ds.py +0 -38
- mindspore/ops/_op_impl/tbe/gelu_grad.py +0 -42
- mindspore/ops/_op_impl/tbe/gelu_grad_ds.py +0 -43
- mindspore/ops/_op_impl/tbe/ger.py +0 -43
- mindspore/ops/_op_impl/tbe/ger_ds.py +0 -44
- mindspore/ops/_op_impl/tbe/greater.py +0 -43
- mindspore/ops/_op_impl/tbe/greater_equal.py +0 -41
- mindspore/ops/_op_impl/tbe/greater_equal_ds.py +0 -42
- mindspore/ops/_op_impl/tbe/gru_v2_hidden_grad.py +0 -51
- mindspore/ops/_op_impl/tbe/gru_v2_hidden_grad_cell.py +0 -52
- mindspore/ops/_op_impl/tbe/hard_swish.py +0 -37
- mindspore/ops/_op_impl/tbe/hard_swish_ds.py +0 -38
- mindspore/ops/_op_impl/tbe/hard_swish_grad.py +0 -41
- mindspore/ops/_op_impl/tbe/hard_swish_grad_ds.py +0 -42
- mindspore/ops/_op_impl/tbe/histogram_fixed_width.py +0 -40
- mindspore/ops/_op_impl/tbe/hshrink.py +0 -33
- mindspore/ops/_op_impl/tbe/hshrink_grad.py +0 -37
- mindspore/ops/_op_impl/tbe/hsigmoid.py +0 -45
- mindspore/ops/_op_impl/tbe/hsigmoid_grad.py +0 -39
- mindspore/ops/_op_impl/tbe/ifmr.py +0 -47
- mindspore/ops/_op_impl/tbe/ifmr_ds.py +0 -48
- mindspore/ops/_op_impl/tbe/im2col.py +0 -42
- mindspore/ops/_op_impl/tbe/in_top_k.py +0 -37
- mindspore/ops/_op_impl/tbe/inplace_add.py +0 -39
- mindspore/ops/_op_impl/tbe/inplace_index_add.py +0 -46
- mindspore/ops/_op_impl/tbe/inplace_sub.py +0 -39
- mindspore/ops/_op_impl/tbe/inplace_update.py +0 -39
- mindspore/ops/_op_impl/tbe/inplace_update_ds.py +0 -40
- mindspore/ops/_op_impl/tbe/inv.py +0 -38
- mindspore/ops/_op_impl/tbe/inv_ds.py +0 -39
- mindspore/ops/_op_impl/tbe/inv_grad.py +0 -40
- mindspore/ops/_op_impl/tbe/inv_grad_ds.py +0 -41
- mindspore/ops/_op_impl/tbe/invert.py +0 -37
- mindspore/ops/_op_impl/tbe/invert_ds.py +0 -38
- mindspore/ops/_op_impl/tbe/iou.py +0 -38
- mindspore/ops/_op_impl/tbe/iou_ds.py +0 -39
- mindspore/ops/_op_impl/tbe/is_close.py +0 -40
- mindspore/ops/_op_impl/tbe/kl_div_loss.py +0 -38
- mindspore/ops/_op_impl/tbe/kl_div_loss_ds.py +0 -39
- mindspore/ops/_op_impl/tbe/kl_div_loss_grad.py +0 -40
- mindspore/ops/_op_impl/tbe/l2_loss.py +0 -36
- mindspore/ops/_op_impl/tbe/l2_loss_ds.py +0 -37
- mindspore/ops/_op_impl/tbe/l2_normalize.py +0 -38
- mindspore/ops/_op_impl/tbe/l2_normalize_grad.py +0 -40
- mindspore/ops/_op_impl/tbe/lamb_apply_optimizer_assign.py +0 -55
- mindspore/ops/_op_impl/tbe/lamb_apply_weight_assign.py +0 -42
- mindspore/ops/_op_impl/tbe/lamb_next_mv.py +0 -59
- mindspore/ops/_op_impl/tbe/lamb_next_mv_with_decay.py +0 -59
- mindspore/ops/_op_impl/tbe/lamb_next_right.py +0 -44
- mindspore/ops/_op_impl/tbe/lamb_update_with_lr.py +0 -48
- mindspore/ops/_op_impl/tbe/lamb_update_with_lr_v2.py +0 -44
- mindspore/ops/_op_impl/tbe/lars_update.py +0 -50
- mindspore/ops/_op_impl/tbe/lars_update_ds.py +0 -51
- mindspore/ops/_op_impl/tbe/layer_norm.py +0 -46
- mindspore/ops/_op_impl/tbe/layer_norm_beta_gamma_backprop.py +0 -44
- mindspore/ops/_op_impl/tbe/layer_norm_beta_gamma_backprop_ds.py +0 -45
- mindspore/ops/_op_impl/tbe/layer_norm_beta_gamma_backprop_v2.py +0 -40
- mindspore/ops/_op_impl/tbe/layer_norm_beta_gamma_backprop_v2_ds.py +0 -41
- mindspore/ops/_op_impl/tbe/layer_norm_ds.py +0 -47
- mindspore/ops/_op_impl/tbe/layer_norm_grad.py +0 -48
- mindspore/ops/_op_impl/tbe/layer_norm_x_backprop.py +0 -43
- mindspore/ops/_op_impl/tbe/layer_norm_x_backprop_ds.py +0 -44
- mindspore/ops/_op_impl/tbe/layer_norm_x_backprop_v2.py +0 -45
- mindspore/ops/_op_impl/tbe/layer_norm_x_backprop_v2_ds.py +0 -45
- mindspore/ops/_op_impl/tbe/lerp.py +0 -38
- mindspore/ops/_op_impl/tbe/less.py +0 -41
- mindspore/ops/_op_impl/tbe/less_ds.py +0 -42
- mindspore/ops/_op_impl/tbe/less_equal.py +0 -41
- mindspore/ops/_op_impl/tbe/less_equal_ds.py +0 -42
- mindspore/ops/_op_impl/tbe/log.py +0 -40
- mindspore/ops/_op_impl/tbe/log1p.py +0 -37
- mindspore/ops/_op_impl/tbe/log1p_ds.py +0 -38
- mindspore/ops/_op_impl/tbe/log_ds.py +0 -41
- mindspore/ops/_op_impl/tbe/logical_and.py +0 -37
- mindspore/ops/_op_impl/tbe/logical_and_ds.py +0 -38
- mindspore/ops/_op_impl/tbe/logical_not.py +0 -36
- mindspore/ops/_op_impl/tbe/logical_not_ds.py +0 -37
- mindspore/ops/_op_impl/tbe/logical_or.py +0 -37
- mindspore/ops/_op_impl/tbe/logical_or_ds.py +0 -38
- mindspore/ops/_op_impl/tbe/logsoftmax.py +0 -37
- mindspore/ops/_op_impl/tbe/logsoftmax_ds.py +0 -38
- mindspore/ops/_op_impl/tbe/logsoftmax_grad.py +0 -38
- mindspore/ops/_op_impl/tbe/logsoftmax_grad_ds.py +0 -39
- mindspore/ops/_op_impl/tbe/lp_norm.py +0 -40
- mindspore/ops/_op_impl/tbe/lp_norm_ds.py +0 -41
- mindspore/ops/_op_impl/tbe/lrn.py +0 -41
- mindspore/ops/_op_impl/tbe/lrn_grad.py +0 -42
- mindspore/ops/_op_impl/tbe/lstm_input_grad.py +0 -51
- mindspore/ops/_op_impl/tbe/masked_fill.py +0 -40
- mindspore/ops/_op_impl/tbe/masked_fill_ds.py +0 -41
- mindspore/ops/_op_impl/tbe/matmul.py +0 -53
- mindspore/ops/_op_impl/tbe/matmul_ds.py +0 -47
- mindspore/ops/_op_impl/tbe/matmul_v2.py +0 -50
- mindspore/ops/_op_impl/tbe/matrix_diag.py +0 -45
- mindspore/ops/_op_impl/tbe/matrix_diag_part.py +0 -45
- mindspore/ops/_op_impl/tbe/matrix_set_diag.py +0 -46
- mindspore/ops/_op_impl/tbe/max_pool.py +0 -39
- mindspore/ops/_op_impl/tbe/max_pool3d.py +0 -44
- mindspore/ops/_op_impl/tbe/max_pool3d_grad.py +0 -43
- mindspore/ops/_op_impl/tbe/max_pool3d_grad_grad.py +0 -44
- mindspore/ops/_op_impl/tbe/max_pool_ds.py +0 -40
- mindspore/ops/_op_impl/tbe/max_pool_grad.py +0 -43
- mindspore/ops/_op_impl/tbe/max_pool_grad_grad.py +0 -41
- mindspore/ops/_op_impl/tbe/max_pool_grad_grad_with_argmax.py +0 -41
- mindspore/ops/_op_impl/tbe/max_pool_grad_with_argmax.py +0 -42
- mindspore/ops/_op_impl/tbe/max_pool_with_argmax.py +0 -40
- mindspore/ops/_op_impl/tbe/maximum.py +0 -39
- mindspore/ops/_op_impl/tbe/maximum_ds.py +0 -40
- mindspore/ops/_op_impl/tbe/maximum_grad.py +0 -46
- mindspore/ops/_op_impl/tbe/maximum_grad_ds.py +0 -47
- mindspore/ops/_op_impl/tbe/mem_set.py +0 -38
- mindspore/ops/_op_impl/tbe/minimum.py +0 -40
- mindspore/ops/_op_impl/tbe/minimum_ds.py +0 -41
- mindspore/ops/_op_impl/tbe/minimum_grad.py +0 -46
- mindspore/ops/_op_impl/tbe/minimum_grad_ds.py +0 -47
- mindspore/ops/_op_impl/tbe/mish.py +0 -37
- mindspore/ops/_op_impl/tbe/mod.py +0 -41
- mindspore/ops/_op_impl/tbe/mod_ds.py +0 -42
- mindspore/ops/_op_impl/tbe/mul.py +0 -37
- mindspore/ops/_op_impl/tbe/mul_ds.py +0 -38
- mindspore/ops/_op_impl/tbe/mul_no_nan.py +0 -39
- mindspore/ops/_op_impl/tbe/mul_no_nan_ds.py +0 -40
- mindspore/ops/_op_impl/tbe/multilabel_margin_loss.py +0 -39
- mindspore/ops/_op_impl/tbe/neg.py +0 -39
- mindspore/ops/_op_impl/tbe/neg_ds.py +0 -40
- mindspore/ops/_op_impl/tbe/new_im2col.py +0 -40
- mindspore/ops/_op_impl/tbe/nll_loss.py +0 -41
- mindspore/ops/_op_impl/tbe/nll_loss_grad.py +0 -44
- mindspore/ops/_op_impl/tbe/nms_with_mask.py +0 -39
- mindspore/ops/_op_impl/tbe/not_equal.py +0 -41
- mindspore/ops/_op_impl/tbe/not_equal_ds.py +0 -42
- mindspore/ops/_op_impl/tbe/npu_alloc_float_status.py +0 -34
- mindspore/ops/_op_impl/tbe/npu_clear_float_status.py +0 -35
- mindspore/ops/_op_impl/tbe/npu_clear_float_status_v2.py +0 -35
- mindspore/ops/_op_impl/tbe/npu_get_float_status.py +0 -35
- mindspore/ops/_op_impl/tbe/npu_get_float_status_v2.py +0 -35
- mindspore/ops/_op_impl/tbe/one_hot.py +0 -48
- mindspore/ops/_op_impl/tbe/one_hot_ds.py +0 -45
- mindspore/ops/_op_impl/tbe/ones_like.py +0 -40
- mindspore/ops/_op_impl/tbe/ones_like_ds.py +0 -41
- mindspore/ops/_op_impl/tbe/p_s_r_o_i_pooling.py +0 -40
- mindspore/ops/_op_impl/tbe/p_s_r_o_i_pooling_grad.py +0 -40
- mindspore/ops/_op_impl/tbe/pack.py +0 -58
- mindspore/ops/_op_impl/tbe/pack_ds.py +0 -59
- mindspore/ops/_op_impl/tbe/pad_d.py +0 -40
- mindspore/ops/_op_impl/tbe/pad_d_ds.py +0 -41
- mindspore/ops/_op_impl/tbe/parallel_concat.py +0 -70
- mindspore/ops/_op_impl/tbe/parallel_resize_bilinear.py +0 -45
- mindspore/ops/_op_impl/tbe/parallel_resize_bilinear_grad.py +0 -44
- mindspore/ops/_op_impl/tbe/pdist.py +0 -36
- mindspore/ops/_op_impl/tbe/pooling.py +0 -46
- mindspore/ops/_op_impl/tbe/population_count.py +0 -38
- mindspore/ops/_op_impl/tbe/pow.py +0 -41
- mindspore/ops/_op_impl/tbe/pow_ds.py +0 -42
- mindspore/ops/_op_impl/tbe/prelu.py +0 -37
- mindspore/ops/_op_impl/tbe/prelu_ds.py +0 -38
- mindspore/ops/_op_impl/tbe/prelu_grad.py +0 -40
- mindspore/ops/_op_impl/tbe/range.py +0 -39
- mindspore/ops/_op_impl/tbe/real_div.py +0 -38
- mindspore/ops/_op_impl/tbe/real_div_ds.py +0 -39
- mindspore/ops/_op_impl/tbe/reciprocal.py +0 -36
- mindspore/ops/_op_impl/tbe/reciprocal_ds.py +0 -37
- mindspore/ops/_op_impl/tbe/reciprocal_grad.py +0 -38
- mindspore/ops/_op_impl/tbe/reciprocal_grad_ds.py +0 -39
- mindspore/ops/_op_impl/tbe/reduce_all.py +0 -38
- mindspore/ops/_op_impl/tbe/reduce_all_ds.py +0 -39
- mindspore/ops/_op_impl/tbe/reduce_any.py +0 -38
- mindspore/ops/_op_impl/tbe/reduce_any_ds.py +0 -39
- mindspore/ops/_op_impl/tbe/reduce_max.py +0 -43
- mindspore/ops/_op_impl/tbe/reduce_max_ds.py +0 -41
- mindspore/ops/_op_impl/tbe/reduce_mean.py +0 -40
- mindspore/ops/_op_impl/tbe/reduce_mean_ds.py +0 -42
- mindspore/ops/_op_impl/tbe/reduce_min.py +0 -41
- mindspore/ops/_op_impl/tbe/reduce_min_ds.py +0 -41
- mindspore/ops/_op_impl/tbe/reduce_prod.py +0 -42
- mindspore/ops/_op_impl/tbe/reduce_prod_ds.py +0 -41
- mindspore/ops/_op_impl/tbe/reduce_std.py +0 -44
- mindspore/ops/_op_impl/tbe/reduce_sum.py +0 -39
- mindspore/ops/_op_impl/tbe/reduce_sum_ds.py +0 -41
- mindspore/ops/_op_impl/tbe/relu.py +0 -39
- mindspore/ops/_op_impl/tbe/relu6.py +0 -38
- mindspore/ops/_op_impl/tbe/relu6_ds.py +0 -39
- mindspore/ops/_op_impl/tbe/relu6_grad.py +0 -43
- mindspore/ops/_op_impl/tbe/relu6_grad_ds.py +0 -44
- mindspore/ops/_op_impl/tbe/relu_ds.py +0 -40
- mindspore/ops/_op_impl/tbe/relu_grad.py +0 -41
- mindspore/ops/_op_impl/tbe/relu_grad_ds.py +0 -42
- mindspore/ops/_op_impl/tbe/relu_grad_v2.py +0 -40
- mindspore/ops/_op_impl/tbe/relu_grad_v2_ds.py +0 -41
- mindspore/ops/_op_impl/tbe/relu_v2.py +0 -40
- mindspore/ops/_op_impl/tbe/relu_v2_ds.py +0 -41
- mindspore/ops/_op_impl/tbe/renorm.py +0 -39
- mindspore/ops/_op_impl/tbe/resize_bilinear.py +0 -40
- mindspore/ops/_op_impl/tbe/resize_bilinear_grad.py +0 -41
- mindspore/ops/_op_impl/tbe/resize_bilinear_v2.py +0 -43
- mindspore/ops/_op_impl/tbe/resize_nearest_neighbor.py +0 -40
- mindspore/ops/_op_impl/tbe/resize_nearest_neighbor_ds.py +0 -40
- mindspore/ops/_op_impl/tbe/resize_nearest_neighbor_grad.py +0 -39
- mindspore/ops/_op_impl/tbe/resize_nearest_neighbor_grad_ds.py +0 -42
- mindspore/ops/_op_impl/tbe/reverse_v2_d.py +0 -37
- mindspore/ops/_op_impl/tbe/rint.py +0 -37
- mindspore/ops/_op_impl/tbe/rint_ds.py +0 -38
- mindspore/ops/_op_impl/tbe/roi_align.py +0 -43
- mindspore/ops/_op_impl/tbe/roi_align_ds.py +0 -44
- mindspore/ops/_op_impl/tbe/roi_align_grad.py +0 -43
- mindspore/ops/_op_impl/tbe/roi_align_grad_ds.py +0 -44
- mindspore/ops/_op_impl/tbe/roll.py +0 -42
- mindspore/ops/_op_impl/tbe/round.py +0 -38
- mindspore/ops/_op_impl/tbe/round_ds.py +0 -39
- mindspore/ops/_op_impl/tbe/rsqrt.py +0 -37
- mindspore/ops/_op_impl/tbe/rsqrt_ds.py +0 -38
- mindspore/ops/_op_impl/tbe/rsqrt_grad.py +0 -40
- mindspore/ops/_op_impl/tbe/rsqrt_grad_ds.py +0 -41
- mindspore/ops/_op_impl/tbe/scatter_add.py +0 -44
- mindspore/ops/_op_impl/tbe/scatter_div.py +0 -46
- mindspore/ops/_op_impl/tbe/scatter_max.py +0 -45
- mindspore/ops/_op_impl/tbe/scatter_min.py +0 -45
- mindspore/ops/_op_impl/tbe/scatter_mul.py +0 -44
- mindspore/ops/_op_impl/tbe/scatter_nd.py +0 -41
- mindspore/ops/_op_impl/tbe/scatter_nd_add.py +0 -45
- mindspore/ops/_op_impl/tbe/scatter_nd_d.py +0 -41
- mindspore/ops/_op_impl/tbe/scatter_nd_ds.py +0 -49
- mindspore/ops/_op_impl/tbe/scatter_nd_sub.py +0 -47
- mindspore/ops/_op_impl/tbe/scatter_nd_sub_ds.py +0 -48
- mindspore/ops/_op_impl/tbe/scatter_nd_update.py +0 -47
- mindspore/ops/_op_impl/tbe/scatter_nd_update_ds.py +0 -48
- mindspore/ops/_op_impl/tbe/scatter_non_aliasing_add.py +0 -39
- mindspore/ops/_op_impl/tbe/scatter_non_aliasing_add_ds.py +0 -40
- mindspore/ops/_op_impl/tbe/scatter_sub.py +0 -47
- mindspore/ops/_op_impl/tbe/scatter_sub_ds.py +0 -48
- mindspore/ops/_op_impl/tbe/scatter_update.py +0 -43
- mindspore/ops/_op_impl/tbe/select.py +0 -38
- mindspore/ops/_op_impl/tbe/select_ds.py +0 -39
- mindspore/ops/_op_impl/tbe/selu.py +0 -39
- mindspore/ops/_op_impl/tbe/selu_ds.py +0 -40
- mindspore/ops/_op_impl/tbe/sgd.py +0 -62
- mindspore/ops/_op_impl/tbe/sigmoid.py +0 -37
- mindspore/ops/_op_impl/tbe/sigmoid_cross_entropy_with_logits.py +0 -41
- mindspore/ops/_op_impl/tbe/sigmoid_cross_entropy_with_logits_ds.py +0 -42
- mindspore/ops/_op_impl/tbe/sigmoid_cross_entropy_with_logits_grad.py +0 -42
- mindspore/ops/_op_impl/tbe/sigmoid_cross_entropy_with_logits_grad_ds.py +0 -43
- mindspore/ops/_op_impl/tbe/sigmoid_ds.py +0 -38
- mindspore/ops/_op_impl/tbe/sigmoid_grad.py +0 -39
- mindspore/ops/_op_impl/tbe/sigmoid_grad_ds.py +0 -40
- mindspore/ops/_op_impl/tbe/sign.py +0 -38
- mindspore/ops/_op_impl/tbe/sign_ds.py +0 -39
- mindspore/ops/_op_impl/tbe/sin.py +0 -37
- mindspore/ops/_op_impl/tbe/sin_ds.py +0 -38
- mindspore/ops/_op_impl/tbe/sinh.py +0 -37
- mindspore/ops/_op_impl/tbe/sinh_ds.py +0 -38
- mindspore/ops/_op_impl/tbe/slice.py +0 -58
- mindspore/ops/_op_impl/tbe/smooth_l1_loss.py +0 -45
- mindspore/ops/_op_impl/tbe/smooth_l1_loss_ds.py +0 -46
- mindspore/ops/_op_impl/tbe/smooth_l1_loss_grad.py +0 -46
- mindspore/ops/_op_impl/tbe/smooth_l1_loss_grad_ds.py +0 -47
- mindspore/ops/_op_impl/tbe/soft_margin_loss.py +0 -38
- mindspore/ops/_op_impl/tbe/soft_margin_loss_grad.py +0 -39
- mindspore/ops/_op_impl/tbe/soft_shrink.py +0 -36
- mindspore/ops/_op_impl/tbe/soft_shrink_grad.py +0 -38
- mindspore/ops/_op_impl/tbe/softmax.py +0 -37
- mindspore/ops/_op_impl/tbe/softmax_cross_entropy_with_logits.py +0 -38
- mindspore/ops/_op_impl/tbe/softmax_cross_entropy_with_logits_ds.py +0 -39
- mindspore/ops/_op_impl/tbe/softmax_ds.py +0 -38
- mindspore/ops/_op_impl/tbe/softmax_grad_ext.py +0 -42
- mindspore/ops/_op_impl/tbe/softmax_v2_with_dropout_do_mask_v3.py +0 -39
- mindspore/ops/_op_impl/tbe/softplus.py +0 -37
- mindspore/ops/_op_impl/tbe/softplus_ds.py +0 -38
- mindspore/ops/_op_impl/tbe/softplus_grad.py +0 -38
- mindspore/ops/_op_impl/tbe/softplus_grad_ds.py +0 -38
- mindspore/ops/_op_impl/tbe/softsign.py +0 -37
- mindspore/ops/_op_impl/tbe/softsign_ds.py +0 -38
- mindspore/ops/_op_impl/tbe/sort.py +0 -38
- mindspore/ops/_op_impl/tbe/sort_ds.py +0 -39
- mindspore/ops/_op_impl/tbe/space_to_batch.py +0 -38
- mindspore/ops/_op_impl/tbe/space_to_batch_nd.py +0 -38
- mindspore/ops/_op_impl/tbe/space_to_depth.py +0 -47
- mindspore/ops/_op_impl/tbe/sparse_apply_adadelta.py +0 -56
- mindspore/ops/_op_impl/tbe/sparse_apply_adagrad.py +0 -45
- mindspore/ops/_op_impl/tbe/sparse_apply_adagrad_ds.py +0 -46
- mindspore/ops/_op_impl/tbe/sparse_apply_adagrad_v2.py +0 -46
- mindspore/ops/_op_impl/tbe/sparse_apply_adagrad_v2_ds.py +0 -47
- mindspore/ops/_op_impl/tbe/sparse_apply_ftrl_d.py +0 -53
- mindspore/ops/_op_impl/tbe/sparse_apply_ftrl_d_ds.py +0 -50
- mindspore/ops/_op_impl/tbe/sparse_apply_ftrl_v2.py +0 -50
- mindspore/ops/_op_impl/tbe/sparse_apply_proximal_adagrad.py +0 -66
- mindspore/ops/_op_impl/tbe/sparse_apply_proximal_adagrad_ds.py +0 -67
- mindspore/ops/_op_impl/tbe/sparse_apply_r_m_s_prop.py +0 -57
- mindspore/ops/_op_impl/tbe/sparse_apply_r_m_s_prop_ds.py +0 -58
- mindspore/ops/_op_impl/tbe/sparse_gather_v2.py +0 -56
- mindspore/ops/_op_impl/tbe/sparse_gather_v2_ds.py +0 -58
- mindspore/ops/_op_impl/tbe/split_d.py +0 -38
- mindspore/ops/_op_impl/tbe/split_d_ds.py +0 -39
- mindspore/ops/_op_impl/tbe/split_v.py +0 -39
- mindspore/ops/_op_impl/tbe/splitv.py +0 -39
- mindspore/ops/_op_impl/tbe/sqrt.py +0 -37
- mindspore/ops/_op_impl/tbe/sqrt_ds.py +0 -38
- mindspore/ops/_op_impl/tbe/sqrt_grad.py +0 -43
- mindspore/ops/_op_impl/tbe/sqrt_grad_ds.py +0 -44
- mindspore/ops/_op_impl/tbe/square.py +0 -38
- mindspore/ops/_op_impl/tbe/square_ds.py +0 -39
- mindspore/ops/_op_impl/tbe/square_sum_all.py +0 -40
- mindspore/ops/_op_impl/tbe/square_sum_all_ds.py +0 -41
- mindspore/ops/_op_impl/tbe/square_sum_v1.py +0 -38
- mindspore/ops/_op_impl/tbe/square_sum_v1_ds.py +0 -39
- mindspore/ops/_op_impl/tbe/square_sum_v2.py +0 -39
- mindspore/ops/_op_impl/tbe/squared_difference.py +0 -39
- mindspore/ops/_op_impl/tbe/squared_difference_ds.py +0 -41
- mindspore/ops/_op_impl/tbe/squeeze.py +0 -37
- mindspore/ops/_op_impl/tbe/strided_read.py +0 -38
- mindspore/ops/_op_impl/tbe/strided_slice_d.py +0 -44
- mindspore/ops/_op_impl/tbe/strided_slice_ds.py +0 -71
- mindspore/ops/_op_impl/tbe/strided_slice_grad_d.py +0 -51
- mindspore/ops/_op_impl/tbe/strided_slice_grad_ds.py +0 -57
- mindspore/ops/_op_impl/tbe/strided_write.py +0 -38
- mindspore/ops/_op_impl/tbe/sub.py +0 -39
- mindspore/ops/_op_impl/tbe/sub_ds.py +0 -40
- mindspore/ops/_op_impl/tbe/tan.py +0 -38
- mindspore/ops/_op_impl/tbe/tan_ds.py +0 -39
- mindspore/ops/_op_impl/tbe/tanh.py +0 -37
- mindspore/ops/_op_impl/tbe/tanh_ds.py +0 -38
- mindspore/ops/_op_impl/tbe/tanh_grad.py +0 -39
- mindspore/ops/_op_impl/tbe/tanh_grad_ds.py +0 -40
- mindspore/ops/_op_impl/tbe/tensor_move.py +0 -49
- mindspore/ops/_op_impl/tbe/tensor_move_ds.py +0 -50
- mindspore/ops/_op_impl/tbe/tensor_scatter_update.py +0 -41
- mindspore/ops/_op_impl/tbe/tile.py +0 -37
- mindspore/ops/_op_impl/tbe/tile_ds.py +0 -42
- mindspore/ops/_op_impl/tbe/top_k.py +0 -42
- mindspore/ops/_op_impl/tbe/top_k_ds.py +0 -43
- mindspore/ops/_op_impl/tbe/trans_data.py +0 -167
- mindspore/ops/_op_impl/tbe/trans_data_ds.py +0 -180
- mindspore/ops/_op_impl/tbe/trans_data_rnn.py +0 -44
- mindspore/ops/_op_impl/tbe/transpose.py +0 -60
- mindspore/ops/_op_impl/tbe/transpose_d.py +0 -47
- mindspore/ops/_op_impl/tbe/transpose_nod.py +0 -60
- mindspore/ops/_op_impl/tbe/trunc.py +0 -39
- mindspore/ops/_op_impl/tbe/truncate_div.py +0 -41
- mindspore/ops/_op_impl/tbe/truncate_div_ds.py +0 -42
- mindspore/ops/_op_impl/tbe/truncate_mod.py +0 -41
- mindspore/ops/_op_impl/tbe/truncate_mod_ds.py +0 -42
- mindspore/ops/_op_impl/tbe/unpack.py +0 -38
- mindspore/ops/_op_impl/tbe/unpack_ds.py +0 -39
- mindspore/ops/_op_impl/tbe/unsorted_segment_max.py +0 -49
- mindspore/ops/_op_impl/tbe/unsorted_segment_max_ds.py +0 -40
- mindspore/ops/_op_impl/tbe/unsorted_segment_min.py +0 -49
- mindspore/ops/_op_impl/tbe/unsorted_segment_min_ds.py +0 -40
- mindspore/ops/_op_impl/tbe/unsorted_segment_prod.py +0 -49
- mindspore/ops/_op_impl/tbe/unsorted_segment_prod_ds.py +0 -38
- mindspore/ops/_op_impl/tbe/unsorted_segment_sum.py +0 -38
- mindspore/ops/_op_impl/tbe/unsorted_segment_sum_ds.py +0 -41
- mindspore/ops/_op_impl/tbe/wts_arq.py +0 -40
- mindspore/ops/_op_impl/tbe/xdivy.py +0 -38
- mindspore/ops/_op_impl/tbe/xdivy_ds.py +0 -39
- mindspore/ops/_op_impl/tbe/xlogy.py +0 -38
- mindspore/ops/_op_impl/tbe/xlogy_ds.py +0 -39
- mindspore/ops/_op_impl/tbe/zeros_like.py +0 -41
- mindspore/ops/_op_impl/tbe/zeros_like_ds.py +0 -42
- mindspore/ops/_tracefunc.py +0 -241
- mindspore/ops/arg_dtype_cast.py +0 -54
- mindspore/ops/silent_check.py +0 -162
- mindspore/profiler/parser/msadvisor_analyzer.py +0 -82
- mindspore/profiler/parser/msadvisor_parser.py +0 -240
- mindspore/rewrite/api/tree_node_helper.py +0 -60
- mindspore/rewrite/ast_helpers/ast_creator.py +0 -115
- mindspore/rewrite/ast_transformers/flatten_recursive_stmt.py +0 -267
- mindspore/rewrite/ast_transformers/remove_return_out_of_if.py +0 -228
- mindspore/rewrite/namespace.py +0 -53
- mindspore-2.2.14.dist-info/RECORD +0 -1924
- {mindspore-2.2.14.dist-info → mindspore-2.4.0.dist-info}/WHEEL +0 -0
- {mindspore-2.2.14.dist-info → mindspore-2.4.0.dist-info}/top_level.txt +0 -0
mindspore/profiler/profiling.py
CHANGED
|
@@ -17,12 +17,13 @@ import os
|
|
|
17
17
|
import stat
|
|
18
18
|
import time
|
|
19
19
|
import json
|
|
20
|
+
from json import JSONDecodeError
|
|
20
21
|
import glob
|
|
21
|
-
import subprocess
|
|
22
|
-
import csv
|
|
23
22
|
import socket
|
|
24
|
-
import
|
|
23
|
+
import multiprocessing
|
|
25
24
|
from enum import Enum
|
|
25
|
+
from typing import List
|
|
26
|
+
from sys import getsizeof
|
|
26
27
|
import numpy as np
|
|
27
28
|
|
|
28
29
|
from mindspore import log as logger, context
|
|
@@ -30,34 +31,42 @@ from mindspore.context import get_auto_parallel_context
|
|
|
30
31
|
from mindspore.communication.management import GlobalComm, get_rank, get_group_size, get_local_rank
|
|
31
32
|
import mindspore._c_expression as c_expression
|
|
32
33
|
import mindspore._c_dataengine as cde
|
|
34
|
+
from mindspore._c_expression import _framework_profiler_enable_mi
|
|
33
35
|
from mindspore.profiler.common.exceptions.exceptions import ProfilerFileNotFoundException, \
|
|
34
|
-
ProfilerIOException, ProfilerException, ProfilerRawFileException
|
|
36
|
+
ProfilerIOException, ProfilerException, ProfilerRawFileException, ProfilerParamTypeErrorException
|
|
35
37
|
from mindspore.profiler.common.exceptions.exceptions import ProfilerPathErrorException
|
|
36
38
|
from mindspore.profiler.common.exceptions.exceptions import ProfilerDirNotFoundException
|
|
37
|
-
from mindspore.profiler.common.util import get_file_path
|
|
39
|
+
from mindspore.profiler.common.util import get_file_path, ProfilerPathManager
|
|
40
|
+
from mindspore.profiler.common.process_pool import MultiProcessPool
|
|
38
41
|
from mindspore.profiler.common.validator.validate_path import validate_and_normalize_path
|
|
39
42
|
from mindspore.profiler.parser.framework_parser import GpuFrameWorkParser, DynamicFrameWorkParser
|
|
40
43
|
from mindspore.profiler.parser.integrator import Integrator, DeviceTarget
|
|
44
|
+
from mindspore.profiler.parser.ascend_analysis.function_event import CANNEvent
|
|
41
45
|
from mindspore.profiler.parser.cpu_gpu_timeline_generator import GpuTimelineGenerator, CpuTimelineGenerator
|
|
42
46
|
from mindspore.profiler.parser.ascend_timeline_generator import AscendTimelineGenerator
|
|
43
|
-
from mindspore.profiler.parser.memory_usage_parser import MemoryUsageParser
|
|
44
47
|
from mindspore.profiler.parser.minddata_parser import MinddataParser
|
|
45
48
|
from mindspore.profiler.parser.minddata_analyzer import MinddataProfilingAnalyzer
|
|
46
49
|
from mindspore.profiler.parser.minddata_pipeline_parser import \
|
|
47
50
|
MinddataPipelineParser
|
|
48
|
-
from mindspore.profiler.parser.step_trace_parser import GpuStepTraceParser
|
|
49
|
-
from mindspore.profiler.parser.msadvisor_analyzer import Msadvisor
|
|
51
|
+
from mindspore.profiler.parser.step_trace_parser import GpuStepTraceParser
|
|
50
52
|
from mindspore.profiler.parser.profiler_info import ProfilerInfo
|
|
51
53
|
from mindspore.common.api import _pynative_executor
|
|
52
54
|
from mindspore.profiler.parser.ascend_msprof_exporter import AscendMsprofExporter
|
|
53
|
-
from mindspore.profiler.parser.ascend_msprof_generator import AscendMsprofDataGenerator
|
|
55
|
+
from mindspore.profiler.parser.ascend_msprof_generator import AscendMsprofDataGenerator
|
|
54
56
|
from mindspore.profiler.parser.ascend_fpbp_generator import AscendFPBPGenerator
|
|
55
57
|
from mindspore.profiler.parser.ascend_op_generator import AscendOPGenerator
|
|
56
58
|
from mindspore.profiler.parser.ascend_steptrace_generator import AscendStepTraceGenerator
|
|
57
59
|
from mindspore.profiler.parser.ascend_flops_generator import AscendFlopsGenerator
|
|
58
60
|
from mindspore.profiler.parser.ascend_cluster_generator import AscendClusterGenerator
|
|
59
|
-
from mindspore.profiler.parser.ascend_hccl_generator import AscendHCCLGenerator
|
|
61
|
+
from mindspore.profiler.parser.ascend_hccl_generator import AscendHCCLGenerator
|
|
60
62
|
from mindspore.profiler.parser.ascend_communicate_generator import AscendCommunicationGenerator
|
|
63
|
+
from mindspore.profiler.parser.ascend_memory_generator import AscendMemoryGenerator
|
|
64
|
+
from mindspore.profiler.parser.ascend_integrate_generator import AscendIntegrateGenerator
|
|
65
|
+
from mindspore.profiler.parser.ascend_analysis.file_manager import FileManager
|
|
66
|
+
from mindspore.profiler.parser.ascend_analysis.path_manager import PathManager
|
|
67
|
+
from mindspore.profiler.parser.ascend_analysis.constant import Constant
|
|
68
|
+
from mindspore.profiler.common.util import timeit
|
|
69
|
+
|
|
61
70
|
|
|
62
71
|
INIT_OP_NAME = 'Default/InitDataSetQueue'
|
|
63
72
|
|
|
@@ -68,10 +77,24 @@ AICORE_METRICS_DICT = {
|
|
|
68
77
|
3: "MemoryL0",
|
|
69
78
|
4: "ResourceConflictRatio",
|
|
70
79
|
5: "MemoryUB",
|
|
80
|
+
6: "L2Cache",
|
|
71
81
|
-1: "None"
|
|
72
82
|
}
|
|
73
83
|
|
|
74
84
|
|
|
85
|
+
class ModelTraingMode(Enum):
|
|
86
|
+
PYNATIVE = 0
|
|
87
|
+
GRAPH = 1
|
|
88
|
+
KERNEL_BY_KERNEL = 2
|
|
89
|
+
UNKNOWN = 3
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
class ProfilerLevel(Enum):
|
|
93
|
+
Level0 = "Level0"
|
|
94
|
+
Level1 = "Level1"
|
|
95
|
+
Level2 = "Level2"
|
|
96
|
+
|
|
97
|
+
|
|
75
98
|
class DeviceSupportParam(Enum):
|
|
76
99
|
"""The device target enum."""
|
|
77
100
|
CPU = ['start', 'start_profile', 'output_path', 'timeline_limit', 'profile_framework', 'op_time']
|
|
@@ -81,16 +104,20 @@ class DeviceSupportParam(Enum):
|
|
|
81
104
|
]
|
|
82
105
|
ASCEND = [
|
|
83
106
|
'start', 'start_profile', 'output_path', 'data_process', 'timeline_limit', 'profile_memory',
|
|
84
|
-
'parallel_strategy', 'profile_communication', 'aicore_metrics', 'l2_cache', '
|
|
85
|
-
'profile_framework'
|
|
107
|
+
'parallel_strategy', 'profile_communication', 'aicore_metrics', 'l2_cache', 'hbm_ddr', 'pcie', 'op_time',
|
|
108
|
+
'ascend_job_id', 'profile_framework', 'with_stack', 'profiler_level', 'data_simplification'
|
|
86
109
|
]
|
|
87
110
|
|
|
88
111
|
|
|
89
112
|
ALWAYS_VALID_PARAM = [
|
|
90
113
|
'start', 'start_profile', 'output_path', 'data_process', 'parallel_strategy', 'l2_cache',
|
|
91
|
-
'ascend_job_id', 'op_time', 'profile_framework'
|
|
114
|
+
'hbm_ddr', 'pcie', 'ascend_job_id', 'op_time', 'profile_framework', 'profiler_level'
|
|
92
115
|
]
|
|
93
116
|
|
|
117
|
+
ANALYSIS_ASYNC_MODE = 'async'
|
|
118
|
+
ANALYSIS_SYNC_MODE = 'sync'
|
|
119
|
+
DEFAULT_MODEL_ID = 4294967295
|
|
120
|
+
|
|
94
121
|
|
|
95
122
|
def _environment_check():
|
|
96
123
|
if c_expression.security.enable_security():
|
|
@@ -136,182 +163,36 @@ def _calculate_dataset_item(row, execution_time_map, ts_map):
|
|
|
136
163
|
logger.warning("Can not map the start time for item: %s.", row)
|
|
137
164
|
|
|
138
165
|
|
|
139
|
-
def
|
|
140
|
-
r"""
|
|
141
|
-
Parse the host info into timeline file, so as to show on UI.
|
|
142
|
-
|
|
143
|
-
Args:
|
|
144
|
-
input_file: the original host_info file, in csv format.
|
|
145
|
-
output_file: the output file, in csv format.
|
|
146
|
-
"""
|
|
147
|
-
input_file = validate_and_normalize_path(input_file)
|
|
148
|
-
# execution_time_map is used to store the ExecutionCalculator for each stage.
|
|
149
|
-
execution_time_map = {}
|
|
150
|
-
# ts_map is used to store the start time of each event_stage_tid_pid.
|
|
151
|
-
ts_map = {}
|
|
152
|
-
with open(input_file, 'r') as f:
|
|
153
|
-
for row in csv.DictReader(f):
|
|
154
|
-
try:
|
|
155
|
-
module_name = row['module_name']
|
|
156
|
-
if module_name != 'Dataset':
|
|
157
|
-
continue
|
|
158
|
-
_calculate_dataset_item(row, execution_time_map, ts_map)
|
|
159
|
-
except KeyError as e:
|
|
160
|
-
logger.error("Error occur when analyse line: %s, Details is: %s", row, e)
|
|
161
|
-
continue
|
|
162
|
-
if ts_map:
|
|
163
|
-
logger.warning("Only start time is record for these items:")
|
|
164
|
-
for k, v in ts_map.items():
|
|
165
|
-
logger.warning("event_stage_tid_pid: %s, time: %d us.", k, v)
|
|
166
|
-
output_file = validate_and_normalize_path(output_file)
|
|
167
|
-
flags = os.O_WRONLY | os.O_CREAT | os.O_TRUNC
|
|
168
|
-
modes = stat.S_IWUSR | stat.S_IRUSR
|
|
169
|
-
with os.fdopen(os.open(output_file, flags, modes), 'w') as f:
|
|
170
|
-
csv_writer = csv.writer(f)
|
|
171
|
-
csv_writer.writerow(['Operation', 'Stage', 'Occurrences', 'Avg. time (us)', 'Custom Info'])
|
|
172
|
-
for _, v in execution_time_map.items():
|
|
173
|
-
csv_writer.writerow([v.event, v.stage, v.count, v.average_execution, v.custom_info])
|
|
174
|
-
os.chmod(output_file, modes)
|
|
175
|
-
logger.info('Successfully calculate the execution time and write it to file: %s.', output_file)
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
def _extract_timeline_item(row, time_line, ts_map):
|
|
179
|
-
"""Process one row, try to extract a timeline item."""
|
|
180
|
-
start_end = row['start_end']
|
|
181
|
-
event_stage_tid_pid = row['event'] + '_' + row['stage'] + '_' + row['tid'] + '_' + row['pid']
|
|
182
|
-
# map start and end, put the mapped event into timeline.
|
|
183
|
-
if start_end == '1' and event_stage_tid_pid in ts_map:
|
|
184
|
-
title = row['event'] + '::' + row['stage']
|
|
185
|
-
event = {'name': title, 'cat': row['module_name']}
|
|
186
|
-
ts_end = int(row['time_stamp(us)'])
|
|
187
|
-
ts = ts_map[event_stage_tid_pid]
|
|
188
|
-
event['ts'] = ts
|
|
189
|
-
event['dur'] = ts_end - ts
|
|
190
|
-
event['ph'] = 'X'
|
|
191
|
-
event['pid'] = row['pid']
|
|
192
|
-
event['tid'] = row['tid']
|
|
193
|
-
event['args'] = {'parent_pid': row['parent_pid']}
|
|
194
|
-
time_line.append(event)
|
|
195
|
-
del ts_map[event_stage_tid_pid]
|
|
196
|
-
elif start_end == '0':
|
|
197
|
-
ts = int(row['time_stamp(us)'])
|
|
198
|
-
ts_map[event_stage_tid_pid] = ts
|
|
199
|
-
# Put the instance event into timeline.
|
|
200
|
-
elif start_end == '2':
|
|
201
|
-
title = row['event'] + '::' + row['stage']
|
|
202
|
-
event = {
|
|
203
|
-
'name': title, 'cat': row['module_name'], 'ts': int(row['time_stamp(us)']), 'ph': 'i',
|
|
204
|
-
'pid': row['pid'], 'tid': row['tid'], 'args': {'parent_pid': row['parent_pid']}
|
|
205
|
-
}
|
|
206
|
-
time_line.append(event)
|
|
207
|
-
else:
|
|
208
|
-
logger.warning("Can not map the start time for item: %s.", row)
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
def _parse_host_info(input_file, output_timeline_file, output_memory_file, is_develop_user=True):
|
|
212
|
-
r"""
|
|
213
|
-
Parse the host info into timeline file, so as to show on UI.
|
|
214
|
-
|
|
215
|
-
Args:
|
|
216
|
-
input_file: the original host_info file, in csv format.
|
|
217
|
-
output_timeline_file: the output timeline file, in json format.
|
|
218
|
-
output_memory_file: the output memory_usage file, in csv format.
|
|
219
|
-
is_develop_user: some data only shown to develop users, other users no need to analyse it.
|
|
220
|
-
"""
|
|
221
|
-
input_file = validate_and_normalize_path(input_file)
|
|
222
|
-
time_line = []
|
|
223
|
-
# ts_map is used to store the start time of each event_stage_tid_pid
|
|
224
|
-
ts_map = {}
|
|
225
|
-
memory_header = [
|
|
226
|
-
'tid', 'pid', 'parent_pid', 'module_name', 'event', 'stage', 'level', 'start_end', 'custom_info',
|
|
227
|
-
'memory_usage(kB)', 'time_stamp(us)'
|
|
228
|
-
]
|
|
229
|
-
memory_info = []
|
|
230
|
-
with open(input_file, 'r') as f:
|
|
231
|
-
for row in csv.DictReader(f):
|
|
232
|
-
try:
|
|
233
|
-
level = row['level']
|
|
234
|
-
if level == '0' and not is_develop_user:
|
|
235
|
-
continue
|
|
236
|
-
if int(row['time_stamp(us)']) > 0:
|
|
237
|
-
_extract_timeline_item(row, time_line, ts_map)
|
|
238
|
-
if int(row['memory_usage(kB)']) > 0:
|
|
239
|
-
memory_info.append(row)
|
|
240
|
-
except KeyError as e:
|
|
241
|
-
logger.error("Error occur when analyse line: %s, Details is: %s", row, e)
|
|
242
|
-
continue
|
|
243
|
-
if memory_info:
|
|
244
|
-
with os.fdopen(os.open(output_memory_file, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600), 'w') as csv_file:
|
|
245
|
-
csv_writer = csv.DictWriter(csv_file, fieldnames=memory_header)
|
|
246
|
-
csv_writer.writeheader()
|
|
247
|
-
for item in memory_info:
|
|
248
|
-
csv_writer.writerow(item)
|
|
249
|
-
os.chmod(output_memory_file, stat.S_IREAD | stat.S_IWRITE)
|
|
250
|
-
else:
|
|
251
|
-
logger.warning("No memory_usage is record in file: %s", input_file)
|
|
252
|
-
|
|
253
|
-
if ts_map:
|
|
254
|
-
logger.warning("Only start time is record for these items:")
|
|
255
|
-
for k, v in ts_map.items():
|
|
256
|
-
logger.warning("event_stage_tid_pid: %s, time: %d us.", k, v)
|
|
257
|
-
last_dash = k.rfind('_')
|
|
258
|
-
if last_dash == -1:
|
|
259
|
-
logger.error("Can't find pid in the event_stage_tid_pid string: %s", k)
|
|
260
|
-
continue
|
|
261
|
-
second_last_dash = k.rfind('_', 0, last_dash - 1)
|
|
262
|
-
if second_last_dash == -1:
|
|
263
|
-
logger.error("Can't find tid in the event_stage_tid_pid string: %s", k)
|
|
264
|
-
continue
|
|
265
|
-
pid = k[last_dash + 1:]
|
|
266
|
-
tid = k[second_last_dash + 1: last_dash]
|
|
267
|
-
title = k[:second_last_dash]
|
|
268
|
-
unfinished_timeline = {'name': title, 'pid': pid, 'tid': tid, 'ph': 'B', 'ts': int(v)}
|
|
269
|
-
time_line.append(unfinished_timeline)
|
|
270
|
-
|
|
271
|
-
if time_line:
|
|
272
|
-
timeline_file = validate_and_normalize_path(output_timeline_file)
|
|
273
|
-
with os.fdopen(os.open(timeline_file, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600), 'w') as json_file:
|
|
274
|
-
json.dump(time_line, json_file)
|
|
275
|
-
os.chmod(timeline_file, stat.S_IREAD | stat.S_IWRITE)
|
|
276
|
-
else:
|
|
277
|
-
logger.warning("No valid time_stamp is record in file: %s", input_file)
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
def _ascend_graph_msprof_generator(source_path, model_iteration_dict):
|
|
166
|
+
def _ascend_graph_msprof_generator(mindstudio_profiler_output, model_iteration_dict):
|
|
281
167
|
"""Executing the msprof export mode."""
|
|
282
168
|
try:
|
|
283
169
|
ProfilerInfo.set_export_start_time(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))
|
|
284
|
-
msprof_exporter = AscendMsprofExporter(
|
|
170
|
+
msprof_exporter = AscendMsprofExporter(mindstudio_profiler_output)
|
|
285
171
|
flag = msprof_exporter.export(model_iteration_dict)
|
|
286
172
|
ProfilerInfo.set_export_end_time(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))
|
|
287
173
|
return flag
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
logger.warning(err.message)
|
|
174
|
+
except (ProfilerException, TimeoutError, FileNotFoundError, RuntimeError) as err:
|
|
175
|
+
logger.warning(str(err))
|
|
291
176
|
return False
|
|
292
177
|
|
|
293
178
|
|
|
294
|
-
def _ascend_graph_msprof_analyse(
|
|
179
|
+
def _ascend_graph_msprof_analyse(mindstudio_profiler_output):
|
|
295
180
|
"""
|
|
296
181
|
Ascend graph model msprof data analyse.
|
|
297
182
|
|
|
298
183
|
Returns:
|
|
299
|
-
list[obj]: The list is : df_op_summary, df_op_statistic, df_step_trace
|
|
184
|
+
list[obj]: The list is : df_op_summary, df_op_statistic, df_step_trace, df_step_trace_model
|
|
300
185
|
"""
|
|
301
|
-
|
|
302
|
-
df_op_statistic = []
|
|
303
|
-
df_step_trace = []
|
|
186
|
+
res = ([], [], [], [])
|
|
304
187
|
try:
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
msprof_analyser = AscendMsprofDataGeneratorOld(os.path.join(source_path, 'summary'))
|
|
309
|
-
df_op_summary, df_op_statistic, df_step_trace = msprof_analyser.parse()
|
|
188
|
+
msprof_analyser = AscendMsprofDataGenerator(mindstudio_profiler_output)
|
|
189
|
+
res = msprof_analyser.parse()
|
|
190
|
+
return res
|
|
310
191
|
except ProfilerException as err:
|
|
311
192
|
logger.warning(err.message)
|
|
312
193
|
finally:
|
|
313
194
|
pass
|
|
314
|
-
return
|
|
195
|
+
return res
|
|
315
196
|
|
|
316
197
|
|
|
317
198
|
class Profiler:
|
|
@@ -320,24 +201,33 @@ class Profiler:
|
|
|
320
201
|
MindSpore users can import the mindspore.Profiler, initialize the Profiler object to start profiling,
|
|
321
202
|
and use Profiler.analyse() to stop profiling and analyse the results.
|
|
322
203
|
Users can visualize the results using the `MindSpore Insight
|
|
323
|
-
<https://www.mindspore.cn/mindinsight/docs/en/
|
|
204
|
+
<https://www.mindspore.cn/mindinsight/docs/en/master/index.html>`_ tool.
|
|
324
205
|
Now, Profiler supports AICORE operator, AICPU operator, HostCPU operator, memory,
|
|
325
206
|
correspondence, cluster, etc data analysis.
|
|
326
207
|
|
|
327
208
|
Args:
|
|
328
209
|
output_path (str, optional): Output data path. Default: ``"./data"`` .
|
|
210
|
+
profiler_level (ProfilerLevel, optional): (Ascend only) The level of profiling. Default: ``None``.
|
|
211
|
+
|
|
212
|
+
- ProfilerLevel.Level0: Leanest level of profiling data collection, collects information about the elapsed
|
|
213
|
+
time of the computational operators on the NPU and communication large operator information.
|
|
214
|
+
- ProfilerLevel.Level1: Collect more CANN layer AscendCL data and AICore performance metrics and
|
|
215
|
+
communication mini operator information based on Level0.
|
|
216
|
+
- ProfilerLevel.Level2: Collect GE and Runtime information in CANN layer on top of Level1.
|
|
217
|
+
|
|
329
218
|
op_time (bool, optional): (Ascend/GPU) Whether to collect operators performance data. Default value: ``True``.
|
|
330
219
|
profile_communication (bool, optional): (Ascend only) Whether to collect communication performance data in
|
|
331
|
-
a multi devices training,collect when True. Setting this parameter has no effect during single
|
|
220
|
+
a multi-device training, collect when True. Setting this parameter has no effect during single-card
|
|
332
221
|
training. When using this parameter, `op_time` must be set to ``True`` . Default: ``False`` .
|
|
333
222
|
profile_memory (bool, optional): (Ascend only) Whether to collect tensor memory data, collect when ``True`` .
|
|
334
|
-
When using this parameter, `op_time` must be set to True.
|
|
223
|
+
When using this parameter, `op_time` must be set to True. Collecting operator memory data when the graph
|
|
224
|
+
compilation level is O2 requires collecting from the first step. Default: ``False`` .
|
|
335
225
|
parallel_strategy (bool, optional): (Ascend only) Whether to collect parallel policy performance data.
|
|
336
|
-
Default value: ``
|
|
226
|
+
Default value: ``False`` .
|
|
337
227
|
start_profile (bool, optional): The start_profile parameter controls whether to enable or disable performance
|
|
338
228
|
data collection based on conditions. Default: ``True`` .
|
|
339
229
|
aicore_metrics (int, optional): (Ascend only) Types of AICORE performance data collected, when using this
|
|
340
|
-
parameter, `op_time` must be set to ``True`` , and the value must be in [-1, 0, 1, 2, 3, 4, 5],
|
|
230
|
+
parameter, `op_time` must be set to ``True`` , and the value must be in [-1, 0, 1, 2, 3, 4, 5, 6],
|
|
341
231
|
Default: ``0`` , the data items contained in each metric are as follows:
|
|
342
232
|
|
|
343
233
|
- -1: Does not collect AICORE data.
|
|
@@ -348,9 +238,15 @@ class Profiler:
|
|
|
348
238
|
- 3: MemoryL0 contains l0a_read/write_bw, l0b_read/write_bw, l0c_read/write_bw etc.
|
|
349
239
|
- 4: ResourceConflictRatio contains vec_bankgroup/bank/resc_cflt_ratio etc.
|
|
350
240
|
- 5: MemoryUB contains ub_read/write_bw_mte, ub_read/write_bw_vector, ub\_/write_bw_scalar etc.
|
|
241
|
+
- 6: L2Cache contains write_cache_hit, write_cache_miss_allocate, r0_read_cache_hit, r1_read_cache_hit etc.
|
|
242
|
+
This function only supports Atlas A2 training series products.
|
|
351
243
|
|
|
352
244
|
l2_cache (bool, optional): (Ascend only) Whether to collect l2 cache data, collect when True.
|
|
353
245
|
Default: ``False`` .
|
|
246
|
+
hbm_ddr (bool, optional): (Ascend only) Whether to collect On-Chip Memory/DDR read and write rate data,
|
|
247
|
+
collect when True. Default: ``False`` .
|
|
248
|
+
pcie (bool, optional): (Ascend only) Whether to collect PCIe bandwidth data, collect when True.
|
|
249
|
+
Default: ``False`` .
|
|
354
250
|
sync_enable (bool, optional): (GPU only) Whether the profiler collects operators in a synchronous way.
|
|
355
251
|
Default: ``True`` .
|
|
356
252
|
|
|
@@ -360,19 +256,32 @@ class Profiler:
|
|
|
360
256
|
- False: The asynchronous way. The duration of the operator is that of sending from the CPU to the GPU.
|
|
361
257
|
This method can reduce the impact of adding profiler on overall training time.
|
|
362
258
|
data_process (bool, optional): (Ascend/GPU) Whether to collect data to prepare performance data.
|
|
363
|
-
Default value: ``
|
|
259
|
+
Default value: ``False`` .
|
|
364
260
|
timeline_limit (int, optional): (Ascend/GPU) Set the maximum storage size of the timeline file (unit M).
|
|
365
261
|
When using this parameter, `op_time` must be set to True. Default value: ``500`` .
|
|
366
262
|
profile_framework (str, optional): (Ascend/GPU) The host information to collect, it must be one of
|
|
367
|
-
["all", "time",
|
|
368
|
-
|
|
369
|
-
Default:
|
|
263
|
+
["all", "time", None]. When it is not set to None, the host profiler data is collected. When using this
|
|
264
|
+
parameter, the op_time parameter must be enabled.
|
|
265
|
+
Default: None.
|
|
370
266
|
|
|
371
|
-
- "all": Record
|
|
372
|
-
- "time":
|
|
373
|
-
- "memory": Only record host memory usage.
|
|
267
|
+
- "all": Record host timestamp.
|
|
268
|
+
- "time": The same as "all".
|
|
374
269
|
- None: Not record host information.
|
|
375
|
-
|
|
270
|
+
data_simplification (bool, optional): (Ascend only) Whether to remove FRAMEWORK data and other redundant data.
|
|
271
|
+
If set to True, only the delivery of profiler and the original performance data in the PROF_XXX
|
|
272
|
+
directory are retained to save disk space.
|
|
273
|
+
Default value: ``True`` .
|
|
274
|
+
with_stack (bool, optional): (Ascend) Whether to collect frame host call stack data on the Python side. This
|
|
275
|
+
data is presented in the form of a flame graph in the timeline. When using this parameter, the op_time and
|
|
276
|
+
profile_framework parameters must be enabled. Default value: ``False`` .
|
|
277
|
+
analyse_only (bool, optional): (Ascend/GPU) Whether to parse only performance data and not collect performance
|
|
278
|
+
data. This parameter is an experimental parameter and does not need to be set by the user.
|
|
279
|
+
Default value: ``False`` .
|
|
280
|
+
rank_id (int, optional): (Ascend/GPU) Set the rank id during parsing. This parameter is
|
|
281
|
+
an experimental parameter and does not need to be set by the user. Default value: ``0`` .
|
|
282
|
+
env_enable (bool, optional): (Ascend/GPU) Whether to enable the collection of environment variables.
|
|
283
|
+
This parameter is an experimental parameter and does not need to be set by the user.
|
|
284
|
+
Default value: ``False`` .
|
|
376
285
|
Raises:
|
|
377
286
|
RuntimeError: When the version of CANN does not match the version of MindSpore,
|
|
378
287
|
MindSpore cannot parse the generated ascend_job_id directory structure.
|
|
@@ -386,6 +295,7 @@ class Profiler:
|
|
|
386
295
|
>>> from mindspore import nn
|
|
387
296
|
>>> import mindspore.dataset as ds
|
|
388
297
|
>>> from mindspore import Profiler
|
|
298
|
+
>>> from mindspore.profiler import ProfilerLevel
|
|
389
299
|
>>>
|
|
390
300
|
>>> class Net(nn.Cell):
|
|
391
301
|
... def __init__(self):
|
|
@@ -411,7 +321,7 @@ class Profiler:
|
|
|
411
321
|
...
|
|
412
322
|
... # Init Profiler
|
|
413
323
|
... # Note that the Profiler should be initialized before model.train
|
|
414
|
-
... profiler = Profiler()
|
|
324
|
+
... profiler = Profiler(profiler_level=ProfilerLevel.Level0)
|
|
415
325
|
...
|
|
416
326
|
... # Train Model
|
|
417
327
|
... net = Net()
|
|
@@ -420,16 +330,16 @@ class Profiler:
|
|
|
420
330
|
... # Profiler end
|
|
421
331
|
... profiler.analyse()
|
|
422
332
|
"""
|
|
423
|
-
|
|
424
|
-
_hwts_output_filename_target = "output_format_data_hwts_"
|
|
425
|
-
_opcompute_output_filename_target = "output_op_compute_time_"
|
|
426
|
-
_aicpu_op_output_filename_target = "output_data_preprocess_aicpu_"
|
|
427
|
-
_has_analysed = False
|
|
428
333
|
_has_initialized = False
|
|
429
334
|
_ascend_profiling_options = ""
|
|
430
335
|
_ascend_job_id = ""
|
|
336
|
+
ENABLE_STATUS = "on"
|
|
337
|
+
DISABLE_STATUS = "off"
|
|
431
338
|
|
|
432
339
|
def __init__(self, **kwargs):
|
|
340
|
+
if os.getenv("PROFILING_MODE"):
|
|
341
|
+
raise RuntimeError("Profiling is already enabled by PROFILING_MODE env.")
|
|
342
|
+
|
|
433
343
|
self._dev_id = None
|
|
434
344
|
self._cpu_profiler = None
|
|
435
345
|
self._gpu_profiler = None
|
|
@@ -445,13 +355,19 @@ class Profiler:
|
|
|
445
355
|
self._rank_size = 1
|
|
446
356
|
self._rank_id = 0
|
|
447
357
|
self._ascend_profiler = None
|
|
358
|
+
self.metadata = {}
|
|
359
|
+
self.max_str_len = 4096
|
|
360
|
+
self.max_meta_size = 50 * 1024
|
|
448
361
|
self._timeline_size_limit_byte = 500 * 1024 * 1024 # 500MB
|
|
449
362
|
self._parallel_strategy = True
|
|
450
363
|
self._model_iteration_dict = None
|
|
364
|
+
self._analyse_mode = ANALYSIS_SYNC_MODE
|
|
451
365
|
_environment_check()
|
|
452
366
|
# default aicore_metrics type is ArithmeticUtilization
|
|
453
367
|
self._aicore_metrics_id = 0
|
|
454
|
-
self._l2_cache =
|
|
368
|
+
self._l2_cache = self.DISABLE_STATUS
|
|
369
|
+
self._hbm_ddr = self.DISABLE_STATUS
|
|
370
|
+
self._pcie = self.DISABLE_STATUS
|
|
455
371
|
self._data_process = True
|
|
456
372
|
self._op_time = True
|
|
457
373
|
self._profile_communication = False
|
|
@@ -462,25 +378,33 @@ class Profiler:
|
|
|
462
378
|
self._sync_enable = True
|
|
463
379
|
self._stop_time = 0
|
|
464
380
|
self._dynamic_status = False
|
|
465
|
-
self._profile_framework =
|
|
381
|
+
self._profile_framework = None
|
|
466
382
|
self._msprof_enable = os.getenv("PROFILER_SAMPLECONFIG")
|
|
383
|
+
self.profiler_level = None
|
|
467
384
|
self._pretty_json = False
|
|
385
|
+
self._analyse_only = kwargs.get("analyse_only", False)
|
|
386
|
+
self._data_simplification = kwargs.get("data_simplification", True)
|
|
387
|
+
self._with_stack = False
|
|
468
388
|
if self._msprof_enable:
|
|
469
389
|
return
|
|
470
|
-
self._start_time = int(time.time() *
|
|
390
|
+
self._start_time = int(time.time() * 1e6) # us
|
|
391
|
+
self._monotonic_time = int(time.monotonic() * 1e6) # us
|
|
471
392
|
logger.info("Profiling: start time: %d", self._start_time)
|
|
472
393
|
if kwargs.get("env_enable"):
|
|
473
394
|
self._profiler_init(kwargs)
|
|
474
395
|
return
|
|
475
|
-
|
|
476
396
|
Profiler._has_initialized = True
|
|
477
397
|
# get device_id and device_target
|
|
478
|
-
self.
|
|
479
|
-
|
|
480
|
-
|
|
481
|
-
|
|
482
|
-
|
|
483
|
-
self.
|
|
398
|
+
if self._analyse_only:
|
|
399
|
+
self._device_target = DeviceTarget.ASCEND.value
|
|
400
|
+
self._rank_id = kwargs.get("rank_id", 0)
|
|
401
|
+
else:
|
|
402
|
+
self._get_devid_rankid_and_devtarget()
|
|
403
|
+
self._parser_kwargs(kwargs)
|
|
404
|
+
self._get_output_path(kwargs)
|
|
405
|
+
self._decide_device_target(kwargs)
|
|
406
|
+
if self.start_profile:
|
|
407
|
+
self.start()
|
|
484
408
|
|
|
485
409
|
@staticmethod
|
|
486
410
|
def _check_output_path(output_path):
|
|
@@ -496,9 +420,9 @@ class Profiler:
|
|
|
496
420
|
return output_path
|
|
497
421
|
|
|
498
422
|
@staticmethod
|
|
499
|
-
def
|
|
423
|
+
def _parse_job_start_time(prof_dir):
|
|
500
424
|
"""
|
|
501
|
-
|
|
425
|
+
Get the start time of the job.
|
|
502
426
|
|
|
503
427
|
Args:
|
|
504
428
|
prof_dir (str): The profiling directory that contains the ``host`` subdirectory.
|
|
@@ -506,34 +430,83 @@ class Profiler:
|
|
|
506
430
|
Returns:
|
|
507
431
|
float, job start time; ``None`` if it cannot be obtained.
|
|
508
432
|
"""
|
|
509
|
-
|
|
510
|
-
|
|
511
|
-
|
|
512
|
-
|
|
513
|
-
|
|
514
|
-
|
|
515
|
-
|
|
516
|
-
|
|
517
|
-
|
|
433
|
+
try:
|
|
434
|
+
AscendMsprofExporter.check_msprof_env()
|
|
435
|
+
script_path = AscendMsprofExporter.get_msprof_info_path()
|
|
436
|
+
if not script_path:
|
|
437
|
+
logger.warning("Can`t find get_msprof_info.py path, use single-export mode instead.")
|
|
438
|
+
return None
|
|
439
|
+
logger.info("get_msprof_info.py path is : %s", script_path)
|
|
440
|
+
host_dir = os.path.join(prof_dir, 'host')
|
|
441
|
+
cmd = ['python', script_path, '-dir', host_dir]
|
|
442
|
+
outs, _ = AscendMsprofExporter.run_cmd(cmd)
|
|
443
|
+
if not outs:
|
|
444
|
+
logger.warning('Can`t find the msprof info result')
|
|
445
|
+
return None
|
|
446
|
+
result = json.loads(outs)
|
|
447
|
+
if result.get('status', 1) == 1:
|
|
448
|
+
return None
|
|
449
|
+
jor_start_time = result.get('data', {}).get('collection_info', {}).get('Collection start time', None)
|
|
450
|
+
if jor_start_time is not None:
|
|
451
|
+
return float(jor_start_time.strip())
|
|
452
|
+
return None
|
|
453
|
+
except (RuntimeError, JSONDecodeError, AttributeError, TimeoutError, FileNotFoundError) as err:
|
|
454
|
+
logger.warning('Get the drvVersion error, use single-export mode instead. detail : %s', err)
|
|
455
|
+
return None
|
|
456
|
+
|
|
457
|
+
@classmethod
|
|
458
|
+
def offline_analyse(cls, path: str, pretty=False, step_list=None, data_simplification=True):
|
|
518
459
|
"""
|
|
519
|
-
|
|
460
|
+
Analyze training performance data offline, which is invoked after performance data collection is completed.
|
|
461
|
+
|
|
520
462
|
Args:
|
|
521
|
-
|
|
463
|
+
path (str): The profiling data path which need to be analyzed offline.
|
|
464
|
+
There needs to be a profiler directory in this path.
|
|
465
|
+
pretty (bool, optional): Whether to pretty json files. Default: ``False``.
|
|
466
|
+
step_list (list, optional): A list of steps that need to be analyzed, the steps must be
|
|
467
|
+
consecutive integers. Default: ``None``. By default, all steps will be analyzed.
|
|
468
|
+
data_simplification (bool, optional): Whether to enable data simplification. Default: ``True``.
|
|
522
469
|
|
|
523
|
-
|
|
524
|
-
|
|
470
|
+
Examples:
|
|
471
|
+
>>> from mindspore import Profiler
|
|
472
|
+
>>> Profiler.offline_analyse("./profiling_path")
|
|
525
473
|
"""
|
|
526
|
-
|
|
527
|
-
|
|
528
|
-
|
|
529
|
-
|
|
530
|
-
|
|
531
|
-
|
|
532
|
-
|
|
474
|
+
real_path = os.path.realpath(path)
|
|
475
|
+
PathManager.check_input_directory_path(real_path)
|
|
476
|
+
profiler_parent_path_list = PathManager.get_profiler_parent_path_list(real_path)
|
|
477
|
+
if not isinstance(data_simplification, bool):
|
|
478
|
+
logger.warning(f"For offline_analyse, the parameter data_simplification must be bool, "
|
|
479
|
+
f"but got type {type(data_simplification)}, it will be set to True.")
|
|
480
|
+
data_simplification = True
|
|
481
|
+
if not profiler_parent_path_list:
|
|
482
|
+
raise ProfilerPathErrorException(f'The provided path "{path}" must have a "profiler" directory for '
|
|
483
|
+
f'single-device profiler data, or multiple subdirectories each containing '
|
|
484
|
+
f'a "profiler" directory for multi-device profiler data. ')
|
|
485
|
+
# get rank id
|
|
486
|
+
rank_list = []
|
|
487
|
+
for parent_path in profiler_parent_path_list:
|
|
488
|
+
profiler_path = os.path.join(parent_path, Constant.PROFILER_DIR)
|
|
489
|
+
rank_id = ProfilerInfo.get_rank_id(profiler_path)
|
|
533
490
|
if int(rank_id) < 0:
|
|
534
|
-
|
|
535
|
-
|
|
536
|
-
|
|
491
|
+
logger.error(f"Unable to get a valid rank ID in the profiler directory: {profiler_path}")
|
|
492
|
+
rank_list.append(rank_id)
|
|
493
|
+
# start offline analyse
|
|
494
|
+
if len(profiler_parent_path_list) == 1:
|
|
495
|
+
PathManager.check_directory_path_writeable(profiler_parent_path_list[0])
|
|
496
|
+
profiler = cls(analyse_only=True, rank_id=rank_list[0], data_simplification=data_simplification)
|
|
497
|
+
profiler.analyse(profiler_parent_path_list[0], pretty, step_list)
|
|
498
|
+
else:
|
|
499
|
+
# Multiprocess Parsing
|
|
500
|
+
multiprocessing.set_start_method("fork", force=True)
|
|
501
|
+
process_number = min(Constant.DEFAULT_PROCESS_NUMBER, len(profiler_parent_path_list))
|
|
502
|
+
pool = multiprocessing.Pool(processes=process_number)
|
|
503
|
+
for idx, profiler_parent_path in enumerate(profiler_parent_path_list):
|
|
504
|
+
PathManager.check_directory_path_writeable(profiler_parent_path)
|
|
505
|
+
profiling_parser = cls(analyse_only=True, rank_id=rank_list[idx],
|
|
506
|
+
data_simplification=data_simplification)
|
|
507
|
+
pool.apply_async(profiling_parser.analyse, args=(profiler_parent_path, pretty, step_list))
|
|
508
|
+
pool.close()
|
|
509
|
+
pool.join()
|
|
537
510
|
|
|
538
511
|
def op_analyse(self, op_name, device_id=None):
|
|
539
512
|
"""
|
|
@@ -560,12 +533,12 @@ class Profiler:
|
|
|
560
533
|
>>> # Profiler init.
|
|
561
534
|
>>> profiler = Profiler()
|
|
562
535
|
>>> # Train Model or eval Model, taking LeNet5 as an example.
|
|
563
|
-
>>> # Refer to https://gitee.com/mindspore/docs/blob/
|
|
536
|
+
>>> # Refer to https://gitee.com/mindspore/docs/blob/master/docs/mindspore/code/lenet.py
|
|
564
537
|
>>> net = LeNet5()
|
|
565
538
|
>>> optimizer = nn.Momentum(net.trainable_params(), learning_rate=0.1, momentum=0.9)
|
|
566
539
|
>>> loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True)
|
|
567
540
|
>>> # Create the dataset taking MNIST as an example.
|
|
568
|
-
>>> # Refer to https://gitee.com/mindspore/docs/blob/
|
|
541
|
+
>>> # Refer to https://gitee.com/mindspore/docs/blob/master/docs/mindspore/code/mnist.py
|
|
569
542
|
>>> dataloader = create_dataset()
|
|
570
543
|
>>> model = Model(net, loss, optimizer)
|
|
571
544
|
>>> model.train(5, dataloader, dataset_sink_mode=False)
|
|
@@ -600,20 +573,73 @@ class Profiler:
|
|
|
600
573
|
return message
|
|
601
574
|
return op_info
|
|
602
575
|
|
|
603
|
-
def analyse(self, offline_path=None, pretty=False):
|
|
576
|
+
def analyse(self, offline_path=None, pretty=False, step_list=None, mode="sync"):
    """
    Collect and analyze training performance data, support calls during and after training. The example shows above.

    Arguments are validated first and the work is then delegated to ``self._analyse``. Exceptions of the
    types listed in the ``except`` clause below are logged as an error and are not re-raised.

    Args:
        offline_path (Union[str, None], optional): The data path which need to be analyzed with offline mode.
            Offline mode is used in abnormal exit scenario. This parameter should be set to ``None``
            for online mode. Default: ``None``.
        pretty (bool, optional): Whether to pretty json files. Default: ``False``.
            A non-bool value is ignored and the current setting is kept.
        step_list (list, optional): A list of steps that need to be analyzed, the steps must be
            consecutive integers. Default: ``None``. By default, all steps will be analyzed.
            The list passed in is not modified.
        mode (str, optional): Analysis mode, it must be one of ["sync", "async"]. Default: ``sync``.
            Any other value is replaced by ``sync`` with a warning.

            - sync: analyse data in current process, it will block the current process.
            - async: analyse data in subprocess, it will not block the current process. Since the parsing process
              will take up extra CPU resources, please enable this mode according to the actual resource situation.

    Examples:
        >>> from mindspore.train import Callback
        >>> from mindspore import Profiler
        >>> class StopAtStep(Callback):
        ...     def __init__(self, start_step=1, stop_step=5):
        ...         super(StopAtStep, self).__init__()
        ...         self.start_step = start_step
        ...         self.stop_step = stop_step
        ...         self.profiler = Profiler(start_profile=False)
        ...
        ...     def step_begin(self, run_context):
        ...         cb_params = run_context.original_args()
        ...         step_num = cb_params.cur_step_num
        ...         if step_num == self.start_step:
        ...             self.profiler.start()
        ...
        ...     def step_end(self, run_context):
        ...         cb_params = run_context.original_args()
        ...         step_num = cb_params.cur_step_num
        ...         if step_num == self.stop_step:
        ...             self.profiler.stop()
        ...
        ...     def end(self, run_context):
        ...         self.profiler.analyse(step_list=[2,3,4], mode="sync")
    """
    try:
        if isinstance(pretty, bool):
            self._pretty_json = pretty
        if mode not in [ANALYSIS_SYNC_MODE, ANALYSIS_ASYNC_MODE]:
            logger.warning("For analyse, the parameter mode must be one of ['sync', 'async'], "
                           "it will be set to 'sync'.")
            mode = ANALYSIS_SYNC_MODE
        model_iteration_dict = {}
        if step_list is not None and not isinstance(step_list, list):
            raise ProfilerParamTypeErrorException("Parameter step_list must be a list.")
        if step_list:
            if not all(isinstance(step_id, int) for step_id in step_list):
                raise ProfilerParamTypeErrorException("The elements of the parameter step_list must be integers.")
            # Sort a copy so the caller's list is left untouched.
            ordered_steps = sorted(step_list)
            first_step = ordered_steps[0]
            # Comparing against the exact expected run also rejects duplicates such as
            # [1, 1, 3], which a plain "last - first == len - 1" test lets through.
            if ordered_steps != list(range(first_step, first_step + len(ordered_steps))):
                err_msg = "The elements of the parameter step_list must be continuous integers."
                raise ProfilerParamTypeErrorException(err_msg)
            model_iteration_dict[DEFAULT_MODEL_ID] = ordered_steps
        if offline_path is not None and not isinstance(offline_path, str):
            raise ProfilerParamTypeErrorException("For analyse, the type of parameter offline_path must be str.")
        self._analyse(offline_path=offline_path, model_iteration_dict=model_iteration_dict, mode=mode)
    except (ProfilerException, RuntimeError, OSError, TypeError, NameError) as err:
        logger.error("Profiler analyse failed: %s", str(err))
|
|
641
|
+
|
|
642
|
+
def _analyse(self, offline_path=None, model_iteration_dict=None, mode=ANALYSIS_SYNC_MODE):
|
|
617
643
|
"""
|
|
618
644
|
Collect and analyze training performance data, support calls during and after training. The example shows above.
|
|
619
645
|
|
|
@@ -622,24 +648,24 @@ class Profiler:
|
|
|
622
648
|
Offline mode isused in abnormal exit scenario. This parameter should be set to ``None``
|
|
623
649
|
for online mode. Default: ``None``.
|
|
624
650
|
model_iteration_dict: Dictionary with model id as the key and iteration id as the value, Default: ``None``.
|
|
651
|
+
mode (str, optional): Analysis mode. Whether to analyse data in subprocess. Default: ``sync``.
|
|
652
|
+
By default, analyse data in current process.
|
|
625
653
|
"""
|
|
626
654
|
self._model_iteration_dict = model_iteration_dict
|
|
627
|
-
|
|
628
655
|
self._init_profiler_info()
|
|
629
656
|
self._is_support_step_info_collect()
|
|
657
|
+
self._analyse_mode = mode
|
|
630
658
|
parallel_mode = get_auto_parallel_context("parallel_mode")
|
|
631
659
|
stage_num = get_auto_parallel_context("pipeline_stages")
|
|
632
660
|
|
|
633
661
|
ProfilerInfo.set_parallel_info(parallel_mode, stage_num)
|
|
634
|
-
ProfilerInfo.set_rank_size(self._rank_size)
|
|
635
|
-
ProfilerInfo.set_heterogeneous(self._is_heterogeneous)
|
|
636
662
|
if offline_path:
|
|
637
|
-
|
|
638
|
-
|
|
639
|
-
|
|
640
|
-
|
|
641
|
-
|
|
642
|
-
|
|
663
|
+
# Loads the ProfilerInfo data, avoid overwriting the data collection prof_info_x.json.
|
|
664
|
+
ProfilerInfo.load_profiler_info_dict(os.path.join(offline_path, "profiler"))
|
|
665
|
+
ProfilerInfo.set_analyse_start_time(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))
|
|
666
|
+
self._ascend_graph_analyse(offline_path=offline_path)
|
|
667
|
+
ProfilerInfo.set_analyse_end_time(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))
|
|
668
|
+
ProfilerInfo.save(self._output_path)
|
|
643
669
|
return
|
|
644
670
|
if self._msprof_enable:
|
|
645
671
|
return
|
|
@@ -654,21 +680,21 @@ class Profiler:
|
|
|
654
680
|
cpu_op_file = glob.glob(os.path.join(self._output_path, 'cpu_op_type_info_*'))
|
|
655
681
|
if self._device_target and self._device_target != DeviceTarget.CPU.value and cpu_op_file:
|
|
656
682
|
self._is_heterogeneous = True
|
|
683
|
+
|
|
684
|
+
ProfilerInfo.set_heterogeneous(self._is_heterogeneous)
|
|
657
685
|
ProfilerInfo.set_analyse_start_time(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))
|
|
658
686
|
if self._device_target and self._device_target == DeviceTarget.CPU.value:
|
|
659
687
|
self._cpu_analyse()
|
|
688
|
+
if self._profile_framework:
|
|
689
|
+
logger.warning("The parameter 'profile_framework' is not support for CPU, so there no host profiler "
|
|
690
|
+
"data.")
|
|
660
691
|
|
|
661
692
|
if self._device_target and self._device_target == DeviceTarget.GPU.value:
|
|
662
693
|
self._gpu_analyse()
|
|
663
694
|
|
|
664
695
|
elif self._device_target and self._device_target == DeviceTarget.ASCEND.value:
|
|
665
696
|
self._ascend_analyse()
|
|
666
|
-
|
|
667
|
-
if self._device_target != DeviceTarget.CPU.value:
|
|
668
|
-
self._host_info_analyse()
|
|
669
|
-
else:
|
|
670
|
-
logger.warning("The parameter 'profile_framework' is not support for CPU, so there no host_info"
|
|
671
|
-
" directory in the output path.")
|
|
697
|
+
|
|
672
698
|
logger.info("Profiling: all the data have been analyzed.")
|
|
673
699
|
ProfilerInfo.set_analyse_end_time(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))
|
|
674
700
|
ProfilerInfo.save(self._output_path)
|
|
@@ -715,13 +741,11 @@ class Profiler:
|
|
|
715
741
|
else:
|
|
716
742
|
raise RuntimeError("The profiler has already started. Do not turn on again in the open state.")
|
|
717
743
|
|
|
718
|
-
# No need to start anything if parse profiling data offline
|
|
719
|
-
if self._is_offline_parser():
|
|
720
|
-
return
|
|
721
|
-
|
|
722
744
|
self._cpu_profiler.step_profiling_enable(True)
|
|
723
745
|
if self._op_time:
|
|
724
746
|
self._cpu_profiler.enable_op_time()
|
|
747
|
+
if self._profile_memory:
|
|
748
|
+
self._cpu_profiler.enable_profile_memory()
|
|
725
749
|
|
|
726
750
|
if self._device_target and self._device_target == DeviceTarget.GPU.value:
|
|
727
751
|
if self._data_process:
|
|
@@ -736,6 +760,14 @@ class Profiler:
|
|
|
736
760
|
self._md_profiler.start()
|
|
737
761
|
self._ascend_graph_start()
|
|
738
762
|
ProfilerInfo.set_profiling_start_time(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))
|
|
763
|
+
ProfilerInfo.set_system_cnt(c_expression.get_clock_syscnt())
|
|
764
|
+
ProfilerInfo.set_system_time(int(c_expression.get_clock_time())) # ns
|
|
765
|
+
if context.get_context("mode") == context.GRAPH_MODE:
|
|
766
|
+
jit_config = context.get_jit_config()
|
|
767
|
+
jit_level = jit_config.get("jit_level", "")
|
|
768
|
+
ProfilerInfo.set_jit_level(jit_level)
|
|
769
|
+
if self._profile_framework:
|
|
770
|
+
_framework_profiler_enable_mi()
|
|
739
771
|
|
|
740
772
|
def stop(self):
|
|
741
773
|
"""
|
|
@@ -778,10 +810,6 @@ class Profiler:
|
|
|
778
810
|
raise RuntimeError("The profiler has not started, so can not stop. Please call the start() method "
|
|
779
811
|
"before calling the stop() method.")
|
|
780
812
|
|
|
781
|
-
# No need to stop anything if parse profiling data offline
|
|
782
|
-
if self._is_offline_parser():
|
|
783
|
-
return
|
|
784
|
-
|
|
785
813
|
# Stop data collection after all operators are executed.
|
|
786
814
|
_pynative_executor.sync()
|
|
787
815
|
|
|
@@ -798,9 +826,101 @@ class Profiler:
|
|
|
798
826
|
self._stop_time = int(time.time() * 10000000)
|
|
799
827
|
ProfilerInfo.set_profiling_stop_time(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))
|
|
800
828
|
self._init_profiler_info()
|
|
829
|
+
ProfilerInfo.set_diff_time(self._start_time - self._monotonic_time)
|
|
801
830
|
ProfilerInfo.save(self._output_path)
|
|
831
|
+
self._dump_metadata()
|
|
802
832
|
logger.info("Profiling: stop time: %d", self._stop_time)
|
|
803
833
|
|
|
834
|
+
def add_metadata(self, key: str, value: str):
    """
    Report custom metadata key-value pair data.

    The pair is stored in ``self.metadata``. It is skipped with a warning when either argument is not a
    string, when either string fails ``self._check_str_valid``, or when the size estimate reaches
    ``self.max_meta_size``. An existing key is overwritten after a warning.

    Args:
        key (str): The key to the metadata.
        value (str): The value to the metadata.

    Examples:
        >>> from mindspore import Profiler
        >>> # Profiler init.
        >>> profiler = Profiler()
        >>> # Call Profiler add_metadata
        >>> profiler.add_metadata("test_key", "test_value")
        >>> # Profiler end
        >>> profiler.analyse()
    """
    if not (isinstance(key, str) and isinstance(value, str)):
        logger.warning("The key and value of metadata must be string. Skip this metadata.")
        return

    if not (self._check_str_valid(key) and self._check_str_valid(value)):
        logger.warning("Invalid input key or value. Skip this metadata.")
        return

    # Size estimate: current metadata container plus the two new strings.
    entry_size = getsizeof(key) + getsizeof(value)
    projected_size = getsizeof(self.metadata) + entry_size
    if projected_size >= self.max_meta_size:
        logger.warning("Too many metadata added. Skip this metadata")
        return

    if key in self.metadata:
        logger.warning(f"{key} is already saved as metadata, override it.")
    self.metadata[key] = value
|
|
864
|
+
|
|
865
|
+
def add_metadata_json(self, key: str, value: str):
    """
    Report custom metadata key-value pair data with the value as a JSON string data.

    ``value`` is decoded with ``json.loads`` and the decoded object is stored in ``self.metadata``.
    The pair is skipped with a warning when either argument is not a string, when either string fails
    ``self._check_str_valid``, when the size estimate reaches ``self.max_meta_size``, or when ``value``
    is not valid JSON. An existing key is overwritten after a warning.

    Args:
        key (str): The key to the metadata.
        value (str): The json str format value to the metadata.

    Examples:
        >>> import json
        >>> from mindspore import Profiler
        >>> # Profiler init.
        >>> profiler = Profiler()
        >>> # Call Profiler add_metadata_json
        >>> profiler.add_metadata_json("test_key", json.dumps({"key1": 1, "key2": 2}))
        >>> # Profiler end, metadata will be saved in profiler_metadata.json
        >>> profiler.analyse()
    """
    if not isinstance(key, str) or not isinstance(value, str):
        logger.warning("The key and value of metadata must be string. Skip this metadata.")
        return
    if not self._check_str_valid(key) or not self._check_str_valid(value):
        logger.warning("Invalid input key or value. Skip this metadata.")
        return

    # NOTE(review): sys.getsizeof is shallow, so this estimate does not include the
    # strings already stored inside self.metadata - confirm that is the intended limit.
    add_size = getsizeof(key) + getsizeof(value)
    if getsizeof(self.metadata) + add_size >= self.max_meta_size:
        logger.warning("Too many metadata added. Skip this metadata")
        return

    # Decode before announcing an override, so a malformed value never produces a
    # misleading "override it" message for an entry that is in fact left unchanged.
    try:
        decoded_value = json.loads(value)
    except ValueError:
        logger.warning("The metadata value must be json format string. Skip this metadata")
        return

    if key in self.metadata:
        logger.warning(f"{key} is already saved as metadata, override it.")
    self.metadata[key] = decoded_value
|
|
899
|
+
|
|
900
|
+
def _dump_metadata(self):
|
|
901
|
+
"""Dump metadata to file."""
|
|
902
|
+
if not self.metadata:
|
|
903
|
+
return
|
|
904
|
+
FileManager.create_json_file(self._output_path, self.metadata, "profiler_metadata.json", indent=4)
|
|
905
|
+
self.metadata.clear()
|
|
906
|
+
|
|
907
|
+
def _check_str_valid(self, input_str: str):
|
|
908
|
+
"""Check str length"""
|
|
909
|
+
if len(input_str) > self.max_str_len:
|
|
910
|
+
return False
|
|
911
|
+
return True
|
|
912
|
+
|
|
913
|
+
def _set_ascend_job_id(self, ascend_job_id):
|
|
914
|
+
"""Set output_path for offline parsing performance data."""
|
|
915
|
+
if not ascend_job_id:
|
|
916
|
+
return
|
|
917
|
+
self._ascend_job_id = validate_and_normalize_path(ascend_job_id)
|
|
918
|
+
if not os.path.exists(self._ascend_job_id):
|
|
919
|
+
msg = f"Invalid ascend_job_id: {self._ascend_job_id}, Please pass the absolute path of the JOB dir"
|
|
920
|
+
logger.critical(msg)
|
|
921
|
+
raise ValueError(msg)
|
|
922
|
+
self._output_path, _ = os.path.split(self._ascend_job_id)
|
|
923
|
+
|
|
804
924
|
def _profiler_init(self, kwargs):
|
|
805
925
|
"""Initialize variables when profiler is enabled by environment variables."""
|
|
806
926
|
options = kwargs.get("env_enable")
|
|
@@ -814,7 +934,7 @@ class Profiler:
|
|
|
814
934
|
self._profile_communication = options.get('profile_communication')
|
|
815
935
|
self._op_time = options.get('op_time')
|
|
816
936
|
self._device_target = context.get_context("device_target").lower()
|
|
817
|
-
self._profile_framework = options.get('profile_framework',
|
|
937
|
+
self._profile_framework = options.get('profile_framework', None)
|
|
818
938
|
self._profiler_manager = c_expression.ProfilerManager.get_instance()
|
|
819
939
|
self._cpu_profiler = c_expression.Profiler.get_instance("CPU")
|
|
820
940
|
if self._data_process:
|
|
@@ -865,32 +985,32 @@ class Profiler:
|
|
|
865
985
|
|
|
866
986
|
def _gpu_profiler_init(self, kwargs):
    """
    Gpu profiler init.

    Parses the GPU-related options from ``kwargs``, optionally initialises the MindData profiling
    manager, obtains the "GPU" profiler instance, and initialises it with the output path and rank id.

    Args:
        kwargs (dict): Keyword arguments forwarded to ``self._parse_parameter_for_gpu``.
    """
    self._parse_parameter_for_gpu(kwargs)

    # Setup and start MindData Profiling
    if self._data_process:
        self._md_profiler = cde.GlobalContext.profiling_manager()
        self._md_profiler.init()

    self._gpu_profiler = c_expression.Profiler.get_instance("GPU")

    if GlobalComm.WORLD_COMM_GROUP == "nccl_world_group":
        # With the NCCL world group, the communication rank serves as device id and rank id.
        self._dev_id = str(get_rank())
        os.environ['DEVICE_ID'] = self._dev_id
        self._rank_id = self._dev_id

    # Initialise after the rank id is settled, because init() receives it.
    self._gpu_profiler.init(self._output_path, int(self._rank_id))
    self._gpu_profiler.sync_enable(self._sync_enable)
|
|
882
1002
|
|
|
883
1003
|
def _ascend_profiler_init(self, kwargs):
|
|
884
1004
|
"""Ascend profiler init."""
|
|
1005
|
+
self._parse_parameter_for_ascend(kwargs)
|
|
885
1006
|
# Setup and start MindData Profiling
|
|
886
1007
|
if self._data_process:
|
|
887
1008
|
self._md_profiler = cde.GlobalContext.profiling_manager()
|
|
888
1009
|
self._md_profiler.init()
|
|
889
1010
|
self._init_time = int(time.time() * 10000000)
|
|
890
1011
|
logger.info("Profiling: profiling init time: %d", self._init_time)
|
|
891
|
-
self._parse_parameter_for_ascend(kwargs)
|
|
892
|
-
os.environ['DEVICE_ID'] = self._dev_id
|
|
893
1012
|
|
|
1013
|
+
os.environ['DEVICE_ID'] = self._dev_id
|
|
894
1014
|
self._ascend_profiling_options = json.dumps(self._construct_profiling_options())
|
|
895
1015
|
# Characters longer than 2048 are ignored, resulting in profiling option resolution errors
|
|
896
1016
|
if len(self._ascend_profiling_options) > 2048:
|
|
@@ -906,7 +1026,7 @@ class Profiler:
|
|
|
906
1026
|
data_path = os.path.join(container_path, "data")
|
|
907
1027
|
data_path = validate_and_normalize_path(data_path)
|
|
908
1028
|
if not os.path.exists(data_path):
|
|
909
|
-
os.makedirs(data_path, exist_ok=True)
|
|
1029
|
+
os.makedirs(data_path, exist_ok=True, mode=stat.S_IRUSR | stat.S_IWUSR | stat.S_IXUSR)
|
|
910
1030
|
|
|
911
1031
|
def _construct_profiling_options(self):
|
|
912
1032
|
"""
|
|
@@ -919,18 +1039,22 @@ class Profiler:
|
|
|
919
1039
|
"output": self._output_path,
|
|
920
1040
|
"fp_point": fp_point,
|
|
921
1041
|
"bp_point": bp_point,
|
|
922
|
-
"training_trace":
|
|
923
|
-
"task_trace":
|
|
1042
|
+
"training_trace": self.ENABLE_STATUS if self._op_time else self.DISABLE_STATUS,
|
|
1043
|
+
"task_trace": self.ENABLE_STATUS if self._op_time else self.DISABLE_STATUS,
|
|
924
1044
|
"aic_metrics": AICORE_METRICS_DICT.get(self._aicore_metrics_id, "ArithmeticUtilization"),
|
|
925
|
-
"aicpu":
|
|
926
|
-
"profile_memory":
|
|
927
|
-
"hccl":
|
|
1045
|
+
"aicpu": self.ENABLE_STATUS if self._data_process or self._op_time else self.DISABLE_STATUS,
|
|
1046
|
+
"profile_memory": self.ENABLE_STATUS if self._op_time and self._profile_memory else self.DISABLE_STATUS,
|
|
1047
|
+
"hccl": self.ENABLE_STATUS if self._op_time and self._profile_communication else self.DISABLE_STATUS,
|
|
928
1048
|
"l2_cache": self._l2_cache,
|
|
929
|
-
"
|
|
930
|
-
"
|
|
931
|
-
"
|
|
1049
|
+
"hbm_ddr": self._hbm_ddr,
|
|
1050
|
+
"pcie": self._pcie,
|
|
1051
|
+
"parallel_strategy": self.ENABLE_STATUS if self._parallel_strategy else self.DISABLE_STATUS,
|
|
1052
|
+
"op_time": self.ENABLE_STATUS if self._op_time else self.DISABLE_STATUS,
|
|
1053
|
+
"profile_framework": self._profile_framework,
|
|
1054
|
+
"profiler_level": self.profiler_level.value if self.profiler_level else self.DISABLE_STATUS,
|
|
1055
|
+
"with_stack": "on" if self._with_stack else "off"
|
|
932
1056
|
}
|
|
933
|
-
|
|
1057
|
+
ProfilerInfo.set_profiling_options(profiling_options)
|
|
934
1058
|
return profiling_options
|
|
935
1059
|
|
|
936
1060
|
def _parse_parameter_for_gpu(self, kwargs):
|
|
@@ -961,7 +1085,7 @@ class Profiler:
|
|
|
961
1085
|
self._profile_communication = False
|
|
962
1086
|
|
|
963
1087
|
if self._profile_communication:
|
|
964
|
-
hccl_option = {"output": self._output_path, "task_trace":
|
|
1088
|
+
hccl_option = {"output": self._output_path, "task_trace": self.ENABLE_STATUS}
|
|
965
1089
|
os.environ['PROFILING_OPTIONS'] = json.dumps(hccl_option)
|
|
966
1090
|
|
|
967
1091
|
self._profile_memory = kwargs.pop("profile_memory", False)
|
|
@@ -978,7 +1102,7 @@ class Profiler:
|
|
|
978
1102
|
|
|
979
1103
|
if self._aicore_metrics_id not in AICORE_METRICS_DICT:
|
|
980
1104
|
logger.warning(f"For '{self.__class__.__name__}', the parameter aicore_metrics must be in "
|
|
981
|
-
f"[-1, 0, 1, 2, 3, 4, 5], but got {self._aicore_metrics_id}, it will be set to 0.")
|
|
1105
|
+
f"[-1, 0, 1, 2, 3, 4, 5, 6], but got {self._aicore_metrics_id}, it will be set to 0.")
|
|
982
1106
|
self._aicore_metrics_id = 0
|
|
983
1107
|
|
|
984
1108
|
l2_cache_enable = kwargs.pop("l2_cache", False)
|
|
@@ -986,38 +1110,43 @@ class Profiler:
|
|
|
986
1110
|
logger.warning(f"For '{self.__class__.__name__}', the parameter l2_cache must be bool, "
|
|
987
1111
|
f"but got type {type(l2_cache_enable)}, it will be set to False.")
|
|
988
1112
|
l2_cache_enable = False
|
|
989
|
-
if l2_cache_enable
|
|
990
|
-
|
|
991
|
-
|
|
992
|
-
|
|
993
|
-
|
|
994
|
-
|
|
1113
|
+
self._l2_cache = self.ENABLE_STATUS if l2_cache_enable else self.DISABLE_STATUS
|
|
1114
|
+
|
|
1115
|
+
hbm_ddr_enable = kwargs.pop("hbm_ddr", False)
|
|
1116
|
+
if not isinstance(hbm_ddr_enable, bool):
|
|
1117
|
+
logger.warning(f"For '{self.__class__.__name__}', the parameter hbm_ddr must be bool, "
|
|
1118
|
+
f"but got type {type(hbm_ddr_enable)}, it will be set to False.")
|
|
1119
|
+
hbm_ddr_enable = False
|
|
1120
|
+
self._hbm_ddr = self.ENABLE_STATUS if hbm_ddr_enable else self.DISABLE_STATUS
|
|
1121
|
+
|
|
1122
|
+
pcie_enable = kwargs.pop("pcie", False)
|
|
1123
|
+
if not isinstance(pcie_enable, bool):
|
|
1124
|
+
logger.warning(f"For '{self.__class__.__name__}', the parameter pcie must be bool, "
|
|
1125
|
+
f"but got type {type(pcie_enable)}, it will be set to False.")
|
|
1126
|
+
pcie_enable = False
|
|
1127
|
+
self._pcie = self.ENABLE_STATUS if pcie_enable else self.DISABLE_STATUS
|
|
1128
|
+
|
|
1129
|
+
self._parallel_strategy = kwargs.pop("parallel_strategy", False)
|
|
995
1130
|
if not isinstance(self._parallel_strategy, bool):
|
|
996
1131
|
logger.warning(f"For '{self.__class__.__name__}', the parameter parallel_strategy must be bool, "
|
|
997
|
-
f"but got type {type(self._parallel_strategy)}, it will be set to
|
|
998
|
-
self._parallel_strategy =
|
|
999
|
-
|
|
1000
|
-
|
|
1001
|
-
if
|
|
1002
|
-
logger.warning(f"For '{self.__class__.__name__}',
|
|
1003
|
-
f"
|
|
1004
|
-
|
|
1005
|
-
|
|
1006
|
-
|
|
1007
|
-
|
|
1008
|
-
|
|
1009
|
-
|
|
1010
|
-
|
|
1011
|
-
|
|
1012
|
-
|
|
1013
|
-
|
|
1014
|
-
|
|
1015
|
-
|
|
1016
|
-
def _is_offline_parser(self):
|
|
1017
|
-
"""Return whether offline parser or online parser."""
|
|
1018
|
-
if self._device_target and self._device_target == DeviceTarget.ASCEND.value:
|
|
1019
|
-
return bool(self._ascend_job_id)
|
|
1020
|
-
return False
|
|
1132
|
+
f"but got type {type(self._parallel_strategy)}, it will be set to False.")
|
|
1133
|
+
self._parallel_strategy = False
|
|
1134
|
+
|
|
1135
|
+
self.profiler_level = kwargs.pop("profiler_level", None)
|
|
1136
|
+
if self.profiler_level and not isinstance(self.profiler_level, ProfilerLevel):
|
|
1137
|
+
logger.warning(f"For '{self.__class__.__name__}', the parameter profiler_level must be one of "
|
|
1138
|
+
f"[ProfilerLevel.Level0, ProfilerLevel.Level1, ProfilerLevel.Level2], but got type "
|
|
1139
|
+
f"{type(self.profiler_level)}, it will be set to ProfilerLevel.Level0.")
|
|
1140
|
+
self.profiler_level = ProfilerLevel.Level0
|
|
1141
|
+
elif self.profiler_level == ProfilerLevel.Level0:
|
|
1142
|
+
self._data_process = False
|
|
1143
|
+
self._aicore_metrics_id = -1
|
|
1144
|
+
logger.warning(f"For '{self.__class__.__name__}', when profiler_level set Level0, data_process will be set "
|
|
1145
|
+
f"to False and aicore_metrics set to -1.")
|
|
1146
|
+
elif self.profiler_level == ProfilerLevel.Level1:
|
|
1147
|
+
self._data_process = False
|
|
1148
|
+
logger.warning(f"For '{self.__class__.__name__}', when profiler_level set Level1, data_process will be set "
|
|
1149
|
+
f"to False.")
|
|
1021
1150
|
|
|
1022
1151
|
def _ascend_analyse(self):
|
|
1023
1152
|
"""Collect and analyse ascend performance data."""
|
|
@@ -1029,23 +1158,21 @@ class Profiler:
|
|
|
1029
1158
|
self._rank_size = get_group_size()
|
|
1030
1159
|
else:
|
|
1031
1160
|
self._rank_size = int(os.getenv('RANK_SIZE', '1'))
|
|
1161
|
+
ProfilerInfo.set_rank_size(self._rank_size)
|
|
1032
1162
|
|
|
1033
1163
|
if self._has_started:
|
|
1034
1164
|
self.stop()
|
|
1035
1165
|
else:
|
|
1036
1166
|
logger.info("No need to stop profiler because profiler has been stopped.")
|
|
1167
|
+
self._ascend_profiler.finalize()
|
|
1037
1168
|
# export op data before analyse
|
|
1038
1169
|
self._ascend_graph_analyse()
|
|
1039
1170
|
|
|
1040
|
-
def _minddata_analyse(self
|
|
1171
|
+
def _minddata_analyse(self):
|
|
1041
1172
|
"""Analyse mindadata for ascend graph model."""
|
|
1042
1173
|
if not self._data_process:
|
|
1043
1174
|
return
|
|
1044
1175
|
store_id = self._rank_id if self._device_target == DeviceTarget.ASCEND.value else self._dev_id
|
|
1045
|
-
# Parsing minddata AICPU profiling
|
|
1046
|
-
if self._device_target == DeviceTarget.ASCEND.value:
|
|
1047
|
-
logger.info("Profiling: analyzing the minddata AICPU data.")
|
|
1048
|
-
MinddataParser.execute(source_path, self._output_path, store_id)
|
|
1049
1176
|
|
|
1050
1177
|
# parse minddata pipeline operator and queue
|
|
1051
1178
|
try:
|
|
@@ -1065,6 +1192,16 @@ class Profiler:
|
|
|
1065
1192
|
finally:
|
|
1066
1193
|
pass
|
|
1067
1194
|
|
|
1195
|
+
def _minddata_aicpu_analyse(self, source_path, job_id):
|
|
1196
|
+
"""Analyse minddata aicpu after ascend."""
|
|
1197
|
+
if not self._data_process:
|
|
1198
|
+
return
|
|
1199
|
+
store_id = self._rank_id if self._device_target == DeviceTarget.ASCEND.value else self._dev_id
|
|
1200
|
+
# Parsing minddata AICPU profiling
|
|
1201
|
+
if self._device_target == DeviceTarget.ASCEND.value:
|
|
1202
|
+
logger.info("Profiling: analyzing the minddata AICPU data.")
|
|
1203
|
+
MinddataParser.execute(source_path, self._output_path, job_id, store_id)
|
|
1204
|
+
|
|
1068
1205
|
def _ascend_fpbp_analyse(self, op_summary, steptrace):
|
|
1069
1206
|
"""
|
|
1070
1207
|
Ascned graph model op analyse.
|
|
@@ -1088,7 +1225,7 @@ class Profiler:
|
|
|
1088
1225
|
pass
|
|
1089
1226
|
return points
|
|
1090
1227
|
|
|
1091
|
-
def _ascend_op_analyse(self, op_summary, op_statistic, dynamic_status):
|
|
1228
|
+
def _ascend_op_analyse(self, op_summary, op_statistic, dynamic_status, launch_ops: List):
|
|
1092
1229
|
"""
|
|
1093
1230
|
Ascend graph model hwts analyse.
|
|
1094
1231
|
|
|
@@ -1115,12 +1252,12 @@ class Profiler:
|
|
|
1115
1252
|
else:
|
|
1116
1253
|
output_timeline_data_path = None
|
|
1117
1254
|
|
|
1118
|
-
op_analyser = AscendOPGenerator(op_summary, op_statistic, dynamic_status)
|
|
1255
|
+
op_analyser = AscendOPGenerator(op_summary, op_statistic, dynamic_status, launch_ops)
|
|
1119
1256
|
op_analyser.parse()
|
|
1120
1257
|
op_analyser.write(op_intermediate_detail_path, op_intermediate_type_path,
|
|
1121
1258
|
aicpu_intermediate_detail_path, framework_raw_path, output_timeline_data_path)
|
|
1122
|
-
except ProfilerException as err:
|
|
1123
|
-
logger.warning(err
|
|
1259
|
+
except (ProfilerException, RuntimeError) as err:
|
|
1260
|
+
logger.warning(str(err))
|
|
1124
1261
|
finally:
|
|
1125
1262
|
pass
|
|
1126
1263
|
|
|
@@ -1142,19 +1279,22 @@ class Profiler:
|
|
|
1142
1279
|
finally:
|
|
1143
1280
|
pass
|
|
1144
1281
|
|
|
1145
|
-
def _ascend_timeline_analyse(self, op_summary, steptrace):
|
|
1282
|
+
def _ascend_timeline_analyse(self, op_summary, steptrace, source_path, mindstudio_profiler_output) -> List:
    """
    Analyse timeline info.

    Builds an ``AscendTimelineGenerator``, parses the cluster and timeline data, and writes the
    timeline display and summary. ``ProfilerIOException``, ``ProfilerFileNotFoundException`` and
    ``RuntimeError`` raised along the way are logged as a warning and not re-raised.

    Args:
        op_summary: Passed to ``parse_cluster_data``.
        steptrace: Passed to ``parse_cluster_data``.
        source_path: Passed to the ``AscendTimelineGenerator`` constructor.
        mindstudio_profiler_output: Passed to the ``AscendTimelineGenerator`` constructor.

    Returns:
        List, the result of ``get_kernel_event_list()`` on the generator, or an empty list when the
        generator could not be constructed.
    """
    # Bind the name up front: if the constructor raises one of the handled exceptions,
    # the final return must not hit an unbound local.
    timeline_analyser = None
    try:
        logger.info("Profiling: analyzing the timeline data")
        timeline_analyser = AscendTimelineGenerator(self._output_path, source_path, mindstudio_profiler_output,
                                                    self._rank_id, self._rank_size, context.get_context('mode'),
                                                    self._model_iteration_dict.get(DEFAULT_MODEL_ID))
        timeline_analyser.parse_cluster_data(op_summary, steptrace)
        timeline_analyser.parse_timeline_data(pretty=self._pretty_json)
        timeline_analyser.write_timeline_display()
        timeline_analyser.write_timeline_summary()
    except (ProfilerIOException, ProfilerFileNotFoundException, RuntimeError) as err:
        logger.warning('Fail to write timeline data: %s', err)

    if timeline_analyser is None:
        return []
    return timeline_analyser.get_kernel_event_list()
|
|
1158
1298
|
|
|
1159
1299
|
def _ascend_dynamic_net_analyse(self, op_summary):
|
|
1160
1300
|
"""Analyse dynamic shape network info."""
|
|
@@ -1168,7 +1308,7 @@ class Profiler:
|
|
|
1168
1308
|
dynamic_parser = DynamicFrameWorkParser(self._output_path, self._rank_id, pretty=self._pretty_json)
|
|
1169
1309
|
dynamic_parser.write_dynamic_shape_data(op_summary)
|
|
1170
1310
|
|
|
1171
|
-
def _ascend_flops_analyse(self, op_summary):
|
|
1311
|
+
def _ascend_flops_analyse(self, op_summary, launch_ops):
|
|
1172
1312
|
"""Get op FLOPs from op_summary, write output_op_flops_x.csv."""
|
|
1173
1313
|
if 'vector_fops' not in op_summary.dtype.names and 'cube_fops' not in op_summary.dtype.names:
|
|
1174
1314
|
logger.warning("[Profiler] Can not found cube fops and vector fops data in the op summary.")
|
|
@@ -1183,16 +1323,16 @@ class Profiler:
|
|
|
1183
1323
|
flops_path = validate_and_normalize_path(flops_path)
|
|
1184
1324
|
flops_summary_path = validate_and_normalize_path(flops_summary_path)
|
|
1185
1325
|
|
|
1186
|
-
flops_analyser = AscendFlopsGenerator(op_summary, pretty=self._pretty_json)
|
|
1326
|
+
flops_analyser = AscendFlopsGenerator(op_summary, launch_ops, pretty=self._pretty_json)
|
|
1187
1327
|
flops_analyser.parse()
|
|
1188
1328
|
flops_analyser.write(flops_path, flops_summary_path)
|
|
1189
1329
|
|
|
1190
|
-
except ProfilerException as err:
|
|
1191
|
-
logger.warning(err
|
|
1330
|
+
except (ProfilerException, RuntimeError) as err:
|
|
1331
|
+
logger.warning(str(err))
|
|
1192
1332
|
finally:
|
|
1193
1333
|
pass
|
|
1194
1334
|
|
|
1195
|
-
def _ascend_graph_memory_analyse(self
|
|
1335
|
+
def _ascend_graph_memory_analyse(self):
|
|
1196
1336
|
"""Analyse memory usage info."""
|
|
1197
1337
|
if not self._profile_memory:
|
|
1198
1338
|
return
|
|
@@ -1201,7 +1341,7 @@ class Profiler:
|
|
|
1201
1341
|
"PyNative mode currently.")
|
|
1202
1342
|
try:
|
|
1203
1343
|
logger.info("Profiling: analyzing the memory usage info.")
|
|
1204
|
-
self._analyse_memory_usage(
|
|
1344
|
+
self._analyse_memory_usage()
|
|
1205
1345
|
except (ProfilerIOException, ProfilerFileNotFoundException, ProfilerRawFileException) as err:
|
|
1206
1346
|
logger.warning(err.message)
|
|
1207
1347
|
finally:
|
|
@@ -1209,30 +1349,52 @@ class Profiler:
|
|
|
1209
1349
|
|
|
1210
1350
|
def _ascend_ms_analyze(self, source_path):
|
|
1211
1351
|
"""Ascend ms generate"""
|
|
1212
|
-
|
|
1352
|
+
|
|
1353
|
+
timestamp = time.strftime("%Y%m%d%H%M%S", time.localtime(time.time()))
|
|
1213
1354
|
if self._rank_id:
|
|
1214
|
-
ascend_ms_path = f"rank-{self._rank_id}_{
|
|
1355
|
+
ascend_ms_path = f"rank-{self._rank_id}_{timestamp}_ascend_ms"
|
|
1215
1356
|
else:
|
|
1216
|
-
ascend_ms_path = f"{socket.gethostname()}--{os.getpid()}_{
|
|
1217
|
-
|
|
1218
|
-
if not os.path.exists(self._ascend_ms_path):
|
|
1219
|
-
os.makedirs(self._ascend_ms_path, exist_ok=True)
|
|
1220
|
-
os.chmod(self._ascend_ms_path, stat.S_IRUSR | stat.S_IWUSR | stat.S_IXUSR)
|
|
1357
|
+
ascend_ms_path = f"{socket.gethostname()}--{os.getpid()}_{timestamp}_ascend_ms"
|
|
1358
|
+
ascend_ms_path = os.path.join(self._output_path, ascend_ms_path)
|
|
1221
1359
|
|
|
1222
1360
|
dev_id = self._rank_id if self._device_target == DeviceTarget.ASCEND.value else self._dev_id
|
|
1223
|
-
ascend_profiler_output_path = os.path.join(
|
|
1224
|
-
|
|
1361
|
+
ascend_profiler_output_path = os.path.join(ascend_ms_path, 'ASCEND_PROFILER_OUTPUT')
|
|
1362
|
+
PathManager.make_dir_safety(ascend_profiler_output_path)
|
|
1225
1363
|
|
|
1226
1364
|
source_profiler_info_path = os.path.join(self._output_path, f"profiler_info_{dev_id}.json")
|
|
1227
|
-
target_profiler_info_path = os.path.join(
|
|
1228
|
-
|
|
1365
|
+
target_profiler_info_path = os.path.join(ascend_ms_path, f"profiler_info_{dev_id}.json")
|
|
1366
|
+
PathManager.copy_file(source_profiler_info_path, target_profiler_info_path)
|
|
1367
|
+
|
|
1368
|
+
source_profiler_metadata_path = os.path.join(self._output_path, f"profiler_metadata.json")
|
|
1369
|
+
target_profiler_metadata_path = os.path.join(ascend_ms_path, f"profiler_metadata.json")
|
|
1370
|
+
PathManager.copy_file(source_profiler_metadata_path, target_profiler_metadata_path)
|
|
1229
1371
|
|
|
1230
1372
|
source_timeline_path = os.path.join(self._output_path, f"ascend_timeline_display_{dev_id}.json")
|
|
1231
1373
|
target_timeline_path = os.path.join(ascend_profiler_output_path, f"trace_view.json")
|
|
1232
|
-
|
|
1374
|
+
PathManager.copy_file(source_timeline_path, target_timeline_path)
|
|
1375
|
+
|
|
1376
|
+
src_op_mem_file = os.path.join(self._output_path, f"operator_memory_{dev_id}.csv")
|
|
1377
|
+
dst_op_mem_file = os.path.join(ascend_profiler_output_path, f"operator_memory.csv")
|
|
1378
|
+
PathManager.copy_file(src_op_mem_file, dst_op_mem_file)
|
|
1379
|
+
|
|
1380
|
+
ms_output_path = os.path.realpath(
|
|
1381
|
+
os.path.join(source_path, os.path.pardir, 'mindstudio_profiler_output'))
|
|
1382
|
+
static_op_mem_path = os.path.join(ms_output_path, f"static_op_mem_*.csv")
|
|
1383
|
+
src_static_op_mem_path = glob.glob(static_op_mem_path)
|
|
1384
|
+
if src_static_op_mem_path:
|
|
1385
|
+
dst_static_op_mem_file = os.path.join(ascend_profiler_output_path, f"static_op_mem.csv")
|
|
1386
|
+
PathManager.copy_file(src_static_op_mem_path[0], dst_static_op_mem_file)
|
|
1387
|
+
|
|
1388
|
+
src_op_statistics_path = os.path.join(ms_output_path, "op_statistic_*.csv")
|
|
1389
|
+
src_op_statistics_path = glob.glob(src_op_statistics_path)
|
|
1390
|
+
if src_op_statistics_path:
|
|
1391
|
+
dst_op_statistics_path = os.path.join(ascend_profiler_output_path, f"op_statistic.csv")
|
|
1392
|
+
PathManager.copy_file(src_op_statistics_path[0], dst_op_statistics_path)
|
|
1233
1393
|
|
|
1234
1394
|
self._ascend_graph_cluster_analyse(source_path, ascend_profiler_output_path)
|
|
1235
1395
|
self._ascend_graph_communicate_analyse(source_path, ascend_profiler_output_path)
|
|
1396
|
+
AscendIntegrateGenerator(source_path, ascend_profiler_output_path).parse()
|
|
1397
|
+
AscendMemoryGenerator(self._output_path, self._rank_id, source_path, ascend_profiler_output_path).parse()
|
|
1236
1398
|
|
|
1237
1399
|
def _ascend_graph_cluster_analyse(self, source_path, ascend_profiler_output_path):
|
|
1238
1400
|
"""Analyse step trace time info"""
|
|
@@ -1243,7 +1405,7 @@ class Profiler:
|
|
|
1243
1405
|
step_trace_time_path = os.path.join(ascend_profiler_output_path, f'step_trace_time.csv')
|
|
1244
1406
|
step_trace_time_path = validate_and_normalize_path(step_trace_time_path)
|
|
1245
1407
|
|
|
1246
|
-
cluster_analyse = AscendClusterGenerator(
|
|
1408
|
+
cluster_analyse = AscendClusterGenerator(source_path)
|
|
1247
1409
|
cluster_analyse.parse()
|
|
1248
1410
|
cluster_analyse.write(step_trace_time_path)
|
|
1249
1411
|
except (ProfilerIOException, ProfilerFileNotFoundException, ProfilerRawFileException) as err:
|
|
@@ -1262,10 +1424,11 @@ class Profiler:
|
|
|
1262
1424
|
communication_file_path = os.path.join(ascend_profiler_output_path, f'communication.json')
|
|
1263
1425
|
communication_file_path = validate_and_normalize_path(communication_file_path)
|
|
1264
1426
|
|
|
1265
|
-
communication_matrix_file_path = os.path.join(ascend_profiler_output_path,
|
|
1427
|
+
communication_matrix_file_path = os.path.join(ascend_profiler_output_path,
|
|
1428
|
+
f"communication_matrix.json")
|
|
1266
1429
|
communication_matrix_file_path = validate_and_normalize_path(communication_matrix_file_path)
|
|
1267
1430
|
|
|
1268
|
-
analyze_path = os.path.
|
|
1431
|
+
analyze_path = os.path.realpath(os.path.join(source_path, os.path.pardir, 'analyze'))
|
|
1269
1432
|
communicate_analyser = AscendCommunicationGenerator(analyze_path)
|
|
1270
1433
|
communicate_analyser.parse()
|
|
1271
1434
|
communicate_analyser.write(communication_file_path, communication_matrix_file_path)
|
|
@@ -1274,7 +1437,7 @@ class Profiler:
|
|
|
1274
1437
|
finally:
|
|
1275
1438
|
pass
|
|
1276
1439
|
|
|
1277
|
-
def _ascend_graph_hccl_analyse(self,
|
|
1440
|
+
def _ascend_graph_hccl_analyse(self, mindstudio_profiler_output, steptrace):
|
|
1278
1441
|
"""Analyse hccl profiler info."""
|
|
1279
1442
|
if not self._profile_communication:
|
|
1280
1443
|
return
|
|
@@ -1288,10 +1451,7 @@ class Profiler:
|
|
|
1288
1451
|
|
|
1289
1452
|
hccl_raw_path = os.path.join(self._output_path, f'hccl_raw_{dev_id}.csv')
|
|
1290
1453
|
hccl_raw_path = validate_and_normalize_path(hccl_raw_path)
|
|
1291
|
-
|
|
1292
|
-
hccl_analyse = AscendHCCLGenerator(os.path.join(source_path, 'timeline'), steptrace)
|
|
1293
|
-
else:
|
|
1294
|
-
hccl_analyse = AscendHCCLGeneratorOld(os.path.join(source_path, 'timeline'))
|
|
1454
|
+
hccl_analyse = AscendHCCLGenerator(mindstudio_profiler_output, steptrace)
|
|
1295
1455
|
hccl_analyse.parse()
|
|
1296
1456
|
hccl_analyse.write(hccl_raw_path)
|
|
1297
1457
|
|
|
@@ -1300,62 +1460,87 @@ class Profiler:
|
|
|
1300
1460
|
finally:
|
|
1301
1461
|
pass
|
|
1302
1462
|
|
|
1303
|
-
def
|
|
1304
|
-
"""
|
|
1305
|
-
|
|
1306
|
-
|
|
1307
|
-
|
|
1308
|
-
|
|
1309
|
-
|
|
1310
|
-
|
|
1311
|
-
|
|
1312
|
-
|
|
1313
|
-
|
|
1314
|
-
|
|
1315
|
-
|
|
1316
|
-
|
|
1317
|
-
|
|
1318
|
-
|
|
1319
|
-
|
|
1320
|
-
|
|
1321
|
-
logger.warning("Pynative mode does not support MSAdvisor analyzer currently.")
|
|
1463
|
+
def _get_kernel_op_map(self, op_summary, kernels: List[CANNEvent]) -> List:
|
|
1464
|
+
"""Get the mapping between framework operator and device kernel."""
|
|
1465
|
+
if not kernels:
|
|
1466
|
+
return []
|
|
1467
|
+
kernel_map = {}
|
|
1468
|
+
for kernel in kernels:
|
|
1469
|
+
key = kernel.name if kernel.name.startswith('hcom_') else (kernel.name, str(kernel.ts))
|
|
1470
|
+
kernel_map[key] = kernel.parent
|
|
1471
|
+
launch_ops = [None] * len(op_summary)
|
|
1472
|
+
for index, summary in enumerate(op_summary):
|
|
1473
|
+
ts = str(summary['Task Start Time(us)']).strip("\t")
|
|
1474
|
+
name = summary['Op Name']
|
|
1475
|
+
key = name if name.startswith("hcom_") else (name, ts)
|
|
1476
|
+
launch_op = kernel_map.get(key)
|
|
1477
|
+
if not launch_op:
|
|
1478
|
+
continue
|
|
1479
|
+
launch_ops[index] = launch_op.name
|
|
1480
|
+
return launch_ops
|
|
1322
1481
|
|
|
1323
|
-
def _ascend_graph_analyse(self):
|
|
1324
|
-
|
|
1325
|
-
|
|
1482
|
+
def _ascend_graph_analyse(self, offline_path=None):
|
|
1483
|
+
if offline_path or self._analyse_mode == ANALYSIS_SYNC_MODE:
|
|
1484
|
+
self._ascend_graph_analyse_inner(offline_path)
|
|
1485
|
+
else:
|
|
1486
|
+
MultiProcessPool().add_async_job(self._ascend_graph_analyse_inner)
|
|
1326
1487
|
|
|
1327
|
-
|
|
1488
|
+
@timeit("Profiler analyse done")
|
|
1489
|
+
def _ascend_graph_analyse_inner(self, offline_path=None):
|
|
1490
|
+
"""Ascend graph mode analyse."""
|
|
1491
|
+
job_id = self._get_profiling_job_id(offline_path)
|
|
1328
1492
|
if not job_id:
|
|
1329
1493
|
return
|
|
1330
1494
|
logger.info("Profiling: job id is %s ", job_id)
|
|
1331
1495
|
|
|
1332
1496
|
self._check_output_path(output_path=self._output_path)
|
|
1333
1497
|
source_path = os.path.join(self._output_path, job_id)
|
|
1334
|
-
self._minddata_analyse(
|
|
1498
|
+
self._minddata_analyse()
|
|
1335
1499
|
if self._op_time:
|
|
1336
|
-
|
|
1500
|
+
mindstudio_profiler_output = os.path.realpath(
|
|
1501
|
+
os.path.join(source_path, os.path.pardir, 'mindstudio_profiler_output'))
|
|
1502
|
+
flag = _ascend_graph_msprof_generator(mindstudio_profiler_output, self._model_iteration_dict)
|
|
1337
1503
|
if not flag:
|
|
1338
1504
|
logger.warning('Current driver package not support all export mode, use single export mode, '
|
|
1339
1505
|
'this may lead to performance degradation. Suggest upgrading the driver package.')
|
|
1340
1506
|
ProfilerInfo.set_export_flag(flag)
|
|
1341
|
-
op_summary, op_statistic, steptrace
|
|
1342
|
-
|
|
1343
|
-
self._ascend_timeline_analyse(op_summary, steptrace)
|
|
1507
|
+
op_summary, op_statistic, steptrace, steptrace_model \
|
|
1508
|
+
= _ascend_graph_msprof_analyse(mindstudio_profiler_output)
|
|
1509
|
+
kernels = self._ascend_timeline_analyse(op_summary, steptrace, source_path, mindstudio_profiler_output)
|
|
1510
|
+
|
|
1511
|
+
if isinstance(op_statistic, np.ndarray) and op_statistic.shape[0] == 0 or \
|
|
1512
|
+
not isinstance(op_statistic, np.ndarray) and not op_statistic:
|
|
1513
|
+
logger.warning('Op statistic data is empty!')
|
|
1514
|
+
return
|
|
1515
|
+
|
|
1516
|
+
launch_ops = self._get_kernel_op_map(op_summary, kernels)
|
|
1517
|
+
self._ascend_op_analyse(op_summary, op_statistic, self._dynamic_status, launch_ops)
|
|
1344
1518
|
graph_ids = np.unique(op_summary['Model ID']).tolist()
|
|
1345
|
-
|
|
1519
|
+
self._ascend_fpbp_analyse(op_summary, steptrace)
|
|
1346
1520
|
if len(graph_ids) == 1:
|
|
1347
1521
|
self._ascend_step_trace_analyse(steptrace)
|
|
1522
|
+
else:
|
|
1523
|
+
self._ascend_step_trace_analyse(steptrace_model)
|
|
1348
1524
|
if self._dynamic_status:
|
|
1349
1525
|
self._ascend_dynamic_net_analyse(op_summary)
|
|
1350
|
-
self._ascend_flops_analyse(op_summary)
|
|
1351
|
-
self._ascend_graph_memory_analyse(
|
|
1352
|
-
self._ascend_ms_analyze(
|
|
1353
|
-
self._ascend_graph_hccl_analyse(
|
|
1354
|
-
self.
|
|
1526
|
+
self._ascend_flops_analyse(op_summary, launch_ops)
|
|
1527
|
+
self._ascend_graph_memory_analyse()
|
|
1528
|
+
self._ascend_ms_analyze(mindstudio_profiler_output)
|
|
1529
|
+
self._ascend_graph_hccl_analyse(mindstudio_profiler_output, steptrace)
|
|
1530
|
+
self._minddata_aicpu_analyse(self._output_path, job_id)
|
|
1355
1531
|
ProfilerInfo.set_graph_ids(graph_ids)
|
|
1532
|
+
try:
|
|
1533
|
+
ProfilerInfo.set_data_simplification(self._data_simplification)
|
|
1534
|
+
ProfilerPathManager.simplify_data(self._output_path, self._data_simplification)
|
|
1535
|
+
except RuntimeError as err:
|
|
1536
|
+
logger.error('Profilier simplify data failed, %s', str(err))
|
|
1356
1537
|
|
|
1357
1538
|
def _ascend_graph_start(self):
|
|
1358
1539
|
"""Ascend graph mode start profiling."""
|
|
1540
|
+
op_range_file = os.path.join(self._framework_path, "op_range_" + str(self._rank_id))
|
|
1541
|
+
if os.path.exists(op_range_file):
|
|
1542
|
+
os.remove(op_range_file)
|
|
1543
|
+
logger.info("Clear old op range filer.")
|
|
1359
1544
|
self._ascend_profiler.start()
|
|
1360
1545
|
|
|
1361
1546
|
def _gpu_analyse(self):
|
|
@@ -1370,12 +1555,14 @@ class Profiler:
|
|
|
1370
1555
|
else:
|
|
1371
1556
|
self._rank_size = int(os.getenv('RANK_SIZE', '1'))
|
|
1372
1557
|
|
|
1558
|
+
ProfilerInfo.set_rank_size(self._rank_size)
|
|
1559
|
+
|
|
1373
1560
|
if self._has_started:
|
|
1374
1561
|
self.stop()
|
|
1375
1562
|
else:
|
|
1376
1563
|
logger.info("No need to stop profiler because profiler has been stopped.")
|
|
1377
1564
|
|
|
1378
|
-
self._minddata_analyse(
|
|
1565
|
+
self._minddata_analyse()
|
|
1379
1566
|
|
|
1380
1567
|
try:
|
|
1381
1568
|
self._analyse_step_relation_info()
|
|
@@ -1438,13 +1625,14 @@ class Profiler:
|
|
|
1438
1625
|
if self._has_started:
|
|
1439
1626
|
self.stop()
|
|
1440
1627
|
else:
|
|
1441
|
-
logger.info("No need to stop profiler because profiler has been stopped
|
|
1628
|
+
logger.info("No need to stop profiler because profiler has been stopped.")
|
|
1629
|
+
|
|
1442
1630
|
if not self._op_time:
|
|
1443
1631
|
return
|
|
1444
1632
|
try:
|
|
1445
1633
|
timeline_generator = CpuTimelineGenerator(self._output_path, self._rank_id, context.get_context("mode"))
|
|
1446
1634
|
timeline_generator.init_timeline(pretty=self._pretty_json)
|
|
1447
|
-
timeline_generator.write_timeline(
|
|
1635
|
+
timeline_generator.write_timeline()
|
|
1448
1636
|
timeline_generator.write_timeline_summary()
|
|
1449
1637
|
except (ProfilerIOException, ProfilerFileNotFoundException, RuntimeError) as err:
|
|
1450
1638
|
logger.warning('Fail to write timeline data: %s', err)
|
|
@@ -1453,15 +1641,13 @@ class Profiler:
|
|
|
1453
1641
|
raise RuntimeError("Currently, the CPU platform does not support Pynative mode to collect performance "
|
|
1454
1642
|
"data.")
|
|
1455
1643
|
|
|
1456
|
-
def _analyse_step_trace(self,
|
|
1457
|
-
is_gpu_kernel_async_launch_flag=False):
|
|
1644
|
+
def _analyse_step_trace(self, is_training_mode_flag=True, is_gpu_kernel_async_launch_flag=False):
|
|
1458
1645
|
"""
|
|
1459
1646
|
Analyse step trace data and save the result.
|
|
1460
1647
|
|
|
1461
1648
|
Args:
|
|
1462
|
-
source_path (str): The directory that contains the step trace original data.
|
|
1463
|
-
framework_parser (FrameworkParser): The framework parse instance.
|
|
1464
1649
|
is_training_mode_flag (bool): Whether in training mode or not.
|
|
1650
|
+
is_gpu_kernel_async_launch_flag (bool): Whether gpu kernel launches are asynchronous
|
|
1465
1651
|
"""
|
|
1466
1652
|
logger.info("Begin to parse step trace.")
|
|
1467
1653
|
# construct output path
|
|
@@ -1492,68 +1678,35 @@ class Profiler:
|
|
|
1492
1678
|
logger.info("Finish saving the intermediate result: %s", step_trace_intermediate_file_path)
|
|
1493
1679
|
logger.info("The point info is: %s", point_info)
|
|
1494
1680
|
|
|
1495
|
-
return point_info, is_training_mode_flag
|
|
1496
|
-
return {}, is_training_mode_flag
|
|
1497
|
-
|
|
1498
|
-
# whether keep the first step
|
|
1499
|
-
skip_first_step_flag = framework_parser.check_op_name(INIT_OP_NAME)
|
|
1500
|
-
# recognize inference or training mode
|
|
1501
|
-
is_training_mode_flag = framework_parser.check_op_name("Gradients")
|
|
1502
|
-
# parser the step trace files and save the result to disk
|
|
1503
|
-
source_path = validate_and_normalize_path(source_path)
|
|
1504
|
-
parser = AscendStepTraceParser(input_dir=source_path,
|
|
1505
|
-
output_file_path=step_trace_intermediate_file_path,
|
|
1506
|
-
skip_first_step=skip_first_step_flag,
|
|
1507
|
-
is_training_mode=is_training_mode_flag)
|
|
1508
|
-
parser.set_task_id_op_name_dict(framework_parser.to_task_id_full_op_name_dict())
|
|
1509
|
-
parser.parse_and_save()
|
|
1510
|
-
point_info = parser.record_point_info(point_info_file_path)
|
|
1511
|
-
|
|
1512
|
-
# print parser result
|
|
1513
|
-
parser.show()
|
|
1514
|
-
logger.info("Finish saving the intermediate result: %s", step_trace_intermediate_file_path)
|
|
1515
|
-
logger.info("The point info is: %s", point_info)
|
|
1516
|
-
|
|
1517
|
-
return point_info, is_training_mode_flag
|
|
1518
|
-
|
|
1519
1681
|
def _generate_timeline(self, reduce_op_type):
|
|
1520
1682
|
"""Used for gpu, generate timeline info, write to json format file."""
|
|
1521
1683
|
try:
|
|
1522
1684
|
timeline_generator = GpuTimelineGenerator(self._output_path, self._dev_id, self._rank_size,
|
|
1523
1685
|
context.get_context("mode"))
|
|
1524
1686
|
timeline_generator.init_timeline(reduce_op_type)
|
|
1525
|
-
self._timeline_meta = timeline_generator.write_timeline(
|
|
1687
|
+
self._timeline_meta = timeline_generator.write_timeline()
|
|
1526
1688
|
timeline_generator.write_timeline_summary()
|
|
1689
|
+
timeline_generator.parse_fwk_data()
|
|
1690
|
+
timeline_generator.write_fwk_timeline()
|
|
1527
1691
|
return timeline_generator
|
|
1528
1692
|
except (ProfilerIOException, ProfilerFileNotFoundException, RuntimeError) as err:
|
|
1529
1693
|
logger.warning('Fail to write timeline data: %s', err)
|
|
1530
1694
|
raise RuntimeError('Fail to write timeline data.') from err
|
|
1531
1695
|
|
|
1532
|
-
def _analyse_memory_usage(self
|
|
1696
|
+
def _analyse_memory_usage(self):
|
|
1533
1697
|
"""Analyse memory usage data."""
|
|
1534
1698
|
integrator = Integrator(self._output_path, self._rank_id)
|
|
1535
|
-
|
|
1536
|
-
memory_parser = MemoryUsageParser(self._output_path, self._rank_id, pretty=self._pretty_json)
|
|
1537
|
-
memory_parser.init_memory_usage_info(aicore_detail_data, points)
|
|
1538
|
-
memory_parser.write_memory_files()
|
|
1699
|
+
integrator.get_aicore_detail_data()
|
|
1539
1700
|
|
|
1540
|
-
def _get_profiling_job_id(self):
|
|
1701
|
+
def _get_profiling_job_id(self, offline_path):
|
|
1541
1702
|
"""Get profiling job id, which was generated by ada service.
|
|
1542
1703
|
|
|
1543
1704
|
Returns:
|
|
1544
|
-
str, profiling job id
|
|
1705
|
+
str, profiling job id, eg: PROF_XXX/device_*.
|
|
1545
1706
|
"""
|
|
1546
1707
|
|
|
1547
|
-
if
|
|
1548
|
-
|
|
1549
|
-
job_id = self._ascend_job_id.rstrip('/').split('/')[-1]
|
|
1550
|
-
if job_id.startswith('PROF'):
|
|
1551
|
-
device_dir = [dir for dir in os.listdir(self._ascend_job_id) if dir.startswith('device')]
|
|
1552
|
-
info_file_path = get_file_path(os.path.join(self._ascend_job_id, device_dir[0]), "info.json")
|
|
1553
|
-
training_rank_id, _ = self._parse_info_json(info_file_path)
|
|
1554
|
-
self._rank_id = int(training_rank_id)
|
|
1555
|
-
return os.path.join(job_id, device_dir[0])
|
|
1556
|
-
return job_id
|
|
1708
|
+
if offline_path:
|
|
1709
|
+
self._output_path = os.path.join(offline_path, 'profiler')
|
|
1557
1710
|
|
|
1558
1711
|
job_id = ""
|
|
1559
1712
|
job_dirs = filter(lambda item: item.startswith('JOB') or item.startswith('PROF') and os.path.isdir(
|
|
@@ -1562,16 +1715,12 @@ class Profiler:
|
|
|
1562
1715
|
job_dirs, key=lambda x: os.path.getmtime(os.path.join(self._output_path, x)), reverse=True)
|
|
1563
1716
|
|
|
1564
1717
|
for dir_name in sorted_job_dirs:
|
|
1565
|
-
|
|
1566
|
-
|
|
1567
|
-
|
|
1568
|
-
|
|
1569
|
-
job_dir = os.path.join(self._output_path, dir_name, device_dir[0])
|
|
1570
|
-
else:
|
|
1571
|
-
job_dir = os.path.join(self._output_path, dir_name)
|
|
1718
|
+
prof_dir = os.path.join(self._output_path, dir_name)
|
|
1719
|
+
device_dir = [dir for dir in os.listdir(prof_dir) \
|
|
1720
|
+
if dir.startswith('device') and os.path.isdir(os.path.join(prof_dir, dir))]
|
|
1721
|
+
job_dir = os.path.join(self._output_path, dir_name, device_dir[0])
|
|
1572
1722
|
|
|
1573
|
-
|
|
1574
|
-
if start_file_path is None:
|
|
1723
|
+
if get_file_path(job_dir, "start_info") is None:
|
|
1575
1724
|
logger.warning("Find profiling job path %s, but host_start.log not exist, "
|
|
1576
1725
|
"profiler will ignore this job dir.", job_dir)
|
|
1577
1726
|
continue
|
|
@@ -1582,25 +1731,26 @@ class Profiler:
|
|
|
1582
1731
|
"profiler will ignore this job dir.", job_dir)
|
|
1583
1732
|
continue
|
|
1584
1733
|
|
|
1585
|
-
|
|
1586
|
-
|
|
1734
|
+
prof_rank_id = ProfilerInfo.get_rank_id(self._output_path)
|
|
1735
|
+
prof_device_id = ProfilerInfo.get_device_id(prof_dir)
|
|
1736
|
+
job_start_time = self._parse_job_start_time(prof_dir)
|
|
1587
1737
|
|
|
1588
|
-
if
|
|
1589
|
-
|
|
1590
|
-
|
|
1591
|
-
|
|
1592
|
-
|
|
1738
|
+
if offline_path:
|
|
1739
|
+
self._start_time = int(job_start_time)
|
|
1740
|
+
else:
|
|
1741
|
+
if self._dev_id != prof_device_id and self._rank_id != prof_rank_id:
|
|
1742
|
+
logger.warning("Find profiling find job path %s, but not current training device id. "
|
|
1743
|
+
"Current training rank id %s, but job path rank id: %s, "
|
|
1744
|
+
"profiler will ignore this job dir.", job_dir, self._rank_id, prof_rank_id)
|
|
1745
|
+
continue
|
|
1593
1746
|
|
|
1594
|
-
|
|
1595
|
-
|
|
1596
|
-
|
|
1597
|
-
|
|
1598
|
-
|
|
1747
|
+
if job_start_time < self._start_time:
|
|
1748
|
+
logger.warning("Find profiling job path %s, but start_time(%d) is earlier than this training "
|
|
1749
|
+
"start_time(%d), profiler will ignore this job dir.",
|
|
1750
|
+
job_dir, job_start_time, self._start_time)
|
|
1751
|
+
continue
|
|
1599
1752
|
|
|
1600
|
-
|
|
1601
|
-
job_id = os.path.join(dir_name, device_dir[0])
|
|
1602
|
-
else:
|
|
1603
|
-
job_id = dir_name
|
|
1753
|
+
job_id = os.path.join(dir_name, device_dir[0])
|
|
1604
1754
|
break
|
|
1605
1755
|
|
|
1606
1756
|
if not job_id:
|
|
@@ -1700,15 +1850,21 @@ class Profiler:
|
|
|
1700
1850
|
self._output_path = validate_and_normalize_path(output_path)
|
|
1701
1851
|
else:
|
|
1702
1852
|
output_path = kwargs.pop("output_path")
|
|
1853
|
+
if not isinstance(output_path, str):
|
|
1854
|
+
logger.warning(
|
|
1855
|
+
f"The output_path must be a string, but got type {type(output_path)}, it will be set to 'data'.")
|
|
1856
|
+
output_path = "data"
|
|
1703
1857
|
self._output_path = validate_and_normalize_path(output_path)
|
|
1704
1858
|
|
|
1705
1859
|
self._output_path = os.path.join(self._output_path, "profiler")
|
|
1706
1860
|
if not os.path.exists(self._output_path):
|
|
1707
|
-
os.makedirs(self._output_path, exist_ok=True)
|
|
1708
|
-
os.chmod(self._output_path, stat.S_IRUSR | stat.S_IWUSR | stat.S_IXUSR)
|
|
1861
|
+
os.makedirs(self._output_path, exist_ok=True, mode=stat.S_IRUSR | stat.S_IWUSR | stat.S_IXUSR)
|
|
1709
1862
|
else:
|
|
1710
1863
|
logger.warning("The target dir already exists. "
|
|
1711
1864
|
"There may be some old profiling data, and they will be rewritten in the end.")
|
|
1865
|
+
self._framework_path = os.path.join(self._output_path, "FRAMEWORK")
|
|
1866
|
+
if not os.path.exists(self._framework_path):
|
|
1867
|
+
os.makedirs(self._framework_path, exist_ok=True, mode=stat.S_IRUSR | stat.S_IWUSR | stat.S_IXUSR)
|
|
1712
1868
|
|
|
1713
1869
|
def _parser_kwargs(self, kwargs):
|
|
1714
1870
|
"""Parse kwargs vale."""
|
|
@@ -1729,11 +1885,11 @@ class Profiler:
|
|
|
1729
1885
|
f"but got type {type(self._op_time)}, it will be set to True.")
|
|
1730
1886
|
self._op_time = True
|
|
1731
1887
|
|
|
1732
|
-
self._data_process = kwargs.pop("data_process",
|
|
1888
|
+
self._data_process = kwargs.pop("data_process", False)
|
|
1733
1889
|
if not isinstance(self._data_process, bool):
|
|
1734
1890
|
logger.warning(f"For '{self.__class__.__name__}', the parameter data_process must be bool, "
|
|
1735
|
-
f"but got type {type(self._data_process)}, it will be set to
|
|
1736
|
-
self._data_process =
|
|
1891
|
+
f"but got type {type(self._data_process)}, it will be set to False.")
|
|
1892
|
+
self._data_process = False
|
|
1737
1893
|
|
|
1738
1894
|
timeline_limit = kwargs.pop("timeline_limit", 500)
|
|
1739
1895
|
if isinstance(timeline_limit, bool) or not isinstance(timeline_limit, int):
|
|
@@ -1745,55 +1901,22 @@ class Profiler:
|
|
|
1745
1901
|
"[Profiler]The 'timeline_limit' parameter must be greater than 0, it will be set to 500.")
|
|
1746
1902
|
timeline_limit = 500
|
|
1747
1903
|
self._timeline_size_limit_byte = timeline_limit * 1024 * 1024
|
|
1748
|
-
self._profile_framework = kwargs.pop("profile_framework",
|
|
1749
|
-
if self._profile_framework not in ["
|
|
1750
|
-
logger.warning(f"For '{self.__class__.__name__}', the parameter profile_framework must be one of [
|
|
1751
|
-
f" 'time', 'all', None], but got {self._profile_framework}, it will be set to
|
|
1752
|
-
self._profile_framework =
|
|
1753
|
-
|
|
1754
|
-
|
|
1755
|
-
|
|
1756
|
-
|
|
1757
|
-
|
|
1758
|
-
|
|
1759
|
-
|
|
1760
|
-
|
|
1761
|
-
|
|
1762
|
-
|
|
1763
|
-
|
|
1764
|
-
|
|
1765
|
-
|
|
1766
|
-
|
|
1767
|
-
dataset_file_name = 'dataset_' + str(self._rank_id) + '.csv'
|
|
1768
|
-
host_info_file = os.path.join(self._output_path, 'host_info', csv_file_name)
|
|
1769
|
-
timeline_file = os.path.join(self._output_path, 'host_info', json_file_name)
|
|
1770
|
-
memory_file = os.path.join(self._output_path, 'host_info', memory_file_name)
|
|
1771
|
-
dataset_execution_file = os.path.join(self._output_path, 'host_info', dataset_file_name)
|
|
1772
|
-
_parse_host_info(host_info_file, timeline_file, memory_file)
|
|
1773
|
-
_calculate_dataset_execution_time(host_info_file, dataset_execution_file)
|
|
1774
|
-
logger.info("Profile HostInfo finished.")
|
|
1775
|
-
|
|
1776
|
-
|
|
1777
|
-
def _offline_parse(offline_path):
|
|
1778
|
-
"""Parse data in abnormal scenario, only support for host_info at present."""
|
|
1779
|
-
logger.info("Profiling HostInfo offline start.")
|
|
1780
|
-
host_dir = os.path.join(offline_path, 'profiler', 'host_info')
|
|
1781
|
-
host_dir = validate_and_normalize_path(host_dir)
|
|
1782
|
-
if not os.path.exists(host_dir):
|
|
1783
|
-
logger.error("Host info directory: %s not exist.", host_dir)
|
|
1784
|
-
return
|
|
1785
|
-
files = os.listdir(host_dir)
|
|
1786
|
-
for file in files:
|
|
1787
|
-
if not file.startswith("host_info_") or not file.endswith(".csv"):
|
|
1788
|
-
continue
|
|
1789
|
-
rank_id = file.split('_')[-1].split('.')[0]
|
|
1790
|
-
if not rank_id.isdigit():
|
|
1791
|
-
logger.info("Cannot get rank_id from file: %s, skip it", file)
|
|
1792
|
-
return
|
|
1793
|
-
host_info_file = os.path.join(host_dir, file)
|
|
1794
|
-
timeline_file = os.path.join(host_dir, f'timeline_{rank_id}.json')
|
|
1795
|
-
memory_file = os.path.join(host_dir, f'host_memory_{rank_id}.csv')
|
|
1796
|
-
dataset_execution_file = os.path.join(host_dir, f'dataset_{rank_id}.csv')
|
|
1797
|
-
_parse_host_info(host_info_file, timeline_file, memory_file)
|
|
1798
|
-
_calculate_dataset_execution_time(host_info_file, dataset_execution_file)
|
|
1799
|
-
logger.info("Profile HostInfo offline finished.")
|
|
1904
|
+
self._profile_framework = kwargs.pop("profile_framework", None)
|
|
1905
|
+
if self._profile_framework not in ["time", "all", None]:
|
|
1906
|
+
logger.warning(f"For '{self.__class__.__name__}', the parameter profile_framework must be one of ["
|
|
1907
|
+
f" 'time', 'all', None], but got {self._profile_framework}, it will be set to None.")
|
|
1908
|
+
self._profile_framework = None
|
|
1909
|
+
|
|
1910
|
+
if not isinstance(self._data_simplification, bool):
|
|
1911
|
+
logger.warning(f"For '{self.__class__.__name__}', the parameter data_simplification must be bool, "
|
|
1912
|
+
f"but got type {type(self._data_simplification)}, it will be set to True.")
|
|
1913
|
+
self._data_simplification = True
|
|
1914
|
+
|
|
1915
|
+
self._with_stack = kwargs.pop("with_stack", False)
|
|
1916
|
+
if not isinstance(self._with_stack, bool):
|
|
1917
|
+
logger.warning(f"For '{self.__class__.__name__}', the parameter with_stack must be bool, but got "
|
|
1918
|
+
f"type {type(self._with_stack)}, it will be set to False.")
|
|
1919
|
+
self._with_stack = False
|
|
1920
|
+
if self._with_stack and self._profile_framework not in ["time", "all"]:
|
|
1921
|
+
logger.warning("When using the with_stack parameter, the profile_framework parameter must be enabled.")
|
|
1922
|
+
self._with_stack = False
|