mindspore 2.0.0rc1__cp38-cp38-manylinux1_x86_64.whl → 2.2.0__cp38-cp38-manylinux1_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mindspore might be problematic. Click here for more details.
- mindspore/.commit_id +1 -1
- mindspore/Third_Party_Open_Source_Software_Notice +2 -2
- mindspore/__init__.py +5 -2
- mindspore/_akg/akg/build_module.py +5 -6
- mindspore/_akg/akg/composite/build_module.py +49 -16
- mindspore/_akg/akg/composite/split_stitch.py +10 -11
- mindspore/_akg/akg/config/repository.json +195 -0
- mindspore/_akg/akg/global_configs.py +5 -1
- mindspore/_akg/akg/ms/info_version_adapt.py +67 -1
- mindspore/_akg/akg/tvm/api.py +4 -3
- mindspore/_akg/akg/tvm/autotvm/__init__.py +1 -2
- mindspore/_akg/akg/tvm/autotvm/graph_tuner/base_graph_tuner.py +1 -5
- mindspore/_akg/akg/tvm/autotvm/measure/__init__.py +1 -1
- mindspore/_akg/akg/tvm/autotvm/measure/measure.py +1 -10
- mindspore/_akg/akg/tvm/autotvm/measure/measure_methods.py +1 -372
- mindspore/_akg/akg/tvm/build_module.py +16 -1
- mindspore/_akg/akg/tvm/contrib/graph_runtime.py +0 -53
- mindspore/_akg/akg/tvm/hybrid/parser.py +7 -6
- mindspore/_akg/akg/tvm/ir_builder.py +1 -1
- mindspore/_akg/akg/tvm/module.py +1 -2
- mindspore/_akg/akg/tvm/stmt.py +2 -2
- mindspore/_akg/akg/utils/composite_op_helper.py +9 -10
- mindspore/_akg/akg/utils/kernel_exec.py +58 -260
- mindspore/_akg/akg/utils/op_dsl.py +17 -1
- mindspore/_akg/akg/utils/result_analysis.py +4 -24
- mindspore/_akg/akg/utils/tbe_codegen_utils.py +198 -0
- mindspore/_c_dataengine.cpython-38-x86_64-linux-gnu.so +0 -0
- mindspore/_c_expression.cpython-38-x86_64-linux-gnu.so +0 -0
- mindspore/_c_mindrecord.cpython-38-x86_64-linux-gnu.so +0 -0
- mindspore/_check_jit_forbidden_api.py +5 -1
- mindspore/_checkparam.py +79 -62
- mindspore/_extends/graph_kernel/__init__.py +0 -1
- mindspore/_extends/graph_kernel/model/graph_split.py +2 -0
- mindspore/_extends/graph_kernel/model/model_builder.py +9 -50
- mindspore/_extends/graph_kernel/splitter.py +1 -9
- mindspore/_extends/parallel_compile/akg_compiler/akg_process.py +128 -21
- mindspore/_extends/parallel_compile/akg_compiler/build_tbe_kernel.py +2 -2
- mindspore/_extends/parallel_compile/akg_compiler/tbe_topi.py +4 -2
- mindspore/_extends/parallel_compile/tbe_compiler/tbe_adapter.py +18 -13
- mindspore/_extends/parallel_compile/tbe_compiler/tbe_helper.py +13 -9
- mindspore/_extends/parallel_compile/tbe_compiler/tbe_job.py +1 -1
- mindspore/_extends/parallel_compile/tbe_compiler/tbe_job_manager.py +1 -1
- mindspore/_extends/parse/__init__.py +19 -17
- mindspore/_extends/parse/namespace.py +7 -36
- mindspore/_extends/parse/parser.py +375 -189
- mindspore/_extends/parse/resources.py +36 -41
- mindspore/_extends/parse/standard_method.py +350 -245
- mindspore/_extends/parse/trope.py +2 -12
- mindspore/_extends/remote/kernel_build_server.py +24 -7
- mindspore/_extends/remote/kernel_build_server_akg_v2.py +55 -0
- mindspore/_install_custom.py +43 -0
- mindspore/_mindspore_offline_debug.cpython-38-x86_64-linux-gnu.so +0 -0
- mindspore/amp.py +85 -19
- mindspore/bin/cache_admin +0 -0
- mindspore/bin/cache_server +0 -0
- mindspore/boost/base.py +2 -2
- mindspore/boost/boost.py +27 -32
- mindspore/boost/boost_cell_wrapper.py +37 -13
- mindspore/boost/grad_accumulation.py +1 -1
- mindspore/boost/grad_freeze.py +34 -6
- mindspore/boost/group_loss_scale_manager.py +15 -14
- mindspore/boost/less_batch_normalization.py +28 -3
- mindspore/common/__init__.py +15 -11
- mindspore/common/_auto_dynamic.py +68 -0
- mindspore/common/_jit_fallback_utils.py +111 -0
- mindspore/common/_register_for_adapter.py +17 -5
- mindspore/common/_register_for_tensor.py +2 -2
- mindspore/common/_stub_tensor.py +18 -15
- mindspore/common/_utils.py +31 -7
- mindspore/common/api.py +269 -101
- mindspore/common/auto_dynamic_shape.py +498 -0
- mindspore/common/dtype.py +61 -21
- mindspore/common/dump.py +9 -7
- mindspore/common/initializer.py +106 -76
- mindspore/common/jit_config.py +35 -14
- mindspore/common/lazy_inline.py +187 -0
- mindspore/common/mindir_util.py +101 -0
- mindspore/common/mutable.py +10 -13
- mindspore/common/parameter.py +246 -55
- mindspore/common/seed.py +13 -7
- mindspore/common/sparse_tensor.py +29 -33
- mindspore/common/tensor.py +907 -251
- mindspore/communication/__init__.py +7 -4
- mindspore/communication/_comm_helper.py +84 -4
- mindspore/communication/management.py +160 -88
- mindspore/config/op_info.config +99 -75
- mindspore/config/super_bar_config.json +36 -4
- mindspore/context.py +526 -219
- mindspore/dataset/__init__.py +9 -46
- mindspore/dataset/audio/__init__.py +4 -19
- mindspore/dataset/audio/transforms.py +545 -233
- mindspore/dataset/audio/utils.py +21 -18
- mindspore/dataset/callback/ds_callback.py +42 -13
- mindspore/dataset/core/config.py +158 -100
- mindspore/dataset/core/validator_helpers.py +1 -63
- mindspore/dataset/debug/debug_hook.py +45 -13
- mindspore/dataset/debug/pre_defined_hook.py +5 -5
- mindspore/dataset/engine/__init__.py +0 -5
- mindspore/dataset/engine/cache_client.py +38 -15
- mindspore/dataset/engine/datasets.py +615 -278
- mindspore/dataset/engine/datasets_audio.py +154 -283
- mindspore/dataset/engine/datasets_standard_format.py +104 -116
- mindspore/dataset/engine/datasets_text.py +443 -326
- mindspore/dataset/engine/datasets_user_defined.py +251 -164
- mindspore/dataset/engine/datasets_vision.py +839 -1443
- mindspore/dataset/engine/iterators.py +11 -4
- mindspore/dataset/engine/obs/obs_mindrecord_dataset.py +7 -3
- mindspore/dataset/engine/obs/util.py +3 -0
- mindspore/dataset/engine/offload.py +6 -6
- mindspore/dataset/engine/queue.py +15 -14
- mindspore/dataset/engine/samplers.py +39 -23
- mindspore/dataset/engine/serializer_deserializer.py +22 -6
- mindspore/dataset/engine/validators.py +21 -331
- mindspore/dataset/text/__init__.py +5 -33
- mindspore/dataset/text/transforms.py +334 -165
- mindspore/dataset/text/utils.py +215 -145
- mindspore/dataset/transforms/__init__.py +1 -1
- mindspore/dataset/transforms/c_transforms.py +3 -2
- mindspore/dataset/transforms/py_transforms_util.py +40 -12
- mindspore/dataset/transforms/transforms.py +174 -71
- mindspore/dataset/utils/browse_dataset.py +25 -17
- mindspore/dataset/utils/line_reader.py +24 -21
- mindspore/dataset/vision/__init__.py +5 -26
- mindspore/dataset/vision/c_transforms.py +177 -165
- mindspore/dataset/vision/py_transforms.py +114 -119
- mindspore/dataset/vision/py_transforms_util.py +54 -51
- mindspore/dataset/vision/transforms.py +1127 -381
- mindspore/dataset/vision/utils.py +54 -38
- mindspore/dataset/vision/validators.py +12 -2
- mindspore/experimental/map_parameter.py +38 -4
- mindspore/{dataset/datapreprocess → experimental/optim}/__init__.py +14 -4
- mindspore/experimental/optim/adam.py +192 -0
- mindspore/experimental/optim/adamw.py +181 -0
- mindspore/experimental/optim/lr_scheduler.py +1427 -0
- mindspore/experimental/optim/optimizer.py +252 -0
- mindspore/experimental/optim/sgd.py +147 -0
- mindspore/gen_ops.py +273 -0
- mindspore/include/OWNERS +1 -2
- mindspore/include/api/context.h +21 -1
- mindspore/include/api/data_type.h +2 -1
- mindspore/include/api/graph.h +0 -15
- mindspore/include/api/kernel.h +2 -0
- mindspore/include/api/kernel_api.h +37 -12
- mindspore/include/api/model.h +29 -42
- mindspore/include/api/model_group.h +14 -3
- mindspore/include/api/model_parallel_runner.h +18 -2
- mindspore/include/api/serialization.h +26 -0
- mindspore/include/api/status.h +1 -0
- mindspore/include/api/types.h +38 -4
- mindspore/include/c_api/ms/abstract.h +67 -0
- mindspore/include/c_api/ms/attribute.h +197 -0
- mindspore/include/c_api/ms/base/handle_types.h +43 -0
- mindspore/include/c_api/ms/base/macros.h +32 -0
- mindspore/include/c_api/ms/base/status.h +33 -0
- mindspore/include/c_api/ms/base/types.h +282 -0
- mindspore/include/c_api/ms/context.h +102 -0
- mindspore/include/c_api/ms/graph.h +160 -0
- mindspore/include/c_api/ms/node.h +606 -0
- mindspore/include/c_api/ms/tensor.h +161 -0
- mindspore/include/c_api/ms/value.h +84 -0
- mindspore/include/c_api/status_c.h +3 -0
- mindspore/include/dataset/constants.h +6 -12
- mindspore/include/dataset/execute.h +23 -13
- mindspore/include/dataset/text.h +26 -26
- mindspore/include/dataset/transforms.h +25 -31
- mindspore/include/dataset/vision.h +60 -60
- mindspore/include/dataset/vision_ascend.h +5 -6
- mindspore/include/dataset/vision_lite.h +17 -17
- mindspore/include/mindapi/base/format.h +0 -1
- mindspore/include/mindapi/base/type_id.h +2 -1
- mindspore/include/mindapi/base/types.h +5 -1
- mindspore/lib/libdnnl.so.2 +0 -0
- mindspore/lib/libjemalloc.so.2 +0 -0
- mindspore/lib/libmindspore.so +0 -0
- mindspore/lib/libmindspore_backend.so +0 -0
- mindspore/lib/libmindspore_common.so +0 -0
- mindspore/lib/libmindspore_core.so +0 -0
- mindspore/lib/libmindspore_glog.so.0 +0 -0
- mindspore/lib/libmindspore_gpr.so.15 +0 -0
- mindspore/lib/libmindspore_grpc++.so.1 +0 -0
- mindspore/lib/libmindspore_grpc.so.15 +0 -0
- mindspore/lib/libmindspore_shared_lib.so +0 -0
- mindspore/lib/libmpi_adapter.so +0 -0
- mindspore/lib/libnnacl.so +0 -0
- mindspore/lib/libopencv_core.so.4.5 +0 -0
- mindspore/lib/libopencv_imgcodecs.so.4.5 +0 -0
- mindspore/lib/libopencv_imgproc.so.4.5 +0 -0
- mindspore/lib/libps_cache.so +0 -0
- mindspore/lib/plugin/ascend/custom_aicpu_ops/op_impl/cpu/aicpu_kernel/impl/libcust_aicpu_kernels.so +0 -0
- mindspore/lib/plugin/ascend/custom_aicpu_ops/op_impl/cpu/aicpu_kernel/impl/libcust_cpu_kernels.so +0 -0
- mindspore/lib/plugin/ascend/custom_aicpu_ops/op_impl/cpu/config/cust_aicpu_kernel.json +9000 -0
- mindspore/lib/plugin/ascend/custom_aicpu_ops/op_proto/libcust_op_proto.so +0 -0
- mindspore/lib/plugin/ascend/libakg.so +0 -0
- mindspore/lib/plugin/ascend/libascend_collective.so +0 -0
- mindspore/lib/plugin/ascend/libdvpp_utils.so +0 -0
- mindspore/lib/plugin/ascend/libhccl_plugin.so +0 -0
- mindspore/lib/plugin/ascend/libmindspore_aicpu_kernels.so +0 -0
- mindspore/lib/plugin/ascend/libmindspore_cpu_kernels.so +0 -0
- mindspore/lib/plugin/cpu/libakg.so +0 -0
- mindspore/lib/plugin/gpu/libcuda_ops.so.10 +0 -0
- mindspore/lib/plugin/gpu/libcuda_ops.so.11 +0 -0
- mindspore/lib/plugin/gpu10.1/libakg.so +0 -0
- mindspore/lib/plugin/gpu10.1/libnccl.so.2 +0 -0
- mindspore/lib/plugin/gpu10.1/libnvidia_collective.so +0 -0
- mindspore/lib/plugin/gpu11.1/libakg.so +0 -0
- mindspore/lib/plugin/gpu11.1/libnccl.so.2 +0 -0
- mindspore/lib/plugin/gpu11.1/libnvidia_collective.so +0 -0
- mindspore/lib/plugin/gpu11.6/libakg.so +0 -0
- mindspore/lib/plugin/gpu11.6/libnccl.so.2 +0 -0
- mindspore/lib/plugin/gpu11.6/libnvidia_collective.so +0 -0
- mindspore/lib/plugin/libmindspore_ascend.so.1 +0 -0
- mindspore/lib/plugin/libmindspore_ascend.so.2 +0 -0
- mindspore/lib/plugin/libmindspore_gpu.so.10.1 +0 -0
- mindspore/lib/plugin/libmindspore_gpu.so.11.1 +0 -0
- mindspore/lib/plugin/libmindspore_gpu.so.11.6 +0 -0
- mindspore/log.py +9 -6
- mindspore/mindrecord/filereader.py +33 -4
- mindspore/mindrecord/filewriter.py +70 -35
- mindspore/mindrecord/mindpage.py +40 -34
- mindspore/mindrecord/shardreader.py +1 -1
- mindspore/mindrecord/shardsegment.py +1 -1
- mindspore/mindrecord/tools/cifar100_to_mr.py +25 -18
- mindspore/mindrecord/tools/cifar10_to_mr.py +25 -18
- mindspore/mindrecord/tools/csv_to_mr.py +29 -13
- mindspore/mindrecord/tools/imagenet_to_mr.py +24 -10
- mindspore/mindrecord/tools/mnist_to_mr.py +24 -11
- mindspore/mindrecord/tools/tfrecord_to_mr.py +31 -26
- mindspore/nn/cell.py +463 -169
- mindspore/nn/dynamic_lr.py +47 -43
- mindspore/nn/layer/activation.py +225 -82
- mindspore/nn/layer/basic.py +121 -79
- mindspore/nn/layer/channel_shuffle.py +21 -21
- mindspore/nn/layer/combined.py +33 -26
- mindspore/nn/layer/container.py +277 -22
- mindspore/nn/layer/conv.py +441 -304
- mindspore/nn/layer/dense.py +19 -13
- mindspore/nn/layer/embedding.py +62 -49
- mindspore/nn/layer/flash_attention.py +264 -0
- mindspore/nn/layer/image.py +50 -39
- mindspore/nn/layer/math.py +62 -51
- mindspore/nn/layer/normalization.py +219 -167
- mindspore/nn/layer/padding.py +58 -70
- mindspore/nn/layer/pooling.py +334 -287
- mindspore/nn/layer/rnn_cells.py +53 -38
- mindspore/nn/layer/rnns.py +59 -56
- mindspore/nn/layer/thor_layer.py +52 -44
- mindspore/nn/layer/timedistributed.py +6 -4
- mindspore/nn/layer/transformer.py +284 -164
- mindspore/nn/learning_rate_schedule.py +34 -25
- mindspore/nn/loss/__init__.py +3 -2
- mindspore/nn/loss/loss.py +554 -311
- mindspore/nn/optim/ada_grad.py +12 -9
- mindspore/nn/optim/adadelta.py +14 -11
- mindspore/nn/optim/adafactor.py +19 -16
- mindspore/nn/optim/adam.py +62 -47
- mindspore/nn/optim/adamax.py +13 -10
- mindspore/nn/optim/adasum.py +12 -8
- mindspore/nn/optim/asgd.py +10 -9
- mindspore/nn/optim/ftrl.py +20 -17
- mindspore/nn/optim/lamb.py +16 -12
- mindspore/nn/optim/lars.py +8 -6
- mindspore/nn/optim/lazyadam.py +25 -20
- mindspore/nn/optim/momentum.py +10 -7
- mindspore/nn/optim/optimizer.py +61 -9
- mindspore/nn/optim/proximal_ada_grad.py +14 -13
- mindspore/nn/optim/rmsprop.py +17 -13
- mindspore/nn/optim/rprop.py +30 -17
- mindspore/nn/optim/sgd.py +40 -23
- mindspore/nn/optim/thor.py +24 -26
- mindspore/nn/probability/bijector/bijector.py +11 -11
- mindspore/nn/probability/bijector/exp.py +1 -1
- mindspore/nn/probability/bijector/gumbel_cdf.py +3 -3
- mindspore/nn/probability/bijector/invert.py +1 -1
- mindspore/nn/probability/bijector/power_transform.py +29 -29
- mindspore/nn/probability/bijector/scalar_affine.py +3 -3
- mindspore/nn/probability/bijector/softplus.py +5 -5
- mindspore/nn/probability/bnn_layers/bnn_cell_wrapper.py +4 -2
- mindspore/nn/probability/bnn_layers/conv_variational.py +13 -13
- mindspore/nn/probability/bnn_layers/dense_variational.py +12 -12
- mindspore/nn/probability/bnn_layers/layer_distribution.py +9 -8
- mindspore/nn/probability/distribution/_utils/custom_ops.py +19 -3
- mindspore/nn/probability/distribution/_utils/utils.py +1 -1
- mindspore/nn/probability/distribution/bernoulli.py +9 -9
- mindspore/nn/probability/distribution/beta.py +8 -8
- mindspore/nn/probability/distribution/categorical.py +23 -15
- mindspore/nn/probability/distribution/cauchy.py +5 -6
- mindspore/nn/probability/distribution/distribution.py +3 -3
- mindspore/nn/probability/distribution/exponential.py +4 -4
- mindspore/nn/probability/distribution/gamma.py +10 -10
- mindspore/nn/probability/distribution/geometric.py +8 -8
- mindspore/nn/probability/distribution/gumbel.py +8 -9
- mindspore/nn/probability/distribution/half_normal.py +5 -5
- mindspore/nn/probability/distribution/laplace.py +5 -5
- mindspore/nn/probability/distribution/log_normal.py +12 -11
- mindspore/nn/probability/distribution/logistic.py +8 -8
- mindspore/nn/probability/distribution/normal.py +6 -5
- mindspore/nn/probability/distribution/poisson.py +10 -11
- mindspore/nn/probability/distribution/student_t.py +8 -9
- mindspore/nn/probability/distribution/transformed_distribution.py +5 -5
- mindspore/nn/probability/distribution/uniform.py +11 -11
- mindspore/nn/reinforcement/tensor_array.py +2 -2
- mindspore/nn/sparse/sparse.py +9 -9
- mindspore/nn/wrap/cell_wrapper.py +188 -63
- mindspore/nn/wrap/grad_reducer.py +21 -12
- mindspore/nn/wrap/loss_scale.py +136 -49
- mindspore/numpy/__init__.py +4 -4
- mindspore/numpy/array_creations.py +55 -56
- mindspore/numpy/array_ops.py +134 -35
- mindspore/numpy/logic_ops.py +66 -20
- mindspore/numpy/math_ops.py +142 -139
- mindspore/numpy/utils_const.py +2 -2
- mindspore/offline_debug/convert_async.py +2 -2
- mindspore/ops/_grad_experimental/__init__.py +7 -5
- mindspore/ops/_grad_experimental/grad_array_ops.py +231 -348
- mindspore/ops/{_grad → _grad_experimental}/grad_base.py +1 -33
- mindspore/ops/{_grad → _grad_experimental}/grad_comm_ops.py +25 -13
- mindspore/ops/{_grad/__init__.py → _grad_experimental/grad_debug_ops.py} +15 -7
- mindspore/ops/{_grad → _grad_experimental}/grad_implementations.py +17 -11
- mindspore/ops/_grad_experimental/grad_inner_ops.py +33 -52
- mindspore/ops/_grad_experimental/grad_math_ops.py +151 -1224
- mindspore/ops/_grad_experimental/grad_nn_ops.py +141 -414
- mindspore/ops/{_grad → _grad_experimental}/grad_quant_ops.py +10 -6
- mindspore/ops/_grad_experimental/grad_sparse.py +317 -2
- mindspore/ops/_grad_experimental/grad_sparse_ops.py +3 -13
- mindspore/ops/{_grad → _grad_experimental}/taylor_rule.py +1 -1
- mindspore/ops/_op_impl/_custom_op/dsd_back_impl.py +1 -1
- mindspore/ops/_op_impl/_custom_op/flash_attention/__init__.py +0 -0
- mindspore/ops/_op_impl/_custom_op/flash_attention/attention.py +406 -0
- mindspore/{_extends/graph_kernel/expanders/complex/__init__.py → ops/_op_impl/_custom_op/flash_attention/constants.py} +27 -8
- mindspore/ops/_op_impl/_custom_op/flash_attention/flash_attention_bwd.py +467 -0
- mindspore/ops/_op_impl/_custom_op/flash_attention/flash_attention_fwd.py +563 -0
- mindspore/ops/_op_impl/_custom_op/flash_attention/flash_attention_impl.py +193 -0
- mindspore/ops/_op_impl/_custom_op/flash_attention/tik_ops_utils.py +435 -0
- mindspore/ops/_op_impl/_custom_op/flash_attention/tiling_strategy/__init__.py +0 -0
- mindspore/ops/_op_impl/_custom_op/flash_attention/tiling_strategy/sparse_tiling.py +45 -0
- mindspore/ops/_op_impl/_custom_op/flash_attention/tiling_strategy/strategy.py +67 -0
- mindspore/ops/_op_impl/_custom_op/flash_attention/tiling_strategy/wukong_tiling.py +62 -0
- mindspore/ops/_op_impl/_custom_op/matmul_cube_dense_left_impl.py +2 -2
- mindspore/ops/_op_impl/aicpu/__init__.py +41 -1
- mindspore/ops/_op_impl/aicpu/adaptive_max_pool_2d.py +37 -0
- mindspore/ops/_op_impl/aicpu/bias_add_grad.py +0 -1
- mindspore/ops/_op_impl/aicpu/cast.py +52 -0
- mindspore/ops/_op_impl/aicpu/coalesce.py +2 -0
- mindspore/ops/_op_impl/aicpu/col2im.py +3 -1
- mindspore/ops/_op_impl/aicpu/count_nonzero.py +43 -0
- mindspore/ops/_op_impl/aicpu/dropout_genmask.py +6 -0
- mindspore/ops/_op_impl/aicpu/eps.py +32 -0
- mindspore/ops/_op_impl/aicpu/eye.py +4 -4
- mindspore/ops/_op_impl/aicpu/fft_with_size.py +6 -0
- mindspore/ops/_op_impl/aicpu/fill_diagonal.py +5 -0
- mindspore/ops/_op_impl/aicpu/gamma.py +2 -2
- mindspore/ops/_op_impl/aicpu/im2col.py +3 -5
- mindspore/ops/_op_impl/aicpu/lgamma.py +1 -0
- mindspore/ops/_op_impl/aicpu/log_uniform_candidate_sampler.py +6 -3
- mindspore/ops/_op_impl/aicpu/lu.py +39 -0
- mindspore/ops/_op_impl/aicpu/lu_unpack_grad.py +0 -1
- mindspore/ops/_op_impl/aicpu/masked_scatter.py +1 -0
- mindspore/ops/_op_impl/aicpu/masked_select_grad.py +3 -0
- mindspore/ops/_op_impl/aicpu/matrix_band_part.py +59 -0
- mindspore/ops/_op_impl/aicpu/matrix_power.py +6 -1
- mindspore/ops/_op_impl/aicpu/median.py +1 -0
- mindspore/ops/_op_impl/aicpu/multinomial.py +9 -9
- mindspore/ops/_op_impl/aicpu/not_equal.py +0 -5
- mindspore/ops/_op_impl/aicpu/pad_v3.py +3 -1
- mindspore/ops/_op_impl/aicpu/pad_v3_grad.py +2 -0
- mindspore/ops/_op_impl/aicpu/parameterized_truncated_normal.py +15 -7
- mindspore/ops/_op_impl/aicpu/random_categorical.py +39 -19
- mindspore/ops/_op_impl/aicpu/random_choice_with_mask.py +5 -2
- mindspore/ops/_op_impl/aicpu/random_poisson.py +103 -52
- mindspore/ops/_op_impl/aicpu/random_shuffle.py +17 -15
- mindspore/ops/_op_impl/aicpu/resize_bilinear_grad.py +0 -1
- mindspore/ops/_op_impl/aicpu/resize_nearest_neighbor_v2.py +0 -6
- mindspore/ops/_op_impl/aicpu/resize_nearest_neighbor_v2_grad.py +0 -7
- mindspore/ops/_op_impl/aicpu/scatter_nd.py +2 -0
- mindspore/ops/_op_impl/aicpu/sequence_concat.py +40 -0
- mindspore/ops/_op_impl/aicpu/sequence_stack.py +40 -0
- mindspore/ops/_op_impl/aicpu/{sparseaddmm.py → sparse_addmm.py} +2 -2
- mindspore/ops/_op_impl/aicpu/{sparsesparsemaximum.py → sparse_sparse_maximum.py} +4 -4
- mindspore/ops/_op_impl/aicpu/standard_laplace.py +5 -4
- mindspore/ops/_op_impl/aicpu/standard_normal.py +5 -4
- mindspore/ops/_op_impl/aicpu/truncated_normal.py +9 -7
- mindspore/ops/_op_impl/aicpu/uniform.py +5 -3
- mindspore/ops/_op_impl/aicpu/uniform_candidate_sampler.py +8 -4
- mindspore/ops/_op_impl/aicpu/uniform_int.py +5 -5
- mindspore/ops/_op_impl/aicpu/uniform_real.py +4 -4
- mindspore/ops/_op_impl/aicpu/upsample_nearest_3d.py +14 -6
- mindspore/ops/_op_impl/aicpu/upsample_nearest_3d_grad.py +22 -8
- mindspore/ops/_op_impl/aicpu/upsample_trilinear_3d.py +11 -6
- mindspore/ops/_op_impl/aicpu/upsample_trilinear_3d_grad.py +21 -10
- mindspore/ops/_op_impl/tbe/__init__.py +6 -4
- mindspore/ops/_op_impl/tbe/atomic_addr_clean.py +1 -1
- mindspore/ops/_op_impl/tbe/avg_pool.py +2 -2
- mindspore/ops/_op_impl/tbe/avg_pool_3d.py +3 -3
- mindspore/ops/_op_impl/tbe/avg_pool_3d_grad.py +4 -4
- mindspore/ops/_op_impl/tbe/avg_pool_ds.py +2 -2
- mindspore/ops/_op_impl/tbe/avg_pool_grad.py +3 -3
- mindspore/ops/_op_impl/tbe/avg_pool_grad_vm.py +3 -3
- mindspore/ops/_op_impl/tbe/batch_to_space.py +1 -1
- mindspore/ops/_op_impl/tbe/batch_to_space_nd.py +2 -2
- mindspore/ops/_op_impl/tbe/bn_infer.py +2 -2
- mindspore/ops/_op_impl/tbe/bn_infer_ds.py +3 -2
- mindspore/ops/_op_impl/tbe/broadcast_to.py +1 -1
- mindspore/ops/_op_impl/tbe/depthwise_conv2d.py +3 -3
- mindspore/ops/_op_impl/tbe/expand_dims.py +1 -1
- mindspore/ops/_op_impl/tbe/gather_v2.py +56 -0
- mindspore/ops/_op_impl/tbe/im2col.py +4 -4
- mindspore/ops/_op_impl/tbe/inplace_index_add.py +7 -3
- mindspore/ops/_op_impl/tbe/mem_set.py +38 -0
- mindspore/ops/_op_impl/tbe/scatter_nd_add.py +3 -0
- mindspore/ops/_op_impl/tbe/scatter_nd_d.py +1 -1
- mindspore/ops/_op_impl/tbe/space_to_batch.py +1 -1
- mindspore/ops/_op_impl/tbe/space_to_batch_nd.py +2 -2
- mindspore/ops/_op_impl/tbe/trans_data_ds.py +2 -0
- mindspore/ops/_primitive_cache.py +1 -1
- mindspore/ops/_tracefunc.py +241 -0
- mindspore/ops/_utils/utils.py +10 -2
- mindspore/ops/_vmap/vmap_array_ops.py +5 -3
- mindspore/ops/_vmap/vmap_base.py +5 -4
- mindspore/ops/_vmap/vmap_convolution_ops.py +1 -1
- mindspore/ops/_vmap/vmap_grad_math_ops.py +6 -4
- mindspore/ops/_vmap/vmap_grad_nn_ops.py +11 -6
- mindspore/ops/_vmap/vmap_math_ops.py +5 -2
- mindspore/ops/_vmap/vmap_nn_ops.py +135 -11
- mindspore/ops/arg_dtype_cast.py +54 -0
- mindspore/ops/composite/__init__.py +7 -5
- mindspore/ops/composite/base.py +78 -34
- mindspore/ops/composite/math_ops.py +5 -695
- mindspore/ops/composite/multitype_ops/_compile_utils.py +403 -97
- mindspore/ops/composite/multitype_ops/_constexpr_utils.py +28 -22
- mindspore/ops/composite/multitype_ops/add_impl.py +69 -7
- mindspore/ops/composite/multitype_ops/bitwise_and_impl.py +2 -1
- mindspore/ops/composite/multitype_ops/bitwise_or_impl.py +2 -1
- mindspore/ops/composite/multitype_ops/bitwise_xor_impl.py +2 -0
- mindspore/ops/composite/multitype_ops/div_impl.py +1 -0
- mindspore/ops/composite/multitype_ops/floordiv_impl.py +1 -0
- mindspore/ops/composite/multitype_ops/getitem_impl.py +48 -10
- mindspore/ops/composite/multitype_ops/greater_equal_impl.py +2 -0
- mindspore/ops/composite/multitype_ops/greater_impl.py +2 -0
- mindspore/ops/composite/multitype_ops/left_shift_impl.py +2 -0
- mindspore/ops/composite/multitype_ops/less_equal_impl.py +2 -0
- mindspore/ops/composite/multitype_ops/less_impl.py +2 -0
- mindspore/ops/composite/multitype_ops/logic_not_impl.py +2 -2
- mindspore/ops/composite/multitype_ops/mod_impl.py +1 -0
- mindspore/ops/composite/multitype_ops/mul_impl.py +1 -0
- mindspore/ops/composite/multitype_ops/negative_impl.py +1 -0
- mindspore/ops/composite/multitype_ops/not_in_impl.py +1 -0
- mindspore/ops/composite/multitype_ops/ones_like_impl.py +6 -0
- mindspore/ops/composite/multitype_ops/pow_impl.py +1 -0
- mindspore/ops/composite/multitype_ops/right_shift_impl.py +2 -0
- mindspore/ops/composite/multitype_ops/setitem_impl.py +10 -7
- mindspore/ops/composite/multitype_ops/sub_impl.py +1 -0
- mindspore/ops/composite/multitype_ops/uadd_impl.py +2 -0
- mindspore/ops/composite/multitype_ops/zeros_like_impl.py +9 -0
- mindspore/ops/deprecated.py +304 -0
- mindspore/ops/function/__init__.py +41 -4
- mindspore/ops/function/array_func.py +1108 -467
- mindspore/ops/function/clip_func.py +94 -27
- mindspore/ops/function/debug_func.py +3 -1
- mindspore/ops/function/grad/grad_func.py +82 -73
- mindspore/ops/function/image_func.py +28 -12
- mindspore/ops/function/linalg_func.py +135 -39
- mindspore/ops/function/math_func.py +3779 -894
- mindspore/ops/function/nn_func.py +1584 -657
- mindspore/ops/function/parameter_func.py +13 -3
- mindspore/ops/function/random_func.py +247 -153
- mindspore/ops/function/sparse_func.py +14 -11
- mindspore/ops/function/sparse_unary_func.py +173 -47
- mindspore/ops/function/spectral_func.py +8 -4
- mindspore/ops/function/vmap_func.py +8 -7
- mindspore/ops/functional.py +47 -16
- mindspore/ops/op_info_register.py +346 -86
- mindspore/ops/operations/__init__.py +38 -22
- mindspore/ops/operations/_grad_ops.py +145 -149
- mindspore/ops/operations/_inner_ops.py +298 -56
- mindspore/ops/operations/_ms_kernel.py +3 -3
- mindspore/ops/operations/_quant_ops.py +24 -28
- mindspore/ops/operations/_rl_inner_ops.py +9 -7
- mindspore/ops/operations/_scalar_ops.py +115 -0
- mindspore/ops/operations/_sequence_ops.py +148 -10
- mindspore/ops/operations/_tensor_array.py +1 -1
- mindspore/ops/operations/_thor_ops.py +2 -2
- mindspore/ops/operations/array_ops.py +1239 -561
- mindspore/ops/operations/comm_ops.py +166 -90
- mindspore/ops/operations/control_ops.py +3 -3
- mindspore/ops/operations/custom_ops.py +124 -102
- mindspore/ops/operations/debug_ops.py +24 -11
- mindspore/ops/operations/image_ops.py +86 -71
- mindspore/ops/operations/inner_ops.py +18 -13
- mindspore/ops/operations/linalg_ops.py +30 -11
- mindspore/ops/operations/math_ops.py +1730 -435
- mindspore/ops/operations/nn_ops.py +1953 -943
- mindspore/ops/operations/other_ops.py +65 -43
- mindspore/ops/operations/random_ops.py +258 -98
- mindspore/ops/operations/rl_ops.py +4 -36
- mindspore/ops/operations/sparse_ops.py +38 -33
- mindspore/ops/operations/spectral_ops.py +8 -4
- mindspore/ops/primitive.py +66 -44
- mindspore/ops/signature.py +5 -5
- mindspore/parallel/_auto_parallel_context.py +80 -19
- mindspore/parallel/_cost_model_context.py +42 -0
- mindspore/parallel/_offload_context.py +162 -72
- mindspore/parallel/_parallel_serialization.py +2 -2
- mindspore/parallel/_ps_context.py +16 -4
- mindspore/parallel/_recovery_context.py +2 -1
- mindspore/parallel/_tensor.py +15 -13
- mindspore/parallel/_transformer/layers.py +8 -6
- mindspore/parallel/_transformer/loss.py +1 -0
- mindspore/parallel/_transformer/moe.py +7 -7
- mindspore/parallel/_transformer/op_parallel_config.py +12 -1
- mindspore/parallel/_transformer/transformer.py +34 -14
- mindspore/parallel/_utils.py +36 -14
- mindspore/parallel/algo_parameter_config.py +114 -20
- mindspore/parallel/checkpoint_transform.py +16 -18
- mindspore/parallel/shard.py +16 -13
- mindspore/profiler/__init__.py +1 -1
- mindspore/profiler/common/struct_type.py +3 -3
- mindspore/profiler/common/util.py +3 -2
- mindspore/profiler/envprofiling.py +11 -4
- mindspore/profiler/parser/aicpu_data_parser.py +5 -3
- mindspore/profiler/parser/ascend_flops_generator.py +94 -0
- mindspore/profiler/parser/ascend_fpbp_generator.py +76 -0
- mindspore/profiler/parser/ascend_hccl_generator.py +288 -0
- mindspore/profiler/parser/ascend_msprof_exporter.py +213 -0
- mindspore/profiler/parser/ascend_msprof_generator.py +199 -0
- mindspore/profiler/parser/ascend_op_generator.py +276 -0
- mindspore/profiler/parser/ascend_steptrace_generator.py +94 -0
- mindspore/profiler/parser/ascend_timeline_generator.py +110 -54
- mindspore/profiler/parser/base_timeline_generator.py +11 -7
- mindspore/profiler/parser/cpu_gpu_timeline_generator.py +45 -46
- mindspore/profiler/parser/flops_parser.py +15 -11
- mindspore/profiler/parser/framework_parser.py +92 -73
- mindspore/profiler/parser/hccl_parser.py +16 -12
- mindspore/profiler/parser/integrator.py +22 -11
- mindspore/profiler/parser/memory_usage_parser.py +36 -11
- mindspore/profiler/parser/minddata_analyzer.py +12 -14
- mindspore/profiler/parser/minddata_pipeline_parser.py +1 -1
- mindspore/profiler/parser/msadvisor_parser.py +8 -4
- mindspore/profiler/parser/op_intermediate_parser.py +5 -2
- mindspore/profiler/parser/optime_parser.py +1 -1
- mindspore/profiler/parser/profiler_info.py +4 -5
- mindspore/profiler/parser/step_trace_parser.py +11 -14
- mindspore/profiler/profiling.py +678 -377
- mindspore/rewrite/api/node.py +211 -54
- mindspore/rewrite/api/node_type.py +5 -0
- mindspore/rewrite/api/pattern_engine.py +22 -23
- mindspore/rewrite/api/scoped_value.py +20 -17
- mindspore/rewrite/api/symbol_tree.py +252 -106
- mindspore/rewrite/api/tree_node_helper.py +3 -0
- mindspore/rewrite/ast_helpers/__init__.py +2 -1
- mindspore/rewrite/ast_helpers/ast_finder.py +129 -0
- mindspore/rewrite/ast_helpers/ast_modifier.py +116 -104
- mindspore/rewrite/ast_transformers/flatten_recursive_stmt.py +97 -46
- mindspore/rewrite/common/rewrite_elog.py +5 -1
- mindspore/rewrite/namer.py +51 -51
- mindspore/rewrite/namespace.py +14 -5
- mindspore/{ops/bprop_mindir → rewrite/node}/__init__.py +9 -4
- mindspore/rewrite/node/call_function.py +79 -0
- mindspore/rewrite/node/cell_container.py +135 -0
- mindspore/rewrite/node/control_flow.py +88 -0
- mindspore/rewrite/{node.py → node/node.py} +313 -247
- mindspore/rewrite/node/node_manager.py +254 -0
- mindspore/rewrite/node/node_topological_manager.py +243 -0
- mindspore/rewrite/parsers/arguments_parser.py +22 -21
- mindspore/rewrite/parsers/assign_parser.py +225 -239
- mindspore/rewrite/parsers/attribute_parser.py +9 -7
- mindspore/rewrite/parsers/class_def_parser.py +179 -218
- mindspore/rewrite/parsers/constant_parser.py +9 -6
- mindspore/rewrite/parsers/container_parser.py +9 -7
- mindspore/rewrite/parsers/for_parser.py +36 -15
- mindspore/rewrite/parsers/function_def_parser.py +23 -20
- mindspore/rewrite/parsers/if_parser.py +28 -24
- mindspore/rewrite/parsers/module_parser.py +202 -25
- mindspore/rewrite/{parser.py → parsers/parser.py} +4 -2
- mindspore/rewrite/{parser_register.py → parsers/parser_register.py} +1 -1
- mindspore/rewrite/parsers/return_parser.py +6 -6
- mindspore/rewrite/sparsify/sparse_transformer.py +12 -3
- mindspore/rewrite/sparsify/sparsify.py +4 -1
- mindspore/rewrite/sparsify/utils.py +11 -5
- mindspore/rewrite/symbol_tree.py +577 -732
- mindspore/rewrite/symbol_tree_builder.py +9 -175
- mindspore/rewrite/symbol_tree_dumper.py +2 -2
- mindspore/run_check/_check_version.py +46 -39
- mindspore/run_check/run_check.py +3 -2
- mindspore/{scipy/sparse → safeguard}/__init__.py +4 -5
- mindspore/safeguard/rewrite_obfuscation.py +517 -0
- mindspore/scipy/__init__.py +1 -1
- mindspore/scipy/linalg.py +67 -61
- mindspore/scipy/ops.py +5 -41
- mindspore/scipy/ops_grad.py +3 -2
- mindspore/scipy/ops_wrapper.py +5 -5
- mindspore/scipy/optimize/line_search.py +8 -8
- mindspore/scipy/optimize/linear_sum_assignment.py +4 -4
- mindspore/scipy/optimize/minimize.py +16 -12
- mindspore/scipy/utils.py +1 -52
- mindspore/scipy/utils_const.py +4 -4
- mindspore/train/__init__.py +4 -4
- mindspore/train/_utils.py +13 -5
- mindspore/train/amp.py +410 -148
- mindspore/train/anf_ir_pb2.py +16 -4
- mindspore/train/callback/_backup_and_restore.py +8 -11
- mindspore/train/callback/_callback.py +80 -3
- mindspore/train/callback/_checkpoint.py +82 -51
- mindspore/train/callback/_early_stop.py +12 -15
- mindspore/train/callback/_history.py +1 -1
- mindspore/train/callback/_lambda_callback.py +13 -13
- mindspore/train/callback/_landscape.py +21 -17
- mindspore/train/callback/_loss_monitor.py +9 -10
- mindspore/train/callback/_on_request_exit.py +16 -33
- mindspore/train/callback/_reduce_lr_on_plateau.py +21 -24
- mindspore/train/callback/_summary_collector.py +44 -30
- mindspore/train/callback/_time_monitor.py +62 -12
- mindspore/train/data_sink.py +10 -16
- mindspore/train/dataset_helper.py +154 -86
- mindspore/train/loss_scale_manager.py +14 -9
- mindspore/train/metrics/__init__.py +10 -2
- mindspore/train/metrics/accuracy.py +1 -1
- mindspore/train/metrics/auc.py +1 -1
- mindspore/train/metrics/bleu_score.py +2 -2
- mindspore/train/metrics/confusion_matrix.py +14 -14
- mindspore/train/metrics/cosine_similarity.py +3 -3
- mindspore/train/metrics/dice.py +1 -1
- mindspore/train/metrics/fbeta.py +1 -1
- mindspore/train/metrics/hausdorff_distance.py +8 -6
- mindspore/train/metrics/mean_surface_distance.py +5 -4
- mindspore/train/metrics/metric.py +49 -17
- mindspore/train/metrics/occlusion_sensitivity.py +4 -4
- mindspore/train/metrics/perplexity.py +1 -1
- mindspore/train/metrics/precision.py +2 -2
- mindspore/train/metrics/recall.py +2 -3
- mindspore/train/metrics/roc.py +7 -7
- mindspore/train/metrics/root_mean_square_surface_distance.py +5 -4
- mindspore/train/metrics/topk.py +7 -4
- mindspore/train/mind_ir_pb2.py +193 -48
- mindspore/train/model.py +377 -133
- mindspore/train/serialization.py +697 -245
- mindspore/train/summary/_summary_adapter.py +5 -2
- mindspore/train/summary/_writer_pool.py +4 -3
- mindspore/train/summary/summary_record.py +25 -23
- mindspore/train/train_thor/convert_utils.py +39 -23
- mindspore/train/train_thor/dataset_helper.py +4 -3
- mindspore/train/train_thor/model_thor.py +8 -8
- mindspore/version.py +1 -1
- {mindspore-2.0.0rc1.dist-info → mindspore-2.2.0.dist-info}/METADATA +7 -8
- {mindspore-2.0.0rc1.dist-info → mindspore-2.2.0.dist-info}/RECORD +647 -818
- {mindspore-2.0.0rc1.dist-info → mindspore-2.2.0.dist-info}/entry_points.txt +0 -1
- mindspore/_akg/akg/tvm/contrib/debugger/__init__.py +0 -16
- mindspore/_akg/akg/tvm/contrib/debugger/debug_result.py +0 -274
- mindspore/_akg/akg/tvm/contrib/debugger/debug_runtime.py +0 -259
- mindspore/_akg/akg/tvm/contrib/peak.py +0 -341
- mindspore/_akg/akg/tvm/contrib/rpc.py +0 -25
- mindspore/_akg/akg/tvm/contrib/xcode.py +0 -257
- mindspore/_akg/akg/tvm/exec/__init__.py +0 -17
- mindspore/_akg/akg/tvm/exec/autotvm_log_editor.py +0 -60
- mindspore/_akg/akg/tvm/exec/measure_peak.py +0 -48
- mindspore/_akg/akg/tvm/exec/query_rpc_tracker.py +0 -48
- mindspore/_akg/akg/tvm/exec/rpc_proxy.py +0 -98
- mindspore/_akg/akg/tvm/exec/rpc_server.py +0 -88
- mindspore/_akg/akg/tvm/exec/rpc_tracker.py +0 -62
- mindspore/_akg/akg/tvm/rpc/__init__.py +0 -29
- mindspore/_akg/akg/tvm/rpc/base.py +0 -182
- mindspore/_akg/akg/tvm/rpc/client.py +0 -436
- mindspore/_akg/akg/tvm/rpc/proxy.py +0 -595
- mindspore/_akg/akg/tvm/rpc/server.py +0 -413
- mindspore/_akg/akg/tvm/rpc/tornado_util.py +0 -121
- mindspore/_akg/akg/tvm/rpc/tracker.py +0 -431
- mindspore/_extends/graph_kernel/expander.py +0 -80
- mindspore/_extends/graph_kernel/expanders/__init__.py +0 -57
- mindspore/_extends/graph_kernel/expanders/_utils.py +0 -269
- mindspore/_extends/graph_kernel/expanders/addn.py +0 -33
- mindspore/_extends/graph_kernel/expanders/batchnorm.py +0 -152
- mindspore/_extends/graph_kernel/expanders/batchnorm_grad.py +0 -105
- mindspore/_extends/graph_kernel/expanders/bias_add_grad.py +0 -49
- mindspore/_extends/graph_kernel/expanders/clip_by_norm_no_div_sum.py +0 -33
- mindspore/_extends/graph_kernel/expanders/complex/abs.py +0 -30
- mindspore/_extends/graph_kernel/expanders/complex/add.py +0 -44
- mindspore/_extends/graph_kernel/expanders/complex/div.py +0 -62
- mindspore/_extends/graph_kernel/expanders/complex/mul.py +0 -52
- mindspore/_extends/graph_kernel/expanders/complex/real_div.py +0 -62
- mindspore/_extends/graph_kernel/expanders/complex/sub.py +0 -45
- mindspore/_extends/graph_kernel/expanders/conv2d.py +0 -200
- mindspore/_extends/graph_kernel/expanders/dropout_grad.py +0 -30
- mindspore/_extends/graph_kernel/expanders/equal_count.py +0 -50
- mindspore/_extends/graph_kernel/expanders/erfc.py +0 -35
- mindspore/_extends/graph_kernel/expanders/expand_dims.py +0 -50
- mindspore/_extends/graph_kernel/expanders/fused_adam.py +0 -44
- mindspore/_extends/graph_kernel/expanders/fused_adam_weight_decay.py +0 -47
- mindspore/_extends/graph_kernel/expanders/fused_mul_add.py +0 -28
- mindspore/_extends/graph_kernel/expanders/gather.py +0 -43
- mindspore/_extends/graph_kernel/expanders/gelu_grad.py +0 -70
- mindspore/_extends/graph_kernel/expanders/gkdropout.py +0 -40
- mindspore/_extends/graph_kernel/expanders/identity.py +0 -25
- mindspore/_extends/graph_kernel/expanders/layernorm.py +0 -93
- mindspore/_extends/graph_kernel/expanders/layernorm_grad.py +0 -113
- mindspore/_extends/graph_kernel/expanders/logsoftmax.py +0 -46
- mindspore/_extends/graph_kernel/expanders/logsoftmax_grad.py +0 -36
- mindspore/_extends/graph_kernel/expanders/matmul.py +0 -80
- mindspore/_extends/graph_kernel/expanders/maximum_grad.py +0 -59
- mindspore/_extends/graph_kernel/expanders/minimum_grad.py +0 -80
- mindspore/_extends/graph_kernel/expanders/oneslike.py +0 -26
- mindspore/_extends/graph_kernel/expanders/reduce_mean.py +0 -43
- mindspore/_extends/graph_kernel/expanders/relu_grad.py +0 -32
- mindspore/_extends/graph_kernel/expanders/sigmoid_cross_entropy_with_logits.py +0 -41
- mindspore/_extends/graph_kernel/expanders/sigmoid_cross_entropy_with_logits_grad.py +0 -35
- mindspore/_extends/graph_kernel/expanders/sigmoid_grad.py +0 -31
- mindspore/_extends/graph_kernel/expanders/slice.py +0 -35
- mindspore/_extends/graph_kernel/expanders/softmax_cross_entropy_with_logits.py +0 -42
- mindspore/_extends/graph_kernel/expanders/softmax_grad_ext.py +0 -41
- mindspore/_extends/graph_kernel/expanders/softsign.py +0 -28
- mindspore/_extends/graph_kernel/expanders/sqrt_grad.py +0 -29
- mindspore/_extends/graph_kernel/expanders/square_sum_all.py +0 -44
- mindspore/_extends/graph_kernel/expanders/square_sum_v1.py +0 -37
- mindspore/_extends/graph_kernel/expanders/squared_difference.py +0 -43
- mindspore/_extends/graph_kernel/expanders/tanh_grad.py +0 -31
- mindspore/_extends/graph_kernel/expanders/tile.py +0 -54
- mindspore/_extends/graph_kernel/model/op_infer.py +0 -506
- mindspore/_extends/parse/jit_fallback_modules.py +0 -51
- mindspore/dataset/datapreprocess/preprocess_imagenet_validate_dataset.py +0 -54
- mindspore/dataset/engine/graphdata.py +0 -1586
- mindspore/include/api/net.h +0 -142
- mindspore/ops/_grad/grad_array_ops.py +0 -1347
- mindspore/ops/_grad/grad_clip_ops.py +0 -84
- mindspore/ops/_grad/grad_debug_ops.py +0 -68
- mindspore/ops/_grad/grad_inner_ops.py +0 -235
- mindspore/ops/_grad/grad_math_ops.py +0 -1684
- mindspore/ops/_grad/grad_nn_ops.py +0 -1529
- mindspore/ops/_grad/grad_other_ops.py +0 -89
- mindspore/ops/_grad/grad_sequence_ops.py +0 -296
- mindspore/ops/_grad/grad_sparse.py +0 -323
- mindspore/ops/_grad_experimental/grad_image_ops.py +0 -249
- mindspore/ops/_grad_experimental/grad_linalg_ops.py +0 -195
- mindspore/ops/_grad_experimental/grad_scalar_ops.py +0 -112
- mindspore/ops/bprop_mindir/AdaptiveAvgPool2D_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/AdaptiveMaxPool2D_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/ApproximateEqual_bprop.mindir +0 -19
- mindspore/ops/bprop_mindir/Argmax_bprop.mindir +0 -15
- mindspore/ops/bprop_mindir/Argmin_bprop.mindir +0 -15
- mindspore/ops/bprop_mindir/AssignSub_bprop.mindir +0 -19
- mindspore/ops/bprop_mindir/Assign_bprop.mindir +0 -17
- mindspore/ops/bprop_mindir/AvgPool3D_bprop.mindir +0 -150
- mindspore/ops/bprop_mindir/AvgPool_bprop.mindir +0 -66
- mindspore/ops/bprop_mindir/BCEWithLogitsLoss_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/BNTrainingReduce_bprop.mindir +0 -15
- mindspore/ops/bprop_mindir/BatchNormGrad_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/BatchToSpaceND_bprop.mindir +0 -28
- mindspore/ops/bprop_mindir/BiasAddGrad_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/BinaryCrossEntropy_bprop.mindir +0 -33
- mindspore/ops/bprop_mindir/BroadcastTo_bprop.mindir +0 -306
- mindspore/ops/bprop_mindir/Broadcast_bprop.mindir +0 -13
- mindspore/ops/bprop_mindir/CTCLoss_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Concat_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Conv2DBackpropFilter_bprop.mindir +0 -240
- mindspore/ops/bprop_mindir/Conv2DBackpropInput_bprop.mindir +0 -247
- mindspore/ops/bprop_mindir/Conv2DTranspose_bprop.mindir +0 -247
- mindspore/ops/bprop_mindir/Conv3DTranspose_bprop.mindir +0 -315
- mindspore/ops/bprop_mindir/Conv3D_bprop.mindir +0 -278
- mindspore/ops/bprop_mindir/DType_bprop.mindir +0 -14
- mindspore/ops/bprop_mindir/DeformableOffsets_bprop.mindir +0 -58
- mindspore/ops/bprop_mindir/Depend_bprop.mindir +0 -13
- mindspore/ops/bprop_mindir/DepthToSpace_bprop.mindir +0 -23
- mindspore/ops/bprop_mindir/DepthwiseConv2dNative_bprop.mindir +0 -138
- mindspore/ops/bprop_mindir/DiagPart_bprop.mindir +0 -15
- mindspore/ops/bprop_mindir/Dropout2D_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Dropout3D_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/DropoutDoMask_bprop.mindir +0 -25
- mindspore/ops/bprop_mindir/DropoutGenMask_bprop.mindir +0 -18
- mindspore/ops/bprop_mindir/DropoutGrad_bprop.mindir +0 -27
- mindspore/ops/bprop_mindir/Dropout_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/DynamicGRUV2_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/DynamicRNN_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/DynamicShape_bprop.mindir +0 -14
- mindspore/ops/bprop_mindir/Elu_bprop.mindir +0 -16
- mindspore/ops/bprop_mindir/EmbeddingLookup_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Equal_bprop.mindir +0 -19
- mindspore/ops/bprop_mindir/ExpandDims_bprop.mindir +0 -58
- mindspore/ops/bprop_mindir/FastGeLU_bprop.mindir +0 -16
- mindspore/ops/bprop_mindir/Flatten_bprop.mindir +0 -54
- mindspore/ops/bprop_mindir/FloorDiv_bprop.mindir +0 -19
- mindspore/ops/bprop_mindir/GatherD_bprop.mindir +0 -26
- mindspore/ops/bprop_mindir/GatherNd_bprop.mindir +0 -57
- mindspore/ops/bprop_mindir/Gather_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/GreaterEqual_bprop.mindir +0 -19
- mindspore/ops/bprop_mindir/Greater_bprop.mindir +0 -19
- mindspore/ops/bprop_mindir/HSigmoid_bprop.mindir +0 -16
- mindspore/ops/bprop_mindir/HSwish_bprop.mindir +0 -16
- mindspore/ops/bprop_mindir/IOU_bprop.mindir +0 -19
- mindspore/ops/bprop_mindir/InstanceNorm_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/IsFinite_bprop.mindir +0 -15
- mindspore/ops/bprop_mindir/IsInf_bprop.mindir +0 -15
- mindspore/ops/bprop_mindir/IsNan_bprop.mindir +0 -15
- mindspore/ops/bprop_mindir/KLDivLoss_bprop.mindir +0 -126
- mindspore/ops/bprop_mindir/L2Loss_bprop.mindir +0 -15
- mindspore/ops/bprop_mindir/L2Normalize_bprop.mindir +0 -30
- mindspore/ops/bprop_mindir/LRN_bprop.mindir +0 -43
- mindspore/ops/bprop_mindir/LayerNormGrad_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/LessEqual_bprop.mindir +0 -19
- mindspore/ops/bprop_mindir/Less_bprop.mindir +0 -19
- mindspore/ops/bprop_mindir/LinSpace_bprop.mindir +0 -23
- mindspore/ops/bprop_mindir/Load_bprop.mindir +0 -13
- mindspore/ops/bprop_mindir/LogSoftmax_bprop.mindir +0 -23
- mindspore/ops/bprop_mindir/LogicalAnd_bprop.mindir +0 -19
- mindspore/ops/bprop_mindir/LogicalNot_bprop.mindir +0 -15
- mindspore/ops/bprop_mindir/MaskedSelect_bprop.mindir +0 -21
- mindspore/ops/bprop_mindir/MaxPool3DGradGrad_bprop.mindir +0 -74
- mindspore/ops/bprop_mindir/MaxPool3DGrad_bprop.mindir +0 -74
- mindspore/ops/bprop_mindir/MaxPool3D_bprop.mindir +0 -75
- mindspore/ops/bprop_mindir/MaxPoolGradGrad_bprop.mindir +0 -65
- mindspore/ops/bprop_mindir/MaxPoolWithArgmax_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Maximum_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Minimum_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/MirrorPad_bprop.mindir +0 -27
- mindspore/ops/bprop_mindir/Mish_bprop.mindir +0 -35
- mindspore/ops/bprop_mindir/MulNoNan_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/NLLLoss_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/NonZero_bprop.mindir +0 -14
- mindspore/ops/bprop_mindir/NotEqual_bprop.mindir +0 -19
- mindspore/ops/bprop_mindir/OneHot_bprop.mindir +0 -26
- mindspore/ops/bprop_mindir/OnesLike_bprop.mindir +0 -14
- mindspore/ops/bprop_mindir/PReLU_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Pad_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Padding_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/RNNTLoss_bprop.mindir +0 -29
- mindspore/ops/bprop_mindir/ROIAlign_bprop.mindir +0 -82
- mindspore/ops/bprop_mindir/Range_bprop.mindir +0 -22
- mindspore/ops/bprop_mindir/Rank_bprop.mindir +0 -14
- mindspore/ops/bprop_mindir/ReLU6_bprop.mindir +0 -16
- mindspore/ops/bprop_mindir/ReLUV2_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/ReduceAll_bprop.mindir +0 -19
- mindspore/ops/bprop_mindir/ReduceAny_bprop.mindir +0 -19
- mindspore/ops/bprop_mindir/ReluGrad_bprop.mindir +0 -20
- mindspore/ops/bprop_mindir/Reshape_bprop.mindir +0 -60
- mindspore/ops/bprop_mindir/ResizeBilinear_bprop.mindir +0 -29
- mindspore/ops/bprop_mindir/ResizeNearestNeighbor_bprop.mindir +0 -89
- mindspore/ops/bprop_mindir/ReverseSequence_bprop.mindir +0 -52
- mindspore/ops/bprop_mindir/ReverseV2_bprop.mindir +0 -22
- mindspore/ops/bprop_mindir/Round_bprop.mindir +0 -15
- mindspore/ops/bprop_mindir/ScatterMax_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/ScatterMin_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/ScatterNdUpdate_bprop.mindir +0 -22
- mindspore/ops/bprop_mindir/ScatterNd_bprop.mindir +0 -24
- mindspore/ops/bprop_mindir/ScatterNonAliasingAdd_bprop.mindir +0 -22
- mindspore/ops/bprop_mindir/ScatterUpdate_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/SeLU_bprop.mindir +0 -21
- mindspore/ops/bprop_mindir/Select_bprop.mindir +0 -31
- mindspore/ops/bprop_mindir/Shape_bprop.mindir +0 -14
- mindspore/ops/bprop_mindir/SigmoidCrossEntropyWithLogits_bprop.mindir +0 -21
- mindspore/ops/bprop_mindir/SigmoidGrad_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Sigmoid_bprop.mindir +0 -16
- mindspore/ops/bprop_mindir/Sign_bprop.mindir +0 -15
- mindspore/ops/bprop_mindir/Slice_bprop.mindir +0 -26
- mindspore/ops/bprop_mindir/SmoothL1Loss_bprop.mindir +0 -36
- mindspore/ops/bprop_mindir/SoftmaxCrossEntropyWithLogits_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Softplus_bprop.mindir +0 -16
- mindspore/ops/bprop_mindir/Softsign_bprop.mindir +0 -33
- mindspore/ops/bprop_mindir/Sort_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/SpaceToBatchND_bprop.mindir +0 -28
- mindspore/ops/bprop_mindir/SpaceToDepth_bprop.mindir +0 -23
- mindspore/ops/bprop_mindir/SparseGatherV2_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/SparseSoftmaxCrossEntropyWithLogits_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Split_bprop.mindir +0 -22
- mindspore/ops/bprop_mindir/Squeeze_bprop.mindir +0 -54
- mindspore/ops/bprop_mindir/StridedSliceGrad_bprop.mindir +0 -95
- mindspore/ops/bprop_mindir/StridedSlice_bprop.mindir +0 -98
- mindspore/ops/bprop_mindir/Switch_bprop.mindir +0 -29
- mindspore/ops/bprop_mindir/TanhGrad_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Tanh_bprop.mindir +0 -66
- mindspore/ops/bprop_mindir/TensorScatterAdd_bprop.mindir +0 -22
- mindspore/ops/bprop_mindir/TensorScatterUpdate_bprop.mindir +0 -29
- mindspore/ops/bprop_mindir/TensorShape_bprop.mindir +0 -14
- mindspore/ops/bprop_mindir/Tile_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/TopK_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/TransShape_bprop.mindir +0 -23
- mindspore/ops/bprop_mindir/TruncateDiv_bprop.mindir +0 -19
- mindspore/ops/bprop_mindir/TupleGetItem_bprop.mindir +0 -20
- mindspore/ops/bprop_mindir/Unique_bprop.mindir +0 -16
- mindspore/ops/bprop_mindir/Unstack_bprop.mindir +0 -22
- mindspore/ops/bprop_mindir/UpsampleNearest3D_bprop.mindir +0 -32
- mindspore/ops/bprop_mindir/UpsampleTrilinear3D_bprop.mindir +0 -38
- mindspore/ops/bprop_mindir/ZerosLike_bprop.mindir +0 -15
- mindspore/ops/bprop_mindir/generate_mindir.py +0 -114
- mindspore/rewrite/node_visitor.py +0 -44
- mindspore/rewrite/topological_manager.py +0 -203
- mindspore/scipy/sparse/linalg.py +0 -192
- {mindspore-2.0.0rc1.dist-info → mindspore-2.2.0.dist-info}/WHEEL +0 -0
- {mindspore-2.0.0rc1.dist-info → mindspore-2.2.0.dist-info}/top_level.txt +0 -0
mindspore/profiler/profiling.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# Copyright 2020-
|
|
1
|
+
# Copyright 2020-2023 Huawei Technologies Co., Ltd
|
|
2
2
|
#
|
|
3
3
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
4
|
# you may not use this file except in compliance with the License.
|
|
@@ -19,38 +19,41 @@ import time
|
|
|
19
19
|
import json
|
|
20
20
|
import glob
|
|
21
21
|
import subprocess
|
|
22
|
+
import csv
|
|
22
23
|
from enum import Enum
|
|
24
|
+
import numpy as np
|
|
23
25
|
|
|
24
26
|
from mindspore import log as logger, context
|
|
25
|
-
from mindspore.
|
|
27
|
+
from mindspore.context import get_auto_parallel_context
|
|
28
|
+
from mindspore.communication.management import GlobalComm, get_rank, get_group_size, get_local_rank
|
|
26
29
|
import mindspore._c_expression as c_expression
|
|
27
30
|
import mindspore._c_dataengine as cde
|
|
28
31
|
from mindspore.profiler.common.exceptions.exceptions import ProfilerFileNotFoundException, \
|
|
29
32
|
ProfilerIOException, ProfilerException, ProfilerRawFileException
|
|
30
33
|
from mindspore.profiler.common.exceptions.exceptions import ProfilerPathErrorException
|
|
31
34
|
from mindspore.profiler.common.exceptions.exceptions import ProfilerDirNotFoundException
|
|
32
|
-
from mindspore.profiler.common.util import get_file_path
|
|
33
|
-
from mindspore.profiler.common.validator.validate_path import
|
|
34
|
-
|
|
35
|
-
from mindspore.profiler.parser.aicpu_data_parser import DataPreProcessParser
|
|
36
|
-
from mindspore.profiler.parser.framework_parser import FrameworkParser, GpuFrameWorkParser, DynamicFrameWorkParser
|
|
37
|
-
from mindspore.profiler.parser.hwts_log_parser import HWTSLogParser
|
|
35
|
+
from mindspore.profiler.common.util import get_file_path
|
|
36
|
+
from mindspore.profiler.common.validator.validate_path import validate_and_normalize_path
|
|
37
|
+
from mindspore.profiler.parser.framework_parser import GpuFrameWorkParser, DynamicFrameWorkParser
|
|
38
38
|
from mindspore.profiler.parser.integrator import Integrator, DeviceTarget
|
|
39
39
|
from mindspore.profiler.parser.cpu_gpu_timeline_generator import GpuTimelineGenerator, CpuTimelineGenerator
|
|
40
40
|
from mindspore.profiler.parser.ascend_timeline_generator import AscendTimelineGenerator
|
|
41
41
|
from mindspore.profiler.parser.memory_usage_parser import MemoryUsageParser
|
|
42
42
|
from mindspore.profiler.parser.minddata_parser import MinddataParser
|
|
43
43
|
from mindspore.profiler.parser.minddata_analyzer import MinddataProfilingAnalyzer
|
|
44
|
-
from mindspore.profiler.parser.flops_parser import FlopsParser
|
|
45
44
|
from mindspore.profiler.parser.minddata_pipeline_parser import \
|
|
46
45
|
MinddataPipelineParser
|
|
47
|
-
from mindspore.profiler.parser.optime_parser import OPComputeTimeParser
|
|
48
46
|
from mindspore.profiler.parser.step_trace_parser import GpuStepTraceParser, AscendStepTraceParser
|
|
49
|
-
from mindspore.profiler.parser.hccl_parser import HcclParser
|
|
50
|
-
from mindspore.profiler.parser.op_intermediate_parser import OPIntermediateParser
|
|
51
47
|
from mindspore.profiler.parser.msadvisor_analyzer import Msadvisor
|
|
52
48
|
from mindspore.profiler.parser.profiler_info import ProfilerInfo
|
|
53
49
|
from mindspore.common.api import _pynative_executor
|
|
50
|
+
from mindspore.profiler.parser.ascend_msprof_exporter import AscendMsprofExporter
|
|
51
|
+
from mindspore.profiler.parser.ascend_msprof_generator import AscendMsprofDataGenerator
|
|
52
|
+
from mindspore.profiler.parser.ascend_fpbp_generator import AscendFPBPGenerator
|
|
53
|
+
from mindspore.profiler.parser.ascend_op_generator import AscendOPGenerator
|
|
54
|
+
from mindspore.profiler.parser.ascend_steptrace_generator import AscendStepTraceGenerator
|
|
55
|
+
from mindspore.profiler.parser.ascend_flops_generator import AscendFlopsGenerator
|
|
56
|
+
from mindspore.profiler.parser.ascend_hccl_generator import AscendHCCLGenerator
|
|
54
57
|
|
|
55
58
|
INIT_OP_NAME = 'Default/InitDataSetQueue'
|
|
56
59
|
|
|
@@ -67,14 +70,22 @@ AICORE_METRICS_DICT = {
|
|
|
67
70
|
|
|
68
71
|
class DeviceSupportParam(Enum):
|
|
69
72
|
"""The device target enum."""
|
|
70
|
-
CPU = ['start', 'start_profile', 'output_path', 'timeline_limit']
|
|
71
|
-
GPU = [
|
|
72
|
-
|
|
73
|
-
|
|
73
|
+
CPU = ['start', 'start_profile', 'output_path', 'timeline_limit', 'profile_framework', 'op_time']
|
|
74
|
+
GPU = [
|
|
75
|
+
'start', 'start_profile', 'output_path', 'data_process', 'timeline_limit', 'sync_enable', 'op_time',
|
|
76
|
+
'profile_framework'
|
|
77
|
+
]
|
|
78
|
+
ASCEND = [
|
|
79
|
+
'start', 'start_profile', 'output_path', 'data_process', 'timeline_limit', 'profile_memory',
|
|
80
|
+
'parallel_strategy', 'profile_communication', 'aicore_metrics', 'l2_cache', 'op_time', 'ascend_job_id',
|
|
81
|
+
'profile_framework'
|
|
82
|
+
]
|
|
74
83
|
|
|
75
84
|
|
|
76
|
-
ALWAYS_VALID_PARAM = [
|
|
77
|
-
|
|
85
|
+
ALWAYS_VALID_PARAM = [
|
|
86
|
+
'start', 'start_profile', 'output_path', 'data_process', 'parallel_strategy', 'l2_cache',
|
|
87
|
+
'ascend_job_id', 'op_time', 'profile_framework'
|
|
88
|
+
]
|
|
78
89
|
|
|
79
90
|
|
|
80
91
|
def _environment_check():
|
|
@@ -82,30 +93,241 @@ def _environment_check():
|
|
|
82
93
|
raise RuntimeError("Profiler is not supported when MindSpore is compiled with \'-s on\'.")
|
|
83
94
|
|
|
84
95
|
|
|
96
|
+
class ExecutionCalculator:
|
|
97
|
+
"""Calculate the average execution time and counts for each stage."""
|
|
98
|
+
|
|
99
|
+
def __init__(self, event, stage, custom_info):
|
|
100
|
+
self.event = event
|
|
101
|
+
self.stage = stage
|
|
102
|
+
self.custom_info = custom_info
|
|
103
|
+
self.count = 0
|
|
104
|
+
self.average_execution = 0
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
def _calculate_dataset_item(row, execution_time_map, ts_map):
|
|
108
|
+
"""Calculate dataset execution time for one row."""
|
|
109
|
+
start_end = row['start_end']
|
|
110
|
+
event = row['event']
|
|
111
|
+
stage = row['stage']
|
|
112
|
+
custom_info = row['custom_info']
|
|
113
|
+
event_stage_tid_pid = event + '_' + stage + '_' + row['tid'] + '_' + row['pid']
|
|
114
|
+
if start_end == '1' and event_stage_tid_pid in ts_map:
|
|
115
|
+
title = event + '::' + stage + '::' + custom_info
|
|
116
|
+
ts_end = int(row['time_stamp(us)'])
|
|
117
|
+
ts = ts_map[event_stage_tid_pid]
|
|
118
|
+
dur = ts_end - ts
|
|
119
|
+
if title not in execution_time_map:
|
|
120
|
+
execution_time_map[title] = ExecutionCalculator(event=event, stage=stage, custom_info=custom_info)
|
|
121
|
+
execution_time_map[title].count += 1
|
|
122
|
+
if execution_time_map[title].count != 0:
|
|
123
|
+
execution_time_map[title].average_execution += \
|
|
124
|
+
(dur - execution_time_map[title].average_execution) / execution_time_map[title].count
|
|
125
|
+
del ts_map[event_stage_tid_pid]
|
|
126
|
+
elif start_end == '0':
|
|
127
|
+
ts = int(row['time_stamp(us)'])
|
|
128
|
+
ts_map[event_stage_tid_pid] = ts
|
|
129
|
+
elif start_end == '2':
|
|
130
|
+
logger.info("It is a instant event, skip to calculate execution time. item: %s.", row)
|
|
131
|
+
else:
|
|
132
|
+
logger.warning("Can not map the start time for item: %s.", row)
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
def _calculate_dataset_execution_time(input_file, output_file):
|
|
136
|
+
r"""
|
|
137
|
+
Parse the host info into timeline file, so as to show on UI.
|
|
138
|
+
|
|
139
|
+
Args:
|
|
140
|
+
input_file: the original host_info file, in csv format.
|
|
141
|
+
output_file: the output file, in csv format.
|
|
142
|
+
"""
|
|
143
|
+
input_file = validate_and_normalize_path(input_file)
|
|
144
|
+
# execution_time_map is used to store the ExecutionCalculator for each stage.
|
|
145
|
+
execution_time_map = {}
|
|
146
|
+
# ts_map is used to store the start time of each event_stage_tid_pid.
|
|
147
|
+
ts_map = {}
|
|
148
|
+
with open(input_file, 'r') as f:
|
|
149
|
+
for row in csv.DictReader(f):
|
|
150
|
+
try:
|
|
151
|
+
module_name = row['module_name']
|
|
152
|
+
if module_name != 'Dataset':
|
|
153
|
+
continue
|
|
154
|
+
_calculate_dataset_item(row, execution_time_map, ts_map)
|
|
155
|
+
except KeyError as e:
|
|
156
|
+
logger.error("Error occur when analyse line: %s, Details is: %s", row, e)
|
|
157
|
+
continue
|
|
158
|
+
if ts_map:
|
|
159
|
+
logger.warning("Only start time is record for these items:")
|
|
160
|
+
for k, v in ts_map.items():
|
|
161
|
+
logger.warning("event_stage_tid_pid: %s, time: %d us.", k, v)
|
|
162
|
+
output_file = validate_and_normalize_path(output_file)
|
|
163
|
+
flags = os.O_WRONLY | os.O_CREAT | os.O_TRUNC
|
|
164
|
+
modes = stat.S_IWUSR | stat.S_IRUSR
|
|
165
|
+
with os.fdopen(os.open(output_file, flags, modes), 'w') as f:
|
|
166
|
+
csv_writer = csv.writer(f)
|
|
167
|
+
csv_writer.writerow(['Operation', 'Stage', 'Occurrences', 'Avg. time (us)', 'Custom Info'])
|
|
168
|
+
for _, v in execution_time_map.items():
|
|
169
|
+
csv_writer.writerow([v.event, v.stage, v.count, v.average_execution, v.custom_info])
|
|
170
|
+
os.chmod(output_file, modes)
|
|
171
|
+
logger.info('Successfully calculate the execution time and write it to file: %s.', output_file)
|
|
172
|
+
|
|
173
|
+
|
|
174
|
+
def _extract_timeline_item(row, time_line, ts_map):
|
|
175
|
+
"""Process one row, try to extract a timeline item."""
|
|
176
|
+
start_end = row['start_end']
|
|
177
|
+
event_stage_tid_pid = row['event'] + '_' + row['stage'] + '_' + row['tid'] + '_' + row['pid']
|
|
178
|
+
# map start and end, put the mapped event into timeline.
|
|
179
|
+
if start_end == '1' and event_stage_tid_pid in ts_map:
|
|
180
|
+
title = row['event'] + '::' + row['stage']
|
|
181
|
+
event = {'name': title, 'cat': row['module_name']}
|
|
182
|
+
ts_end = int(row['time_stamp(us)'])
|
|
183
|
+
ts = ts_map[event_stage_tid_pid]
|
|
184
|
+
event['ts'] = ts
|
|
185
|
+
event['dur'] = ts_end - ts
|
|
186
|
+
event['ph'] = 'X'
|
|
187
|
+
event['pid'] = row['pid']
|
|
188
|
+
event['tid'] = row['tid']
|
|
189
|
+
event['args'] = {'parent_pid': row['parent_pid']}
|
|
190
|
+
time_line.append(event)
|
|
191
|
+
del ts_map[event_stage_tid_pid]
|
|
192
|
+
elif start_end == '0':
|
|
193
|
+
ts = int(row['time_stamp(us)'])
|
|
194
|
+
ts_map[event_stage_tid_pid] = ts
|
|
195
|
+
# Put the instance event into timeline.
|
|
196
|
+
elif start_end == '2':
|
|
197
|
+
title = row['event'] + '::' + row['stage']
|
|
198
|
+
event = {
|
|
199
|
+
'name': title, 'cat': row['module_name'], 'ts': int(row['time_stamp(us)']), 'ph': 'i',
|
|
200
|
+
'pid': row['pid'], 'tid': row['tid'], 'args': {'parent_pid': row['parent_pid']}
|
|
201
|
+
}
|
|
202
|
+
time_line.append(event)
|
|
203
|
+
else:
|
|
204
|
+
logger.warning("Can not map the start time for item: %s.", row)
|
|
205
|
+
|
|
206
|
+
|
|
207
|
+
def _parse_host_info(input_file, output_timeline_file, output_memory_file, is_develop_user=True):
|
|
208
|
+
r"""
|
|
209
|
+
Parse the host info into timeline file, so as to show on UI.
|
|
210
|
+
|
|
211
|
+
Args:
|
|
212
|
+
input_file: the original host_info file, in csv format.
|
|
213
|
+
output_timeline_file: the output timeline file, in json format.
|
|
214
|
+
output_memory_file: the output memory_usage file, in csv format.
|
|
215
|
+
is_develop_user: some data only shown to develop users, other users no need to analyse it.
|
|
216
|
+
"""
|
|
217
|
+
input_file = validate_and_normalize_path(input_file)
|
|
218
|
+
time_line = []
|
|
219
|
+
# ts_map is used to store the start time of each event_stage_tid_pid
|
|
220
|
+
ts_map = {}
|
|
221
|
+
memory_header = [
|
|
222
|
+
'tid', 'pid', 'parent_pid', 'module_name', 'event', 'stage', 'level', 'start_end', 'custom_info',
|
|
223
|
+
'memory_usage(kB)', 'time_stamp(us)'
|
|
224
|
+
]
|
|
225
|
+
memory_info = []
|
|
226
|
+
with open(input_file, 'r') as f:
|
|
227
|
+
for row in csv.DictReader(f):
|
|
228
|
+
try:
|
|
229
|
+
level = row['level']
|
|
230
|
+
if level == '0' and not is_develop_user:
|
|
231
|
+
continue
|
|
232
|
+
if int(row['time_stamp(us)']) > 0:
|
|
233
|
+
_extract_timeline_item(row, time_line, ts_map)
|
|
234
|
+
if int(row['memory_usage(kB)']) > 0:
|
|
235
|
+
memory_info.append(row)
|
|
236
|
+
except KeyError as e:
|
|
237
|
+
logger.error("Error occur when analyse line: %s, Details is: %s", row, e)
|
|
238
|
+
continue
|
|
239
|
+
if memory_info:
|
|
240
|
+
with os.fdopen(os.open(output_memory_file, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600), 'w') as csv_file:
|
|
241
|
+
csv_writer = csv.DictWriter(csv_file, fieldnames=memory_header)
|
|
242
|
+
csv_writer.writeheader()
|
|
243
|
+
for item in memory_info:
|
|
244
|
+
csv_writer.writerow(item)
|
|
245
|
+
os.chmod(output_memory_file, stat.S_IREAD | stat.S_IWRITE)
|
|
246
|
+
else:
|
|
247
|
+
logger.warning("No memory_usage is record in file: %s", input_file)
|
|
248
|
+
|
|
249
|
+
if ts_map:
|
|
250
|
+
logger.warning("Only start time is record for these items:")
|
|
251
|
+
for k, v in ts_map.items():
|
|
252
|
+
logger.warning("event_stage_tid_pid: %s, time: %d us.", k, v)
|
|
253
|
+
last_dash = k.rfind('_')
|
|
254
|
+
if last_dash == -1:
|
|
255
|
+
logger.error("Can't find pid in the event_stage_tid_pid string: %s", k)
|
|
256
|
+
continue
|
|
257
|
+
second_last_dash = k.rfind('_', 0, last_dash - 1)
|
|
258
|
+
if second_last_dash == -1:
|
|
259
|
+
logger.error("Can't find tid in the event_stage_tid_pid string: %s", k)
|
|
260
|
+
continue
|
|
261
|
+
pid = k[last_dash + 1:]
|
|
262
|
+
tid = k[second_last_dash + 1: last_dash]
|
|
263
|
+
title = k[:second_last_dash]
|
|
264
|
+
unfinished_timeline = {'name': title, 'pid': pid, 'tid': tid, 'ph': 'B', 'ts': int(v)}
|
|
265
|
+
time_line.append(unfinished_timeline)
|
|
266
|
+
|
|
267
|
+
if time_line:
|
|
268
|
+
timeline_file = validate_and_normalize_path(output_timeline_file)
|
|
269
|
+
with os.fdopen(os.open(timeline_file, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600), 'w') as json_file:
|
|
270
|
+
json.dump(time_line, json_file)
|
|
271
|
+
os.chmod(timeline_file, stat.S_IREAD | stat.S_IWRITE)
|
|
272
|
+
else:
|
|
273
|
+
logger.warning("No valid time_stamp is record in file: %s", input_file)
|
|
274
|
+
|
|
275
|
+
|
|
276
|
+
def _ascend_graph_msprof_generator(source_path, model_iteration_dict):
|
|
277
|
+
try:
|
|
278
|
+
msprof_exporter = AscendMsprofExporter(source_path)
|
|
279
|
+
msprof_exporter.export(model_iteration_dict)
|
|
280
|
+
except ProfilerException as err:
|
|
281
|
+
logger.warning(err.message)
|
|
282
|
+
finally:
|
|
283
|
+
pass
|
|
284
|
+
|
|
285
|
+
|
|
286
|
+
def _ascend_graph_msprof_analyse(source_path):
|
|
287
|
+
"""
|
|
288
|
+
Ascend graph model msprof data analyse.
|
|
289
|
+
|
|
290
|
+
Returns:
|
|
291
|
+
list[obj]: The list is : df_op_summary, df_op_statistic, df_step_trace
|
|
292
|
+
"""
|
|
293
|
+
df_op_summary = []
|
|
294
|
+
df_op_statistic = []
|
|
295
|
+
df_step_trace = []
|
|
296
|
+
try:
|
|
297
|
+
msprof_analyser = AscendMsprofDataGenerator(os.path.join(source_path, 'summary'))
|
|
298
|
+
df_op_summary, df_op_statistic, df_step_trace = msprof_analyser.parse()
|
|
299
|
+
except ProfilerException as err:
|
|
300
|
+
logger.warning(err.message)
|
|
301
|
+
finally:
|
|
302
|
+
pass
|
|
303
|
+
return df_op_summary, df_op_statistic, df_step_trace
|
|
304
|
+
|
|
305
|
+
|
|
85
306
|
class Profiler:
|
|
86
307
|
r"""
|
|
87
308
|
This class to enable the profiling of MindSpore neural networks.
|
|
88
309
|
MindSpore users can import the mindspore.Profiler, initialize the Profiler object to start profiling,
|
|
89
310
|
and use Profiler.analyse() to stop profiling and analyse the results.
|
|
90
|
-
Users can visualize the results using the
|
|
311
|
+
Users can visualize the results using the `MindSpore Insight
|
|
312
|
+
<https://www.mindspore.cn/mindinsight/docs/en/r2.2/index.html>`_ tool.
|
|
91
313
|
Now, Profiler supports AICORE operator, AICPU operator, HostCPU operator, memory,
|
|
92
314
|
correspondence, cluster, etc data analysis.
|
|
93
315
|
|
|
94
316
|
Args:
|
|
95
|
-
output_path (str, optional): Output data path. Default: "./data".
|
|
96
|
-
op_time (bool, optional): (Ascend/GPU) Whether to collect operators performance data. Default value: True
|
|
317
|
+
output_path (str, optional): Output data path. Default: ``"./data"`` .
|
|
318
|
+
op_time (bool, optional): (Ascend/GPU) Whether to collect operators performance data. Default value: ``True``.
|
|
97
319
|
profile_communication (bool, optional): (Ascend only) Whether to collect communication performance data in
|
|
98
320
|
a multi devices training, collect when True. Setting this parameter has no effect during single device
|
|
99
|
-
training. When using this parameter, `op_time` must be set to True. Default: False.
|
|
100
|
-
profile_memory (bool, optional): (Ascend only) Whether to collect tensor memory data, collect when True.
|
|
101
|
-
When using this parameter, `op_time` must be set to True. Default: False.
|
|
321
|
+
training. When using this parameter, `op_time` must be set to ``True`` . Default: ``False`` .
|
|
322
|
+
profile_memory (bool, optional): (Ascend only) Whether to collect tensor memory data, collect when ``True`` .
|
|
323
|
+
When using this parameter, `op_time` must be set to True. Default: ``False`` .
|
|
102
324
|
parallel_strategy (bool, optional): (Ascend only) Whether to collect parallel policy performance data.
|
|
103
|
-
Default value:
|
|
325
|
+
Default value: ``True`` .
|
|
104
326
|
start_profile (bool, optional): The start_profile parameter controls whether to enable or disable performance
|
|
105
|
-
data collection based on conditions. Default: True.
|
|
327
|
+
data collection based on conditions. Default: ``True`` .
|
|
106
328
|
aicore_metrics (int, optional): (Ascend only) Types of AICORE performance data collected, when using this
|
|
107
|
-
parameter, `op_time` must be set to True, and the value must be in [-1, 0, 1, 2, 3, 4, 5],
|
|
108
|
-
data items contained in each metric are as follows:
|
|
329
|
+
parameter, `op_time` must be set to ``True`` , and the value must be in [-1, 0, 1, 2, 3, 4, 5],
|
|
330
|
+
Default: ``0`` , the data items contained in each metric are as follows:
|
|
109
331
|
|
|
110
332
|
- -1: Does not collect AICORE data.
|
|
111
333
|
- 0: ArithmeticUtilization contains mac_fp16/int8_ratio, vec_fp32/fp16/int32_ratio, vec_misc_ratio etc.
|
|
@@ -116,9 +338,10 @@ class Profiler:
|
|
|
116
338
|
- 4: ResourceConflictRatio contains vec_bankgroup/bank/resc_cflt_ratio etc.
|
|
117
339
|
- 5: MemoryUB contains ub_read/write_bw_mte, ub_read/write_bw_vector, ub\_/write_bw_scalar etc.
|
|
118
340
|
|
|
119
|
-
l2_cache (bool, optional): (Ascend only) Whether to collect l2 cache data, collect when True.
|
|
341
|
+
l2_cache (bool, optional): (Ascend only) Whether to collect l2 cache data, collect when True.
|
|
342
|
+
Default: ``False`` .
|
|
120
343
|
sync_enable (bool, optional): (GPU only) Whether the profiler collects operators in a synchronous way.
|
|
121
|
-
Default: True.
|
|
344
|
+
Default: ``True`` .
|
|
122
345
|
|
|
123
346
|
- True: The synchronous way. Before sending the operator to the GPU, the CPU records the start timestamp.
|
|
124
347
|
Then the operator is returned to the CPU after execution, and the end timestamp is recorded,
|
|
@@ -126,9 +349,18 @@ class Profiler:
|
|
|
126
349
|
- False: The asynchronous way. The duration of the operator is that of sending from the CPU to the GPU.
|
|
127
350
|
This method can reduce the impact of adding profiler on overall training time.
|
|
128
351
|
data_process (bool, optional): (Ascend/GPU) Whether to collect data to prepare performance data.
|
|
129
|
-
Default value: True.
|
|
130
|
-
timeline_limit (int, optional): Set the maximum storage size of the timeline file (unit M).
|
|
131
|
-
parameter, `op_time` must be set to True. Default value: 500.
|
|
352
|
+
Default value: ``True`` .
|
|
353
|
+
timeline_limit (int, optional): (Ascend/GPU) Set the maximum storage size of the timeline file (unit M).
|
|
354
|
+
When using this parameter, `op_time` must be set to True. Default value: ``500`` .
|
|
355
|
+
profile_framework (str, optional): (Ascend/GPU) The host information to collect, it must be one of
|
|
356
|
+
["all", "time", "memory", None]. When it is not set to None, a subdirectory host_info will be generated in the
|
|
357
|
+
specified profiler directory, which stores the collected memory and time files on the Host side.
|
|
358
|
+
Default: "all".
|
|
359
|
+
|
|
360
|
+
- "all": Record both host timestamp and host memory usage.
|
|
361
|
+
- "time": Only record host timestamp.
|
|
362
|
+
- "memory": Only record host memory usage.
|
|
363
|
+
- None: Not record host information.
|
|
132
364
|
|
|
133
365
|
Raises:
|
|
134
366
|
RuntimeError: When the version of CANN does not match the version of MindSpore,
|
|
@@ -144,7 +376,6 @@ class Profiler:
|
|
|
144
376
|
>>> import mindspore.dataset as ds
|
|
145
377
|
>>> from mindspore import Profiler
|
|
146
378
|
>>>
|
|
147
|
-
>>>
|
|
148
379
|
>>> class Net(nn.Cell):
|
|
149
380
|
... def __init__(self):
|
|
150
381
|
... super(Net, self).__init__()
|
|
@@ -160,7 +391,7 @@ class Profiler:
|
|
|
160
391
|
... optimizer = nn.Momentum(net.trainable_params(), 1, 0.9)
|
|
161
392
|
... loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True)
|
|
162
393
|
... data = ds.GeneratorDataset(generator, ["data", "label"])
|
|
163
|
-
... model = ms.Model(net, loss, optimizer)
|
|
394
|
+
... model = ms.train.Model(net, loss, optimizer)
|
|
164
395
|
... model.train(1, data)
|
|
165
396
|
>>>
|
|
166
397
|
>>> if __name__ == '__main__':
|
|
@@ -219,9 +450,13 @@ class Profiler:
|
|
|
219
450
|
self._sync_enable = True
|
|
220
451
|
self._stop_time = 0
|
|
221
452
|
self._dynamic_status = False
|
|
453
|
+
self._model_iteration_dict = None
|
|
454
|
+
self._profile_framework = "all"
|
|
222
455
|
self._msprof_enable = os.getenv("PROFILER_SAMPLECONFIG")
|
|
223
456
|
if self._msprof_enable:
|
|
224
457
|
return
|
|
458
|
+
self._start_time = int(time.time() * 1000000)
|
|
459
|
+
logger.info("Profiling: start time: %d", self._start_time)
|
|
225
460
|
if kwargs.get("env_enable"):
|
|
226
461
|
self._profiler_init(kwargs)
|
|
227
462
|
return
|
|
@@ -268,6 +503,25 @@ class Profiler:
|
|
|
268
503
|
|
|
269
504
|
return job_start_time
|
|
270
505
|
|
|
506
|
+
@staticmethod
|
|
507
|
+
def _parse_info_json(info_file):
|
|
508
|
+
"""
|
|
509
|
+
Parse info log file, get the rank id and device id of the job.
|
|
510
|
+
Args:
|
|
511
|
+
info_file (str): The file path of the info log file to parse.
|
|
512
|
+
|
|
513
|
+
Returns:
|
|
514
|
+
rank id, device id
|
|
515
|
+
"""
|
|
516
|
+
with open(info_file, "r") as f:
|
|
517
|
+
info_dict = json.load(f)
|
|
518
|
+
|
|
519
|
+
rank_id = info_dict.get("rank_id", 0)
|
|
520
|
+
dev_info = info_dict.get("DeviceInfo", [])
|
|
521
|
+
dev_id = dev_info[0].get("id", -1)
|
|
522
|
+
|
|
523
|
+
return str(rank_id), str(dev_id)
|
|
524
|
+
|
|
271
525
|
def op_analyse(self, op_name, device_id=None):
|
|
272
526
|
"""
|
|
273
527
|
Profiler users can use this interface to obtain operator performance data.
|
|
@@ -276,11 +530,11 @@ class Profiler:
|
|
|
276
530
|
op_name (str or list): The primitive operator name to query.
|
|
277
531
|
device_id (int, optional): ID of the target device. This parameter is optional during network training or
|
|
278
532
|
inference, and users can use device_id parameter to specify which card operator performance data to
|
|
279
|
-
parse. If this interface is used for offline data parsing, Default: 0.
|
|
533
|
+
parse. If this interface is used for offline data parsing, Default: ``0`` .
|
|
280
534
|
|
|
281
535
|
Raises:
|
|
282
|
-
TypeError: If the op_name parameter type is incorrect.
|
|
283
|
-
TypeError: If the device_id parameter type is incorrect.
|
|
536
|
+
TypeError: If the `op_name` parameter type is incorrect.
|
|
537
|
+
TypeError: If the `device_id` parameter type is incorrect.
|
|
284
538
|
RuntimeError: If MindSpore runs on Ascend, this interface cannot be used.
|
|
285
539
|
|
|
286
540
|
Supported Platforms:
|
|
@@ -288,24 +542,25 @@ class Profiler:
|
|
|
288
542
|
|
|
289
543
|
Examples:
|
|
290
544
|
>>> from mindspore import Profiler
|
|
545
|
+
>>> from mindspore import nn
|
|
546
|
+
>>> from mindspore import Model
|
|
547
|
+
>>> # Profiler init.
|
|
548
|
+
>>> profiler = Profiler()
|
|
549
|
+
>>> # Train Model or eval Model, taking LeNet5 as an example.
|
|
550
|
+
>>> # Refer to https://gitee.com/mindspore/docs/blob/r2.2/docs/mindspore/code/lenet.py
|
|
551
|
+
>>> net = LeNet5()
|
|
552
|
+
>>> optimizer = nn.Momentum(net.trainable_params(), learning_rate=0.1, momentum=0.9)
|
|
553
|
+
>>> loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True)
|
|
554
|
+
>>> # Create the dataset taking MNIST as an example.
|
|
555
|
+
>>> # Refer to https://gitee.com/mindspore/docs/blob/r2.2/docs/mindspore/code/mnist.py
|
|
556
|
+
>>> dataloader = create_dataset()
|
|
557
|
+
>>> model = Model(net, loss, optimizer)
|
|
558
|
+
>>> model.train(5, dataloader, dataset_sink_mode=False)
|
|
291
559
|
>>>
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
...
|
|
295
|
-
... # Train Model or eval Model.
|
|
296
|
-
... net = Net()
|
|
297
|
-
... train(net)
|
|
298
|
-
...
|
|
299
|
-
... # Profiler end
|
|
300
|
-
... profiler.analyse()
|
|
301
|
-
...
|
|
302
|
-
... profiler.op_analyse(op_name=["BiasAdd", "Conv2D"])
|
|
303
|
-
...
|
|
304
|
-
>>> from mindspore import Profiler
|
|
560
|
+
>>> # Profiler end
|
|
561
|
+
>>> profiler.analyse()
|
|
305
562
|
>>>
|
|
306
|
-
|
|
307
|
-
... profiler = Profiler(output_path="my_profiler_path")
|
|
308
|
-
... profiler.op_analyse(op_name="Conv2D")
|
|
563
|
+
>>> profiler.op_analyse(op_name=["BiasAdd", "Conv2D"])
|
|
309
564
|
"""
|
|
310
565
|
if self._device_target == 'ascend':
|
|
311
566
|
raise RuntimeError("The Interface 'Profiler.op_analyse()' is not supported on Ascend currently.")
|
|
@@ -332,10 +587,33 @@ class Profiler:
|
|
|
332
587
|
return message
|
|
333
588
|
return op_info
|
|
334
589
|
|
|
335
|
-
def analyse(self):
|
|
590
|
+
def analyse(self, offline_path=None):
|
|
336
591
|
"""
|
|
337
592
|
Collect and analyze training performance data, support calls during and after training. The example shows above.
|
|
593
|
+
|
|
594
|
+
Args:
|
|
595
|
+
offline_path (Union[str, None], optional): The data path which need to be analysed with offline mode.
|
|
596
|
+
Offline mode is used in abnormal exit scenario. This parameter should be set to ``None``
|
|
597
|
+
for online mode. Default: ``None``.
|
|
598
|
+
"""
|
|
599
|
+
self._analyse(offline_path=offline_path)
|
|
600
|
+
|
|
601
|
+
def _analyse(self, offline_path=None, model_iteration_dict=None):
|
|
338
602
|
"""
|
|
603
|
+
Collect and analyze training performance data, support calls during and after training. The example shows above.
|
|
604
|
+
|
|
605
|
+
Args:
|
|
606
|
+
offline_path (Union[str, None], optional): The data path which need to be analysed with offline mode.
|
|
607
|
+
Offline mode is used in abnormal exit scenario. This parameter should be set to ``None``
|
|
608
|
+
for online mode. Default: ``None``.
|
|
609
|
+
model_iteration_dict: Dictionary with model id as the key and iteration id as the value, Default: ``None``.
|
|
610
|
+
"""
|
|
611
|
+
self._model_iteration_dict = model_iteration_dict
|
|
612
|
+
if offline_path:
|
|
613
|
+
if self._is_offline_parser():
|
|
614
|
+
self._ascend_graph_analyse()
|
|
615
|
+
_offline_parse(offline_path)
|
|
616
|
+
return
|
|
339
617
|
if self._msprof_enable:
|
|
340
618
|
return
|
|
341
619
|
|
|
@@ -360,8 +638,19 @@ class Profiler:
|
|
|
360
638
|
|
|
361
639
|
elif self._device_target and self._device_target == DeviceTarget.ASCEND.value:
|
|
362
640
|
self._ascend_analyse()
|
|
641
|
+
if self._profile_framework:
|
|
642
|
+
if self._device_target != DeviceTarget.CPU.value:
|
|
643
|
+
self._host_info_analyse()
|
|
644
|
+
else:
|
|
645
|
+
logger.warning("The parameter 'profile_framework' is not support for CPU, so there no host_info"
|
|
646
|
+
" directory in the output path.")
|
|
363
647
|
logger.info("Profiling: all the data have been analyzed.")
|
|
364
648
|
self._init_profiler_info()
|
|
649
|
+
self._is_support_step_info_collect()
|
|
650
|
+
parallel_mode = get_auto_parallel_context("parallel_mode")
|
|
651
|
+
stage_num = get_auto_parallel_context("pipeline_stages")
|
|
652
|
+
|
|
653
|
+
ProfilerInfo.set_parallel_info(parallel_mode, stage_num)
|
|
365
654
|
ProfilerInfo.set_analyse_end_time(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))
|
|
366
655
|
ProfilerInfo.set_rank_size(self._rank_size)
|
|
367
656
|
ProfilerInfo.set_heterogeneous(self._is_heterogeneous)
|
|
@@ -374,37 +663,36 @@ class Profiler:
|
|
|
374
663
|
Raises:
|
|
375
664
|
RuntimeError: If the profiler has already started.
|
|
376
665
|
RuntimeError: If MD profiling has stopped, repeated start action is not supported.
|
|
377
|
-
RuntimeError: If the start_profile parameter is not set or is set to True
|
|
666
|
+
RuntimeError: If the `start_profile` parameter is not set or is set to ``True``.
|
|
378
667
|
|
|
379
668
|
Examples:
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
|
|
669
|
+
>>> from mindspore.train import Callback
|
|
670
|
+
>>> from mindspore import Profiler
|
|
671
|
+
>>> class StopAtStep(Callback):
|
|
672
|
+
... def __init__(self, start_step, stop_step):
|
|
673
|
+
... super(StopAtStep, self).__init__()
|
|
674
|
+
... self.start_step = start_step
|
|
675
|
+
... self.stop_step = stop_step
|
|
676
|
+
... self.profiler = Profiler(start_profile=False)
|
|
677
|
+
...
|
|
678
|
+
... def step_begin(self, run_context):
|
|
679
|
+
... cb_params = run_context.original_args()
|
|
680
|
+
... step_num = cb_params.cur_step_num
|
|
681
|
+
... if step_num == self.start_step:
|
|
682
|
+
... self.profiler.start()
|
|
683
|
+
...
|
|
684
|
+
... def step_end(self, run_context):
|
|
685
|
+
... cb_params = run_context.original_args()
|
|
686
|
+
... step_num = cb_params.cur_step_num
|
|
687
|
+
... if step_num == self.stop_step:
|
|
688
|
+
... self.profiler.stop()
|
|
689
|
+
...
|
|
690
|
+
... def end(self, run_context):
|
|
691
|
+
... self.profiler.analyse()
|
|
401
692
|
"""
|
|
402
693
|
if self._msprof_enable:
|
|
403
694
|
return
|
|
404
695
|
|
|
405
|
-
self._start_time = int(time.time() * 1000000)
|
|
406
|
-
logger.info("Profiling: start time: %d", self._start_time)
|
|
407
|
-
|
|
408
696
|
if not self._has_started:
|
|
409
697
|
if not self._has_started_twice:
|
|
410
698
|
self._has_started = True
|
|
@@ -421,13 +709,17 @@ class Profiler:
|
|
|
421
709
|
return
|
|
422
710
|
|
|
423
711
|
self._cpu_profiler.step_profiling_enable(True)
|
|
712
|
+
if self._op_time:
|
|
713
|
+
self._cpu_profiler.enable_op_time()
|
|
424
714
|
|
|
425
715
|
if self._device_target and self._device_target == DeviceTarget.GPU.value:
|
|
426
716
|
if self._data_process:
|
|
427
717
|
self._md_profiler.start()
|
|
428
718
|
self._gpu_profiler.data_process_enable(True)
|
|
429
|
-
if self._op_time:
|
|
719
|
+
if self._profile_framework or self._op_time:
|
|
430
720
|
self._gpu_profiler.step_profiling_enable(True)
|
|
721
|
+
if self._op_time:
|
|
722
|
+
self._gpu_profiler.enable_op_time()
|
|
431
723
|
elif self._device_target and self._device_target == DeviceTarget.ASCEND.value:
|
|
432
724
|
if self._data_process:
|
|
433
725
|
self._md_profiler.start()
|
|
@@ -442,27 +734,29 @@ class Profiler:
|
|
|
442
734
|
RuntimeError: If the profiler has not started, this function is disabled.
|
|
443
735
|
|
|
444
736
|
Examples:
|
|
445
|
-
|
|
446
|
-
|
|
447
|
-
|
|
448
|
-
|
|
449
|
-
|
|
450
|
-
|
|
451
|
-
|
|
452
|
-
|
|
453
|
-
|
|
454
|
-
|
|
455
|
-
|
|
456
|
-
|
|
457
|
-
|
|
458
|
-
|
|
459
|
-
|
|
460
|
-
|
|
461
|
-
|
|
462
|
-
|
|
463
|
-
|
|
464
|
-
|
|
465
|
-
|
|
737
|
+
>>> from mindspore.train import Callback
|
|
738
|
+
>>> from mindspore import Profiler
|
|
739
|
+
>>> class StopAtEpoch(Callback):
|
|
740
|
+
... def __init__(self, start_epoch, stop_epoch):
|
|
741
|
+
... super(StopAtEpoch, self).__init__()
|
|
742
|
+
... self.start_epoch = start_epoch
|
|
743
|
+
... self.stop_epoch = stop_epoch
|
|
744
|
+
... self.profiler = Profiler(start_profile=False)
|
|
745
|
+
...
|
|
746
|
+
... def epoch_begin(self, run_context):
|
|
747
|
+
... cb_params = run_context.original_args()
|
|
748
|
+
... epoch_num = cb_params.cur_epoch_num
|
|
749
|
+
... if epoch_num == self.start_epoch:
|
|
750
|
+
... self.profiler.start()
|
|
751
|
+
...
|
|
752
|
+
... def epoch_end(self, run_context):
|
|
753
|
+
... cb_params = run_context.original_args()
|
|
754
|
+
... epoch_num = cb_params.cur_epoch_num
|
|
755
|
+
... if epoch_num == self.stop_epoch:
|
|
756
|
+
... self.profiler.stop()
|
|
757
|
+
...
|
|
758
|
+
... def end(self, run_context):
|
|
759
|
+
... self.profiler.analyse()
|
|
466
760
|
"""
|
|
467
761
|
if self._msprof_enable:
|
|
468
762
|
return
|
|
@@ -506,6 +800,7 @@ class Profiler:
|
|
|
506
800
|
self._profile_communication = options.get('profile_communication')
|
|
507
801
|
self._op_time = options.get('op_time')
|
|
508
802
|
self._device_target = context.get_context("device_target").lower()
|
|
803
|
+
self._profile_framework = options.get('profile_framework', 'all')
|
|
509
804
|
self._profiler_manager = c_expression.ProfilerManager.get_instance()
|
|
510
805
|
self._cpu_profiler = c_expression.Profiler.get_instance("CPU")
|
|
511
806
|
if self._data_process:
|
|
@@ -529,6 +824,10 @@ class Profiler:
|
|
|
529
824
|
"""Complete Profiler initialization according to device_target"""
|
|
530
825
|
profiler_manager = c_expression.ProfilerManager
|
|
531
826
|
self._profiler_manager = profiler_manager.get_instance()
|
|
827
|
+
if self._profile_framework is None:
|
|
828
|
+
self._profiler_manager.set_profile_framework("NULL")
|
|
829
|
+
else:
|
|
830
|
+
self._profiler_manager.set_profile_framework(self._profile_framework)
|
|
532
831
|
if self._device_target:
|
|
533
832
|
cpu_profiler = c_expression.Profiler
|
|
534
833
|
self._cpu_profiler = cpu_profiler.get_instance("CPU")
|
|
@@ -614,13 +913,14 @@ class Profiler:
|
|
|
614
913
|
"hccl": "on" if self._op_time and self._profile_communication else "off",
|
|
615
914
|
"l2_cache": self._l2_cache,
|
|
616
915
|
"parallel_strategy": "on" if self._parallel_strategy else "off",
|
|
617
|
-
"op_time": "on" if self._op_time else "off"
|
|
916
|
+
"op_time": "on" if self._op_time else "off",
|
|
917
|
+
"profile_framework": self._profile_framework
|
|
618
918
|
}
|
|
619
919
|
|
|
620
920
|
return profiling_options
|
|
621
921
|
|
|
622
922
|
def _parse_parameter_for_gpu(self, kwargs):
|
|
623
|
-
"""Parse parameter in
|
|
923
|
+
"""Parse parameter in Profiler when the device target is GPU."""
|
|
624
924
|
self.start_profile = kwargs.pop("start_profile", True)
|
|
625
925
|
if not isinstance(self.start_profile, bool):
|
|
626
926
|
raise TypeError(f"For '{self.__class__.__name__}', the parameter start_profile must be bool, "
|
|
@@ -632,7 +932,7 @@ class Profiler:
|
|
|
632
932
|
self._sync_enable = True
|
|
633
933
|
|
|
634
934
|
def _parse_parameter_for_ascend(self, kwargs):
|
|
635
|
-
"""Parse parameter in
|
|
935
|
+
"""Parse parameter in Profiler when the device target is Ascend."""
|
|
636
936
|
ascend_job_id = kwargs.pop("ascend_job_id", "")
|
|
637
937
|
self._set_ascend_job_id(ascend_job_id)
|
|
638
938
|
self.start_profile = kwargs.pop("start_profile", True)
|
|
@@ -708,29 +1008,6 @@ class Profiler:
|
|
|
708
1008
|
return bool(self._ascend_job_id)
|
|
709
1009
|
return False
|
|
710
1010
|
|
|
711
|
-
def _ascend_pynative_analyse(self):
|
|
712
|
-
"""Collect and analyse ascend pynative mode performance data."""
|
|
713
|
-
self._ascend_profiler.finalize()
|
|
714
|
-
op_intermediate_parser = OPIntermediateParser(self._output_path, self._rank_id)
|
|
715
|
-
op_intermediate_parser.parser_pynative_op_type()
|
|
716
|
-
op_intermediate_parser.parser_pynative_op_intermediate_detail()
|
|
717
|
-
|
|
718
|
-
job_id = self._get_profiling_job_id()
|
|
719
|
-
logger.info("Profiling: job id is %s ", job_id)
|
|
720
|
-
self._check_output_path(output_path=self._output_path)
|
|
721
|
-
source_path = os.path.join(self._output_path, job_id)
|
|
722
|
-
MinddataParser.execute(source_path, self._output_path, self._rank_id)
|
|
723
|
-
|
|
724
|
-
pipeline_parser = MinddataPipelineParser(self._output_path, self._rank_id, self._output_path)
|
|
725
|
-
logger.info("Profiling: analyzing the minddata pipeline operator and queue.")
|
|
726
|
-
pipeline_parser.parse()
|
|
727
|
-
|
|
728
|
-
timeline_analyser = AscendTimelineGenerator(self._output_path, self._dev_id, self._rank_id,
|
|
729
|
-
self._rank_size, context.get_context("mode"))
|
|
730
|
-
timeline_analyser.init_pynative_timeline()
|
|
731
|
-
timeline_analyser.write_timeline(self._timeline_size_limit_byte)
|
|
732
|
-
timeline_analyser.write_timeline_summary()
|
|
733
|
-
|
|
734
1011
|
def _ascend_analyse(self):
|
|
735
1012
|
"""Collect and analyse ascend performance data."""
|
|
736
1013
|
self._rank_size = 1
|
|
@@ -746,30 +1023,128 @@ class Profiler:
|
|
|
746
1023
|
self.stop()
|
|
747
1024
|
else:
|
|
748
1025
|
logger.info("No need to stop profiler because profiler has been stopped.")
|
|
1026
|
+
# export op data before analyse
|
|
749
1027
|
self._ascend_graph_analyse()
|
|
750
1028
|
|
|
751
|
-
def
|
|
752
|
-
"""Analyse
|
|
1029
|
+
def _minddata_analyse(self, source_path):
|
|
1030
|
+
"""Analyse minddata for ascend graph model."""
|
|
1031
|
+
if not self._data_process:
|
|
1032
|
+
return
|
|
1033
|
+
store_id = self._rank_id if self._device_target == DeviceTarget.ASCEND.value else self._dev_id
|
|
1034
|
+
# Parsing minddata AICPU profiling
|
|
1035
|
+
if self._device_target == DeviceTarget.ASCEND.value:
|
|
1036
|
+
logger.info("Profiling: analyzing the minddata AICPU data.")
|
|
1037
|
+
MinddataParser.execute(source_path, self._output_path, store_id)
|
|
1038
|
+
|
|
1039
|
+
# parse minddata pipeline operator and queue
|
|
753
1040
|
try:
|
|
754
|
-
self.
|
|
755
|
-
except
|
|
756
|
-
logger.warning(
|
|
1041
|
+
MinddataPipelineParser(self._output_path, store_id, self._output_path).parse()
|
|
1042
|
+
except ProfilerException as err:
|
|
1043
|
+
logger.warning(err.message)
|
|
757
1044
|
finally:
|
|
758
1045
|
pass
|
|
759
1046
|
|
|
760
|
-
|
|
1047
|
+
# Analyze minddata information
|
|
1048
|
+
logger.info("Profiling: analyzing the minddata information.")
|
|
1049
|
+
try:
|
|
1050
|
+
MinddataProfilingAnalyzer(self._output_path, store_id, self._output_path).analyze()
|
|
1051
|
+
except ProfilerException as err:
|
|
1052
|
+
logger.warning(err.message)
|
|
1053
|
+
finally:
|
|
1054
|
+
pass
|
|
1055
|
+
|
|
1056
|
+
def _ascend_fpbp_analyse(self, op_summary, steptrace):
|
|
1057
|
+
"""
|
|
1058
|
+
Ascend graph model fp/bp analyse.
|
|
1059
|
+
|
|
1060
|
+
Returns:
|
|
1061
|
+
dict[obj]: points: the fp bp information
|
|
1062
|
+
"""
|
|
1063
|
+
points = None
|
|
1064
|
+
try:
|
|
1065
|
+
dev_id = self._rank_id if self._device_target == DeviceTarget.ASCEND.value else self._dev_id
|
|
1066
|
+
step_trace_point_info_path = os.path.join(self._output_path, f'step_trace_point_info_{dev_id}.json')
|
|
1067
|
+
|
|
1068
|
+
step_trace_point_info_path = validate_and_normalize_path(step_trace_point_info_path)
|
|
1069
|
+
|
|
1070
|
+
fpbp_analyse = AscendFPBPGenerator(op_summary, steptrace)
|
|
1071
|
+
points, _ = fpbp_analyse.parse()
|
|
1072
|
+
fpbp_analyse.write(step_trace_point_info_path)
|
|
1073
|
+
except ProfilerException as err:
|
|
1074
|
+
logger.warning(err.message)
|
|
1075
|
+
finally:
|
|
1076
|
+
pass
|
|
1077
|
+
return points
|
|
1078
|
+
|
|
1079
|
+
def _ascend_op_analyse(self, op_summary, op_statistic, dynamic_status):
|
|
1080
|
+
"""
|
|
1081
|
+
Ascend graph model hwts analyse.
|
|
1082
|
+
|
|
1083
|
+
Returns:
|
|
1084
|
+
list[obj]: The list is: framework_parser, aicpu_data_parser, optime_parser, op_task_dict
|
|
1085
|
+
"""
|
|
1086
|
+
try:
|
|
1087
|
+
dev_id = self._rank_id if self._device_target == DeviceTarget.ASCEND.value else self._dev_id
|
|
1088
|
+
|
|
1089
|
+
op_intermediate_detail_path = os.path.join(self._output_path,
|
|
1090
|
+
f'aicore_intermediate_{dev_id}_detail.csv')
|
|
1091
|
+
op_intermediate_type_path = os.path.join(self._output_path, f'aicore_intermediate_{dev_id}_type.csv')
|
|
1092
|
+
aicpu_intermediate_detail_path = os.path.join(self._output_path, f'aicpu_intermediate_{dev_id}.csv')
|
|
1093
|
+
framework_raw_path = os.path.join(self._output_path, f'framework_raw_{dev_id}.csv')
|
|
1094
|
+
|
|
1095
|
+
op_intermediate_detail_path = validate_and_normalize_path(op_intermediate_detail_path)
|
|
1096
|
+
op_intermediate_type_path = validate_and_normalize_path(op_intermediate_type_path)
|
|
1097
|
+
aicpu_intermediate_detail_path = validate_and_normalize_path(aicpu_intermediate_detail_path)
|
|
1098
|
+
framework_raw_path = validate_and_normalize_path(framework_raw_path)
|
|
1099
|
+
|
|
1100
|
+
if context.get_context("mode") == context.GRAPH_MODE:
|
|
1101
|
+
output_timeline_data_path = os.path.join(self._output_path, f'output_timeline_data_{dev_id}.txt')
|
|
1102
|
+
output_timeline_data_path = validate_and_normalize_path(output_timeline_data_path)
|
|
1103
|
+
else:
|
|
1104
|
+
output_timeline_data_path = None
|
|
1105
|
+
|
|
1106
|
+
op_analyser = AscendOPGenerator(op_summary, op_statistic, dynamic_status)
|
|
1107
|
+
op_analyser.parse()
|
|
1108
|
+
op_analyser.write(op_intermediate_detail_path, op_intermediate_type_path,
|
|
1109
|
+
aicpu_intermediate_detail_path, framework_raw_path, output_timeline_data_path)
|
|
1110
|
+
except ProfilerException as err:
|
|
1111
|
+
logger.warning(err.message)
|
|
1112
|
+
finally:
|
|
1113
|
+
pass
|
|
1114
|
+
|
|
1115
|
+
def _ascend_step_trace_analyse(self, steptrace):
|
|
761
1116
|
"""Analyse step trace info."""
|
|
762
|
-
points, is_training_mode_flag = None, False
|
|
763
1117
|
try:
|
|
764
|
-
if
|
|
765
|
-
|
|
1118
|
+
if not self._dynamic_status:
|
|
1119
|
+
dev_id = self._rank_id if self._device_target == DeviceTarget.ASCEND.value else self._dev_id
|
|
1120
|
+
step_trace_intermediate_path = os.path.join(self._output_path,
|
|
1121
|
+
f'step_trace_raw_{dev_id}_detail_time.csv')
|
|
1122
|
+
|
|
1123
|
+
step_trace_intermediate_path = validate_and_normalize_path(step_trace_intermediate_path)
|
|
1124
|
+
|
|
1125
|
+
steptrace_analyser = AscendStepTraceGenerator(steptrace)
|
|
1126
|
+
steptrace_analyser.parse()
|
|
1127
|
+
steptrace_analyser.write(step_trace_intermediate_path)
|
|
766
1128
|
except ProfilerException as err:
|
|
767
1129
|
logger.warning(err.message)
|
|
768
1130
|
finally:
|
|
769
1131
|
pass
|
|
770
|
-
return points, is_training_mode_flag
|
|
771
1132
|
|
|
772
|
-
def
|
|
1133
|
+
def _ascend_timeline_analyse(self, op_summary, steptrace):
|
|
1134
|
+
"""Analyse timeline info."""
|
|
1135
|
+
try:
|
|
1136
|
+
logger.info("Profiling: analyzing the timeline data")
|
|
1137
|
+
timeline_analyser = AscendTimelineGenerator(self._output_path, self._dev_id, self._rank_id, self._rank_size,
|
|
1138
|
+
context.get_context('mode'))
|
|
1139
|
+
timeline_analyser.init_timeline(op_summary, steptrace)
|
|
1140
|
+
timeline_analyser.write_timeline(self._timeline_size_limit_byte)
|
|
1141
|
+
timeline_analyser.write_timeline_summary()
|
|
1142
|
+
except (ProfilerIOException, ProfilerFileNotFoundException, RuntimeError) as err:
|
|
1143
|
+
logger.warning('Fail to write timeline data: %s', err)
|
|
1144
|
+
finally:
|
|
1145
|
+
pass
|
|
1146
|
+
|
|
1147
|
+
def _ascend_dynamic_net_analyse(self, op_summary):
|
|
773
1148
|
"""Analyse dynamic shape network info."""
|
|
774
1149
|
if self._profile_communication:
|
|
775
1150
|
logger.warning(
|
|
@@ -779,14 +1154,31 @@ class Profiler:
|
|
|
779
1154
|
logger.warning(
|
|
780
1155
|
"[Profiler]Dynamic Shape network does not support collecting step trace performance data currently.")
|
|
781
1156
|
dynamic_parser = DynamicFrameWorkParser(self._output_path, self._rank_id)
|
|
782
|
-
dynamic_parser.write_dynamic_shape_data()
|
|
1157
|
+
dynamic_parser.write_dynamic_shape_data(op_summary)
|
|
1158
|
+
|
|
1159
|
+
def _ascend_flops_analyse(self, op_summary):
|
|
1160
|
+
"""Get op FLOPs from op_summary, write output_op_flops_x.csv."""
|
|
1161
|
+
if len(op_summary.dtype) != 18:
|
|
1162
|
+
logger.warning("[Profiler] Can not found cube fops and vector fops data in the op summary.")
|
|
1163
|
+
return
|
|
1164
|
+
|
|
1165
|
+
try:
|
|
1166
|
+
dev_id = self._rank_id if self._device_target == DeviceTarget.ASCEND.value else self._dev_id
|
|
1167
|
+
|
|
1168
|
+
flops_path = os.path.join(self._output_path, f'flops_{dev_id}.txt')
|
|
1169
|
+
flops_summary_path = os.path.join(self._output_path, f'flops_summary_{dev_id}.json')
|
|
1170
|
+
|
|
1171
|
+
flops_path = validate_and_normalize_path(flops_path)
|
|
1172
|
+
flops_summary_path = validate_and_normalize_path(flops_summary_path)
|
|
783
1173
|
|
|
784
|
-
|
|
785
|
-
|
|
786
|
-
|
|
787
|
-
|
|
788
|
-
|
|
789
|
-
|
|
1174
|
+
flops_analyser = AscendFlopsGenerator(op_summary)
|
|
1175
|
+
flops_analyser.parse()
|
|
1176
|
+
flops_analyser.write(flops_path, flops_summary_path)
|
|
1177
|
+
|
|
1178
|
+
except ProfilerException as err:
|
|
1179
|
+
logger.warning(err.message)
|
|
1180
|
+
finally:
|
|
1181
|
+
pass
|
|
790
1182
|
|
|
791
1183
|
def _ascend_graph_memory_analyse(self, points):
|
|
792
1184
|
"""Analyse memory usage info."""
|
|
@@ -803,16 +1195,25 @@ class Profiler:
|
|
|
803
1195
|
finally:
|
|
804
1196
|
pass
|
|
805
1197
|
|
|
806
|
-
def _ascend_graph_hccl_analyse(self):
|
|
1198
|
+
def _ascend_graph_hccl_analyse(self, source_path):
|
|
807
1199
|
"""Analyse hccl profiler info."""
|
|
808
1200
|
if not self._profile_communication:
|
|
809
1201
|
return
|
|
810
1202
|
if self._profile_communication and context.get_context("mode") == context.PYNATIVE_MODE:
|
|
811
1203
|
logger.warning("[Profiler]The parameter profile_communication is not supported on Ascend "
|
|
812
1204
|
"PyNative mode currently.")
|
|
1205
|
+
return
|
|
813
1206
|
try:
|
|
814
1207
|
logger.info("Profiling: analyzing the hccl profiler info.")
|
|
815
|
-
self.
|
|
1208
|
+
dev_id = self._rank_id if self._device_target == DeviceTarget.ASCEND.value else self._dev_id
|
|
1209
|
+
|
|
1210
|
+
hccl_raw_path = os.path.join(self._output_path, f'hccl_raw_{dev_id}.csv')
|
|
1211
|
+
hccl_raw_path = validate_and_normalize_path(hccl_raw_path)
|
|
1212
|
+
|
|
1213
|
+
hccl_analyse = AscendHCCLGenerator(os.path.join(source_path, 'timeline'))
|
|
1214
|
+
hccl_analyse.parse()
|
|
1215
|
+
hccl_analyse.write(hccl_raw_path)
|
|
1216
|
+
|
|
816
1217
|
except (ProfilerIOException, ProfilerFileNotFoundException, ProfilerRawFileException) as err:
|
|
817
1218
|
logger.warning(err.message)
|
|
818
1219
|
finally:
|
|
@@ -838,106 +1239,34 @@ class Profiler:
|
|
|
838
1239
|
if context.get_context("mode") == context.PYNATIVE_MODE:
|
|
839
1240
|
logger.warning("Pynative mode does not support MSAdvisor analyzer currently.")
|
|
840
1241
|
|
|
841
|
-
def _ascend_graph_op_analyse(self, source_path):
|
|
842
|
-
"""
|
|
843
|
-
Ascend graph model hwts analyse.
|
|
844
|
-
|
|
845
|
-
Returns:
|
|
846
|
-
list[obj]: The list is: framework_parser, aicpu_data_parser, optime_parser, op_task_dict
|
|
847
|
-
"""
|
|
848
|
-
# parse hwts.log.data.45.dev file, and get task profiling data
|
|
849
|
-
hwts_output_filename = self._hwts_output_filename_target + self._rank_id + ".txt"
|
|
850
|
-
hwts_output_filename = os.path.join(self._output_path, hwts_output_filename)
|
|
851
|
-
source_path = validate_and_normalize_path(source_path)
|
|
852
|
-
hwts_output_filename = validate_and_normalize_path(hwts_output_filename)
|
|
853
|
-
hwtslog_parser = HWTSLogParser(source_path, hwts_output_filename, self._dynamic_status)
|
|
854
|
-
logger.info("Profiling: analyzing hwts data.")
|
|
855
|
-
hwtslog_parser.execute()
|
|
856
|
-
|
|
857
|
-
# parse Framework file, and get the relation of op and tasks
|
|
858
|
-
framework_parser = FrameworkParser(source_path, self._rank_id, self._output_path)
|
|
859
|
-
logger.info("Profiling: analyzing framework data.")
|
|
860
|
-
framework_parser.parse()
|
|
861
|
-
op_task_dict = framework_parser.to_task_id_full_op_name_dict()
|
|
862
|
-
if not op_task_dict:
|
|
863
|
-
raise RuntimeError('Profiling: fail to parse framework files.')
|
|
864
|
-
|
|
865
|
-
# get op compute time from hwts data and framework data, write output_op_compute_time.txt
|
|
866
|
-
opcompute_output_filename = self._opcompute_output_filename_target + self._rank_id + ".txt"
|
|
867
|
-
opcompute_output_filename = os.path.join(self._output_path, opcompute_output_filename)
|
|
868
|
-
opcompute_output_filename = validate_and_normalize_path(opcompute_output_filename)
|
|
869
|
-
optime_parser = OPComputeTimeParser(
|
|
870
|
-
hwts_output_filename, opcompute_output_filename,
|
|
871
|
-
op_task_dict, self._output_path, self._rank_id
|
|
872
|
-
)
|
|
873
|
-
logger.info("Profiling: analyzing the operation compute time.")
|
|
874
|
-
optime_parser.execute()
|
|
875
|
-
|
|
876
|
-
# parse DATA_PREPROCESS.dev.AICPU file, write output_data_preprocess_aicpu_x.txt
|
|
877
|
-
output_data_preprocess_aicpu = self._aicpu_op_output_filename_target + self._rank_id + ".txt"
|
|
878
|
-
output_data_preprocess_aicpu = os.path.join(self._output_path, output_data_preprocess_aicpu)
|
|
879
|
-
output_data_preprocess_aicpu = validate_and_normalize_path(output_data_preprocess_aicpu)
|
|
880
|
-
aicpu_data_parser = DataPreProcessParser(source_path, output_data_preprocess_aicpu, op_task_dict)
|
|
881
|
-
logger.info("Profiling: analyzing the data preprocess data.")
|
|
882
|
-
aicpu_data_parser.execute()
|
|
883
|
-
|
|
884
|
-
# analyse op compute time info
|
|
885
|
-
try:
|
|
886
|
-
self._analyser_op_info()
|
|
887
|
-
except ProfilerException as err:
|
|
888
|
-
logger.warning(err.message)
|
|
889
|
-
finally:
|
|
890
|
-
pass
|
|
891
|
-
return [framework_parser, aicpu_data_parser, optime_parser, op_task_dict]
|
|
892
|
-
|
|
893
|
-
def _minddata_analyse(self, source_path):
|
|
894
|
-
"""Analyse mindadata for ascend graph model."""
|
|
895
|
-
if not self._data_process:
|
|
896
|
-
return
|
|
897
|
-
store_id = self._rank_id if self._device_target == DeviceTarget.ASCEND.value else self._dev_id
|
|
898
|
-
# Parsing minddata AICPU profiling
|
|
899
|
-
if self._device_target == DeviceTarget.ASCEND.value:
|
|
900
|
-
logger.info("Profiling: analyzing the minddata AICPU data.")
|
|
901
|
-
MinddataParser.execute(source_path, self._output_path, store_id)
|
|
902
|
-
|
|
903
|
-
# parse minddata pipeline operator and queue
|
|
904
|
-
try:
|
|
905
|
-
MinddataPipelineParser(self._output_path, store_id, self._output_path).parse()
|
|
906
|
-
except ProfilerException as err:
|
|
907
|
-
logger.warning(err.message)
|
|
908
|
-
finally:
|
|
909
|
-
pass
|
|
910
|
-
|
|
911
|
-
# Analyze minddata information
|
|
912
|
-
logger.info("Profiling: analyzing the minddata information.")
|
|
913
|
-
try:
|
|
914
|
-
MinddataProfilingAnalyzer(self._output_path, store_id, self._output_path).analyze()
|
|
915
|
-
except ProfilerException as err:
|
|
916
|
-
logger.warning(err.message)
|
|
917
|
-
finally:
|
|
918
|
-
pass
|
|
919
|
-
|
|
920
1242
|
def _ascend_graph_analyse(self):
|
|
921
1243
|
"""Ascend graph mode analyse."""
|
|
922
1244
|
self._ascend_profiler.finalize()
|
|
923
1245
|
|
|
924
1246
|
job_id = self._get_profiling_job_id()
|
|
1247
|
+
if not job_id:
|
|
1248
|
+
return
|
|
925
1249
|
logger.info("Profiling: job id is %s ", job_id)
|
|
926
1250
|
|
|
927
1251
|
self._check_output_path(output_path=self._output_path)
|
|
928
1252
|
source_path = os.path.join(self._output_path, job_id)
|
|
929
1253
|
self._minddata_analyse(source_path)
|
|
930
1254
|
if self._op_time:
|
|
931
|
-
|
|
932
|
-
|
|
933
|
-
|
|
934
|
-
self._ascend_timeline_analyse(
|
|
1255
|
+
_ascend_graph_msprof_generator(source_path, self._model_iteration_dict)
|
|
1256
|
+
op_summary, op_statistic, steptrace = _ascend_graph_msprof_analyse(source_path)
|
|
1257
|
+
self._ascend_op_analyse(op_summary, op_statistic, self._dynamic_status)
|
|
1258
|
+
self._ascend_timeline_analyse(op_summary, steptrace)
|
|
1259
|
+
graph_ids = np.unique(op_summary['Model ID']).tolist()
|
|
1260
|
+
points = self._ascend_fpbp_analyse(op_summary, steptrace)
|
|
1261
|
+
if len(graph_ids) == 1:
|
|
1262
|
+
self._ascend_step_trace_analyse(steptrace)
|
|
935
1263
|
if self._dynamic_status:
|
|
936
|
-
self._ascend_dynamic_net_analyse()
|
|
937
|
-
self._ascend_flops_analyse(
|
|
1264
|
+
self._ascend_dynamic_net_analyse(op_summary)
|
|
1265
|
+
self._ascend_flops_analyse(op_summary)
|
|
938
1266
|
self._ascend_graph_memory_analyse(points)
|
|
939
|
-
self._ascend_graph_hccl_analyse()
|
|
1267
|
+
self._ascend_graph_hccl_analyse(source_path)
|
|
940
1268
|
self._ascend_graph_msadvisor_analyse(job_id)
|
|
1269
|
+
ProfilerInfo.set_graph_ids(graph_ids)
|
|
941
1270
|
|
|
942
1271
|
def _ascend_graph_start(self):
|
|
943
1272
|
"""Ascend graph mode start profiling."""
|
|
@@ -973,7 +1302,7 @@ class Profiler:
|
|
|
973
1302
|
"""Whether iteration related information needs to be parsed."""
|
|
974
1303
|
profiler_info = ProfilerInfo.get_profiler_info()
|
|
975
1304
|
graph_ids = profiler_info.get("graph_ids")
|
|
976
|
-
if len(graph_ids) > 1:
|
|
1305
|
+
if graph_ids and len(graph_ids) > 1:
|
|
977
1306
|
analyse_step_trace = False
|
|
978
1307
|
logger.warning(
|
|
979
1308
|
"[Profiler]Current model has multiple sub graphs, the segmentation of steps may be inaccurate.")
|
|
@@ -997,13 +1326,12 @@ class Profiler:
|
|
|
997
1326
|
parser = GpuFrameWorkParser(self._output_path, self._dev_id)
|
|
998
1327
|
graph_ids = parser.get_graph_ids()
|
|
999
1328
|
ProfilerInfo.set_graph_ids(graph_ids)
|
|
1000
|
-
|
|
1001
|
-
|
|
1002
|
-
|
|
1003
|
-
|
|
1004
|
-
|
|
1005
|
-
|
|
1006
|
-
parser.analyse_dynamic_shape_data(self._timeline_meta)
|
|
1329
|
+
self._analyse_step_trace(
|
|
1330
|
+
is_training_mode_flag=timeline_generator.check_op_name('Gradients'),
|
|
1331
|
+
is_gpu_kernel_async_launch_flag=timeline_generator.is_gpu_kernel_async_launch()
|
|
1332
|
+
)
|
|
1333
|
+
if self._dynamic_status:
|
|
1334
|
+
parser.analyse_dynamic_shape_data(self._timeline_meta)
|
|
1007
1335
|
|
|
1008
1336
|
def _get_step_reduce_op_type(self):
|
|
1009
1337
|
"""Gets all communication operator names."""
|
|
@@ -1021,7 +1349,8 @@ class Profiler:
|
|
|
1021
1349
|
|
|
1022
1350
|
def _cpu_analyse(self):
|
|
1023
1351
|
"""Collect and analyse cpu performance data."""
|
|
1024
|
-
|
|
1352
|
+
if not self._op_time:
|
|
1353
|
+
return
|
|
1025
1354
|
try:
|
|
1026
1355
|
timeline_generator = CpuTimelineGenerator(self._output_path, self._rank_id, context.get_context("mode"))
|
|
1027
1356
|
timeline_generator.init_timeline()
|
|
@@ -1031,7 +1360,8 @@ class Profiler:
|
|
|
1031
1360
|
logger.warning('Fail to write timeline data: %s', err)
|
|
1032
1361
|
raise RuntimeError('Fail to write timeline data.') from err
|
|
1033
1362
|
if context.get_context("mode") == context.PYNATIVE_MODE:
|
|
1034
|
-
raise RuntimeError("
|
|
1363
|
+
raise RuntimeError("Currently, the CPU platform does not support Pynative mode to collect performance "
|
|
1364
|
+
"data.")
|
|
1035
1365
|
|
|
1036
1366
|
def _analyse_step_trace(self, source_path=None, framework_parser=None, is_training_mode_flag=True,
|
|
1037
1367
|
is_gpu_kernel_async_launch_flag=False):
|
|
@@ -1058,29 +1388,37 @@ class Profiler:
|
|
|
1058
1388
|
point_info_file_path = validate_and_normalize_path(point_info_file_path)
|
|
1059
1389
|
|
|
1060
1390
|
if self._device_target and self._device_target == DeviceTarget.GPU.value:
|
|
1061
|
-
|
|
1062
|
-
|
|
1063
|
-
|
|
1064
|
-
|
|
1065
|
-
|
|
1066
|
-
|
|
1067
|
-
|
|
1068
|
-
|
|
1069
|
-
|
|
1070
|
-
|
|
1071
|
-
|
|
1072
|
-
|
|
1073
|
-
|
|
1074
|
-
|
|
1075
|
-
|
|
1076
|
-
|
|
1077
|
-
|
|
1078
|
-
|
|
1079
|
-
|
|
1080
|
-
|
|
1081
|
-
|
|
1082
|
-
|
|
1083
|
-
|
|
1391
|
+
if context.get_context("mode") != context.PYNATIVE_MODE:
|
|
1392
|
+
input_file_path = os.path.join(self._output_path, f'step_trace_profiling_{self._dev_id}.txt')
|
|
1393
|
+
input_file_path = validate_and_normalize_path(input_file_path)
|
|
1394
|
+
parser = GpuStepTraceParser(input_dir=input_file_path,
|
|
1395
|
+
output_file_path=step_trace_intermediate_file_path,
|
|
1396
|
+
is_training_mode=is_training_mode_flag,
|
|
1397
|
+
is_gpu_kernel_async_launch=is_gpu_kernel_async_launch_flag)
|
|
1398
|
+
parser.parse_and_save()
|
|
1399
|
+
point_info = parser.record_point_info(point_info_file_path)
|
|
1400
|
+
# print parser result
|
|
1401
|
+
parser.show()
|
|
1402
|
+
logger.info("Finish saving the intermediate result: %s", step_trace_intermediate_file_path)
|
|
1403
|
+
logger.info("The point info is: %s", point_info)
|
|
1404
|
+
|
|
1405
|
+
return point_info, is_training_mode_flag
|
|
1406
|
+
return {}, is_training_mode_flag
|
|
1407
|
+
|
|
1408
|
+
# whether keep the first step
|
|
1409
|
+
skip_first_step_flag = framework_parser.check_op_name(INIT_OP_NAME)
|
|
1410
|
+
# recognize inference or training mode
|
|
1411
|
+
is_training_mode_flag = framework_parser.check_op_name("Gradients")
|
|
1412
|
+
# parser the step trace files and save the result to disk
|
|
1413
|
+
source_path = validate_and_normalize_path(source_path)
|
|
1414
|
+
parser = AscendStepTraceParser(input_dir=source_path,
|
|
1415
|
+
output_file_path=step_trace_intermediate_file_path,
|
|
1416
|
+
skip_first_step=skip_first_step_flag,
|
|
1417
|
+
is_training_mode=is_training_mode_flag)
|
|
1418
|
+
parser.set_task_id_op_name_dict(framework_parser.to_task_id_full_op_name_dict())
|
|
1419
|
+
parser.parse_and_save()
|
|
1420
|
+
point_info = parser.record_point_info(point_info_file_path)
|
|
1421
|
+
|
|
1084
1422
|
# print parser result
|
|
1085
1423
|
parser.show()
|
|
1086
1424
|
logger.info("Finish saving the intermediate result: %s", step_trace_intermediate_file_path)
|
|
@@ -1088,45 +1426,6 @@ class Profiler:
|
|
|
1088
1426
|
|
|
1089
1427
|
return point_info, is_training_mode_flag
|
|
1090
1428
|
|
|
1091
|
-
def _analyse_timeline(self, aicpu_parser, optime_parser, source_path):
|
|
1092
|
-
"""
|
|
1093
|
-
Analyse and parse timeline info.
|
|
1094
|
-
|
|
1095
|
-
Args:
|
|
1096
|
-
aicpu_parser (DataPreProcessParser): The parser instance for AI CPU operator
|
|
1097
|
-
execution time calculation.
|
|
1098
|
-
optime_parser (OPComputeTimeParserParser): The parser instance for AI Core
|
|
1099
|
-
operator execution time calculation.
|
|
1100
|
-
"""
|
|
1101
|
-
logger.info("Profiling: analyzing the timeline data.")
|
|
1102
|
-
timeline_analyser = AscendTimelineGenerator(self._output_path, self._dev_id, self._rank_id,
|
|
1103
|
-
self._rank_size, context.get_context("mode"))
|
|
1104
|
-
# Get framework info
|
|
1105
|
-
integrator = Integrator(self._output_path, self._rank_id)
|
|
1106
|
-
aicore_detail_data = integrator.get_aicore_detail_data()
|
|
1107
|
-
aicore_detail_data_size = len(aicore_detail_data)
|
|
1108
|
-
col_names = ['op_name', 'op_type', 'avg_execution_time', 'subgraph',
|
|
1109
|
-
'full_op_name', 'op_info']
|
|
1110
|
-
framework_info = {
|
|
1111
|
-
'col_name': col_names,
|
|
1112
|
-
'object': aicore_detail_data,
|
|
1113
|
-
'size': aicore_detail_data_size
|
|
1114
|
-
}
|
|
1115
|
-
|
|
1116
|
-
all_reduce_info = integrator.query_for_all_reduce()
|
|
1117
|
-
|
|
1118
|
-
# Get timeline info
|
|
1119
|
-
logger.info('Start writing timeline info...')
|
|
1120
|
-
logger.info('Warm Prompt: It could take a few minutes if you are training '
|
|
1121
|
-
'with a complex network or more than 10 steps.')
|
|
1122
|
-
# Add info into timeline, such as AI CPU, AllReduce, framework info.
|
|
1123
|
-
aicpu_info = aicpu_parser.query_aicpu_data()
|
|
1124
|
-
min_cycle_counter = min(aicpu_parser.min_cycle_counter, optime_parser.min_cycle_counter)
|
|
1125
|
-
timeline_analyser.init_timeline(all_reduce_info, framework_info, aicpu_info,
|
|
1126
|
-
min_cycle_counter, source_path)
|
|
1127
|
-
timeline_analyser.write_timeline(self._timeline_size_limit_byte)
|
|
1128
|
-
timeline_analyser.write_timeline_summary()
|
|
1129
|
-
|
|
1130
1429
|
def _generate_timeline(self, reduce_op_type):
|
|
1131
1430
|
"""Used for gpu, generate timeline info, write to json format file."""
|
|
1132
1431
|
try:
|
|
@@ -1164,11 +1463,10 @@ class Profiler:
|
|
|
1164
1463
|
return job_id
|
|
1165
1464
|
|
|
1166
1465
|
job_id = ""
|
|
1167
|
-
job_dirs = filter(lambda item: item.startswith('JOB') or item.startswith('PROF') and
|
|
1168
|
-
|
|
1169
|
-
|
|
1170
|
-
|
|
1171
|
-
reverse=True)
|
|
1466
|
+
job_dirs = filter(lambda item: item.startswith('JOB') or item.startswith('PROF') and os.path.isdir(
|
|
1467
|
+
os.path.join(self._output_path, item)), os.listdir(self._output_path))
|
|
1468
|
+
sorted_job_dirs = sorted(
|
|
1469
|
+
job_dirs, key=lambda x: os.path.getmtime(os.path.join(self._output_path, x)), reverse=True)
|
|
1172
1470
|
|
|
1173
1471
|
for dir_name in sorted_job_dirs:
|
|
1174
1472
|
if dir_name.startswith('PROF'):
|
|
@@ -1185,22 +1483,21 @@ class Profiler:
|
|
|
1185
1483
|
"profiler will ignore this job dir.", job_dir)
|
|
1186
1484
|
continue
|
|
1187
1485
|
|
|
1188
|
-
|
|
1486
|
+
info_file_path = get_file_path(job_dir, "info.json")
|
|
1487
|
+
if info_file_path is None:
|
|
1488
|
+
logger.warning("Find profiling job path %s, but info.json not exist, "
|
|
1489
|
+
"profiler will ignore this job dir.", job_dir)
|
|
1490
|
+
continue
|
|
1491
|
+
|
|
1492
|
+
_, training_device_id = self._parse_info_json(info_file_path)
|
|
1493
|
+
job_start_time = self._parse_start_log(start_file_path)
|
|
1494
|
+
|
|
1189
1495
|
if self._dev_id != training_device_id:
|
|
1190
1496
|
logger.debug("Find profiling find job path %s, but not current training device id. "
|
|
1191
1497
|
"Current training device id %s, but job path device id: %s, "
|
|
1192
1498
|
"profiler will ignore this job dir.", job_dir, self._dev_id, training_device_id)
|
|
1193
1499
|
continue
|
|
1194
1500
|
|
|
1195
|
-
if not os.listdir(os.path.join(job_dir, 'data')):
|
|
1196
|
-
continue
|
|
1197
|
-
|
|
1198
|
-
job_start_time = self._parse_start_log(start_file_path)
|
|
1199
|
-
if not job_start_time:
|
|
1200
|
-
logger.warning("Find profiling job path %s, but fail to get job start info, "
|
|
1201
|
-
"profiler will ignore this job dir.", job_start_time)
|
|
1202
|
-
continue
|
|
1203
|
-
|
|
1204
1501
|
if int(job_start_time) < self._start_time:
|
|
1205
1502
|
logger.warning("Find profiling job path %s, but start_time(%d) is earlier than this training "
|
|
1206
1503
|
"start_time(%d), profiler will ignore this job dir.",
|
|
@@ -1218,38 +1515,10 @@ class Profiler:
|
|
|
1218
1515
|
"please check whether job dir or prof dir(name startswith JOB or PROF) in output path " \
|
|
1219
1516
|
"was generated, or may be the device id from job dir dismatch the " \
|
|
1220
1517
|
"device_id in current process.".format(self._output_path)
|
|
1221
|
-
|
|
1518
|
+
logger.warning(msg)
|
|
1222
1519
|
|
|
1223
1520
|
return job_id
|
|
1224
1521
|
|
|
1225
|
-
def _analyser_op_info(self):
|
|
1226
|
-
"""Analyse the operator information."""
|
|
1227
|
-
logger.info("Profiling: analyzing the operation compute time.")
|
|
1228
|
-
integrator = Integrator(self._output_path, self._rank_id)
|
|
1229
|
-
integrator.integrate()
|
|
1230
|
-
|
|
1231
|
-
aicore_type_result = self._query_op_type_info()
|
|
1232
|
-
detail_file_path = os.path.join(
|
|
1233
|
-
self._output_path,
|
|
1234
|
-
'output_op_compute_time_detail_{}.txt'.format(self._rank_id)
|
|
1235
|
-
)
|
|
1236
|
-
fwrite_format(detail_file_path, data_source='title:op compute time')
|
|
1237
|
-
display_names = [
|
|
1238
|
-
'optype_name', 'compute_time(ms, per-step)',
|
|
1239
|
-
'called_times(per-step)', 'percent'
|
|
1240
|
-
]
|
|
1241
|
-
fwrite_format(detail_file_path, data_source=" ".join(display_names), is_print=True)
|
|
1242
|
-
fwrite_format(detail_file_path, data_source=aicore_type_result, is_print=True)
|
|
1243
|
-
|
|
1244
|
-
op_type_order = [item[0] for item in aicore_type_result]
|
|
1245
|
-
aicore_detail_result = self._query_op_detail_info(op_type_order)
|
|
1246
|
-
|
|
1247
|
-
fwrite_format(detail_file_path, data_source='', is_print=True)
|
|
1248
|
-
fwrite_format(detail_file_path, data_source='Detail:', is_print=True)
|
|
1249
|
-
fwrite_format(detail_file_path, data_source=" ".join(aicore_detail_result.get('col_name_detail')),
|
|
1250
|
-
is_print=True)
|
|
1251
|
-
fwrite_format(detail_file_path, data_source=aicore_detail_result.get('object'), is_print=True)
|
|
1252
|
-
|
|
1253
1522
|
def _query_op_type_info(self):
|
|
1254
1523
|
"""
|
|
1255
1524
|
Query AICORE operator type information.
|
|
@@ -1295,7 +1564,8 @@ class Profiler:
|
|
|
1295
1564
|
logger.error("Profiling: fail to get context, %s", err)
|
|
1296
1565
|
|
|
1297
1566
|
if not dev_id or not dev_id.isdigit():
|
|
1298
|
-
dev_id =
|
|
1567
|
+
dev_id = str(get_local_rank()) if GlobalComm.INITED and device_target == DeviceTarget.ASCEND.value \
|
|
1568
|
+
else os.getenv('DEVICE_ID')
|
|
1299
1569
|
if not dev_id or not dev_id.isdigit():
|
|
1300
1570
|
dev_id = "0"
|
|
1301
1571
|
logger.warning("Fail to get DEVICE_ID, use 0 instead.")
|
|
@@ -1305,7 +1575,8 @@ class Profiler:
|
|
|
1305
1575
|
msg = "Profiling: unsupported backend: %s" % device_target
|
|
1306
1576
|
raise RuntimeError(msg)
|
|
1307
1577
|
|
|
1308
|
-
rank_id =
|
|
1578
|
+
rank_id = str(get_rank()) if GlobalComm.INITED and device_target == DeviceTarget.ASCEND.value \
|
|
1579
|
+
else os.getenv("RANK_ID")
|
|
1309
1580
|
if not rank_id or not rank_id.isdigit():
|
|
1310
1581
|
rank_id = "0"
|
|
1311
1582
|
logger.warning(f"For '{self.__class__.__name__}', fail to get RANK_ID from environment, "
|
|
@@ -1313,7 +1584,10 @@ class Profiler:
|
|
|
1313
1584
|
|
|
1314
1585
|
self._dev_id = dev_id
|
|
1315
1586
|
self._device_target = device_target.lower()
|
|
1316
|
-
|
|
1587
|
+
if device_target == DeviceTarget.GPU.value:
|
|
1588
|
+
self._rank_id = dev_id
|
|
1589
|
+
else:
|
|
1590
|
+
self._rank_id = rank_id
|
|
1317
1591
|
|
|
1318
1592
|
def _get_output_path(self, kwargs):
|
|
1319
1593
|
"""Get output path of profiling data."""
|
|
@@ -1377,28 +1651,55 @@ class Profiler:
|
|
|
1377
1651
|
"[Profiler]The 'timeline_limit' parameter must be greater than 0, it will be set to 500.")
|
|
1378
1652
|
timeline_limit = 500
|
|
1379
1653
|
self._timeline_size_limit_byte = timeline_limit * 1024 * 1024
|
|
1654
|
+
self._profile_framework = kwargs.pop("profile_framework", "all")
|
|
1655
|
+
if self._profile_framework not in ["memory", "time", "all", None]:
|
|
1656
|
+
logger.warning(f"For '{self.__class__.__name__}', the parameter profile_framework must be one of ['memory',"
|
|
1657
|
+
f" 'time', 'all', None], but got {self._profile_framework}, it will be set to 'all'.")
|
|
1658
|
+
self._profile_framework = "all"
|
|
1380
1659
|
|
|
1381
|
-
def
|
|
1382
|
-
"""
|
|
1383
|
-
|
|
1384
|
-
|
|
1385
|
-
|
|
1386
|
-
|
|
1387
|
-
|
|
1388
|
-
|
|
1389
|
-
|
|
1390
|
-
|
|
1391
|
-
|
|
1392
|
-
|
|
1393
|
-
|
|
1394
|
-
|
|
1395
|
-
|
|
1396
|
-
|
|
1397
|
-
|
|
1398
|
-
|
|
1399
|
-
|
|
1400
|
-
|
|
1401
|
-
logger.info("
|
|
1402
|
-
|
|
1403
|
-
|
|
1404
|
-
|
|
1660
|
+
def _host_info_analyse(self):
|
|
1661
|
+
"""
|
|
1662
|
+
Read data from the csv file, and write it into timeline file, so the timeline can be show on tracing tool.
|
|
1663
|
+
"""
|
|
1664
|
+
logger.info("Profiling HostInfo start.")
|
|
1665
|
+
host_dir = os.path.join(self._output_path, 'host_info')
|
|
1666
|
+
host_dir = validate_and_normalize_path(host_dir)
|
|
1667
|
+
if not os.path.exists(host_dir):
|
|
1668
|
+
logger.error("Host info directory: %s not exist.", host_dir)
|
|
1669
|
+
return
|
|
1670
|
+
csv_file_name = 'host_info_' + str(self._rank_id) + '.csv'
|
|
1671
|
+
json_file_name = 'timeline_' + str(self._rank_id) + '.json'
|
|
1672
|
+
memory_file_name = 'host_memory_' + str(self._rank_id) + '.csv'
|
|
1673
|
+
dataset_file_name = 'dataset_' + str(self._rank_id) + '.csv'
|
|
1674
|
+
host_info_file = os.path.join(self._output_path, 'host_info', csv_file_name)
|
|
1675
|
+
timeline_file = os.path.join(self._output_path, 'host_info', json_file_name)
|
|
1676
|
+
memory_file = os.path.join(self._output_path, 'host_info', memory_file_name)
|
|
1677
|
+
dataset_execution_file = os.path.join(self._output_path, 'host_info', dataset_file_name)
|
|
1678
|
+
_parse_host_info(host_info_file, timeline_file, memory_file)
|
|
1679
|
+
_calculate_dataset_execution_time(host_info_file, dataset_execution_file)
|
|
1680
|
+
logger.info("Profile HostInfo finished.")
|
|
1681
|
+
|
|
1682
|
+
|
|
1683
|
+
def _offline_parse(offline_path):
|
|
1684
|
+
"""Parse data in abnormal scenario, only support for host_info at present."""
|
|
1685
|
+
logger.info("Profiling HostInfo offline start.")
|
|
1686
|
+
host_dir = os.path.join(offline_path, 'profiler', 'host_info')
|
|
1687
|
+
host_dir = validate_and_normalize_path(host_dir)
|
|
1688
|
+
if not os.path.exists(host_dir):
|
|
1689
|
+
logger.error("Host info directory: %s not exist.", host_dir)
|
|
1690
|
+
return
|
|
1691
|
+
files = os.listdir(host_dir)
|
|
1692
|
+
for file in files:
|
|
1693
|
+
if not file.startswith("host_info_") or not file.endswith(".csv"):
|
|
1694
|
+
continue
|
|
1695
|
+
rank_id = file.split('_')[-1].split('.')[0]
|
|
1696
|
+
if not rank_id.isdigit():
|
|
1697
|
+
logger.info("Cannot get rank_id from file: %s, skip it", file)
|
|
1698
|
+
return
|
|
1699
|
+
host_info_file = os.path.join(host_dir, file)
|
|
1700
|
+
timeline_file = os.path.join(host_dir, f'timeline_{rank_id}.json')
|
|
1701
|
+
memory_file = os.path.join(host_dir, f'host_memory_{rank_id}.csv')
|
|
1702
|
+
dataset_execution_file = os.path.join(host_dir, f'dataset_{rank_id}.csv')
|
|
1703
|
+
_parse_host_info(host_info_file, timeline_file, memory_file)
|
|
1704
|
+
_calculate_dataset_execution_time(host_info_file, dataset_execution_file)
|
|
1705
|
+
logger.info("Profile HostInfo offline finished.")
|