mindspore 1.10.0__cp37-none-any.whl → 2.0.0rc1__cp37-none-any.whl
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mindspore might be problematic. Click here for more details.
- mindspore/.commit_id +1 -1
- mindspore/Third_Party_Open_Source_Software_Notice +9064 -0
- mindspore/__init__.py +9 -4
- mindspore/_akg/akg/composite/build_module.py +11 -0
- mindspore/_akg/akg/config/repository_cuda.json +11 -0
- mindspore/_akg/akg/tvm/contrib/nvcc.py +4 -3
- mindspore/_c_dataengine.cpython-37m-aarch64-linux-gnu.so +0 -0
- mindspore/_c_expression.cpython-37m-aarch64-linux-gnu.so +0 -0
- mindspore/_c_mindrecord.cpython-37m-aarch64-linux-gnu.so +0 -0
- mindspore/_check_jit_forbidden_api.py +102 -0
- mindspore/_checkparam.py +1066 -1001
- mindspore/_extends/builtin_operations.py +32 -4
- mindspore/_extends/graph_kernel/model/graph_split.py +66 -222
- mindspore/_extends/parallel_compile/akg_compiler/akg_process.py +12 -9
- mindspore/_extends/parallel_compile/akg_compiler/build_tbe_kernel.py +119 -26
- mindspore/_extends/parallel_compile/akg_compiler/tbe_topi.py +50 -50
- mindspore/_extends/parallel_compile/akg_compiler/util.py +9 -6
- mindspore/_extends/parallel_compile/tbe_compiler/tbe_adapter.py +4 -25
- mindspore/_extends/parallel_compile/tbe_compiler/tbe_helper.py +9 -4
- mindspore/_extends/parallel_compile/tbe_compiler/tbe_job_manager.py +1 -27
- mindspore/_extends/parse/__init__.py +5 -3
- mindspore/_extends/parse/namespace.py +17 -2
- mindspore/_extends/parse/parser.py +193 -34
- mindspore/_extends/parse/resources.py +7 -8
- mindspore/_extends/parse/standard_method.py +1780 -435
- mindspore/_extends/parse/trope.py +3 -1
- mindspore/_mindspore_offline_debug.cpython-37m-aarch64-linux-gnu.so +0 -0
- mindspore/amp.py +53 -58
- mindspore/bin/cache_admin +0 -0
- mindspore/bin/cache_server +0 -0
- mindspore/boost/adasum.py +3 -2
- mindspore/boost/boost.py +2 -2
- mindspore/boost/boost_cell_wrapper.py +46 -26
- mindspore/boost/dim_reduce.py +6 -5
- mindspore/boost/grad_accumulation.py +2 -1
- mindspore/boost/group_loss_scale_manager.py +1 -1
- mindspore/common/__init__.py +11 -10
- mindspore/common/_decorator.py +2 -0
- mindspore/common/_register_for_adapter.py +55 -0
- mindspore/common/_stub_tensor.py +201 -0
- mindspore/common/_utils.py +57 -0
- mindspore/common/api.py +582 -297
- mindspore/common/dtype.py +66 -18
- mindspore/common/dump.py +2 -2
- mindspore/common/initializer.py +38 -1
- mindspore/common/jit_config.py +25 -13
- mindspore/common/mutable.py +53 -24
- mindspore/common/parameter.py +60 -37
- mindspore/common/seed.py +8 -24
- mindspore/common/sparse_tensor.py +927 -0
- mindspore/common/tensor.py +1627 -3900
- mindspore/communication/__init__.py +10 -5
- mindspore/communication/_comm_helper.py +78 -214
- mindspore/communication/_hccl_management.py +2 -1
- mindspore/communication/management.py +136 -47
- mindspore/config/op_info.config +501 -1008
- mindspore/config/super_bar_config.json +512 -0
- mindspore/context.py +291 -56
- mindspore/dataset/__init__.py +12 -8
- mindspore/dataset/audio/__init__.py +9 -9
- mindspore/dataset/audio/transforms.py +1090 -228
- mindspore/dataset/audio/utils.py +87 -39
- mindspore/dataset/audio/validators.py +223 -1
- mindspore/dataset/callback/ds_callback.py +17 -15
- mindspore/dataset/core/config.py +246 -17
- mindspore/dataset/core/py_util_helpers.py +4 -3
- mindspore/dataset/core/validator_helpers.py +10 -10
- mindspore/{parallel/nn/layers.py → dataset/debug/__init__.py} +7 -8
- mindspore/dataset/debug/debug_hook.py +65 -0
- mindspore/dataset/debug/pre_defined_hook.py +67 -0
- mindspore/dataset/engine/__init__.py +7 -3
- mindspore/dataset/engine/cache_client.py +9 -9
- mindspore/dataset/engine/datasets.py +648 -477
- mindspore/dataset/engine/datasets_audio.py +165 -167
- mindspore/dataset/engine/datasets_standard_format.py +93 -67
- mindspore/dataset/engine/datasets_text.py +492 -342
- mindspore/dataset/engine/datasets_user_defined.py +85 -50
- mindspore/dataset/engine/datasets_vision.py +1224 -699
- mindspore/dataset/engine/graphdata.py +134 -69
- mindspore/dataset/engine/iterators.py +50 -9
- mindspore/dataset/engine/offload.py +52 -31
- mindspore/dataset/engine/samplers.py +27 -24
- mindspore/dataset/engine/serializer_deserializer.py +14 -15
- mindspore/dataset/engine/validators.py +213 -52
- mindspore/dataset/text/__init__.py +10 -8
- mindspore/dataset/text/transforms.py +152 -57
- mindspore/dataset/text/utils.py +98 -49
- mindspore/dataset/text/validators.py +25 -0
- mindspore/dataset/transforms/__init__.py +4 -2
- mindspore/dataset/transforms/c_transforms.py +11 -13
- mindspore/dataset/transforms/py_transforms.py +2 -2
- mindspore/dataset/transforms/py_transforms_util.py +10 -0
- mindspore/dataset/transforms/transforms.py +13 -15
- mindspore/dataset/transforms/validators.py +7 -7
- mindspore/dataset/utils/__init__.py +2 -1
- mindspore/dataset/utils/browse_dataset.py +13 -13
- mindspore/dataset/utils/line_reader.py +121 -0
- mindspore/dataset/vision/__init__.py +8 -7
- mindspore/dataset/vision/c_transforms.py +125 -126
- mindspore/dataset/vision/py_transforms.py +37 -37
- mindspore/dataset/vision/py_transforms_util.py +23 -20
- mindspore/dataset/vision/transforms.py +316 -315
- mindspore/dataset/vision/utils.py +313 -17
- mindspore/dataset/vision/validators.py +6 -6
- mindspore/default_config.py +0 -1
- mindspore/{compression → experimental}/__init__.py +6 -5
- mindspore/experimental/map_parameter.py +275 -0
- mindspore/include/OWNERS +0 -1
- mindspore/include/api/callback/callback.h +9 -13
- mindspore/include/api/callback/ckpt_saver.h +2 -2
- mindspore/include/api/callback/loss_monitor.h +2 -2
- mindspore/include/api/callback/lr_scheduler.h +5 -5
- mindspore/include/api/callback/time_monitor.h +2 -2
- mindspore/include/api/callback/train_accuracy.h +4 -6
- mindspore/include/api/cfg.h +19 -6
- mindspore/include/api/context.h +70 -9
- mindspore/include/api/delegate.h +8 -1
- mindspore/include/api/dual_abi_helper.h +8 -24
- mindspore/include/api/metrics/accuracy.h +2 -2
- mindspore/include/api/metrics/metrics.h +4 -3
- mindspore/include/api/model.h +9 -4
- mindspore/include/api/model_group.h +68 -0
- mindspore/include/api/model_parallel_runner.h +17 -17
- mindspore/include/api/net.h +12 -11
- mindspore/include/api/serialization.h +20 -4
- mindspore/include/api/status.h +7 -1
- mindspore/include/api/types.h +25 -21
- mindspore/include/api/visible.h +4 -0
- mindspore/include/c_api/model_c.h +5 -0
- mindspore/include/c_api/status_c.h +1 -1
- mindspore/include/dataset/config.h +1 -1
- mindspore/include/dataset/constants.h +14 -0
- mindspore/include/dataset/text.h +59 -0
- mindspore/include/dataset/vision.h +56 -117
- mindspore/include/dataset/vision_lite.h +102 -0
- mindspore/include/mindapi/base/type_id.h +42 -3
- mindspore/lib/libdnnl.so.2 +0 -0
- mindspore/lib/libicudata.so.69 +0 -0
- mindspore/lib/libicui18n.so.69 +0 -0
- mindspore/lib/libicuuc.so.69 +0 -0
- mindspore/lib/libmindspore.so +0 -0
- mindspore/lib/libmindspore_backend.so +0 -0
- mindspore/lib/libmindspore_common.so +0 -0
- mindspore/lib/libmindspore_core.so +0 -0
- mindspore/lib/libmindspore_glog.so.0 +0 -0
- mindspore/lib/libmindspore_gpr.so.15 +0 -0
- mindspore/lib/libmindspore_grpc++.so.1 +0 -0
- mindspore/lib/libmindspore_grpc.so.15 +0 -0
- mindspore/lib/libmindspore_shared_lib.so +0 -0
- mindspore/lib/libmpi_adapter.so +0 -0
- mindspore/lib/libmpi_collective.so +0 -0
- mindspore/lib/libnnacl.so +0 -0
- mindspore/lib/libopencv_core.so.4.5 +0 -0
- mindspore/lib/libopencv_imgcodecs.so.4.5 +0 -0
- mindspore/lib/libopencv_imgproc.so.4.5 +0 -0
- mindspore/lib/libps_cache.so +0 -0
- mindspore/lib/plugin/ascend/libakg.so +0 -0
- mindspore/lib/plugin/ascend/libascend_collective.so +0 -0
- mindspore/lib/plugin/ascend/libdvpp_utils.so +0 -0
- mindspore/lib/plugin/ascend/libhccl_plugin.so +0 -0
- mindspore/lib/plugin/ascend/libmindspore_aicpu_kernels.so +0 -0
- mindspore/lib/plugin/ascend/libmindspore_cpu_kernels.so +0 -0
- mindspore/lib/{libakg.so → plugin/cpu/libakg.so} +0 -0
- mindspore/lib/plugin/libmindspore_ascend.so.1 +0 -0
- mindspore/lib/plugin/libmindspore_ascend.so.2 +0 -0
- mindspore/log.py +28 -28
- mindspore/mindrecord/common/exceptions.py +2 -4
- mindspore/mindrecord/filereader.py +19 -1
- mindspore/mindrecord/filewriter.py +250 -88
- mindspore/mindrecord/mindpage.py +13 -13
- mindspore/mindrecord/shardheader.py +15 -15
- mindspore/mindrecord/shardreader.py +9 -0
- mindspore/mindrecord/shardwriter.py +29 -29
- mindspore/mindrecord/tools/cifar100_to_mr.py +9 -9
- mindspore/mindrecord/tools/cifar10_to_mr.py +9 -9
- mindspore/mindrecord/tools/csv_to_mr.py +4 -4
- mindspore/mindrecord/tools/imagenet_to_mr.py +70 -65
- mindspore/mindrecord/tools/mnist_to_mr.py +41 -41
- mindspore/mindrecord/tools/tfrecord_to_mr.py +6 -6
- mindspore/nn/__init__.py +1 -5
- mindspore/nn/cell.py +297 -234
- mindspore/nn/dynamic_lr.py +1 -1
- mindspore/nn/grad/cell_grad.py +17 -42
- mindspore/nn/layer/__init__.py +7 -4
- mindspore/nn/layer/activation.py +131 -88
- mindspore/nn/layer/basic.py +313 -613
- mindspore/nn/layer/channel_shuffle.py +103 -0
- mindspore/nn/layer/combined.py +1 -1
- mindspore/nn/layer/container.py +52 -6
- mindspore/nn/layer/conv.py +112 -43
- mindspore/nn/layer/dense.py +10 -9
- mindspore/nn/layer/embedding.py +36 -34
- mindspore/nn/layer/image.py +123 -27
- mindspore/nn/layer/math.py +108 -107
- mindspore/nn/layer/normalization.py +212 -366
- mindspore/nn/layer/padding.py +370 -42
- mindspore/nn/layer/pooling.py +1443 -219
- mindspore/nn/layer/rnn_cells.py +11 -16
- mindspore/nn/layer/rnns.py +38 -39
- mindspore/nn/layer/thor_layer.py +24 -25
- mindspore/nn/layer/timedistributed.py +5 -5
- mindspore/nn/layer/transformer.py +701 -0
- mindspore/nn/learning_rate_schedule.py +8 -8
- mindspore/nn/loss/__init__.py +9 -6
- mindspore/nn/loss/loss.py +678 -142
- mindspore/nn/metrics.py +53 -0
- mindspore/nn/optim/_dist_optimizer_registry.py +2 -2
- mindspore/nn/optim/ada_grad.py +8 -8
- mindspore/nn/optim/adadelta.py +2 -3
- mindspore/nn/optim/adafactor.py +18 -14
- mindspore/nn/optim/adam.py +429 -87
- mindspore/nn/optim/adamax.py +5 -6
- mindspore/nn/optim/adasum.py +10 -8
- mindspore/nn/optim/asgd.py +7 -7
- mindspore/nn/optim/ftrl.py +81 -11
- mindspore/nn/optim/lamb.py +7 -8
- mindspore/nn/optim/lars.py +4 -4
- mindspore/nn/optim/lazyadam.py +82 -7
- mindspore/nn/optim/momentum.py +8 -7
- mindspore/nn/optim/optimizer.py +19 -10
- mindspore/nn/optim/proximal_ada_grad.py +6 -5
- mindspore/nn/optim/rmsprop.py +3 -3
- mindspore/nn/optim/rprop.py +20 -16
- mindspore/nn/optim/sgd.py +21 -15
- mindspore/nn/optim/thor.py +23 -21
- mindspore/nn/probability/__init__.py +0 -2
- mindspore/nn/probability/bijector/bijector.py +7 -6
- mindspore/nn/probability/bijector/invert.py +4 -2
- mindspore/nn/probability/bijector/softplus.py +2 -2
- mindspore/nn/probability/bnn_layers/dense_variational.py +1 -1
- mindspore/nn/probability/bnn_layers/layer_distribution.py +2 -2
- mindspore/nn/probability/distribution/__init__.py +6 -0
- mindspore/nn/probability/distribution/_utils/custom_ops.py +3 -2
- mindspore/nn/probability/distribution/_utils/utils.py +11 -17
- mindspore/nn/probability/distribution/bernoulli.py +6 -6
- mindspore/nn/probability/distribution/beta.py +1 -1
- mindspore/nn/probability/distribution/categorical.py +9 -9
- mindspore/nn/probability/distribution/cauchy.py +8 -8
- mindspore/nn/probability/distribution/distribution.py +12 -6
- mindspore/nn/probability/distribution/exponential.py +5 -5
- mindspore/nn/probability/distribution/gamma.py +3 -3
- mindspore/nn/probability/distribution/geometric.py +6 -5
- mindspore/nn/probability/distribution/gumbel.py +5 -5
- mindspore/nn/probability/distribution/half_normal.py +133 -0
- mindspore/nn/probability/distribution/laplace.py +128 -0
- mindspore/nn/probability/distribution/log_normal.py +0 -1
- mindspore/nn/probability/distribution/logistic.py +4 -5
- mindspore/nn/probability/distribution/normal.py +11 -15
- mindspore/nn/probability/distribution/poisson.py +6 -2
- mindspore/nn/probability/distribution/student_t.py +150 -0
- mindspore/nn/probability/distribution/transformed_distribution.py +4 -4
- mindspore/nn/probability/distribution/uniform.py +5 -5
- mindspore/nn/reinforcement/_tensors_queue.py +3 -3
- mindspore/nn/reinforcement/tensor_array.py +2 -2
- mindspore/nn/sparse/sparse.py +8 -1
- mindspore/nn/wrap/cell_wrapper.py +55 -27
- mindspore/nn/wrap/grad_reducer.py +20 -11
- mindspore/nn/wrap/loss_scale.py +47 -30
- mindspore/numpy/array_creations.py +33 -22
- mindspore/numpy/array_ops.py +46 -42
- mindspore/numpy/logic_ops.py +6 -27
- mindspore/numpy/math_ops.py +26 -19
- mindspore/numpy/utils.py +1 -8
- mindspore/numpy/utils_const.py +112 -62
- mindspore/ops/__init__.py +6 -3
- mindspore/ops/_constants.py +0 -6
- mindspore/ops/_grad/__init__.py +2 -1
- mindspore/ops/_grad/grad_array_ops.py +209 -152
- mindspore/ops/_grad/grad_base.py +55 -17
- mindspore/ops/_grad/grad_clip_ops.py +11 -3
- mindspore/ops/_grad/grad_comm_ops.py +58 -47
- mindspore/ops/_grad/grad_implementations.py +21 -61
- mindspore/ops/_grad/grad_inner_ops.py +48 -6
- mindspore/ops/_grad/grad_math_ops.py +306 -161
- mindspore/ops/_grad/grad_nn_ops.py +192 -181
- mindspore/ops/_grad/grad_other_ops.py +1 -1
- mindspore/ops/_grad/grad_quant_ops.py +5 -5
- mindspore/ops/_grad/grad_sequence_ops.py +296 -0
- mindspore/ops/_grad/grad_sparse.py +15 -9
- mindspore/ops/_grad_experimental/__init__.py +1 -0
- mindspore/ops/_grad_experimental/grad_array_ops.py +441 -55
- mindspore/ops/_grad_experimental/grad_image_ops.py +25 -7
- mindspore/ops/_grad_experimental/grad_inner_ops.py +3 -44
- mindspore/ops/_grad_experimental/grad_linalg_ops.py +16 -21
- mindspore/ops/_grad_experimental/grad_math_ops.py +979 -49
- mindspore/ops/_grad_experimental/grad_nn_ops.py +78 -8
- mindspore/ops/_grad_experimental/grad_scalar_ops.py +112 -0
- mindspore/ops/_grad_experimental/grad_sparse_ops.py +197 -13
- mindspore/ops/_op_impl/__init__.py +3 -3
- mindspore/ops/_op_impl/_custom_op/__init__.py +0 -1
- mindspore/ops/_op_impl/_custom_op/_basic.py +0 -1
- mindspore/ops/_op_impl/_custom_op/batch_matmul_impl.py +1 -1
- mindspore/ops/_op_impl/_custom_op/batchnorm_fold.py +4 -2
- mindspore/ops/_op_impl/_custom_op/batchnorm_fold2.py +2 -2
- mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad.py +2 -2
- mindspore/ops/_op_impl/_custom_op/batchnorm_fold2_grad_reduce.py +5 -5
- mindspore/ops/_op_impl/_custom_op/batchnorm_fold_grad.py +3 -3
- mindspore/ops/_op_impl/_custom_op/cholesky_trsm_impl.py +1 -1
- mindspore/ops/_op_impl/_custom_op/correction_mul.py +3 -3
- mindspore/ops/_op_impl/_custom_op/correction_mul_grad.py +2 -2
- mindspore/ops/_op_impl/_custom_op/dsd_back_impl.py +4 -8
- mindspore/ops/_op_impl/_custom_op/dsd_impl.py +1 -1
- mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel.py +2 -2
- mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad.py +2 -2
- mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perchannel_grad_reduce.py +2 -2
- mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer.py +2 -2
- mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad.py +2 -2
- mindspore/ops/_op_impl/_custom_op/fake_learned_scale_quant_perlayer_grad_reduce.py +2 -2
- mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel.py +2 -2
- mindspore/ops/_op_impl/_custom_op/fake_quant_perchannel_grad.py +2 -2
- mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer.py +2 -2
- mindspore/ops/_op_impl/_custom_op/fake_quant_perlayer_grad.py +2 -2
- mindspore/ops/_op_impl/_custom_op/fused_abs_max1_impl.py +1 -1
- mindspore/ops/_op_impl/_custom_op/img2col_impl.py +1 -1
- mindspore/ops/_op_impl/_custom_op/matmul_cube_dense_left_impl.py +2 -2
- mindspore/ops/_op_impl/_custom_op/matmul_cube_dense_right_impl.py +1 -1
- mindspore/ops/_op_impl/_custom_op/matmul_cube_fracz_left_cast_impl.py +1 -1
- mindspore/ops/_op_impl/_custom_op/matmul_cube_fracz_right_mul_impl.py +1 -1
- mindspore/ops/_op_impl/_custom_op/matmul_cube_impl.py +2 -2
- mindspore/ops/_op_impl/_custom_op/matmul_dds_grad_impl.py +0 -1
- mindspore/ops/_op_impl/_custom_op/matmul_dds_impl.py +0 -1
- mindspore/ops/_op_impl/_custom_op/matrix_combine_impl.py +1 -1
- mindspore/ops/_op_impl/_custom_op/minmax_update_perchannel.py +2 -2
- mindspore/ops/_op_impl/_custom_op/minmax_update_perlayer.py +2 -2
- mindspore/ops/_op_impl/_custom_op/transpose02314_impl.py +1 -1
- mindspore/ops/_op_impl/aicpu/__init__.py +238 -3
- mindspore/ops/_op_impl/aicpu/abs.py +36 -0
- mindspore/ops/_op_impl/aicpu/adaptive_avg_pool_2d.py +34 -0
- mindspore/ops/_op_impl/aicpu/adaptive_avg_pool_2d_grad.py +34 -0
- mindspore/ops/_op_impl/aicpu/adaptive_avg_pool_3d.py +39 -0
- mindspore/ops/_op_impl/aicpu/adaptive_avg_pool_3d_grad.py +39 -0
- mindspore/ops/_op_impl/aicpu/adaptive_max_pool_2d_grad.py +37 -0
- mindspore/ops/_op_impl/aicpu/adaptive_max_pool_3d.py +42 -0
- mindspore/ops/_op_impl/aicpu/adaptive_max_pool_3d_grad.py +152 -0
- mindspore/ops/_op_impl/aicpu/add.py +43 -0
- mindspore/ops/_op_impl/aicpu/addcdiv.py +0 -32
- mindspore/ops/_op_impl/aicpu/addcmul.py +0 -84
- mindspore/ops/_op_impl/aicpu/affine_grid_grad.py +35 -0
- mindspore/ops/_op_impl/aicpu/arg_max.py +75 -0
- mindspore/ops/_op_impl/aicpu/arg_min.py +75 -0
- mindspore/ops/_op_impl/aicpu/argmin_with_value.py +43 -0
- mindspore/ops/_op_impl/aicpu/batch_matmul.py +43 -0
- mindspore/ops/_op_impl/aicpu/batch_norm_grad_grad.py +49 -0
- mindspore/ops/_op_impl/aicpu/bernoulli.py +48 -0
- mindspore/ops/_op_impl/aicpu/bessel_i0.py +31 -0
- mindspore/ops/_op_impl/aicpu/bias_add.py +44 -0
- mindspore/ops/_op_impl/aicpu/bias_add_grad.py +43 -0
- mindspore/ops/_op_impl/aicpu/bincount.py +33 -0
- mindspore/{nn/probability/infer/variational/__init__.py → ops/_op_impl/aicpu/cauchy.py} +17 -10
- mindspore/ops/_op_impl/aicpu/channel_shuffle.py +40 -0
- mindspore/ops/_op_impl/aicpu/cholesky.py +1 -1
- mindspore/ops/_op_impl/{cpu/bias_add.py → aicpu/choleskygrad.py} +9 -7
- mindspore/ops/_op_impl/aicpu/combined_non_max_suppression.py +42 -0
- mindspore/ops/_op_impl/aicpu/concat_offset.py +42 -0
- mindspore/ops/_op_impl/aicpu/concat_offset_v1.py +31 -0
- mindspore/ops/_op_impl/aicpu/conj.py +11 -0
- mindspore/ops/_op_impl/aicpu/crop_and_resize_grad_image.py +38 -0
- mindspore/ops/_op_impl/aicpu/cumulative_logsumexp.py +36 -0
- mindspore/ops/_op_impl/aicpu/deformable_offsets.py +38 -0
- mindspore/ops/_op_impl/aicpu/deformable_offsets_grad.py +2 -2
- mindspore/ops/_op_impl/aicpu/dense_to_sparse_set_operation.py +48 -0
- mindspore/ops/_op_impl/aicpu/diag.py +36 -0
- mindspore/ops/_op_impl/aicpu/diag_part.py +36 -0
- mindspore/ops/_op_impl/aicpu/diagonal.py +35 -0
- mindspore/ops/_op_impl/{cpu/bias_add_grad.py → aicpu/digamma.py} +9 -7
- mindspore/ops/_op_impl/aicpu/eig.py +35 -0
- mindspore/ops/_op_impl/aicpu/fft_with_size.py +41 -0
- mindspore/ops/_op_impl/aicpu/flatten.py +1 -0
- mindspore/ops/_op_impl/aicpu/fmax.py +36 -0
- mindspore/ops/_op_impl/aicpu/fmin.py +37 -0
- mindspore/ops/_op_impl/aicpu/fractional_max_pool3d_with_fixed_ksize.py +1 -1
- mindspore/ops/_op_impl/aicpu/fse_decode.py +43 -0
- mindspore/ops/_op_impl/aicpu/glu.py +33 -0
- mindspore/ops/_op_impl/aicpu/glu_grad.py +34 -0
- mindspore/ops/_op_impl/aicpu/greater.py +41 -0
- mindspore/ops/_op_impl/aicpu/greater_equal.py +41 -0
- mindspore/ops/_op_impl/aicpu/index_put.py +50 -0
- mindspore/ops/_op_impl/{tbe/scatter_add_ds.py → aicpu/inplace_index_add.py} +17 -21
- mindspore/ops/_op_impl/aicpu/instance_norm_v2.py +41 -0
- mindspore/ops/_op_impl/aicpu/instance_norm_v2_grad.py +44 -0
- mindspore/ops/_op_impl/aicpu/layer_norm_grad_grad.py +47 -0
- mindspore/ops/_op_impl/aicpu/less.py +41 -0
- mindspore/ops/_op_impl/aicpu/less_equal.py +41 -0
- mindspore/ops/_op_impl/aicpu/lgamma.py +32 -0
- mindspore/ops/_op_impl/aicpu/log_normal_reverse.py +33 -0
- mindspore/ops/_op_impl/aicpu/logit.py +33 -0
- mindspore/ops/_op_impl/aicpu/logit_grad.py +34 -0
- mindspore/ops/_op_impl/aicpu/masked_fill.py +42 -0
- mindspore/ops/_op_impl/aicpu/masked_scatter.py +39 -0
- mindspore/ops/_op_impl/aicpu/matmul.py +39 -0
- mindspore/ops/_op_impl/aicpu/matrix_logarithm.py +31 -0
- mindspore/ops/_op_impl/aicpu/matrix_power.py +32 -0
- mindspore/ops/_op_impl/aicpu/matrix_solve_ls.py +36 -0
- mindspore/ops/_op_impl/aicpu/matrix_triangular_solve.py +36 -0
- mindspore/ops/_op_impl/aicpu/mirror_pad.py +2 -0
- mindspore/ops/_op_impl/aicpu/mirror_pad_grad.py +0 -4
- mindspore/ops/_op_impl/aicpu/mul.py +3 -1
- mindspore/ops/_op_impl/aicpu/multinomial.py +14 -6
- mindspore/ops/_op_impl/aicpu/multinomial_with_replacement.py +35 -0
- mindspore/ops/_op_impl/aicpu/nan_to_num.py +34 -0
- mindspore/ops/_op_impl/aicpu/nllloss.py +38 -0
- mindspore/ops/_op_impl/aicpu/nllloss_grad.py +39 -0
- mindspore/ops/_op_impl/aicpu/ones_like.py +0 -2
- mindspore/ops/_op_impl/aicpu/polar.py +32 -0
- mindspore/ops/_op_impl/aicpu/polygamma.py +34 -0
- mindspore/ops/_op_impl/aicpu/qr.py +36 -0
- mindspore/ops/_op_impl/aicpu/quant_dtype_cast.py +40 -0
- mindspore/ops/_op_impl/aicpu/quantile.py +35 -0
- mindspore/ops/_op_impl/aicpu/ragged_tensor_to_sparse.py +73 -0
- mindspore/ops/_op_impl/aicpu/ragged_tensor_to_tensor.py +74 -0
- mindspore/ops/_op_impl/aicpu/random_shuffle.py +3 -0
- mindspore/ops/_op_impl/aicpu/randperm_v2.py +41 -0
- mindspore/ops/_op_impl/aicpu/range.py +36 -0
- mindspore/ops/_op_impl/aicpu/reciprocal.py +34 -0
- mindspore/ops/_op_impl/aicpu/reciprocal_grad.py +35 -0
- mindspore/ops/_op_impl/aicpu/reduce_sum.py +57 -0
- mindspore/ops/_op_impl/aicpu/resize_bicubic.py +2 -8
- mindspore/ops/_op_impl/aicpu/resize_bicubic_grad.py +1 -1
- mindspore/ops/_op_impl/aicpu/resize_v2.py +68 -0
- mindspore/ops/_op_impl/aicpu/resize_v2_grad.py +68 -0
- mindspore/ops/_op_impl/aicpu/scatter_elements.py +4 -0
- mindspore/ops/_op_impl/aicpu/scatter_nd_update.py +2 -0
- mindspore/ops/_op_impl/aicpu/search_sorted.py +12 -6
- mindspore/ops/_op_impl/aicpu/self_adjoint_eig.py +34 -0
- mindspore/ops/_op_impl/aicpu/sequence_add.py +34 -0
- mindspore/ops/_op_impl/aicpu/sequence_add_offset.py +34 -0
- mindspore/ops/_op_impl/aicpu/sequence_addn.py +38 -0
- mindspore/ops/_op_impl/aicpu/slice_grad.py +76 -0
- mindspore/ops/_op_impl/aicpu/smooth_l1_loss.py +35 -0
- mindspore/ops/_op_impl/aicpu/smooth_l1_loss_grad.py +37 -0
- mindspore/ops/_op_impl/aicpu/sort.py +39 -0
- mindspore/ops/_op_impl/aicpu/sparse_apply_adagrad_da.py +0 -24
- mindspore/ops/_op_impl/aicpu/sparse_cross.py +42 -0
- mindspore/ops/_op_impl/aicpu/sparse_fill_empty_rows.py +63 -0
- mindspore/ops/_op_impl/aicpu/sparse_fill_empty_rows_grad.py +45 -0
- mindspore/ops/_op_impl/aicpu/sparse_matrix_mat_mul.py +56 -0
- mindspore/ops/_op_impl/{tbe/slice_ds.py → aicpu/sparse_segment_sum.py} +16 -24
- mindspore/ops/_op_impl/aicpu/sparse_segment_sum_with_num_segments.py +68 -0
- mindspore/ops/_op_impl/aicpu/sparse_slice.py +63 -0
- mindspore/ops/_op_impl/aicpu/sparse_slice_grad.py +61 -0
- mindspore/ops/_op_impl/aicpu/squared_difference.py +2 -0
- mindspore/ops/_op_impl/aicpu/strided_slice_v2.py +93 -0
- mindspore/ops/_op_impl/aicpu/strided_slice_v2_grad.py +66 -0
- mindspore/ops/_op_impl/aicpu/tensor_scatter_update.py +59 -0
- mindspore/ops/_op_impl/{tbe/gather_v2.py → aicpu/tile.py} +24 -24
- mindspore/ops/_op_impl/aicpu/tridiagonal_solve.py +35 -0
- mindspore/ops/_op_impl/aicpu/tril_indices.py +34 -0
- mindspore/ops/_op_impl/aicpu/triu_indices.py +34 -0
- mindspore/ops/_op_impl/aicpu/uniform.py +34 -0
- mindspore/ops/_op_impl/aicpu/uniform_candidate_sampler.py +1 -0
- mindspore/ops/_op_impl/aicpu/unique_consecutive.py +10 -2
- mindspore/ops/_op_impl/cpu/__init__.py +1 -2
- mindspore/ops/_op_impl/cpu/dynamic_shape.py +5 -1
- mindspore/ops/_op_impl/cpu/maximum_grad.py +2 -0
- mindspore/{compression/common/__init__.py → ops/_op_impl/cpu/pyexecute.py} +13 -8
- mindspore/ops/_op_impl/cpu/reduce_sum.py +8 -0
- mindspore/ops/_op_impl/cpu/sparse_slice.py +62 -0
- mindspore/ops/_op_impl/cpu/sparse_slice_grad.py +60 -0
- mindspore/ops/_op_impl/cpu/tensor_shape.py +5 -1
- mindspore/ops/_op_impl/tbe/__init__.py +27 -608
- mindspore/ops/_op_impl/tbe/addcdiv_ds.py +42 -0
- mindspore/ops/_op_impl/tbe/addcmul_ds.py +44 -0
- mindspore/ops/_op_impl/tbe/assign_add_ds.py +1 -0
- mindspore/ops/_op_impl/tbe/atomic_addr_clean.py +1 -1
- mindspore/ops/_op_impl/tbe/avg_pool_3d_grad.py +1 -1
- mindspore/ops/_op_impl/tbe/basic_lstm_cell_c_state_grad_v2.py +0 -1
- mindspore/ops/_op_impl/tbe/batch_to_space.py +1 -1
- mindspore/ops/_op_impl/tbe/batch_to_space_nd.py +1 -1
- mindspore/ops/_op_impl/tbe/batch_to_space_nd_v2.py +41 -0
- mindspore/ops/_op_impl/tbe/bce_with_logits_loss.py +1 -0
- mindspore/ops/_op_impl/tbe/bias_add_grad.py +2 -0
- mindspore/ops/_op_impl/tbe/bn_infer_grad.py +4 -2
- mindspore/ops/_op_impl/tbe/bn_infer_grad_ds.py +40 -0
- mindspore/ops/_op_impl/tbe/bn_training_update.py +0 -1
- mindspore/ops/_op_impl/tbe/bn_training_update_ds.py +0 -1
- mindspore/ops/_op_impl/tbe/broadcast_to_ds.py +6 -4
- mindspore/ops/_op_impl/tbe/cast.py +0 -2
- mindspore/ops/_op_impl/tbe/cast_ds.py +3 -3
- mindspore/ops/_op_impl/tbe/ctc_loss_v2.py +0 -2
- mindspore/ops/_op_impl/tbe/ctc_loss_v2_grad.py +0 -2
- mindspore/ops/_op_impl/tbe/data_format_dim_map_ds.py +1 -0
- mindspore/ops/_op_impl/tbe/deformable_offsets.py +1 -0
- mindspore/ops/_op_impl/tbe/depthwise_conv2d.py +1 -1
- mindspore/ops/_op_impl/tbe/dynamic_atomic_addr_clean.py +1 -1
- mindspore/ops/_op_impl/tbe/gather_nd.py +1 -0
- mindspore/ops/_op_impl/tbe/greater.py +2 -0
- mindspore/ops/_op_impl/tbe/{index_add.py → inplace_index_add.py} +3 -6
- mindspore/ops/_op_impl/tbe/layer_norm_beta_gamma_backprop_v2.py +0 -1
- mindspore/ops/_op_impl/tbe/npu_clear_float_status_v2.py +35 -0
- mindspore/ops/_op_impl/tbe/npu_get_float_status_v2.py +35 -0
- mindspore/ops/_op_impl/tbe/one_hot_ds.py +0 -6
- mindspore/ops/_op_impl/tbe/{greater_ds.py → reduce_all_ds.py} +13 -16
- mindspore/ops/_op_impl/tbe/reduce_any_ds.py +39 -0
- mindspore/ops/_op_impl/tbe/roi_align_ds.py +44 -0
- mindspore/ops/_op_impl/tbe/roi_align_grad_ds.py +44 -0
- mindspore/ops/_op_impl/tbe/scatter_add.py +2 -0
- mindspore/ops/_op_impl/tbe/scatter_nd_add.py +2 -2
- mindspore/ops/_op_impl/tbe/slice.py +26 -15
- mindspore/ops/_op_impl/tbe/space_to_batch.py +1 -1
- mindspore/ops/_op_impl/tbe/space_to_batch_nd.py +1 -1
- mindspore/ops/_op_impl/tbe/strided_slice_grad_d.py +1 -0
- mindspore/ops/_op_impl/tbe/trans_data_ds.py +15 -5
- mindspore/ops/_op_impl/tbe/unsorted_segment_sum.py +1 -1
- mindspore/ops/_op_impl/tbe/unsorted_segment_sum_ds.py +2 -0
- mindspore/ops/_primitive_cache.py +3 -2
- mindspore/ops/_register_for_op.py +11 -0
- mindspore/ops/_utils/__init__.py +1 -1
- mindspore/ops/_utils/utils.py +20 -41
- mindspore/ops/_vmap/__init__.py +2 -2
- mindspore/ops/_vmap/vmap_array_ops.py +170 -78
- mindspore/ops/_vmap/vmap_base.py +24 -10
- mindspore/ops/_vmap/vmap_convolution_ops.py +7 -10
- mindspore/ops/_vmap/vmap_grad_math_ops.py +4 -4
- mindspore/ops/_vmap/vmap_grad_nn_ops.py +41 -9
- mindspore/ops/_vmap/vmap_image_ops.py +52 -0
- mindspore/ops/_vmap/vmap_math_ops.py +77 -6
- mindspore/ops/_vmap/vmap_nn_ops.py +78 -29
- mindspore/ops/_vmap/vmap_other_ops.py +3 -1
- mindspore/ops/_vmap/vmap_random_ops.py +55 -3
- mindspore/ops/_vmap/vmap_sparse_ops.py +1 -0
- mindspore/ops/bprop_mindir/AdaptiveAvgPool2D_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/AdaptiveMaxPool2D_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/ApproximateEqual_bprop.mindir +18 -19
- mindspore/ops/bprop_mindir/Argmax_bprop.mindir +13 -12
- mindspore/ops/bprop_mindir/Argmin_bprop.mindir +14 -13
- mindspore/ops/bprop_mindir/AssignSub_bprop.mindir +17 -18
- mindspore/ops/bprop_mindir/Assign_bprop.mindir +16 -16
- mindspore/ops/bprop_mindir/AvgPool3D_bprop.mindir +150 -0
- mindspore/ops/bprop_mindir/AvgPool_bprop.mindir +66 -0
- mindspore/ops/bprop_mindir/BCEWithLogitsLoss_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/BNTrainingReduce_bprop.mindir +13 -12
- mindspore/ops/bprop_mindir/BatchNormGrad_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/BatchToSpaceND_bprop.mindir +28 -0
- mindspore/ops/bprop_mindir/BiasAddGrad_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/BinaryCrossEntropy_bprop.mindir +33 -0
- mindspore/ops/bprop_mindir/BroadcastTo_bprop.mindir +306 -0
- mindspore/ops/bprop_mindir/Broadcast_bprop.mindir +12 -8
- mindspore/ops/bprop_mindir/CTCLoss_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Concat_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Conv2DBackpropFilter_bprop.mindir +240 -0
- mindspore/ops/bprop_mindir/Conv2DBackpropInput_bprop.mindir +247 -0
- mindspore/ops/bprop_mindir/Conv2DTranspose_bprop.mindir +247 -0
- mindspore/ops/bprop_mindir/Conv3DTranspose_bprop.mindir +315 -0
- mindspore/ops/bprop_mindir/Conv3D_bprop.mindir +278 -0
- mindspore/ops/bprop_mindir/DType_bprop.mindir +12 -12
- mindspore/ops/bprop_mindir/DeformableOffsets_bprop.mindir +58 -0
- mindspore/ops/bprop_mindir/Depend_bprop.mindir +12 -13
- mindspore/ops/bprop_mindir/DepthToSpace_bprop.mindir +23 -0
- mindspore/ops/bprop_mindir/DepthwiseConv2dNative_bprop.mindir +138 -0
- mindspore/ops/bprop_mindir/DiagPart_bprop.mindir +15 -0
- mindspore/ops/bprop_mindir/Dropout2D_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Dropout3D_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/DropoutDoMask_bprop.mindir +22 -24
- mindspore/ops/bprop_mindir/DropoutGenMask_bprop.mindir +16 -14
- mindspore/ops/bprop_mindir/DropoutGrad_bprop.mindir +27 -0
- mindspore/ops/bprop_mindir/Dropout_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/DynamicGRUV2_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/DynamicRNN_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/DynamicShape_bprop.mindir +12 -12
- mindspore/ops/bprop_mindir/Elu_bprop.mindir +16 -0
- mindspore/ops/bprop_mindir/EmbeddingLookup_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Equal_bprop.mindir +18 -19
- mindspore/ops/bprop_mindir/ExpandDims_bprop.mindir +58 -0
- mindspore/ops/bprop_mindir/FastGeLU_bprop.mindir +16 -0
- mindspore/ops/bprop_mindir/Flatten_bprop.mindir +54 -0
- mindspore/ops/bprop_mindir/FloorDiv_bprop.mindir +18 -15
- mindspore/ops/bprop_mindir/GatherD_bprop.mindir +26 -0
- mindspore/ops/bprop_mindir/GatherNd_bprop.mindir +57 -0
- mindspore/ops/bprop_mindir/Gather_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/GreaterEqual_bprop.mindir +17 -18
- mindspore/ops/bprop_mindir/Greater_bprop.mindir +18 -19
- mindspore/ops/bprop_mindir/HSigmoid_bprop.mindir +16 -0
- mindspore/ops/bprop_mindir/HSwish_bprop.mindir +16 -0
- mindspore/ops/bprop_mindir/IOU_bprop.mindir +18 -19
- mindspore/ops/bprop_mindir/InstanceNorm_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/IsFinite_bprop.mindir +13 -12
- mindspore/ops/bprop_mindir/IsInf_bprop.mindir +13 -10
- mindspore/ops/bprop_mindir/IsNan_bprop.mindir +14 -11
- mindspore/ops/bprop_mindir/KLDivLoss_bprop.mindir +126 -0
- mindspore/ops/bprop_mindir/L2Loss_bprop.mindir +15 -0
- mindspore/ops/bprop_mindir/L2Normalize_bprop.mindir +30 -0
- mindspore/ops/bprop_mindir/LRN_bprop.mindir +43 -0
- mindspore/ops/bprop_mindir/LayerNormGrad_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/LessEqual_bprop.mindir +18 -19
- mindspore/ops/bprop_mindir/Less_bprop.mindir +17 -18
- mindspore/ops/bprop_mindir/LinSpace_bprop.mindir +22 -19
- mindspore/ops/bprop_mindir/Load_bprop.mindir +12 -13
- mindspore/ops/bprop_mindir/LogSoftmax_bprop.mindir +23 -0
- mindspore/ops/bprop_mindir/LogicalAnd_bprop.mindir +17 -18
- mindspore/ops/bprop_mindir/LogicalNot_bprop.mindir +14 -13
- mindspore/ops/bprop_mindir/MaskedSelect_bprop.mindir +21 -0
- mindspore/ops/bprop_mindir/MaxPool3DGradGrad_bprop.mindir +74 -0
- mindspore/ops/bprop_mindir/MaxPool3DGrad_bprop.mindir +74 -0
- mindspore/ops/bprop_mindir/MaxPool3D_bprop.mindir +75 -0
- mindspore/ops/bprop_mindir/MaxPoolGradGrad_bprop.mindir +65 -0
- mindspore/ops/bprop_mindir/MaxPoolWithArgmax_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Maximum_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Minimum_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/MirrorPad_bprop.mindir +27 -0
- mindspore/ops/bprop_mindir/Mish_bprop.mindir +35 -0
- mindspore/ops/bprop_mindir/MulNoNan_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/NLLLoss_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/NonZero_bprop.mindir +14 -0
- mindspore/ops/bprop_mindir/NotEqual_bprop.mindir +18 -19
- mindspore/ops/bprop_mindir/OneHot_bprop.mindir +25 -23
- mindspore/ops/bprop_mindir/OnesLike_bprop.mindir +13 -13
- mindspore/ops/bprop_mindir/PReLU_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Pad_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Padding_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/RNNTLoss_bprop.mindir +29 -0
- mindspore/ops/bprop_mindir/ROIAlign_bprop.mindir +82 -0
- mindspore/ops/bprop_mindir/Range_bprop.mindir +21 -19
- mindspore/ops/bprop_mindir/Rank_bprop.mindir +11 -11
- mindspore/ops/bprop_mindir/ReLU6_bprop.mindir +16 -0
- mindspore/ops/bprop_mindir/ReLUV2_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/ReduceAll_bprop.mindir +18 -17
- mindspore/ops/bprop_mindir/ReduceAny_bprop.mindir +18 -17
- mindspore/ops/bprop_mindir/ReluGrad_bprop.mindir +19 -23
- mindspore/ops/bprop_mindir/Reshape_bprop.mindir +60 -0
- mindspore/ops/bprop_mindir/ResizeBilinear_bprop.mindir +29 -0
- mindspore/ops/bprop_mindir/ResizeNearestNeighbor_bprop.mindir +89 -0
- mindspore/ops/bprop_mindir/ReverseSequence_bprop.mindir +52 -0
- mindspore/ops/bprop_mindir/ReverseV2_bprop.mindir +22 -0
- mindspore/ops/bprop_mindir/Round_bprop.mindir +14 -13
- mindspore/ops/bprop_mindir/ScatterMax_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/ScatterMin_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/ScatterNdUpdate_bprop.mindir +22 -0
- mindspore/ops/bprop_mindir/ScatterNd_bprop.mindir +24 -0
- mindspore/ops/bprop_mindir/ScatterNonAliasingAdd_bprop.mindir +22 -0
- mindspore/ops/bprop_mindir/ScatterUpdate_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/SeLU_bprop.mindir +21 -0
- mindspore/ops/bprop_mindir/Select_bprop.mindir +30 -34
- mindspore/ops/bprop_mindir/Shape_bprop.mindir +12 -12
- mindspore/ops/bprop_mindir/SigmoidCrossEntropyWithLogits_bprop.mindir +21 -0
- mindspore/ops/bprop_mindir/SigmoidGrad_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Sigmoid_bprop.mindir +16 -0
- mindspore/ops/bprop_mindir/Sign_bprop.mindir +13 -12
- mindspore/ops/bprop_mindir/Slice_bprop.mindir +26 -0
- mindspore/ops/bprop_mindir/SmoothL1Loss_bprop.mindir +36 -0
- mindspore/ops/bprop_mindir/SoftmaxCrossEntropyWithLogits_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Softplus_bprop.mindir +16 -0
- mindspore/ops/bprop_mindir/Softsign_bprop.mindir +33 -0
- mindspore/ops/bprop_mindir/Sort_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/SpaceToBatchND_bprop.mindir +28 -0
- mindspore/ops/bprop_mindir/SpaceToDepth_bprop.mindir +23 -0
- mindspore/ops/bprop_mindir/SparseGatherV2_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/SparseSoftmaxCrossEntropyWithLogits_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Split_bprop.mindir +22 -0
- mindspore/ops/bprop_mindir/Squeeze_bprop.mindir +54 -0
- mindspore/ops/bprop_mindir/StridedSliceGrad_bprop.mindir +95 -0
- mindspore/ops/bprop_mindir/StridedSlice_bprop.mindir +98 -0
- mindspore/ops/bprop_mindir/Switch_bprop.mindir +28 -32
- mindspore/ops/bprop_mindir/TanhGrad_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Tanh_bprop.mindir +66 -0
- mindspore/ops/bprop_mindir/TensorScatterAdd_bprop.mindir +22 -0
- mindspore/ops/bprop_mindir/TensorScatterUpdate_bprop.mindir +29 -0
- mindspore/ops/bprop_mindir/TensorShape_bprop.mindir +14 -0
- mindspore/ops/bprop_mindir/Tile_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/TopK_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/TransShape_bprop.mindir +23 -0
- mindspore/ops/bprop_mindir/TruncateDiv_bprop.mindir +18 -15
- mindspore/ops/bprop_mindir/TupleGetItem_bprop.mindir +11 -13
- mindspore/ops/bprop_mindir/Unique_bprop.mindir +16 -0
- mindspore/ops/bprop_mindir/Unstack_bprop.mindir +22 -0
- mindspore/ops/bprop_mindir/UpsampleNearest3D_bprop.mindir +32 -0
- mindspore/ops/bprop_mindir/UpsampleTrilinear3D_bprop.mindir +38 -0
- mindspore/ops/bprop_mindir/ZerosLike_bprop.mindir +13 -12
- mindspore/ops/bprop_mindir/__init__.py +1 -4
- mindspore/ops/bprop_mindir/generate_mindir.py +32 -20
- mindspore/ops/composite/__init__.py +12 -13
- mindspore/ops/composite/base.py +261 -254
- mindspore/ops/composite/env_ops.py +41 -0
- mindspore/ops/composite/math_ops.py +197 -156
- mindspore/ops/composite/multitype_ops/_compile_utils.py +428 -176
- mindspore/ops/composite/multitype_ops/_constexpr_utils.py +188 -87
- mindspore/ops/composite/multitype_ops/add_impl.py +23 -1
- mindspore/ops/composite/multitype_ops/div_impl.py +3 -3
- mindspore/ops/composite/multitype_ops/equal_impl.py +1 -0
- mindspore/ops/composite/multitype_ops/floordiv_impl.py +1 -1
- mindspore/ops/composite/multitype_ops/getitem_impl.py +52 -5
- mindspore/ops/composite/multitype_ops/greater_equal_impl.py +31 -0
- mindspore/ops/composite/multitype_ops/greater_impl.py +31 -0
- mindspore/ops/composite/multitype_ops/in_impl.py +15 -3
- mindspore/ops/composite/multitype_ops/less_equal_impl.py +33 -2
- mindspore/ops/composite/multitype_ops/less_impl.py +33 -0
- mindspore/ops/composite/multitype_ops/logical_and_impl.py +2 -2
- mindspore/ops/composite/multitype_ops/logical_or_impl.py +2 -1
- mindspore/ops/composite/multitype_ops/mod_impl.py +1 -1
- mindspore/ops/composite/multitype_ops/mul_impl.py +21 -7
- mindspore/ops/composite/multitype_ops/not_in_impl.py +15 -3
- mindspore/ops/composite/multitype_ops/ones_like_impl.py +2 -4
- mindspore/ops/composite/multitype_ops/pow_impl.py +1 -0
- mindspore/ops/composite/multitype_ops/setitem_impl.py +62 -70
- mindspore/ops/composite/multitype_ops/sub_impl.py +3 -3
- mindspore/ops/composite/multitype_ops/zeros_like_impl.py +41 -4
- mindspore/ops/function/__init__.py +323 -8
- mindspore/ops/function/array_func.py +3511 -780
- mindspore/ops/function/clip_func.py +329 -0
- mindspore/ops/function/debug_func.py +6 -6
- mindspore/ops/function/grad/__init__.py +5 -1
- mindspore/ops/function/grad/grad_func.py +736 -65
- mindspore/ops/function/image_func.py +270 -0
- mindspore/ops/function/linalg_func.py +268 -8
- mindspore/ops/function/math_func.py +8032 -3164
- mindspore/ops/function/nn_func.py +5619 -1855
- mindspore/ops/function/other_func.py +115 -0
- mindspore/ops/function/parameter_func.py +11 -10
- mindspore/ops/function/random_func.py +939 -77
- mindspore/ops/function/sparse_func.py +249 -84
- mindspore/ops/function/sparse_unary_func.py +2303 -0
- mindspore/ops/function/spectral_func.py +146 -0
- mindspore/ops/function/vmap_func.py +114 -0
- mindspore/ops/functional.py +182 -254
- mindspore/ops/op_info_register.py +79 -34
- mindspore/ops/operations/__init__.py +210 -118
- mindspore/ops/operations/_csr_ops.py +7 -7
- mindspore/ops/operations/_embedding_cache_ops.py +25 -15
- mindspore/ops/operations/_grad_ops.py +447 -322
- mindspore/ops/operations/_inner_ops.py +547 -176
- mindspore/ops/operations/_map_tensor_ops.py +112 -0
- mindspore/ops/operations/_ms_kernel.py +29 -27
- mindspore/ops/operations/_ocr_ops.py +11 -11
- mindspore/ops/operations/_opaque_predicate_registry.py +41 -0
- mindspore/ops/operations/_quant_ops.py +186 -101
- mindspore/ops/operations/_rl_inner_ops.py +122 -61
- mindspore/ops/operations/_scalar_ops.py +466 -0
- mindspore/ops/operations/_sequence_ops.py +1047 -0
- mindspore/ops/operations/_tensor_array.py +10 -11
- mindspore/ops/operations/_thor_ops.py +4 -4
- mindspore/ops/operations/array_ops.py +1428 -1226
- mindspore/ops/operations/comm_ops.py +180 -117
- mindspore/ops/operations/control_ops.py +4 -2
- mindspore/ops/operations/custom_ops.py +185 -98
- mindspore/ops/operations/debug_ops.py +92 -54
- mindspore/ops/operations/image_ops.py +406 -211
- mindspore/ops/operations/inner_ops.py +42 -53
- mindspore/ops/operations/linalg_ops.py +32 -29
- mindspore/ops/operations/math_ops.py +2076 -897
- mindspore/ops/operations/nn_ops.py +1282 -1252
- mindspore/ops/operations/other_ops.py +124 -278
- mindspore/ops/operations/random_ops.py +345 -178
- mindspore/ops/operations/rl_ops.py +8 -9
- mindspore/ops/operations/sparse_ops.py +502 -157
- mindspore/ops/operations/spectral_ops.py +107 -0
- mindspore/ops/primitive.py +192 -15
- mindspore/ops/vm_impl_registry.py +23 -2
- mindspore/parallel/__init__.py +6 -1
- mindspore/parallel/_auto_parallel_context.py +199 -92
- mindspore/parallel/_cell_wrapper.py +4 -2
- mindspore/parallel/_cost_model_context.py +3 -0
- mindspore/parallel/_dp_allreduce_fusion.py +2 -1
- mindspore/parallel/_offload_context.py +185 -0
- mindspore/parallel/_parallel_serialization.py +167 -28
- mindspore/parallel/_ps_context.py +9 -5
- mindspore/parallel/_recovery_context.py +1 -1
- mindspore/parallel/_tensor.py +9 -1
- mindspore/{nn/transformer → parallel/_transformer}/__init__.py +6 -6
- mindspore/{nn/transformer → parallel/_transformer}/layers.py +59 -37
- mindspore/{nn/transformer → parallel/_transformer}/loss.py +4 -7
- mindspore/{nn/transformer → parallel/_transformer}/moe.py +160 -35
- mindspore/{nn/transformer → parallel/_transformer}/op_parallel_config.py +3 -3
- mindspore/{nn/transformer → parallel/_transformer}/transformer.py +235 -196
- mindspore/parallel/_utils.py +47 -7
- mindspore/parallel/algo_parameter_config.py +5 -1
- mindspore/parallel/checkpoint_transform.py +329 -0
- mindspore/parallel/shard.py +229 -0
- mindspore/profiler/__init__.py +2 -1
- mindspore/profiler/common/util.py +4 -3
- mindspore/profiler/common/validator/validate_path.py +2 -2
- mindspore/profiler/envprofiling.py +249 -0
- mindspore/profiler/parser/aicpu_data_parser.py +38 -39
- mindspore/profiler/parser/ascend_timeline_generator.py +497 -0
- mindspore/profiler/parser/base_timeline_generator.py +471 -0
- mindspore/profiler/parser/cpu_gpu_timeline_generator.py +684 -0
- mindspore/profiler/parser/framework_parser.py +42 -16
- mindspore/profiler/parser/hccl_parser.py +158 -158
- mindspore/profiler/parser/hwts_log_parser.py +7 -6
- mindspore/profiler/parser/integrator.py +18 -1579
- mindspore/profiler/parser/minddata_analyzer.py +8 -8
- mindspore/profiler/parser/msadvisor_analyzer.py +14 -27
- mindspore/profiler/parser/msadvisor_parser.py +2 -4
- mindspore/profiler/parser/optime_parser.py +17 -18
- mindspore/profiler/parser/profiler_info.py +108 -0
- mindspore/profiler/parser/step_trace_parser.py +1 -1
- mindspore/profiler/profiling.py +396 -194
- mindspore/rewrite/__init__.py +6 -2
- mindspore/rewrite/api/node.py +51 -110
- mindspore/rewrite/api/node_type.py +10 -6
- mindspore/rewrite/api/pattern_engine.py +51 -7
- mindspore/rewrite/api/scoped_value.py +64 -53
- mindspore/rewrite/api/symbol_tree.py +108 -61
- mindspore/rewrite/api/tree_node_helper.py +2 -3
- mindspore/{compression/quant/__init__.py → rewrite/ast_creator_register.py} +20 -11
- mindspore/rewrite/ast_helpers/__init__.py +6 -3
- mindspore/rewrite/ast_helpers/ast_creator.py +115 -0
- mindspore/rewrite/ast_helpers/ast_finder.py +99 -1
- mindspore/rewrite/ast_helpers/ast_modifier.py +17 -4
- mindspore/rewrite/ast_helpers/ast_replacer.py +1 -1
- mindspore/rewrite/ast_transformers/__init__.py +0 -1
- mindspore/rewrite/ast_transformers/flatten_recursive_stmt.py +46 -5
- mindspore/rewrite/ast_transformers/remove_return_out_of_if.py +6 -3
- mindspore/rewrite/common/__init__.py +2 -0
- mindspore/rewrite/common/event.py +1 -1
- mindspore/rewrite/common/observable.py +1 -1
- mindspore/rewrite/common/observer.py +1 -1
- mindspore/rewrite/common/rewrite_elog.py +35 -0
- mindspore/rewrite/namer.py +2 -2
- mindspore/rewrite/namespace.py +14 -4
- mindspore/rewrite/node.py +161 -13
- mindspore/rewrite/parser.py +0 -1
- mindspore/rewrite/parser_register.py +0 -1
- mindspore/rewrite/parsers/arguments_parser.py +3 -2
- mindspore/rewrite/parsers/assign_parser.py +267 -67
- mindspore/rewrite/parsers/attribute_parser.py +56 -0
- mindspore/rewrite/parsers/class_def_parser.py +191 -108
- mindspore/rewrite/parsers/constant_parser.py +101 -0
- mindspore/rewrite/parsers/container_parser.py +88 -0
- mindspore/rewrite/parsers/for_parser.py +28 -15
- mindspore/rewrite/parsers/function_def_parser.py +21 -5
- mindspore/rewrite/parsers/if_parser.py +11 -28
- mindspore/rewrite/parsers/module_parser.py +9 -6
- mindspore/rewrite/parsers/return_parser.py +3 -2
- mindspore/rewrite/sparsify/__init__.py +0 -0
- mindspore/rewrite/sparsify/sparse_transformer.py +448 -0
- mindspore/rewrite/sparsify/sparsify.py +109 -0
- mindspore/rewrite/sparsify/utils.py +173 -0
- mindspore/rewrite/symbol_tree.py +322 -109
- mindspore/rewrite/symbol_tree_builder.py +45 -8
- mindspore/rewrite/symbol_tree_dumper.py +0 -1
- mindspore/rewrite/topological_manager.py +1 -2
- mindspore/run_check/_check_version.py +209 -112
- mindspore/run_check/run_check.py +2 -1
- mindspore/scipy/linalg.py +13 -117
- mindspore/scipy/ops.py +5 -71
- mindspore/scipy/ops_grad.py +1 -25
- mindspore/scipy/ops_wrapper.py +1 -1
- mindspore/scipy/optimize/_bfgs.py +1 -1
- mindspore/scipy/optimize/_lagrange.py +200 -0
- mindspore/scipy/optimize/line_search.py +3 -2
- mindspore/scipy/optimize/minimize.py +43 -6
- mindspore/scipy/sparse/__init__.py +2 -2
- mindspore/scipy/sparse/linalg.py +5 -465
- mindspore/scipy/utils.py +2 -1
- mindspore/scipy/utils_const.py +7 -1
- mindspore/train/__init__.py +6 -4
- mindspore/train/_utils.py +28 -5
- mindspore/train/amp.py +321 -50
- mindspore/train/callback/__init__.py +3 -1
- mindspore/train/callback/_backup_and_restore.py +120 -0
- mindspore/train/callback/_callback.py +8 -8
- mindspore/train/callback/_checkpoint.py +12 -9
- mindspore/train/callback/_early_stop.py +13 -7
- mindspore/train/callback/_history.py +8 -8
- mindspore/train/callback/_lambda_callback.py +6 -6
- mindspore/train/callback/_landscape.py +36 -38
- mindspore/train/callback/_loss_monitor.py +12 -6
- mindspore/train/callback/_lr_scheduler_callback.py +2 -4
- mindspore/train/callback/_on_request_exit.py +212 -0
- mindspore/train/callback/_reduce_lr_on_plateau.py +13 -7
- mindspore/train/callback/_summary_collector.py +27 -19
- mindspore/train/callback/_time_monitor.py +13 -7
- mindspore/train/checkpoint_pb2.py +68 -8
- mindspore/train/data_sink.py +122 -33
- mindspore/train/dataset_helper.py +28 -87
- mindspore/train/loss_scale_manager.py +4 -7
- mindspore/{nn → train}/metrics/__init__.py +20 -20
- mindspore/{nn → train}/metrics/accuracy.py +12 -10
- mindspore/{nn → train}/metrics/auc.py +4 -4
- mindspore/{nn → train}/metrics/bleu_score.py +4 -4
- mindspore/{nn → train}/metrics/confusion_matrix.py +10 -8
- mindspore/{nn → train}/metrics/cosine_similarity.py +4 -4
- mindspore/{nn → train}/metrics/dice.py +6 -5
- mindspore/{nn → train}/metrics/error.py +7 -5
- mindspore/{nn → train}/metrics/fbeta.py +9 -7
- mindspore/{nn → train}/metrics/hausdorff_distance.py +8 -6
- mindspore/{nn → train}/metrics/loss.py +4 -3
- mindspore/{nn → train}/metrics/mean_surface_distance.py +6 -5
- mindspore/{nn → train}/metrics/metric.py +6 -5
- mindspore/{nn → train}/metrics/occlusion_sensitivity.py +4 -3
- mindspore/{nn → train}/metrics/perplexity.py +5 -4
- mindspore/{nn → train}/metrics/precision.py +5 -4
- mindspore/{nn → train}/metrics/recall.py +5 -4
- mindspore/{nn → train}/metrics/roc.py +7 -6
- mindspore/{nn → train}/metrics/root_mean_square_surface_distance.py +6 -5
- mindspore/{nn → train}/metrics/topk.py +7 -5
- mindspore/train/mind_ir_pb2.py +339 -32
- mindspore/train/model.py +113 -84
- mindspore/train/serialization.py +547 -167
- mindspore/train/summary/_summary_adapter.py +1 -1
- mindspore/train/summary/summary_record.py +43 -12
- mindspore/train/train_thor/convert_utils.py +7 -1
- mindspore/train/train_thor/dataset_helper.py +3 -3
- mindspore/train/train_thor/model_thor.py +0 -4
- mindspore/version.py +1 -1
- {mindspore-1.10.0.dist-info → mindspore-2.0.0rc1.dist-info}/METADATA +4 -3
- {mindspore-1.10.0.dist-info → mindspore-2.0.0rc1.dist-info}/RECORD +899 -675
- mindspore/compression/common/constant.py +0 -124
- mindspore/compression/export/__init__.py +0 -19
- mindspore/compression/export/quant_export.py +0 -514
- mindspore/compression/quant/qat.py +0 -636
- mindspore/compression/quant/quant_utils.py +0 -462
- mindspore/compression/quant/quantizer.py +0 -68
- mindspore/nn/layer/quant.py +0 -1868
- mindspore/nn/layer/rnn_utils.py +0 -90
- mindspore/nn/probability/dpn/__init__.py +0 -22
- mindspore/nn/probability/dpn/vae/__init__.py +0 -25
- mindspore/nn/probability/dpn/vae/cvae.py +0 -138
- mindspore/nn/probability/dpn/vae/vae.py +0 -122
- mindspore/nn/probability/infer/__init__.py +0 -22
- mindspore/nn/probability/infer/variational/elbo.py +0 -70
- mindspore/nn/probability/infer/variational/svi.py +0 -84
- mindspore/nn/probability/toolbox/__init__.py +0 -22
- mindspore/nn/probability/toolbox/anomaly_detection.py +0 -99
- mindspore/nn/probability/toolbox/uncertainty_evaluation.py +0 -363
- mindspore/nn/probability/transforms/__init__.py +0 -22
- mindspore/nn/probability/transforms/transform_bnn.py +0 -262
- mindspore/nn/probability/zhusuan/__init__.py +0 -18
- mindspore/nn/probability/zhusuan/framework/__init__.py +0 -18
- mindspore/nn/probability/zhusuan/framework/bn.py +0 -95
- mindspore/nn/probability/zhusuan/variational/__init__.py +0 -18
- mindspore/nn/probability/zhusuan/variational/elbo.py +0 -46
- mindspore/ops/_op_impl/tbe/bias_add_grad_ds.py +0 -52
- mindspore/ops/_op_impl/tbe/scatter_nd_add_ds.py +0 -43
- mindspore/ops/bprop_mindir/AssignAdd_bprop.mindir +0 -20
- mindspore/ops/bprop_mindir/Identity_bprop.mindir +0 -9
- mindspore/ops/bprop_mindir/LogicalOr_bprop.mindir +0 -20
- mindspore/ops/bprop_mindir/ReLU_bprop.mindir +0 -16
- mindspore/ops/bprop_mindir/UpdateState_bprop.mindir +0 -17
- mindspore/ops/bprop_mindir/stop_gradient_bprop.mindir +0 -12
- mindspore/ops/composite/array_ops.py +0 -210
- mindspore/ops/composite/clip_ops.py +0 -238
- mindspore/ops/composite/random_ops.py +0 -426
- mindspore/ops/composite/vmap_ops.py +0 -38
- mindspore/ops/operations/sponge_ops.py +0 -3531
- mindspore/ops/operations/sponge_update_ops.py +0 -2546
- mindspore/parallel/nn/__init__.py +0 -42
- mindspore/parallel/nn/loss.py +0 -22
- mindspore/parallel/nn/moe.py +0 -21
- mindspore/parallel/nn/op_parallel_config.py +0 -22
- mindspore/parallel/nn/transformer.py +0 -31
- mindspore/run_check/_check_deps_version.py +0 -84
- {mindspore-1.10.0.dist-info → mindspore-2.0.0rc1.dist-info}/WHEEL +0 -0
- {mindspore-1.10.0.dist-info → mindspore-2.0.0rc1.dist-info}/entry_points.txt +0 -0
- {mindspore-1.10.0.dist-info → mindspore-2.0.0rc1.dist-info}/top_level.txt +0 -0
mindspore/profiler/profiling.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# Copyright 2020-
|
|
1
|
+
# Copyright 2020-2022 Huawei Technologies Co., Ltd
|
|
2
2
|
#
|
|
3
3
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
4
|
# you may not use this file except in compliance with the License.
|
|
@@ -17,6 +17,9 @@ import os
|
|
|
17
17
|
import stat
|
|
18
18
|
import time
|
|
19
19
|
import json
|
|
20
|
+
import glob
|
|
21
|
+
import subprocess
|
|
22
|
+
from enum import Enum
|
|
20
23
|
|
|
21
24
|
from mindspore import log as logger, context
|
|
22
25
|
from mindspore.communication.management import GlobalComm, get_rank, get_group_size
|
|
@@ -33,7 +36,8 @@ from mindspore.profiler.parser.aicpu_data_parser import DataPreProcessParser
|
|
|
33
36
|
from mindspore.profiler.parser.framework_parser import FrameworkParser, GpuFrameWorkParser, DynamicFrameWorkParser
|
|
34
37
|
from mindspore.profiler.parser.hwts_log_parser import HWTSLogParser
|
|
35
38
|
from mindspore.profiler.parser.integrator import Integrator, DeviceTarget
|
|
36
|
-
from mindspore.profiler.parser.
|
|
39
|
+
from mindspore.profiler.parser.cpu_gpu_timeline_generator import GpuTimelineGenerator, CpuTimelineGenerator
|
|
40
|
+
from mindspore.profiler.parser.ascend_timeline_generator import AscendTimelineGenerator
|
|
37
41
|
from mindspore.profiler.parser.memory_usage_parser import MemoryUsageParser
|
|
38
42
|
from mindspore.profiler.parser.minddata_parser import MinddataParser
|
|
39
43
|
from mindspore.profiler.parser.minddata_analyzer import MinddataProfilingAnalyzer
|
|
@@ -45,9 +49,33 @@ from mindspore.profiler.parser.step_trace_parser import GpuStepTraceParser, Asce
|
|
|
45
49
|
from mindspore.profiler.parser.hccl_parser import HcclParser
|
|
46
50
|
from mindspore.profiler.parser.op_intermediate_parser import OPIntermediateParser
|
|
47
51
|
from mindspore.profiler.parser.msadvisor_analyzer import Msadvisor
|
|
52
|
+
from mindspore.profiler.parser.profiler_info import ProfilerInfo
|
|
53
|
+
from mindspore.common.api import _pynative_executor
|
|
48
54
|
|
|
49
55
|
INIT_OP_NAME = 'Default/InitDataSetQueue'
|
|
50
56
|
|
|
57
|
+
AICORE_METRICS_DICT = {
|
|
58
|
+
0: "ArithmeticUtilization",
|
|
59
|
+
1: "PipeUtilization",
|
|
60
|
+
2: "Memory",
|
|
61
|
+
3: "MemoryL0",
|
|
62
|
+
4: "ResourceConflictRatio",
|
|
63
|
+
5: "MemoryUB",
|
|
64
|
+
-1: "None"
|
|
65
|
+
}
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
class DeviceSupportParam(Enum):
|
|
69
|
+
"""The device target enum."""
|
|
70
|
+
CPU = ['start', 'start_profile', 'output_path', 'timeline_limit']
|
|
71
|
+
GPU = ['start', 'start_profile', 'output_path', 'data_process', 'timeline_limit', 'sync_enable', 'op_time']
|
|
72
|
+
ASCEND = ['start', 'start_profile', 'output_path', 'data_process', 'timeline_limit', 'profile_memory',
|
|
73
|
+
'parallel_strategy', 'profile_communication', 'aicore_metrics', 'l2_cache', 'op_time', 'ascend_job_id']
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
ALWAYS_VALID_PARAM = ['start', 'start_profile', 'output_path', 'data_process', 'parallel_strategy', 'l2_cache',
|
|
77
|
+
'ascend_job_id', 'op_time']
|
|
78
|
+
|
|
51
79
|
|
|
52
80
|
def _environment_check():
|
|
53
81
|
if c_expression.security.enable_security():
|
|
@@ -55,7 +83,7 @@ def _environment_check():
|
|
|
55
83
|
|
|
56
84
|
|
|
57
85
|
class Profiler:
|
|
58
|
-
"""
|
|
86
|
+
r"""
|
|
59
87
|
This class to enable the profiling of MindSpore neural networks.
|
|
60
88
|
MindSpore users can import the mindspore.Profiler, initialize the Profiler object to start profiling,
|
|
61
89
|
and use Profiler.analyse() to stop profiling and analyse the results.
|
|
@@ -65,13 +93,42 @@ class Profiler:
|
|
|
65
93
|
|
|
66
94
|
Args:
|
|
67
95
|
output_path (str, optional): Output data path. Default: "./data".
|
|
96
|
+
op_time (bool, optional): (Ascend/GPU) Whether to collect operators performance data. Default value: True.
|
|
68
97
|
profile_communication (bool, optional): (Ascend only) Whether to collect communication performance data in
|
|
69
98
|
a multi devices training,collect when True. Setting this parameter has no effect during single device
|
|
70
|
-
training. Default: False.
|
|
99
|
+
training. When using this parameter, `op_time` must be set to True. Default: False.
|
|
71
100
|
profile_memory (bool, optional): (Ascend only) Whether to collect tensor memory data, collect when True.
|
|
72
|
-
Default: False.
|
|
101
|
+
When using this parameter, `op_time` must be set to True. Default: False.
|
|
102
|
+
parallel_strategy (bool, optional): (Ascend only) Whether to collect parallel policy performance data.
|
|
103
|
+
Default value: true.
|
|
73
104
|
start_profile (bool, optional): The start_profile parameter controls whether to enable or disable performance
|
|
74
105
|
data collection based on conditions. Default: True.
|
|
106
|
+
aicore_metrics (int, optional): (Ascend only) Types of AICORE performance data collected, when using this
|
|
107
|
+
parameter, `op_time` must be set to True, and the value must be in [-1, 0, 1, 2, 3, 4, 5], Default: 0, the
|
|
108
|
+
data items contained in each metric are as follows:
|
|
109
|
+
|
|
110
|
+
- -1: Does not collect AICORE data.
|
|
111
|
+
- 0: ArithmeticUtilization contains mac_fp16/int8_ratio, vec_fp32/fp16/int32_ratio, vec_misc_ratio etc.
|
|
112
|
+
- 1: PipeUtilization contains vec_ratio, mac_ratio, scalar_ratio, mte1/mte2/mte3_ratio, icache_miss_rate
|
|
113
|
+
etc.
|
|
114
|
+
- 2: Memory contains ub_read/write_bw, l1_read/write_bw, l2_read/write_bw, main_mem_read/write_bw etc.
|
|
115
|
+
- 3: MemoryL0 contains l0a_read/write_bw, l0b_read/write_bw, l0c_read/write_bw etc.
|
|
116
|
+
- 4: ResourceConflictRatio contains vec_bankgroup/bank/resc_cflt_ratio etc.
|
|
117
|
+
- 5: MemoryUB contains ub_read/write_bw_mte, ub_read/write_bw_vector, ub\_/write_bw_scalar etc.
|
|
118
|
+
|
|
119
|
+
l2_cache (bool, optional): (Ascend only) Whether to collect l2 cache data, collect when True. Default: False.
|
|
120
|
+
sync_enable (bool, optional): (GPU only) Whether the profiler collects operators in a synchronous way.
|
|
121
|
+
Default: True.
|
|
122
|
+
|
|
123
|
+
- True: The synchronous way. Before sending the operator to the GPU, the CPU records the start timestamp.
|
|
124
|
+
Then the operator is returned to the CPU after execution, and the end timestamp is recorded,
|
|
125
|
+
The duration of the operator is the difference between the two timestamps.
|
|
126
|
+
- False: The asynchronous way. The duration of the operator is that of sending from the CPU to the GPU.
|
|
127
|
+
This method can reduce the impact of adding profiler on overall training time.
|
|
128
|
+
data_process (bool, optional): (Ascend/GPU) Whether to collect data to prepare performance data.
|
|
129
|
+
Default value: True.
|
|
130
|
+
timeline_limit (int, optional): Set the maximum storage size of the timeline file (unit M). When using this
|
|
131
|
+
parameter, `op_time` must be set to True. Default value: 500.
|
|
75
132
|
|
|
76
133
|
Raises:
|
|
77
134
|
RuntimeError: When the version of CANN does not match the version of MindSpore,
|
|
@@ -131,15 +188,11 @@ class Profiler:
|
|
|
131
188
|
_ascend_job_id = ""
|
|
132
189
|
|
|
133
190
|
def __init__(self, **kwargs):
|
|
134
|
-
if Profiler._has_initialized:
|
|
135
|
-
msg = "Do not init twice in the profiler."
|
|
136
|
-
raise RuntimeError(msg)
|
|
137
|
-
Profiler._has_initialized = True
|
|
138
|
-
self._timeline_size_limit_byte = 500 * 1024 * 1024 # 500MB
|
|
139
191
|
self._dev_id = None
|
|
140
192
|
self._cpu_profiler = None
|
|
141
193
|
self._gpu_profiler = None
|
|
142
194
|
self._md_profiler = None
|
|
195
|
+
self._is_heterogeneous = False
|
|
143
196
|
self._profiler_manager = None
|
|
144
197
|
self._timeline_meta = []
|
|
145
198
|
self._init_time = None
|
|
@@ -147,21 +200,40 @@ class Profiler:
|
|
|
147
200
|
self._job_id_env = None
|
|
148
201
|
self._filt_optype_names = ''
|
|
149
202
|
self._output_path = ''
|
|
150
|
-
self._rank_size =
|
|
203
|
+
self._rank_size = 1
|
|
204
|
+
self._rank_id = 0
|
|
151
205
|
self._ascend_profiler = None
|
|
206
|
+
self._timeline_size_limit_byte = 500 * 1024 * 1024 # 500MB
|
|
207
|
+
self._parallel_strategy = True
|
|
152
208
|
_environment_check()
|
|
153
|
-
#
|
|
154
|
-
self.
|
|
155
|
-
self.
|
|
209
|
+
# default aicore_metrics type is ArithmeticUtilization
|
|
210
|
+
self._aicore_metrics_id = 0
|
|
211
|
+
self._l2_cache = "off"
|
|
212
|
+
self._data_process = True
|
|
213
|
+
self._op_time = True
|
|
156
214
|
self._profile_communication = False
|
|
157
215
|
self._has_started = False
|
|
158
216
|
self._has_started_twice = False
|
|
159
217
|
self.start_profile = True
|
|
160
218
|
self._profile_memory = False
|
|
219
|
+
self._sync_enable = True
|
|
161
220
|
self._stop_time = 0
|
|
162
221
|
self._dynamic_status = False
|
|
222
|
+
self._msprof_enable = os.getenv("PROFILER_SAMPLECONFIG")
|
|
223
|
+
if self._msprof_enable:
|
|
224
|
+
return
|
|
225
|
+
if kwargs.get("env_enable"):
|
|
226
|
+
self._profiler_init(kwargs)
|
|
227
|
+
return
|
|
228
|
+
if Profiler._has_initialized:
|
|
229
|
+
msg = "Do not init twice in the profiler."
|
|
230
|
+
raise RuntimeError(msg)
|
|
231
|
+
Profiler._has_initialized = True
|
|
232
|
+
# get device_id and device_target
|
|
233
|
+
self._get_devid_rankid_and_devtarget()
|
|
234
|
+
self._parser_kwargs(kwargs)
|
|
235
|
+
self._get_output_path(kwargs)
|
|
163
236
|
self._decide_device_target(kwargs)
|
|
164
|
-
self._pynative_profiler = None
|
|
165
237
|
if self.start_profile:
|
|
166
238
|
self.start()
|
|
167
239
|
|
|
@@ -179,7 +251,7 @@ class Profiler:
|
|
|
179
251
|
return output_path
|
|
180
252
|
|
|
181
253
|
@staticmethod
|
|
182
|
-
def
|
|
254
|
+
def _parse_start_log(input_file):
|
|
183
255
|
"""
|
|
184
256
|
Parse host start log file, get the start time of the job.
|
|
185
257
|
|
|
@@ -190,12 +262,9 @@ class Profiler:
|
|
|
190
262
|
str, job start time.
|
|
191
263
|
"""
|
|
192
264
|
|
|
193
|
-
job_start_time =
|
|
265
|
+
job_start_time = 0
|
|
194
266
|
with open(input_file) as f:
|
|
195
|
-
|
|
196
|
-
if "clock_realtime" in line:
|
|
197
|
-
# 16 means the first digit of the timestamp, len(line)-3 means the last.
|
|
198
|
-
job_start_time = line[16:len(line) - 3]
|
|
267
|
+
job_start_time = json.load(f).get("collectionTimeBegin")
|
|
199
268
|
|
|
200
269
|
return job_start_time
|
|
201
270
|
|
|
@@ -212,7 +281,7 @@ class Profiler:
|
|
|
212
281
|
Raises:
|
|
213
282
|
TypeError: If the op_name parameter type is incorrect.
|
|
214
283
|
TypeError: If the device_id parameter type is incorrect.
|
|
215
|
-
|
|
284
|
+
RuntimeError: If MindSpore runs on Ascend, this interface cannot be used.
|
|
216
285
|
|
|
217
286
|
Supported Platforms:
|
|
218
287
|
``GPU`` ``CPU``
|
|
@@ -267,12 +336,22 @@ class Profiler:
|
|
|
267
336
|
"""
|
|
268
337
|
Collect and analyze training performance data, support calls during and after training. The example shows above.
|
|
269
338
|
"""
|
|
339
|
+
if self._msprof_enable:
|
|
340
|
+
return
|
|
341
|
+
|
|
342
|
+
# Stop data collection after all operators are executed.
|
|
343
|
+
_pynative_executor.sync()
|
|
344
|
+
|
|
270
345
|
Profiler._has_initialized = False
|
|
271
346
|
self._dynamic_status = self._profiler_manager.dynamic_status()
|
|
272
347
|
_environment_check()
|
|
273
348
|
|
|
274
349
|
self._cpu_profiler.stop()
|
|
275
350
|
|
|
351
|
+
cpu_op_file = glob.glob(os.path.join(self._output_path, 'cpu_op_type_info_*'))
|
|
352
|
+
if self._device_target and self._device_target != DeviceTarget.CPU.value and cpu_op_file:
|
|
353
|
+
self._is_heterogeneous = True
|
|
354
|
+
ProfilerInfo.set_analyse_start_time(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))
|
|
276
355
|
if self._device_target and self._device_target == DeviceTarget.CPU.value:
|
|
277
356
|
self._cpu_analyse()
|
|
278
357
|
|
|
@@ -282,6 +361,11 @@ class Profiler:
|
|
|
282
361
|
elif self._device_target and self._device_target == DeviceTarget.ASCEND.value:
|
|
283
362
|
self._ascend_analyse()
|
|
284
363
|
logger.info("Profiling: all the data have been analyzed.")
|
|
364
|
+
self._init_profiler_info()
|
|
365
|
+
ProfilerInfo.set_analyse_end_time(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))
|
|
366
|
+
ProfilerInfo.set_rank_size(self._rank_size)
|
|
367
|
+
ProfilerInfo.set_heterogeneous(self._is_heterogeneous)
|
|
368
|
+
ProfilerInfo.save(self._output_path)
|
|
285
369
|
|
|
286
370
|
def start(self):
|
|
287
371
|
"""
|
|
@@ -315,10 +399,10 @@ class Profiler:
|
|
|
315
399
|
>>> def end(self, run_context):
|
|
316
400
|
... self.profiler.analyse()
|
|
317
401
|
"""
|
|
318
|
-
if
|
|
319
|
-
|
|
402
|
+
if self._msprof_enable:
|
|
403
|
+
return
|
|
320
404
|
|
|
321
|
-
self._start_time = int(time.time() *
|
|
405
|
+
self._start_time = int(time.time() * 1000000)
|
|
322
406
|
logger.info("Profiling: start time: %d", self._start_time)
|
|
323
407
|
|
|
324
408
|
if not self._has_started:
|
|
@@ -339,14 +423,16 @@ class Profiler:
|
|
|
339
423
|
self._cpu_profiler.step_profiling_enable(True)
|
|
340
424
|
|
|
341
425
|
if self._device_target and self._device_target == DeviceTarget.GPU.value:
|
|
342
|
-
self.
|
|
343
|
-
|
|
426
|
+
if self._data_process:
|
|
427
|
+
self._md_profiler.start()
|
|
428
|
+
self._gpu_profiler.data_process_enable(True)
|
|
429
|
+
if self._op_time:
|
|
430
|
+
self._gpu_profiler.step_profiling_enable(True)
|
|
344
431
|
elif self._device_target and self._device_target == DeviceTarget.ASCEND.value:
|
|
345
|
-
self.
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
self._ascend_graph_start()
|
|
432
|
+
if self._data_process:
|
|
433
|
+
self._md_profiler.start()
|
|
434
|
+
self._ascend_graph_start()
|
|
435
|
+
ProfilerInfo.set_profiling_start_time(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))
|
|
350
436
|
|
|
351
437
|
def stop(self):
|
|
352
438
|
"""
|
|
@@ -378,6 +464,9 @@ class Profiler:
|
|
|
378
464
|
>>> def end(self, run_context):
|
|
379
465
|
... self.profiler.analyse()
|
|
380
466
|
"""
|
|
467
|
+
if self._msprof_enable:
|
|
468
|
+
return
|
|
469
|
+
|
|
381
470
|
if self._has_started:
|
|
382
471
|
self._has_started = False
|
|
383
472
|
else:
|
|
@@ -388,18 +477,53 @@ class Profiler:
|
|
|
388
477
|
if self._is_offline_parser():
|
|
389
478
|
return
|
|
390
479
|
|
|
391
|
-
|
|
392
|
-
|
|
480
|
+
# Stop data collection after all operators are executed.
|
|
481
|
+
_pynative_executor.sync()
|
|
482
|
+
|
|
483
|
+
if self._data_process:
|
|
484
|
+
self._md_profiler.stop()
|
|
485
|
+
self._md_profiler.save(self._output_path)
|
|
393
486
|
|
|
394
487
|
if self._device_target and self._device_target == DeviceTarget.GPU.value:
|
|
395
488
|
self._gpu_profiler.stop()
|
|
396
489
|
elif self._device_target and self._device_target == DeviceTarget.ASCEND.value:
|
|
397
|
-
if context.get_context("mode") == context.PYNATIVE_MODE:
|
|
398
|
-
self._pynative_profiler.stop()
|
|
399
490
|
self._ascend_profiler.stop()
|
|
400
491
|
|
|
401
492
|
self._stop_time = int(time.time() * 10000000)
|
|
402
|
-
|
|
493
|
+
ProfilerInfo.set_profiling_stop_time(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))
|
|
494
|
+
logger.info("Profiling: stop time: %d", self._stop_time)
|
|
495
|
+
|
|
496
|
+
def _profiler_init(self, kwargs):
|
|
497
|
+
"""Initialize variables when profiler is enabled by environment variables."""
|
|
498
|
+
options = kwargs.get("env_enable")
|
|
499
|
+
self._has_started = True
|
|
500
|
+
self._start_time = options.get("start_time")
|
|
501
|
+
self._output_path = options.get('file_output_path')
|
|
502
|
+
self._profile_memory = options.get('profile_memory')
|
|
503
|
+
self._parallel_strategy = options.get('parallel_strategy')
|
|
504
|
+
self._timeline_size_limit_byte = options.get('timeline_limit') * 1024 * 1024
|
|
505
|
+
self._data_process = options.get('data_process')
|
|
506
|
+
self._profile_communication = options.get('profile_communication')
|
|
507
|
+
self._op_time = options.get('op_time')
|
|
508
|
+
self._device_target = context.get_context("device_target").lower()
|
|
509
|
+
self._profiler_manager = c_expression.ProfilerManager.get_instance()
|
|
510
|
+
self._cpu_profiler = c_expression.Profiler.get_instance("CPU")
|
|
511
|
+
if self._data_process:
|
|
512
|
+
self._md_profiler = cde.GlobalContext.profiling_manager()
|
|
513
|
+
if self._device_target == DeviceTarget.GPU.value:
|
|
514
|
+
self._gpu_profiler = c_expression.Profiler.get_instance("GPU")
|
|
515
|
+
|
|
516
|
+
if self._device_target == DeviceTarget.ASCEND.value:
|
|
517
|
+
self._ascend_profiler = c_expression.Profiler.get_instance("Ascend")
|
|
518
|
+
self._get_devid_rankid_and_devtarget()
|
|
519
|
+
|
|
520
|
+
def _init_profiler_info(self):
|
|
521
|
+
"""Init profiler info filer."""
|
|
522
|
+
mode = "graph"
|
|
523
|
+
if context.get_context("mode") == context.PYNATIVE_MODE:
|
|
524
|
+
mode = "pynative"
|
|
525
|
+
store_id = self._dev_id if self._device_target == DeviceTarget.GPU.value else self._rank_id
|
|
526
|
+
ProfilerInfo.init_info(mode, store_id)
|
|
403
527
|
|
|
404
528
|
def _decide_device_target(self, kwargs):
|
|
405
529
|
"""Complete Profiler initialization according to device_target"""
|
|
@@ -421,9 +545,6 @@ class Profiler:
|
|
|
421
545
|
|
|
422
546
|
def _cpu_profiler_init(self, kwargs):
|
|
423
547
|
"""Cpu profiler init."""
|
|
424
|
-
if context.get_context("mode") == context.PYNATIVE_MODE:
|
|
425
|
-
raise RuntimeError("Pynative mode is not supported on CPU currently.")
|
|
426
|
-
|
|
427
548
|
self.start_profile = kwargs.pop("start_profile", True)
|
|
428
549
|
if not isinstance(self.start_profile, bool):
|
|
429
550
|
raise TypeError(f"For '{self.__class__.__name__}', the parameter start_profile must be bool, "
|
|
@@ -432,24 +553,26 @@ class Profiler:
|
|
|
432
553
|
def _gpu_profiler_init(self, kwargs):
|
|
433
554
|
"""Gpu profiler init."""
|
|
434
555
|
# Setup and start MindData Profiling
|
|
435
|
-
self.
|
|
436
|
-
|
|
437
|
-
|
|
438
|
-
raise RuntimeError("Pynative mode is not supported on GPU currently.")
|
|
556
|
+
if self._data_process:
|
|
557
|
+
self._md_profiler = cde.GlobalContext.profiling_manager()
|
|
558
|
+
self._md_profiler.init()
|
|
439
559
|
self._parse_parameter_for_gpu(kwargs)
|
|
440
560
|
|
|
441
561
|
gpu_profiler = c_expression.Profiler
|
|
442
562
|
self._gpu_profiler = gpu_profiler.get_instance("GPU")
|
|
443
563
|
self._gpu_profiler.init(self._output_path)
|
|
564
|
+
self._gpu_profiler.sync_enable(self._sync_enable)
|
|
444
565
|
if GlobalComm.WORLD_COMM_GROUP == "nccl_world_group":
|
|
445
566
|
self._dev_id = str(get_rank())
|
|
446
567
|
os.environ['DEVICE_ID'] = self._dev_id
|
|
568
|
+
self._rank_id = self._dev_id
|
|
447
569
|
|
|
448
570
|
def _ascend_profiler_init(self, kwargs):
|
|
449
571
|
"""Ascend profiler init."""
|
|
450
572
|
# Setup and start MindData Profiling
|
|
451
|
-
self.
|
|
452
|
-
|
|
573
|
+
if self._data_process:
|
|
574
|
+
self._md_profiler = cde.GlobalContext.profiling_manager()
|
|
575
|
+
self._md_profiler.init()
|
|
453
576
|
self._init_time = int(time.time() * 10000000)
|
|
454
577
|
logger.info("Profiling: profiling init time: %d", self._init_time)
|
|
455
578
|
self._parse_parameter_for_ascend(kwargs)
|
|
@@ -476,13 +599,6 @@ class Profiler:
|
|
|
476
599
|
"""
|
|
477
600
|
Construct profiling options to determine which profiling data should be collected.
|
|
478
601
|
"""
|
|
479
|
-
profile_memory = "off"
|
|
480
|
-
if self._profile_memory:
|
|
481
|
-
profile_memory = "on"
|
|
482
|
-
profiler_communication = "off"
|
|
483
|
-
if self._profile_communication:
|
|
484
|
-
profiler_communication = "on"
|
|
485
|
-
|
|
486
602
|
fp_point = os.environ.get("PROFILING_FP_START", "")
|
|
487
603
|
bp_point = os.environ.get("PROFILING_BP_END", "")
|
|
488
604
|
|
|
@@ -490,41 +606,35 @@ class Profiler:
|
|
|
490
606
|
"output": self._output_path,
|
|
491
607
|
"fp_point": fp_point,
|
|
492
608
|
"bp_point": bp_point,
|
|
493
|
-
"training_trace": "on",
|
|
494
|
-
"task_trace": "on",
|
|
495
|
-
"aic_metrics": "ArithmeticUtilization",
|
|
496
|
-
"aicpu": "on",
|
|
497
|
-
"profile_memory":
|
|
498
|
-
"hccl":
|
|
499
|
-
"
|
|
609
|
+
"training_trace": "on" if self._op_time else "off",
|
|
610
|
+
"task_trace": "on" if self._op_time else "off",
|
|
611
|
+
"aic_metrics": AICORE_METRICS_DICT.get(self._aicore_metrics_id, "ArithmeticUtilization"),
|
|
612
|
+
"aicpu": "on" if self._data_process or self._op_time else "off",
|
|
613
|
+
"profile_memory": "on" if self._op_time and self._profile_memory else "off",
|
|
614
|
+
"hccl": "on" if self._op_time and self._profile_communication else "off",
|
|
615
|
+
"l2_cache": self._l2_cache,
|
|
616
|
+
"parallel_strategy": "on" if self._parallel_strategy else "off",
|
|
617
|
+
"op_time": "on" if self._op_time else "off"
|
|
500
618
|
}
|
|
501
619
|
|
|
502
620
|
return profiling_options
|
|
503
621
|
|
|
504
622
|
def _parse_parameter_for_gpu(self, kwargs):
|
|
505
623
|
"""Parse parameter in Proflier when the device target is GPU."""
|
|
506
|
-
|
|
507
624
|
self.start_profile = kwargs.pop("start_profile", True)
|
|
508
625
|
if not isinstance(self.start_profile, bool):
|
|
509
626
|
raise TypeError(f"For '{self.__class__.__name__}', the parameter start_profile must be bool, "
|
|
510
627
|
f"but got type {type(self.start_profile)}")
|
|
511
628
|
|
|
512
|
-
self.
|
|
513
|
-
if not isinstance(self.
|
|
514
|
-
|
|
515
|
-
|
|
516
|
-
if self._profile_communication:
|
|
517
|
-
raise RuntimeError(f"The parameter profile_communication is not supported on GPU currently.")
|
|
518
|
-
|
|
519
|
-
self._profile_memory = kwargs.pop("profile_memory", False)
|
|
520
|
-
if not isinstance(self._profile_memory, bool):
|
|
521
|
-
raise TypeError(f"For '{self.__class__.__name__}', the parameter _profile_memory must be bool, "
|
|
522
|
-
f"but got type {type(self._profile_memory)}")
|
|
523
|
-
if self._profile_memory:
|
|
524
|
-
raise RuntimeError(f"The parameter profile_memory is not supported on GPU currently.")
|
|
629
|
+
self._sync_enable = kwargs.pop("sync_enable", True)
|
|
630
|
+
if not isinstance(self._sync_enable, bool):
|
|
631
|
+
logger.warning("The parameter sync_enable is an invalid value, it will be set to True.")
|
|
632
|
+
self._sync_enable = True
|
|
525
633
|
|
|
526
634
|
def _parse_parameter_for_ascend(self, kwargs):
|
|
527
635
|
"""Parse parameter in Proflier when the device target is Ascend."""
|
|
636
|
+
ascend_job_id = kwargs.pop("ascend_job_id", "")
|
|
637
|
+
self._set_ascend_job_id(ascend_job_id)
|
|
528
638
|
self.start_profile = kwargs.pop("start_profile", True)
|
|
529
639
|
if not isinstance(self.start_profile, bool):
|
|
530
640
|
raise TypeError(f"For '{self.__class__.__name__}', the parameter start_profile must be bool, "
|
|
@@ -532,8 +642,10 @@ class Profiler:
|
|
|
532
642
|
|
|
533
643
|
self._profile_communication = kwargs.pop("profile_communication", False)
|
|
534
644
|
if not isinstance(self._profile_communication, bool):
|
|
535
|
-
|
|
536
|
-
|
|
645
|
+
logger.warning(f"For '{self.__class__.__name__}', the parameter profile_communication must be bool, "
|
|
646
|
+
f"but got type {type(self._profile_communication)}, it will be set to False.")
|
|
647
|
+
self._profile_communication = False
|
|
648
|
+
|
|
537
649
|
if self._profile_communication:
|
|
538
650
|
hccl_option = {"output": self._output_path, "task_trace": "on"}
|
|
539
651
|
os.environ['PROFILING_OPTIONS'] = json.dumps(hccl_option)
|
|
@@ -543,10 +655,36 @@ class Profiler:
|
|
|
543
655
|
|
|
544
656
|
self._profile_memory = kwargs.pop("profile_memory", False)
|
|
545
657
|
if not isinstance(self._profile_memory, bool):
|
|
546
|
-
|
|
547
|
-
|
|
548
|
-
|
|
549
|
-
|
|
658
|
+
logger.warning(f"For '{self.__class__.__name__}', the parameter profile_memory must be bool, "
|
|
659
|
+
f"but got type {type(self._profile_memory)}, it will be set to False.")
|
|
660
|
+
self._profile_memory = False
|
|
661
|
+
|
|
662
|
+
self._aicore_metrics_id = kwargs.pop("aicore_metrics", 0)
|
|
663
|
+
if not isinstance(self._aicore_metrics_id, int):
|
|
664
|
+
logger.warning(f"For '{self.__class__.__name__}', the parameter aicore_metrics must be int, "
|
|
665
|
+
f"but got type {type(self._aicore_metrics_id)}, it will be set to 0.")
|
|
666
|
+
self._aicore_metrics_id = 0
|
|
667
|
+
|
|
668
|
+
if self._aicore_metrics_id not in AICORE_METRICS_DICT:
|
|
669
|
+
logger.warning(f"For '{self.__class__.__name__}', the parameter aicore_metrics must be in "
|
|
670
|
+
f"[-1, 0, 1, 2, 3, 4, 5], but got {self._aicore_metrics_id}, it will be set to 0.")
|
|
671
|
+
self._aicore_metrics_id = 0
|
|
672
|
+
|
|
673
|
+
l2_cache_enable = kwargs.pop("l2_cache", False)
|
|
674
|
+
if not isinstance(l2_cache_enable, bool):
|
|
675
|
+
logger.warning(f"For '{self.__class__.__name__}', the parameter l2_cache must be bool, "
|
|
676
|
+
f"but got type {type(l2_cache_enable)}, it will be set to False.")
|
|
677
|
+
l2_cache_enable = False
|
|
678
|
+
if l2_cache_enable:
|
|
679
|
+
self._l2_cache = "on"
|
|
680
|
+
else:
|
|
681
|
+
self._l2_cache = "off"
|
|
682
|
+
|
|
683
|
+
self._parallel_strategy = kwargs.pop("parallel_strategy", True)
|
|
684
|
+
if not isinstance(self._parallel_strategy, bool):
|
|
685
|
+
logger.warning(f"For '{self.__class__.__name__}', the parameter parallel_strategy must be bool, "
|
|
686
|
+
f"but got type {type(self._parallel_strategy)}, it will be set to True.")
|
|
687
|
+
self._parallel_strategy = True
|
|
550
688
|
|
|
551
689
|
task_sink = os.getenv("GRAPH_OP_RUN")
|
|
552
690
|
if task_sink and task_sink == "1":
|
|
@@ -555,6 +693,8 @@ class Profiler:
|
|
|
555
693
|
|
|
556
694
|
def _set_ascend_job_id(self, ascend_job_id):
|
|
557
695
|
"""Set output_path for offline parsing performance data."""
|
|
696
|
+
if not ascend_job_id:
|
|
697
|
+
return
|
|
558
698
|
self._ascend_job_id = validate_and_normalize_path(ascend_job_id)
|
|
559
699
|
if not os.path.exists(self._ascend_job_id):
|
|
560
700
|
msg = f"Invalid ascend_job_id: {self._ascend_job_id}, Please pass the absolute path of the JOB dir"
|
|
@@ -599,34 +739,62 @@ class Profiler:
|
|
|
599
739
|
|
|
600
740
|
if GlobalComm.INITED:
|
|
601
741
|
self._rank_size = get_group_size()
|
|
742
|
+
else:
|
|
743
|
+
self._rank_size = int(os.getenv('RANK_SIZE', '1'))
|
|
602
744
|
|
|
603
745
|
if self._has_started:
|
|
604
746
|
self.stop()
|
|
605
747
|
else:
|
|
606
748
|
logger.info("No need to stop profiler because profiler has been stopped.")
|
|
749
|
+
self._ascend_graph_analyse()
|
|
607
750
|
|
|
608
|
-
|
|
609
|
-
|
|
610
|
-
|
|
611
|
-
self.
|
|
751
|
+
def _ascend_timeline_analyse(self, aicpu_data_parser, optime_parser, source_path):
|
|
752
|
+
"""Analyse timeline info."""
|
|
753
|
+
try:
|
|
754
|
+
self._analyse_timeline(aicpu_data_parser, optime_parser, source_path)
|
|
755
|
+
except (ProfilerIOException, ProfilerFileNotFoundException, RuntimeError) as err:
|
|
756
|
+
logger.warning('Fail to write timeline data: %s', err)
|
|
757
|
+
finally:
|
|
758
|
+
pass
|
|
612
759
|
|
|
613
|
-
|
|
760
|
+
def _ascend_step_trace_analyse(self, source_path, framework_parser):
|
|
761
|
+
"""Analyse step trace info."""
|
|
762
|
+
points, is_training_mode_flag = None, False
|
|
614
763
|
try:
|
|
615
|
-
|
|
616
|
-
|
|
617
|
-
|
|
618
|
-
|
|
619
|
-
if context.get_context("mode") == context.PYNATIVE_MODE:
|
|
620
|
-
logger.warning("Pynative mode does not support MSAdvisor analyzer currently.")
|
|
621
|
-
else:
|
|
622
|
-
logger.warning("MSAdvisor running failed. %s", err)
|
|
764
|
+
if self._is_support_step_info_collect() and not self._dynamic_status:
|
|
765
|
+
points, is_training_mode_flag = self._analyse_step_trace(source_path, framework_parser)
|
|
766
|
+
except ProfilerException as err:
|
|
767
|
+
logger.warning(err.message)
|
|
623
768
|
finally:
|
|
624
769
|
pass
|
|
770
|
+
return points, is_training_mode_flag
|
|
771
|
+
|
|
772
|
+
def _ascend_dynamic_net_analyse(self):
|
|
773
|
+
"""Analyse dynamic shape network info."""
|
|
774
|
+
if self._profile_communication:
|
|
775
|
+
logger.warning(
|
|
776
|
+
"The profile_communication parameter cannot be set on the dynamic shape network.")
|
|
777
|
+
if self._profile_memory:
|
|
778
|
+
logger.warning("The profile_memory parameter cannot be set on the dynamic shape network.")
|
|
779
|
+
logger.warning(
|
|
780
|
+
"[Profiler]Dynamic Shape network does not support collecting step trace performance data currently.")
|
|
781
|
+
dynamic_parser = DynamicFrameWorkParser(self._output_path, self._rank_id)
|
|
782
|
+
dynamic_parser.write_dynamic_shape_data()
|
|
783
|
+
|
|
784
|
+
def _ascend_flops_analyse(self, source_path, op_task_dict, is_training_mode_flag):
|
|
785
|
+
"""Get op FLOPs from aicore.data.x.slice.0 file, and compute FLOPS, write output_op_flops_x.txt."""
|
|
786
|
+
flops_parser = FlopsParser(source_path, self._output_path, op_task_dict, self._dev_id, self._rank_id,
|
|
787
|
+
is_training_mode_flag)
|
|
788
|
+
logger.info("Profiling: analyzing the operation FLOPs.")
|
|
789
|
+
flops_parser.execute()
|
|
625
790
|
|
|
626
791
|
def _ascend_graph_memory_analyse(self, points):
|
|
627
792
|
"""Analyse memory usage info."""
|
|
628
793
|
if not self._profile_memory:
|
|
629
794
|
return
|
|
795
|
+
if self._profile_memory and context.get_context("mode") == context.PYNATIVE_MODE:
|
|
796
|
+
logger.warning("[Profiler]The parameter profile_memory is not supported on Ascend "
|
|
797
|
+
"PyNative mode currently.")
|
|
630
798
|
try:
|
|
631
799
|
logger.info("Profiling: analyzing the memory usage info.")
|
|
632
800
|
self._analyse_memory_usage(points)
|
|
@@ -639,6 +807,9 @@ class Profiler:
|
|
|
639
807
|
"""Analyse hccl profiler info."""
|
|
640
808
|
if not self._profile_communication:
|
|
641
809
|
return
|
|
810
|
+
if self._profile_communication and context.get_context("mode") == context.PYNATIVE_MODE:
|
|
811
|
+
logger.warning("[Profiler]The parameter profile_communication is not supported on Ascend "
|
|
812
|
+
"PyNative mode currently.")
|
|
642
813
|
try:
|
|
643
814
|
logger.info("Profiling: analyzing the hccl profiler info.")
|
|
644
815
|
self._analyse_hccl_info()
|
|
@@ -647,6 +818,26 @@ class Profiler:
|
|
|
647
818
|
finally:
|
|
648
819
|
pass
|
|
649
820
|
|
|
821
|
+
def _ascend_graph_msadvisor_analyse(self, job_id):
|
|
822
|
+
"""Call MSAdvisor function."""
|
|
823
|
+
logger.info("MSAdvisor starts running.")
|
|
824
|
+
msadvisor = Msadvisor(job_id, self._rank_id, self._output_path)
|
|
825
|
+
try:
|
|
826
|
+
msadvisor.analyse()
|
|
827
|
+
except FileNotFoundError as err:
|
|
828
|
+
logger.warning("MSAdvisor: command not found,"
|
|
829
|
+
"please check if installed ascend-toolkit and set environment path correctly. %s", err)
|
|
830
|
+
except OSError as err:
|
|
831
|
+
logger.warning("Cannot execute binary file: Exec format error. %s", err)
|
|
832
|
+
except subprocess.CalledProcessError:
|
|
833
|
+
logger.warning("MSAdvisor running failed, please check MSAdvisor running log.")
|
|
834
|
+
except (ValueError, ProfilerFileNotFoundException) as err:
|
|
835
|
+
logger.warning("MSAdvisor running failed. %s", err)
|
|
836
|
+
finally:
|
|
837
|
+
pass
|
|
838
|
+
if context.get_context("mode") == context.PYNATIVE_MODE:
|
|
839
|
+
logger.warning("Pynative mode does not support MSAdvisor analyzer currently.")
|
|
840
|
+
|
|
650
841
|
def _ascend_graph_op_analyse(self, source_path):
|
|
651
842
|
"""
|
|
652
843
|
Ascend graph model hwts analyse.
|
|
@@ -690,29 +881,37 @@ class Profiler:
|
|
|
690
881
|
logger.info("Profiling: analyzing the data preprocess data.")
|
|
691
882
|
aicpu_data_parser.execute()
|
|
692
883
|
|
|
884
|
+
# analyse op compute time info
|
|
885
|
+
try:
|
|
886
|
+
self._analyser_op_info()
|
|
887
|
+
except ProfilerException as err:
|
|
888
|
+
logger.warning(err.message)
|
|
889
|
+
finally:
|
|
890
|
+
pass
|
|
693
891
|
return [framework_parser, aicpu_data_parser, optime_parser, op_task_dict]
|
|
694
892
|
|
|
695
|
-
def
|
|
893
|
+
def _minddata_analyse(self, source_path):
|
|
696
894
|
"""Analyse mindadata for ascend graph model."""
|
|
895
|
+
if not self._data_process:
|
|
896
|
+
return
|
|
897
|
+
store_id = self._rank_id if self._device_target == DeviceTarget.ASCEND.value else self._dev_id
|
|
697
898
|
# Parsing minddata AICPU profiling
|
|
698
|
-
|
|
699
|
-
|
|
899
|
+
if self._device_target == DeviceTarget.ASCEND.value:
|
|
900
|
+
logger.info("Profiling: analyzing the minddata AICPU data.")
|
|
901
|
+
MinddataParser.execute(source_path, self._output_path, store_id)
|
|
700
902
|
|
|
701
903
|
# parse minddata pipeline operator and queue
|
|
702
904
|
try:
|
|
703
|
-
|
|
704
|
-
logger.info("Profiling: analyzing the minddata pipeline operator and queue.")
|
|
705
|
-
pipeline_parser.parse()
|
|
905
|
+
MinddataPipelineParser(self._output_path, store_id, self._output_path).parse()
|
|
706
906
|
except ProfilerException as err:
|
|
707
907
|
logger.warning(err.message)
|
|
708
908
|
finally:
|
|
709
909
|
pass
|
|
710
910
|
|
|
711
911
|
# Analyze minddata information
|
|
912
|
+
logger.info("Profiling: analyzing the minddata information.")
|
|
712
913
|
try:
|
|
713
|
-
|
|
714
|
-
logger.info("Profiling: analyzing the minddata information.")
|
|
715
|
-
md_analyzer.analyze()
|
|
914
|
+
MinddataProfilingAnalyzer(self._output_path, store_id, self._output_path).analyze()
|
|
716
915
|
except ProfilerException as err:
|
|
717
916
|
logger.warning(err.message)
|
|
718
917
|
finally:
|
|
@@ -727,63 +926,18 @@ class Profiler:
|
|
|
727
926
|
|
|
728
927
|
self._check_output_path(output_path=self._output_path)
|
|
729
928
|
source_path = os.path.join(self._output_path, job_id)
|
|
730
|
-
|
|
731
|
-
self.
|
|
732
|
-
|
|
733
|
-
|
|
734
|
-
|
|
735
|
-
|
|
736
|
-
self.
|
|
737
|
-
|
|
738
|
-
|
|
739
|
-
|
|
740
|
-
|
|
741
|
-
|
|
742
|
-
if self._dynamic_status and self._profile_communication:
|
|
743
|
-
raise RuntimeError("The profile_communication parameter cannot be set on the dynamic shape network.")
|
|
744
|
-
if self._dynamic_status and self._profile_memory:
|
|
745
|
-
raise RuntimeError("The profile_memory parameter cannot be set on the dynamic shape network.")
|
|
746
|
-
|
|
747
|
-
# analyse step trace info
|
|
748
|
-
points = None
|
|
749
|
-
is_training_mode_flag = False
|
|
750
|
-
|
|
751
|
-
# analyse timeline info
|
|
752
|
-
try:
|
|
753
|
-
logger.info("Profiling: analyzing the timeline data.")
|
|
754
|
-
self._analyse_timeline(aicpu_data_parser, optime_parser, source_path)
|
|
755
|
-
except (ProfilerIOException, ProfilerFileNotFoundException, RuntimeError) as err:
|
|
756
|
-
logger.warning('Fail to write timeline data: %s', err)
|
|
757
|
-
finally:
|
|
758
|
-
pass
|
|
759
|
-
|
|
760
|
-
# get op FLOPs from aicore.data.x.slice.0 file, and compute FLOPS, write output_op_flops_x.txt
|
|
761
|
-
if not self._dynamic_status:
|
|
762
|
-
try:
|
|
763
|
-
logger.info("Profiling: analyzing the step trace data.")
|
|
764
|
-
points, is_training_mode_flag = self._analyse_step_trace(source_path, framework_parser)
|
|
765
|
-
except ProfilerException as err:
|
|
766
|
-
logger.warning(err.message)
|
|
767
|
-
finally:
|
|
768
|
-
pass
|
|
769
|
-
|
|
770
|
-
flops_parser = FlopsParser(source_path, self._output_path, op_task_dict,
|
|
771
|
-
self._dev_id, self._rank_id, is_training_mode_flag)
|
|
772
|
-
logger.info("Profiling: analyzing the operation FLOPs.")
|
|
773
|
-
flops_parser.execute()
|
|
774
|
-
else:
|
|
775
|
-
dynamic_parser = DynamicFrameWorkParser(self._output_path, self._rank_id)
|
|
776
|
-
dynamic_parser.write_dynamic_shape_data()
|
|
777
|
-
|
|
778
|
-
self._ascend_graph_memory_analyse(points)
|
|
779
|
-
self._ascend_graph_hccl_analyse()
|
|
780
|
-
|
|
781
|
-
def _ascend_pynative_start(self):
|
|
782
|
-
"""Ascend pynative mode start profiling."""
|
|
783
|
-
pynative_profiler = c_expression.Profiler
|
|
784
|
-
self._pynative_profiler = pynative_profiler.get_instance("PyNative")
|
|
785
|
-
self._pynative_profiler.init(self._output_path)
|
|
786
|
-
self._ascend_profiler.start()
|
|
929
|
+
self._minddata_analyse(source_path)
|
|
930
|
+
if self._op_time:
|
|
931
|
+
framework_parser, aicpu_data_parser, optime_parser, op_task_dict = self._ascend_graph_op_analyse(
|
|
932
|
+
source_path)
|
|
933
|
+
points, is_training_mode_flag = self._ascend_step_trace_analyse(source_path, framework_parser)
|
|
934
|
+
self._ascend_timeline_analyse(aicpu_data_parser, optime_parser, source_path)
|
|
935
|
+
if self._dynamic_status:
|
|
936
|
+
self._ascend_dynamic_net_analyse()
|
|
937
|
+
self._ascend_flops_analyse(source_path, op_task_dict, is_training_mode_flag)
|
|
938
|
+
self._ascend_graph_memory_analyse(points)
|
|
939
|
+
self._ascend_graph_hccl_analyse()
|
|
940
|
+
self._ascend_graph_msadvisor_analyse(job_id)
|
|
787
941
|
|
|
788
942
|
def _ascend_graph_start(self):
|
|
789
943
|
"""Ascend graph mode start profiling."""
|
|
@@ -798,49 +952,58 @@ class Profiler:
|
|
|
798
952
|
|
|
799
953
|
if GlobalComm.INITED:
|
|
800
954
|
self._rank_size = get_group_size()
|
|
955
|
+
else:
|
|
956
|
+
self._rank_size = int(os.getenv('RANK_SIZE', '1'))
|
|
801
957
|
|
|
802
958
|
if self._has_started:
|
|
803
959
|
self.stop()
|
|
804
960
|
else:
|
|
805
961
|
logger.info("No need to stop profiler because profiler has been stopped.")
|
|
806
962
|
|
|
807
|
-
|
|
808
|
-
timeline_generator = self._generate_timeline(reduce_op_type)
|
|
809
|
-
|
|
810
|
-
# parse minddata pipeline operator and queue for GPU
|
|
811
|
-
try:
|
|
812
|
-
pipeline_parser = MinddataPipelineParser(self._output_path, self._dev_id, self._output_path)
|
|
813
|
-
logger.info("Profiling: analyzing the minddata pipeline operator and queue for GPU.")
|
|
814
|
-
pipeline_parser.parse()
|
|
815
|
-
except ProfilerException as err:
|
|
816
|
-
logger.warning(err.message)
|
|
963
|
+
self._minddata_analyse(self._output_path)
|
|
817
964
|
|
|
818
|
-
# Analyze minddata information
|
|
819
965
|
try:
|
|
820
|
-
|
|
821
|
-
logger.info("Profiling: analyzing the minddata information.")
|
|
822
|
-
md_analyzer.analyze()
|
|
966
|
+
self._analyse_step_relation_info()
|
|
823
967
|
except ProfilerException as err:
|
|
824
968
|
logger.warning(err.message)
|
|
969
|
+
finally:
|
|
970
|
+
pass
|
|
825
971
|
|
|
826
|
-
|
|
827
|
-
|
|
828
|
-
|
|
972
|
+
def _is_support_step_info_collect(self, analyse_step_trace=True):
|
|
973
|
+
"""Whether iteration related information needs to be parsed."""
|
|
974
|
+
profiler_info = ProfilerInfo.get_profiler_info()
|
|
975
|
+
graph_ids = profiler_info.get("graph_ids")
|
|
976
|
+
if len(graph_ids) > 1:
|
|
977
|
+
analyse_step_trace = False
|
|
978
|
+
logger.warning(
|
|
979
|
+
"[Profiler]Current model has multiple sub graphs, the segmentation of steps may be inaccurate.")
|
|
980
|
+
if context.get_context("mode") == context.PYNATIVE_MODE:
|
|
981
|
+
analyse_step_trace = False
|
|
982
|
+
logger.warning(
|
|
983
|
+
"[Profiler]Pynative mode does not support collecting step trace performance data currently.")
|
|
984
|
+
if self._is_heterogeneous:
|
|
985
|
+
analyse_step_trace = False
|
|
986
|
+
logger.warning(
|
|
987
|
+
"[Profiler]Profiler does not support collecting step trace performance data for heterogeneous "
|
|
988
|
+
"scenarios currently.")
|
|
989
|
+
return analyse_step_trace
|
|
990
|
+
|
|
991
|
+
def _analyse_step_relation_info(self):
|
|
992
|
+
"""Parse iteration related information."""
|
|
993
|
+
if not self._op_time:
|
|
994
|
+
return
|
|
995
|
+
reduce_op_type = self._get_step_reduce_op_type()
|
|
996
|
+
timeline_generator = self._generate_timeline(reduce_op_type)
|
|
997
|
+
parser = GpuFrameWorkParser(self._output_path, self._dev_id)
|
|
998
|
+
graph_ids = parser.get_graph_ids()
|
|
999
|
+
ProfilerInfo.set_graph_ids(graph_ids)
|
|
1000
|
+
if self._is_support_step_info_collect():
|
|
829
1001
|
self._analyse_step_trace(
|
|
830
1002
|
is_training_mode_flag=timeline_generator.check_op_name('Gradients'),
|
|
831
1003
|
is_gpu_kernel_async_launch_flag=timeline_generator.is_gpu_kernel_async_launch()
|
|
832
1004
|
)
|
|
833
|
-
|
|
834
|
-
|
|
835
|
-
finally:
|
|
836
|
-
pass
|
|
837
|
-
if self._dynamic_status:
|
|
838
|
-
parser = GpuFrameWorkParser(self._output_path, self._dev_id)
|
|
839
|
-
parser.analyse_dynamic_shape_data(self._timeline_meta)
|
|
840
|
-
logger.warning(
|
|
841
|
-
'\nThe GPU supports only the training mode or inference mode, '
|
|
842
|
-
'it does not support train and infer at the same time.'
|
|
843
|
-
)
|
|
1005
|
+
if self._dynamic_status:
|
|
1006
|
+
parser.analyse_dynamic_shape_data(self._timeline_meta)
|
|
844
1007
|
|
|
845
1008
|
def _get_step_reduce_op_type(self):
|
|
846
1009
|
"""Gets all communication operator names."""
|
|
@@ -860,15 +1023,15 @@ class Profiler:
|
|
|
860
1023
|
"""Collect and analyse cpu performance data."""
|
|
861
1024
|
|
|
862
1025
|
try:
|
|
863
|
-
timeline_generator = CpuTimelineGenerator(self._output_path, context.get_context("mode"))
|
|
1026
|
+
timeline_generator = CpuTimelineGenerator(self._output_path, self._rank_id, context.get_context("mode"))
|
|
864
1027
|
timeline_generator.init_timeline()
|
|
865
1028
|
timeline_generator.write_timeline(self._timeline_size_limit_byte)
|
|
866
1029
|
timeline_generator.write_timeline_summary()
|
|
867
1030
|
except (ProfilerIOException, ProfilerFileNotFoundException, RuntimeError) as err:
|
|
868
1031
|
logger.warning('Fail to write timeline data: %s', err)
|
|
869
1032
|
raise RuntimeError('Fail to write timeline data.') from err
|
|
870
|
-
if
|
|
871
|
-
raise RuntimeError(
|
|
1033
|
+
if context.get_context("mode") == context.PYNATIVE_MODE:
|
|
1034
|
+
raise RuntimeError("Pynative mode is not supported on CPU currently.")
|
|
872
1035
|
|
|
873
1036
|
def _analyse_step_trace(self, source_path=None, framework_parser=None, is_training_mode_flag=True,
|
|
874
1037
|
is_gpu_kernel_async_launch_flag=False):
|
|
@@ -935,6 +1098,7 @@ class Profiler:
|
|
|
935
1098
|
optime_parser (OPComputeTimeParserParser): The parser instance for AI Core
|
|
936
1099
|
operator execution time calculation.
|
|
937
1100
|
"""
|
|
1101
|
+
logger.info("Profiling: analyzing the timeline data.")
|
|
938
1102
|
timeline_analyser = AscendTimelineGenerator(self._output_path, self._dev_id, self._rank_id,
|
|
939
1103
|
self._rank_size, context.get_context("mode"))
|
|
940
1104
|
# Get framework info
|
|
@@ -1015,13 +1179,13 @@ class Profiler:
|
|
|
1015
1179
|
else:
|
|
1016
1180
|
job_dir = os.path.join(self._output_path, dir_name)
|
|
1017
1181
|
|
|
1018
|
-
|
|
1019
|
-
if
|
|
1182
|
+
start_file_path = get_file_path(job_dir, "start_info")
|
|
1183
|
+
if start_file_path is None:
|
|
1020
1184
|
logger.warning("Find profiling job path %s, but host_start.log not exist, "
|
|
1021
1185
|
"profiler will ignore this job dir.", job_dir)
|
|
1022
1186
|
continue
|
|
1023
1187
|
|
|
1024
|
-
training_device_id =
|
|
1188
|
+
training_device_id = start_file_path.split('.')[-1]
|
|
1025
1189
|
if self._dev_id != training_device_id:
|
|
1026
1190
|
logger.debug("Find profiling find job path %s, but not current training device id. "
|
|
1027
1191
|
"Current training device id %s, but job path device id: %s, "
|
|
@@ -1031,7 +1195,7 @@ class Profiler:
|
|
|
1031
1195
|
if not os.listdir(os.path.join(job_dir, 'data')):
|
|
1032
1196
|
continue
|
|
1033
1197
|
|
|
1034
|
-
job_start_time = self.
|
|
1198
|
+
job_start_time = self._parse_start_log(start_file_path)
|
|
1035
1199
|
if not job_start_time:
|
|
1036
1200
|
logger.warning("Find profiling job path %s, but fail to get job start info, "
|
|
1037
1201
|
"profiler will ignore this job dir.", job_start_time)
|
|
@@ -1060,6 +1224,7 @@ class Profiler:
|
|
|
1060
1224
|
|
|
1061
1225
|
def _analyser_op_info(self):
|
|
1062
1226
|
"""Analyse the operator information."""
|
|
1227
|
+
logger.info("Profiling: analyzing the operation compute time.")
|
|
1063
1228
|
integrator = Integrator(self._output_path, self._rank_id)
|
|
1064
1229
|
integrator.integrate()
|
|
1065
1230
|
|
|
@@ -1177,6 +1342,42 @@ class Profiler:
|
|
|
1177
1342
|
logger.warning("The target dir already exists. "
|
|
1178
1343
|
"There may be some old profiling data, and they will be rewritten in the end.")
|
|
1179
1344
|
|
|
1345
|
+
def _parser_kwargs(self, kwargs):
    """Parse and validate the optional keyword arguments of the profiler.

    Reads ``op_time``, ``data_process`` and ``timeline_limit`` from ``kwargs`` and stores the
    validated results in ``self._op_time``, ``self._data_process`` and
    ``self._timeline_size_limit_byte``. A value of the wrong type is replaced by its default
    (``True``, ``True`` and ``500`` respectively) after a warning is logged.

    Args:
        kwargs (dict): Keyword arguments passed to the profiler. The dict is modified in place:
            keys that are not listed for the current device target are removed (unless the
            ``MS_PROFILER_RUN_CONFIG`` environment variable has a truthy ``start`` entry), and
            ``data_process`` and ``timeline_limit`` are popped. ``op_time`` is only read.
    """
    # Validate op_time first: it is consulted while scanning the other parameters, and a
    # falsy non-bool value (e.g. 0) must not be reported as "op_time is set to False".
    op_time = kwargs.get("op_time", True)
    if not isinstance(op_time, bool):
        logger.warning(f"For '{self.__class__.__name__}', the parameter op_time must be bool, "
                       f"but got type {type(op_time)}, it will be set to True.")
        op_time = True
    self._op_time = op_time

    env_run_config = json.loads(os.getenv("MS_PROFILER_RUN_CONFIG", "{}"))
    # Snapshot the keys because unsupported entries are popped from kwargs during the scan.
    params = list(kwargs.keys())
    if not env_run_config.get("start") and params:
        # The member lookup does not depend on the loop variable, so resolve it once.
        # NOTE(review): the member value is presumably a collection of parameter names - confirm.
        supported_params = getattr(DeviceSupportParam, f'{self._device_target}'.upper()).value
        for param in params:
            if param not in supported_params:
                logger.warning("%s is an invalid param which doesn't work.", param)
                kwargs.pop(param)
            elif not op_time and param not in ALWAYS_VALID_PARAM:
                logger.warning(f"When op_time is set to False, the parameter '{param}' setting is invalid.")

    self._data_process = kwargs.pop("data_process", True)
    if not isinstance(self._data_process, bool):
        logger.warning(f"For '{self.__class__.__name__}', the parameter data_process must be bool, "
                       f"but got type {type(self._data_process)}, it will be set to True.")
        self._data_process = True

    timeline_limit = kwargs.pop("timeline_limit", 500)
    # bool is a subclass of int, so it has to be rejected explicitly.
    if isinstance(timeline_limit, bool) or not isinstance(timeline_limit, int):
        logger.warning(f"For '{self.__class__.__name__}', the parameter timeline_limit must be int, "
                       f"but got type {type(timeline_limit)}, it will be set to 500.")
        timeline_limit = 500
    if timeline_limit <= 0:
        logger.warning(
            "[Profiler]The 'timeline_limit' parameter must be greater than 0, it will be set to 500.")
        timeline_limit = 500
    # The limit is multiplied by 1024 * 1024 and stored as a byte count.
    self._timeline_size_limit_byte = timeline_limit * 1024 * 1024
|
|
1380
|
+
|
|
1180
1381
|
def _analyse_hccl_info(self):
|
|
1181
1382
|
"""Analyse hccl info."""
|
|
1182
1383
|
hccl_path = os.path.join(self._output_path, "hccl_info_{}".format(self._rank_id))
|
|
@@ -1195,6 +1396,7 @@ class Profiler:
|
|
|
1195
1396
|
"The hccl_parser-{version}-py3-none-any.whl package is usually located "
|
|
1196
1397
|
"in the /usr/local/Ascend/tools Directory", err)
|
|
1197
1398
|
raise ImportError(err) from err
|
|
1399
|
+
|
|
1198
1400
|
logger.info("Parse hccl info successfully.")
|
|
1199
1401
|
logger.info("Start analyse hccl info.")
|
|
1200
1402
|
hccl_parse = HcclParser(hccl_path, self._dev_id, self._rank_id, self._output_path)
|