mindspore 2.0.0rc1__cp38-none-any.whl → 2.2.0__cp38-none-any.whl
This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mindspore might be problematic. Click here for more details.
- mindspore/.commit_id +1 -1
- mindspore/Third_Party_Open_Source_Software_Notice +2 -2
- mindspore/__init__.py +5 -2
- mindspore/_akg/akg/build_module.py +5 -6
- mindspore/_akg/akg/composite/build_module.py +49 -16
- mindspore/_akg/akg/composite/split_stitch.py +10 -11
- mindspore/_akg/akg/config/repository.json +195 -0
- mindspore/_akg/akg/global_configs.py +5 -1
- mindspore/_akg/akg/ms/info_version_adapt.py +67 -1
- mindspore/_akg/akg/tvm/api.py +4 -3
- mindspore/_akg/akg/tvm/autotvm/__init__.py +1 -2
- mindspore/_akg/akg/tvm/autotvm/graph_tuner/base_graph_tuner.py +1 -5
- mindspore/_akg/akg/tvm/autotvm/measure/__init__.py +1 -1
- mindspore/_akg/akg/tvm/autotvm/measure/measure.py +1 -10
- mindspore/_akg/akg/tvm/autotvm/measure/measure_methods.py +1 -372
- mindspore/_akg/akg/tvm/build_module.py +16 -1
- mindspore/_akg/akg/tvm/contrib/graph_runtime.py +0 -53
- mindspore/_akg/akg/tvm/hybrid/parser.py +7 -6
- mindspore/_akg/akg/tvm/ir_builder.py +1 -1
- mindspore/_akg/akg/tvm/module.py +1 -2
- mindspore/_akg/akg/tvm/stmt.py +2 -2
- mindspore/_akg/akg/utils/composite_op_helper.py +9 -10
- mindspore/_akg/akg/utils/kernel_exec.py +58 -260
- mindspore/_akg/akg/utils/op_dsl.py +17 -1
- mindspore/_akg/akg/utils/result_analysis.py +4 -24
- mindspore/_akg/akg/utils/tbe_codegen_utils.py +198 -0
- mindspore/_c_dataengine.cpython-38-aarch64-linux-gnu.so +0 -0
- mindspore/_c_expression.cpython-38-aarch64-linux-gnu.so +0 -0
- mindspore/_c_mindrecord.cpython-38-aarch64-linux-gnu.so +0 -0
- mindspore/_check_jit_forbidden_api.py +5 -1
- mindspore/_checkparam.py +79 -62
- mindspore/_extends/graph_kernel/__init__.py +0 -1
- mindspore/_extends/graph_kernel/model/graph_split.py +2 -0
- mindspore/_extends/graph_kernel/model/model_builder.py +9 -50
- mindspore/_extends/graph_kernel/splitter.py +1 -9
- mindspore/_extends/parallel_compile/akg_compiler/akg_process.py +128 -21
- mindspore/_extends/parallel_compile/akg_compiler/build_tbe_kernel.py +2 -2
- mindspore/_extends/parallel_compile/akg_compiler/tbe_topi.py +4 -2
- mindspore/_extends/parallel_compile/tbe_compiler/tbe_adapter.py +18 -13
- mindspore/_extends/parallel_compile/tbe_compiler/tbe_helper.py +13 -9
- mindspore/_extends/parallel_compile/tbe_compiler/tbe_job.py +1 -1
- mindspore/_extends/parallel_compile/tbe_compiler/tbe_job_manager.py +1 -1
- mindspore/_extends/parse/__init__.py +19 -17
- mindspore/_extends/parse/namespace.py +7 -36
- mindspore/_extends/parse/parser.py +375 -189
- mindspore/_extends/parse/resources.py +36 -41
- mindspore/_extends/parse/standard_method.py +350 -245
- mindspore/_extends/parse/trope.py +2 -12
- mindspore/_extends/remote/kernel_build_server.py +24 -7
- mindspore/_extends/remote/kernel_build_server_akg_v2.py +55 -0
- mindspore/_install_custom.py +43 -0
- mindspore/_mindspore_offline_debug.cpython-38-aarch64-linux-gnu.so +0 -0
- mindspore/amp.py +85 -19
- mindspore/bin/cache_admin +0 -0
- mindspore/bin/cache_server +0 -0
- mindspore/boost/base.py +2 -2
- mindspore/boost/boost.py +27 -32
- mindspore/boost/boost_cell_wrapper.py +37 -13
- mindspore/boost/grad_accumulation.py +1 -1
- mindspore/boost/grad_freeze.py +34 -6
- mindspore/boost/group_loss_scale_manager.py +15 -14
- mindspore/boost/less_batch_normalization.py +28 -3
- mindspore/common/__init__.py +15 -11
- mindspore/common/_auto_dynamic.py +68 -0
- mindspore/common/_jit_fallback_utils.py +111 -0
- mindspore/common/_register_for_adapter.py +17 -5
- mindspore/common/_register_for_tensor.py +2 -2
- mindspore/common/_stub_tensor.py +18 -15
- mindspore/common/_utils.py +31 -7
- mindspore/common/api.py +269 -101
- mindspore/common/auto_dynamic_shape.py +498 -0
- mindspore/common/dtype.py +61 -21
- mindspore/common/dump.py +9 -7
- mindspore/common/initializer.py +106 -76
- mindspore/common/jit_config.py +35 -14
- mindspore/common/lazy_inline.py +187 -0
- mindspore/common/mindir_util.py +101 -0
- mindspore/common/mutable.py +10 -13
- mindspore/common/parameter.py +246 -55
- mindspore/common/seed.py +13 -7
- mindspore/common/sparse_tensor.py +29 -33
- mindspore/common/tensor.py +907 -251
- mindspore/communication/__init__.py +7 -4
- mindspore/communication/_comm_helper.py +84 -4
- mindspore/communication/management.py +160 -88
- mindspore/config/op_info.config +99 -75
- mindspore/config/super_bar_config.json +36 -4
- mindspore/context.py +526 -219
- mindspore/dataset/__init__.py +9 -46
- mindspore/dataset/audio/__init__.py +4 -19
- mindspore/dataset/audio/transforms.py +545 -233
- mindspore/dataset/audio/utils.py +21 -18
- mindspore/dataset/callback/ds_callback.py +42 -13
- mindspore/dataset/core/config.py +158 -100
- mindspore/dataset/core/validator_helpers.py +1 -63
- mindspore/dataset/debug/debug_hook.py +45 -13
- mindspore/dataset/debug/pre_defined_hook.py +5 -5
- mindspore/dataset/engine/__init__.py +0 -5
- mindspore/dataset/engine/cache_client.py +38 -15
- mindspore/dataset/engine/datasets.py +615 -278
- mindspore/dataset/engine/datasets_audio.py +154 -283
- mindspore/dataset/engine/datasets_standard_format.py +104 -116
- mindspore/dataset/engine/datasets_text.py +443 -326
- mindspore/dataset/engine/datasets_user_defined.py +251 -164
- mindspore/dataset/engine/datasets_vision.py +839 -1443
- mindspore/dataset/engine/iterators.py +11 -4
- mindspore/dataset/engine/obs/obs_mindrecord_dataset.py +7 -3
- mindspore/dataset/engine/obs/util.py +3 -0
- mindspore/dataset/engine/offload.py +6 -6
- mindspore/dataset/engine/queue.py +15 -14
- mindspore/dataset/engine/samplers.py +39 -23
- mindspore/dataset/engine/serializer_deserializer.py +22 -6
- mindspore/dataset/engine/validators.py +21 -331
- mindspore/dataset/text/__init__.py +5 -33
- mindspore/dataset/text/transforms.py +334 -165
- mindspore/dataset/text/utils.py +215 -145
- mindspore/dataset/transforms/__init__.py +1 -1
- mindspore/dataset/transforms/c_transforms.py +3 -2
- mindspore/dataset/transforms/py_transforms_util.py +40 -12
- mindspore/dataset/transforms/transforms.py +174 -71
- mindspore/dataset/utils/browse_dataset.py +25 -17
- mindspore/dataset/utils/line_reader.py +24 -21
- mindspore/dataset/vision/__init__.py +5 -26
- mindspore/dataset/vision/c_transforms.py +177 -165
- mindspore/dataset/vision/py_transforms.py +114 -119
- mindspore/dataset/vision/py_transforms_util.py +54 -51
- mindspore/dataset/vision/transforms.py +1127 -381
- mindspore/dataset/vision/utils.py +54 -38
- mindspore/dataset/vision/validators.py +12 -2
- mindspore/experimental/map_parameter.py +38 -4
- mindspore/{dataset/datapreprocess → experimental/optim}/__init__.py +14 -4
- mindspore/experimental/optim/adam.py +192 -0
- mindspore/experimental/optim/adamw.py +181 -0
- mindspore/experimental/optim/lr_scheduler.py +1427 -0
- mindspore/experimental/optim/optimizer.py +252 -0
- mindspore/experimental/optim/sgd.py +147 -0
- mindspore/gen_ops.py +273 -0
- mindspore/include/OWNERS +1 -2
- mindspore/include/api/context.h +21 -1
- mindspore/include/api/data_type.h +2 -1
- mindspore/include/api/graph.h +0 -15
- mindspore/include/api/kernel.h +2 -0
- mindspore/include/api/kernel_api.h +37 -12
- mindspore/include/api/model.h +29 -42
- mindspore/include/api/model_group.h +14 -3
- mindspore/include/api/model_parallel_runner.h +18 -2
- mindspore/include/api/serialization.h +26 -0
- mindspore/include/api/status.h +1 -0
- mindspore/include/api/types.h +38 -4
- mindspore/include/c_api/ms/abstract.h +67 -0
- mindspore/include/c_api/ms/attribute.h +197 -0
- mindspore/include/c_api/ms/base/handle_types.h +43 -0
- mindspore/include/c_api/ms/base/macros.h +32 -0
- mindspore/include/c_api/ms/base/status.h +33 -0
- mindspore/include/c_api/ms/base/types.h +282 -0
- mindspore/include/c_api/ms/context.h +102 -0
- mindspore/include/c_api/ms/graph.h +160 -0
- mindspore/include/c_api/ms/node.h +606 -0
- mindspore/include/c_api/ms/tensor.h +161 -0
- mindspore/include/c_api/ms/value.h +84 -0
- mindspore/include/c_api/status_c.h +3 -0
- mindspore/include/dataset/constants.h +6 -12
- mindspore/include/dataset/execute.h +23 -13
- mindspore/include/dataset/text.h +26 -26
- mindspore/include/dataset/transforms.h +25 -31
- mindspore/include/dataset/vision.h +60 -60
- mindspore/include/dataset/vision_ascend.h +5 -6
- mindspore/include/dataset/vision_lite.h +17 -17
- mindspore/include/mindapi/base/format.h +0 -1
- mindspore/include/mindapi/base/type_id.h +2 -1
- mindspore/include/mindapi/base/types.h +5 -1
- mindspore/lib/libdnnl.so.2 +0 -0
- mindspore/lib/libjemalloc.so.2 +0 -0
- mindspore/lib/libmindspore.so +0 -0
- mindspore/lib/libmindspore_backend.so +0 -0
- mindspore/lib/libmindspore_common.so +0 -0
- mindspore/lib/libmindspore_core.so +0 -0
- mindspore/lib/libmindspore_glog.so.0 +0 -0
- mindspore/lib/libmindspore_gpr.so.15 +0 -0
- mindspore/lib/libmindspore_grpc++.so.1 +0 -0
- mindspore/lib/libmindspore_grpc.so.15 +0 -0
- mindspore/lib/libmindspore_shared_lib.so +0 -0
- mindspore/lib/libmpi_adapter.so +0 -0
- mindspore/lib/libnnacl.so +0 -0
- mindspore/lib/libopencv_core.so.4.5 +0 -0
- mindspore/lib/libopencv_imgcodecs.so.4.5 +0 -0
- mindspore/lib/libopencv_imgproc.so.4.5 +0 -0
- mindspore/lib/libps_cache.so +0 -0
- mindspore/lib/plugin/ascend/custom_aicpu_ops/op_impl/cpu/aicpu_kernel/impl/libcust_aicpu_kernels.so +0 -0
- mindspore/lib/plugin/ascend/custom_aicpu_ops/op_impl/cpu/aicpu_kernel/impl/libcust_cpu_kernels.so +0 -0
- mindspore/lib/plugin/ascend/custom_aicpu_ops/op_impl/cpu/config/cust_aicpu_kernel.json +9000 -0
- mindspore/lib/plugin/ascend/custom_aicpu_ops/op_proto/libcust_op_proto.so +0 -0
- mindspore/lib/plugin/ascend/libakg.so +0 -0
- mindspore/lib/plugin/ascend/libascend_collective.so +0 -0
- mindspore/lib/plugin/ascend/libdvpp_utils.so +0 -0
- mindspore/lib/plugin/ascend/libhccl_plugin.so +0 -0
- mindspore/lib/plugin/ascend/libmindspore_aicpu_kernels.so +0 -0
- mindspore/lib/plugin/ascend/libmindspore_cpu_kernels.so +0 -0
- mindspore/lib/plugin/cpu/libakg.so +0 -0
- mindspore/lib/plugin/libmindspore_ascend.so.1 +0 -0
- mindspore/lib/plugin/libmindspore_ascend.so.2 +0 -0
- mindspore/log.py +9 -6
- mindspore/mindrecord/filereader.py +33 -4
- mindspore/mindrecord/filewriter.py +70 -35
- mindspore/mindrecord/mindpage.py +40 -34
- mindspore/mindrecord/shardreader.py +1 -1
- mindspore/mindrecord/shardsegment.py +1 -1
- mindspore/mindrecord/tools/cifar100_to_mr.py +25 -18
- mindspore/mindrecord/tools/cifar10_to_mr.py +25 -18
- mindspore/mindrecord/tools/csv_to_mr.py +29 -13
- mindspore/mindrecord/tools/imagenet_to_mr.py +24 -10
- mindspore/mindrecord/tools/mnist_to_mr.py +24 -11
- mindspore/mindrecord/tools/tfrecord_to_mr.py +31 -26
- mindspore/nn/cell.py +463 -169
- mindspore/nn/dynamic_lr.py +47 -43
- mindspore/nn/layer/activation.py +225 -82
- mindspore/nn/layer/basic.py +121 -79
- mindspore/nn/layer/channel_shuffle.py +21 -21
- mindspore/nn/layer/combined.py +33 -26
- mindspore/nn/layer/container.py +277 -22
- mindspore/nn/layer/conv.py +441 -304
- mindspore/nn/layer/dense.py +19 -13
- mindspore/nn/layer/embedding.py +62 -49
- mindspore/nn/layer/flash_attention.py +264 -0
- mindspore/nn/layer/image.py +50 -39
- mindspore/nn/layer/math.py +62 -51
- mindspore/nn/layer/normalization.py +219 -167
- mindspore/nn/layer/padding.py +58 -70
- mindspore/nn/layer/pooling.py +334 -287
- mindspore/nn/layer/rnn_cells.py +53 -38
- mindspore/nn/layer/rnns.py +59 -56
- mindspore/nn/layer/thor_layer.py +52 -44
- mindspore/nn/layer/timedistributed.py +6 -4
- mindspore/nn/layer/transformer.py +284 -164
- mindspore/nn/learning_rate_schedule.py +34 -25
- mindspore/nn/loss/__init__.py +3 -2
- mindspore/nn/loss/loss.py +554 -311
- mindspore/nn/optim/ada_grad.py +12 -9
- mindspore/nn/optim/adadelta.py +14 -11
- mindspore/nn/optim/adafactor.py +19 -16
- mindspore/nn/optim/adam.py +62 -47
- mindspore/nn/optim/adamax.py +13 -10
- mindspore/nn/optim/adasum.py +12 -8
- mindspore/nn/optim/asgd.py +10 -9
- mindspore/nn/optim/ftrl.py +20 -17
- mindspore/nn/optim/lamb.py +16 -12
- mindspore/nn/optim/lars.py +8 -6
- mindspore/nn/optim/lazyadam.py +25 -20
- mindspore/nn/optim/momentum.py +10 -7
- mindspore/nn/optim/optimizer.py +61 -9
- mindspore/nn/optim/proximal_ada_grad.py +14 -13
- mindspore/nn/optim/rmsprop.py +17 -13
- mindspore/nn/optim/rprop.py +30 -17
- mindspore/nn/optim/sgd.py +40 -23
- mindspore/nn/optim/thor.py +24 -26
- mindspore/nn/probability/bijector/bijector.py +11 -11
- mindspore/nn/probability/bijector/exp.py +1 -1
- mindspore/nn/probability/bijector/gumbel_cdf.py +3 -3
- mindspore/nn/probability/bijector/invert.py +1 -1
- mindspore/nn/probability/bijector/power_transform.py +29 -29
- mindspore/nn/probability/bijector/scalar_affine.py +3 -3
- mindspore/nn/probability/bijector/softplus.py +5 -5
- mindspore/nn/probability/bnn_layers/bnn_cell_wrapper.py +4 -2
- mindspore/nn/probability/bnn_layers/conv_variational.py +13 -13
- mindspore/nn/probability/bnn_layers/dense_variational.py +12 -12
- mindspore/nn/probability/bnn_layers/layer_distribution.py +9 -8
- mindspore/nn/probability/distribution/_utils/custom_ops.py +19 -3
- mindspore/nn/probability/distribution/_utils/utils.py +1 -1
- mindspore/nn/probability/distribution/bernoulli.py +9 -9
- mindspore/nn/probability/distribution/beta.py +8 -8
- mindspore/nn/probability/distribution/categorical.py +23 -15
- mindspore/nn/probability/distribution/cauchy.py +5 -6
- mindspore/nn/probability/distribution/distribution.py +3 -3
- mindspore/nn/probability/distribution/exponential.py +4 -4
- mindspore/nn/probability/distribution/gamma.py +10 -10
- mindspore/nn/probability/distribution/geometric.py +8 -8
- mindspore/nn/probability/distribution/gumbel.py +8 -9
- mindspore/nn/probability/distribution/half_normal.py +5 -5
- mindspore/nn/probability/distribution/laplace.py +5 -5
- mindspore/nn/probability/distribution/log_normal.py +12 -11
- mindspore/nn/probability/distribution/logistic.py +8 -8
- mindspore/nn/probability/distribution/normal.py +6 -5
- mindspore/nn/probability/distribution/poisson.py +10 -11
- mindspore/nn/probability/distribution/student_t.py +8 -9
- mindspore/nn/probability/distribution/transformed_distribution.py +5 -5
- mindspore/nn/probability/distribution/uniform.py +11 -11
- mindspore/nn/reinforcement/tensor_array.py +2 -2
- mindspore/nn/sparse/sparse.py +9 -9
- mindspore/nn/wrap/cell_wrapper.py +188 -63
- mindspore/nn/wrap/grad_reducer.py +21 -12
- mindspore/nn/wrap/loss_scale.py +136 -49
- mindspore/numpy/__init__.py +4 -4
- mindspore/numpy/array_creations.py +55 -56
- mindspore/numpy/array_ops.py +134 -35
- mindspore/numpy/logic_ops.py +66 -20
- mindspore/numpy/math_ops.py +142 -139
- mindspore/numpy/utils_const.py +2 -2
- mindspore/offline_debug/convert_async.py +2 -2
- mindspore/ops/_grad_experimental/__init__.py +7 -5
- mindspore/ops/_grad_experimental/grad_array_ops.py +231 -348
- mindspore/ops/{_grad → _grad_experimental}/grad_base.py +1 -33
- mindspore/ops/{_grad → _grad_experimental}/grad_comm_ops.py +25 -13
- mindspore/ops/{_grad/__init__.py → _grad_experimental/grad_debug_ops.py} +15 -7
- mindspore/ops/{_grad → _grad_experimental}/grad_implementations.py +17 -11
- mindspore/ops/_grad_experimental/grad_inner_ops.py +33 -52
- mindspore/ops/_grad_experimental/grad_math_ops.py +151 -1224
- mindspore/ops/_grad_experimental/grad_nn_ops.py +141 -414
- mindspore/ops/{_grad → _grad_experimental}/grad_quant_ops.py +10 -6
- mindspore/ops/_grad_experimental/grad_sparse.py +317 -2
- mindspore/ops/_grad_experimental/grad_sparse_ops.py +3 -13
- mindspore/ops/{_grad → _grad_experimental}/taylor_rule.py +1 -1
- mindspore/ops/_op_impl/_custom_op/dsd_back_impl.py +1 -1
- mindspore/ops/_op_impl/_custom_op/flash_attention/__init__.py +0 -0
- mindspore/ops/_op_impl/_custom_op/flash_attention/attention.py +406 -0
- mindspore/{_extends/graph_kernel/expanders/complex/__init__.py → ops/_op_impl/_custom_op/flash_attention/constants.py} +27 -8
- mindspore/ops/_op_impl/_custom_op/flash_attention/flash_attention_bwd.py +467 -0
- mindspore/ops/_op_impl/_custom_op/flash_attention/flash_attention_fwd.py +563 -0
- mindspore/ops/_op_impl/_custom_op/flash_attention/flash_attention_impl.py +193 -0
- mindspore/ops/_op_impl/_custom_op/flash_attention/tik_ops_utils.py +435 -0
- mindspore/ops/_op_impl/_custom_op/flash_attention/tiling_strategy/__init__.py +0 -0
- mindspore/ops/_op_impl/_custom_op/flash_attention/tiling_strategy/sparse_tiling.py +45 -0
- mindspore/ops/_op_impl/_custom_op/flash_attention/tiling_strategy/strategy.py +67 -0
- mindspore/ops/_op_impl/_custom_op/flash_attention/tiling_strategy/wukong_tiling.py +62 -0
- mindspore/ops/_op_impl/_custom_op/matmul_cube_dense_left_impl.py +2 -2
- mindspore/ops/_op_impl/aicpu/__init__.py +41 -1
- mindspore/ops/_op_impl/aicpu/adaptive_max_pool_2d.py +37 -0
- mindspore/ops/_op_impl/aicpu/bias_add_grad.py +0 -1
- mindspore/ops/_op_impl/aicpu/cast.py +52 -0
- mindspore/ops/_op_impl/aicpu/coalesce.py +2 -0
- mindspore/ops/_op_impl/aicpu/col2im.py +3 -1
- mindspore/ops/_op_impl/aicpu/count_nonzero.py +43 -0
- mindspore/ops/_op_impl/aicpu/dropout_genmask.py +6 -0
- mindspore/ops/_op_impl/aicpu/eps.py +32 -0
- mindspore/ops/_op_impl/aicpu/eye.py +4 -4
- mindspore/ops/_op_impl/aicpu/fft_with_size.py +6 -0
- mindspore/ops/_op_impl/aicpu/fill_diagonal.py +5 -0
- mindspore/ops/_op_impl/aicpu/gamma.py +2 -2
- mindspore/ops/_op_impl/aicpu/im2col.py +3 -5
- mindspore/ops/_op_impl/aicpu/lgamma.py +1 -0
- mindspore/ops/_op_impl/aicpu/log_uniform_candidate_sampler.py +6 -3
- mindspore/ops/_op_impl/aicpu/lu.py +39 -0
- mindspore/ops/_op_impl/aicpu/lu_unpack_grad.py +0 -1
- mindspore/ops/_op_impl/aicpu/masked_scatter.py +1 -0
- mindspore/ops/_op_impl/aicpu/masked_select_grad.py +3 -0
- mindspore/ops/_op_impl/aicpu/matrix_band_part.py +59 -0
- mindspore/ops/_op_impl/aicpu/matrix_power.py +6 -1
- mindspore/ops/_op_impl/aicpu/median.py +1 -0
- mindspore/ops/_op_impl/aicpu/multinomial.py +9 -9
- mindspore/ops/_op_impl/aicpu/not_equal.py +0 -5
- mindspore/ops/_op_impl/aicpu/pad_v3.py +3 -1
- mindspore/ops/_op_impl/aicpu/pad_v3_grad.py +2 -0
- mindspore/ops/_op_impl/aicpu/parameterized_truncated_normal.py +15 -7
- mindspore/ops/_op_impl/aicpu/random_categorical.py +39 -19
- mindspore/ops/_op_impl/aicpu/random_choice_with_mask.py +5 -2
- mindspore/ops/_op_impl/aicpu/random_poisson.py +103 -52
- mindspore/ops/_op_impl/aicpu/random_shuffle.py +17 -15
- mindspore/ops/_op_impl/aicpu/resize_bilinear_grad.py +0 -1
- mindspore/ops/_op_impl/aicpu/resize_nearest_neighbor_v2.py +0 -6
- mindspore/ops/_op_impl/aicpu/resize_nearest_neighbor_v2_grad.py +0 -7
- mindspore/ops/_op_impl/aicpu/scatter_nd.py +2 -0
- mindspore/ops/_op_impl/aicpu/sequence_concat.py +40 -0
- mindspore/ops/_op_impl/aicpu/sequence_stack.py +40 -0
- mindspore/ops/_op_impl/aicpu/{sparseaddmm.py → sparse_addmm.py} +2 -2
- mindspore/ops/_op_impl/aicpu/{sparsesparsemaximum.py → sparse_sparse_maximum.py} +4 -4
- mindspore/ops/_op_impl/aicpu/standard_laplace.py +5 -4
- mindspore/ops/_op_impl/aicpu/standard_normal.py +5 -4
- mindspore/ops/_op_impl/aicpu/truncated_normal.py +9 -7
- mindspore/ops/_op_impl/aicpu/uniform.py +5 -3
- mindspore/ops/_op_impl/aicpu/uniform_candidate_sampler.py +8 -4
- mindspore/ops/_op_impl/aicpu/uniform_int.py +5 -5
- mindspore/ops/_op_impl/aicpu/uniform_real.py +4 -4
- mindspore/ops/_op_impl/aicpu/upsample_nearest_3d.py +14 -6
- mindspore/ops/_op_impl/aicpu/upsample_nearest_3d_grad.py +22 -8
- mindspore/ops/_op_impl/aicpu/upsample_trilinear_3d.py +11 -6
- mindspore/ops/_op_impl/aicpu/upsample_trilinear_3d_grad.py +21 -10
- mindspore/ops/_op_impl/tbe/__init__.py +6 -4
- mindspore/ops/_op_impl/tbe/atomic_addr_clean.py +1 -1
- mindspore/ops/_op_impl/tbe/avg_pool.py +2 -2
- mindspore/ops/_op_impl/tbe/avg_pool_3d.py +3 -3
- mindspore/ops/_op_impl/tbe/avg_pool_3d_grad.py +4 -4
- mindspore/ops/_op_impl/tbe/avg_pool_ds.py +2 -2
- mindspore/ops/_op_impl/tbe/avg_pool_grad.py +3 -3
- mindspore/ops/_op_impl/tbe/avg_pool_grad_vm.py +3 -3
- mindspore/ops/_op_impl/tbe/batch_to_space.py +1 -1
- mindspore/ops/_op_impl/tbe/batch_to_space_nd.py +2 -2
- mindspore/ops/_op_impl/tbe/bn_infer.py +2 -2
- mindspore/ops/_op_impl/tbe/bn_infer_ds.py +3 -2
- mindspore/ops/_op_impl/tbe/broadcast_to.py +1 -1
- mindspore/ops/_op_impl/tbe/depthwise_conv2d.py +3 -3
- mindspore/ops/_op_impl/tbe/expand_dims.py +1 -1
- mindspore/ops/_op_impl/tbe/gather_v2.py +56 -0
- mindspore/ops/_op_impl/tbe/im2col.py +4 -4
- mindspore/ops/_op_impl/tbe/inplace_index_add.py +7 -3
- mindspore/ops/_op_impl/tbe/mem_set.py +38 -0
- mindspore/ops/_op_impl/tbe/scatter_nd_add.py +3 -0
- mindspore/ops/_op_impl/tbe/scatter_nd_d.py +1 -1
- mindspore/ops/_op_impl/tbe/space_to_batch.py +1 -1
- mindspore/ops/_op_impl/tbe/space_to_batch_nd.py +2 -2
- mindspore/ops/_op_impl/tbe/trans_data_ds.py +2 -0
- mindspore/ops/_primitive_cache.py +1 -1
- mindspore/ops/_tracefunc.py +241 -0
- mindspore/ops/_utils/utils.py +10 -2
- mindspore/ops/_vmap/vmap_array_ops.py +5 -3
- mindspore/ops/_vmap/vmap_base.py +5 -4
- mindspore/ops/_vmap/vmap_convolution_ops.py +1 -1
- mindspore/ops/_vmap/vmap_grad_math_ops.py +6 -4
- mindspore/ops/_vmap/vmap_grad_nn_ops.py +11 -6
- mindspore/ops/_vmap/vmap_math_ops.py +5 -2
- mindspore/ops/_vmap/vmap_nn_ops.py +135 -11
- mindspore/ops/arg_dtype_cast.py +54 -0
- mindspore/ops/composite/__init__.py +7 -5
- mindspore/ops/composite/base.py +78 -34
- mindspore/ops/composite/math_ops.py +5 -695
- mindspore/ops/composite/multitype_ops/_compile_utils.py +403 -97
- mindspore/ops/composite/multitype_ops/_constexpr_utils.py +28 -22
- mindspore/ops/composite/multitype_ops/add_impl.py +69 -7
- mindspore/ops/composite/multitype_ops/bitwise_and_impl.py +2 -1
- mindspore/ops/composite/multitype_ops/bitwise_or_impl.py +2 -1
- mindspore/ops/composite/multitype_ops/bitwise_xor_impl.py +2 -0
- mindspore/ops/composite/multitype_ops/div_impl.py +1 -0
- mindspore/ops/composite/multitype_ops/floordiv_impl.py +1 -0
- mindspore/ops/composite/multitype_ops/getitem_impl.py +48 -10
- mindspore/ops/composite/multitype_ops/greater_equal_impl.py +2 -0
- mindspore/ops/composite/multitype_ops/greater_impl.py +2 -0
- mindspore/ops/composite/multitype_ops/left_shift_impl.py +2 -0
- mindspore/ops/composite/multitype_ops/less_equal_impl.py +2 -0
- mindspore/ops/composite/multitype_ops/less_impl.py +2 -0
- mindspore/ops/composite/multitype_ops/logic_not_impl.py +2 -2
- mindspore/ops/composite/multitype_ops/mod_impl.py +1 -0
- mindspore/ops/composite/multitype_ops/mul_impl.py +1 -0
- mindspore/ops/composite/multitype_ops/negative_impl.py +1 -0
- mindspore/ops/composite/multitype_ops/not_in_impl.py +1 -0
- mindspore/ops/composite/multitype_ops/ones_like_impl.py +6 -0
- mindspore/ops/composite/multitype_ops/pow_impl.py +1 -0
- mindspore/ops/composite/multitype_ops/right_shift_impl.py +2 -0
- mindspore/ops/composite/multitype_ops/setitem_impl.py +10 -7
- mindspore/ops/composite/multitype_ops/sub_impl.py +1 -0
- mindspore/ops/composite/multitype_ops/uadd_impl.py +2 -0
- mindspore/ops/composite/multitype_ops/zeros_like_impl.py +9 -0
- mindspore/ops/deprecated.py +304 -0
- mindspore/ops/function/__init__.py +41 -4
- mindspore/ops/function/array_func.py +1108 -467
- mindspore/ops/function/clip_func.py +94 -27
- mindspore/ops/function/debug_func.py +3 -1
- mindspore/ops/function/grad/grad_func.py +82 -73
- mindspore/ops/function/image_func.py +28 -12
- mindspore/ops/function/linalg_func.py +135 -39
- mindspore/ops/function/math_func.py +3779 -894
- mindspore/ops/function/nn_func.py +1584 -657
- mindspore/ops/function/parameter_func.py +13 -3
- mindspore/ops/function/random_func.py +247 -153
- mindspore/ops/function/sparse_func.py +14 -11
- mindspore/ops/function/sparse_unary_func.py +173 -47
- mindspore/ops/function/spectral_func.py +8 -4
- mindspore/ops/function/vmap_func.py +8 -7
- mindspore/ops/functional.py +47 -16
- mindspore/ops/op_info_register.py +346 -86
- mindspore/ops/operations/__init__.py +38 -22
- mindspore/ops/operations/_grad_ops.py +145 -149
- mindspore/ops/operations/_inner_ops.py +298 -56
- mindspore/ops/operations/_ms_kernel.py +3 -3
- mindspore/ops/operations/_quant_ops.py +24 -28
- mindspore/ops/operations/_rl_inner_ops.py +9 -7
- mindspore/ops/operations/_scalar_ops.py +115 -0
- mindspore/ops/operations/_sequence_ops.py +148 -10
- mindspore/ops/operations/_tensor_array.py +1 -1
- mindspore/ops/operations/_thor_ops.py +2 -2
- mindspore/ops/operations/array_ops.py +1239 -561
- mindspore/ops/operations/comm_ops.py +166 -90
- mindspore/ops/operations/control_ops.py +3 -3
- mindspore/ops/operations/custom_ops.py +124 -102
- mindspore/ops/operations/debug_ops.py +24 -11
- mindspore/ops/operations/image_ops.py +86 -71
- mindspore/ops/operations/inner_ops.py +18 -13
- mindspore/ops/operations/linalg_ops.py +30 -11
- mindspore/ops/operations/math_ops.py +1730 -435
- mindspore/ops/operations/nn_ops.py +1953 -943
- mindspore/ops/operations/other_ops.py +65 -43
- mindspore/ops/operations/random_ops.py +258 -98
- mindspore/ops/operations/rl_ops.py +4 -36
- mindspore/ops/operations/sparse_ops.py +38 -33
- mindspore/ops/operations/spectral_ops.py +8 -4
- mindspore/ops/primitive.py +66 -44
- mindspore/ops/signature.py +5 -5
- mindspore/parallel/_auto_parallel_context.py +80 -19
- mindspore/parallel/_cost_model_context.py +42 -0
- mindspore/parallel/_offload_context.py +162 -72
- mindspore/parallel/_parallel_serialization.py +2 -2
- mindspore/parallel/_ps_context.py +16 -4
- mindspore/parallel/_recovery_context.py +2 -1
- mindspore/parallel/_tensor.py +15 -13
- mindspore/parallel/_transformer/layers.py +8 -6
- mindspore/parallel/_transformer/loss.py +1 -0
- mindspore/parallel/_transformer/moe.py +7 -7
- mindspore/parallel/_transformer/op_parallel_config.py +12 -1
- mindspore/parallel/_transformer/transformer.py +34 -14
- mindspore/parallel/_utils.py +36 -14
- mindspore/parallel/algo_parameter_config.py +114 -20
- mindspore/parallel/checkpoint_transform.py +16 -18
- mindspore/parallel/shard.py +16 -13
- mindspore/profiler/__init__.py +1 -1
- mindspore/profiler/common/struct_type.py +3 -3
- mindspore/profiler/common/util.py +3 -2
- mindspore/profiler/envprofiling.py +11 -4
- mindspore/profiler/parser/aicpu_data_parser.py +5 -3
- mindspore/profiler/parser/ascend_flops_generator.py +94 -0
- mindspore/profiler/parser/ascend_fpbp_generator.py +76 -0
- mindspore/profiler/parser/ascend_hccl_generator.py +288 -0
- mindspore/profiler/parser/ascend_msprof_exporter.py +213 -0
- mindspore/profiler/parser/ascend_msprof_generator.py +199 -0
- mindspore/profiler/parser/ascend_op_generator.py +276 -0
- mindspore/profiler/parser/ascend_steptrace_generator.py +94 -0
- mindspore/profiler/parser/ascend_timeline_generator.py +110 -54
- mindspore/profiler/parser/base_timeline_generator.py +11 -7
- mindspore/profiler/parser/cpu_gpu_timeline_generator.py +45 -46
- mindspore/profiler/parser/flops_parser.py +15 -11
- mindspore/profiler/parser/framework_parser.py +92 -73
- mindspore/profiler/parser/hccl_parser.py +16 -12
- mindspore/profiler/parser/integrator.py +22 -11
- mindspore/profiler/parser/memory_usage_parser.py +36 -11
- mindspore/profiler/parser/minddata_analyzer.py +12 -14
- mindspore/profiler/parser/minddata_pipeline_parser.py +1 -1
- mindspore/profiler/parser/msadvisor_parser.py +8 -4
- mindspore/profiler/parser/op_intermediate_parser.py +5 -2
- mindspore/profiler/parser/optime_parser.py +1 -1
- mindspore/profiler/parser/profiler_info.py +4 -5
- mindspore/profiler/parser/step_trace_parser.py +11 -14
- mindspore/profiler/profiling.py +678 -377
- mindspore/rewrite/api/node.py +211 -54
- mindspore/rewrite/api/node_type.py +5 -0
- mindspore/rewrite/api/pattern_engine.py +22 -23
- mindspore/rewrite/api/scoped_value.py +20 -17
- mindspore/rewrite/api/symbol_tree.py +252 -106
- mindspore/rewrite/api/tree_node_helper.py +3 -0
- mindspore/rewrite/ast_helpers/__init__.py +2 -1
- mindspore/rewrite/ast_helpers/ast_finder.py +129 -0
- mindspore/rewrite/ast_helpers/ast_modifier.py +116 -104
- mindspore/rewrite/ast_transformers/flatten_recursive_stmt.py +97 -46
- mindspore/rewrite/common/rewrite_elog.py +5 -1
- mindspore/rewrite/namer.py +51 -51
- mindspore/rewrite/namespace.py +14 -5
- mindspore/{ops/bprop_mindir → rewrite/node}/__init__.py +9 -4
- mindspore/rewrite/node/call_function.py +79 -0
- mindspore/rewrite/node/cell_container.py +135 -0
- mindspore/rewrite/node/control_flow.py +88 -0
- mindspore/rewrite/{node.py → node/node.py} +313 -247
- mindspore/rewrite/node/node_manager.py +254 -0
- mindspore/rewrite/node/node_topological_manager.py +243 -0
- mindspore/rewrite/parsers/arguments_parser.py +22 -21
- mindspore/rewrite/parsers/assign_parser.py +225 -239
- mindspore/rewrite/parsers/attribute_parser.py +9 -7
- mindspore/rewrite/parsers/class_def_parser.py +179 -218
- mindspore/rewrite/parsers/constant_parser.py +9 -6
- mindspore/rewrite/parsers/container_parser.py +9 -7
- mindspore/rewrite/parsers/for_parser.py +36 -15
- mindspore/rewrite/parsers/function_def_parser.py +23 -20
- mindspore/rewrite/parsers/if_parser.py +28 -24
- mindspore/rewrite/parsers/module_parser.py +202 -25
- mindspore/rewrite/{parser.py → parsers/parser.py} +4 -2
- mindspore/rewrite/{parser_register.py → parsers/parser_register.py} +1 -1
- mindspore/rewrite/parsers/return_parser.py +6 -6
- mindspore/rewrite/sparsify/sparse_transformer.py +12 -3
- mindspore/rewrite/sparsify/sparsify.py +4 -1
- mindspore/rewrite/sparsify/utils.py +11 -5
- mindspore/rewrite/symbol_tree.py +577 -732
- mindspore/rewrite/symbol_tree_builder.py +9 -175
- mindspore/rewrite/symbol_tree_dumper.py +2 -2
- mindspore/run_check/_check_version.py +46 -39
- mindspore/run_check/run_check.py +3 -2
- mindspore/{scipy/sparse → safeguard}/__init__.py +4 -5
- mindspore/safeguard/rewrite_obfuscation.py +517 -0
- mindspore/scipy/__init__.py +1 -1
- mindspore/scipy/linalg.py +67 -61
- mindspore/scipy/ops.py +5 -41
- mindspore/scipy/ops_grad.py +3 -2
- mindspore/scipy/ops_wrapper.py +5 -5
- mindspore/scipy/optimize/line_search.py +8 -8
- mindspore/scipy/optimize/linear_sum_assignment.py +4 -4
- mindspore/scipy/optimize/minimize.py +16 -12
- mindspore/scipy/utils.py +1 -52
- mindspore/scipy/utils_const.py +4 -4
- mindspore/train/__init__.py +4 -4
- mindspore/train/_utils.py +13 -5
- mindspore/train/amp.py +410 -148
- mindspore/train/anf_ir_pb2.py +16 -4
- mindspore/train/callback/_backup_and_restore.py +8 -11
- mindspore/train/callback/_callback.py +80 -3
- mindspore/train/callback/_checkpoint.py +82 -51
- mindspore/train/callback/_early_stop.py +12 -15
- mindspore/train/callback/_history.py +1 -1
- mindspore/train/callback/_lambda_callback.py +13 -13
- mindspore/train/callback/_landscape.py +21 -17
- mindspore/train/callback/_loss_monitor.py +9 -10
- mindspore/train/callback/_on_request_exit.py +16 -33
- mindspore/train/callback/_reduce_lr_on_plateau.py +21 -24
- mindspore/train/callback/_summary_collector.py +44 -30
- mindspore/train/callback/_time_monitor.py +62 -12
- mindspore/train/data_sink.py +10 -16
- mindspore/train/dataset_helper.py +154 -86
- mindspore/train/loss_scale_manager.py +14 -9
- mindspore/train/metrics/__init__.py +10 -2
- mindspore/train/metrics/accuracy.py +1 -1
- mindspore/train/metrics/auc.py +1 -1
- mindspore/train/metrics/bleu_score.py +2 -2
- mindspore/train/metrics/confusion_matrix.py +14 -14
- mindspore/train/metrics/cosine_similarity.py +3 -3
- mindspore/train/metrics/dice.py +1 -1
- mindspore/train/metrics/fbeta.py +1 -1
- mindspore/train/metrics/hausdorff_distance.py +8 -6
- mindspore/train/metrics/mean_surface_distance.py +5 -4
- mindspore/train/metrics/metric.py +49 -17
- mindspore/train/metrics/occlusion_sensitivity.py +4 -4
- mindspore/train/metrics/perplexity.py +1 -1
- mindspore/train/metrics/precision.py +2 -2
- mindspore/train/metrics/recall.py +2 -3
- mindspore/train/metrics/roc.py +7 -7
- mindspore/train/metrics/root_mean_square_surface_distance.py +5 -4
- mindspore/train/metrics/topk.py +7 -4
- mindspore/train/mind_ir_pb2.py +193 -48
- mindspore/train/model.py +377 -133
- mindspore/train/serialization.py +697 -245
- mindspore/train/summary/_summary_adapter.py +5 -2
- mindspore/train/summary/_writer_pool.py +4 -3
- mindspore/train/summary/summary_record.py +25 -23
- mindspore/train/train_thor/convert_utils.py +39 -23
- mindspore/train/train_thor/dataset_helper.py +4 -3
- mindspore/train/train_thor/model_thor.py +8 -8
- mindspore/version.py +1 -1
- {mindspore-2.0.0rc1.dist-info → mindspore-2.2.0.dist-info}/METADATA +7 -8
- {mindspore-2.0.0rc1.dist-info → mindspore-2.2.0.dist-info}/RECORD +633 -804
- {mindspore-2.0.0rc1.dist-info → mindspore-2.2.0.dist-info}/entry_points.txt +0 -1
- mindspore/_akg/akg/tvm/contrib/debugger/__init__.py +0 -16
- mindspore/_akg/akg/tvm/contrib/debugger/debug_result.py +0 -274
- mindspore/_akg/akg/tvm/contrib/debugger/debug_runtime.py +0 -259
- mindspore/_akg/akg/tvm/contrib/peak.py +0 -341
- mindspore/_akg/akg/tvm/contrib/rpc.py +0 -25
- mindspore/_akg/akg/tvm/contrib/xcode.py +0 -257
- mindspore/_akg/akg/tvm/exec/__init__.py +0 -17
- mindspore/_akg/akg/tvm/exec/autotvm_log_editor.py +0 -60
- mindspore/_akg/akg/tvm/exec/measure_peak.py +0 -48
- mindspore/_akg/akg/tvm/exec/query_rpc_tracker.py +0 -48
- mindspore/_akg/akg/tvm/exec/rpc_proxy.py +0 -98
- mindspore/_akg/akg/tvm/exec/rpc_server.py +0 -88
- mindspore/_akg/akg/tvm/exec/rpc_tracker.py +0 -62
- mindspore/_akg/akg/tvm/rpc/__init__.py +0 -29
- mindspore/_akg/akg/tvm/rpc/base.py +0 -182
- mindspore/_akg/akg/tvm/rpc/client.py +0 -436
- mindspore/_akg/akg/tvm/rpc/proxy.py +0 -595
- mindspore/_akg/akg/tvm/rpc/server.py +0 -413
- mindspore/_akg/akg/tvm/rpc/tornado_util.py +0 -121
- mindspore/_akg/akg/tvm/rpc/tracker.py +0 -431
- mindspore/_extends/graph_kernel/expander.py +0 -80
- mindspore/_extends/graph_kernel/expanders/__init__.py +0 -57
- mindspore/_extends/graph_kernel/expanders/_utils.py +0 -269
- mindspore/_extends/graph_kernel/expanders/addn.py +0 -33
- mindspore/_extends/graph_kernel/expanders/batchnorm.py +0 -152
- mindspore/_extends/graph_kernel/expanders/batchnorm_grad.py +0 -105
- mindspore/_extends/graph_kernel/expanders/bias_add_grad.py +0 -49
- mindspore/_extends/graph_kernel/expanders/clip_by_norm_no_div_sum.py +0 -33
- mindspore/_extends/graph_kernel/expanders/complex/abs.py +0 -30
- mindspore/_extends/graph_kernel/expanders/complex/add.py +0 -44
- mindspore/_extends/graph_kernel/expanders/complex/div.py +0 -62
- mindspore/_extends/graph_kernel/expanders/complex/mul.py +0 -52
- mindspore/_extends/graph_kernel/expanders/complex/real_div.py +0 -62
- mindspore/_extends/graph_kernel/expanders/complex/sub.py +0 -45
- mindspore/_extends/graph_kernel/expanders/conv2d.py +0 -200
- mindspore/_extends/graph_kernel/expanders/dropout_grad.py +0 -30
- mindspore/_extends/graph_kernel/expanders/equal_count.py +0 -50
- mindspore/_extends/graph_kernel/expanders/erfc.py +0 -35
- mindspore/_extends/graph_kernel/expanders/expand_dims.py +0 -50
- mindspore/_extends/graph_kernel/expanders/fused_adam.py +0 -44
- mindspore/_extends/graph_kernel/expanders/fused_adam_weight_decay.py +0 -47
- mindspore/_extends/graph_kernel/expanders/fused_mul_add.py +0 -28
- mindspore/_extends/graph_kernel/expanders/gather.py +0 -43
- mindspore/_extends/graph_kernel/expanders/gelu_grad.py +0 -70
- mindspore/_extends/graph_kernel/expanders/gkdropout.py +0 -40
- mindspore/_extends/graph_kernel/expanders/identity.py +0 -25
- mindspore/_extends/graph_kernel/expanders/layernorm.py +0 -93
- mindspore/_extends/graph_kernel/expanders/layernorm_grad.py +0 -113
- mindspore/_extends/graph_kernel/expanders/logsoftmax.py +0 -46
- mindspore/_extends/graph_kernel/expanders/logsoftmax_grad.py +0 -36
- mindspore/_extends/graph_kernel/expanders/matmul.py +0 -80
- mindspore/_extends/graph_kernel/expanders/maximum_grad.py +0 -59
- mindspore/_extends/graph_kernel/expanders/minimum_grad.py +0 -80
- mindspore/_extends/graph_kernel/expanders/oneslike.py +0 -26
- mindspore/_extends/graph_kernel/expanders/reduce_mean.py +0 -43
- mindspore/_extends/graph_kernel/expanders/relu_grad.py +0 -32
- mindspore/_extends/graph_kernel/expanders/sigmoid_cross_entropy_with_logits.py +0 -41
- mindspore/_extends/graph_kernel/expanders/sigmoid_cross_entropy_with_logits_grad.py +0 -35
- mindspore/_extends/graph_kernel/expanders/sigmoid_grad.py +0 -31
- mindspore/_extends/graph_kernel/expanders/slice.py +0 -35
- mindspore/_extends/graph_kernel/expanders/softmax_cross_entropy_with_logits.py +0 -42
- mindspore/_extends/graph_kernel/expanders/softmax_grad_ext.py +0 -41
- mindspore/_extends/graph_kernel/expanders/softsign.py +0 -28
- mindspore/_extends/graph_kernel/expanders/sqrt_grad.py +0 -29
- mindspore/_extends/graph_kernel/expanders/square_sum_all.py +0 -44
- mindspore/_extends/graph_kernel/expanders/square_sum_v1.py +0 -37
- mindspore/_extends/graph_kernel/expanders/squared_difference.py +0 -43
- mindspore/_extends/graph_kernel/expanders/tanh_grad.py +0 -31
- mindspore/_extends/graph_kernel/expanders/tile.py +0 -54
- mindspore/_extends/graph_kernel/model/op_infer.py +0 -506
- mindspore/_extends/parse/jit_fallback_modules.py +0 -51
- mindspore/dataset/datapreprocess/preprocess_imagenet_validate_dataset.py +0 -54
- mindspore/dataset/engine/graphdata.py +0 -1586
- mindspore/include/api/net.h +0 -142
- mindspore/ops/_grad/grad_array_ops.py +0 -1347
- mindspore/ops/_grad/grad_clip_ops.py +0 -84
- mindspore/ops/_grad/grad_debug_ops.py +0 -68
- mindspore/ops/_grad/grad_inner_ops.py +0 -235
- mindspore/ops/_grad/grad_math_ops.py +0 -1684
- mindspore/ops/_grad/grad_nn_ops.py +0 -1529
- mindspore/ops/_grad/grad_other_ops.py +0 -89
- mindspore/ops/_grad/grad_sequence_ops.py +0 -296
- mindspore/ops/_grad/grad_sparse.py +0 -323
- mindspore/ops/_grad_experimental/grad_image_ops.py +0 -249
- mindspore/ops/_grad_experimental/grad_linalg_ops.py +0 -195
- mindspore/ops/_grad_experimental/grad_scalar_ops.py +0 -112
- mindspore/ops/bprop_mindir/AdaptiveAvgPool2D_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/AdaptiveMaxPool2D_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/ApproximateEqual_bprop.mindir +0 -19
- mindspore/ops/bprop_mindir/Argmax_bprop.mindir +0 -15
- mindspore/ops/bprop_mindir/Argmin_bprop.mindir +0 -15
- mindspore/ops/bprop_mindir/AssignSub_bprop.mindir +0 -19
- mindspore/ops/bprop_mindir/Assign_bprop.mindir +0 -17
- mindspore/ops/bprop_mindir/AvgPool3D_bprop.mindir +0 -150
- mindspore/ops/bprop_mindir/AvgPool_bprop.mindir +0 -66
- mindspore/ops/bprop_mindir/BCEWithLogitsLoss_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/BNTrainingReduce_bprop.mindir +0 -15
- mindspore/ops/bprop_mindir/BatchNormGrad_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/BatchToSpaceND_bprop.mindir +0 -28
- mindspore/ops/bprop_mindir/BiasAddGrad_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/BinaryCrossEntropy_bprop.mindir +0 -33
- mindspore/ops/bprop_mindir/BroadcastTo_bprop.mindir +0 -306
- mindspore/ops/bprop_mindir/Broadcast_bprop.mindir +0 -13
- mindspore/ops/bprop_mindir/CTCLoss_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Concat_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Conv2DBackpropFilter_bprop.mindir +0 -240
- mindspore/ops/bprop_mindir/Conv2DBackpropInput_bprop.mindir +0 -247
- mindspore/ops/bprop_mindir/Conv2DTranspose_bprop.mindir +0 -247
- mindspore/ops/bprop_mindir/Conv3DTranspose_bprop.mindir +0 -315
- mindspore/ops/bprop_mindir/Conv3D_bprop.mindir +0 -278
- mindspore/ops/bprop_mindir/DType_bprop.mindir +0 -14
- mindspore/ops/bprop_mindir/DeformableOffsets_bprop.mindir +0 -58
- mindspore/ops/bprop_mindir/Depend_bprop.mindir +0 -13
- mindspore/ops/bprop_mindir/DepthToSpace_bprop.mindir +0 -23
- mindspore/ops/bprop_mindir/DepthwiseConv2dNative_bprop.mindir +0 -138
- mindspore/ops/bprop_mindir/DiagPart_bprop.mindir +0 -15
- mindspore/ops/bprop_mindir/Dropout2D_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Dropout3D_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/DropoutDoMask_bprop.mindir +0 -25
- mindspore/ops/bprop_mindir/DropoutGenMask_bprop.mindir +0 -18
- mindspore/ops/bprop_mindir/DropoutGrad_bprop.mindir +0 -27
- mindspore/ops/bprop_mindir/Dropout_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/DynamicGRUV2_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/DynamicRNN_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/DynamicShape_bprop.mindir +0 -14
- mindspore/ops/bprop_mindir/Elu_bprop.mindir +0 -16
- mindspore/ops/bprop_mindir/EmbeddingLookup_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Equal_bprop.mindir +0 -19
- mindspore/ops/bprop_mindir/ExpandDims_bprop.mindir +0 -58
- mindspore/ops/bprop_mindir/FastGeLU_bprop.mindir +0 -16
- mindspore/ops/bprop_mindir/Flatten_bprop.mindir +0 -54
- mindspore/ops/bprop_mindir/FloorDiv_bprop.mindir +0 -19
- mindspore/ops/bprop_mindir/GatherD_bprop.mindir +0 -26
- mindspore/ops/bprop_mindir/GatherNd_bprop.mindir +0 -57
- mindspore/ops/bprop_mindir/Gather_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/GreaterEqual_bprop.mindir +0 -19
- mindspore/ops/bprop_mindir/Greater_bprop.mindir +0 -19
- mindspore/ops/bprop_mindir/HSigmoid_bprop.mindir +0 -16
- mindspore/ops/bprop_mindir/HSwish_bprop.mindir +0 -16
- mindspore/ops/bprop_mindir/IOU_bprop.mindir +0 -19
- mindspore/ops/bprop_mindir/InstanceNorm_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/IsFinite_bprop.mindir +0 -15
- mindspore/ops/bprop_mindir/IsInf_bprop.mindir +0 -15
- mindspore/ops/bprop_mindir/IsNan_bprop.mindir +0 -15
- mindspore/ops/bprop_mindir/KLDivLoss_bprop.mindir +0 -126
- mindspore/ops/bprop_mindir/L2Loss_bprop.mindir +0 -15
- mindspore/ops/bprop_mindir/L2Normalize_bprop.mindir +0 -30
- mindspore/ops/bprop_mindir/LRN_bprop.mindir +0 -43
- mindspore/ops/bprop_mindir/LayerNormGrad_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/LessEqual_bprop.mindir +0 -19
- mindspore/ops/bprop_mindir/Less_bprop.mindir +0 -19
- mindspore/ops/bprop_mindir/LinSpace_bprop.mindir +0 -23
- mindspore/ops/bprop_mindir/Load_bprop.mindir +0 -13
- mindspore/ops/bprop_mindir/LogSoftmax_bprop.mindir +0 -23
- mindspore/ops/bprop_mindir/LogicalAnd_bprop.mindir +0 -19
- mindspore/ops/bprop_mindir/LogicalNot_bprop.mindir +0 -15
- mindspore/ops/bprop_mindir/MaskedSelect_bprop.mindir +0 -21
- mindspore/ops/bprop_mindir/MaxPool3DGradGrad_bprop.mindir +0 -74
- mindspore/ops/bprop_mindir/MaxPool3DGrad_bprop.mindir +0 -74
- mindspore/ops/bprop_mindir/MaxPool3D_bprop.mindir +0 -75
- mindspore/ops/bprop_mindir/MaxPoolGradGrad_bprop.mindir +0 -65
- mindspore/ops/bprop_mindir/MaxPoolWithArgmax_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Maximum_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Minimum_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/MirrorPad_bprop.mindir +0 -27
- mindspore/ops/bprop_mindir/Mish_bprop.mindir +0 -35
- mindspore/ops/bprop_mindir/MulNoNan_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/NLLLoss_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/NonZero_bprop.mindir +0 -14
- mindspore/ops/bprop_mindir/NotEqual_bprop.mindir +0 -19
- mindspore/ops/bprop_mindir/OneHot_bprop.mindir +0 -26
- mindspore/ops/bprop_mindir/OnesLike_bprop.mindir +0 -14
- mindspore/ops/bprop_mindir/PReLU_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Pad_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Padding_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/RNNTLoss_bprop.mindir +0 -29
- mindspore/ops/bprop_mindir/ROIAlign_bprop.mindir +0 -82
- mindspore/ops/bprop_mindir/Range_bprop.mindir +0 -22
- mindspore/ops/bprop_mindir/Rank_bprop.mindir +0 -14
- mindspore/ops/bprop_mindir/ReLU6_bprop.mindir +0 -16
- mindspore/ops/bprop_mindir/ReLUV2_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/ReduceAll_bprop.mindir +0 -19
- mindspore/ops/bprop_mindir/ReduceAny_bprop.mindir +0 -19
- mindspore/ops/bprop_mindir/ReluGrad_bprop.mindir +0 -20
- mindspore/ops/bprop_mindir/Reshape_bprop.mindir +0 -60
- mindspore/ops/bprop_mindir/ResizeBilinear_bprop.mindir +0 -29
- mindspore/ops/bprop_mindir/ResizeNearestNeighbor_bprop.mindir +0 -89
- mindspore/ops/bprop_mindir/ReverseSequence_bprop.mindir +0 -52
- mindspore/ops/bprop_mindir/ReverseV2_bprop.mindir +0 -22
- mindspore/ops/bprop_mindir/Round_bprop.mindir +0 -15
- mindspore/ops/bprop_mindir/ScatterMax_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/ScatterMin_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/ScatterNdUpdate_bprop.mindir +0 -22
- mindspore/ops/bprop_mindir/ScatterNd_bprop.mindir +0 -24
- mindspore/ops/bprop_mindir/ScatterNonAliasingAdd_bprop.mindir +0 -22
- mindspore/ops/bprop_mindir/ScatterUpdate_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/SeLU_bprop.mindir +0 -21
- mindspore/ops/bprop_mindir/Select_bprop.mindir +0 -31
- mindspore/ops/bprop_mindir/Shape_bprop.mindir +0 -14
- mindspore/ops/bprop_mindir/SigmoidCrossEntropyWithLogits_bprop.mindir +0 -21
- mindspore/ops/bprop_mindir/SigmoidGrad_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Sigmoid_bprop.mindir +0 -16
- mindspore/ops/bprop_mindir/Sign_bprop.mindir +0 -15
- mindspore/ops/bprop_mindir/Slice_bprop.mindir +0 -26
- mindspore/ops/bprop_mindir/SmoothL1Loss_bprop.mindir +0 -36
- mindspore/ops/bprop_mindir/SoftmaxCrossEntropyWithLogits_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Softplus_bprop.mindir +0 -16
- mindspore/ops/bprop_mindir/Softsign_bprop.mindir +0 -33
- mindspore/ops/bprop_mindir/Sort_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/SpaceToBatchND_bprop.mindir +0 -28
- mindspore/ops/bprop_mindir/SpaceToDepth_bprop.mindir +0 -23
- mindspore/ops/bprop_mindir/SparseGatherV2_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/SparseSoftmaxCrossEntropyWithLogits_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Split_bprop.mindir +0 -22
- mindspore/ops/bprop_mindir/Squeeze_bprop.mindir +0 -54
- mindspore/ops/bprop_mindir/StridedSliceGrad_bprop.mindir +0 -95
- mindspore/ops/bprop_mindir/StridedSlice_bprop.mindir +0 -98
- mindspore/ops/bprop_mindir/Switch_bprop.mindir +0 -29
- mindspore/ops/bprop_mindir/TanhGrad_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Tanh_bprop.mindir +0 -66
- mindspore/ops/bprop_mindir/TensorScatterAdd_bprop.mindir +0 -22
- mindspore/ops/bprop_mindir/TensorScatterUpdate_bprop.mindir +0 -29
- mindspore/ops/bprop_mindir/TensorShape_bprop.mindir +0 -14
- mindspore/ops/bprop_mindir/Tile_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/TopK_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/TransShape_bprop.mindir +0 -23
- mindspore/ops/bprop_mindir/TruncateDiv_bprop.mindir +0 -19
- mindspore/ops/bprop_mindir/TupleGetItem_bprop.mindir +0 -20
- mindspore/ops/bprop_mindir/Unique_bprop.mindir +0 -16
- mindspore/ops/bprop_mindir/Unstack_bprop.mindir +0 -22
- mindspore/ops/bprop_mindir/UpsampleNearest3D_bprop.mindir +0 -32
- mindspore/ops/bprop_mindir/UpsampleTrilinear3D_bprop.mindir +0 -38
- mindspore/ops/bprop_mindir/ZerosLike_bprop.mindir +0 -15
- mindspore/ops/bprop_mindir/generate_mindir.py +0 -114
- mindspore/rewrite/node_visitor.py +0 -44
- mindspore/rewrite/topological_manager.py +0 -203
- mindspore/scipy/sparse/linalg.py +0 -192
- {mindspore-2.0.0rc1.dist-info → mindspore-2.2.0.dist-info}/WHEEL +0 -0
- {mindspore-2.0.0rc1.dist-info → mindspore-2.2.0.dist-info}/top_level.txt +0 -0
|
@@ -15,13 +15,14 @@
|
|
|
15
15
|
"""The integrator for integrating parsed profiling files."""
|
|
16
16
|
import os
|
|
17
17
|
|
|
18
|
+
import numpy as np
|
|
18
19
|
from mindspore import log as logger
|
|
19
20
|
from mindspore.profiler.common.exceptions.exceptions import ProfilerIOException
|
|
20
|
-
from mindspore.profiler.parser.container import TimelineContainer
|
|
21
|
-
from mindspore.profiler.parser.op_intermediate_parser import OPIntermediateParser
|
|
22
21
|
from mindspore.profiler.parser.base_timeline_generator import BaseTimelineGenerator
|
|
23
|
-
from mindspore.profiler.parser.
|
|
22
|
+
from mindspore.profiler.parser.container import TimelineContainer
|
|
24
23
|
from mindspore.profiler.parser.cpu_gpu_timeline_generator import CpuTimelineGenerator
|
|
24
|
+
from mindspore.profiler.parser.integrator import DeviceTarget
|
|
25
|
+
from mindspore.profiler.parser.op_intermediate_parser import OPIntermediateParser
|
|
25
26
|
|
|
26
27
|
|
|
27
28
|
class AscendTimelineGenerator(BaseTimelineGenerator):
|
|
@@ -39,6 +40,17 @@ class AscendTimelineGenerator(BaseTimelineGenerator):
|
|
|
39
40
|
self._display_filename = self._display_filename.format(rank_id)
|
|
40
41
|
self._timeline_summary_filename = self._timeline_summary_filename.format(rank_id)
|
|
41
42
|
|
|
43
|
+
self.step_time_list_df = np.dtype(
|
|
44
|
+
[('Iteration ID', object), ('Steps', object), ('Iteration Start', float), ('Iteration Time', float)])
|
|
45
|
+
|
|
46
|
+
self.aicpu_time_list_dt = np.dtype(
|
|
47
|
+
[('Op Name', object), ('Stream ID', int), ('Task Start Time', float), ('Task Duration', float),
|
|
48
|
+
('pid', int)])
|
|
49
|
+
|
|
50
|
+
self.communication_info_dt = np.dtype(
|
|
51
|
+
[('Op Name', object), ('Stream ID', int), ('Task Start Time', float), ('Task Duration', float),
|
|
52
|
+
('pid', int)])
|
|
53
|
+
|
|
42
54
|
@staticmethod
|
|
43
55
|
def _get_all_reduce_names(communication_info):
|
|
44
56
|
names = []
|
|
@@ -49,27 +61,48 @@ class AscendTimelineGenerator(BaseTimelineGenerator):
|
|
|
49
61
|
names.append(all_reduce_name)
|
|
50
62
|
return names
|
|
51
63
|
|
|
52
|
-
def init_timeline(self,
|
|
64
|
+
def init_timeline(self, op_summary, steptrace):
|
|
53
65
|
"""
|
|
54
66
|
Init timeline metadata, adding all collected info.
|
|
55
67
|
|
|
56
68
|
Args:
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
aicpu_info (dict): The metadata of AI CPU operator.
|
|
60
|
-
min_cycle_counter (float): The minimum cycle counter of the timeline.
|
|
61
|
-
source_path (str): The source of file.
|
|
69
|
+
op_summary: op data
|
|
70
|
+
steptrace: step data
|
|
62
71
|
"""
|
|
63
|
-
if min_cycle_counter == float('inf'):
|
|
64
|
-
min_cycle_counter = 0
|
|
65
72
|
|
|
66
73
|
logger.info('Initiating timeline...')
|
|
67
|
-
|
|
68
|
-
timeline_list.
|
|
74
|
+
|
|
75
|
+
timeline_list = op_summary[~np.isin(op_summary['Task Type'], ['AI_CPU', 'HCCL'])][
|
|
76
|
+
['Op Name', 'Stream ID', 'Task Start Time', 'Task Duration']]
|
|
77
|
+
|
|
78
|
+
timeline_list = timeline_list.tolist()
|
|
79
|
+
cpu_timeline_generator = CpuTimelineGenerator(self._profiling_dir, self._rank_id, self._model)
|
|
80
|
+
cpu_timeline_list = cpu_timeline_generator.get_timeline_data()
|
|
81
|
+
if cpu_timeline_list:
|
|
82
|
+
timeline_list.extend(cpu_timeline_list)
|
|
83
|
+
timeline_list.sort(key=lambda x: float(x[self._start_time_idx]))
|
|
84
|
+
min_cycle_counter = 0
|
|
85
|
+
if timeline_list:
|
|
86
|
+
min_cycle_counter = timeline_list[0][2]
|
|
69
87
|
|
|
70
88
|
# Generate step time.
|
|
71
89
|
self._set_step_start_and_end_op_name(timeline_list)
|
|
72
|
-
|
|
90
|
+
|
|
91
|
+
if not isinstance(steptrace, np.ndarray) or steptrace.shape[0] == 0 or not steptrace.tolist():
|
|
92
|
+
iteration_time = op_summary[-1]['Task Start Time'] - op_summary[0]['Task Start Time'] + op_summary[-1][
|
|
93
|
+
'Task Duration'] + op_summary[-1]['Task Wait Time']
|
|
94
|
+
step_time_list = [['1', 'Steps', op_summary[0]['Task Start Time'], iteration_time]]
|
|
95
|
+
else:
|
|
96
|
+
step_time_list = np.empty((len(steptrace),), dtype=self.step_time_list_df)
|
|
97
|
+
step_time_list['Iteration ID'] = \
|
|
98
|
+
np.char.add("Model ID: ",
|
|
99
|
+
np.char.add(steptrace['Model ID'].astype(str),
|
|
100
|
+
np.char.add(" Iteration ID: ",
|
|
101
|
+
steptrace['Iteration ID'].astype(str))))
|
|
102
|
+
step_time_list['Steps'] = 'Steps'
|
|
103
|
+
step_time_list['Iteration Start'] = steptrace['Iteration End'] - steptrace['Iteration Time']
|
|
104
|
+
step_time_list['Iteration Time'] = steptrace['Iteration Time']
|
|
105
|
+
step_time_list = step_time_list.tolist()
|
|
73
106
|
|
|
74
107
|
# Add Scope Name.
|
|
75
108
|
default_scope_name_time_list = self._get_scope_name_time_list(timeline_list, "Default")
|
|
@@ -77,17 +110,32 @@ class AscendTimelineGenerator(BaseTimelineGenerator):
|
|
|
77
110
|
recompute_scope_name_time_list = self._get_scope_name_time_list(timeline_list, "recompute_Default")
|
|
78
111
|
|
|
79
112
|
# Add AI CPU data into timeline temp list and sort by start time.
|
|
80
|
-
aicpu_data = aicpu_info.get('info')
|
|
81
|
-
if aicpu_data:
|
|
82
|
-
timeline_list.extend(aicpu_data)
|
|
83
|
-
self._timeline_summary['op_exe_times'] += aicpu_info.get('op_exe_times', 0)
|
|
84
|
-
self._timeline_summary['num_of_streams'] += aicpu_info.get('num_of_streams', 0)
|
|
85
|
-
self._timeline_summary['num_of_ops'] += aicpu_info.get('num_of_ops', 0)
|
|
86
|
-
self._timeline_summary['total_time'] += aicpu_info.get('total_time', 0)
|
|
87
113
|
|
|
114
|
+
aicpu_op = op_summary[op_summary['Task Type'] == 'AI_CPU']
|
|
115
|
+
if aicpu_op.size:
|
|
116
|
+
aicpu_time_list = np.empty((len(aicpu_op),), dtype=self.aicpu_time_list_dt)
|
|
117
|
+
aicpu_time_list['Op Name'] = aicpu_op['Op Name']
|
|
118
|
+
aicpu_time_list['Stream ID'] = aicpu_op['Stream ID']
|
|
119
|
+
aicpu_time_list['Task Start Time'] = aicpu_op['Task Start Time']
|
|
120
|
+
aicpu_time_list['Task Duration'] = aicpu_op['Task Duration'] + aicpu_op['Task Wait Time']
|
|
121
|
+
aicpu_time_list['pid'] = 9000
|
|
122
|
+
aicpu_time_list = aicpu_time_list.tolist()
|
|
123
|
+
timeline_list.extend(aicpu_time_list)
|
|
88
124
|
timeline_list.sort(key=lambda x: float(x[self._start_time_idx]))
|
|
89
125
|
|
|
90
126
|
# Add AllReduce info to timeline temp list and sort by start time.
|
|
127
|
+
hccl_op = op_summary[op_summary['Task Type'] == 'HCCL']
|
|
128
|
+
if hccl_op.size:
|
|
129
|
+
communication_info = np.empty((len(hccl_op,)), dtype=self.communication_info_dt)
|
|
130
|
+
communication_info['Op Name'] = hccl_op['Op Name']
|
|
131
|
+
communication_info['Stream ID'] = hccl_op['Stream ID']
|
|
132
|
+
communication_info['Task Start Time'] = hccl_op['Task Start Time']
|
|
133
|
+
communication_info['Task Duration'] = hccl_op['Task Duration']
|
|
134
|
+
communication_info['pid'] = 10000
|
|
135
|
+
communication_info = communication_info.tolist()
|
|
136
|
+
communication_info.sort(key=lambda x: float(x[self._start_time_idx]))
|
|
137
|
+
else:
|
|
138
|
+
communication_info = []
|
|
91
139
|
if communication_info:
|
|
92
140
|
logger.debug('AllReduce info found. Start adding info into timeline...')
|
|
93
141
|
cluster_related_timeline = self._get_cluster_timeline(
|
|
@@ -104,24 +152,24 @@ class AscendTimelineGenerator(BaseTimelineGenerator):
|
|
|
104
152
|
timeline_list.sort(key=lambda x: float(x[self._start_time_idx]))
|
|
105
153
|
|
|
106
154
|
# Init a dict for counting the num of streams.
|
|
107
|
-
stream_count_dict = {}
|
|
108
155
|
for timeline in timeline_list:
|
|
109
156
|
self._parse_timeline_data(timeline, min_cycle_counter)
|
|
110
|
-
# Updating the collection of streams.
|
|
111
|
-
if len(timeline) == 4:
|
|
112
|
-
self._update_num_of_streams(timeline, stream_count_dict)
|
|
113
157
|
|
|
114
158
|
# Add format thread meta data.
|
|
115
159
|
self._format_meta_data_list.extend(self._timeline_meta)
|
|
116
160
|
self._timeline_meta = self._format_meta_data_list
|
|
117
|
-
# Get framework metadata.
|
|
118
|
-
# The length of list is the number of operators.
|
|
119
|
-
self._timeline_summary['num_of_ops'] += len(framework_info.get('object'))
|
|
120
|
-
self._add_framework_info(framework_info.get('object'))
|
|
121
|
-
logger.info('Finished adding info into timeline...')
|
|
122
161
|
|
|
123
162
|
# Update timeline summary info
|
|
124
|
-
|
|
163
|
+
timeline_summary = op_summary[np.isin(op_summary['Task Type'], ['AI_CORE', 'AI_CPU', 'HCCL'])][[
|
|
164
|
+
'Op Name', 'Stream ID', 'Task Duration']]
|
|
165
|
+
self._timeline_summary['total_time'] = np.sum(timeline_summary['Task Duration'])
|
|
166
|
+
self._timeline_summary['num_of_streams'] = int(
|
|
167
|
+
len(np.unique(timeline_summary['Stream ID'], return_counts=True)[0]))
|
|
168
|
+
self._timeline_summary['num_of_ops'] = int(len(np.unique(timeline_summary['Op Name'], return_counts=True)[0]))
|
|
169
|
+
self._timeline_summary['op_exe_times'] = int(len(timeline_summary))
|
|
170
|
+
self._timeline_summary['max_scope_name_num'] = int(np.max(
|
|
171
|
+
[len(x) for x in np.char.split(timeline_summary['Op Name'].astype(str), sep='/')]))
|
|
172
|
+
logger.info('Finished adding info into timeline...')
|
|
125
173
|
|
|
126
174
|
def init_pynative_timeline(self):
|
|
127
175
|
"""Init timeline for pynative model."""
|
|
@@ -207,21 +255,31 @@ class AscendTimelineGenerator(BaseTimelineGenerator):
|
|
|
207
255
|
"""Synchronize the timestamp from host to device."""
|
|
208
256
|
host_start_file_path = os.path.join(source_path, f"host_start.log.{self._device_id}")
|
|
209
257
|
dev_start_file_path = os.path.join(source_path, f"dev_start.log.{self._device_id}")
|
|
210
|
-
|
|
258
|
+
host_monotonic = 0
|
|
259
|
+
dev_cntvct = 0
|
|
211
260
|
try:
|
|
212
261
|
with open(host_start_file_path) as f_obj:
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
262
|
+
lines = f_obj.readlines()
|
|
263
|
+
for line in lines:
|
|
264
|
+
info = line.strip().split(':')
|
|
265
|
+
if len(info) < 2 or info[0] != "clock_monotonic_raw":
|
|
266
|
+
continue
|
|
267
|
+
host_monotonic = int(info[1])
|
|
268
|
+
break
|
|
269
|
+
|
|
219
270
|
with open(dev_start_file_path) as f_obj:
|
|
220
|
-
|
|
221
|
-
|
|
271
|
+
lines = f_obj.readlines()
|
|
272
|
+
for line in lines:
|
|
273
|
+
info = line.strip().split(':')
|
|
274
|
+
if len(info) < 2 or info[0] != "cntvct":
|
|
275
|
+
continue
|
|
276
|
+
dev_cntvct = int(info[1])
|
|
277
|
+
break
|
|
222
278
|
except (IOError, OSError) as err:
|
|
223
279
|
logger.critical('Error occurred when read dev_start.log: %s', err)
|
|
224
|
-
raise ProfilerIOException()
|
|
280
|
+
raise ProfilerIOException() from err
|
|
281
|
+
if host_monotonic == 0 or dev_cntvct == 0:
|
|
282
|
+
logger.error('Error occurred when read host_monotonic or dev_cntvct time')
|
|
225
283
|
|
|
226
284
|
factor = {"factor_ns_to_ms": 1e-6, "factor_ten_ns_to_ns": 10, "factor_ms_to_ns": 1e6}
|
|
227
285
|
for idx, time_item in enumerate(timeline_list):
|
|
@@ -291,8 +349,6 @@ class AscendTimelineGenerator(BaseTimelineGenerator):
|
|
|
291
349
|
that communication inside each stage slow down the training.
|
|
292
350
|
"""
|
|
293
351
|
is_pipeline_parallel = False
|
|
294
|
-
time_info = {"receive_op_not_overlapped_timeline": [], "collective_comm_not_overlapped_timeline": [],
|
|
295
|
-
"comm_not_overlapped_timeline": []}
|
|
296
352
|
comm_timeline = self._get_merged_time_list(
|
|
297
353
|
comm_info, display_name="communication"
|
|
298
354
|
)
|
|
@@ -300,7 +356,7 @@ class AscendTimelineGenerator(BaseTimelineGenerator):
|
|
|
300
356
|
aicore_info, get_interval_time=True
|
|
301
357
|
)
|
|
302
358
|
# Consider if the overlap will be 0 or not.
|
|
303
|
-
|
|
359
|
+
comm_not_overlapped_timeline = self._get_intersection_time(
|
|
304
360
|
aicore_timeline[0], comm_timeline[0]
|
|
305
361
|
)
|
|
306
362
|
|
|
@@ -316,8 +372,7 @@ class AscendTimelineGenerator(BaseTimelineGenerator):
|
|
|
316
372
|
timeline_exclude_receive_op_interval = self._get_merged_time_list(
|
|
317
373
|
receive_timeline[1], get_interval_time=True
|
|
318
374
|
)[0]
|
|
319
|
-
|
|
320
|
-
time_info["receive_op_not_overlapped_timeline"] = self._get_intersection_time(
|
|
375
|
+
receive_op_not_overlapped_timeline = self._get_intersection_time(
|
|
321
376
|
timeline_exclude_receive_op_interval, receive_op_merged_timeline
|
|
322
377
|
)
|
|
323
378
|
|
|
@@ -326,7 +381,7 @@ class AscendTimelineGenerator(BaseTimelineGenerator):
|
|
|
326
381
|
comm_info, "Receive-op"
|
|
327
382
|
)[-1]
|
|
328
383
|
|
|
329
|
-
|
|
384
|
+
collective_comm_not_overlapped_timeline = self._get_intersection_time(
|
|
330
385
|
aicore_timeline[0], self._get_merged_time_list(collective_comm_timeline)[0]
|
|
331
386
|
)
|
|
332
387
|
|
|
@@ -335,12 +390,11 @@ class AscendTimelineGenerator(BaseTimelineGenerator):
|
|
|
335
390
|
all_timeline, get_interval_time=True, display_name="free_time"
|
|
336
391
|
)[1]
|
|
337
392
|
|
|
338
|
-
self._parse_cluster_metrices(step_info,
|
|
339
|
-
|
|
340
|
-
time_info.get("collective_comm_not_overlapped_timeline"), is_pipeline_parallel)
|
|
393
|
+
self._parse_cluster_metrices(step_info, receive_op_not_overlapped_timeline, comm_not_overlapped_timeline,
|
|
394
|
+
collective_comm_not_overlapped_timeline, is_pipeline_parallel)
|
|
341
395
|
|
|
342
396
|
res_timeline = []
|
|
343
|
-
res_timeline.extend(
|
|
397
|
+
res_timeline.extend(comm_not_overlapped_timeline)
|
|
344
398
|
res_timeline.extend(aicore_timeline[2])
|
|
345
399
|
res_timeline.extend(comm_timeline[2])
|
|
346
400
|
res_timeline.extend(free_timeline)
|
|
@@ -370,8 +424,10 @@ class AscendTimelineGenerator(BaseTimelineGenerator):
|
|
|
370
424
|
except IndexError as err:
|
|
371
425
|
logger.error(err)
|
|
372
426
|
|
|
373
|
-
metrices_per_step_list = [
|
|
374
|
-
|
|
427
|
+
metrices_per_step_list = [
|
|
428
|
+
time_info.get("computation_time"), comm_alone_time, time_info.get("stage_time"),
|
|
429
|
+
recieve_alone_time, collective_comm_alone_time
|
|
430
|
+
]
|
|
375
431
|
if step_num > 1:
|
|
376
432
|
for metric in metrices_per_step_list:
|
|
377
433
|
metric.append(sum(metric[1:]) / (step_num - 1))
|
|
@@ -379,7 +435,7 @@ class AscendTimelineGenerator(BaseTimelineGenerator):
|
|
|
379
435
|
self._write_cluster_metrices(metrices_per_step_list, is_pipeline_parallel, "Ascend", self._rank_id)
|
|
380
436
|
except (IOError, OSError) as err:
|
|
381
437
|
logger.warning(err)
|
|
382
|
-
raise ProfilerIOException
|
|
438
|
+
raise ProfilerIOException from err
|
|
383
439
|
|
|
384
440
|
def _compute_time_inside_step(self, metric_timeline, step_time_list):
|
|
385
441
|
"""Compute per step time of metric_timeline."""
|
|
@@ -198,7 +198,7 @@ class BaseTimelineGenerator:
|
|
|
198
198
|
display_file_path = validate_and_normalize_path(display_file_path)
|
|
199
199
|
|
|
200
200
|
try:
|
|
201
|
-
with os.fdopen(os.open(display_file_path, os.O_WRONLY | os.O_CREAT | os.O_TRUNC,
|
|
201
|
+
with os.fdopen(os.open(display_file_path, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600), 'w') as json_file:
|
|
202
202
|
json_file.write('[')
|
|
203
203
|
for _, item in enumerate(self._timeline_meta):
|
|
204
204
|
json.dump(item, json_file)
|
|
@@ -216,7 +216,7 @@ class BaseTimelineGenerator:
|
|
|
216
216
|
return self._timeline_meta
|
|
217
217
|
except (IOError, OSError) as err:
|
|
218
218
|
logger.critical('Error occurred when write timeline display file: %s', err)
|
|
219
|
-
raise ProfilerIOException()
|
|
219
|
+
raise ProfilerIOException() from err
|
|
220
220
|
|
|
221
221
|
def write_timeline_summary(self):
|
|
222
222
|
"""Write timeline summary to json."""
|
|
@@ -229,11 +229,11 @@ class BaseTimelineGenerator:
|
|
|
229
229
|
|
|
230
230
|
try:
|
|
231
231
|
with os.fdopen(os.open(timeline_summary_file_path,
|
|
232
|
-
os.O_WRONLY | os.O_CREAT | os.O_TRUNC,
|
|
232
|
+
os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600), 'w') as json_file:
|
|
233
233
|
json.dump(self._timeline_summary, json_file)
|
|
234
234
|
except (IOError, OSError) as err:
|
|
235
235
|
logger.critical('Error occurred when write timeline summary file: %s', err)
|
|
236
|
-
raise ProfilerIOException()
|
|
236
|
+
raise ProfilerIOException() from err
|
|
237
237
|
if os.path.exists(timeline_summary_file_path):
|
|
238
238
|
os.chmod(timeline_summary_file_path, stat.S_IREAD | stat.S_IWRITE)
|
|
239
239
|
|
|
@@ -438,6 +438,8 @@ class BaseTimelineGenerator:
|
|
|
438
438
|
"""Write cluster metric."""
|
|
439
439
|
# Note that the feature of cluster bottleneck analyse is not supported in offline parse mode,
|
|
440
440
|
# due to that parallel context is not set.
|
|
441
|
+
if context.get_context("mode") == context.PYNATIVE_MODE:
|
|
442
|
+
return
|
|
441
443
|
parallel_mode, stage_num = BaseTimelineGenerator.get_parallel_context()
|
|
442
444
|
|
|
443
445
|
unit = 1 if device_target == "Ascend" else 1e3
|
|
@@ -448,12 +450,14 @@ class BaseTimelineGenerator:
|
|
|
448
450
|
)
|
|
449
451
|
cluster_analyse_file_path = validate_and_normalize_path(cluster_analyse_file_path)
|
|
450
452
|
|
|
451
|
-
with os.fdopen(os.open(cluster_analyse_file_path, os.O_WRONLY | os.O_CREAT | os.O_TRUNC,
|
|
453
|
+
with os.fdopen(os.open(cluster_analyse_file_path, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600),
|
|
452
454
|
'w') as file_handle:
|
|
453
455
|
csv_writer = csv.writer(file_handle)
|
|
454
456
|
if is_pipeline_parallel:
|
|
455
|
-
header = [
|
|
456
|
-
|
|
457
|
+
header = [
|
|
458
|
+
'computation_time', 'communication_alone_time', 'stage_time',
|
|
459
|
+
'receive_alone_time', 'collective_communication_alone_time'
|
|
460
|
+
]
|
|
457
461
|
zip_metrices = zip(metrices[0], metrices[1], metrices[2], metrices[3], metrices[4])
|
|
458
462
|
else:
|
|
459
463
|
header = ['computation_time', 'communication_alone_time']
|
|
@@ -231,7 +231,7 @@ class GpuTimelineGenerator(BaseTimelineGenerator):
|
|
|
231
231
|
gpu_start_time = int(lines[1].strip().split(':')[-1])
|
|
232
232
|
except (IOError, OSError) as err:
|
|
233
233
|
logger.critical(f'Error occurred when read {start_time_file_path}: {err}')
|
|
234
|
-
raise ProfilerIOException()
|
|
234
|
+
raise ProfilerIOException() from err
|
|
235
235
|
|
|
236
236
|
time_diff = gpu_start_time - host_monotonic_start_time
|
|
237
237
|
for idx, time_item in enumerate(timeline_list):
|
|
@@ -258,7 +258,7 @@ class GpuTimelineGenerator(BaseTimelineGenerator):
|
|
|
258
258
|
communication_info.append(line_list)
|
|
259
259
|
except (IOError, OSError) as err:
|
|
260
260
|
logger.critical('Error occurred when load operator timeline data intermediate file: %s', err)
|
|
261
|
-
raise ProfilerIOException()
|
|
261
|
+
raise ProfilerIOException() from err
|
|
262
262
|
|
|
263
263
|
return op_timeline_list, communication_info
|
|
264
264
|
|
|
@@ -324,7 +324,7 @@ class GpuTimelineGenerator(BaseTimelineGenerator):
|
|
|
324
324
|
step_num += 1
|
|
325
325
|
except (IOError, OSError) as err:
|
|
326
326
|
logger.critical(f'Error occurred when read {step_trace_profiling_path}: {err}')
|
|
327
|
-
raise ProfilerIOException()
|
|
327
|
+
raise ProfilerIOException() from err
|
|
328
328
|
|
|
329
329
|
return step_time_list
|
|
330
330
|
|
|
@@ -344,56 +344,56 @@ class GpuTimelineGenerator(BaseTimelineGenerator):
|
|
|
344
344
|
communication time between stages slow down the training. The value of t3 indicates the degree
|
|
345
345
|
that communication inside each stage slow down the training.
|
|
346
346
|
"""
|
|
347
|
-
time_info = {
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
|
|
347
|
+
time_info = {
|
|
348
|
+
"stage_time": [], "computation_time": [], "recieve_alone_time": [], "comm_alone_time": [],
|
|
349
|
+
"collective_comm_alone_time": []
|
|
350
|
+
}
|
|
351
|
+
is_pipeline_parallel = False
|
|
352
|
+
comm_timeline = self._get_merged_time_list(
|
|
353
353
|
comm_info,
|
|
354
354
|
display_name="communication",
|
|
355
355
|
factor=1e-3
|
|
356
356
|
)
|
|
357
357
|
compute_op_timeline = timeline + activity_info
|
|
358
358
|
compute_op_timeline.sort(key=lambda x: float(x[self._start_time_idx]))
|
|
359
|
-
|
|
359
|
+
compute_timeline = self._get_merged_time_list(
|
|
360
360
|
compute_op_timeline,
|
|
361
361
|
get_interval_time=True,
|
|
362
362
|
factor=1e-3
|
|
363
363
|
)
|
|
364
364
|
# Consider if the overlap will be 0 or not.
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
|
|
365
|
+
comm_not_overlapped_timeline = self._get_intersection_time(
|
|
366
|
+
compute_timeline[0],
|
|
367
|
+
comm_timeline[0]
|
|
368
368
|
)
|
|
369
369
|
|
|
370
370
|
# Process receive part.
|
|
371
371
|
all_timeline = timeline + comm_info
|
|
372
372
|
all_timeline.sort(key=lambda x: float(x[self._start_time_idx]))
|
|
373
|
-
|
|
373
|
+
receive_op_timeline = self._produce_two_separated_timeline(
|
|
374
374
|
all_timeline,
|
|
375
375
|
"Receive-op"
|
|
376
376
|
)[0]
|
|
377
|
-
if
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
377
|
+
if receive_op_timeline:
|
|
378
|
+
is_pipeline_parallel = True
|
|
379
|
+
receive_op_merged_timeline = self._get_merged_time_list(receive_op_timeline,
|
|
380
|
+
factor=1e-3)[0]
|
|
381
|
+
|
|
382
|
+
receive_op_not_overlapped_timeline = self._get_intersection_time(
|
|
383
|
+
compute_timeline[0],
|
|
384
|
+
receive_op_merged_timeline,
|
|
385
385
|
display_name="receive_op_not_overlapped"
|
|
386
386
|
)
|
|
387
387
|
|
|
388
388
|
# Process collective communication part.
|
|
389
|
-
|
|
389
|
+
collective_comm_timeline = self._produce_two_separated_timeline(
|
|
390
390
|
comm_info,
|
|
391
391
|
"Receive-op"
|
|
392
392
|
)[-1]
|
|
393
|
-
collective_comm_merged_timeline = self._get_merged_time_list(
|
|
393
|
+
collective_comm_merged_timeline = self._get_merged_time_list(collective_comm_timeline,
|
|
394
394
|
factor=1e-3)[0]
|
|
395
|
-
|
|
396
|
-
|
|
395
|
+
collective_comm_not_overlapped_timeline = self._get_intersection_time(
|
|
396
|
+
compute_timeline[0],
|
|
397
397
|
collective_comm_merged_timeline,
|
|
398
398
|
display_name="exclude_receive_op"
|
|
399
399
|
)
|
|
@@ -401,7 +401,7 @@ class GpuTimelineGenerator(BaseTimelineGenerator):
|
|
|
401
401
|
# Generate free time that exclude computation and communication time.
|
|
402
402
|
all_timeline = compute_op_timeline + comm_info
|
|
403
403
|
all_timeline.sort(key=lambda x: float(x[self._start_time_idx]))
|
|
404
|
-
|
|
404
|
+
free_timeline = self._get_merged_time_list(
|
|
405
405
|
all_timeline,
|
|
406
406
|
get_interval_time=True,
|
|
407
407
|
display_name="free_time",
|
|
@@ -409,18 +409,16 @@ class GpuTimelineGenerator(BaseTimelineGenerator):
|
|
|
409
409
|
)[1]
|
|
410
410
|
|
|
411
411
|
# Compute these five metrics mentioned above per step.
|
|
412
|
-
time_info["recieve_alone_time"] = self._compute_time_inside_step(
|
|
413
|
-
|
|
414
|
-
time_info["comm_alone_time"] = self._compute_time_inside_step(time_info.get("comm_not_overlapped_timeline"),
|
|
415
|
-
step_info)
|
|
412
|
+
time_info["recieve_alone_time"] = self._compute_time_inside_step(receive_op_not_overlapped_timeline, step_info)
|
|
413
|
+
time_info["comm_alone_time"] = self._compute_time_inside_step(comm_not_overlapped_timeline, step_info)
|
|
416
414
|
time_info["collective_comm_alone_time"] = self._compute_time_inside_step(
|
|
417
|
-
|
|
415
|
+
collective_comm_not_overlapped_timeline,
|
|
418
416
|
step_info
|
|
419
417
|
)
|
|
420
418
|
step_num = len(step_info)
|
|
421
419
|
for step in range(step_num):
|
|
422
420
|
try:
|
|
423
|
-
if
|
|
421
|
+
if is_pipeline_parallel:
|
|
424
422
|
time_info.get("stage_time").append(
|
|
425
423
|
step_info[step][self._duration_idx] - time_info.get("recieve_alone_time")[step]
|
|
426
424
|
)
|
|
@@ -433,24 +431,25 @@ class GpuTimelineGenerator(BaseTimelineGenerator):
|
|
|
433
431
|
except IndexError as e:
|
|
434
432
|
logger.error(e)
|
|
435
433
|
|
|
436
|
-
metrices_per_step_list = [
|
|
437
|
-
|
|
438
|
-
|
|
434
|
+
metrices_per_step_list = [
|
|
435
|
+
time_info.get("computation_time"), time_info.get("comm_alone_time"),
|
|
436
|
+
time_info.get("stage_time"), time_info.get("recieve_alone_time"),
|
|
437
|
+
time_info.get("collective_comm_alone_time")
|
|
438
|
+
]
|
|
439
439
|
if step_num > 1:
|
|
440
440
|
for metric in metrices_per_step_list:
|
|
441
441
|
metric.append(sum(metric[1:]) / (step_num - 1))
|
|
442
442
|
try:
|
|
443
|
-
self._write_cluster_metrices(metrices_per_step_list,
|
|
444
|
-
|
|
445
|
-
|
|
446
|
-
|
|
447
|
-
raise ProfilerIOException
|
|
443
|
+
self._write_cluster_metrices(metrices_per_step_list, is_pipeline_parallel, "Gpu", self._device_id)
|
|
444
|
+
except (IOError, OSError) as err:
|
|
445
|
+
logger.warning(err)
|
|
446
|
+
raise ProfilerIOException from err
|
|
448
447
|
|
|
449
448
|
res_timeline = []
|
|
450
|
-
res_timeline.extend(
|
|
451
|
-
res_timeline.extend(
|
|
452
|
-
res_timeline.extend(
|
|
453
|
-
res_timeline.extend(
|
|
449
|
+
res_timeline.extend(comm_not_overlapped_timeline)
|
|
450
|
+
res_timeline.extend(compute_timeline[2])
|
|
451
|
+
res_timeline.extend(comm_timeline[2])
|
|
452
|
+
res_timeline.extend(free_timeline)
|
|
454
453
|
return res_timeline
|
|
455
454
|
|
|
456
455
|
def _compute_time_inside_step(self, metric_timeline, step_time_list):
|
|
@@ -609,7 +608,7 @@ class CpuTimelineGenerator(GpuTimelineGenerator):
|
|
|
609
608
|
op_timeline_list.append(line_list)
|
|
610
609
|
except (IOError, OSError) as err:
|
|
611
610
|
logger.critical('Error occurred when load operator timeline data intermediate file: %s', err)
|
|
612
|
-
raise ProfilerIOException()
|
|
611
|
+
raise ProfilerIOException() from err
|
|
613
612
|
|
|
614
613
|
return op_timeline_list
|
|
615
614
|
|
|
@@ -181,7 +181,7 @@ class FlopsParser:
|
|
|
181
181
|
sum_flops_utilization = 0.0
|
|
182
182
|
# calculate the every step FLOPS utilization and the average values.
|
|
183
183
|
utilization_save_filename = os.path.join(self._output_dir, self._flops_utilization_step_filename)
|
|
184
|
-
with open(utilization_save_filename, 'w') as f:
|
|
184
|
+
with os.fdopen(os.open(utilization_save_filename, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600), 'w') as f:
|
|
185
185
|
f.write("steps, FLOPS_Utilization %\n")
|
|
186
186
|
for i, x in enumerate(op_all_step_comp):
|
|
187
187
|
current_utilization = x[0] / x[1] * 1e9 / peak_flops * 100
|
|
@@ -190,6 +190,8 @@ class FlopsParser:
|
|
|
190
190
|
f.write(",")
|
|
191
191
|
f.write(str(current_utilization))
|
|
192
192
|
f.write("\n")
|
|
193
|
+
os.chmod(utilization_save_filename, stat.S_IREAD | stat.S_IWRITE)
|
|
194
|
+
|
|
193
195
|
if len(op_all_step_comp) >= 1:
|
|
194
196
|
self._flops_summary['FLOPS_Utilization'] = sum_flops_utilization / len(op_all_step_comp)
|
|
195
197
|
else:
|
|
@@ -222,7 +224,7 @@ class FlopsParser:
|
|
|
222
224
|
all_log_struct = aicore_file.read(self.AICORE_LOG_SIZE * read_count)
|
|
223
225
|
except (IOError, OSError) as err:
|
|
224
226
|
logger.critical(f'Error occurred when read {aicore_file_path} file: {err}')
|
|
225
|
-
raise ProfilerIOException()
|
|
227
|
+
raise ProfilerIOException() from err
|
|
226
228
|
|
|
227
229
|
return read_count, all_log_struct
|
|
228
230
|
|
|
@@ -246,7 +248,7 @@ class FlopsParser:
|
|
|
246
248
|
peak_flops = device_frequency * 1e6 * ai_core_num * 4096 * 2
|
|
247
249
|
except (IOError, OSError, json.JSONDecodeError) as err:
|
|
248
250
|
logger.critical(f'Error occurred when read {info_json_file_path} file: {err}')
|
|
249
|
-
raise ProfilerIOException()
|
|
251
|
+
raise ProfilerIOException() from err
|
|
250
252
|
|
|
251
253
|
return peak_flops
|
|
252
254
|
|
|
@@ -304,7 +306,7 @@ class FlopsParser:
|
|
|
304
306
|
op_avg_time_dict[op_name] = avg_time
|
|
305
307
|
except (IOError, OSError) as err:
|
|
306
308
|
logger.critical(f'Error occurred when read {optime_file_path} file: {err}')
|
|
307
|
-
raise ProfilerIOException()
|
|
309
|
+
raise ProfilerIOException() from err
|
|
308
310
|
|
|
309
311
|
return op_avg_time_dict
|
|
310
312
|
|
|
@@ -317,7 +319,7 @@ class FlopsParser:
|
|
|
317
319
|
For op_name like "Default/network", the "network" will be renamed as "network(Default)".
|
|
318
320
|
For op_name like "recompute_Default/network", "network" --> "network(recompute_Default)".
|
|
319
321
|
For op_name like "Gradients/network", "network" --> "network(Gradients)".
|
|
320
|
-
For op_name like "Gradients/recompute_Default/network"
|
|
322
|
+
For op_name like "Gradients/recompute_Default/network", "network" --> "network(recompute_Gradients)".
|
|
321
323
|
"""
|
|
322
324
|
# Only extracts the scope name, remove the operator name.
|
|
323
325
|
scope_list = op_name.split('/')[:-1]
|
|
@@ -387,7 +389,7 @@ class FlopsParser:
|
|
|
387
389
|
output_flops_scope_file_path = join_file_path(self._flops_scope_filename)
|
|
388
390
|
|
|
389
391
|
try:
|
|
390
|
-
with open(output_file_path, 'w') as f:
|
|
392
|
+
with os.fdopen(os.open(output_file_path, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600), 'w') as f:
|
|
391
393
|
header = "op_full_name, MFLOPs(10^6), GFLOPS(10^9), FLOPS utilization(%) \n"
|
|
392
394
|
f.writelines(header)
|
|
393
395
|
for op_flops in op_flops_list:
|
|
@@ -396,25 +398,27 @@ class FlopsParser:
|
|
|
396
398
|
os.chmod(output_file_path, stat.S_IREAD | stat.S_IWRITE)
|
|
397
399
|
except (IOError, OSError) as err:
|
|
398
400
|
logger.critical(f'Error occurred when writing {output_file_path} file: {err}')
|
|
399
|
-
raise ProfilerIOException()
|
|
401
|
+
raise ProfilerIOException() from err
|
|
400
402
|
|
|
401
403
|
for key in self._flops_summary:
|
|
402
404
|
self._flops_summary[key] = round(self._flops_summary[key], 3)
|
|
403
405
|
try:
|
|
404
|
-
with open(output_summary_file_path,
|
|
406
|
+
with os.fdopen(os.open(output_summary_file_path, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600),
|
|
407
|
+
'w') as json_file:
|
|
405
408
|
json.dump(self._flops_summary, json_file)
|
|
406
409
|
os.chmod(output_summary_file_path, stat.S_IREAD | stat.S_IWRITE)
|
|
407
410
|
except (IOError, OSError) as err:
|
|
408
411
|
logger.critical(f'Error occurred when write {output_summary_file_path} file: {err}')
|
|
409
|
-
raise ProfilerIOException()
|
|
412
|
+
raise ProfilerIOException() from err
|
|
410
413
|
|
|
411
414
|
try:
|
|
412
|
-
with open(output_flops_scope_file_path,
|
|
415
|
+
with os.fdopen(os.open(output_flops_scope_file_path, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600),
|
|
416
|
+
'w') as json_file:
|
|
413
417
|
json.dump(self._flops_sankey_diagram, json_file)
|
|
414
418
|
os.chmod(output_flops_scope_file_path, stat.S_IREAD | stat.S_IWRITE)
|
|
415
419
|
except (IOError, OSError) as err:
|
|
416
420
|
logger.critical(f'Error occurred when write {output_flops_scope_file_path} file: {err}')
|
|
417
|
-
raise ProfilerIOException()
|
|
421
|
+
raise ProfilerIOException() from err
|
|
418
422
|
|
|
419
423
|
def _get_aicore_files(self, profiler_dir):
|
|
420
424
|
"""Get aicore files."""
|