mindspore 2.1.0__cp38-none-any.whl → 2.2.0__cp38-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mindspore might be problematic. Click here for more details.
- mindspore/.commit_id +1 -1
- mindspore/__init__.py +4 -1
- mindspore/_akg/akg/build_module.py +5 -6
- mindspore/_akg/akg/composite/build_module.py +49 -16
- mindspore/_akg/akg/composite/split_stitch.py +10 -11
- mindspore/_akg/akg/ms/info_version_adapt.py +67 -1
- mindspore/_akg/akg/tvm/api.py +4 -3
- mindspore/_akg/akg/tvm/autotvm/__init__.py +1 -2
- mindspore/_akg/akg/tvm/autotvm/graph_tuner/base_graph_tuner.py +1 -5
- mindspore/_akg/akg/tvm/autotvm/measure/__init__.py +1 -1
- mindspore/_akg/akg/tvm/autotvm/measure/measure.py +1 -10
- mindspore/_akg/akg/tvm/autotvm/measure/measure_methods.py +1 -372
- mindspore/_akg/akg/tvm/build_module.py +16 -1
- mindspore/_akg/akg/tvm/contrib/graph_runtime.py +0 -53
- mindspore/_akg/akg/tvm/hybrid/parser.py +7 -6
- mindspore/_akg/akg/tvm/ir_builder.py +1 -1
- mindspore/_akg/akg/tvm/module.py +1 -2
- mindspore/_akg/akg/tvm/stmt.py +2 -2
- mindspore/_akg/akg/utils/composite_op_helper.py +9 -10
- mindspore/_akg/akg/utils/kernel_exec.py +58 -260
- mindspore/_akg/akg/utils/result_analysis.py +4 -24
- mindspore/_akg/akg/utils/tbe_codegen_utils.py +198 -0
- mindspore/_c_dataengine.cpython-38-aarch64-linux-gnu.so +0 -0
- mindspore/_c_expression.cpython-38-aarch64-linux-gnu.so +0 -0
- mindspore/_c_mindrecord.cpython-38-aarch64-linux-gnu.so +0 -0
- mindspore/_check_jit_forbidden_api.py +3 -1
- mindspore/_checkparam.py +26 -32
- mindspore/_extends/graph_kernel/__init__.py +0 -1
- mindspore/_extends/graph_kernel/model/model_builder.py +9 -50
- mindspore/_extends/graph_kernel/splitter.py +1 -9
- mindspore/_extends/parallel_compile/akg_compiler/akg_process.py +122 -15
- mindspore/_extends/parallel_compile/akg_compiler/build_tbe_kernel.py +2 -2
- mindspore/_extends/parallel_compile/akg_compiler/tbe_topi.py +4 -2
- mindspore/_extends/parallel_compile/tbe_compiler/tbe_adapter.py +2 -2
- mindspore/_extends/parallel_compile/tbe_compiler/tbe_helper.py +4 -4
- mindspore/_extends/parallel_compile/tbe_compiler/tbe_job.py +1 -1
- mindspore/_extends/parallel_compile/tbe_compiler/tbe_job_manager.py +1 -1
- mindspore/_extends/parse/__init__.py +12 -15
- mindspore/_extends/parse/namespace.py +7 -33
- mindspore/_extends/parse/parser.py +61 -71
- mindspore/_extends/parse/resources.py +1 -1
- mindspore/_extends/parse/standard_method.py +72 -95
- mindspore/_extends/parse/trope.py +1 -1
- mindspore/_extends/remote/kernel_build_server.py +24 -7
- mindspore/_extends/remote/kernel_build_server_akg_v2.py +55 -0
- mindspore/_install_custom.py +43 -0
- mindspore/_mindspore_offline_debug.cpython-38-aarch64-linux-gnu.so +0 -0
- mindspore/amp.py +47 -11
- mindspore/bin/cache_admin +0 -0
- mindspore/bin/cache_server +0 -0
- mindspore/boost/boost.py +1 -8
- mindspore/boost/boost_cell_wrapper.py +3 -2
- mindspore/boost/grad_accumulation.py +1 -1
- mindspore/boost/group_loss_scale_manager.py +8 -7
- mindspore/common/__init__.py +5 -3
- mindspore/common/_jit_fallback_utils.py +6 -0
- mindspore/common/_register_for_adapter.py +2 -0
- mindspore/common/_register_for_tensor.py +2 -2
- mindspore/common/_stub_tensor.py +13 -0
- mindspore/common/_utils.py +13 -0
- mindspore/common/api.py +173 -258
- mindspore/common/auto_dynamic_shape.py +498 -0
- mindspore/common/dtype.py +18 -11
- mindspore/common/dump.py +6 -4
- mindspore/common/initializer.py +14 -14
- mindspore/common/jit_config.py +33 -15
- mindspore/common/lazy_inline.py +126 -7
- mindspore/common/mindir_util.py +101 -0
- mindspore/common/parameter.py +51 -41
- mindspore/common/seed.py +4 -4
- mindspore/common/sparse_tensor.py +13 -14
- mindspore/common/tensor.py +240 -145
- mindspore/communication/__init__.py +7 -4
- mindspore/communication/_comm_helper.py +83 -4
- mindspore/communication/management.py +152 -84
- mindspore/config/op_info.config +13 -2
- mindspore/config/super_bar_config.json +4 -2
- mindspore/context.py +143 -59
- mindspore/dataset/__init__.py +5 -5
- mindspore/dataset/audio/__init__.py +2 -2
- mindspore/dataset/audio/transforms.py +52 -52
- mindspore/dataset/callback/ds_callback.py +16 -2
- mindspore/dataset/core/config.py +68 -51
- mindspore/dataset/engine/cache_client.py +28 -5
- mindspore/dataset/engine/datasets.py +250 -112
- mindspore/dataset/engine/datasets_audio.py +43 -211
- mindspore/dataset/engine/datasets_standard_format.py +11 -35
- mindspore/dataset/engine/datasets_text.py +43 -67
- mindspore/dataset/engine/datasets_user_defined.py +86 -100
- mindspore/dataset/engine/datasets_vision.py +219 -1029
- mindspore/dataset/engine/iterators.py +11 -4
- mindspore/dataset/engine/obs/obs_mindrecord_dataset.py +4 -0
- mindspore/dataset/engine/obs/util.py +3 -0
- mindspore/dataset/engine/samplers.py +1 -1
- mindspore/dataset/engine/validators.py +19 -5
- mindspore/dataset/text/__init__.py +3 -3
- mindspore/dataset/text/transforms.py +101 -127
- mindspore/dataset/text/utils.py +205 -138
- mindspore/dataset/transforms/__init__.py +1 -1
- mindspore/dataset/transforms/py_transforms_util.py +40 -12
- mindspore/dataset/transforms/transforms.py +95 -40
- mindspore/dataset/utils/browse_dataset.py +8 -2
- mindspore/dataset/utils/line_reader.py +17 -19
- mindspore/dataset/vision/__init__.py +3 -3
- mindspore/dataset/vision/c_transforms.py +6 -3
- mindspore/dataset/vision/transforms.py +409 -287
- mindspore/dataset/vision/utils.py +13 -14
- mindspore/dataset/vision/validators.py +11 -1
- mindspore/experimental/map_parameter.py +14 -0
- mindspore/{nn/optim_ex → experimental/optim}/__init__.py +30 -29
- mindspore/{nn/optim_ex → experimental/optim}/adam.py +59 -66
- mindspore/{nn/optim_ex → experimental/optim}/adamw.py +181 -203
- mindspore/experimental/optim/lr_scheduler.py +1427 -0
- mindspore/{nn/optim_ex → experimental/optim}/optimizer.py +252 -259
- mindspore/{nn/optim_ex → experimental/optim}/sgd.py +147 -152
- mindspore/gen_ops.py +273 -0
- mindspore/include/OWNERS +0 -1
- mindspore/include/api/data_type.h +2 -1
- mindspore/include/api/graph.h +0 -15
- mindspore/include/api/kernel.h +2 -0
- mindspore/include/api/kernel_api.h +37 -12
- mindspore/include/api/model.h +0 -14
- mindspore/include/api/types.h +37 -4
- mindspore/include/c_api/ms/abstract.h +67 -0
- mindspore/include/c_api/ms/attribute.h +197 -0
- mindspore/include/c_api/ms/base/handle_types.h +43 -0
- mindspore/include/c_api/ms/base/macros.h +32 -0
- mindspore/include/c_api/ms/base/status.h +33 -0
- mindspore/include/c_api/ms/base/types.h +282 -0
- mindspore/include/c_api/ms/context.h +102 -0
- mindspore/include/c_api/ms/graph.h +160 -0
- mindspore/include/c_api/ms/node.h +606 -0
- mindspore/include/c_api/ms/tensor.h +161 -0
- mindspore/include/c_api/ms/value.h +84 -0
- mindspore/include/dataset/constants.h +6 -5
- mindspore/include/dataset/execute.h +23 -13
- mindspore/include/dataset/text.h +26 -26
- mindspore/include/dataset/transforms.h +13 -13
- mindspore/include/dataset/vision.h +60 -60
- mindspore/include/dataset/vision_ascend.h +5 -6
- mindspore/include/dataset/vision_lite.h +17 -17
- mindspore/include/mindapi/base/type_id.h +1 -0
- mindspore/include/mindapi/base/types.h +1 -0
- mindspore/lib/libdnnl.so.2 +0 -0
- mindspore/lib/libjemalloc.so.2 +0 -0
- mindspore/lib/libmindspore.so +0 -0
- mindspore/lib/libmindspore_backend.so +0 -0
- mindspore/lib/libmindspore_common.so +0 -0
- mindspore/lib/libmindspore_core.so +0 -0
- mindspore/lib/libmindspore_glog.so.0 +0 -0
- mindspore/lib/libmindspore_gpr.so.15 +0 -0
- mindspore/lib/libmindspore_grpc++.so.1 +0 -0
- mindspore/lib/libmindspore_grpc.so.15 +0 -0
- mindspore/lib/libmindspore_shared_lib.so +0 -0
- mindspore/lib/libnnacl.so +0 -0
- mindspore/lib/libopencv_core.so.4.5 +0 -0
- mindspore/lib/libopencv_imgcodecs.so.4.5 +0 -0
- mindspore/lib/libopencv_imgproc.so.4.5 +0 -0
- mindspore/lib/libps_cache.so +0 -0
- mindspore/lib/plugin/ascend/custom_aicpu_ops/op_impl/cpu/aicpu_kernel/impl/libcust_aicpu_kernels.so +0 -0
- mindspore/lib/plugin/ascend/custom_aicpu_ops/op_impl/cpu/aicpu_kernel/impl/libcust_cpu_kernels.so +0 -0
- mindspore/lib/plugin/ascend/custom_aicpu_ops/op_impl/cpu/config/cust_aicpu_kernel.json +9000 -0
- mindspore/lib/plugin/ascend/custom_aicpu_ops/op_proto/libcust_op_proto.so +0 -0
- mindspore/lib/plugin/ascend/libakg.so +0 -0
- mindspore/lib/plugin/ascend/libascend_collective.so +0 -0
- mindspore/lib/plugin/ascend/libdvpp_utils.so +0 -0
- mindspore/lib/plugin/ascend/libhccl_plugin.so +0 -0
- mindspore/lib/plugin/ascend/libmindspore_aicpu_kernels.so +0 -0
- mindspore/lib/plugin/ascend/libmindspore_cpu_kernels.so +0 -0
- mindspore/lib/plugin/cpu/libakg.so +0 -0
- mindspore/lib/plugin/libmindspore_ascend.so.1 +0 -0
- mindspore/lib/plugin/libmindspore_ascend.so.2 +0 -0
- mindspore/mindrecord/tools/imagenet_to_mr.py +1 -1
- mindspore/mindrecord/tools/mnist_to_mr.py +2 -2
- mindspore/nn/__init__.py +0 -2
- mindspore/nn/cell.py +316 -74
- mindspore/nn/dynamic_lr.py +21 -21
- mindspore/nn/layer/activation.py +21 -28
- mindspore/nn/layer/basic.py +15 -13
- mindspore/nn/layer/channel_shuffle.py +1 -1
- mindspore/nn/layer/container.py +271 -9
- mindspore/nn/layer/conv.py +310 -207
- mindspore/nn/layer/dense.py +8 -5
- mindspore/nn/layer/embedding.py +33 -27
- mindspore/nn/layer/flash_attention.py +82 -41
- mindspore/nn/layer/image.py +8 -6
- mindspore/nn/layer/math.py +13 -18
- mindspore/nn/layer/normalization.py +107 -66
- mindspore/nn/layer/padding.py +1 -1
- mindspore/nn/layer/pooling.py +131 -109
- mindspore/nn/layer/rnn_cells.py +22 -17
- mindspore/nn/layer/rnns.py +13 -16
- mindspore/nn/layer/thor_layer.py +1 -1
- mindspore/nn/layer/transformer.py +221 -154
- mindspore/nn/learning_rate_schedule.py +9 -1
- mindspore/nn/loss/loss.py +235 -174
- mindspore/nn/optim/ada_grad.py +2 -1
- mindspore/nn/optim/adadelta.py +1 -0
- mindspore/nn/optim/adafactor.py +2 -1
- mindspore/nn/optim/adam.py +7 -4
- mindspore/nn/optim/adamax.py +3 -2
- mindspore/nn/optim/adasum.py +2 -2
- mindspore/nn/optim/asgd.py +2 -3
- mindspore/nn/optim/ftrl.py +6 -5
- mindspore/nn/optim/lamb.py +7 -4
- mindspore/nn/optim/lars.py +1 -1
- mindspore/nn/optim/lazyadam.py +5 -3
- mindspore/nn/optim/momentum.py +2 -1
- mindspore/nn/optim/optimizer.py +53 -4
- mindspore/nn/optim/proximal_ada_grad.py +3 -4
- mindspore/nn/optim/rmsprop.py +4 -3
- mindspore/nn/optim/rprop.py +23 -12
- mindspore/nn/optim/sgd.py +26 -11
- mindspore/nn/optim/thor.py +9 -7
- mindspore/nn/probability/bijector/bijector.py +5 -5
- mindspore/nn/probability/bijector/power_transform.py +27 -27
- mindspore/nn/probability/bijector/softplus.py +3 -3
- mindspore/nn/probability/distribution/_utils/custom_ops.py +3 -3
- mindspore/nn/probability/distribution/bernoulli.py +5 -5
- mindspore/nn/probability/distribution/beta.py +3 -3
- mindspore/nn/probability/distribution/categorical.py +7 -7
- mindspore/nn/probability/distribution/cauchy.py +0 -1
- mindspore/nn/probability/distribution/distribution.py +3 -3
- mindspore/nn/probability/distribution/gamma.py +3 -3
- mindspore/nn/probability/distribution/geometric.py +4 -4
- mindspore/nn/probability/distribution/gumbel.py +4 -4
- mindspore/nn/probability/distribution/log_normal.py +2 -2
- mindspore/nn/probability/distribution/logistic.py +2 -2
- mindspore/nn/probability/distribution/poisson.py +4 -4
- mindspore/nn/probability/distribution/transformed_distribution.py +3 -3
- mindspore/nn/probability/distribution/uniform.py +6 -6
- mindspore/nn/wrap/cell_wrapper.py +78 -34
- mindspore/nn/wrap/grad_reducer.py +8 -5
- mindspore/nn/wrap/loss_scale.py +105 -42
- mindspore/numpy/array_creations.py +1 -2
- mindspore/numpy/array_ops.py +3 -2
- mindspore/offline_debug/convert_async.py +2 -2
- mindspore/ops/_grad_experimental/__init__.py +0 -5
- mindspore/ops/_grad_experimental/grad_array_ops.py +1 -2
- mindspore/ops/_grad_experimental/grad_comm_ops.py +15 -2
- mindspore/ops/_grad_experimental/grad_debug_ops.py +0 -37
- mindspore/ops/_grad_experimental/grad_implementations.py +10 -0
- mindspore/ops/_grad_experimental/grad_inner_ops.py +2 -216
- mindspore/ops/_grad_experimental/grad_math_ops.py +0 -181
- mindspore/ops/_grad_experimental/grad_sparse.py +15 -0
- mindspore/ops/_op_impl/_custom_op/dsd_back_impl.py +1 -1
- mindspore/ops/_op_impl/_custom_op/flash_attention/attention.py +165 -109
- mindspore/ops/_op_impl/_custom_op/flash_attention/flash_attention_bwd.py +144 -86
- mindspore/ops/_op_impl/_custom_op/flash_attention/flash_attention_fwd.py +172 -187
- mindspore/ops/_op_impl/_custom_op/flash_attention/flash_attention_impl.py +51 -57
- mindspore/ops/_op_impl/_custom_op/flash_attention/tik_ops_utils.py +6 -17
- mindspore/ops/_op_impl/_custom_op/flash_attention/tiling_strategy/wukong_tiling.py +1 -1
- mindspore/ops/_op_impl/aicpu/__init__.py +14 -2
- mindspore/ops/_op_impl/aicpu/bias_add_grad.py +0 -1
- mindspore/ops/_op_impl/aicpu/count_nonzero.py +43 -0
- mindspore/ops/_op_impl/aicpu/eps.py +32 -0
- mindspore/ops/_op_impl/aicpu/gamma.py +2 -2
- mindspore/ops/_op_impl/aicpu/log_uniform_candidate_sampler.py +6 -3
- mindspore/ops/_op_impl/aicpu/lu_unpack_grad.py +0 -1
- mindspore/ops/_op_impl/aicpu/multinomial.py +3 -3
- mindspore/ops/_op_impl/aicpu/parameterized_truncated_normal.py +15 -7
- mindspore/ops/_op_impl/aicpu/random_categorical.py +39 -19
- mindspore/ops/_op_impl/aicpu/random_choice_with_mask.py +5 -2
- mindspore/ops/_op_impl/aicpu/random_poisson.py +103 -52
- mindspore/ops/_op_impl/aicpu/random_shuffle.py +17 -15
- mindspore/ops/_op_impl/aicpu/{sparseaddmm.py → sparse_addmm.py} +2 -2
- mindspore/ops/_op_impl/aicpu/{sparsesparsemaximum.py → sparse_sparse_maximum.py} +4 -4
- mindspore/ops/_op_impl/aicpu/standard_laplace.py +5 -5
- mindspore/ops/_op_impl/aicpu/standard_normal.py +5 -5
- mindspore/ops/_op_impl/aicpu/truncated_normal.py +9 -7
- mindspore/ops/_op_impl/aicpu/uniform.py +5 -3
- mindspore/ops/_op_impl/aicpu/uniform_candidate_sampler.py +8 -4
- mindspore/ops/_op_impl/aicpu/uniform_int.py +5 -5
- mindspore/ops/_op_impl/aicpu/uniform_real.py +4 -4
- mindspore/ops/_op_impl/tbe/__init__.py +4 -4
- mindspore/ops/_op_impl/tbe/inplace_index_add.py +7 -3
- mindspore/ops/_op_impl/tbe/trans_data_ds.py +2 -0
- mindspore/ops/_primitive_cache.py +1 -1
- mindspore/ops/_tracefunc.py +45 -13
- mindspore/ops/_utils/utils.py +4 -1
- mindspore/ops/_vmap/vmap_array_ops.py +3 -3
- mindspore/ops/_vmap/vmap_base.py +3 -3
- mindspore/ops/_vmap/vmap_convolution_ops.py +1 -1
- mindspore/ops/_vmap/vmap_grad_math_ops.py +6 -4
- mindspore/ops/_vmap/vmap_math_ops.py +5 -2
- mindspore/ops/_vmap/vmap_nn_ops.py +61 -7
- mindspore/ops/arg_dtype_cast.py +54 -0
- mindspore/ops/composite/base.py +37 -10
- mindspore/ops/composite/math_ops.py +5 -4
- mindspore/ops/composite/multitype_ops/_compile_utils.py +273 -72
- mindspore/ops/composite/multitype_ops/_constexpr_utils.py +16 -9
- mindspore/ops/composite/multitype_ops/add_impl.py +43 -4
- mindspore/ops/composite/multitype_ops/getitem_impl.py +40 -2
- mindspore/ops/composite/multitype_ops/ones_like_impl.py +6 -0
- mindspore/ops/composite/multitype_ops/setitem_impl.py +2 -1
- mindspore/ops/composite/multitype_ops/zeros_like_impl.py +9 -0
- mindspore/ops/deprecated.py +304 -0
- mindspore/ops/function/__init__.py +4 -1
- mindspore/ops/function/array_func.py +167 -189
- mindspore/ops/function/clip_func.py +81 -13
- mindspore/ops/function/debug_func.py +1 -1
- mindspore/ops/function/grad/grad_func.py +18 -8
- mindspore/ops/function/image_func.py +10 -4
- mindspore/ops/function/linalg_func.py +5 -5
- mindspore/ops/function/math_func.py +575 -386
- mindspore/ops/function/nn_func.py +470 -251
- mindspore/ops/function/random_func.py +86 -56
- mindspore/ops/function/sparse_func.py +1 -1
- mindspore/ops/function/sparse_unary_func.py +14 -12
- mindspore/ops/function/vmap_func.py +6 -5
- mindspore/ops/functional.py +15 -10
- mindspore/ops/op_info_register.py +235 -19
- mindspore/ops/operations/__init__.py +25 -17
- mindspore/ops/operations/_grad_ops.py +52 -7
- mindspore/ops/operations/_inner_ops.py +213 -12
- mindspore/ops/operations/_quant_ops.py +4 -8
- mindspore/ops/operations/_sequence_ops.py +42 -0
- mindspore/ops/operations/array_ops.py +64 -280
- mindspore/ops/operations/comm_ops.py +105 -57
- mindspore/ops/operations/custom_ops.py +10 -3
- mindspore/ops/operations/debug_ops.py +8 -4
- mindspore/ops/operations/image_ops.py +18 -12
- mindspore/ops/operations/math_ops.py +185 -138
- mindspore/ops/operations/nn_ops.py +716 -492
- mindspore/ops/operations/other_ops.py +0 -22
- mindspore/ops/operations/random_ops.py +53 -111
- mindspore/ops/operations/sparse_ops.py +3 -1
- mindspore/ops/primitive.py +24 -18
- mindspore/parallel/_auto_parallel_context.py +68 -8
- mindspore/parallel/_cost_model_context.py +2 -2
- mindspore/parallel/_offload_context.py +17 -3
- mindspore/parallel/_parallel_serialization.py +2 -2
- mindspore/parallel/_ps_context.py +12 -0
- mindspore/parallel/_tensor.py +14 -12
- mindspore/parallel/_transformer/layers.py +5 -3
- mindspore/parallel/_transformer/loss.py +1 -0
- mindspore/parallel/_transformer/moe.py +2 -2
- mindspore/parallel/_transformer/op_parallel_config.py +12 -1
- mindspore/parallel/_transformer/transformer.py +23 -3
- mindspore/parallel/_utils.py +11 -7
- mindspore/parallel/algo_parameter_config.py +85 -5
- mindspore/parallel/checkpoint_transform.py +6 -10
- mindspore/parallel/shard.py +4 -4
- mindspore/profiler/common/struct_type.py +3 -3
- mindspore/profiler/common/util.py +3 -2
- mindspore/profiler/envprofiling.py +1 -1
- mindspore/profiler/parser/aicpu_data_parser.py +5 -3
- mindspore/profiler/parser/ascend_flops_generator.py +2 -2
- mindspore/profiler/parser/ascend_fpbp_generator.py +1 -1
- mindspore/profiler/parser/ascend_hccl_generator.py +17 -12
- mindspore/profiler/parser/ascend_msprof_exporter.py +104 -252
- mindspore/profiler/parser/ascend_msprof_generator.py +8 -8
- mindspore/profiler/parser/ascend_op_generator.py +5 -5
- mindspore/profiler/parser/ascend_steptrace_generator.py +6 -4
- mindspore/profiler/parser/ascend_timeline_generator.py +9 -6
- mindspore/profiler/parser/base_timeline_generator.py +9 -7
- mindspore/profiler/parser/cpu_gpu_timeline_generator.py +14 -10
- mindspore/profiler/parser/flops_parser.py +15 -11
- mindspore/profiler/parser/framework_parser.py +37 -21
- mindspore/profiler/parser/hccl_parser.py +16 -12
- mindspore/profiler/parser/integrator.py +22 -11
- mindspore/profiler/parser/memory_usage_parser.py +2 -2
- mindspore/profiler/parser/minddata_analyzer.py +12 -14
- mindspore/profiler/parser/minddata_pipeline_parser.py +1 -1
- mindspore/profiler/parser/msadvisor_parser.py +8 -4
- mindspore/profiler/parser/op_intermediate_parser.py +5 -2
- mindspore/profiler/parser/optime_parser.py +1 -1
- mindspore/profiler/parser/profiler_info.py +2 -2
- mindspore/profiler/parser/step_trace_parser.py +11 -14
- mindspore/profiler/profiling.py +139 -71
- mindspore/rewrite/api/node.py +102 -19
- mindspore/rewrite/api/node_type.py +5 -1
- mindspore/rewrite/api/scoped_value.py +9 -17
- mindspore/rewrite/api/symbol_tree.py +131 -47
- mindspore/rewrite/ast_helpers/__init__.py +2 -1
- mindspore/rewrite/ast_helpers/ast_finder.py +129 -0
- mindspore/rewrite/ast_helpers/ast_modifier.py +116 -104
- mindspore/rewrite/ast_transformers/flatten_recursive_stmt.py +93 -46
- mindspore/rewrite/common/rewrite_elog.py +5 -1
- mindspore/rewrite/namer.py +33 -24
- mindspore/rewrite/namespace.py +14 -5
- mindspore/{_extends/graph_kernel/expanders/complex → rewrite/node}/__init__.py +9 -9
- mindspore/rewrite/node/call_function.py +79 -0
- mindspore/rewrite/node/cell_container.py +135 -0
- mindspore/rewrite/node/control_flow.py +88 -0
- mindspore/rewrite/{node.py → node/node.py} +273 -234
- mindspore/rewrite/node/node_manager.py +254 -0
- mindspore/rewrite/{topological_manager.py → node/node_topological_manager.py} +13 -46
- mindspore/rewrite/parsers/arguments_parser.py +22 -21
- mindspore/rewrite/parsers/assign_parser.py +216 -221
- mindspore/rewrite/parsers/attribute_parser.py +9 -7
- mindspore/rewrite/parsers/class_def_parser.py +174 -113
- mindspore/rewrite/parsers/constant_parser.py +9 -6
- mindspore/rewrite/parsers/container_parser.py +9 -7
- mindspore/rewrite/parsers/for_parser.py +36 -15
- mindspore/rewrite/parsers/function_def_parser.py +24 -16
- mindspore/rewrite/parsers/if_parser.py +28 -24
- mindspore/rewrite/parsers/module_parser.py +196 -25
- mindspore/rewrite/{parser.py → parsers/parser.py} +4 -2
- mindspore/rewrite/{parser_register.py → parsers/parser_register.py} +1 -1
- mindspore/rewrite/parsers/return_parser.py +6 -6
- mindspore/rewrite/sparsify/sparse_transformer.py +12 -3
- mindspore/rewrite/sparsify/utils.py +1 -1
- mindspore/rewrite/symbol_tree.py +525 -577
- mindspore/rewrite/symbol_tree_builder.py +9 -193
- mindspore/rewrite/symbol_tree_dumper.py +2 -2
- mindspore/run_check/_check_version.py +2 -2
- mindspore/{ops/bprop_mindir → safeguard}/__init__.py +4 -3
- mindspore/safeguard/rewrite_obfuscation.py +517 -0
- mindspore/scipy/linalg.py +1 -1
- mindspore/scipy/optimize/minimize.py +7 -3
- mindspore/train/_utils.py +7 -3
- mindspore/train/amp.py +323 -123
- mindspore/train/anf_ir_pb2.py +14 -2
- mindspore/train/callback/_backup_and_restore.py +2 -12
- mindspore/train/callback/_callback.py +29 -4
- mindspore/train/callback/_checkpoint.py +23 -8
- mindspore/train/callback/_early_stop.py +2 -2
- mindspore/train/callback/_landscape.py +4 -4
- mindspore/train/callback/_loss_monitor.py +2 -2
- mindspore/train/callback/_on_request_exit.py +2 -2
- mindspore/train/callback/_reduce_lr_on_plateau.py +3 -4
- mindspore/train/callback/_summary_collector.py +14 -7
- mindspore/train/callback/_time_monitor.py +58 -5
- mindspore/train/data_sink.py +5 -11
- mindspore/train/dataset_helper.py +83 -57
- mindspore/train/loss_scale_manager.py +2 -2
- mindspore/train/metrics/__init__.py +3 -3
- mindspore/train/metrics/cosine_similarity.py +1 -1
- mindspore/train/metrics/hausdorff_distance.py +3 -2
- mindspore/train/metrics/mean_surface_distance.py +3 -2
- mindspore/train/metrics/metric.py +39 -19
- mindspore/train/metrics/roc.py +2 -2
- mindspore/train/metrics/root_mean_square_surface_distance.py +4 -3
- mindspore/train/mind_ir_pb2.py +85 -36
- mindspore/train/model.py +185 -45
- mindspore/train/serialization.py +390 -150
- mindspore/train/summary/_writer_pool.py +3 -2
- mindspore/train/summary/summary_record.py +14 -10
- mindspore/train/train_thor/convert_utils.py +3 -3
- mindspore/train/train_thor/dataset_helper.py +1 -1
- mindspore/version.py +1 -1
- {mindspore-2.1.0.dist-info → mindspore-2.2.0.dist-info}/METADATA +6 -7
- {mindspore-2.1.0.dist-info → mindspore-2.2.0.dist-info}/RECORD +447 -507
- {mindspore-2.1.0.dist-info → mindspore-2.2.0.dist-info}/entry_points.txt +0 -1
- mindspore/_akg/akg/tvm/contrib/debugger/__init__.py +0 -16
- mindspore/_akg/akg/tvm/contrib/debugger/debug_result.py +0 -274
- mindspore/_akg/akg/tvm/contrib/debugger/debug_runtime.py +0 -259
- mindspore/_akg/akg/tvm/contrib/peak.py +0 -341
- mindspore/_akg/akg/tvm/contrib/rpc.py +0 -25
- mindspore/_akg/akg/tvm/contrib/xcode.py +0 -257
- mindspore/_akg/akg/tvm/exec/__init__.py +0 -17
- mindspore/_akg/akg/tvm/exec/autotvm_log_editor.py +0 -60
- mindspore/_akg/akg/tvm/exec/measure_peak.py +0 -48
- mindspore/_akg/akg/tvm/exec/query_rpc_tracker.py +0 -48
- mindspore/_akg/akg/tvm/exec/rpc_proxy.py +0 -98
- mindspore/_akg/akg/tvm/exec/rpc_server.py +0 -88
- mindspore/_akg/akg/tvm/exec/rpc_tracker.py +0 -62
- mindspore/_akg/akg/tvm/rpc/__init__.py +0 -29
- mindspore/_akg/akg/tvm/rpc/base.py +0 -182
- mindspore/_akg/akg/tvm/rpc/client.py +0 -436
- mindspore/_akg/akg/tvm/rpc/proxy.py +0 -595
- mindspore/_akg/akg/tvm/rpc/server.py +0 -413
- mindspore/_akg/akg/tvm/rpc/tornado_util.py +0 -121
- mindspore/_akg/akg/tvm/rpc/tracker.py +0 -431
- mindspore/_extends/graph_kernel/expander.py +0 -80
- mindspore/_extends/graph_kernel/expanders/__init__.py +0 -54
- mindspore/_extends/graph_kernel/expanders/_utils.py +0 -269
- mindspore/_extends/graph_kernel/expanders/addn.py +0 -33
- mindspore/_extends/graph_kernel/expanders/batchnorm.py +0 -152
- mindspore/_extends/graph_kernel/expanders/batchnorm_grad.py +0 -105
- mindspore/_extends/graph_kernel/expanders/clip_by_norm_no_div_sum.py +0 -33
- mindspore/_extends/graph_kernel/expanders/complex/abs.py +0 -30
- mindspore/_extends/graph_kernel/expanders/complex/add.py +0 -44
- mindspore/_extends/graph_kernel/expanders/complex/div.py +0 -62
- mindspore/_extends/graph_kernel/expanders/complex/mul.py +0 -52
- mindspore/_extends/graph_kernel/expanders/complex/real_div.py +0 -62
- mindspore/_extends/graph_kernel/expanders/complex/sub.py +0 -45
- mindspore/_extends/graph_kernel/expanders/conv2d.py +0 -200
- mindspore/_extends/graph_kernel/expanders/dropout_grad.py +0 -30
- mindspore/_extends/graph_kernel/expanders/equal_count.py +0 -50
- mindspore/_extends/graph_kernel/expanders/erfc.py +0 -35
- mindspore/_extends/graph_kernel/expanders/expand_dims.py +0 -50
- mindspore/_extends/graph_kernel/expanders/fused_adam.py +0 -44
- mindspore/_extends/graph_kernel/expanders/fused_adam_weight_decay.py +0 -47
- mindspore/_extends/graph_kernel/expanders/fused_mul_add.py +0 -28
- mindspore/_extends/graph_kernel/expanders/gelu_grad.py +0 -70
- mindspore/_extends/graph_kernel/expanders/gkdropout.py +0 -40
- mindspore/_extends/graph_kernel/expanders/identity.py +0 -25
- mindspore/_extends/graph_kernel/expanders/layernorm.py +0 -93
- mindspore/_extends/graph_kernel/expanders/layernorm_grad.py +0 -113
- mindspore/_extends/graph_kernel/expanders/logsoftmax.py +0 -46
- mindspore/_extends/graph_kernel/expanders/logsoftmax_grad.py +0 -36
- mindspore/_extends/graph_kernel/expanders/matmul.py +0 -80
- mindspore/_extends/graph_kernel/expanders/maximum_grad.py +0 -59
- mindspore/_extends/graph_kernel/expanders/minimum_grad.py +0 -80
- mindspore/_extends/graph_kernel/expanders/oneslike.py +0 -26
- mindspore/_extends/graph_kernel/expanders/reduce_mean.py +0 -43
- mindspore/_extends/graph_kernel/expanders/relu_grad.py +0 -32
- mindspore/_extends/graph_kernel/expanders/sigmoid_cross_entropy_with_logits.py +0 -41
- mindspore/_extends/graph_kernel/expanders/sigmoid_cross_entropy_with_logits_grad.py +0 -35
- mindspore/_extends/graph_kernel/expanders/sigmoid_grad.py +0 -31
- mindspore/_extends/graph_kernel/expanders/slice.py +0 -35
- mindspore/_extends/graph_kernel/expanders/softmax_cross_entropy_with_logits.py +0 -42
- mindspore/_extends/graph_kernel/expanders/softmax_grad_ext.py +0 -41
- mindspore/_extends/graph_kernel/expanders/softsign.py +0 -28
- mindspore/_extends/graph_kernel/expanders/sqrt_grad.py +0 -29
- mindspore/_extends/graph_kernel/expanders/square_sum_all.py +0 -44
- mindspore/_extends/graph_kernel/expanders/square_sum_v1.py +0 -37
- mindspore/_extends/graph_kernel/expanders/squared_difference.py +0 -43
- mindspore/_extends/graph_kernel/expanders/tanh_grad.py +0 -31
- mindspore/_extends/graph_kernel/model/op_infer.py +0 -506
- mindspore/dataset/datapreprocess/__init__.py +0 -20
- mindspore/dataset/datapreprocess/preprocess_imagenet_validate_dataset.py +0 -54
- mindspore/include/api/net.h +0 -142
- mindspore/nn/lr_scheduler.py +0 -262
- mindspore/ops/_grad_experimental/grad_image_ops.py +0 -248
- mindspore/ops/_grad_experimental/grad_linalg_ops.py +0 -181
- mindspore/ops/_grad_experimental/grad_other_ops.py +0 -72
- mindspore/ops/_grad_experimental/grad_scalar_ops.py +0 -112
- mindspore/ops/_grad_experimental/grad_sequence_ops.py +0 -351
- mindspore/ops/bprop_mindir/BNTrainingReduce_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Broadcast_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Depend_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/DepthwiseConv2dNative_bprop.mindir +0 -138
- mindspore/ops/bprop_mindir/EmbeddingLookup_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Load_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/ScatterNonAliasingAdd_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/SparseGatherV2_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/SparseSoftmaxCrossEntropyWithLogits_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Switch_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/TransShape_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/TupleGetItem_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Unique_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Unstack_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/generate_mindir.py +0 -114
- mindspore/rewrite/node_visitor.py +0 -44
- {mindspore-2.1.0.dist-info → mindspore-2.2.0.dist-info}/WHEEL +0 -0
- {mindspore-2.1.0.dist-info → mindspore-2.2.0.dist-info}/top_level.txt +0 -0
|
@@ -94,7 +94,7 @@ class AscendOPGenerator:
|
|
|
94
94
|
writer.writerows(self.op_detail.tolist())
|
|
95
95
|
except (IOError, OSError) as err:
|
|
96
96
|
logging.critical('Errot occurred when write aicore detail file: %s', err)
|
|
97
|
-
raise ProfilerIOException()
|
|
97
|
+
raise ProfilerIOException() from err
|
|
98
98
|
if os.path.exists(aicore_intermediate_detail_path):
|
|
99
99
|
os.chmod(aicore_intermediate_detail_path, stat.S_IREAD | stat.S_IWRITE)
|
|
100
100
|
|
|
@@ -109,7 +109,7 @@ class AscendOPGenerator:
|
|
|
109
109
|
writer.writerows(self.op_type.tolist())
|
|
110
110
|
except (IOError, OSError) as err:
|
|
111
111
|
logging.critical('Errot occurred when write aicore type file: %s', err)
|
|
112
|
-
raise ProfilerIOException()
|
|
112
|
+
raise ProfilerIOException() from err
|
|
113
113
|
if os.path.exists(aicore_intermediate_type_path):
|
|
114
114
|
os.chmod(aicore_intermediate_type_path, stat.S_IREAD | stat.S_IWRITE)
|
|
115
115
|
|
|
@@ -139,12 +139,12 @@ class AscendOPGenerator:
|
|
|
139
139
|
writer.writerows(self.framework_raw.tolist())
|
|
140
140
|
except (IOError, OSError) as err:
|
|
141
141
|
logging.critical('Errot occurred when write framework file: %s', err)
|
|
142
|
-
raise ProfilerIOException()
|
|
142
|
+
raise ProfilerIOException() from err
|
|
143
143
|
if os.path.exists(framework_raw_path):
|
|
144
144
|
os.chmod(framework_raw_path, stat.S_IREAD | stat.S_IWRITE)
|
|
145
145
|
|
|
146
146
|
# output_timeline_data
|
|
147
|
-
if self.output_timeline_data.shape[0] != 0:
|
|
147
|
+
if self.output_timeline_data.shape[0] != 0 and output_timeline_data_path:
|
|
148
148
|
try:
|
|
149
149
|
with os.fdopen(os.open(output_timeline_data_path,
|
|
150
150
|
os.O_WRONLY | os.O_CREAT | os.O_TRUNC, stat.S_IWUSR | stat.S_IRUSR),
|
|
@@ -154,7 +154,7 @@ class AscendOPGenerator:
|
|
|
154
154
|
writer.writerows(self.output_timeline_data.tolist())
|
|
155
155
|
except (IOError, OSError) as err:
|
|
156
156
|
logging.critical('Error occurred when write output timeline data file: %s', err)
|
|
157
|
-
raise ProfilerIOException()
|
|
157
|
+
raise ProfilerIOException() from err
|
|
158
158
|
if os.path.exists(aicpu_intermediate_detail_path):
|
|
159
159
|
os.chmod(aicpu_intermediate_detail_path, stat.S_IREAD | stat.S_IWRITE)
|
|
160
160
|
|
|
@@ -29,9 +29,11 @@ class AscendStepTraceGenerator:
|
|
|
29
29
|
self.steptrace = steptrace
|
|
30
30
|
self.steptrace_detail = None
|
|
31
31
|
|
|
32
|
-
self.steptrace_detail_dt = [
|
|
33
|
-
|
|
34
|
-
|
|
32
|
+
self.steptrace_detail_dt = [
|
|
33
|
+
('step_num', object), ('start_point', np.int64), ('end_point', np.int64),
|
|
34
|
+
('total', np.int64), ('fp_point', np.int64), ('bp_point', np.int64),
|
|
35
|
+
('iteration_interval', np.int64), ('fp_and_bp', np.int64), ('tail', np.int64)
|
|
36
|
+
]
|
|
35
37
|
|
|
36
38
|
def parse(self):
|
|
37
39
|
"""Analyse the original steptrace data generate steptrace data."""
|
|
@@ -87,6 +89,6 @@ class AscendStepTraceGenerator:
|
|
|
87
89
|
writer.writerows(self.steptrace_detail.tolist())
|
|
88
90
|
except (IOError, OSError) as err:
|
|
89
91
|
logging.critical('Errot occurred when write step trace file: %s', err)
|
|
90
|
-
raise ProfilerIOException()
|
|
92
|
+
raise ProfilerIOException() from err
|
|
91
93
|
if os.path.exists(step_trace_intermediate_file_path):
|
|
92
94
|
os.chmod(step_trace_intermediate_file_path, stat.S_IREAD | stat.S_IWRITE)
|
|
@@ -72,12 +72,12 @@ class AscendTimelineGenerator(BaseTimelineGenerator):
|
|
|
72
72
|
|
|
73
73
|
logger.info('Initiating timeline...')
|
|
74
74
|
|
|
75
|
-
timeline_list = op_summary[op_summary['Task Type']
|
|
75
|
+
timeline_list = op_summary[~np.isin(op_summary['Task Type'], ['AI_CPU', 'HCCL'])][
|
|
76
76
|
['Op Name', 'Stream ID', 'Task Start Time', 'Task Duration']]
|
|
77
77
|
|
|
78
78
|
timeline_list = timeline_list.tolist()
|
|
79
79
|
cpu_timeline_generator = CpuTimelineGenerator(self._profiling_dir, self._rank_id, self._model)
|
|
80
|
-
cpu_timeline_list = cpu_timeline_generator.
|
|
80
|
+
cpu_timeline_list = cpu_timeline_generator.get_timeline_data()
|
|
81
81
|
if cpu_timeline_list:
|
|
82
82
|
timeline_list.extend(cpu_timeline_list)
|
|
83
83
|
timeline_list.sort(key=lambda x: float(x[self._start_time_idx]))
|
|
@@ -133,6 +133,7 @@ class AscendTimelineGenerator(BaseTimelineGenerator):
|
|
|
133
133
|
communication_info['Task Duration'] = hccl_op['Task Duration']
|
|
134
134
|
communication_info['pid'] = 10000
|
|
135
135
|
communication_info = communication_info.tolist()
|
|
136
|
+
communication_info.sort(key=lambda x: float(x[self._start_time_idx]))
|
|
136
137
|
else:
|
|
137
138
|
communication_info = []
|
|
138
139
|
if communication_info:
|
|
@@ -276,7 +277,7 @@ class AscendTimelineGenerator(BaseTimelineGenerator):
|
|
|
276
277
|
break
|
|
277
278
|
except (IOError, OSError) as err:
|
|
278
279
|
logger.critical('Error occurred when read dev_start.log: %s', err)
|
|
279
|
-
raise ProfilerIOException()
|
|
280
|
+
raise ProfilerIOException() from err
|
|
280
281
|
if host_monotonic == 0 or dev_cntvct == 0:
|
|
281
282
|
logger.error('Error occurred when read host_monotonic or dev_cntvct time')
|
|
282
283
|
|
|
@@ -423,8 +424,10 @@ class AscendTimelineGenerator(BaseTimelineGenerator):
|
|
|
423
424
|
except IndexError as err:
|
|
424
425
|
logger.error(err)
|
|
425
426
|
|
|
426
|
-
metrices_per_step_list = [
|
|
427
|
-
|
|
427
|
+
metrices_per_step_list = [
|
|
428
|
+
time_info.get("computation_time"), comm_alone_time, time_info.get("stage_time"),
|
|
429
|
+
recieve_alone_time, collective_comm_alone_time
|
|
430
|
+
]
|
|
428
431
|
if step_num > 1:
|
|
429
432
|
for metric in metrices_per_step_list:
|
|
430
433
|
metric.append(sum(metric[1:]) / (step_num - 1))
|
|
@@ -432,7 +435,7 @@ class AscendTimelineGenerator(BaseTimelineGenerator):
|
|
|
432
435
|
self._write_cluster_metrices(metrices_per_step_list, is_pipeline_parallel, "Ascend", self._rank_id)
|
|
433
436
|
except (IOError, OSError) as err:
|
|
434
437
|
logger.warning(err)
|
|
435
|
-
raise ProfilerIOException
|
|
438
|
+
raise ProfilerIOException from err
|
|
436
439
|
|
|
437
440
|
def _compute_time_inside_step(self, metric_timeline, step_time_list):
|
|
438
441
|
"""Compute per step time of metric_timeline."""
|
|
@@ -198,7 +198,7 @@ class BaseTimelineGenerator:
|
|
|
198
198
|
display_file_path = validate_and_normalize_path(display_file_path)
|
|
199
199
|
|
|
200
200
|
try:
|
|
201
|
-
with os.fdopen(os.open(display_file_path, os.O_WRONLY | os.O_CREAT | os.O_TRUNC,
|
|
201
|
+
with os.fdopen(os.open(display_file_path, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600), 'w') as json_file:
|
|
202
202
|
json_file.write('[')
|
|
203
203
|
for _, item in enumerate(self._timeline_meta):
|
|
204
204
|
json.dump(item, json_file)
|
|
@@ -216,7 +216,7 @@ class BaseTimelineGenerator:
|
|
|
216
216
|
return self._timeline_meta
|
|
217
217
|
except (IOError, OSError) as err:
|
|
218
218
|
logger.critical('Error occurred when write timeline display file: %s', err)
|
|
219
|
-
raise ProfilerIOException()
|
|
219
|
+
raise ProfilerIOException() from err
|
|
220
220
|
|
|
221
221
|
def write_timeline_summary(self):
|
|
222
222
|
"""Write timeline summary to json."""
|
|
@@ -229,11 +229,11 @@ class BaseTimelineGenerator:
|
|
|
229
229
|
|
|
230
230
|
try:
|
|
231
231
|
with os.fdopen(os.open(timeline_summary_file_path,
|
|
232
|
-
os.O_WRONLY | os.O_CREAT | os.O_TRUNC,
|
|
232
|
+
os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600), 'w') as json_file:
|
|
233
233
|
json.dump(self._timeline_summary, json_file)
|
|
234
234
|
except (IOError, OSError) as err:
|
|
235
235
|
logger.critical('Error occurred when write timeline summary file: %s', err)
|
|
236
|
-
raise ProfilerIOException()
|
|
236
|
+
raise ProfilerIOException() from err
|
|
237
237
|
if os.path.exists(timeline_summary_file_path):
|
|
238
238
|
os.chmod(timeline_summary_file_path, stat.S_IREAD | stat.S_IWRITE)
|
|
239
239
|
|
|
@@ -450,12 +450,14 @@ class BaseTimelineGenerator:
|
|
|
450
450
|
)
|
|
451
451
|
cluster_analyse_file_path = validate_and_normalize_path(cluster_analyse_file_path)
|
|
452
452
|
|
|
453
|
-
with os.fdopen(os.open(cluster_analyse_file_path, os.O_WRONLY | os.O_CREAT | os.O_TRUNC,
|
|
453
|
+
with os.fdopen(os.open(cluster_analyse_file_path, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600),
|
|
454
454
|
'w') as file_handle:
|
|
455
455
|
csv_writer = csv.writer(file_handle)
|
|
456
456
|
if is_pipeline_parallel:
|
|
457
|
-
header = [
|
|
458
|
-
|
|
457
|
+
header = [
|
|
458
|
+
'computation_time', 'communication_alone_time', 'stage_time',
|
|
459
|
+
'receive_alone_time', 'collective_communication_alone_time'
|
|
460
|
+
]
|
|
459
461
|
zip_metrices = zip(metrices[0], metrices[1], metrices[2], metrices[3], metrices[4])
|
|
460
462
|
else:
|
|
461
463
|
header = ['computation_time', 'communication_alone_time']
|
|
@@ -231,7 +231,7 @@ class GpuTimelineGenerator(BaseTimelineGenerator):
|
|
|
231
231
|
gpu_start_time = int(lines[1].strip().split(':')[-1])
|
|
232
232
|
except (IOError, OSError) as err:
|
|
233
233
|
logger.critical(f'Error occurred when read {start_time_file_path}: {err}')
|
|
234
|
-
raise ProfilerIOException()
|
|
234
|
+
raise ProfilerIOException() from err
|
|
235
235
|
|
|
236
236
|
time_diff = gpu_start_time - host_monotonic_start_time
|
|
237
237
|
for idx, time_item in enumerate(timeline_list):
|
|
@@ -258,7 +258,7 @@ class GpuTimelineGenerator(BaseTimelineGenerator):
|
|
|
258
258
|
communication_info.append(line_list)
|
|
259
259
|
except (IOError, OSError) as err:
|
|
260
260
|
logger.critical('Error occurred when load operator timeline data intermediate file: %s', err)
|
|
261
|
-
raise ProfilerIOException()
|
|
261
|
+
raise ProfilerIOException() from err
|
|
262
262
|
|
|
263
263
|
return op_timeline_list, communication_info
|
|
264
264
|
|
|
@@ -324,7 +324,7 @@ class GpuTimelineGenerator(BaseTimelineGenerator):
|
|
|
324
324
|
step_num += 1
|
|
325
325
|
except (IOError, OSError) as err:
|
|
326
326
|
logger.critical(f'Error occurred when read {step_trace_profiling_path}: {err}')
|
|
327
|
-
raise ProfilerIOException()
|
|
327
|
+
raise ProfilerIOException() from err
|
|
328
328
|
|
|
329
329
|
return step_time_list
|
|
330
330
|
|
|
@@ -344,8 +344,10 @@ class GpuTimelineGenerator(BaseTimelineGenerator):
|
|
|
344
344
|
communication time between stages slow down the training. The value of t3 indicates the degree
|
|
345
345
|
that communication inside each stage slow down the training.
|
|
346
346
|
"""
|
|
347
|
-
time_info = {
|
|
348
|
-
|
|
347
|
+
time_info = {
|
|
348
|
+
"stage_time": [], "computation_time": [], "recieve_alone_time": [], "comm_alone_time": [],
|
|
349
|
+
"collective_comm_alone_time": []
|
|
350
|
+
}
|
|
349
351
|
is_pipeline_parallel = False
|
|
350
352
|
comm_timeline = self._get_merged_time_list(
|
|
351
353
|
comm_info,
|
|
@@ -429,9 +431,11 @@ class GpuTimelineGenerator(BaseTimelineGenerator):
|
|
|
429
431
|
except IndexError as e:
|
|
430
432
|
logger.error(e)
|
|
431
433
|
|
|
432
|
-
metrices_per_step_list = [
|
|
433
|
-
|
|
434
|
-
|
|
434
|
+
metrices_per_step_list = [
|
|
435
|
+
time_info.get("computation_time"), time_info.get("comm_alone_time"),
|
|
436
|
+
time_info.get("stage_time"), time_info.get("recieve_alone_time"),
|
|
437
|
+
time_info.get("collective_comm_alone_time")
|
|
438
|
+
]
|
|
435
439
|
if step_num > 1:
|
|
436
440
|
for metric in metrices_per_step_list:
|
|
437
441
|
metric.append(sum(metric[1:]) / (step_num - 1))
|
|
@@ -439,7 +443,7 @@ class GpuTimelineGenerator(BaseTimelineGenerator):
|
|
|
439
443
|
self._write_cluster_metrices(metrices_per_step_list, is_pipeline_parallel, "Gpu", self._device_id)
|
|
440
444
|
except (IOError, OSError) as err:
|
|
441
445
|
logger.warning(err)
|
|
442
|
-
raise ProfilerIOException
|
|
446
|
+
raise ProfilerIOException from err
|
|
443
447
|
|
|
444
448
|
res_timeline = []
|
|
445
449
|
res_timeline.extend(comm_not_overlapped_timeline)
|
|
@@ -604,7 +608,7 @@ class CpuTimelineGenerator(GpuTimelineGenerator):
|
|
|
604
608
|
op_timeline_list.append(line_list)
|
|
605
609
|
except (IOError, OSError) as err:
|
|
606
610
|
logger.critical('Error occurred when load operator timeline data intermediate file: %s', err)
|
|
607
|
-
raise ProfilerIOException()
|
|
611
|
+
raise ProfilerIOException() from err
|
|
608
612
|
|
|
609
613
|
return op_timeline_list
|
|
610
614
|
|
|
@@ -181,7 +181,7 @@ class FlopsParser:
|
|
|
181
181
|
sum_flops_utilization = 0.0
|
|
182
182
|
# calculate the every step FLOPS utilization and the average values.
|
|
183
183
|
utilization_save_filename = os.path.join(self._output_dir, self._flops_utilization_step_filename)
|
|
184
|
-
with open(utilization_save_filename, 'w') as f:
|
|
184
|
+
with os.fdopen(os.open(utilization_save_filename, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600), 'w') as f:
|
|
185
185
|
f.write("steps, FLOPS_Utilization %\n")
|
|
186
186
|
for i, x in enumerate(op_all_step_comp):
|
|
187
187
|
current_utilization = x[0] / x[1] * 1e9 / peak_flops * 100
|
|
@@ -190,6 +190,8 @@ class FlopsParser:
|
|
|
190
190
|
f.write(",")
|
|
191
191
|
f.write(str(current_utilization))
|
|
192
192
|
f.write("\n")
|
|
193
|
+
os.chmod(utilization_save_filename, stat.S_IREAD | stat.S_IWRITE)
|
|
194
|
+
|
|
193
195
|
if len(op_all_step_comp) >= 1:
|
|
194
196
|
self._flops_summary['FLOPS_Utilization'] = sum_flops_utilization / len(op_all_step_comp)
|
|
195
197
|
else:
|
|
@@ -222,7 +224,7 @@ class FlopsParser:
|
|
|
222
224
|
all_log_struct = aicore_file.read(self.AICORE_LOG_SIZE * read_count)
|
|
223
225
|
except (IOError, OSError) as err:
|
|
224
226
|
logger.critical(f'Error occurred when read {aicore_file_path} file: {err}')
|
|
225
|
-
raise ProfilerIOException()
|
|
227
|
+
raise ProfilerIOException() from err
|
|
226
228
|
|
|
227
229
|
return read_count, all_log_struct
|
|
228
230
|
|
|
@@ -246,7 +248,7 @@ class FlopsParser:
|
|
|
246
248
|
peak_flops = device_frequency * 1e6 * ai_core_num * 4096 * 2
|
|
247
249
|
except (IOError, OSError, json.JSONDecodeError) as err:
|
|
248
250
|
logger.critical(f'Error occurred when read {info_json_file_path} file: {err}')
|
|
249
|
-
raise ProfilerIOException()
|
|
251
|
+
raise ProfilerIOException() from err
|
|
250
252
|
|
|
251
253
|
return peak_flops
|
|
252
254
|
|
|
@@ -304,7 +306,7 @@ class FlopsParser:
|
|
|
304
306
|
op_avg_time_dict[op_name] = avg_time
|
|
305
307
|
except (IOError, OSError) as err:
|
|
306
308
|
logger.critical(f'Error occurred when read {optime_file_path} file: {err}')
|
|
307
|
-
raise ProfilerIOException()
|
|
309
|
+
raise ProfilerIOException() from err
|
|
308
310
|
|
|
309
311
|
return op_avg_time_dict
|
|
310
312
|
|
|
@@ -317,7 +319,7 @@ class FlopsParser:
|
|
|
317
319
|
For op_name like "Default/network", the "network" will be renamed as "network(Default)".
|
|
318
320
|
For op_name like "recompute_Default/network", "network" --> "network(recompute_Default)".
|
|
319
321
|
For op_name like "Gradients/network", "network" --> "network(Gradients)".
|
|
320
|
-
For op_name like "Gradients/recompute_Default/network"
|
|
322
|
+
For op_name like "Gradients/recompute_Default/network", "network" --> "network(recompute_Gradients)".
|
|
321
323
|
"""
|
|
322
324
|
# Only extracts the scope name, remove the operator name.
|
|
323
325
|
scope_list = op_name.split('/')[:-1]
|
|
@@ -387,7 +389,7 @@ class FlopsParser:
|
|
|
387
389
|
output_flops_scope_file_path = join_file_path(self._flops_scope_filename)
|
|
388
390
|
|
|
389
391
|
try:
|
|
390
|
-
with open(output_file_path, 'w') as f:
|
|
392
|
+
with os.fdopen(os.open(output_file_path, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600), 'w') as f:
|
|
391
393
|
header = "op_full_name, MFLOPs(10^6), GFLOPS(10^9), FLOPS utilization(%) \n"
|
|
392
394
|
f.writelines(header)
|
|
393
395
|
for op_flops in op_flops_list:
|
|
@@ -396,25 +398,27 @@ class FlopsParser:
|
|
|
396
398
|
os.chmod(output_file_path, stat.S_IREAD | stat.S_IWRITE)
|
|
397
399
|
except (IOError, OSError) as err:
|
|
398
400
|
logger.critical(f'Error occurred when writing {output_file_path} file: {err}')
|
|
399
|
-
raise ProfilerIOException()
|
|
401
|
+
raise ProfilerIOException() from err
|
|
400
402
|
|
|
401
403
|
for key in self._flops_summary:
|
|
402
404
|
self._flops_summary[key] = round(self._flops_summary[key], 3)
|
|
403
405
|
try:
|
|
404
|
-
with open(output_summary_file_path,
|
|
406
|
+
with os.fdopen(os.open(output_summary_file_path, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600),
|
|
407
|
+
'w') as json_file:
|
|
405
408
|
json.dump(self._flops_summary, json_file)
|
|
406
409
|
os.chmod(output_summary_file_path, stat.S_IREAD | stat.S_IWRITE)
|
|
407
410
|
except (IOError, OSError) as err:
|
|
408
411
|
logger.critical(f'Error occurred when write {output_summary_file_path} file: {err}')
|
|
409
|
-
raise ProfilerIOException()
|
|
412
|
+
raise ProfilerIOException() from err
|
|
410
413
|
|
|
411
414
|
try:
|
|
412
|
-
with open(output_flops_scope_file_path,
|
|
415
|
+
with os.fdopen(os.open(output_flops_scope_file_path, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600),
|
|
416
|
+
'w') as json_file:
|
|
413
417
|
json.dump(self._flops_sankey_diagram, json_file)
|
|
414
418
|
os.chmod(output_flops_scope_file_path, stat.S_IREAD | stat.S_IWRITE)
|
|
415
419
|
except (IOError, OSError) as err:
|
|
416
420
|
logger.critical(f'Error occurred when write {output_flops_scope_file_path} file: {err}')
|
|
417
|
-
raise ProfilerIOException()
|
|
421
|
+
raise ProfilerIOException() from err
|
|
418
422
|
|
|
419
423
|
def _get_aicore_files(self, profiler_dir):
|
|
420
424
|
"""Get aicore files."""
|
|
@@ -17,6 +17,7 @@ import csv
|
|
|
17
17
|
import glob
|
|
18
18
|
import json
|
|
19
19
|
import os
|
|
20
|
+
import stat
|
|
20
21
|
import re
|
|
21
22
|
import struct
|
|
22
23
|
from collections import defaultdict
|
|
@@ -52,8 +53,10 @@ TASK_TYPE_TO_KERNEL_TYPE = {
|
|
|
52
53
|
1000: 'MSPROF_UNKNOWN_TYPE'
|
|
53
54
|
}
|
|
54
55
|
|
|
55
|
-
COL_NAMES = [
|
|
56
|
-
|
|
56
|
+
COL_NAMES = [
|
|
57
|
+
'task_id', 'stream_id', 'block_dim', 'full_op_name', 'op_name', 'op_type', 'subgraph', 'op_info',
|
|
58
|
+
'graph_id', 'kernel_type'
|
|
59
|
+
]
|
|
57
60
|
OpData = namedtuple('OpData', field_names=COL_NAMES)
|
|
58
61
|
|
|
59
62
|
|
|
@@ -222,10 +225,11 @@ class FrameworkParser:
|
|
|
222
225
|
|
|
223
226
|
@staticmethod
|
|
224
227
|
def _write_framework_to_file(all_op_data: List[OpData], output_file):
|
|
225
|
-
with open(output_file, 'w') as file_handler:
|
|
228
|
+
with os.fdopen(os.open(output_file, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600), 'w') as file_handler:
|
|
226
229
|
csv_writer = csv.writer(file_handler)
|
|
227
230
|
csv_writer.writerow(COL_NAMES)
|
|
228
231
|
csv_writer.writerows(all_op_data)
|
|
232
|
+
os.chmod(output_file, stat.S_IREAD | stat.S_IWRITE)
|
|
229
233
|
|
|
230
234
|
@staticmethod
|
|
231
235
|
def _get_subgraph_name(full_op_name):
|
|
@@ -545,14 +549,18 @@ class GpuFrameWorkParser:
|
|
|
545
549
|
op_total_time = float(line_info[-4])
|
|
546
550
|
if not self.op_detail.get(op_name):
|
|
547
551
|
# line_info[4]: op_occurrences, line_info[5]: op_detail_time(us), line_info[6]: op_avg_time(us);
|
|
548
|
-
self.op_detail[op_name] = [
|
|
549
|
-
|
|
552
|
+
self.op_detail[op_name] = [
|
|
553
|
+
op_occurrences, op_total_time,
|
|
554
|
+
round(op_total_time / op_occurrences, 4), op_side
|
|
555
|
+
]
|
|
550
556
|
else:
|
|
551
557
|
self.op_detail.get(op_name)[1] += op_total_time
|
|
552
558
|
self.op_detail.get(op_name)[2] = self.op_detail.get(op_name)[1] / self.op_detail.get(op_name)[0]
|
|
553
|
-
self.op_detail[op_name] = [
|
|
554
|
-
|
|
555
|
-
|
|
559
|
+
self.op_detail[op_name] = [
|
|
560
|
+
self.op_detail.get(op_name)[0],
|
|
561
|
+
round(self.op_detail.get(op_name)[1], 4),
|
|
562
|
+
round(self.op_detail.get(op_name)[2], 4), op_side
|
|
563
|
+
]
|
|
556
564
|
|
|
557
565
|
def combine_performance_data(self, op_name):
|
|
558
566
|
"""Combine operator detail info with framework info."""
|
|
@@ -575,8 +583,10 @@ class GpuFrameWorkParser:
|
|
|
575
583
|
op_shape_dict.get(op_shape)[0] += op_occurrences
|
|
576
584
|
op_shape_dict.get(op_shape)[1] += op_total_time
|
|
577
585
|
op_shape_dict.get(op_shape)[2] = op_shape_dict.get(op_shape)[1] / op_shape_dict.get(op_shape)[0]
|
|
578
|
-
op_shape_dict[op_shape] = [
|
|
579
|
-
|
|
586
|
+
op_shape_dict[op_shape] = [
|
|
587
|
+
op_shape_dict.get(op_shape)[0], round(op_shape_dict.get(op_shape)[1], 4),
|
|
588
|
+
round(op_shape_dict.get(op_shape)[2], 4), op_side
|
|
589
|
+
]
|
|
580
590
|
else:
|
|
581
591
|
op_shape_dict[op_shape] = [op_occurrences, op_total_time, op_avg_time, op_side]
|
|
582
592
|
|
|
@@ -669,8 +679,9 @@ class GpuFrameWorkParser:
|
|
|
669
679
|
"kernel_type": kernel_type_step_time,
|
|
670
680
|
}
|
|
671
681
|
dynamic_shape_file_path = os.path.join(self._output_path, output_dynamic_shape_file_name)
|
|
672
|
-
with os.fdopen(os.open(dynamic_shape_file_path, os.O_WRONLY | os.O_CREAT | os.O_TRUNC,
|
|
682
|
+
with os.fdopen(os.open(dynamic_shape_file_path, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600), 'w') as fp:
|
|
673
683
|
json.dump(result, fp)
|
|
684
|
+
os.chmod(dynamic_shape_file_path, stat.S_IREAD | stat.S_IWRITE)
|
|
674
685
|
|
|
675
686
|
def get_graph_ids(self):
|
|
676
687
|
"""Get gpu graph ids."""
|
|
@@ -724,10 +735,12 @@ class GpuFrameWorkParser:
|
|
|
724
735
|
else:
|
|
725
736
|
self.one_step_op_time[sort_type][0] += duration
|
|
726
737
|
self.one_step_op_time[sort_type][1] += 1
|
|
727
|
-
self.one_step_op_time[sort_type] = [
|
|
728
|
-
|
|
729
|
-
|
|
730
|
-
|
|
738
|
+
self.one_step_op_time[sort_type] = [
|
|
739
|
+
self.one_step_op_time[sort_type][0],
|
|
740
|
+
self.one_step_op_time[sort_type][1],
|
|
741
|
+
round(self.one_step_op_time[sort_type][0] /
|
|
742
|
+
self.one_step_op_time[sort_type][1], 4)
|
|
743
|
+
]
|
|
731
744
|
else:
|
|
732
745
|
sort_type = item.get("op_name")
|
|
733
746
|
op_full_name = item.get("op_full_name")
|
|
@@ -737,11 +750,13 @@ class GpuFrameWorkParser:
|
|
|
737
750
|
else:
|
|
738
751
|
self.one_step_kernel_time[sort_type][0] += duration
|
|
739
752
|
self.one_step_kernel_time[sort_type][1] += 1
|
|
740
|
-
self.one_step_kernel_time[sort_type] = [
|
|
741
|
-
|
|
742
|
-
|
|
743
|
-
|
|
744
|
-
|
|
753
|
+
self.one_step_kernel_time[sort_type] = [
|
|
754
|
+
self.one_step_kernel_time[sort_type][0],
|
|
755
|
+
self.one_step_kernel_time[sort_type][1],
|
|
756
|
+
round(self.one_step_kernel_time[sort_type][0] /
|
|
757
|
+
self.one_step_kernel_time[sort_type][1], 4),
|
|
758
|
+
op_full_name
|
|
759
|
+
]
|
|
745
760
|
|
|
746
761
|
|
|
747
762
|
class DynamicFrameWorkParser:
|
|
@@ -788,8 +803,9 @@ class DynamicFrameWorkParser:
|
|
|
788
803
|
len(self._op_type_exe_time[op_type]), 4)).tolist()
|
|
789
804
|
self._dynamic_shape_info['op_type'] = self._op_info.get("op_type")
|
|
790
805
|
dynamic_shape_file_path = os.path.join(self._output_path, output_dynamic_shape_file_name)
|
|
791
|
-
with os.fdopen(os.open(dynamic_shape_file_path, os.O_WRONLY | os.O_CREAT | os.O_TRUNC,
|
|
806
|
+
with os.fdopen(os.open(dynamic_shape_file_path, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600), 'w') as fp:
|
|
792
807
|
json.dump(self._dynamic_shape_info, fp)
|
|
808
|
+
os.chmod(dynamic_shape_file_path, stat.S_IREAD | stat.S_IWRITE)
|
|
793
809
|
|
|
794
810
|
def _analyse_op_execute_time(self, op_summary):
|
|
795
811
|
"""Obtain the execution time of aicpu operator and aicore operator."""
|
|
@@ -36,9 +36,9 @@ class CommunicationInfo(Enum):
|
|
|
36
36
|
RDMA: Communication link between servers in cluster training.
|
|
37
37
|
SDMA: Communication link inside server in cluster training.
|
|
38
38
|
LOCAL: The operation of this card has no transmission process.
|
|
39
|
-
RDMASEND
|
|
40
|
-
REDUCE_INLINE
|
|
41
|
-
MEMCPY
|
|
39
|
+
RDMASEND: Communication operator of RDMA link.
|
|
40
|
+
REDUCE_INLINE: Communication operator of SDMA link.
|
|
41
|
+
MEMCPY: Communication operator of SDMA link.
|
|
42
42
|
NOTIFY_RECORD: Communication operator of SDMA link.
|
|
43
43
|
NOTIFY_WAIT: operator of LOCAL.
|
|
44
44
|
"""
|
|
@@ -269,15 +269,17 @@ class HcclParser:
|
|
|
269
269
|
|
|
270
270
|
# index_0:step_num, index_1:start_point, index_2:end_point
|
|
271
271
|
# The unit of time stamp is 10ns. To convert it to μs, you need to divide it by 100.
|
|
272
|
-
step_timestamps_info = [
|
|
273
|
-
|
|
272
|
+
step_timestamps_info = [
|
|
273
|
+
[info[0], float(info[1]) / 100, float(info[2]) / 100]
|
|
274
|
+
for info in csv_reader if info[0].isdigit()
|
|
275
|
+
]
|
|
274
276
|
|
|
275
277
|
return [communication_operators_names, step_timestamps_info]
|
|
276
278
|
|
|
277
279
|
def _get_communication_operator_name_mapping_info(self):
|
|
278
280
|
"""Get the name of communication operators mapping between hccl and step trace."""
|
|
279
281
|
dir_path = self._validate_dir_path(self._source_dir)
|
|
280
|
-
# The name of the operator in hccl is like
|
|
282
|
+
# The name of the operator in hccl is like: operatorName_{Ordered_number}_xx_xx.
|
|
281
283
|
operators_names_in_hccl = [entry.name for entry in os.scandir(dir_path) if entry.is_dir()]
|
|
282
284
|
operators_names_in_hccl_set = set({i.split('_')[0] for i in operators_names_in_hccl})
|
|
283
285
|
op_names_in_hccl_dic = dict()
|
|
@@ -294,8 +296,10 @@ class HcclParser:
|
|
|
294
296
|
for op_name in operators_names_in_step_trace})
|
|
295
297
|
op_names_in_step_trace_dic = dict()
|
|
296
298
|
for item in op_names_in_step_trace_set:
|
|
297
|
-
op_names_in_step_trace_dic[item] = [
|
|
298
|
-
|
|
299
|
+
op_names_in_step_trace_dic[item] = [
|
|
300
|
+
op_name for op_name in operators_names_in_step_trace
|
|
301
|
+
if op_name.split('/')[-1].split('-')[0].split('_')[-1] == item
|
|
302
|
+
]
|
|
299
303
|
|
|
300
304
|
communication_operator_mapping_info = dict()
|
|
301
305
|
for hccl_key, hccl_value in op_names_in_hccl_dic.items():
|
|
@@ -548,9 +552,9 @@ class HcclParser:
|
|
|
548
552
|
"""Validate file path."""
|
|
549
553
|
try:
|
|
550
554
|
file_path = validate_and_normalize_path(file_path)
|
|
551
|
-
except RuntimeError:
|
|
555
|
+
except RuntimeError as err:
|
|
552
556
|
logger.warning('file path is invalid.')
|
|
553
|
-
raise ProfilerPathErrorException('file path is invalid.')
|
|
557
|
+
raise ProfilerPathErrorException('file path is invalid.') from err
|
|
554
558
|
if not os.path.isfile(file_path):
|
|
555
559
|
logger.warning('The file <%s> not found.', file_path)
|
|
556
560
|
raise ProfilerFileNotFoundException(file_path)
|
|
@@ -560,9 +564,9 @@ class HcclParser:
|
|
|
560
564
|
"""Validate dir path."""
|
|
561
565
|
try:
|
|
562
566
|
dir_path = validate_and_normalize_path(dir_path)
|
|
563
|
-
except RuntimeError:
|
|
567
|
+
except RuntimeError as err:
|
|
564
568
|
logger.warning('dir path is invalid.')
|
|
565
|
-
raise ProfilerPathErrorException('dir path is invalid.')
|
|
569
|
+
raise ProfilerPathErrorException('dir path is invalid.') from err
|
|
566
570
|
if not os.path.isdir(dir_path):
|
|
567
571
|
logger.warning('The dir <%s> not found.', dir_path)
|
|
568
572
|
raise ProfilerDirNotFoundException(dir_path)
|
|
@@ -16,6 +16,7 @@
|
|
|
16
16
|
import csv
|
|
17
17
|
import json
|
|
18
18
|
import os
|
|
19
|
+
import stat
|
|
19
20
|
from decimal import Decimal
|
|
20
21
|
from enum import Enum
|
|
21
22
|
import sys
|
|
@@ -39,8 +40,10 @@ class Integrator:
|
|
|
39
40
|
_file_name_framework = 'framework_raw_{}.csv'
|
|
40
41
|
_header_aicore_type = ['op_type', 'total_time', 'execution_frequency', 'percent']
|
|
41
42
|
_header_aicore_detail = ['full_op_name', 'execution_time', 'execution_frequency']
|
|
42
|
-
_header_aicpu = [
|
|
43
|
-
|
|
43
|
+
_header_aicpu = [
|
|
44
|
+
'serial_number', 'op_type', 'total_time', 'dispatch_time',
|
|
45
|
+
'execution_time', 'run_start', 'run_end'
|
|
46
|
+
]
|
|
44
47
|
|
|
45
48
|
_file_name_aicore_type_time = 'aicore_intermediate_{}_type.csv'
|
|
46
49
|
_file_name_aicore_detail_info = 'aicore_intermediate_{}_detail.csv'
|
|
@@ -153,7 +156,7 @@ class Integrator:
|
|
|
153
156
|
op_type_time[1] += op_info[1]
|
|
154
157
|
op_type_file_name = 'aicore_intermediate_' + self._device_id + '_type.csv'
|
|
155
158
|
op_type_file_path = os.path.join(self._profiling_dir, op_type_file_name)
|
|
156
|
-
with open(op_type_file_path, 'w') as type_file:
|
|
159
|
+
with os.fdopen(os.open(op_type_file_path, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600), 'w') as type_file:
|
|
157
160
|
csv_writer = csv.writer(type_file)
|
|
158
161
|
csv_writer.writerow(self._header_aicore_type)
|
|
159
162
|
|
|
@@ -164,6 +167,7 @@ class Integrator:
|
|
|
164
167
|
round((op_type_time_info[0] / total_time) * 100, 2)
|
|
165
168
|
]
|
|
166
169
|
csv_writer.writerow(type_info)
|
|
170
|
+
os.chmod(op_type_file_path, stat.S_IREAD | stat.S_IWRITE)
|
|
167
171
|
|
|
168
172
|
def _parse_aicore_detail_time(self):
|
|
169
173
|
"""Parse the parsed AICORE operator time file."""
|
|
@@ -188,8 +192,8 @@ class Integrator:
|
|
|
188
192
|
_ = src_file.readline()
|
|
189
193
|
else:
|
|
190
194
|
return
|
|
191
|
-
|
|
192
|
-
|
|
195
|
+
with os.fdopen(os.open(op_detail_file_path, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600),
|
|
196
|
+
'w') as detail_file:
|
|
193
197
|
csv_writer = csv.writer(detail_file)
|
|
194
198
|
csv_writer.writerow(self._header_aicore_detail)
|
|
195
199
|
|
|
@@ -204,6 +208,7 @@ class Integrator:
|
|
|
204
208
|
continue
|
|
205
209
|
self._op_time_cache[op_infos[0]] = [Decimal(op_infos[1]), int(op_infos[3])]
|
|
206
210
|
csv_writer.writerow([op_infos[0], op_infos[1], op_infos[3]])
|
|
211
|
+
os.chmod(op_detail_file_path, stat.S_IREAD | stat.S_IWRITE)
|
|
207
212
|
|
|
208
213
|
def _parse_aicpu_time(self):
|
|
209
214
|
"""Parse the parsed AICPU operator time file."""
|
|
@@ -221,7 +226,7 @@ class Integrator:
|
|
|
221
226
|
row = src_file.readline()
|
|
222
227
|
if not row.startswith('serial_number'):
|
|
223
228
|
return
|
|
224
|
-
with open(save_file_path, 'w') as save_file:
|
|
229
|
+
with os.fdopen(os.open(save_file_path, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600), 'w') as save_file:
|
|
225
230
|
csv_writer = csv.writer(save_file)
|
|
226
231
|
csv_writer.writerow(self._header_aicpu)
|
|
227
232
|
|
|
@@ -233,6 +238,7 @@ class Integrator:
|
|
|
233
238
|
if infos[0] == 'AI':
|
|
234
239
|
continue
|
|
235
240
|
csv_writer.writerow(infos)
|
|
241
|
+
os.chmod(save_file_path, stat.S_IREAD | stat.S_IWRITE)
|
|
236
242
|
|
|
237
243
|
def _aicore_data_load(self):
|
|
238
244
|
"""Load data according to the parsed AICORE operator types file."""
|
|
@@ -276,7 +282,8 @@ class Integrator:
|
|
|
276
282
|
_ = next(csv_reader)
|
|
277
283
|
for info in csv_reader:
|
|
278
284
|
framework_infos[info[3]] = [
|
|
279
|
-
info[3], info[4], info[5], info[6], json.loads(info[7]) if info[7] else None
|
|
285
|
+
info[3], info[4], info[5], info[6], json.loads(info[7]) if info[7] else None
|
|
286
|
+
]
|
|
280
287
|
|
|
281
288
|
with open(op_detail_file_path, 'r') as file:
|
|
282
289
|
csv_reader = csv.reader(file)
|
|
@@ -374,8 +381,10 @@ class Integrator:
|
|
|
374
381
|
factor = 1e5 # convert time unit from 10ns to 1ms
|
|
375
382
|
reduce_pid = 10000
|
|
376
383
|
reduce_info = []
|
|
377
|
-
reduce_fields = [
|
|
378
|
-
|
|
384
|
+
reduce_fields = [
|
|
385
|
+
field_name for field_name in self._column
|
|
386
|
+
if field_name.startswith('stream_') and not field_name.endswith('point')
|
|
387
|
+
]
|
|
379
388
|
for reduce_field in reduce_fields:
|
|
380
389
|
reduce_start = row_info_dict.get(reduce_field + '_start_point')
|
|
381
390
|
reduce_start = reduce_start / factor \
|
|
@@ -388,8 +397,10 @@ class Integrator:
|
|
|
388
397
|
cur_stream_id = reduce_field.split('_', 3)[1]
|
|
389
398
|
if reduce_field.split('_', 2)[1] == 'ops':
|
|
390
399
|
cur_stream_id = reduce_field.split('_', 3)[2]
|
|
391
|
-
reduce_meta = [
|
|
392
|
-
|
|
400
|
+
reduce_meta = [
|
|
401
|
+
reduce_field, int(cur_stream_id), reduce_start,
|
|
402
|
+
reduce_duration, reduce_pid
|
|
403
|
+
]
|
|
393
404
|
reduce_info.append(reduce_meta)
|
|
394
405
|
|
|
395
406
|
return reduce_info
|