mindspore 2.1.0__cp37-cp37m-manylinux1_x86_64.whl → 2.2.10__cp37-cp37m-manylinux1_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mindspore might be problematic. Click here for more details.
- mindspore/.commit_id +1 -1
- mindspore/__init__.py +4 -1
- mindspore/_akg/akg/build_module.py +5 -6
- mindspore/_akg/akg/composite/build_module.py +46 -19
- mindspore/_akg/akg/composite/split_stitch.py +10 -11
- mindspore/_akg/akg/ms/info_version_adapt.py +67 -1
- mindspore/_akg/akg/tvm/api.py +4 -3
- mindspore/_akg/akg/tvm/autotvm/__init__.py +1 -2
- mindspore/_akg/akg/tvm/autotvm/graph_tuner/base_graph_tuner.py +1 -5
- mindspore/_akg/akg/tvm/autotvm/measure/__init__.py +1 -1
- mindspore/_akg/akg/tvm/autotvm/measure/measure.py +1 -10
- mindspore/_akg/akg/tvm/autotvm/measure/measure_methods.py +1 -372
- mindspore/_akg/akg/tvm/build_module.py +16 -1
- mindspore/_akg/akg/tvm/contrib/graph_runtime.py +0 -53
- mindspore/_akg/akg/tvm/hybrid/parser.py +7 -6
- mindspore/_akg/akg/tvm/ir_builder.py +1 -1
- mindspore/_akg/akg/tvm/module.py +1 -2
- mindspore/_akg/akg/tvm/stmt.py +2 -2
- mindspore/_akg/akg/utils/ascend_profilier/__init__.py +0 -0
- mindspore/_akg/akg/utils/ascend_profilier/cann_file_parser.py +76 -0
- mindspore/_akg/akg/utils/ascend_profilier/file_manager.py +56 -0
- mindspore/_akg/akg/utils/ascend_profilier/op_summary_bean.py +23 -0
- mindspore/_akg/akg/utils/ascend_profilier/op_summary_headers.py +8 -0
- mindspore/_akg/akg/utils/ascend_profilier/op_summary_parser.py +42 -0
- mindspore/_akg/akg/utils/ascend_profilier/path_manager.py +65 -0
- mindspore/_akg/akg/utils/composite_op_helper.py +9 -10
- mindspore/_akg/akg/utils/kernel_exec.py +98 -274
- mindspore/_akg/akg/utils/result_analysis.py +4 -24
- mindspore/_akg/akg/utils/tbe_codegen_utils.py +219 -0
- mindspore/_akg/akg/utils/util.py +38 -0
- mindspore/_c_dataengine.cpython-37m-x86_64-linux-gnu.so +0 -0
- mindspore/_c_expression.cpython-37m-x86_64-linux-gnu.so +0 -0
- mindspore/_c_mindrecord.cpython-37m-x86_64-linux-gnu.so +0 -0
- mindspore/_check_jit_forbidden_api.py +3 -1
- mindspore/_checkparam.py +23 -29
- mindspore/_extends/graph_kernel/__init__.py +0 -1
- mindspore/_extends/graph_kernel/model/graph_split.py +84 -76
- mindspore/_extends/graph_kernel/model/model_builder.py +9 -50
- mindspore/_extends/graph_kernel/splitter.py +4 -11
- mindspore/_extends/parallel_compile/akg_compiler/akg_process.py +122 -15
- mindspore/_extends/parallel_compile/akg_compiler/build_tbe_kernel.py +84 -67
- mindspore/_extends/parallel_compile/akg_compiler/tbe_topi.py +4 -2
- mindspore/_extends/parallel_compile/akg_compiler/util.py +10 -7
- mindspore/_extends/parallel_compile/tbe_compiler/tbe_adapter.py +2 -2
- mindspore/_extends/parallel_compile/tbe_compiler/tbe_helper.py +6 -5
- mindspore/_extends/parallel_compile/tbe_compiler/tbe_job.py +1 -1
- mindspore/_extends/parallel_compile/tbe_compiler/tbe_job_manager.py +1 -1
- mindspore/_extends/parse/__init__.py +12 -15
- mindspore/_extends/parse/namespace.py +7 -33
- mindspore/_extends/parse/parser.py +61 -71
- mindspore/_extends/parse/resources.py +1 -1
- mindspore/_extends/parse/standard_method.py +74 -104
- mindspore/_extends/parse/trope.py +1 -1
- mindspore/_extends/remote/kernel_build_server.py +25 -7
- mindspore/_extends/remote/kernel_build_server_akg_v2.py +55 -0
- mindspore/_install_custom.py +43 -0
- mindspore/_mindspore_offline_debug.cpython-37m-x86_64-linux-gnu.so +0 -0
- mindspore/amp.py +47 -11
- mindspore/bin/cache_admin +0 -0
- mindspore/bin/cache_server +0 -0
- mindspore/boost/boost.py +1 -8
- mindspore/boost/boost_cell_wrapper.py +3 -2
- mindspore/boost/grad_accumulation.py +1 -1
- mindspore/boost/group_loss_scale_manager.py +8 -7
- mindspore/common/__init__.py +5 -3
- mindspore/common/_jit_fallback_utils.py +6 -0
- mindspore/common/_register_for_adapter.py +2 -0
- mindspore/common/_register_for_tensor.py +2 -2
- mindspore/common/_stub_tensor.py +13 -0
- mindspore/common/_utils.py +13 -0
- mindspore/common/api.py +174 -259
- mindspore/common/auto_dynamic_shape.py +494 -0
- mindspore/common/dtype.py +18 -11
- mindspore/common/dump.py +6 -4
- mindspore/common/initializer.py +14 -14
- mindspore/common/jit_config.py +33 -15
- mindspore/common/lazy_inline.py +126 -7
- mindspore/common/mindir_util.py +101 -0
- mindspore/common/parameter.py +51 -41
- mindspore/common/seed.py +4 -4
- mindspore/common/sparse_tensor.py +13 -14
- mindspore/common/tensor.py +243 -165
- mindspore/communication/__init__.py +7 -4
- mindspore/communication/_comm_helper.py +83 -4
- mindspore/communication/management.py +152 -84
- mindspore/config/op_info.config +14 -3
- mindspore/config/super_bar_config.json +4 -2
- mindspore/context.py +152 -61
- mindspore/dataset/__init__.py +5 -5
- mindspore/dataset/audio/__init__.py +2 -2
- mindspore/dataset/audio/transforms.py +52 -52
- mindspore/dataset/callback/ds_callback.py +16 -2
- mindspore/dataset/core/config.py +68 -51
- mindspore/dataset/engine/cache_client.py +28 -5
- mindspore/dataset/engine/datasets.py +250 -112
- mindspore/dataset/engine/datasets_audio.py +43 -211
- mindspore/dataset/engine/datasets_standard_format.py +16 -35
- mindspore/dataset/engine/datasets_text.py +43 -67
- mindspore/dataset/engine/datasets_user_defined.py +86 -100
- mindspore/dataset/engine/datasets_vision.py +219 -1029
- mindspore/dataset/engine/iterators.py +11 -4
- mindspore/dataset/engine/obs/obs_mindrecord_dataset.py +4 -0
- mindspore/dataset/engine/obs/util.py +3 -0
- mindspore/dataset/engine/samplers.py +1 -1
- mindspore/dataset/engine/validators.py +19 -5
- mindspore/dataset/text/__init__.py +3 -3
- mindspore/dataset/text/transforms.py +101 -127
- mindspore/dataset/text/utils.py +205 -138
- mindspore/dataset/transforms/__init__.py +1 -1
- mindspore/dataset/transforms/py_transforms_util.py +40 -12
- mindspore/dataset/transforms/transforms.py +95 -40
- mindspore/dataset/utils/browse_dataset.py +8 -2
- mindspore/dataset/utils/line_reader.py +17 -19
- mindspore/dataset/vision/__init__.py +3 -3
- mindspore/dataset/vision/c_transforms.py +6 -3
- mindspore/dataset/vision/transforms.py +409 -287
- mindspore/dataset/vision/utils.py +13 -14
- mindspore/dataset/vision/validators.py +11 -1
- mindspore/experimental/map_parameter.py +14 -0
- mindspore/{nn/optim_ex → experimental/optim}/__init__.py +30 -29
- mindspore/{nn/optim_ex → experimental/optim}/adam.py +60 -67
- mindspore/{nn/optim_ex → experimental/optim}/adamw.py +181 -203
- mindspore/experimental/optim/lr_scheduler.py +1427 -0
- mindspore/{nn/optim_ex → experimental/optim}/optimizer.py +252 -259
- mindspore/{nn/optim_ex → experimental/optim}/sgd.py +147 -152
- mindspore/gen_ops.py +273 -0
- mindspore/include/OWNERS +0 -1
- mindspore/include/api/data_type.h +2 -1
- mindspore/include/api/graph.h +0 -15
- mindspore/include/api/kernel.h +2 -0
- mindspore/include/api/kernel_api.h +37 -12
- mindspore/include/api/model.h +17 -14
- mindspore/include/api/status.h +8 -3
- mindspore/include/api/types.h +37 -4
- mindspore/include/c_api/ms/abstract.h +67 -0
- mindspore/include/c_api/ms/attribute.h +197 -0
- mindspore/include/c_api/ms/base/handle_types.h +43 -0
- mindspore/include/c_api/ms/base/macros.h +32 -0
- mindspore/include/c_api/ms/base/status.h +33 -0
- mindspore/include/c_api/ms/base/types.h +282 -0
- mindspore/include/c_api/ms/context.h +102 -0
- mindspore/include/c_api/ms/graph.h +160 -0
- mindspore/include/c_api/ms/node.h +606 -0
- mindspore/include/c_api/ms/tensor.h +161 -0
- mindspore/include/c_api/ms/value.h +84 -0
- mindspore/include/dataset/constants.h +6 -5
- mindspore/include/dataset/execute.h +23 -13
- mindspore/include/dataset/text.h +26 -26
- mindspore/include/dataset/transforms.h +13 -13
- mindspore/include/dataset/vision.h +60 -60
- mindspore/include/dataset/vision_ascend.h +5 -6
- mindspore/include/dataset/vision_lite.h +17 -17
- mindspore/include/mindapi/base/type_id.h +1 -0
- mindspore/include/mindapi/base/types.h +1 -0
- mindspore/lib/libdnnl.so.2 +0 -0
- mindspore/lib/libjemalloc.so.2 +0 -0
- mindspore/lib/libmindspore.so +0 -0
- mindspore/lib/libmindspore_backend.so +0 -0
- mindspore/lib/libmindspore_common.so +0 -0
- mindspore/lib/libmindspore_core.so +0 -0
- mindspore/lib/libmindspore_glog.so.0 +0 -0
- mindspore/lib/libmindspore_gpr.so.15 +0 -0
- mindspore/lib/libmindspore_grpc++.so.1 +0 -0
- mindspore/lib/libmindspore_grpc.so.15 +0 -0
- mindspore/lib/libmindspore_shared_lib.so +0 -0
- mindspore/lib/libnnacl.so +0 -0
- mindspore/lib/libopencv_core.so.4.5 +0 -0
- mindspore/lib/libopencv_imgcodecs.so.4.5 +0 -0
- mindspore/lib/libopencv_imgproc.so.4.5 +0 -0
- mindspore/lib/libps_cache.so +0 -0
- mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/config/ascend310/aic-ascend310-ops-info.json +123 -0
- mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/config/ascend310p/aic-ascend310p-ops-info.json +123 -0
- mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/config/ascend910/aic-ascend910-ops-info.json +158 -0
- mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/config/ascend910b/aic-ascend910b-ops-info.json +37 -0
- mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/custom_aicore_ops_impl/add_dsl.py +46 -0
- mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/custom_aicore_ops_impl/add_tik.py +51 -0
- mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/custom_aicore_ops_impl/kv_cache_mgr.py +241 -0
- mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/custom_aicore_ops_impl/matmul_tik.py +212 -0
- mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/vector_core/tbe/custom_aicore_ops_impl/add_dsl.py +46 -0
- mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/vector_core/tbe/custom_aicore_ops_impl/add_tik.py +51 -0
- mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/vector_core/tbe/custom_aicore_ops_impl/kv_cache_mgr.py +241 -0
- mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/vector_core/tbe/custom_aicore_ops_impl/matmul_tik.py +212 -0
- mindspore/lib/plugin/ascend/custom_aicore_ops/op_proto/libop_proto.so +0 -0
- mindspore/lib/plugin/ascend/custom_aicpu_ops/op_impl/cpu/aicpu_kernel/impl/libcust_aicpu_kernels.so +0 -0
- mindspore/lib/plugin/ascend/custom_aicpu_ops/op_impl/cpu/aicpu_kernel/impl/libcust_cpu_kernels.so +0 -0
- mindspore/lib/plugin/ascend/custom_aicpu_ops/op_impl/cpu/config/cust_aicpu_kernel.json +8928 -0
- mindspore/lib/plugin/ascend/custom_aicpu_ops/op_proto/libcust_op_proto.so +0 -0
- mindspore/lib/plugin/ascend/libakg.so +0 -0
- mindspore/lib/plugin/ascend/libascend_collective.so +0 -0
- mindspore/lib/plugin/ascend/libdvpp_utils.so +0 -0
- mindspore/lib/plugin/ascend/libhccl_plugin.so +0 -0
- mindspore/lib/plugin/ascend/libmindspore_aicpu_kernels.so +0 -0
- mindspore/lib/plugin/ascend/libmindspore_cpu_kernels.so +0 -0
- mindspore/lib/plugin/cpu/libakg.so +0 -0
- mindspore/lib/plugin/gpu/libcuda_ops.so.10 +0 -0
- mindspore/lib/plugin/gpu/libcuda_ops.so.11 +0 -0
- mindspore/lib/plugin/gpu10.1/libakg.so +0 -0
- mindspore/lib/plugin/gpu10.1/libnccl.so.2 +0 -0
- mindspore/lib/plugin/gpu11.1/libakg.so +0 -0
- mindspore/lib/plugin/gpu11.1/libnccl.so.2 +0 -0
- mindspore/lib/plugin/gpu11.6/libakg.so +0 -0
- mindspore/lib/plugin/gpu11.6/libnccl.so.2 +0 -0
- mindspore/lib/plugin/libmindspore_ascend.so.1 +0 -0
- mindspore/lib/plugin/libmindspore_ascend.so.2 +0 -0
- mindspore/lib/plugin/libmindspore_gpu.so.10.1 +0 -0
- mindspore/lib/plugin/libmindspore_gpu.so.11.1 +0 -0
- mindspore/lib/plugin/libmindspore_gpu.so.11.6 +0 -0
- mindspore/mindrecord/tools/imagenet_to_mr.py +1 -1
- mindspore/mindrecord/tools/mnist_to_mr.py +2 -2
- mindspore/nn/__init__.py +0 -2
- mindspore/nn/cell.py +313 -74
- mindspore/nn/dynamic_lr.py +21 -21
- mindspore/nn/layer/activation.py +22 -30
- mindspore/nn/layer/basic.py +15 -13
- mindspore/nn/layer/channel_shuffle.py +1 -1
- mindspore/nn/layer/container.py +271 -9
- mindspore/nn/layer/conv.py +323 -204
- mindspore/nn/layer/dense.py +8 -5
- mindspore/nn/layer/embedding.py +33 -27
- mindspore/nn/layer/flash_attention.py +141 -88
- mindspore/nn/layer/image.py +8 -6
- mindspore/nn/layer/math.py +16 -25
- mindspore/nn/layer/normalization.py +107 -66
- mindspore/nn/layer/padding.py +1 -1
- mindspore/nn/layer/pooling.py +131 -109
- mindspore/nn/layer/rnn_cells.py +27 -22
- mindspore/nn/layer/rnns.py +13 -16
- mindspore/nn/layer/thor_layer.py +1 -1
- mindspore/nn/layer/transformer.py +221 -154
- mindspore/nn/learning_rate_schedule.py +9 -1
- mindspore/nn/loss/loss.py +235 -174
- mindspore/nn/optim/ada_grad.py +2 -1
- mindspore/nn/optim/adadelta.py +1 -0
- mindspore/nn/optim/adafactor.py +2 -1
- mindspore/nn/optim/adam.py +7 -4
- mindspore/nn/optim/adamax.py +3 -2
- mindspore/nn/optim/adasum.py +2 -2
- mindspore/nn/optim/asgd.py +2 -3
- mindspore/nn/optim/ftrl.py +6 -5
- mindspore/nn/optim/lamb.py +7 -4
- mindspore/nn/optim/lars.py +1 -1
- mindspore/nn/optim/lazyadam.py +5 -3
- mindspore/nn/optim/momentum.py +2 -1
- mindspore/nn/optim/optimizer.py +53 -4
- mindspore/nn/optim/proximal_ada_grad.py +3 -4
- mindspore/nn/optim/rmsprop.py +4 -3
- mindspore/nn/optim/rprop.py +23 -12
- mindspore/nn/optim/sgd.py +26 -11
- mindspore/nn/optim/thor.py +9 -7
- mindspore/nn/probability/bijector/bijector.py +5 -5
- mindspore/nn/probability/bijector/power_transform.py +27 -27
- mindspore/nn/probability/bijector/softplus.py +3 -3
- mindspore/nn/probability/distribution/_utils/custom_ops.py +3 -3
- mindspore/nn/probability/distribution/bernoulli.py +5 -5
- mindspore/nn/probability/distribution/beta.py +3 -3
- mindspore/nn/probability/distribution/categorical.py +7 -7
- mindspore/nn/probability/distribution/cauchy.py +0 -1
- mindspore/nn/probability/distribution/distribution.py +3 -3
- mindspore/nn/probability/distribution/gamma.py +3 -3
- mindspore/nn/probability/distribution/geometric.py +4 -4
- mindspore/nn/probability/distribution/gumbel.py +4 -4
- mindspore/nn/probability/distribution/log_normal.py +2 -2
- mindspore/nn/probability/distribution/logistic.py +2 -2
- mindspore/nn/probability/distribution/poisson.py +4 -4
- mindspore/nn/probability/distribution/transformed_distribution.py +3 -3
- mindspore/nn/probability/distribution/uniform.py +6 -6
- mindspore/nn/wrap/cell_wrapper.py +84 -34
- mindspore/nn/wrap/grad_reducer.py +8 -5
- mindspore/nn/wrap/loss_scale.py +105 -42
- mindspore/numpy/array_creations.py +1 -2
- mindspore/numpy/array_ops.py +3 -2
- mindspore/numpy/utils_const.py +5 -5
- mindspore/offline_debug/convert_async.py +2 -2
- mindspore/ops/_grad_experimental/__init__.py +0 -5
- mindspore/ops/_grad_experimental/grad_array_ops.py +2 -3
- mindspore/ops/_grad_experimental/grad_comm_ops.py +15 -2
- mindspore/ops/_grad_experimental/grad_debug_ops.py +0 -37
- mindspore/ops/_grad_experimental/grad_implementations.py +11 -1
- mindspore/ops/_grad_experimental/grad_inner_ops.py +2 -216
- mindspore/ops/_grad_experimental/grad_math_ops.py +19 -199
- mindspore/ops/_grad_experimental/grad_sparse.py +15 -0
- mindspore/ops/_grad_experimental/grad_sparse_ops.py +3 -3
- mindspore/ops/_op_impl/_custom_op/dsd_back_impl.py +1 -1
- mindspore/ops/_op_impl/_custom_op/flash_attention/attention.py +165 -109
- mindspore/ops/_op_impl/_custom_op/flash_attention/flash_attention_bwd.py +144 -86
- mindspore/ops/_op_impl/_custom_op/flash_attention/flash_attention_fwd.py +172 -187
- mindspore/ops/_op_impl/_custom_op/flash_attention/flash_attention_impl.py +51 -57
- mindspore/ops/_op_impl/_custom_op/flash_attention/tik_ops_utils.py +6 -17
- mindspore/ops/_op_impl/_custom_op/flash_attention/tiling_strategy/wukong_tiling.py +1 -1
- mindspore/ops/_op_impl/aicpu/__init__.py +14 -2
- mindspore/ops/_op_impl/aicpu/add.py +3 -3
- mindspore/ops/_op_impl/aicpu/bias_add_grad.py +0 -1
- mindspore/ops/_op_impl/aicpu/count_nonzero.py +43 -0
- mindspore/ops/_op_impl/aicpu/eps.py +32 -0
- mindspore/ops/_op_impl/aicpu/gamma.py +2 -2
- mindspore/ops/_op_impl/aicpu/log_uniform_candidate_sampler.py +6 -3
- mindspore/ops/_op_impl/aicpu/lu_unpack_grad.py +0 -1
- mindspore/ops/_op_impl/aicpu/multinomial.py +3 -3
- mindspore/ops/_op_impl/aicpu/parameterized_truncated_normal.py +15 -7
- mindspore/ops/_op_impl/aicpu/random_categorical.py +39 -19
- mindspore/ops/_op_impl/aicpu/random_choice_with_mask.py +5 -2
- mindspore/ops/_op_impl/aicpu/random_poisson.py +103 -52
- mindspore/ops/_op_impl/aicpu/random_shuffle.py +17 -15
- mindspore/ops/_op_impl/aicpu/{sparseaddmm.py → sparse_addmm.py} +2 -2
- mindspore/ops/_op_impl/aicpu/{sparsesparsemaximum.py → sparse_sparse_maximum.py} +4 -4
- mindspore/ops/_op_impl/aicpu/standard_laplace.py +5 -5
- mindspore/ops/_op_impl/aicpu/standard_normal.py +5 -5
- mindspore/ops/_op_impl/aicpu/truncated_normal.py +9 -7
- mindspore/ops/_op_impl/aicpu/uniform.py +5 -3
- mindspore/ops/_op_impl/aicpu/uniform_candidate_sampler.py +8 -4
- mindspore/ops/_op_impl/aicpu/uniform_int.py +5 -5
- mindspore/ops/_op_impl/aicpu/uniform_real.py +4 -4
- mindspore/ops/_op_impl/tbe/__init__.py +4 -4
- mindspore/ops/_op_impl/tbe/inplace_index_add.py +7 -3
- mindspore/ops/_op_impl/tbe/trans_data_ds.py +2 -0
- mindspore/ops/_primitive_cache.py +1 -1
- mindspore/ops/_tracefunc.py +45 -13
- mindspore/ops/_utils/utils.py +6 -1
- mindspore/ops/_vmap/vmap_array_ops.py +3 -3
- mindspore/ops/_vmap/vmap_base.py +3 -3
- mindspore/ops/_vmap/vmap_convolution_ops.py +1 -1
- mindspore/ops/_vmap/vmap_grad_math_ops.py +6 -4
- mindspore/ops/_vmap/vmap_math_ops.py +5 -2
- mindspore/ops/_vmap/vmap_nn_ops.py +61 -7
- mindspore/ops/arg_dtype_cast.py +54 -0
- mindspore/ops/composite/base.py +37 -10
- mindspore/ops/composite/math_ops.py +5 -4
- mindspore/ops/composite/multitype_ops/_compile_utils.py +275 -73
- mindspore/ops/composite/multitype_ops/_constexpr_utils.py +16 -9
- mindspore/ops/composite/multitype_ops/add_impl.py +43 -4
- mindspore/ops/composite/multitype_ops/getitem_impl.py +42 -4
- mindspore/ops/composite/multitype_ops/ones_like_impl.py +6 -0
- mindspore/ops/composite/multitype_ops/setitem_impl.py +2 -1
- mindspore/ops/composite/multitype_ops/zeros_like_impl.py +9 -0
- mindspore/ops/deprecated.py +304 -0
- mindspore/ops/function/__init__.py +4 -1
- mindspore/ops/function/array_func.py +174 -193
- mindspore/ops/function/clip_func.py +81 -13
- mindspore/ops/function/debug_func.py +1 -1
- mindspore/ops/function/grad/grad_func.py +18 -9
- mindspore/ops/function/image_func.py +10 -4
- mindspore/ops/function/linalg_func.py +5 -5
- mindspore/ops/function/math_func.py +575 -386
- mindspore/ops/function/nn_func.py +568 -260
- mindspore/ops/function/random_func.py +88 -57
- mindspore/ops/function/sparse_func.py +1 -1
- mindspore/ops/function/sparse_unary_func.py +14 -12
- mindspore/ops/function/vmap_func.py +6 -5
- mindspore/ops/functional.py +15 -10
- mindspore/ops/op_info_register.py +244 -25
- mindspore/ops/operations/__init__.py +28 -19
- mindspore/ops/operations/_grad_ops.py +72 -7
- mindspore/ops/operations/_inner_ops.py +350 -17
- mindspore/ops/operations/_quant_ops.py +4 -8
- mindspore/ops/operations/_sequence_ops.py +42 -0
- mindspore/ops/operations/array_ops.py +68 -282
- mindspore/ops/operations/comm_ops.py +107 -59
- mindspore/ops/operations/custom_ops.py +94 -70
- mindspore/ops/operations/debug_ops.py +8 -4
- mindspore/ops/operations/image_ops.py +18 -12
- mindspore/ops/operations/inner_ops.py +26 -3
- mindspore/ops/operations/math_ops.py +189 -141
- mindspore/ops/operations/nn_ops.py +794 -489
- mindspore/ops/operations/other_ops.py +0 -22
- mindspore/ops/operations/random_ops.py +53 -111
- mindspore/ops/operations/sparse_ops.py +3 -1
- mindspore/ops/primitive.py +24 -18
- mindspore/parallel/_auto_parallel_context.py +68 -8
- mindspore/parallel/_cost_model_context.py +2 -2
- mindspore/parallel/_offload_context.py +17 -3
- mindspore/parallel/_parallel_serialization.py +12 -5
- mindspore/parallel/_ps_context.py +12 -0
- mindspore/parallel/_tensor.py +18 -13
- mindspore/parallel/_transformer/layers.py +5 -3
- mindspore/parallel/_transformer/loss.py +1 -0
- mindspore/parallel/_transformer/moe.py +2 -2
- mindspore/parallel/_transformer/op_parallel_config.py +12 -1
- mindspore/parallel/_transformer/transformer.py +23 -3
- mindspore/parallel/_utils.py +11 -7
- mindspore/parallel/algo_parameter_config.py +85 -5
- mindspore/parallel/checkpoint_transform.py +19 -12
- mindspore/parallel/shard.py +21 -14
- mindspore/profiler/common/struct_type.py +3 -3
- mindspore/profiler/common/util.py +4 -2
- mindspore/profiler/envprofiling.py +1 -1
- mindspore/profiler/parser/aicpu_data_parser.py +5 -3
- mindspore/profiler/parser/ascend_flops_generator.py +2 -2
- mindspore/profiler/parser/ascend_fpbp_generator.py +1 -1
- mindspore/profiler/parser/ascend_hccl_generator.py +249 -12
- mindspore/profiler/parser/ascend_msprof_exporter.py +150 -255
- mindspore/profiler/parser/ascend_msprof_generator.py +204 -17
- mindspore/profiler/parser/ascend_op_generator.py +6 -6
- mindspore/profiler/parser/ascend_steptrace_generator.py +6 -4
- mindspore/profiler/parser/ascend_timeline_generator.py +14 -187
- mindspore/profiler/parser/base_timeline_generator.py +10 -8
- mindspore/profiler/parser/cpu_gpu_timeline_generator.py +16 -12
- mindspore/profiler/parser/flops_parser.py +15 -11
- mindspore/profiler/parser/framework_parser.py +38 -22
- mindspore/profiler/parser/hccl_parser.py +16 -12
- mindspore/profiler/parser/integrator.py +22 -11
- mindspore/profiler/parser/memory_usage_parser.py +2 -2
- mindspore/profiler/parser/minddata_analyzer.py +12 -14
- mindspore/profiler/parser/minddata_pipeline_parser.py +1 -1
- mindspore/profiler/parser/msadvisor_parser.py +8 -4
- mindspore/profiler/parser/op_intermediate_parser.py +5 -2
- mindspore/profiler/parser/optime_parser.py +1 -1
- mindspore/profiler/parser/profiler_info.py +21 -2
- mindspore/profiler/parser/step_trace_parser.py +11 -14
- mindspore/profiler/profiling.py +179 -89
- mindspore/rewrite/api/node.py +102 -19
- mindspore/rewrite/api/node_type.py +5 -1
- mindspore/rewrite/api/pattern_engine.py +1 -1
- mindspore/rewrite/api/scoped_value.py +9 -17
- mindspore/rewrite/api/symbol_tree.py +131 -47
- mindspore/rewrite/ast_helpers/__init__.py +2 -1
- mindspore/rewrite/ast_helpers/ast_finder.py +129 -0
- mindspore/rewrite/ast_helpers/ast_modifier.py +116 -104
- mindspore/rewrite/ast_transformers/flatten_recursive_stmt.py +93 -46
- mindspore/rewrite/common/rewrite_elog.py +5 -1
- mindspore/rewrite/namer.py +33 -24
- mindspore/rewrite/namespace.py +14 -5
- mindspore/{_extends/graph_kernel/expanders/complex → rewrite/node}/__init__.py +9 -9
- mindspore/rewrite/node/call_function.py +79 -0
- mindspore/rewrite/node/cell_container.py +135 -0
- mindspore/rewrite/node/control_flow.py +88 -0
- mindspore/rewrite/{node.py → node/node.py} +273 -234
- mindspore/rewrite/node/node_manager.py +254 -0
- mindspore/rewrite/{topological_manager.py → node/node_topological_manager.py} +13 -46
- mindspore/rewrite/parsers/arguments_parser.py +22 -21
- mindspore/rewrite/parsers/assign_parser.py +216 -221
- mindspore/rewrite/parsers/attribute_parser.py +9 -7
- mindspore/rewrite/parsers/class_def_parser.py +174 -113
- mindspore/rewrite/parsers/constant_parser.py +9 -6
- mindspore/rewrite/parsers/container_parser.py +9 -7
- mindspore/rewrite/parsers/for_parser.py +36 -15
- mindspore/rewrite/parsers/function_def_parser.py +24 -16
- mindspore/rewrite/parsers/if_parser.py +28 -24
- mindspore/rewrite/parsers/module_parser.py +196 -25
- mindspore/rewrite/{parser.py → parsers/parser.py} +4 -2
- mindspore/rewrite/{parser_register.py → parsers/parser_register.py} +1 -1
- mindspore/rewrite/parsers/return_parser.py +6 -6
- mindspore/rewrite/sparsify/sparse_transformer.py +12 -3
- mindspore/rewrite/sparsify/utils.py +1 -1
- mindspore/rewrite/symbol_tree.py +523 -578
- mindspore/rewrite/symbol_tree_builder.py +9 -193
- mindspore/rewrite/symbol_tree_dumper.py +2 -2
- mindspore/run_check/_check_version.py +6 -4
- mindspore/{ops/bprop_mindir → safeguard}/__init__.py +4 -3
- mindspore/safeguard/rewrite_obfuscation.py +541 -0
- mindspore/scipy/linalg.py +1 -1
- mindspore/scipy/optimize/minimize.py +7 -3
- mindspore/train/_utils.py +7 -3
- mindspore/train/amp.py +323 -123
- mindspore/train/anf_ir_pb2.py +14 -2
- mindspore/train/callback/_backup_and_restore.py +2 -12
- mindspore/train/callback/_callback.py +29 -4
- mindspore/train/callback/_checkpoint.py +23 -8
- mindspore/train/callback/_early_stop.py +2 -2
- mindspore/train/callback/_landscape.py +4 -4
- mindspore/train/callback/_loss_monitor.py +2 -2
- mindspore/train/callback/_on_request_exit.py +2 -2
- mindspore/train/callback/_reduce_lr_on_plateau.py +3 -4
- mindspore/train/callback/_summary_collector.py +15 -8
- mindspore/train/callback/_time_monitor.py +58 -5
- mindspore/train/data_sink.py +5 -11
- mindspore/train/dataset_helper.py +84 -57
- mindspore/train/loss_scale_manager.py +2 -2
- mindspore/train/metrics/__init__.py +3 -3
- mindspore/train/metrics/cosine_similarity.py +1 -1
- mindspore/train/metrics/hausdorff_distance.py +3 -2
- mindspore/train/metrics/mean_surface_distance.py +3 -2
- mindspore/train/metrics/metric.py +39 -19
- mindspore/train/metrics/roc.py +2 -2
- mindspore/train/metrics/root_mean_square_surface_distance.py +4 -3
- mindspore/train/mind_ir_pb2.py +85 -36
- mindspore/train/model.py +187 -47
- mindspore/train/serialization.py +487 -161
- mindspore/train/summary/_summary_adapter.py +1 -1
- mindspore/train/summary/_writer_pool.py +3 -2
- mindspore/train/summary/summary_record.py +37 -17
- mindspore/train/train_thor/convert_utils.py +3 -3
- mindspore/train/train_thor/dataset_helper.py +1 -1
- mindspore/version.py +1 -1
- {mindspore-2.1.0.dist-info → mindspore-2.2.10.dist-info}/METADATA +6 -7
- {mindspore-2.1.0.dist-info → mindspore-2.2.10.dist-info}/RECORD +488 -528
- {mindspore-2.1.0.dist-info → mindspore-2.2.10.dist-info}/entry_points.txt +0 -1
- mindspore/_akg/akg/tvm/contrib/debugger/__init__.py +0 -16
- mindspore/_akg/akg/tvm/contrib/debugger/debug_result.py +0 -274
- mindspore/_akg/akg/tvm/contrib/debugger/debug_runtime.py +0 -259
- mindspore/_akg/akg/tvm/contrib/peak.py +0 -341
- mindspore/_akg/akg/tvm/contrib/rpc.py +0 -25
- mindspore/_akg/akg/tvm/contrib/xcode.py +0 -257
- mindspore/_akg/akg/tvm/exec/__init__.py +0 -17
- mindspore/_akg/akg/tvm/exec/autotvm_log_editor.py +0 -60
- mindspore/_akg/akg/tvm/exec/measure_peak.py +0 -48
- mindspore/_akg/akg/tvm/exec/query_rpc_tracker.py +0 -48
- mindspore/_akg/akg/tvm/exec/rpc_proxy.py +0 -98
- mindspore/_akg/akg/tvm/exec/rpc_server.py +0 -88
- mindspore/_akg/akg/tvm/exec/rpc_tracker.py +0 -62
- mindspore/_akg/akg/tvm/rpc/__init__.py +0 -29
- mindspore/_akg/akg/tvm/rpc/base.py +0 -182
- mindspore/_akg/akg/tvm/rpc/client.py +0 -436
- mindspore/_akg/akg/tvm/rpc/proxy.py +0 -595
- mindspore/_akg/akg/tvm/rpc/server.py +0 -413
- mindspore/_akg/akg/tvm/rpc/tornado_util.py +0 -121
- mindspore/_akg/akg/tvm/rpc/tracker.py +0 -431
- mindspore/_extends/graph_kernel/expander.py +0 -80
- mindspore/_extends/graph_kernel/expanders/__init__.py +0 -54
- mindspore/_extends/graph_kernel/expanders/_utils.py +0 -269
- mindspore/_extends/graph_kernel/expanders/addn.py +0 -33
- mindspore/_extends/graph_kernel/expanders/batchnorm.py +0 -152
- mindspore/_extends/graph_kernel/expanders/batchnorm_grad.py +0 -105
- mindspore/_extends/graph_kernel/expanders/clip_by_norm_no_div_sum.py +0 -33
- mindspore/_extends/graph_kernel/expanders/complex/abs.py +0 -30
- mindspore/_extends/graph_kernel/expanders/complex/add.py +0 -44
- mindspore/_extends/graph_kernel/expanders/complex/div.py +0 -62
- mindspore/_extends/graph_kernel/expanders/complex/mul.py +0 -52
- mindspore/_extends/graph_kernel/expanders/complex/real_div.py +0 -62
- mindspore/_extends/graph_kernel/expanders/complex/sub.py +0 -45
- mindspore/_extends/graph_kernel/expanders/conv2d.py +0 -200
- mindspore/_extends/graph_kernel/expanders/dropout_grad.py +0 -30
- mindspore/_extends/graph_kernel/expanders/equal_count.py +0 -50
- mindspore/_extends/graph_kernel/expanders/erfc.py +0 -35
- mindspore/_extends/graph_kernel/expanders/expand_dims.py +0 -50
- mindspore/_extends/graph_kernel/expanders/fused_adam.py +0 -44
- mindspore/_extends/graph_kernel/expanders/fused_adam_weight_decay.py +0 -47
- mindspore/_extends/graph_kernel/expanders/fused_mul_add.py +0 -28
- mindspore/_extends/graph_kernel/expanders/gelu_grad.py +0 -70
- mindspore/_extends/graph_kernel/expanders/gkdropout.py +0 -40
- mindspore/_extends/graph_kernel/expanders/identity.py +0 -25
- mindspore/_extends/graph_kernel/expanders/layernorm.py +0 -93
- mindspore/_extends/graph_kernel/expanders/layernorm_grad.py +0 -113
- mindspore/_extends/graph_kernel/expanders/logsoftmax.py +0 -46
- mindspore/_extends/graph_kernel/expanders/logsoftmax_grad.py +0 -36
- mindspore/_extends/graph_kernel/expanders/matmul.py +0 -80
- mindspore/_extends/graph_kernel/expanders/maximum_grad.py +0 -59
- mindspore/_extends/graph_kernel/expanders/minimum_grad.py +0 -80
- mindspore/_extends/graph_kernel/expanders/oneslike.py +0 -26
- mindspore/_extends/graph_kernel/expanders/reduce_mean.py +0 -43
- mindspore/_extends/graph_kernel/expanders/relu_grad.py +0 -32
- mindspore/_extends/graph_kernel/expanders/sigmoid_cross_entropy_with_logits.py +0 -41
- mindspore/_extends/graph_kernel/expanders/sigmoid_cross_entropy_with_logits_grad.py +0 -35
- mindspore/_extends/graph_kernel/expanders/sigmoid_grad.py +0 -31
- mindspore/_extends/graph_kernel/expanders/slice.py +0 -35
- mindspore/_extends/graph_kernel/expanders/softmax_cross_entropy_with_logits.py +0 -42
- mindspore/_extends/graph_kernel/expanders/softmax_grad_ext.py +0 -41
- mindspore/_extends/graph_kernel/expanders/softsign.py +0 -28
- mindspore/_extends/graph_kernel/expanders/sqrt_grad.py +0 -29
- mindspore/_extends/graph_kernel/expanders/square_sum_all.py +0 -44
- mindspore/_extends/graph_kernel/expanders/square_sum_v1.py +0 -37
- mindspore/_extends/graph_kernel/expanders/squared_difference.py +0 -43
- mindspore/_extends/graph_kernel/expanders/tanh_grad.py +0 -31
- mindspore/_extends/graph_kernel/model/op_infer.py +0 -506
- mindspore/dataset/datapreprocess/__init__.py +0 -20
- mindspore/dataset/datapreprocess/preprocess_imagenet_validate_dataset.py +0 -54
- mindspore/include/api/net.h +0 -142
- mindspore/nn/lr_scheduler.py +0 -262
- mindspore/ops/_grad_experimental/grad_image_ops.py +0 -248
- mindspore/ops/_grad_experimental/grad_linalg_ops.py +0 -181
- mindspore/ops/_grad_experimental/grad_other_ops.py +0 -72
- mindspore/ops/_grad_experimental/grad_scalar_ops.py +0 -112
- mindspore/ops/_grad_experimental/grad_sequence_ops.py +0 -351
- mindspore/ops/bprop_mindir/BNTrainingReduce_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Broadcast_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Depend_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/DepthwiseConv2dNative_bprop.mindir +0 -138
- mindspore/ops/bprop_mindir/EmbeddingLookup_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Load_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/ScatterNonAliasingAdd_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/SparseGatherV2_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/SparseSoftmaxCrossEntropyWithLogits_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Switch_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/TransShape_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/TupleGetItem_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Unique_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Unstack_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/generate_mindir.py +0 -114
- mindspore/rewrite/node_visitor.py +0 -44
- {mindspore-2.1.0.dist-info → mindspore-2.2.10.dist-info}/WHEEL +0 -0
- {mindspore-2.1.0.dist-info → mindspore-2.2.10.dist-info}/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
#!/usr/bin/env python3
|
|
2
2
|
# coding: utf-8
|
|
3
|
-
# Copyright 2019-
|
|
3
|
+
# Copyright 2019-2023 Huawei Technologies Co., Ltd
|
|
4
4
|
#
|
|
5
5
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
6
6
|
# you may not use this file except in compliance with the License.
|
|
@@ -35,8 +35,6 @@ import numpy as np
|
|
|
35
35
|
|
|
36
36
|
import akg
|
|
37
37
|
import akg.tvm
|
|
38
|
-
from akg.tvm import autotvm
|
|
39
|
-
from akg.tvm import rpc
|
|
40
38
|
from akg.tvm import _api_internal
|
|
41
39
|
from akg.build_module import help_tiling_level
|
|
42
40
|
from akg.utils import result_analysis as ra_util
|
|
@@ -45,16 +43,16 @@ from akg.utils import custom_tiling as ct_util
|
|
|
45
43
|
from akg.utils import validation_check as vc_util
|
|
46
44
|
from akg.utils.dsl_create import TensorUtils
|
|
47
45
|
from akg.utils.util import parse_kwargs
|
|
48
|
-
from akg.backend.parsing_profiling_data import HWTSLogParser
|
|
46
|
+
from akg.backend.parsing_profiling_data import HWTSLogParser, max_time_consume
|
|
49
47
|
from akg.backend.parsing_profiling_data import validate_and_normalize_path
|
|
50
48
|
from akg.backend import aic_model
|
|
51
|
-
|
|
49
|
+
from .ascend_profilier.cann_file_parser import CANNFileParser
|
|
50
|
+
from .ascend_profilier.op_summary_parser import OpSummaryParser
|
|
51
|
+
from .ascend_profilier.op_summary_headers import OpSummaryHeaders
|
|
52
52
|
sh = logging.StreamHandler(sys.stdout)
|
|
53
53
|
logging.getLogger().addHandler(sh)
|
|
54
54
|
logging.getLogger().setLevel(logging.INFO)
|
|
55
55
|
|
|
56
|
-
rpc_machine = {}
|
|
57
|
-
rpc_lb = {}
|
|
58
56
|
|
|
59
57
|
PERFORMANCE_TEST_FILE = "PERFORMANCE_TEST_FILE"
|
|
60
58
|
BINDS = "binds"
|
|
@@ -173,212 +171,7 @@ def gen_name_kernel(kernel, dtype, shapes):
|
|
|
173
171
|
return res
|
|
174
172
|
|
|
175
173
|
|
|
176
|
-
def
|
|
177
|
-
"""
|
|
178
|
-
load rpc server host and port info.
|
|
179
|
-
|
|
180
|
-
Args:
|
|
181
|
-
mode (str): string of runtime choose, can set ca aic and rpc.
|
|
182
|
-
"""
|
|
183
|
-
env_dic = os.environ
|
|
184
|
-
if env_dic.get('RPC_HOST') and env_dic.get('RPC_PORT'):
|
|
185
|
-
return
|
|
186
|
-
|
|
187
|
-
if mode == 'rpc_cloud':
|
|
188
|
-
logging.error("runtime_mode=rpc_cloud must set 1980 host ip and port!")
|
|
189
|
-
raise Exception("ERROR:runtime_mode=rpc_cloud must set 1980 host ip and port!")
|
|
190
|
-
|
|
191
|
-
rpc_server_info_config = env_dic.get('RPC_SERVER_INFO_FILE')
|
|
192
|
-
if not rpc_server_info_config:
|
|
193
|
-
logging.error("runtime_mode=rpc must set RPC_SERVER_INFO_FILE for rpc server info config")
|
|
194
|
-
raise Exception("ERROR:runtime_mode=rpc must set RPC_SERVER_INFO_FILE for rpc server info config")
|
|
195
|
-
|
|
196
|
-
# load rpc server host and port info from local file.
|
|
197
|
-
import json
|
|
198
|
-
with open(rpc_server_info_config, 'r') as f:
|
|
199
|
-
info = json.load(f)
|
|
200
|
-
|
|
201
|
-
for i in info:
|
|
202
|
-
rpc_machine[i] = info[i]
|
|
203
|
-
rpc_lb[i] = 0.0
|
|
204
|
-
return
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
def dispatch(rank=0):
|
|
208
|
-
"""Function for lock waiting dispatch handle version 1."""
|
|
209
|
-
|
|
210
|
-
def _sort_by_value(d):
|
|
211
|
-
items = list(d.items())
|
|
212
|
-
random.shuffle(items)
|
|
213
|
-
items.sort(key=lambda x: x[1])
|
|
214
|
-
return list(item[0] for item in items)
|
|
215
|
-
|
|
216
|
-
for k, v in rpc_lb.items():
|
|
217
|
-
logging.info("######rpc_lb[%s]=%f", rpc_machine.get(k)[0], v)
|
|
218
|
-
lb_list = _sort_by_value(rpc_lb)
|
|
219
|
-
if len(lb_list) > rank:
|
|
220
|
-
return lb_list[rank]
|
|
221
|
-
return lb_list[len(lb_list) - 1]
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
def commit(remote, weight):
|
|
225
|
-
rpc_lb[remote] = weight
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
@func_time_required
|
|
229
|
-
def mod_launch_rpc_worker(mod, args, outputs, host, port, tuning=False):
|
|
230
|
-
"""internal RPC worker, should be called by mod_launch_rpc_thread."""
|
|
231
|
-
logging.info("%s:====start connect to rpc ip: %s, rpc port: %d ",
|
|
232
|
-
datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'), host, port)
|
|
233
|
-
remote = rpc.connect(host, port, session_timeout=300)
|
|
234
|
-
logging.info("%s:====connect to rpc ip: %s, rpc port: %d finished ",
|
|
235
|
-
datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'), host, port)
|
|
236
|
-
uuid_str = uuid.uuid4().hex
|
|
237
|
-
temp_file_name = "stackvm_%s.o" % uuid_str
|
|
238
|
-
mod.save(temp_file_name)
|
|
239
|
-
remote.upload(temp_file_name)
|
|
240
|
-
remote_mod = remote.load_module(temp_file_name)
|
|
241
|
-
ctx = remote.cce()
|
|
242
|
-
arg_list = []
|
|
243
|
-
for a in args:
|
|
244
|
-
arg_list.append(akg.tvm.nd.array(a, ctx))
|
|
245
|
-
start_time = timer()
|
|
246
|
-
remote_mod(*arg_list)
|
|
247
|
-
ctx.sync()
|
|
248
|
-
if os.path.exists(temp_file_name):
|
|
249
|
-
os.remove(temp_file_name)
|
|
250
|
-
out_list = []
|
|
251
|
-
for i in outputs:
|
|
252
|
-
out = arg_list[len(arg_list) + i if i < 0 else i].asnumpy()
|
|
253
|
-
out_list.append(out)
|
|
254
|
-
# this time measure is no accurate now, to be improved soon
|
|
255
|
-
t = timer() - start_time
|
|
256
|
-
if not tuning:
|
|
257
|
-
return out_list[0] if len(out_list) == 1 else tuple(out_list)
|
|
258
|
-
stat_info = {"run_time": t}
|
|
259
|
-
return out_list[0] if len(out_list) == 1 else tuple(out_list), stat_info
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
def mod_launch_rpc_thread(mode, mod, args, outputs, results, need_retry, retry, tuning=False):
|
|
263
|
-
"""internal RPC thread, should be called by mod_launch_rpc_multithread."""
|
|
264
|
-
remoteevb = '0'
|
|
265
|
-
host = None
|
|
266
|
-
port = None
|
|
267
|
-
env_dic = os.environ
|
|
268
|
-
if env_dic.get('RPC_HOST') and env_dic.get('RPC_PORT'):
|
|
269
|
-
host = env_dic.get('RPC_HOST')
|
|
270
|
-
port = int(env_dic.get('RPC_PORT'))
|
|
271
|
-
else:
|
|
272
|
-
if mode == 'rpc_cloud':
|
|
273
|
-
logging.error("runtime_mode=rpc_cloud must set 1980 host ip and port!")
|
|
274
|
-
raise Exception("ERROR:runtime_mode=rpc_cloud must set 1980 host ip and port!")
|
|
275
|
-
remoteevb = dispatch(retry)
|
|
276
|
-
host = rpc_machine.get(remoteevb)[0]
|
|
277
|
-
port = rpc_machine.get(remoteevb)[1]
|
|
278
|
-
|
|
279
|
-
start_time = timer()
|
|
280
|
-
end_time = 0.0
|
|
281
|
-
logging.debug("rpc ip: %s, rpc port: %d", host, port)
|
|
282
|
-
try:
|
|
283
|
-
out_list = mod_launch_rpc_worker(mod, args, outputs, host, port, tuning=tuning)
|
|
284
|
-
end_time = timer()
|
|
285
|
-
t = end_time - start_time
|
|
286
|
-
if not env_dic.get('RPC_HOST'):
|
|
287
|
-
commit(remoteevb, 20 if t > 20 else t)
|
|
288
|
-
logging.info("===this round host is %s time is %f", host, (end_time - start_time))
|
|
289
|
-
results[retry] = out_list
|
|
290
|
-
except RuntimeError:
|
|
291
|
-
need_retry[retry] = True
|
|
292
|
-
end_time = timer()
|
|
293
|
-
logging.error("===Failed! this round host is %s time is %f", host, (end_time - start_time))
|
|
294
|
-
if not env_dic.get('RPC_HOST'):
|
|
295
|
-
commit(remoteevb, end_time - start_time + 20 * (retry + 1))
|
|
296
|
-
logging.error("rpc retry error: %d %s", retry, sys.exc_info())
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
def _get_rpc_result(poll_count, threads, thread_index, poll_interval, need_retry, results, retried):
|
|
300
|
-
"""Get rpc run result."""
|
|
301
|
-
while poll_count > 0:
|
|
302
|
-
poll_count -= 1
|
|
303
|
-
# wait for the newly created thread, because it is most likely to complete first
|
|
304
|
-
threads[thread_index].join(poll_interval)
|
|
305
|
-
for poll_index in range(thread_index + 1):
|
|
306
|
-
if not threads[poll_index].is_alive() and not need_retry[poll_index]:
|
|
307
|
-
return True, results[poll_index]
|
|
308
|
-
if need_retry[poll_index] and not retried[poll_index]:
|
|
309
|
-
logging.error("Thread %d exit with error, spawn a new thread immediately", poll_index)
|
|
310
|
-
poll_count = 0
|
|
311
|
-
retried[poll_index] = True
|
|
312
|
-
return False, False
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
def mod_launch_rpc(mode, mod, args, outputs, tuning=False):
|
|
316
|
-
"""
|
|
317
|
-
launch rpc or rpc_cloud module with retry.
|
|
318
|
-
|
|
319
|
-
Note:
|
|
320
|
-
To minimize waiting time of struggler RPC servers, we wait for a short timeout and spawn
|
|
321
|
-
a new thread after the timeout.
|
|
322
|
-
In normal case, RPC would complete before the short timeout, so, only one thread will be created.
|
|
323
|
-
When the RPC server is slow, we create multiple threads that run concurrently.
|
|
324
|
-
We wait for the first thread that successfully completes its work and return the result.
|
|
325
|
-
If a thread fails (an exception is raised), we spawn a new thread to retry.
|
|
326
|
-
Newly spawned threads will use different RPC servers.
|
|
327
|
-
We bound the maximum number of threads, i.e. maximum number of retries.
|
|
328
|
-
"""
|
|
329
|
-
max_num_threads = 5
|
|
330
|
-
|
|
331
|
-
import operator
|
|
332
|
-
arg_filter = filter(lambda x: isinstance(x, np.ndarray), args)
|
|
333
|
-
arg_tensor = list(arg_filter)
|
|
334
|
-
tensor_size = reduce(operator.add, (reduce(operator.mul, arg.shape) for arg in arg_tensor))
|
|
335
|
-
expected_upload_speed = 5e6
|
|
336
|
-
expected_upload_time = int(tensor_size / expected_upload_speed)
|
|
337
|
-
|
|
338
|
-
timeout_before_spawning_new_thread = 200 + expected_upload_time
|
|
339
|
-
poll_interval = 1
|
|
340
|
-
thread_timeout = 400 + expected_upload_time * 3
|
|
341
|
-
|
|
342
|
-
load_rpc_server_info(mode)
|
|
343
|
-
|
|
344
|
-
threads = [None] * max_num_threads
|
|
345
|
-
results = [None] * max_num_threads
|
|
346
|
-
need_retry = [None] * max_num_threads
|
|
347
|
-
retried = [False] * max_num_threads
|
|
348
|
-
for thread_index in range(max_num_threads):
|
|
349
|
-
if thread_index > 0:
|
|
350
|
-
logging.error("Thread %d run for %d seconds, spawn a new thread to retry",
|
|
351
|
-
(thread_index - 1), timeout_before_spawning_new_thread)
|
|
352
|
-
threads[thread_index] = Thread(target=mod_launch_rpc_thread,
|
|
353
|
-
args=(mode, mod, args, outputs, results, need_retry, thread_index, tuning))
|
|
354
|
-
# daemonize the thread to prevent long running threads from hanging the whole process
|
|
355
|
-
threads[thread_index].daemon = True
|
|
356
|
-
threads[thread_index].start()
|
|
357
|
-
poll_count = timeout_before_spawning_new_thread // poll_interval
|
|
358
|
-
has_res, res = _get_rpc_result(poll_count, threads, thread_index, poll_interval, need_retry, results, retried)
|
|
359
|
-
if has_res:
|
|
360
|
-
return res
|
|
361
|
-
|
|
362
|
-
logging.error("All %d threads are created, poll the threads until the first one exits normally, \
|
|
363
|
-
or all threads exit abnormally or timeout", max_num_threads)
|
|
364
|
-
poll_count = thread_timeout // poll_interval
|
|
365
|
-
for _ in range(poll_count):
|
|
366
|
-
threads[max_num_threads - 1].join(poll_interval)
|
|
367
|
-
exit_thread_count = 0
|
|
368
|
-
for poll_index in range(max_num_threads):
|
|
369
|
-
if not threads[poll_index].is_alive() and not need_retry[poll_index]:
|
|
370
|
-
return results[poll_index]
|
|
371
|
-
if not threads[poll_index].is_alive():
|
|
372
|
-
exit_thread_count += 1
|
|
373
|
-
if exit_thread_count == max_num_threads:
|
|
374
|
-
logging.error("All %d threads exit abnormally", max_num_threads)
|
|
375
|
-
return None
|
|
376
|
-
|
|
377
|
-
logging.error("All %d threads timeout", max_num_threads)
|
|
378
|
-
return None
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
def profiling_mode_run(kernel_name, args, outputs, tuning, device_id):
|
|
174
|
+
def profiling_mode_run(kernel_name, args, outputs, tuning, device_id, arch=None):
|
|
382
175
|
"""
|
|
383
176
|
Function for collecting cycle data from device.
|
|
384
177
|
|
|
@@ -389,23 +182,45 @@ def profiling_mode_run(kernel_name, args, outputs, tuning, device_id):
|
|
|
389
182
|
tuning: tuning model.
|
|
390
183
|
device_id: device_id on device.
|
|
391
184
|
"""
|
|
392
|
-
akg.tvm.get_global_func("ascend_start_profiling")(
|
|
185
|
+
akg.tvm.get_global_func("ascend_start_profiling")(kernel_name)
|
|
393
186
|
time_before_launch = time.time()
|
|
394
187
|
output_data = ascend_run(kernel_name, args, outputs, device_id)
|
|
395
188
|
akg.tvm.get_global_func("ascend_stop_profiling")()
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
|
|
189
|
+
cycle = 0
|
|
190
|
+
if arch is not None and "910B" in arch:
|
|
191
|
+
# for ascend910B profiling
|
|
192
|
+
cycle = profiling_analyse_910B(time_before_launch)
|
|
193
|
+
else:
|
|
194
|
+
cycle = profiling_analyse(device_id, time_before_launch)
|
|
195
|
+
logging.info('=====Task Duration(us)==============================')
|
|
399
196
|
if cycle != PROF_ERROR_CODE:
|
|
400
197
|
logging.info(cycle)
|
|
401
198
|
else:
|
|
402
|
-
logging.error("OOPS, can't correctly
|
|
199
|
+
logging.error("OOPS, can't correctly Task Duration!")
|
|
403
200
|
TestUtils.record_cycle(cycle)
|
|
404
|
-
logging.info('=====
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
|
|
201
|
+
logging.info('=====Task Duration(us)==============================')
|
|
202
|
+
return output_data, {'run_time': cycle}
|
|
203
|
+
|
|
204
|
+
def profiling_analyse_910B(time_before_launch):
|
|
205
|
+
public_path = os.getenv('PROFILING_DIR')
|
|
206
|
+
if public_path is None:
|
|
207
|
+
raise RuntimeError("Environment PROFILING_DIR not set!")
|
|
208
|
+
public_path = validate_and_normalize_path(public_path)
|
|
209
|
+
CANNFileParser(public_path).export_cann_profiling()
|
|
210
|
+
cann_file_parser = OpSummaryParser(public_path)
|
|
211
|
+
profiler_file = cann_file_parser._profiler_path
|
|
212
|
+
logging.debug("prof file is: %s", os.path.basename(profiler_file))
|
|
213
|
+
file_create_time = os.path.getctime(profiler_file)
|
|
214
|
+
if file_create_time < time_before_launch:
|
|
215
|
+
raise RuntimeError("The PROF file is too old")
|
|
216
|
+
datas:dict = cann_file_parser.generate_op_summary_data()
|
|
217
|
+
task_duration = float(datas.get(OpSummaryHeaders.TASK_DURATION,max_time_consume))
|
|
218
|
+
# # aic_total_cycles means ai core cycle
|
|
219
|
+
# # aiv_total_cycles means ai vector cycle
|
|
220
|
+
# aiv_total_cycle = int(datas.get(OpSummaryHeaders.AIV_TOTAL_CYCLES,max_time_consume))
|
|
221
|
+
# aic_total_cycle = int(datas.get(OpSummaryHeaders.AIC_TOTAL_CYCLES,max_time_consume))
|
|
222
|
+
# return aiv_total_cycle+aic_total_cycle
|
|
223
|
+
return task_duration
|
|
409
224
|
|
|
410
225
|
def profiling_analyse(device_id, time_before_launch):
|
|
411
226
|
"""analyse profiling."""
|
|
@@ -559,12 +374,12 @@ def get_kernel_name_from_mod(mod):
|
|
|
559
374
|
return kernel_name
|
|
560
375
|
|
|
561
376
|
|
|
562
|
-
def mod_launch_ascend_profiling(mod, args, outputs=(-1,), tuning=False, device_id=-1):
|
|
377
|
+
def mod_launch_ascend_profiling(mod, args, outputs=(-1,), tuning=False, device_id=-1, arch=None):
|
|
563
378
|
gc.collect()
|
|
564
379
|
if device_id == -1:
|
|
565
380
|
device_id = int(os.environ.get("DEVICE_ID", 0))
|
|
566
381
|
kernel_name = get_kernel_name_from_mod(mod)
|
|
567
|
-
return profiling_mode_run(kernel_name, args, outputs, tuning, device_id)
|
|
382
|
+
return profiling_mode_run(kernel_name, args, outputs, tuning, device_id, arch=arch)
|
|
568
383
|
|
|
569
384
|
|
|
570
385
|
def mod_launch_default(mod, args, outputs=(-1,), target=CUDA, tuning=False, device_id=-1, repeat_time=400):
|
|
@@ -598,7 +413,7 @@ def mod_launch_default(mod, args, outputs=(-1,), target=CUDA, tuning=False, devi
|
|
|
598
413
|
|
|
599
414
|
|
|
600
415
|
@func_time_required
|
|
601
|
-
def mod_launch(mod, args, outputs=(-1,), tuning=False, device_id=-1, expect=None, repeat_time=400):
|
|
416
|
+
def mod_launch(mod, args, outputs=(-1,), tuning=False, device_id=-1, expect=None, repeat_time=400, arch=None):
|
|
602
417
|
"""
|
|
603
418
|
unified run CCE kernel api.
|
|
604
419
|
|
|
@@ -609,7 +424,7 @@ def mod_launch(mod, args, outputs=(-1,), tuning=False, device_id=-1, expect=None
|
|
|
609
424
|
tuning (bool): tuning model.
|
|
610
425
|
device_id: device_id on device.
|
|
611
426
|
expect: when mode in ["compile_cloud", "compile_mini"], return it.
|
|
612
|
-
|
|
427
|
+
arch: Ascend arch type
|
|
613
428
|
Returns:
|
|
614
429
|
output numpy array, or tuple of numpy array if multi-output.
|
|
615
430
|
"""
|
|
@@ -618,6 +433,20 @@ def mod_launch(mod, args, outputs=(-1,), tuning=False, device_id=-1, expect=None
|
|
|
618
433
|
if device_id == -1:
|
|
619
434
|
device_id = int(os.environ.get("DEVICE_ID", 0))
|
|
620
435
|
|
|
436
|
+
# npu-inference process
|
|
437
|
+
if isinstance(mod, str):
|
|
438
|
+
kernel_name = mod
|
|
439
|
+
run_func = ascend_run
|
|
440
|
+
run_args = [kernel_name, args, outputs, device_id]
|
|
441
|
+
if os.environ.get("PROFILING_MODE") == "true":
|
|
442
|
+
run_func = profiling_mode_run
|
|
443
|
+
run_args = [kernel_name, args, outputs, tuning, device_id, arch]
|
|
444
|
+
if os.environ.get("PROFILING_DIR", None) is None:
|
|
445
|
+
os.environ["PROFILING_DIR"] = "."
|
|
446
|
+
logging.info("[RUNTIME_WARNING] In profiling mode, while profiling dir is not set!Set to current dir by default.")
|
|
447
|
+
output = run_func(*run_args)
|
|
448
|
+
return output
|
|
449
|
+
|
|
621
450
|
module = mod if mod.type_key == LLVM else mod.imported_modules[0]
|
|
622
451
|
target = module.type_key
|
|
623
452
|
if target == LLVM or target == CUDA:
|
|
@@ -635,8 +464,6 @@ def mod_launch(mod, args, outputs=(-1,), tuning=False, device_id=-1, expect=None
|
|
|
635
464
|
return output
|
|
636
465
|
ra_util.get_ticks(stat_info)
|
|
637
466
|
return output, stat_info
|
|
638
|
-
if mode in ('rpc', 'rpc_cloud'):
|
|
639
|
-
return mod_launch_rpc(mode, mod, args, outputs, tuning)
|
|
640
467
|
|
|
641
468
|
# The air_cloud is the current default mode and needs to be modified in the future
|
|
642
469
|
if mode == 'air_cloud':
|
|
@@ -658,7 +485,7 @@ def mod_launch(mod, args, outputs=(-1,), tuning=False, device_id=-1, expect=None
|
|
|
658
485
|
mod(*tvm_array)
|
|
659
486
|
return tvm_array[-1].asnumpy()
|
|
660
487
|
|
|
661
|
-
raise ValueError("mode must be aic,
|
|
488
|
+
raise ValueError("mode must be aic, aic_cloud, ca, compile_cloud, compile_mini, cpu, csim, ccesim or cdiff")
|
|
662
489
|
|
|
663
490
|
|
|
664
491
|
def _extract_shape_dtype(input_shapes, input_types):
|
|
@@ -1004,44 +831,6 @@ def _create_gpu_mod(s, op_var, target, shape_var, kernel_name, attrs, polyhedral
|
|
|
1004
831
|
return mod
|
|
1005
832
|
|
|
1006
833
|
|
|
1007
|
-
def _create_gpu_tuning_mod(sch_tmpl, shape_var, kernel_name, attrs, binds):
|
|
1008
|
-
"""Create tuning module on gpu."""
|
|
1009
|
-
@autotvm.template
|
|
1010
|
-
def _autotune_template():
|
|
1011
|
-
s = sch_tmpl['schedule'](sch_tmpl['output'])
|
|
1012
|
-
return s, op_var
|
|
1013
|
-
|
|
1014
|
-
# create autotune task
|
|
1015
|
-
task = autotvm.task.create(_autotune_template, args=list(), target='cuda')
|
|
1016
|
-
print("task config: ", task.config_space)
|
|
1017
|
-
|
|
1018
|
-
# set measure_option
|
|
1019
|
-
measure_option = autotvm.measure_option(
|
|
1020
|
-
builder=autotvm.LocalBuilder(),
|
|
1021
|
-
runner=autotvm.LocalRunner(repeat=5, min_repeat_ms=150, timeout=4)
|
|
1022
|
-
)
|
|
1023
|
-
|
|
1024
|
-
# Begin tuning, log records to file `kernel_name.log`
|
|
1025
|
-
tuner = autotvm.tuner.RandomTuner(task)
|
|
1026
|
-
if not os.path.exists(kernel_name + '.log'):
|
|
1027
|
-
tuner.tune(n_trial=len(task.config_space),
|
|
1028
|
-
measure_option=measure_option,
|
|
1029
|
-
callbacks=[autotvm.callback.log_to_file(kernel_name + '.log')])
|
|
1030
|
-
|
|
1031
|
-
# query best config
|
|
1032
|
-
dispatch_context = autotvm.apply_history_best(kernel_name + '.log')
|
|
1033
|
-
best_config = dispatch_context.query(task.target, task.workload)
|
|
1034
|
-
print("\nBest config is:")
|
|
1035
|
-
print(best_config)
|
|
1036
|
-
|
|
1037
|
-
# apply best config
|
|
1038
|
-
with autotvm.apply_history_best(kernel_name + '.log'):
|
|
1039
|
-
s, op_var = _autotune_template()
|
|
1040
|
-
mod = akg.build(s, op_var, "cuda", shape_var, name=kernel_name, attrs=attrs,
|
|
1041
|
-
polyhedral=False, binds=binds)
|
|
1042
|
-
return mod
|
|
1043
|
-
|
|
1044
|
-
|
|
1045
834
|
def create_gpu_mod(sch_tmpl, s, op_func, op_var, shape_var, kernel_name, attrs, polyhedral, binds, dump_ir, dump_code,
|
|
1046
835
|
tuning):
|
|
1047
836
|
"""
|
|
@@ -1079,7 +868,7 @@ def create_gpu_mod(sch_tmpl, s, op_func, op_var, shape_var, kernel_name, attrs,
|
|
|
1079
868
|
s = sch_tmpl['schedule'](sch_tmpl['output'])
|
|
1080
869
|
mod = _create_gpu_mod(s, op_var, "cuda", shape_var, kernel_name, attrs, False, binds, dump_ir)
|
|
1081
870
|
else:
|
|
1082
|
-
|
|
871
|
+
raise ValueError("Tuning is not supported.")
|
|
1083
872
|
else:
|
|
1084
873
|
mod = _create_gpu_mod(s, op_var, target, shape_var, kernel_name, attrs, polyhedral, binds, dump_ir)
|
|
1085
874
|
if dump_code:
|
|
@@ -1213,6 +1002,10 @@ def op_build(op_func, input_shapes, input_types, op_attrs=None, kernel_name="",
|
|
|
1213
1002
|
compute_func(s)
|
|
1214
1003
|
polyhedral = False
|
|
1215
1004
|
|
|
1005
|
+
if attrs.get("simple_mode"):
|
|
1006
|
+
attrs.pop("simple_mode")
|
|
1007
|
+
return s, inputs, output, attrs
|
|
1008
|
+
|
|
1216
1009
|
level = attrs.get("help_tiling") if attrs and "help_tiling" in attrs else None
|
|
1217
1010
|
if tuning or (level is not None and level > help_tiling_level.get('None')):
|
|
1218
1011
|
return gen_spaces_dim_key(op_func, args, s, op_var, kernel_name, attrs, polyhedral, tuning, target)
|
|
@@ -1231,10 +1024,11 @@ def op_build(op_func, input_shapes, input_types, op_attrs=None, kernel_name="",
|
|
|
1231
1024
|
polyhedral=polyhedral, binds=binds)
|
|
1232
1025
|
source_code = mod.get_source()
|
|
1233
1026
|
elif target_name == CCE:
|
|
1234
|
-
|
|
1235
|
-
|
|
1236
|
-
polyhedral=polyhedral, binds=binds)
|
|
1027
|
+
mod = npu_op_build(s, op_var, shape_var, kernel_name, binds, attrs, dump_ir, polyhedral)
|
|
1028
|
+
if attrs.get("is_tbe_codegen"):
|
|
1237
1029
|
source_code = mod.imported_modules[0].get_source()
|
|
1030
|
+
else:
|
|
1031
|
+
return mod
|
|
1238
1032
|
|
|
1239
1033
|
if log_code:
|
|
1240
1034
|
logging.debug("#################code####################")
|
|
@@ -1244,11 +1038,41 @@ def op_build(op_func, input_shapes, input_types, op_attrs=None, kernel_name="",
|
|
|
1244
1038
|
return mod
|
|
1245
1039
|
|
|
1246
1040
|
|
|
1041
|
+
def npu_op_build(s, op_var, shape_var, kernel_name="", binds=None, attrs=None,
|
|
1042
|
+
dump_ir=True, polyhedral=True):
|
|
1043
|
+
if attrs.get("is_tbe_codegen"):
|
|
1044
|
+
# use akg + tbe compile
|
|
1045
|
+
from akg.tvm import build_module
|
|
1046
|
+
from akg.python.akg.utils.tbe_codegen_utils import build_tbe_codegen
|
|
1047
|
+
if attrs is None:
|
|
1048
|
+
attrs = {}
|
|
1049
|
+
attrs.update({"is_tbe_codegen":True})
|
|
1050
|
+
binds, arg_list = build_module.get_binds(op_var)
|
|
1051
|
+
stmt = akg.lower(s, op_var, shape_params=shape_var, name=kernel_name, binds=binds, attrs=attrs,
|
|
1052
|
+
simple_mode=True, polyhedral=polyhedral, tuning=False, target="cce")
|
|
1053
|
+
|
|
1054
|
+
json_str = akg.tvm.save_json(stmt, "0.8.0")
|
|
1055
|
+
|
|
1056
|
+
args_json = []
|
|
1057
|
+
for buf in enumerate(arg_list):
|
|
1058
|
+
args_json.append(akg.tvm.save_json(buf, "0.8.0"))
|
|
1059
|
+
|
|
1060
|
+
is_success = build_tbe_codegen(kernel_name, json_str, args_json, attrs)
|
|
1061
|
+
if not is_success:
|
|
1062
|
+
raise TypeError("npu_inference codegen failed.")
|
|
1063
|
+
return kernel_name
|
|
1064
|
+
else:
|
|
1065
|
+
# use the whole akg complie
|
|
1066
|
+
with akg.build_config(dump_pass_ir=dump_ir):
|
|
1067
|
+
mod = akg.build(s, op_var, CCE, shape_var, name=kernel_name, attrs=attrs,
|
|
1068
|
+
polyhedral=polyhedral, binds=binds)
|
|
1069
|
+
return mod
|
|
1070
|
+
|
|
1247
1071
|
def get_runtime_mode():
|
|
1248
1072
|
"""get runtime mode."""
|
|
1249
1073
|
env_dic = os.environ
|
|
1250
1074
|
if not env_dic.get('RUNTIME_MODE'):
|
|
1251
|
-
mode = '
|
|
1075
|
+
mode = 'aic_cloud'
|
|
1252
1076
|
else:
|
|
1253
1077
|
mode = env_dic.get('RUNTIME_MODE')
|
|
1254
1078
|
return mode
|
|
@@ -1265,7 +1089,7 @@ def get_profiling_mode():
|
|
|
1265
1089
|
def product_is_mini():
|
|
1266
1090
|
"""check whether in mini environment."""
|
|
1267
1091
|
mode = get_runtime_mode()
|
|
1268
|
-
if mode in ('
|
|
1092
|
+
if mode in ('air', 'aic', 'compile_mini'):
|
|
1269
1093
|
return True
|
|
1270
1094
|
return False
|
|
1271
1095
|
|
|
@@ -351,16 +351,8 @@ def _collect_inputs(input_desc):
|
|
|
351
351
|
return inputs
|
|
352
352
|
|
|
353
353
|
|
|
354
|
-
def _get_op_attr(op_name, attrs, attr_name):
|
|
355
|
-
"""Get op attr value."""
|
|
356
|
-
for attr in attrs:
|
|
357
|
-
if attr["name"] == attr_name:
|
|
358
|
-
return attr["value"]
|
|
359
|
-
raise ValueError("Can not find attr '{}' in op {}".format(attr_name, op_name))
|
|
360
|
-
|
|
361
|
-
|
|
362
354
|
def precision_analyze(desc: dict, tensors):
|
|
363
|
-
exclude_op_list = ["Minimum", "Maximum", "Reshape", "ZerosLike", "Tile", "Select", "
|
|
355
|
+
exclude_op_list = ["Minimum", "Maximum", "Reshape", "ZerosLike", "Tile", "Select", "Greater",
|
|
364
356
|
"SelectGT", "SelectLT", "LessEqual", "Less", "EquivFormat", "ExpandDims", "Transpose",
|
|
365
357
|
"TransData", "BroadcastTo", "Assign"]
|
|
366
358
|
input_tensors = _collect_inputs(desc["input_desc"])
|
|
@@ -369,21 +361,9 @@ def precision_analyze(desc: dict, tensors):
|
|
|
369
361
|
graph = {}
|
|
370
362
|
ops = {} # recorder the operator that generates the current output
|
|
371
363
|
for op in desc["op_desc"]:
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
graph[output] = [inputs]
|
|
376
|
-
ops[output] = op["name"]
|
|
377
|
-
fake_output = _get_op_attr(op["name"], op["attr"], "fake_output")
|
|
378
|
-
if not fake_output:
|
|
379
|
-
output = IOInfo(op["output_desc"][0]["tensor_name"], op["output_desc"][0]["data_type"])
|
|
380
|
-
inputs = IOInfo(op["input_desc"][2][0]["tensor_name"], op["input_desc"][2][0]["data_type"])
|
|
381
|
-
graph[output] = [inputs]
|
|
382
|
-
ops[output] = op["name"]
|
|
383
|
-
else:
|
|
384
|
-
output = IOInfo(op["output_desc"][0]["tensor_name"], op["output_desc"][0]["data_type"])
|
|
385
|
-
graph[output] = _collect_inputs(op["input_desc"])
|
|
386
|
-
ops[output] = op["name"]
|
|
364
|
+
output = IOInfo(op["output_desc"][0]["tensor_name"], op["output_desc"][0]["data_type"])
|
|
365
|
+
graph[output] = _collect_inputs(op["input_desc"])
|
|
366
|
+
ops[output] = op["name"]
|
|
387
367
|
|
|
388
368
|
def _precision_reduce(x: IOInfo):
|
|
389
369
|
if x in input_tensors:
|