mindspore 2.1.0-cp38-none-any.whl → 2.2.0-cp38-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
- mindspore/.commit_id +1 -1
- mindspore/__init__.py +4 -1
- mindspore/_akg/akg/build_module.py +5 -6
- mindspore/_akg/akg/composite/build_module.py +49 -16
- mindspore/_akg/akg/composite/split_stitch.py +10 -11
- mindspore/_akg/akg/ms/info_version_adapt.py +67 -1
- mindspore/_akg/akg/tvm/api.py +4 -3
- mindspore/_akg/akg/tvm/autotvm/__init__.py +1 -2
- mindspore/_akg/akg/tvm/autotvm/graph_tuner/base_graph_tuner.py +1 -5
- mindspore/_akg/akg/tvm/autotvm/measure/__init__.py +1 -1
- mindspore/_akg/akg/tvm/autotvm/measure/measure.py +1 -10
- mindspore/_akg/akg/tvm/autotvm/measure/measure_methods.py +1 -372
- mindspore/_akg/akg/tvm/build_module.py +16 -1
- mindspore/_akg/akg/tvm/contrib/graph_runtime.py +0 -53
- mindspore/_akg/akg/tvm/hybrid/parser.py +7 -6
- mindspore/_akg/akg/tvm/ir_builder.py +1 -1
- mindspore/_akg/akg/tvm/module.py +1 -2
- mindspore/_akg/akg/tvm/stmt.py +2 -2
- mindspore/_akg/akg/utils/composite_op_helper.py +9 -10
- mindspore/_akg/akg/utils/kernel_exec.py +58 -260
- mindspore/_akg/akg/utils/result_analysis.py +4 -24
- mindspore/_akg/akg/utils/tbe_codegen_utils.py +198 -0
- mindspore/_c_dataengine.cpython-38-aarch64-linux-gnu.so +0 -0
- mindspore/_c_expression.cpython-38-aarch64-linux-gnu.so +0 -0
- mindspore/_c_mindrecord.cpython-38-aarch64-linux-gnu.so +0 -0
- mindspore/_check_jit_forbidden_api.py +3 -1
- mindspore/_checkparam.py +26 -32
- mindspore/_extends/graph_kernel/__init__.py +0 -1
- mindspore/_extends/graph_kernel/model/model_builder.py +9 -50
- mindspore/_extends/graph_kernel/splitter.py +1 -9
- mindspore/_extends/parallel_compile/akg_compiler/akg_process.py +122 -15
- mindspore/_extends/parallel_compile/akg_compiler/build_tbe_kernel.py +2 -2
- mindspore/_extends/parallel_compile/akg_compiler/tbe_topi.py +4 -2
- mindspore/_extends/parallel_compile/tbe_compiler/tbe_adapter.py +2 -2
- mindspore/_extends/parallel_compile/tbe_compiler/tbe_helper.py +4 -4
- mindspore/_extends/parallel_compile/tbe_compiler/tbe_job.py +1 -1
- mindspore/_extends/parallel_compile/tbe_compiler/tbe_job_manager.py +1 -1
- mindspore/_extends/parse/__init__.py +12 -15
- mindspore/_extends/parse/namespace.py +7 -33
- mindspore/_extends/parse/parser.py +61 -71
- mindspore/_extends/parse/resources.py +1 -1
- mindspore/_extends/parse/standard_method.py +72 -95
- mindspore/_extends/parse/trope.py +1 -1
- mindspore/_extends/remote/kernel_build_server.py +24 -7
- mindspore/_extends/remote/kernel_build_server_akg_v2.py +55 -0
- mindspore/_install_custom.py +43 -0
- mindspore/_mindspore_offline_debug.cpython-38-aarch64-linux-gnu.so +0 -0
- mindspore/amp.py +47 -11
- mindspore/bin/cache_admin +0 -0
- mindspore/bin/cache_server +0 -0
- mindspore/boost/boost.py +1 -8
- mindspore/boost/boost_cell_wrapper.py +3 -2
- mindspore/boost/grad_accumulation.py +1 -1
- mindspore/boost/group_loss_scale_manager.py +8 -7
- mindspore/common/__init__.py +5 -3
- mindspore/common/_jit_fallback_utils.py +6 -0
- mindspore/common/_register_for_adapter.py +2 -0
- mindspore/common/_register_for_tensor.py +2 -2
- mindspore/common/_stub_tensor.py +13 -0
- mindspore/common/_utils.py +13 -0
- mindspore/common/api.py +173 -258
- mindspore/common/auto_dynamic_shape.py +498 -0
- mindspore/common/dtype.py +18 -11
- mindspore/common/dump.py +6 -4
- mindspore/common/initializer.py +14 -14
- mindspore/common/jit_config.py +33 -15
- mindspore/common/lazy_inline.py +126 -7
- mindspore/common/mindir_util.py +101 -0
- mindspore/common/parameter.py +51 -41
- mindspore/common/seed.py +4 -4
- mindspore/common/sparse_tensor.py +13 -14
- mindspore/common/tensor.py +240 -145
- mindspore/communication/__init__.py +7 -4
- mindspore/communication/_comm_helper.py +83 -4
- mindspore/communication/management.py +152 -84
- mindspore/config/op_info.config +13 -2
- mindspore/config/super_bar_config.json +4 -2
- mindspore/context.py +143 -59
- mindspore/dataset/__init__.py +5 -5
- mindspore/dataset/audio/__init__.py +2 -2
- mindspore/dataset/audio/transforms.py +52 -52
- mindspore/dataset/callback/ds_callback.py +16 -2
- mindspore/dataset/core/config.py +68 -51
- mindspore/dataset/engine/cache_client.py +28 -5
- mindspore/dataset/engine/datasets.py +250 -112
- mindspore/dataset/engine/datasets_audio.py +43 -211
- mindspore/dataset/engine/datasets_standard_format.py +11 -35
- mindspore/dataset/engine/datasets_text.py +43 -67
- mindspore/dataset/engine/datasets_user_defined.py +86 -100
- mindspore/dataset/engine/datasets_vision.py +219 -1029
- mindspore/dataset/engine/iterators.py +11 -4
- mindspore/dataset/engine/obs/obs_mindrecord_dataset.py +4 -0
- mindspore/dataset/engine/obs/util.py +3 -0
- mindspore/dataset/engine/samplers.py +1 -1
- mindspore/dataset/engine/validators.py +19 -5
- mindspore/dataset/text/__init__.py +3 -3
- mindspore/dataset/text/transforms.py +101 -127
- mindspore/dataset/text/utils.py +205 -138
- mindspore/dataset/transforms/__init__.py +1 -1
- mindspore/dataset/transforms/py_transforms_util.py +40 -12
- mindspore/dataset/transforms/transforms.py +95 -40
- mindspore/dataset/utils/browse_dataset.py +8 -2
- mindspore/dataset/utils/line_reader.py +17 -19
- mindspore/dataset/vision/__init__.py +3 -3
- mindspore/dataset/vision/c_transforms.py +6 -3
- mindspore/dataset/vision/transforms.py +409 -287
- mindspore/dataset/vision/utils.py +13 -14
- mindspore/dataset/vision/validators.py +11 -1
- mindspore/experimental/map_parameter.py +14 -0
- mindspore/{nn/optim_ex → experimental/optim}/__init__.py +30 -29
- mindspore/{nn/optim_ex → experimental/optim}/adam.py +59 -66
- mindspore/{nn/optim_ex → experimental/optim}/adamw.py +181 -203
- mindspore/experimental/optim/lr_scheduler.py +1427 -0
- mindspore/{nn/optim_ex → experimental/optim}/optimizer.py +252 -259
- mindspore/{nn/optim_ex → experimental/optim}/sgd.py +147 -152
- mindspore/gen_ops.py +273 -0
- mindspore/include/OWNERS +0 -1
- mindspore/include/api/data_type.h +2 -1
- mindspore/include/api/graph.h +0 -15
- mindspore/include/api/kernel.h +2 -0
- mindspore/include/api/kernel_api.h +37 -12
- mindspore/include/api/model.h +0 -14
- mindspore/include/api/types.h +37 -4
- mindspore/include/c_api/ms/abstract.h +67 -0
- mindspore/include/c_api/ms/attribute.h +197 -0
- mindspore/include/c_api/ms/base/handle_types.h +43 -0
- mindspore/include/c_api/ms/base/macros.h +32 -0
- mindspore/include/c_api/ms/base/status.h +33 -0
- mindspore/include/c_api/ms/base/types.h +282 -0
- mindspore/include/c_api/ms/context.h +102 -0
- mindspore/include/c_api/ms/graph.h +160 -0
- mindspore/include/c_api/ms/node.h +606 -0
- mindspore/include/c_api/ms/tensor.h +161 -0
- mindspore/include/c_api/ms/value.h +84 -0
- mindspore/include/dataset/constants.h +6 -5
- mindspore/include/dataset/execute.h +23 -13
- mindspore/include/dataset/text.h +26 -26
- mindspore/include/dataset/transforms.h +13 -13
- mindspore/include/dataset/vision.h +60 -60
- mindspore/include/dataset/vision_ascend.h +5 -6
- mindspore/include/dataset/vision_lite.h +17 -17
- mindspore/include/mindapi/base/type_id.h +1 -0
- mindspore/include/mindapi/base/types.h +1 -0
- mindspore/lib/libdnnl.so.2 +0 -0
- mindspore/lib/libjemalloc.so.2 +0 -0
- mindspore/lib/libmindspore.so +0 -0
- mindspore/lib/libmindspore_backend.so +0 -0
- mindspore/lib/libmindspore_common.so +0 -0
- mindspore/lib/libmindspore_core.so +0 -0
- mindspore/lib/libmindspore_glog.so.0 +0 -0
- mindspore/lib/libmindspore_gpr.so.15 +0 -0
- mindspore/lib/libmindspore_grpc++.so.1 +0 -0
- mindspore/lib/libmindspore_grpc.so.15 +0 -0
- mindspore/lib/libmindspore_shared_lib.so +0 -0
- mindspore/lib/libnnacl.so +0 -0
- mindspore/lib/libopencv_core.so.4.5 +0 -0
- mindspore/lib/libopencv_imgcodecs.so.4.5 +0 -0
- mindspore/lib/libopencv_imgproc.so.4.5 +0 -0
- mindspore/lib/libps_cache.so +0 -0
- mindspore/lib/plugin/ascend/custom_aicpu_ops/op_impl/cpu/aicpu_kernel/impl/libcust_aicpu_kernels.so +0 -0
- mindspore/lib/plugin/ascend/custom_aicpu_ops/op_impl/cpu/aicpu_kernel/impl/libcust_cpu_kernels.so +0 -0
- mindspore/lib/plugin/ascend/custom_aicpu_ops/op_impl/cpu/config/cust_aicpu_kernel.json +9000 -0
- mindspore/lib/plugin/ascend/custom_aicpu_ops/op_proto/libcust_op_proto.so +0 -0
- mindspore/lib/plugin/ascend/libakg.so +0 -0
- mindspore/lib/plugin/ascend/libascend_collective.so +0 -0
- mindspore/lib/plugin/ascend/libdvpp_utils.so +0 -0
- mindspore/lib/plugin/ascend/libhccl_plugin.so +0 -0
- mindspore/lib/plugin/ascend/libmindspore_aicpu_kernels.so +0 -0
- mindspore/lib/plugin/ascend/libmindspore_cpu_kernels.so +0 -0
- mindspore/lib/plugin/cpu/libakg.so +0 -0
- mindspore/lib/plugin/libmindspore_ascend.so.1 +0 -0
- mindspore/lib/plugin/libmindspore_ascend.so.2 +0 -0
- mindspore/mindrecord/tools/imagenet_to_mr.py +1 -1
- mindspore/mindrecord/tools/mnist_to_mr.py +2 -2
- mindspore/nn/__init__.py +0 -2
- mindspore/nn/cell.py +316 -74
- mindspore/nn/dynamic_lr.py +21 -21
- mindspore/nn/layer/activation.py +21 -28
- mindspore/nn/layer/basic.py +15 -13
- mindspore/nn/layer/channel_shuffle.py +1 -1
- mindspore/nn/layer/container.py +271 -9
- mindspore/nn/layer/conv.py +310 -207
- mindspore/nn/layer/dense.py +8 -5
- mindspore/nn/layer/embedding.py +33 -27
- mindspore/nn/layer/flash_attention.py +82 -41
- mindspore/nn/layer/image.py +8 -6
- mindspore/nn/layer/math.py +13 -18
- mindspore/nn/layer/normalization.py +107 -66
- mindspore/nn/layer/padding.py +1 -1
- mindspore/nn/layer/pooling.py +131 -109
- mindspore/nn/layer/rnn_cells.py +22 -17
- mindspore/nn/layer/rnns.py +13 -16
- mindspore/nn/layer/thor_layer.py +1 -1
- mindspore/nn/layer/transformer.py +221 -154
- mindspore/nn/learning_rate_schedule.py +9 -1
- mindspore/nn/loss/loss.py +235 -174
- mindspore/nn/optim/ada_grad.py +2 -1
- mindspore/nn/optim/adadelta.py +1 -0
- mindspore/nn/optim/adafactor.py +2 -1
- mindspore/nn/optim/adam.py +7 -4
- mindspore/nn/optim/adamax.py +3 -2
- mindspore/nn/optim/adasum.py +2 -2
- mindspore/nn/optim/asgd.py +2 -3
- mindspore/nn/optim/ftrl.py +6 -5
- mindspore/nn/optim/lamb.py +7 -4
- mindspore/nn/optim/lars.py +1 -1
- mindspore/nn/optim/lazyadam.py +5 -3
- mindspore/nn/optim/momentum.py +2 -1
- mindspore/nn/optim/optimizer.py +53 -4
- mindspore/nn/optim/proximal_ada_grad.py +3 -4
- mindspore/nn/optim/rmsprop.py +4 -3
- mindspore/nn/optim/rprop.py +23 -12
- mindspore/nn/optim/sgd.py +26 -11
- mindspore/nn/optim/thor.py +9 -7
- mindspore/nn/probability/bijector/bijector.py +5 -5
- mindspore/nn/probability/bijector/power_transform.py +27 -27
- mindspore/nn/probability/bijector/softplus.py +3 -3
- mindspore/nn/probability/distribution/_utils/custom_ops.py +3 -3
- mindspore/nn/probability/distribution/bernoulli.py +5 -5
- mindspore/nn/probability/distribution/beta.py +3 -3
- mindspore/nn/probability/distribution/categorical.py +7 -7
- mindspore/nn/probability/distribution/cauchy.py +0 -1
- mindspore/nn/probability/distribution/distribution.py +3 -3
- mindspore/nn/probability/distribution/gamma.py +3 -3
- mindspore/nn/probability/distribution/geometric.py +4 -4
- mindspore/nn/probability/distribution/gumbel.py +4 -4
- mindspore/nn/probability/distribution/log_normal.py +2 -2
- mindspore/nn/probability/distribution/logistic.py +2 -2
- mindspore/nn/probability/distribution/poisson.py +4 -4
- mindspore/nn/probability/distribution/transformed_distribution.py +3 -3
- mindspore/nn/probability/distribution/uniform.py +6 -6
- mindspore/nn/wrap/cell_wrapper.py +78 -34
- mindspore/nn/wrap/grad_reducer.py +8 -5
- mindspore/nn/wrap/loss_scale.py +105 -42
- mindspore/numpy/array_creations.py +1 -2
- mindspore/numpy/array_ops.py +3 -2
- mindspore/offline_debug/convert_async.py +2 -2
- mindspore/ops/_grad_experimental/__init__.py +0 -5
- mindspore/ops/_grad_experimental/grad_array_ops.py +1 -2
- mindspore/ops/_grad_experimental/grad_comm_ops.py +15 -2
- mindspore/ops/_grad_experimental/grad_debug_ops.py +0 -37
- mindspore/ops/_grad_experimental/grad_implementations.py +10 -0
- mindspore/ops/_grad_experimental/grad_inner_ops.py +2 -216
- mindspore/ops/_grad_experimental/grad_math_ops.py +0 -181
- mindspore/ops/_grad_experimental/grad_sparse.py +15 -0
- mindspore/ops/_op_impl/_custom_op/dsd_back_impl.py +1 -1
- mindspore/ops/_op_impl/_custom_op/flash_attention/attention.py +165 -109
- mindspore/ops/_op_impl/_custom_op/flash_attention/flash_attention_bwd.py +144 -86
- mindspore/ops/_op_impl/_custom_op/flash_attention/flash_attention_fwd.py +172 -187
- mindspore/ops/_op_impl/_custom_op/flash_attention/flash_attention_impl.py +51 -57
- mindspore/ops/_op_impl/_custom_op/flash_attention/tik_ops_utils.py +6 -17
- mindspore/ops/_op_impl/_custom_op/flash_attention/tiling_strategy/wukong_tiling.py +1 -1
- mindspore/ops/_op_impl/aicpu/__init__.py +14 -2
- mindspore/ops/_op_impl/aicpu/bias_add_grad.py +0 -1
- mindspore/ops/_op_impl/aicpu/count_nonzero.py +43 -0
- mindspore/ops/_op_impl/aicpu/eps.py +32 -0
- mindspore/ops/_op_impl/aicpu/gamma.py +2 -2
- mindspore/ops/_op_impl/aicpu/log_uniform_candidate_sampler.py +6 -3
- mindspore/ops/_op_impl/aicpu/lu_unpack_grad.py +0 -1
- mindspore/ops/_op_impl/aicpu/multinomial.py +3 -3
- mindspore/ops/_op_impl/aicpu/parameterized_truncated_normal.py +15 -7
- mindspore/ops/_op_impl/aicpu/random_categorical.py +39 -19
- mindspore/ops/_op_impl/aicpu/random_choice_with_mask.py +5 -2
- mindspore/ops/_op_impl/aicpu/random_poisson.py +103 -52
- mindspore/ops/_op_impl/aicpu/random_shuffle.py +17 -15
- mindspore/ops/_op_impl/aicpu/{sparseaddmm.py → sparse_addmm.py} +2 -2
- mindspore/ops/_op_impl/aicpu/{sparsesparsemaximum.py → sparse_sparse_maximum.py} +4 -4
- mindspore/ops/_op_impl/aicpu/standard_laplace.py +5 -5
- mindspore/ops/_op_impl/aicpu/standard_normal.py +5 -5
- mindspore/ops/_op_impl/aicpu/truncated_normal.py +9 -7
- mindspore/ops/_op_impl/aicpu/uniform.py +5 -3
- mindspore/ops/_op_impl/aicpu/uniform_candidate_sampler.py +8 -4
- mindspore/ops/_op_impl/aicpu/uniform_int.py +5 -5
- mindspore/ops/_op_impl/aicpu/uniform_real.py +4 -4
- mindspore/ops/_op_impl/tbe/__init__.py +4 -4
- mindspore/ops/_op_impl/tbe/inplace_index_add.py +7 -3
- mindspore/ops/_op_impl/tbe/trans_data_ds.py +2 -0
- mindspore/ops/_primitive_cache.py +1 -1
- mindspore/ops/_tracefunc.py +45 -13
- mindspore/ops/_utils/utils.py +4 -1
- mindspore/ops/_vmap/vmap_array_ops.py +3 -3
- mindspore/ops/_vmap/vmap_base.py +3 -3
- mindspore/ops/_vmap/vmap_convolution_ops.py +1 -1
- mindspore/ops/_vmap/vmap_grad_math_ops.py +6 -4
- mindspore/ops/_vmap/vmap_math_ops.py +5 -2
- mindspore/ops/_vmap/vmap_nn_ops.py +61 -7
- mindspore/ops/arg_dtype_cast.py +54 -0
- mindspore/ops/composite/base.py +37 -10
- mindspore/ops/composite/math_ops.py +5 -4
- mindspore/ops/composite/multitype_ops/_compile_utils.py +273 -72
- mindspore/ops/composite/multitype_ops/_constexpr_utils.py +16 -9
- mindspore/ops/composite/multitype_ops/add_impl.py +43 -4
- mindspore/ops/composite/multitype_ops/getitem_impl.py +40 -2
- mindspore/ops/composite/multitype_ops/ones_like_impl.py +6 -0
- mindspore/ops/composite/multitype_ops/setitem_impl.py +2 -1
- mindspore/ops/composite/multitype_ops/zeros_like_impl.py +9 -0
- mindspore/ops/deprecated.py +304 -0
- mindspore/ops/function/__init__.py +4 -1
- mindspore/ops/function/array_func.py +167 -189
- mindspore/ops/function/clip_func.py +81 -13
- mindspore/ops/function/debug_func.py +1 -1
- mindspore/ops/function/grad/grad_func.py +18 -8
- mindspore/ops/function/image_func.py +10 -4
- mindspore/ops/function/linalg_func.py +5 -5
- mindspore/ops/function/math_func.py +575 -386
- mindspore/ops/function/nn_func.py +470 -251
- mindspore/ops/function/random_func.py +86 -56
- mindspore/ops/function/sparse_func.py +1 -1
- mindspore/ops/function/sparse_unary_func.py +14 -12
- mindspore/ops/function/vmap_func.py +6 -5
- mindspore/ops/functional.py +15 -10
- mindspore/ops/op_info_register.py +235 -19
- mindspore/ops/operations/__init__.py +25 -17
- mindspore/ops/operations/_grad_ops.py +52 -7
- mindspore/ops/operations/_inner_ops.py +213 -12
- mindspore/ops/operations/_quant_ops.py +4 -8
- mindspore/ops/operations/_sequence_ops.py +42 -0
- mindspore/ops/operations/array_ops.py +64 -280
- mindspore/ops/operations/comm_ops.py +105 -57
- mindspore/ops/operations/custom_ops.py +10 -3
- mindspore/ops/operations/debug_ops.py +8 -4
- mindspore/ops/operations/image_ops.py +18 -12
- mindspore/ops/operations/math_ops.py +185 -138
- mindspore/ops/operations/nn_ops.py +716 -492
- mindspore/ops/operations/other_ops.py +0 -22
- mindspore/ops/operations/random_ops.py +53 -111
- mindspore/ops/operations/sparse_ops.py +3 -1
- mindspore/ops/primitive.py +24 -18
- mindspore/parallel/_auto_parallel_context.py +68 -8
- mindspore/parallel/_cost_model_context.py +2 -2
- mindspore/parallel/_offload_context.py +17 -3
- mindspore/parallel/_parallel_serialization.py +2 -2
- mindspore/parallel/_ps_context.py +12 -0
- mindspore/parallel/_tensor.py +14 -12
- mindspore/parallel/_transformer/layers.py +5 -3
- mindspore/parallel/_transformer/loss.py +1 -0
- mindspore/parallel/_transformer/moe.py +2 -2
- mindspore/parallel/_transformer/op_parallel_config.py +12 -1
- mindspore/parallel/_transformer/transformer.py +23 -3
- mindspore/parallel/_utils.py +11 -7
- mindspore/parallel/algo_parameter_config.py +85 -5
- mindspore/parallel/checkpoint_transform.py +6 -10
- mindspore/parallel/shard.py +4 -4
- mindspore/profiler/common/struct_type.py +3 -3
- mindspore/profiler/common/util.py +3 -2
- mindspore/profiler/envprofiling.py +1 -1
- mindspore/profiler/parser/aicpu_data_parser.py +5 -3
- mindspore/profiler/parser/ascend_flops_generator.py +2 -2
- mindspore/profiler/parser/ascend_fpbp_generator.py +1 -1
- mindspore/profiler/parser/ascend_hccl_generator.py +17 -12
- mindspore/profiler/parser/ascend_msprof_exporter.py +104 -252
- mindspore/profiler/parser/ascend_msprof_generator.py +8 -8
- mindspore/profiler/parser/ascend_op_generator.py +5 -5
- mindspore/profiler/parser/ascend_steptrace_generator.py +6 -4
- mindspore/profiler/parser/ascend_timeline_generator.py +9 -6
- mindspore/profiler/parser/base_timeline_generator.py +9 -7
- mindspore/profiler/parser/cpu_gpu_timeline_generator.py +14 -10
- mindspore/profiler/parser/flops_parser.py +15 -11
- mindspore/profiler/parser/framework_parser.py +37 -21
- mindspore/profiler/parser/hccl_parser.py +16 -12
- mindspore/profiler/parser/integrator.py +22 -11
- mindspore/profiler/parser/memory_usage_parser.py +2 -2
- mindspore/profiler/parser/minddata_analyzer.py +12 -14
- mindspore/profiler/parser/minddata_pipeline_parser.py +1 -1
- mindspore/profiler/parser/msadvisor_parser.py +8 -4
- mindspore/profiler/parser/op_intermediate_parser.py +5 -2
- mindspore/profiler/parser/optime_parser.py +1 -1
- mindspore/profiler/parser/profiler_info.py +2 -2
- mindspore/profiler/parser/step_trace_parser.py +11 -14
- mindspore/profiler/profiling.py +139 -71
- mindspore/rewrite/api/node.py +102 -19
- mindspore/rewrite/api/node_type.py +5 -1
- mindspore/rewrite/api/scoped_value.py +9 -17
- mindspore/rewrite/api/symbol_tree.py +131 -47
- mindspore/rewrite/ast_helpers/__init__.py +2 -1
- mindspore/rewrite/ast_helpers/ast_finder.py +129 -0
- mindspore/rewrite/ast_helpers/ast_modifier.py +116 -104
- mindspore/rewrite/ast_transformers/flatten_recursive_stmt.py +93 -46
- mindspore/rewrite/common/rewrite_elog.py +5 -1
- mindspore/rewrite/namer.py +33 -24
- mindspore/rewrite/namespace.py +14 -5
- mindspore/{_extends/graph_kernel/expanders/complex → rewrite/node}/__init__.py +9 -9
- mindspore/rewrite/node/call_function.py +79 -0
- mindspore/rewrite/node/cell_container.py +135 -0
- mindspore/rewrite/node/control_flow.py +88 -0
- mindspore/rewrite/{node.py → node/node.py} +273 -234
- mindspore/rewrite/node/node_manager.py +254 -0
- mindspore/rewrite/{topological_manager.py → node/node_topological_manager.py} +13 -46
- mindspore/rewrite/parsers/arguments_parser.py +22 -21
- mindspore/rewrite/parsers/assign_parser.py +216 -221
- mindspore/rewrite/parsers/attribute_parser.py +9 -7
- mindspore/rewrite/parsers/class_def_parser.py +174 -113
- mindspore/rewrite/parsers/constant_parser.py +9 -6
- mindspore/rewrite/parsers/container_parser.py +9 -7
- mindspore/rewrite/parsers/for_parser.py +36 -15
- mindspore/rewrite/parsers/function_def_parser.py +24 -16
- mindspore/rewrite/parsers/if_parser.py +28 -24
- mindspore/rewrite/parsers/module_parser.py +196 -25
- mindspore/rewrite/{parser.py → parsers/parser.py} +4 -2
- mindspore/rewrite/{parser_register.py → parsers/parser_register.py} +1 -1
- mindspore/rewrite/parsers/return_parser.py +6 -6
- mindspore/rewrite/sparsify/sparse_transformer.py +12 -3
- mindspore/rewrite/sparsify/utils.py +1 -1
- mindspore/rewrite/symbol_tree.py +525 -577
- mindspore/rewrite/symbol_tree_builder.py +9 -193
- mindspore/rewrite/symbol_tree_dumper.py +2 -2
- mindspore/run_check/_check_version.py +2 -2
- mindspore/{ops/bprop_mindir → safeguard}/__init__.py +4 -3
- mindspore/safeguard/rewrite_obfuscation.py +517 -0
- mindspore/scipy/linalg.py +1 -1
- mindspore/scipy/optimize/minimize.py +7 -3
- mindspore/train/_utils.py +7 -3
- mindspore/train/amp.py +323 -123
- mindspore/train/anf_ir_pb2.py +14 -2
- mindspore/train/callback/_backup_and_restore.py +2 -12
- mindspore/train/callback/_callback.py +29 -4
- mindspore/train/callback/_checkpoint.py +23 -8
- mindspore/train/callback/_early_stop.py +2 -2
- mindspore/train/callback/_landscape.py +4 -4
- mindspore/train/callback/_loss_monitor.py +2 -2
- mindspore/train/callback/_on_request_exit.py +2 -2
- mindspore/train/callback/_reduce_lr_on_plateau.py +3 -4
- mindspore/train/callback/_summary_collector.py +14 -7
- mindspore/train/callback/_time_monitor.py +58 -5
- mindspore/train/data_sink.py +5 -11
- mindspore/train/dataset_helper.py +83 -57
- mindspore/train/loss_scale_manager.py +2 -2
- mindspore/train/metrics/__init__.py +3 -3
- mindspore/train/metrics/cosine_similarity.py +1 -1
- mindspore/train/metrics/hausdorff_distance.py +3 -2
- mindspore/train/metrics/mean_surface_distance.py +3 -2
- mindspore/train/metrics/metric.py +39 -19
- mindspore/train/metrics/roc.py +2 -2
- mindspore/train/metrics/root_mean_square_surface_distance.py +4 -3
- mindspore/train/mind_ir_pb2.py +85 -36
- mindspore/train/model.py +185 -45
- mindspore/train/serialization.py +390 -150
- mindspore/train/summary/_writer_pool.py +3 -2
- mindspore/train/summary/summary_record.py +14 -10
- mindspore/train/train_thor/convert_utils.py +3 -3
- mindspore/train/train_thor/dataset_helper.py +1 -1
- mindspore/version.py +1 -1
- {mindspore-2.1.0.dist-info → mindspore-2.2.0.dist-info}/METADATA +6 -7
- {mindspore-2.1.0.dist-info → mindspore-2.2.0.dist-info}/RECORD +447 -507
- {mindspore-2.1.0.dist-info → mindspore-2.2.0.dist-info}/entry_points.txt +0 -1
- mindspore/_akg/akg/tvm/contrib/debugger/__init__.py +0 -16
- mindspore/_akg/akg/tvm/contrib/debugger/debug_result.py +0 -274
- mindspore/_akg/akg/tvm/contrib/debugger/debug_runtime.py +0 -259
- mindspore/_akg/akg/tvm/contrib/peak.py +0 -341
- mindspore/_akg/akg/tvm/contrib/rpc.py +0 -25
- mindspore/_akg/akg/tvm/contrib/xcode.py +0 -257
- mindspore/_akg/akg/tvm/exec/__init__.py +0 -17
- mindspore/_akg/akg/tvm/exec/autotvm_log_editor.py +0 -60
- mindspore/_akg/akg/tvm/exec/measure_peak.py +0 -48
- mindspore/_akg/akg/tvm/exec/query_rpc_tracker.py +0 -48
- mindspore/_akg/akg/tvm/exec/rpc_proxy.py +0 -98
- mindspore/_akg/akg/tvm/exec/rpc_server.py +0 -88
- mindspore/_akg/akg/tvm/exec/rpc_tracker.py +0 -62
- mindspore/_akg/akg/tvm/rpc/__init__.py +0 -29
- mindspore/_akg/akg/tvm/rpc/base.py +0 -182
- mindspore/_akg/akg/tvm/rpc/client.py +0 -436
- mindspore/_akg/akg/tvm/rpc/proxy.py +0 -595
- mindspore/_akg/akg/tvm/rpc/server.py +0 -413
- mindspore/_akg/akg/tvm/rpc/tornado_util.py +0 -121
- mindspore/_akg/akg/tvm/rpc/tracker.py +0 -431
- mindspore/_extends/graph_kernel/expander.py +0 -80
- mindspore/_extends/graph_kernel/expanders/__init__.py +0 -54
- mindspore/_extends/graph_kernel/expanders/_utils.py +0 -269
- mindspore/_extends/graph_kernel/expanders/addn.py +0 -33
- mindspore/_extends/graph_kernel/expanders/batchnorm.py +0 -152
- mindspore/_extends/graph_kernel/expanders/batchnorm_grad.py +0 -105
- mindspore/_extends/graph_kernel/expanders/clip_by_norm_no_div_sum.py +0 -33
- mindspore/_extends/graph_kernel/expanders/complex/abs.py +0 -30
- mindspore/_extends/graph_kernel/expanders/complex/add.py +0 -44
- mindspore/_extends/graph_kernel/expanders/complex/div.py +0 -62
- mindspore/_extends/graph_kernel/expanders/complex/mul.py +0 -52
- mindspore/_extends/graph_kernel/expanders/complex/real_div.py +0 -62
- mindspore/_extends/graph_kernel/expanders/complex/sub.py +0 -45
- mindspore/_extends/graph_kernel/expanders/conv2d.py +0 -200
- mindspore/_extends/graph_kernel/expanders/dropout_grad.py +0 -30
- mindspore/_extends/graph_kernel/expanders/equal_count.py +0 -50
- mindspore/_extends/graph_kernel/expanders/erfc.py +0 -35
- mindspore/_extends/graph_kernel/expanders/expand_dims.py +0 -50
- mindspore/_extends/graph_kernel/expanders/fused_adam.py +0 -44
- mindspore/_extends/graph_kernel/expanders/fused_adam_weight_decay.py +0 -47
- mindspore/_extends/graph_kernel/expanders/fused_mul_add.py +0 -28
- mindspore/_extends/graph_kernel/expanders/gelu_grad.py +0 -70
- mindspore/_extends/graph_kernel/expanders/gkdropout.py +0 -40
- mindspore/_extends/graph_kernel/expanders/identity.py +0 -25
- mindspore/_extends/graph_kernel/expanders/layernorm.py +0 -93
- mindspore/_extends/graph_kernel/expanders/layernorm_grad.py +0 -113
- mindspore/_extends/graph_kernel/expanders/logsoftmax.py +0 -46
- mindspore/_extends/graph_kernel/expanders/logsoftmax_grad.py +0 -36
- mindspore/_extends/graph_kernel/expanders/matmul.py +0 -80
- mindspore/_extends/graph_kernel/expanders/maximum_grad.py +0 -59
- mindspore/_extends/graph_kernel/expanders/minimum_grad.py +0 -80
- mindspore/_extends/graph_kernel/expanders/oneslike.py +0 -26
- mindspore/_extends/graph_kernel/expanders/reduce_mean.py +0 -43
- mindspore/_extends/graph_kernel/expanders/relu_grad.py +0 -32
- mindspore/_extends/graph_kernel/expanders/sigmoid_cross_entropy_with_logits.py +0 -41
- mindspore/_extends/graph_kernel/expanders/sigmoid_cross_entropy_with_logits_grad.py +0 -35
- mindspore/_extends/graph_kernel/expanders/sigmoid_grad.py +0 -31
- mindspore/_extends/graph_kernel/expanders/slice.py +0 -35
- mindspore/_extends/graph_kernel/expanders/softmax_cross_entropy_with_logits.py +0 -42
- mindspore/_extends/graph_kernel/expanders/softmax_grad_ext.py +0 -41
- mindspore/_extends/graph_kernel/expanders/softsign.py +0 -28
- mindspore/_extends/graph_kernel/expanders/sqrt_grad.py +0 -29
- mindspore/_extends/graph_kernel/expanders/square_sum_all.py +0 -44
- mindspore/_extends/graph_kernel/expanders/square_sum_v1.py +0 -37
- mindspore/_extends/graph_kernel/expanders/squared_difference.py +0 -43
- mindspore/_extends/graph_kernel/expanders/tanh_grad.py +0 -31
- mindspore/_extends/graph_kernel/model/op_infer.py +0 -506
- mindspore/dataset/datapreprocess/__init__.py +0 -20
- mindspore/dataset/datapreprocess/preprocess_imagenet_validate_dataset.py +0 -54
- mindspore/include/api/net.h +0 -142
- mindspore/nn/lr_scheduler.py +0 -262
- mindspore/ops/_grad_experimental/grad_image_ops.py +0 -248
- mindspore/ops/_grad_experimental/grad_linalg_ops.py +0 -181
- mindspore/ops/_grad_experimental/grad_other_ops.py +0 -72
- mindspore/ops/_grad_experimental/grad_scalar_ops.py +0 -112
- mindspore/ops/_grad_experimental/grad_sequence_ops.py +0 -351
- mindspore/ops/bprop_mindir/BNTrainingReduce_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Broadcast_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Depend_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/DepthwiseConv2dNative_bprop.mindir +0 -138
- mindspore/ops/bprop_mindir/EmbeddingLookup_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Load_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/ScatterNonAliasingAdd_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/SparseGatherV2_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/SparseSoftmaxCrossEntropyWithLogits_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Switch_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/TransShape_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/TupleGetItem_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Unique_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Unstack_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/generate_mindir.py +0 -114
- mindspore/rewrite/node_visitor.py +0 -44
- {mindspore-2.1.0.dist-info → mindspore-2.2.0.dist-info}/WHEEL +0 -0
- {mindspore-2.1.0.dist-info → mindspore-2.2.0.dist-info}/top_level.txt +0 -0
mindspore/_akg/akg/utils/kernel_exec.py

@@ -1,6 +1,6 @@
 #!/usr/bin/env python3
 # coding: utf-8
-# Copyright 2019-
+# Copyright 2019-2023 Huawei Technologies Co., Ltd
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -35,8 +35,6 @@ import numpy as np
 
 import akg
 import akg.tvm
-from akg.tvm import autotvm
-from akg.tvm import rpc
 from akg.tvm import _api_internal
 from akg.build_module import help_tiling_level
 from akg.utils import result_analysis as ra_util
@@ -53,8 +51,6 @@ sh = logging.StreamHandler(sys.stdout)
 logging.getLogger().addHandler(sh)
 logging.getLogger().setLevel(logging.INFO)
 
-rpc_machine = {}
-rpc_lb = {}
 
 PERFORMANCE_TEST_FILE = "PERFORMANCE_TEST_FILE"
 BINDS = "binds"
@@ -173,211 +169,6 @@ def gen_name_kernel(kernel, dtype, shapes):
     return res
 
 
-def load_rpc_server_info(mode):
-    """
-    load rpc server host and port info.
-
-    Args:
-        mode (str): string of runtime choose, can set ca aic and rpc.
-    """
-    env_dic = os.environ
-    if env_dic.get('RPC_HOST') and env_dic.get('RPC_PORT'):
-        return
-
-    if mode == 'rpc_cloud':
-        logging.error("runtime_mode=rpc_cloud must set 1980 host ip and port!")
-        raise Exception("ERROR:runtime_mode=rpc_cloud must set 1980 host ip and port!")
-
-    rpc_server_info_config = env_dic.get('RPC_SERVER_INFO_FILE')
-    if not rpc_server_info_config:
-        logging.error("runtime_mode=rpc must set RPC_SERVER_INFO_FILE for rpc server info config")
-        raise Exception("ERROR:runtime_mode=rpc must set RPC_SERVER_INFO_FILE for rpc server info config")
-
-    # load rpc server host and port info from local file.
-    import json
-    with open(rpc_server_info_config, 'r') as f:
-        info = json.load(f)
-
-    for i in info:
-        rpc_machine[i] = info[i]
-        rpc_lb[i] = 0.0
-    return
-
-
-def dispatch(rank=0):
-    """Function for lock waiting dispatch handle version 1."""
-
-    def _sort_by_value(d):
-        items = list(d.items())
-        random.shuffle(items)
-        items.sort(key=lambda x: x[1])
-        return list(item[0] for item in items)
-
-    for k, v in rpc_lb.items():
-        logging.info("######rpc_lb[%s]=%f", rpc_machine.get(k)[0], v)
-    lb_list = _sort_by_value(rpc_lb)
-    if len(lb_list) > rank:
-        return lb_list[rank]
-    return lb_list[len(lb_list) - 1]
-
-
-def commit(remote, weight):
-    rpc_lb[remote] = weight
-
-
-@func_time_required
-def mod_launch_rpc_worker(mod, args, outputs, host, port, tuning=False):
-    """internal RPC worker, should be called by mod_launch_rpc_thread."""
-    logging.info("%s:====start connect to rpc ip: %s, rpc port: %d ",
-                 datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'), host, port)
-    remote = rpc.connect(host, port, session_timeout=300)
-    logging.info("%s:====connect to rpc ip: %s, rpc port: %d finished ",
-                 datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'), host, port)
-    uuid_str = uuid.uuid4().hex
-    temp_file_name = "stackvm_%s.o" % uuid_str
-    mod.save(temp_file_name)
-    remote.upload(temp_file_name)
-    remote_mod = remote.load_module(temp_file_name)
-    ctx = remote.cce()
-    arg_list = []
-    for a in args:
-        arg_list.append(akg.tvm.nd.array(a, ctx))
-    start_time = timer()
-    remote_mod(*arg_list)
-    ctx.sync()
-    if os.path.exists(temp_file_name):
-        os.remove(temp_file_name)
-    out_list = []
-    for i in outputs:
-        out = arg_list[len(arg_list) + i if i < 0 else i].asnumpy()
-        out_list.append(out)
-    # this time measure is no accurate now, to be improved soon
-    t = timer() - start_time
-    if not tuning:
-        return out_list[0] if len(out_list) == 1 else tuple(out_list)
-    stat_info = {"run_time": t}
-    return out_list[0] if len(out_list) == 1 else tuple(out_list), stat_info
-
-
-def mod_launch_rpc_thread(mode, mod, args, outputs, results, need_retry, retry, tuning=False):
-    """internal RPC thread, should be called by mod_launch_rpc_multithread."""
-    remoteevb = '0'
-    host = None
-    port = None
-    env_dic = os.environ
-    if env_dic.get('RPC_HOST') and env_dic.get('RPC_PORT'):
-        host = env_dic.get('RPC_HOST')
-        port = int(env_dic.get('RPC_PORT'))
-    else:
-        if mode == 'rpc_cloud':
-            logging.error("runtime_mode=rpc_cloud must set 1980 host ip and port!")
-            raise Exception("ERROR:runtime_mode=rpc_cloud must set 1980 host ip and port!")
-        remoteevb = dispatch(retry)
-        host = rpc_machine.get(remoteevb)[0]
-        port = rpc_machine.get(remoteevb)[1]
-
-    start_time = timer()
-    end_time = 0.0
-    logging.debug("rpc ip: %s, rpc port: %d", host, port)
-    try:
-        out_list = mod_launch_rpc_worker(mod, args, outputs, host, port, tuning=tuning)
-        end_time = timer()
-        t = end_time - start_time
-        if not env_dic.get('RPC_HOST'):
-            commit(remoteevb, 20 if t > 20 else t)
-        logging.info("===this round host is %s time is %f", host, (end_time - start_time))
-        results[retry] = out_list
-    except RuntimeError:
-        need_retry[retry] = True
-        end_time = timer()
-        logging.error("===Failed! this round host is %s time is %f", host, (end_time - start_time))
-        if not env_dic.get('RPC_HOST'):
-            commit(remoteevb, end_time - start_time + 20 * (retry + 1))
-        logging.error("rpc retry error: %d %s", retry, sys.exc_info())
-
-
-def _get_rpc_result(poll_count, threads, thread_index, poll_interval, need_retry, results, retried):
-    """Get rpc run result."""
-    while poll_count > 0:
-        poll_count -= 1
-        # wait for the newly created thread, because it is most likely to complete first
-        threads[thread_index].join(poll_interval)
-        for poll_index in range(thread_index + 1):
-            if not threads[poll_index].is_alive() and not need_retry[poll_index]:
-                return True, results[poll_index]
-            if need_retry[poll_index] and not retried[poll_index]:
-                logging.error("Thread %d exit with error, spawn a new thread immediately", poll_index)
-                poll_count = 0
-                retried[poll_index] = True
-    return False, False
-
-
-def mod_launch_rpc(mode, mod, args, outputs, tuning=False):
-    """
-    launch rpc or rpc_cloud module with retry.
-
-    Note:
-        To minimize waiting time of struggler RPC servers, we wait for a short timeout and spawn
-        a new thread after the timeout.
-        In normal case, RPC would complete before the short timeout, so, only one thread will be created.
-        When the RPC server is slow, we create multiple threads that run concurrently.
-        We wait for the first thread that successfully completes its work and return the result.
-        If a thread fails (an exception is raised), we spawn a new thread to retry.
-        Newly spawned threads will use different RPC servers.
-        We bound the maximum number of threads, i.e. maximum number of retries.
-    """
-    max_num_threads = 5
-
-    import operator
-    arg_filter = filter(lambda x: isinstance(x, np.ndarray), args)
-    arg_tensor = list(arg_filter)
-    tensor_size = reduce(operator.add, (reduce(operator.mul, arg.shape) for arg in arg_tensor))
-    expected_upload_speed = 5e6
-    expected_upload_time = int(tensor_size / expected_upload_speed)
-
-    timeout_before_spawning_new_thread = 200 + expected_upload_time
-    poll_interval = 1
-    thread_timeout = 400 + expected_upload_time * 3
-
-    load_rpc_server_info(mode)
-
-    threads = [None] * max_num_threads
-    results = [None] * max_num_threads
-    need_retry = [None] * max_num_threads
-    retried = [False] * max_num_threads
-    for thread_index in range(max_num_threads):
-        if thread_index > 0:
-            logging.error("Thread %d run for %d seconds, spawn a new thread to retry",
-                          (thread_index - 1), timeout_before_spawning_new_thread)
-        threads[thread_index] = Thread(target=mod_launch_rpc_thread,
-                                       args=(mode, mod, args, outputs, results, need_retry, thread_index, tuning))
-        # daemonize the thread to prevent long running threads from hanging the whole process
-        threads[thread_index].daemon = True
-        threads[thread_index].start()
-        poll_count = timeout_before_spawning_new_thread // poll_interval
-        has_res, res = _get_rpc_result(poll_count, threads, thread_index, poll_interval, need_retry, results, retried)
-        if has_res:
-            return res
-
-    logging.error("All %d threads are created, poll the threads until the first one exits normally, \
-                  or all threads exit abnormally or timeout", max_num_threads)
-    poll_count = thread_timeout // poll_interval
-    for _ in range(poll_count):
-        threads[max_num_threads - 1].join(poll_interval)
-        exit_thread_count = 0
-        for poll_index in range(max_num_threads):
-            if not threads[poll_index].is_alive() and not need_retry[poll_index]:
-                return results[poll_index]
-            if not threads[poll_index].is_alive():
-                exit_thread_count += 1
-        if exit_thread_count == max_num_threads:
-            logging.error("All %d threads exit abnormally", max_num_threads)
-            return None
-
-    logging.error("All %d threads timeout", max_num_threads)
-    return None
-
-
 def profiling_mode_run(kernel_name, args, outputs, tuning, device_id):
     """
     Function for collecting cycle data from device.
@@ -402,9 +193,7 @@ def profiling_mode_run(kernel_name, args, outputs, tuning, device_id):
     logging.error("OOPS, can't correctly parsing cycles!")
     TestUtils.record_cycle(cycle)
     logging.info('=====parsing cycles==============================')
-
-    return output_data, {'run_time': cycle}
-    return output_data
+    return output_data, {'run_time': cycle}
 
 
 def profiling_analyse(device_id, time_before_launch):
@@ -618,6 +407,20 @@ def mod_launch(mod, args, outputs=(-1,), tuning=False, device_id=-1, expect=None
     if device_id == -1:
         device_id = int(os.environ.get("DEVICE_ID", 0))
 
+    # npu-inference process
+    if isinstance(mod, str):
+        kernel_name = mod
+        run_func = ascend_run
+        run_args = [kernel_name, args, outputs, device_id]
+        if os.environ.get("PROFILING_MODE") == "true":
+            run_func = profiling_mode_run
+            run_args = [kernel_name, args, outputs, tuning, device_id]
+            if os.environ.get("PROFILING_DIR", None) is None:
+                os.environ["PROFILING_DIR"] = "."
+                logging.info("[RUNTIME_WARNING] In profiling mode, while profiling dir is not set!Set to current dir by default.")
+        output = run_func(*run_args)
+        return output
+
     module = mod if mod.type_key == LLVM else mod.imported_modules[0]
     target = module.type_key
     if target == LLVM or target == CUDA:
@@ -635,8 +438,6 @@ def mod_launch(mod, args, outputs=(-1,), tuning=False, device_id=-1, expect=None
             return output
         ra_util.get_ticks(stat_info)
         return output, stat_info
-    if mode in ('rpc', 'rpc_cloud'):
-        return mod_launch_rpc(mode, mod, args, outputs, tuning)
 
     # The air_cloud is the current default mode and needs to be modified in the future
     if mode == 'air_cloud':
@@ -658,7 +459,7 @@ def mod_launch(mod, args, outputs=(-1,), tuning=False, device_id=-1, expect=None
         mod(*tvm_array)
         return tvm_array[-1].asnumpy()
 
-    raise ValueError("mode must be aic,
+    raise ValueError("mode must be aic, aic_cloud, ca, compile_cloud, compile_mini, cpu, csim, ccesim or cdiff")
 
 
 def _extract_shape_dtype(input_shapes, input_types):
@@ -1004,44 +805,6 @@ def _create_gpu_mod(s, op_var, target, shape_var, kernel_name, attrs, polyhedral
     return mod
 
 
-def _create_gpu_tuning_mod(sch_tmpl, shape_var, kernel_name, attrs, binds):
-    """Create tuning module on gpu."""
-    @autotvm.template
-    def _autotune_template():
-        s = sch_tmpl['schedule'](sch_tmpl['output'])
-        return s, op_var
-
-    # create autotune task
-    task = autotvm.task.create(_autotune_template, args=list(), target='cuda')
-    print("task config: ", task.config_space)
-
-    # set measure_option
-    measure_option = autotvm.measure_option(
-        builder=autotvm.LocalBuilder(),
-        runner=autotvm.LocalRunner(repeat=5, min_repeat_ms=150, timeout=4)
-    )
-
-    # Begin tuning, log records to file `kernel_name.log`
-    tuner = autotvm.tuner.RandomTuner(task)
-    if not os.path.exists(kernel_name + '.log'):
-        tuner.tune(n_trial=len(task.config_space),
-                   measure_option=measure_option,
-                   callbacks=[autotvm.callback.log_to_file(kernel_name + '.log')])
-
-    # query best config
-    dispatch_context = autotvm.apply_history_best(kernel_name + '.log')
-    best_config = dispatch_context.query(task.target, task.workload)
-    print("\nBest config is:")
-    print(best_config)
-
-    # apply best config
-    with autotvm.apply_history_best(kernel_name + '.log'):
-        s, op_var = _autotune_template()
-        mod = akg.build(s, op_var, "cuda", shape_var, name=kernel_name, attrs=attrs,
-                        polyhedral=False, binds=binds)
-    return mod
-
-
 def create_gpu_mod(sch_tmpl, s, op_func, op_var, shape_var, kernel_name, attrs, polyhedral, binds, dump_ir, dump_code,
                    tuning):
     """
@@ -1079,7 +842,7 @@ def create_gpu_mod(sch_tmpl, s, op_func, op_var, shape_var, kernel_name, attrs,
             s = sch_tmpl['schedule'](sch_tmpl['output'])
             mod = _create_gpu_mod(s, op_var, "cuda", shape_var, kernel_name, attrs, False, binds, dump_ir)
         else:
-
+            raise ValueError("Tuning is not supported.")
     else:
         mod = _create_gpu_mod(s, op_var, target, shape_var, kernel_name, attrs, polyhedral, binds, dump_ir)
     if dump_code:
@@ -1213,6 +976,10 @@ def op_build(op_func, input_shapes, input_types, op_attrs=None, kernel_name="",
         compute_func(s)
         polyhedral = False
 
+    if attrs.get("simple_mode"):
+        attrs.pop("simple_mode")
+        return s, inputs, output, attrs
+
     level = attrs.get("help_tiling") if attrs and "help_tiling" in attrs else None
     if tuning or (level is not None and level > help_tiling_level.get('None')):
         return gen_spaces_dim_key(op_func, args, s, op_var, kernel_name, attrs, polyhedral, tuning, target)
@@ -1231,10 +998,11 @@
                         polyhedral=polyhedral, binds=binds)
         source_code = mod.get_source()
     elif target_name == CCE:
-
-
-                            polyhedral=polyhedral, binds=binds)
+        mod = npu_op_build(s, op_var, shape_var, kernel_name, binds, attrs, dump_ir, polyhedral)
+        if attrs.get("is_tbe_codegen"):
             source_code = mod.imported_modules[0].get_source()
+        else:
+            return mod
 
     if log_code:
         logging.debug("#################code####################")
@@ -1244,11 +1012,41 @@
     return mod
 
 
+def npu_op_build(s, op_var, shape_var, kernel_name="", binds=None, attrs=None,
+                 dump_ir=True, polyhedral=True):
+    if attrs.get("is_tbe_codegen"):
+        # use akg + tbe compile
+        from akg.tvm import build_module
+        from akg.python.akg.utils.tbe_codegen_utils import build_tbe_codegen
+        if attrs is None:
+            attrs = {}
+        attrs.update({"is_tbe_codegen":True})
+        binds, arg_list = build_module.get_binds(op_var)
+        stmt = akg.lower(s, op_var, shape_params=shape_var, name=kernel_name, binds=binds, attrs=attrs,
+                         simple_mode=True, polyhedral=polyhedral, tuning=False, target="cce")
+
+        json_str = akg.tvm.save_json(stmt, "0.8.0")
+
+        args_json = []
+        for buf in enumerate(arg_list):
+            args_json.append(akg.tvm.save_json(buf, "0.8.0"))
+
+        is_success = build_tbe_codegen(kernel_name, json_str, args_json, attrs.get("dynamic", False))
+        if not is_success:
+            raise TypeError("npu_inference codegen failed.")
+        return kernel_name
+    else:
+        # use the whole akg complie
+        with akg.build_config(dump_pass_ir=dump_ir):
+            mod = akg.build(s, op_var, CCE, shape_var, name=kernel_name, attrs=attrs,
+                            polyhedral=polyhedral, binds=binds)
+        return mod
+
 def get_runtime_mode():
     """get runtime mode."""
     env_dic = os.environ
     if not env_dic.get('RUNTIME_MODE'):
-        mode = '
+        mode = 'aic_cloud'
     else:
         mode = env_dic.get('RUNTIME_MODE')
     return mode
@@ -1265,7 +1063,7 @@ def get_profiling_mode():
 def product_is_mini():
     """check whether in mini environment."""
     mode = get_runtime_mode()
-    if mode in ('
+    if mode in ('air', 'aic', 'compile_mini'):
        return True
     return False
 
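Reading note on the mod_launch change above: when mod arrives as a kernel-name string rather than a TVM module, launching is delegated to ascend_run, or to profiling_mode_run when PROFILING_MODE=true. A minimal standalone sketch of that dispatch follows; the two stub helpers are hypothetical stand-ins for the kernel_exec internals named in the hunk, and only the control flow is taken from the diff.

import os

def ascend_run(kernel_name, args, outputs, device_id):
    # hypothetical stub for the real NPU launcher in kernel_exec.py
    print(f"running {kernel_name} on device {device_id}")

def profiling_mode_run(kernel_name, args, outputs, tuning, device_id):
    # hypothetical stub for the profiling launcher in kernel_exec.py
    print(f"profiling {kernel_name} on device {device_id}")
    return None, {"run_time": 0}

def launch_by_name(kernel_name, args, outputs=(-1,), tuning=False, device_id=0):
    """Mirror of the new isinstance(mod, str) branch in mod_launch (sketch only)."""
    if os.environ.get("PROFILING_MODE") == "true":
        if os.environ.get("PROFILING_DIR") is None:
            # profiling artifacts land under PROFILING_DIR; default to the cwd
            os.environ["PROFILING_DIR"] = "."
        return profiling_mode_run(kernel_name, args, outputs, tuning, device_id)
    return ascend_run(kernel_name, args, outputs, device_id)

launch_by_name("add_fp16_kernel", args=[])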
mindspore/_akg/akg/utils/result_analysis.py

@@ -351,16 +351,8 @@ def _collect_inputs(input_desc):
     return inputs
 
 
-def _get_op_attr(op_name, attrs, attr_name):
-    """Get op attr value."""
-    for attr in attrs:
-        if attr["name"] == attr_name:
-            return attr["value"]
-    raise ValueError("Can not find attr '{}' in op {}".format(attr_name, op_name))
-
-
 def precision_analyze(desc: dict, tensors):
-    exclude_op_list = ["Minimum", "Maximum", "Reshape", "ZerosLike", "Tile", "Select", "
+    exclude_op_list = ["Minimum", "Maximum", "Reshape", "ZerosLike", "Tile", "Select", "Greater",
                        "SelectGT", "SelectLT", "LessEqual", "Less", "EquivFormat", "ExpandDims", "Transpose",
                        "TransData", "BroadcastTo", "Assign"]
     input_tensors = _collect_inputs(desc["input_desc"])
@@ -369,21 +361,9 @@ def precision_analyze(desc: dict, tensors):
     graph = {}
     ops = {}  # recorder the operator that generates the current output
     for op in desc["op_desc"]:
-
-
-
-            graph[output] = [inputs]
-            ops[output] = op["name"]
-            fake_output = _get_op_attr(op["name"], op["attr"], "fake_output")
-            if not fake_output:
-                output = IOInfo(op["output_desc"][0]["tensor_name"], op["output_desc"][0]["data_type"])
-                inputs = IOInfo(op["input_desc"][2][0]["tensor_name"], op["input_desc"][2][0]["data_type"])
-                graph[output] = [inputs]
-                ops[output] = op["name"]
-        else:
-            output = IOInfo(op["output_desc"][0]["tensor_name"], op["output_desc"][0]["data_type"])
-            graph[output] = _collect_inputs(op["input_desc"])
-            ops[output] = op["name"]
+        output = IOInfo(op["output_desc"][0]["tensor_name"], op["output_desc"][0]["data_type"])
+        graph[output] = _collect_inputs(op["input_desc"])
+        ops[output] = op["name"]
 
     def _precision_reduce(x: IOInfo):
         if x in input_tensors:
mindspore/_akg/akg/utils/tbe_codegen_utils.py (new file)

@@ -0,0 +1,198 @@
+#!/usr/bin/env python3
+# coding: utf-8
+# Copyright 2023 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import os
+import logging
+
+logging.getLogger().setLevel(logging.INFO)
+
+
+def copy_to_akg_kernel_meta(kernel_name, postfixs):
+    akg_kernel_mate_str = "akg_kernel_meta"
+    source = os.path.realpath(os.getenv('MS_COMPILER_CACHE_PATH', './'))
+    import shutil
+    target = source + "/" + akg_kernel_mate_str + "/" + kernel_name
+    source = source + "/" + "kernel_meta/" + kernel_name
+    if not os.path.exists(akg_kernel_mate_str):
+        os.mkdir(akg_kernel_mate_str)
+    for postfix in postfixs:
+        if os.path.exists(source + postfix):
+            try:
+                shutil.move(source + postfix, target + postfix)
+            except IOError as e:
+                logging.error("Unable to move file. {}".format(e))
+            except Exception as e:
+                logging.error("Unexpected error:", e)
+        else:
+            logging.info("Move {} fail, exit.".format(source + postfix))
+            return False
+    return True
+
+
+def clean_env():
+    import gc
+    import sys
+
+    imported_modules = set(sys.modules.keys())
+    for obj_key in imported_modules:
+        if "conda" in obj_key:
+            continue
+        if "akg" in obj_key or "topi" in obj_key or "tvm" in obj_key:
+            del sys.modules[obj_key]
+            try:
+                del globals()[obj_key]
+            except KeyError:
+                pass
+            try:
+                del locals()[obj_key]
+            except KeyError:
+                pass
+
+    gc.collect()
+
+
+def auto_init_soc(ascend_type):
+    from tbe.common.platform import set_current_compile_soc_info
+    set_current_compile_soc_info(ascend_type)
+
+
+def build_npu_for_akg(kernel_name,
+                      stmt=None,
+                      arg_list=None,
+                      is_dynamic=False,
+                      cfg=None,
+                      simple_mode=False):
+    import tbe
+    from tbe.tvm.tir import transform
+    from tbe.tvm.driver.cce_build_module import _count_time, generate_cce_code
+    from tbe.common.buildcfg import set_current_build_config
+    from tbe.common.buildcfg.buildcfg_mapping import dynamic_shape, disable_vectorize, tik, enable_const_fold, \
+        dynamic_tik, instrument_bound_checkers, tbe_workspace_size_list_length
+
+    set_current_build_config(tbe_workspace_size_list_length,
+                             tbe.tvm.runtime.cce_runtime.tbe_workspace_size_list_length())
+
+    if stmt is None or arg_list is None:
+        raise ValueError("No json, exit.")
+
+    func = tbe.tvm.tir.PrimFunc(arg_list, stmt)
+    mod = tbe.tvm.IRModule({kernel_name : func})
+    # _static_lower_phase_0
+    mod = transform.InjectSocVersion()(mod)
+    mod = transform.DeduceOpPlatform()(mod)
+    mod = transform.EmitInsn()(mod)
+
+    # phase 1 _static_lower_phase_emit_insn
+    mod = transform.InjectMultiCoreSync()(mod)
+    mod = transform.SplitCoproc()(mod)
+    mod = transform.SequenceSprInsn()(mod)
+
+    # phase 2
+    mod = transform.TikDoubleBufferSupport()(mod)
+    mod = transform.InjectPipeBuffer()(mod)
+    mod = transform.OptimizeDMA()(mod)
+    mod = transform.SubstituteInstr()(mod)
+    mod = transform.InjectAccessPtrMSG()(mod)
+    mod = transform.InjectPipe()(mod)
+    mod = transform.DeSequenceSprInsn()(mod)
+    mod = transform.CanonicalSimplify()(mod)
+    mod = transform.SetSPROptimizer()(mod)
+    if cfg[enable_const_fold]:
+        mod = transform.ConstantFolding()(mod)
+    if not simple_mode:
+        mod = transform.LoopPartition()(mod)
+    if cfg[disable_vectorize]:
+        mod = transform.SkipVectorize()(mod)
+    else:
+        mod = transform.VectorizeLoop()(mod)
+    mod = transform.InjectVirtualThread()(mod)
+
+    # phase 3 _static_lower_phase_3
+    mod = transform.StorageRewriteCCE()(mod)
+    mod = transform.ReorderProcess()(mod)
+    if cfg[tik] and cfg[dynamic_tik]:
+        mod = transform.TikDynamicShapeAllocMem()(mod)
+    mod = transform.UnrollLoop()(mod)
+
+    mod = transform.AutoFuseBuffer()(mod)
+    mod = transform.SetCacheMode()(mod)
+    mod = transform.Simplify()(mod)
+    mod = transform.GMConflictElimination()(mod)
+    mod = transform.MarkScalarCoreType()(mod)
+
+    # phase 4 _static_lower_phase_4
+    mod = transform.JumpInstructionElimination()(mod)
+    mod = transform.InjectSync()(mod)
+    mod = transform.PackIntrinArgConfig()(mod)
+    mod = transform.RemoveAccessPtrMSG()(mod)
+    mod = transform.Simplify()(mod)
+    mod = transform.GmAddrPrompt()(mod)
+    mod = transform.InsertCheckInvalidAccessOfDDR()(mod)
+    mod = transform.RemoveNoOp()(mod)
+    mod = transform.DeviceMark()(mod)
+    if cfg[instrument_bound_checkers]:
+        mod = transform.InstrumentBoundCheckers()(mod)
+    mod = transform.ConvertFloorDivToTruncDiv()(mod)
+    mod = transform.BuildVirtualCore()(mod)
+
+    _count_time(mod)
+    mod = transform.SplitCoreCode()(mod)
+    generate_cce_code(mod, "cce", None)
+
+
+def build_tbe_codegen(kernel_name, stmt_json, arg_json, ascend_type=None, is_dynamic=False):
+    import sys
+    copy_modules = sys.modules.copy()
+    clean_env()
+
+    print("build_cce_for_akg")
+    import tbe
+    from tbe.common.buildcfg.default_buildcfg import cce_default_static_build_config
+    from tbe.common.buildcfg.ascend import AscendPassContext
+    from tbe.common.buildcfg.buildcfg_mapping import dump_cce_code, save_temp_cce_file, disable_vectorize, \
+        instrument_bound_checkers, partition_const_loop, auto_unroll_max_step, auto_unroll_max_depth, \
+        auto_unroll_max_extent, unroll_explicit, dynamic_shape, enable_multicore_sync_with_atomic, \
+        kernel_meta_parent_dir
+    cfg = cce_default_static_build_config.copy()
+    cfg[dump_cce_code] = False
+    cfg[save_temp_cce_file] = True
+    cfg[disable_vectorize] = False
+    cfg[instrument_bound_checkers] = False
+    cfg[partition_const_loop] = False
+    cfg[auto_unroll_max_step] = 0
+    cfg[auto_unroll_max_depth] = 8
+    cfg[auto_unroll_max_extent] = 0
+    cfg[unroll_explicit] = True
+    cfg[dynamic_shape] = False
+    cfg[enable_multicore_sync_with_atomic] = True
+    cfg[kernel_meta_parent_dir] = os.path.realpath(os.getenv('MS_COMPILER_CACHE_PATH', './'))
+    if ascend_type is None:
+        ascend_type = "Ascend910"
+    auto_init_soc(ascend_type)
+
+    stmt = tbe.tvm.ir.load_json(stmt_json)
+    arg_list = []
+    for buff in arg_json:
+        arg_list.append(tbe.tvm.ir.load_json(buff))
+    with AscendPassContext(config=cfg):
+        build_npu_for_akg(kernel_name,
+                          stmt,
+                          arg_list,
+                          is_dynamic=is_dynamic,
+                          cfg=cfg)
+    postfixs = [".o", ".cce", ".json"]
+    is_success = copy_to_akg_kernel_meta(kernel_name, postfixs)
+    sys.modules = copy_modules
+    return is_success
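Taken together with the npu_op_build hunk in kernel_exec.py above, the new is_tbe_codegen path works in three steps: akg lowers the schedule to a statement, serializes the statement and buffer arguments with akg.tvm.save_json, and hands the JSON strings to build_tbe_codegen, which purges akg/tvm modules from the process (clean_env), replays TBE's static lowering pipeline, and moves the resulting .o/.cce/.json artifacts into akg_kernel_meta. A condensed, non-authoritative sketch of that handoff, assuming a working akg/TBE environment (the import path is written as it would resolve from this file's location; the kernel_exec hunk itself spells it akg.python.akg.utils.tbe_codegen_utils):

import akg
from akg.tvm import build_module
from akg.utils.tbe_codegen_utils import build_tbe_codegen  # module added in 2.2.0

def compile_via_tbe(s, op_var, kernel_name, attrs):
    # 1. Lower to a statement instead of building a full akg module.
    binds, arg_list = build_module.get_binds(op_var)
    stmt = akg.lower(s, op_var, name=kernel_name, binds=binds, attrs=attrs,
                     simple_mode=True, target="cce")
    # 2. Serialize the IR and the buffer argument list as TVM "0.8.0" JSON.
    stmt_json = akg.tvm.save_json(stmt, "0.8.0")
    args_json = [akg.tvm.save_json(buf, "0.8.0") for buf in arg_list]
    # 3. TBE reloads the JSON, runs its pass pipeline, and emits
    #    kernel_name.o/.cce/.json under akg_kernel_meta.
    if not build_tbe_codegen(kernel_name, stmt_json, args_json):
        raise TypeError("npu_inference codegen failed.")
    return kernel_name

One detail worth flagging: the kernel_exec hunk iterates for buf in enumerate(arg_list), which would serialize (index, buffer) tuples; the sketch serializes the buffers directly.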
mindspore/_c_dataengine.cpython-38-aarch64-linux-gnu.so: Binary file
mindspore/_c_expression.cpython-38-aarch64-linux-gnu.so: Binary file
mindspore/_c_mindrecord.cpython-38-aarch64-linux-gnu.so: Binary file
mindspore/_check_jit_forbidden_api.py

@@ -96,9 +96,11 @@ def is_invalid_or_jit_forbidden_method(obj, obj_type, attr):
     if not hasattr(obj, attr):
         raise AttributeError(f"'{obj_type}' object has no attribute '{attr}'")
     method = getattr(obj, attr)
-    if not hasattr(method, "__module__"):
+    if not hasattr(method, "__module__") or method.__module__ is None:
         return False
     method_info = method.__module__ + '.' + method.__qualname__
     return method_info in _jit_forbidden_method
 
 add_jit_forbidden_module("mindspore.common.initializer")
+add_jit_forbidden_module("mindspore.context")
+add_jit_forbidden_module("mindspore.log")
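The new `or method.__module__ is None` guard matters because hasattr(method, "__module__") can be True while the attribute itself is None (some C-implemented or dynamically created callables report no module), and the old code then crashed concatenating None with a string. A small repro sketch, using a hypothetical probe function whose __module__ is forced to None:

def probe():
    pass

probe.__module__ = None  # simulate a callable whose defining module is unknown

# Old check passed (the attribute exists), then the next line raised:
#   probe.__module__ + '.' + probe.__qualname__  ->  TypeError
# The new check short-circuits and treats it as "not jit-forbidden":
is_unknown = not hasattr(probe, "__module__") or probe.__module__ is None
print(is_unknown)  # True, so is_invalid_or_jit_forbidden_method returns False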