mindspore 2.1.0__cp38-none-any.whl → 2.2.11__cp38-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mindspore/.commit_id +1 -1
- mindspore/__init__.py +4 -1
- mindspore/_akg/akg/build_module.py +5 -6
- mindspore/_akg/akg/composite/build_module.py +139 -22
- mindspore/_akg/akg/composite/split_stitch.py +10 -11
- mindspore/_akg/akg/ms/info_version_adapt.py +67 -1
- mindspore/_akg/akg/tvm/api.py +4 -3
- mindspore/_akg/akg/tvm/autotvm/__init__.py +1 -2
- mindspore/_akg/akg/tvm/autotvm/graph_tuner/base_graph_tuner.py +1 -5
- mindspore/_akg/akg/tvm/autotvm/measure/__init__.py +1 -1
- mindspore/_akg/akg/tvm/autotvm/measure/measure.py +1 -10
- mindspore/_akg/akg/tvm/autotvm/measure/measure_methods.py +1 -372
- mindspore/_akg/akg/tvm/build_module.py +16 -1
- mindspore/_akg/akg/tvm/contrib/graph_runtime.py +0 -53
- mindspore/_akg/akg/tvm/hybrid/parser.py +7 -6
- mindspore/_akg/akg/tvm/ir_builder.py +1 -1
- mindspore/_akg/akg/tvm/module.py +1 -2
- mindspore/_akg/akg/tvm/stmt.py +2 -2
- mindspore/_akg/akg/utils/ascend_profilier/cann_file_parser.py +76 -0
- mindspore/_akg/akg/utils/ascend_profilier/file_manager.py +56 -0
- mindspore/_akg/akg/utils/ascend_profilier/op_summary_bean.py +23 -0
- mindspore/_akg/akg/utils/ascend_profilier/op_summary_headers.py +8 -0
- mindspore/_akg/akg/utils/ascend_profilier/op_summary_parser.py +42 -0
- mindspore/_akg/akg/utils/ascend_profilier/path_manager.py +65 -0
- mindspore/_akg/akg/utils/composite_op_helper.py +16 -12
- mindspore/_akg/akg/utils/dump_ascend_meta.py +22 -3
- mindspore/_akg/akg/utils/kernel_exec.py +98 -274
- mindspore/_akg/akg/utils/result_analysis.py +4 -24
- mindspore/_akg/akg/utils/tbe_codegen_utils.py +219 -0
- mindspore/_akg/akg/utils/util.py +56 -1
- mindspore/_c_dataengine.cpython-38-aarch64-linux-gnu.so +0 -0
- mindspore/_c_expression.cpython-38-aarch64-linux-gnu.so +0 -0
- mindspore/_c_mindrecord.cpython-38-aarch64-linux-gnu.so +0 -0
- mindspore/_check_jit_forbidden_api.py +3 -1
- mindspore/_checkparam.py +23 -29
- mindspore/_extends/graph_kernel/__init__.py +0 -1
- mindspore/_extends/graph_kernel/model/graph_split.py +84 -76
- mindspore/_extends/graph_kernel/model/model_builder.py +9 -50
- mindspore/_extends/graph_kernel/splitter.py +4 -11
- mindspore/_extends/parallel_compile/akg_compiler/akg_process.py +122 -15
- mindspore/_extends/parallel_compile/akg_compiler/build_tbe_kernel.py +84 -67
- mindspore/_extends/parallel_compile/akg_compiler/tbe_topi.py +4 -2
- mindspore/_extends/parallel_compile/akg_compiler/util.py +10 -7
- mindspore/_extends/parallel_compile/tbe_compiler/tbe_adapter.py +2 -2
- mindspore/_extends/parallel_compile/tbe_compiler/tbe_helper.py +6 -5
- mindspore/_extends/parallel_compile/tbe_compiler/tbe_job.py +1 -1
- mindspore/_extends/parallel_compile/tbe_compiler/tbe_job_manager.py +1 -1
- mindspore/_extends/parse/__init__.py +13 -15
- mindspore/_extends/parse/namespace.py +7 -33
- mindspore/_extends/parse/parser.py +67 -72
- mindspore/_extends/parse/resources.py +1 -1
- mindspore/_extends/parse/standard_method.py +86 -106
- mindspore/_extends/parse/trope.py +1 -1
- mindspore/_extends/remote/kernel_build_server.py +25 -7
- mindspore/_extends/remote/kernel_build_server_akg_v2.py +55 -0
- mindspore/_install_custom.py +43 -0
- mindspore/_mindspore_offline_debug.cpython-38-aarch64-linux-gnu.so +0 -0
- mindspore/amp.py +47 -11
- mindspore/bin/cache_admin +0 -0
- mindspore/bin/cache_server +0 -0
- mindspore/boost/boost.py +1 -8
- mindspore/boost/boost_cell_wrapper.py +3 -2
- mindspore/boost/grad_accumulation.py +1 -1
- mindspore/boost/group_loss_scale_manager.py +8 -7
- mindspore/common/__init__.py +5 -3
- mindspore/common/_jit_fallback_utils.py +6 -0
- mindspore/common/_register_for_adapter.py +2 -0
- mindspore/common/_register_for_tensor.py +2 -2
- mindspore/common/_stub_tensor.py +13 -0
- mindspore/common/_utils.py +29 -0
- mindspore/common/api.py +174 -259
- mindspore/common/auto_dynamic_shape.py +494 -0
- mindspore/common/dtype.py +18 -11
- mindspore/common/dump.py +6 -4
- mindspore/common/initializer.py +14 -14
- mindspore/common/jit_config.py +33 -15
- mindspore/common/lazy_inline.py +126 -7
- mindspore/common/mindir_util.py +101 -0
- mindspore/common/parameter.py +51 -41
- mindspore/common/seed.py +4 -4
- mindspore/common/sparse_tensor.py +13 -14
- mindspore/common/tensor.py +243 -165
- mindspore/communication/__init__.py +7 -4
- mindspore/communication/_comm_helper.py +83 -4
- mindspore/communication/management.py +152 -84
- mindspore/config/op_info.config +14 -3
- mindspore/config/super_bar_config.json +4 -2
- mindspore/context.py +152 -61
- mindspore/dataset/__init__.py +5 -5
- mindspore/dataset/audio/__init__.py +2 -2
- mindspore/dataset/audio/transforms.py +52 -52
- mindspore/dataset/callback/ds_callback.py +16 -2
- mindspore/dataset/core/config.py +68 -51
- mindspore/dataset/engine/cache_client.py +33 -7
- mindspore/dataset/engine/datasets.py +250 -112
- mindspore/dataset/engine/datasets_audio.py +43 -211
- mindspore/dataset/engine/datasets_standard_format.py +16 -35
- mindspore/dataset/engine/datasets_text.py +43 -67
- mindspore/dataset/engine/datasets_user_defined.py +86 -100
- mindspore/dataset/engine/datasets_vision.py +219 -1029
- mindspore/dataset/engine/iterators.py +11 -4
- mindspore/dataset/engine/obs/obs_mindrecord_dataset.py +4 -0
- mindspore/dataset/engine/obs/util.py +3 -0
- mindspore/dataset/engine/samplers.py +1 -1
- mindspore/dataset/engine/validators.py +19 -5
- mindspore/dataset/text/__init__.py +3 -3
- mindspore/dataset/text/transforms.py +101 -127
- mindspore/dataset/text/utils.py +205 -138
- mindspore/dataset/transforms/__init__.py +1 -1
- mindspore/dataset/transforms/py_transforms_util.py +40 -12
- mindspore/dataset/transforms/transforms.py +95 -40
- mindspore/dataset/utils/browse_dataset.py +8 -2
- mindspore/dataset/utils/line_reader.py +17 -19
- mindspore/dataset/vision/__init__.py +3 -3
- mindspore/dataset/vision/c_transforms.py +6 -3
- mindspore/dataset/vision/transforms.py +409 -287
- mindspore/dataset/vision/utils.py +13 -14
- mindspore/dataset/vision/validators.py +11 -1
- mindspore/experimental/map_parameter.py +14 -0
- mindspore/{nn/optim_ex → experimental/optim}/__init__.py +30 -29
- mindspore/{nn/optim_ex → experimental/optim}/adam.py +60 -67
- mindspore/{nn/optim_ex → experimental/optim}/adamw.py +181 -203
- mindspore/experimental/optim/lr_scheduler.py +1427 -0
- mindspore/{nn/optim_ex → experimental/optim}/optimizer.py +252 -259
- mindspore/{nn/optim_ex → experimental/optim}/sgd.py +147 -152
- mindspore/gen_ops.py +273 -0
- mindspore/include/OWNERS +0 -1
- mindspore/include/api/data_type.h +2 -1
- mindspore/include/api/graph.h +0 -15
- mindspore/include/api/kernel.h +2 -0
- mindspore/include/api/kernel_api.h +37 -12
- mindspore/include/api/model.h +17 -14
- mindspore/include/api/status.h +8 -3
- mindspore/include/api/types.h +37 -4
- mindspore/include/c_api/ms/abstract.h +67 -0
- mindspore/include/c_api/ms/attribute.h +197 -0
- mindspore/include/c_api/ms/base/handle_types.h +43 -0
- mindspore/include/c_api/ms/base/macros.h +32 -0
- mindspore/include/c_api/ms/base/status.h +33 -0
- mindspore/include/c_api/ms/base/types.h +282 -0
- mindspore/include/c_api/ms/context.h +102 -0
- mindspore/include/c_api/ms/graph.h +160 -0
- mindspore/include/c_api/ms/node.h +606 -0
- mindspore/include/c_api/ms/tensor.h +161 -0
- mindspore/include/c_api/ms/value.h +84 -0
- mindspore/include/dataset/constants.h +6 -5
- mindspore/include/dataset/execute.h +23 -13
- mindspore/include/dataset/text.h +26 -26
- mindspore/include/dataset/transforms.h +13 -13
- mindspore/include/dataset/vision.h +60 -60
- mindspore/include/dataset/vision_ascend.h +5 -6
- mindspore/include/dataset/vision_lite.h +17 -17
- mindspore/include/mindapi/base/type_id.h +1 -0
- mindspore/include/mindapi/base/types.h +1 -0
- mindspore/lib/libdnnl.so.2 +0 -0
- mindspore/lib/libjemalloc.so.2 +0 -0
- mindspore/lib/libmindspore.so +0 -0
- mindspore/lib/libmindspore_backend.so +0 -0
- mindspore/lib/libmindspore_common.so +0 -0
- mindspore/lib/libmindspore_core.so +0 -0
- mindspore/lib/libmindspore_glog.so.0 +0 -0
- mindspore/lib/libmindspore_gpr.so.15 +0 -0
- mindspore/lib/libmindspore_grpc++.so.1 +0 -0
- mindspore/lib/libmindspore_grpc.so.15 +0 -0
- mindspore/lib/libmindspore_shared_lib.so +0 -0
- mindspore/lib/libnnacl.so +0 -0
- mindspore/lib/libopencv_core.so.4.5 +0 -0
- mindspore/lib/libopencv_imgcodecs.so.4.5 +0 -0
- mindspore/lib/libopencv_imgproc.so.4.5 +0 -0
- mindspore/lib/libps_cache.so +0 -0
- mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/config/ascend310/aic-ascend310-ops-info.json +123 -0
- mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/config/ascend310p/aic-ascend310p-ops-info.json +123 -0
- mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/config/ascend910/aic-ascend910-ops-info.json +158 -0
- mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/config/ascend910b/aic-ascend910b-ops-info.json +37 -0
- mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/custom_aicore_ops_impl/add_dsl.py +46 -0
- mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/custom_aicore_ops_impl/add_tik.py +51 -0
- mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/custom_aicore_ops_impl/kv_cache_mgr.py +241 -0
- mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/custom_aicore_ops_impl/matmul_tik.py +212 -0
- mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/vector_core/tbe/custom_aicore_ops_impl/add_dsl.py +46 -0
- mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/vector_core/tbe/custom_aicore_ops_impl/add_tik.py +51 -0
- mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/vector_core/tbe/custom_aicore_ops_impl/kv_cache_mgr.py +241 -0
- mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/vector_core/tbe/custom_aicore_ops_impl/matmul_tik.py +212 -0
- mindspore/lib/plugin/ascend/custom_aicore_ops/op_proto/libop_proto.so +0 -0
- mindspore/lib/plugin/ascend/custom_aicpu_ops/op_impl/cpu/aicpu_kernel/impl/libcust_aicpu_kernels.so +0 -0
- mindspore/lib/plugin/ascend/custom_aicpu_ops/op_impl/cpu/aicpu_kernel/impl/libcust_cpu_kernels.so +0 -0
- mindspore/lib/plugin/ascend/custom_aicpu_ops/op_impl/cpu/config/cust_aicpu_kernel.json +8998 -0
- mindspore/lib/plugin/ascend/custom_aicpu_ops/op_proto/libcust_op_proto.so +0 -0
- mindspore/lib/plugin/ascend/libakg.so +0 -0
- mindspore/lib/plugin/ascend/libascend_collective.so +0 -0
- mindspore/lib/plugin/ascend/libdvpp_utils.so +0 -0
- mindspore/lib/plugin/ascend/libhccl_plugin.so +0 -0
- mindspore/lib/plugin/ascend/libmindspore_aicpu_kernels.so +0 -0
- mindspore/lib/plugin/ascend/libmindspore_cpu_kernels.so +0 -0
- mindspore/lib/plugin/cpu/libakg.so +0 -0
- mindspore/lib/plugin/libmindspore_ascend.so.1 +0 -0
- mindspore/lib/plugin/libmindspore_ascend.so.2 +0 -0
- mindspore/mindrecord/tools/imagenet_to_mr.py +1 -1
- mindspore/mindrecord/tools/mnist_to_mr.py +2 -2
- mindspore/nn/__init__.py +0 -2
- mindspore/nn/cell.py +313 -74
- mindspore/nn/dynamic_lr.py +21 -21
- mindspore/nn/layer/activation.py +22 -30
- mindspore/nn/layer/basic.py +15 -13
- mindspore/nn/layer/channel_shuffle.py +1 -1
- mindspore/nn/layer/container.py +271 -9
- mindspore/nn/layer/conv.py +323 -204
- mindspore/nn/layer/dense.py +8 -5
- mindspore/nn/layer/embedding.py +33 -27
- mindspore/nn/layer/flash_attention.py +61 -95
- mindspore/nn/layer/image.py +8 -6
- mindspore/nn/layer/math.py +16 -25
- mindspore/nn/layer/normalization.py +107 -66
- mindspore/nn/layer/padding.py +1 -1
- mindspore/nn/layer/pooling.py +131 -109
- mindspore/nn/layer/rnn_cells.py +27 -22
- mindspore/nn/layer/rnns.py +13 -16
- mindspore/nn/layer/thor_layer.py +1 -1
- mindspore/nn/layer/transformer.py +221 -154
- mindspore/nn/learning_rate_schedule.py +9 -1
- mindspore/nn/loss/loss.py +235 -174
- mindspore/nn/optim/ada_grad.py +2 -1
- mindspore/nn/optim/adadelta.py +1 -0
- mindspore/nn/optim/adafactor.py +2 -1
- mindspore/nn/optim/adam.py +7 -4
- mindspore/nn/optim/adamax.py +3 -2
- mindspore/nn/optim/adasum.py +2 -2
- mindspore/nn/optim/asgd.py +2 -3
- mindspore/nn/optim/ftrl.py +6 -5
- mindspore/nn/optim/lamb.py +7 -4
- mindspore/nn/optim/lars.py +1 -1
- mindspore/nn/optim/lazyadam.py +5 -3
- mindspore/nn/optim/momentum.py +2 -1
- mindspore/nn/optim/optimizer.py +53 -4
- mindspore/nn/optim/proximal_ada_grad.py +3 -4
- mindspore/nn/optim/rmsprop.py +4 -3
- mindspore/nn/optim/rprop.py +23 -12
- mindspore/nn/optim/sgd.py +26 -11
- mindspore/nn/optim/thor.py +9 -7
- mindspore/nn/probability/bijector/bijector.py +5 -5
- mindspore/nn/probability/bijector/power_transform.py +27 -27
- mindspore/nn/probability/bijector/softplus.py +3 -3
- mindspore/nn/probability/distribution/_utils/custom_ops.py +3 -3
- mindspore/nn/probability/distribution/bernoulli.py +5 -5
- mindspore/nn/probability/distribution/beta.py +3 -3
- mindspore/nn/probability/distribution/categorical.py +7 -7
- mindspore/nn/probability/distribution/cauchy.py +0 -1
- mindspore/nn/probability/distribution/distribution.py +3 -3
- mindspore/nn/probability/distribution/gamma.py +3 -3
- mindspore/nn/probability/distribution/geometric.py +4 -4
- mindspore/nn/probability/distribution/gumbel.py +4 -4
- mindspore/nn/probability/distribution/log_normal.py +2 -2
- mindspore/nn/probability/distribution/logistic.py +2 -2
- mindspore/nn/probability/distribution/poisson.py +4 -4
- mindspore/nn/probability/distribution/transformed_distribution.py +3 -3
- mindspore/nn/probability/distribution/uniform.py +6 -6
- mindspore/nn/wrap/__init__.py +4 -2
- mindspore/nn/wrap/cell_wrapper.py +87 -34
- mindspore/nn/wrap/grad_reducer.py +8 -5
- mindspore/nn/wrap/loss_scale.py +105 -42
- mindspore/numpy/array_creations.py +1 -2
- mindspore/numpy/array_ops.py +3 -2
- mindspore/numpy/utils_const.py +5 -5
- mindspore/offline_debug/convert_async.py +2 -2
- mindspore/ops/_grad_experimental/__init__.py +0 -5
- mindspore/ops/_grad_experimental/grad_array_ops.py +2 -3
- mindspore/ops/_grad_experimental/grad_comm_ops.py +15 -2
- mindspore/ops/_grad_experimental/grad_debug_ops.py +0 -37
- mindspore/ops/_grad_experimental/grad_implementations.py +11 -1
- mindspore/ops/_grad_experimental/grad_inner_ops.py +2 -216
- mindspore/ops/_grad_experimental/grad_math_ops.py +19 -199
- mindspore/ops/_grad_experimental/grad_sparse.py +15 -0
- mindspore/ops/_grad_experimental/grad_sparse_ops.py +3 -3
- mindspore/ops/_op_impl/_custom_op/dsd_back_impl.py +1 -1
- mindspore/ops/_op_impl/aicpu/__init__.py +14 -2
- mindspore/ops/_op_impl/aicpu/add.py +3 -3
- mindspore/ops/_op_impl/aicpu/bias_add_grad.py +0 -1
- mindspore/ops/_op_impl/aicpu/count_nonzero.py +43 -0
- mindspore/ops/_op_impl/{_custom_op/flash_attention/constants.py → aicpu/eps.py} +18 -27
- mindspore/ops/_op_impl/aicpu/gamma.py +2 -2
- mindspore/ops/_op_impl/aicpu/linear_sum_assignment.py +21 -2
- mindspore/ops/_op_impl/aicpu/log_uniform_candidate_sampler.py +6 -3
- mindspore/ops/_op_impl/aicpu/lu_unpack_grad.py +0 -1
- mindspore/ops/_op_impl/aicpu/multinomial.py +3 -3
- mindspore/ops/_op_impl/aicpu/parameterized_truncated_normal.py +15 -7
- mindspore/ops/_op_impl/aicpu/random_categorical.py +39 -19
- mindspore/ops/_op_impl/aicpu/random_choice_with_mask.py +5 -2
- mindspore/ops/_op_impl/aicpu/random_poisson.py +103 -52
- mindspore/ops/_op_impl/aicpu/random_shuffle.py +17 -15
- mindspore/ops/_op_impl/aicpu/{sparseaddmm.py → sparse_addmm.py} +2 -2
- mindspore/ops/_op_impl/aicpu/{sparsesparsemaximum.py → sparse_sparse_maximum.py} +4 -4
- mindspore/ops/_op_impl/aicpu/standard_laplace.py +5 -5
- mindspore/ops/_op_impl/aicpu/standard_normal.py +5 -5
- mindspore/ops/_op_impl/aicpu/truncated_normal.py +9 -7
- mindspore/ops/_op_impl/aicpu/uniform.py +5 -3
- mindspore/ops/_op_impl/aicpu/uniform_candidate_sampler.py +8 -4
- mindspore/ops/_op_impl/aicpu/uniform_int.py +5 -5
- mindspore/ops/_op_impl/aicpu/uniform_real.py +4 -4
- mindspore/ops/_op_impl/tbe/__init__.py +4 -4
- mindspore/ops/_op_impl/tbe/inplace_index_add.py +7 -3
- mindspore/ops/_op_impl/tbe/trans_data_ds.py +2 -0
- mindspore/ops/_primitive_cache.py +1 -1
- mindspore/ops/_tracefunc.py +45 -13
- mindspore/ops/_utils/utils.py +6 -1
- mindspore/ops/_vmap/vmap_array_ops.py +3 -3
- mindspore/ops/_vmap/vmap_base.py +3 -3
- mindspore/ops/_vmap/vmap_convolution_ops.py +1 -1
- mindspore/ops/_vmap/vmap_grad_math_ops.py +6 -4
- mindspore/ops/_vmap/vmap_math_ops.py +5 -2
- mindspore/ops/_vmap/vmap_nn_ops.py +61 -7
- mindspore/ops/arg_dtype_cast.py +54 -0
- mindspore/ops/composite/base.py +37 -10
- mindspore/ops/composite/math_ops.py +5 -4
- mindspore/ops/composite/multitype_ops/_compile_utils.py +275 -73
- mindspore/ops/composite/multitype_ops/_constexpr_utils.py +16 -9
- mindspore/ops/composite/multitype_ops/add_impl.py +43 -4
- mindspore/ops/composite/multitype_ops/getitem_impl.py +42 -4
- mindspore/ops/composite/multitype_ops/ones_like_impl.py +6 -0
- mindspore/ops/composite/multitype_ops/setitem_impl.py +2 -1
- mindspore/ops/composite/multitype_ops/zeros_like_impl.py +9 -0
- mindspore/ops/deprecated.py +304 -0
- mindspore/ops/function/__init__.py +4 -1
- mindspore/ops/function/array_func.py +174 -193
- mindspore/ops/function/clip_func.py +81 -13
- mindspore/ops/function/debug_func.py +1 -1
- mindspore/ops/function/grad/grad_func.py +18 -9
- mindspore/ops/function/image_func.py +10 -4
- mindspore/ops/function/linalg_func.py +5 -5
- mindspore/ops/function/math_func.py +575 -386
- mindspore/ops/function/nn_func.py +568 -260
- mindspore/ops/function/random_func.py +88 -57
- mindspore/ops/function/sparse_func.py +1 -1
- mindspore/ops/function/sparse_unary_func.py +14 -12
- mindspore/ops/function/vmap_func.py +6 -5
- mindspore/ops/functional.py +15 -10
- mindspore/ops/op_info_register.py +244 -25
- mindspore/ops/operations/__init__.py +31 -19
- mindspore/ops/operations/_grad_ops.py +71 -7
- mindspore/ops/operations/_inner_ops.py +350 -17
- mindspore/ops/operations/_quant_ops.py +4 -8
- mindspore/ops/operations/_sequence_ops.py +42 -0
- mindspore/ops/operations/array_ops.py +68 -282
- mindspore/ops/operations/comm_ops.py +107 -59
- mindspore/ops/operations/custom_ops.py +94 -70
- mindspore/ops/operations/debug_ops.py +8 -4
- mindspore/ops/operations/image_ops.py +18 -12
- mindspore/ops/operations/inner_ops.py +26 -3
- mindspore/ops/operations/math_ops.py +192 -144
- mindspore/ops/operations/nn_ops.py +857 -489
- mindspore/ops/operations/other_ops.py +0 -22
- mindspore/ops/operations/random_ops.py +53 -111
- mindspore/ops/operations/sparse_ops.py +3 -1
- mindspore/ops/primitive.py +24 -18
- mindspore/parallel/_auto_parallel_context.py +68 -8
- mindspore/parallel/_cost_model_context.py +2 -2
- mindspore/parallel/_offload_context.py +17 -3
- mindspore/parallel/_parallel_serialization.py +12 -5
- mindspore/parallel/_ps_context.py +12 -0
- mindspore/parallel/_tensor.py +18 -13
- mindspore/parallel/_transformer/layers.py +5 -3
- mindspore/parallel/_transformer/loss.py +1 -0
- mindspore/parallel/_transformer/moe.py +2 -2
- mindspore/parallel/_transformer/op_parallel_config.py +12 -1
- mindspore/parallel/_transformer/transformer.py +23 -3
- mindspore/parallel/_utils.py +11 -7
- mindspore/parallel/algo_parameter_config.py +85 -5
- mindspore/parallel/checkpoint_transform.py +19 -12
- mindspore/parallel/shard.py +21 -14
- mindspore/profiler/common/struct_type.py +3 -3
- mindspore/profiler/common/util.py +4 -2
- mindspore/profiler/envprofiling.py +1 -1
- mindspore/profiler/parser/aicpu_data_parser.py +5 -3
- mindspore/profiler/parser/ascend_flops_generator.py +2 -2
- mindspore/profiler/parser/ascend_fpbp_generator.py +1 -1
- mindspore/profiler/parser/ascend_hccl_generator.py +249 -12
- mindspore/profiler/parser/ascend_msprof_exporter.py +150 -255
- mindspore/profiler/parser/ascend_msprof_generator.py +204 -17
- mindspore/profiler/parser/ascend_op_generator.py +6 -6
- mindspore/profiler/parser/ascend_steptrace_generator.py +6 -4
- mindspore/profiler/parser/ascend_timeline_generator.py +14 -187
- mindspore/profiler/parser/base_timeline_generator.py +10 -8
- mindspore/profiler/parser/cpu_gpu_timeline_generator.py +16 -12
- mindspore/profiler/parser/flops_parser.py +15 -11
- mindspore/profiler/parser/framework_parser.py +38 -22
- mindspore/profiler/parser/hccl_parser.py +16 -12
- mindspore/profiler/parser/integrator.py +22 -11
- mindspore/profiler/parser/memory_usage_parser.py +2 -2
- mindspore/profiler/parser/minddata_analyzer.py +12 -14
- mindspore/profiler/parser/minddata_pipeline_parser.py +1 -1
- mindspore/profiler/parser/msadvisor_parser.py +8 -4
- mindspore/profiler/parser/op_intermediate_parser.py +5 -2
- mindspore/profiler/parser/optime_parser.py +1 -1
- mindspore/profiler/parser/profiler_info.py +21 -2
- mindspore/profiler/parser/step_trace_parser.py +11 -14
- mindspore/profiler/profiling.py +179 -89
- mindspore/rewrite/api/node.py +102 -19
- mindspore/rewrite/api/node_type.py +5 -1
- mindspore/rewrite/api/pattern_engine.py +1 -1
- mindspore/rewrite/api/scoped_value.py +9 -17
- mindspore/rewrite/api/symbol_tree.py +131 -47
- mindspore/rewrite/ast_helpers/__init__.py +2 -1
- mindspore/rewrite/ast_helpers/ast_finder.py +129 -0
- mindspore/rewrite/ast_helpers/ast_modifier.py +116 -104
- mindspore/rewrite/ast_transformers/flatten_recursive_stmt.py +93 -46
- mindspore/rewrite/common/rewrite_elog.py +5 -1
- mindspore/rewrite/namer.py +33 -24
- mindspore/rewrite/namespace.py +14 -5
- mindspore/{_extends/graph_kernel/expanders/complex → rewrite/node}/__init__.py +9 -9
- mindspore/rewrite/node/call_function.py +79 -0
- mindspore/rewrite/node/cell_container.py +135 -0
- mindspore/rewrite/node/control_flow.py +88 -0
- mindspore/rewrite/{node.py → node/node.py} +273 -234
- mindspore/rewrite/node/node_manager.py +254 -0
- mindspore/rewrite/{topological_manager.py → node/node_topological_manager.py} +13 -46
- mindspore/rewrite/parsers/arguments_parser.py +22 -21
- mindspore/rewrite/parsers/assign_parser.py +216 -221
- mindspore/rewrite/parsers/attribute_parser.py +9 -7
- mindspore/rewrite/parsers/class_def_parser.py +174 -113
- mindspore/rewrite/parsers/constant_parser.py +9 -6
- mindspore/rewrite/parsers/container_parser.py +9 -7
- mindspore/rewrite/parsers/for_parser.py +42 -21
- mindspore/rewrite/parsers/function_def_parser.py +24 -16
- mindspore/rewrite/parsers/if_parser.py +28 -24
- mindspore/rewrite/parsers/module_parser.py +196 -25
- mindspore/rewrite/{parser.py → parsers/parser.py} +4 -2
- mindspore/rewrite/{parser_register.py → parsers/parser_register.py} +1 -1
- mindspore/rewrite/parsers/return_parser.py +6 -6
- mindspore/rewrite/sparsify/sparse_transformer.py +12 -3
- mindspore/rewrite/sparsify/utils.py +1 -1
- mindspore/rewrite/symbol_tree.py +523 -578
- mindspore/rewrite/symbol_tree_builder.py +9 -193
- mindspore/rewrite/symbol_tree_dumper.py +2 -2
- mindspore/run_check/_check_version.py +6 -4
- mindspore/{ops/bprop_mindir → safeguard}/__init__.py +4 -3
- mindspore/safeguard/rewrite_obfuscation.py +541 -0
- mindspore/scipy/linalg.py +1 -1
- mindspore/scipy/ops.py +55 -5
- mindspore/scipy/optimize/__init__.py +3 -2
- mindspore/scipy/optimize/linear_sum_assignment.py +38 -33
- mindspore/scipy/optimize/minimize.py +7 -3
- mindspore/train/_utils.py +7 -3
- mindspore/train/amp.py +323 -123
- mindspore/train/anf_ir_pb2.py +14 -2
- mindspore/train/callback/_backup_and_restore.py +2 -12
- mindspore/train/callback/_callback.py +29 -4
- mindspore/train/callback/_checkpoint.py +23 -8
- mindspore/train/callback/_early_stop.py +2 -2
- mindspore/train/callback/_landscape.py +4 -4
- mindspore/train/callback/_loss_monitor.py +2 -2
- mindspore/train/callback/_on_request_exit.py +2 -2
- mindspore/train/callback/_reduce_lr_on_plateau.py +3 -4
- mindspore/train/callback/_summary_collector.py +15 -8
- mindspore/train/callback/_time_monitor.py +58 -5
- mindspore/train/data_sink.py +5 -11
- mindspore/train/dataset_helper.py +84 -57
- mindspore/train/loss_scale_manager.py +2 -2
- mindspore/train/metrics/__init__.py +3 -3
- mindspore/train/metrics/cosine_similarity.py +1 -1
- mindspore/train/metrics/hausdorff_distance.py +3 -2
- mindspore/train/metrics/mean_surface_distance.py +3 -2
- mindspore/train/metrics/metric.py +39 -19
- mindspore/train/metrics/roc.py +2 -2
- mindspore/train/metrics/root_mean_square_surface_distance.py +4 -3
- mindspore/train/mind_ir_pb2.py +85 -36
- mindspore/train/model.py +187 -47
- mindspore/train/serialization.py +487 -161
- mindspore/train/summary/_summary_adapter.py +1 -1
- mindspore/train/summary/_writer_pool.py +3 -2
- mindspore/train/summary/summary_record.py +37 -17
- mindspore/train/train_thor/convert_utils.py +3 -3
- mindspore/train/train_thor/dataset_helper.py +1 -1
- mindspore/version.py +1 -1
- {mindspore-2.1.0.dist-info → mindspore-2.2.11.dist-info}/METADATA +8 -8
- {mindspore-2.1.0.dist-info → mindspore-2.2.11.dist-info}/RECORD +477 -528
- {mindspore-2.1.0.dist-info → mindspore-2.2.11.dist-info}/entry_points.txt +0 -1
- mindspore/_akg/akg/tvm/contrib/debugger/__init__.py +0 -16
- mindspore/_akg/akg/tvm/contrib/debugger/debug_result.py +0 -274
- mindspore/_akg/akg/tvm/contrib/debugger/debug_runtime.py +0 -259
- mindspore/_akg/akg/tvm/contrib/peak.py +0 -341
- mindspore/_akg/akg/tvm/contrib/rpc.py +0 -25
- mindspore/_akg/akg/tvm/contrib/xcode.py +0 -257
- mindspore/_akg/akg/tvm/exec/__init__.py +0 -17
- mindspore/_akg/akg/tvm/exec/autotvm_log_editor.py +0 -60
- mindspore/_akg/akg/tvm/exec/measure_peak.py +0 -48
- mindspore/_akg/akg/tvm/exec/query_rpc_tracker.py +0 -48
- mindspore/_akg/akg/tvm/exec/rpc_proxy.py +0 -98
- mindspore/_akg/akg/tvm/exec/rpc_server.py +0 -88
- mindspore/_akg/akg/tvm/exec/rpc_tracker.py +0 -62
- mindspore/_akg/akg/tvm/rpc/__init__.py +0 -29
- mindspore/_akg/akg/tvm/rpc/base.py +0 -182
- mindspore/_akg/akg/tvm/rpc/client.py +0 -436
- mindspore/_akg/akg/tvm/rpc/proxy.py +0 -595
- mindspore/_akg/akg/tvm/rpc/server.py +0 -413
- mindspore/_akg/akg/tvm/rpc/tornado_util.py +0 -121
- mindspore/_akg/akg/tvm/rpc/tracker.py +0 -431
- mindspore/_extends/graph_kernel/expander.py +0 -80
- mindspore/_extends/graph_kernel/expanders/__init__.py +0 -54
- mindspore/_extends/graph_kernel/expanders/_utils.py +0 -269
- mindspore/_extends/graph_kernel/expanders/addn.py +0 -33
- mindspore/_extends/graph_kernel/expanders/batchnorm.py +0 -152
- mindspore/_extends/graph_kernel/expanders/batchnorm_grad.py +0 -105
- mindspore/_extends/graph_kernel/expanders/clip_by_norm_no_div_sum.py +0 -33
- mindspore/_extends/graph_kernel/expanders/complex/abs.py +0 -30
- mindspore/_extends/graph_kernel/expanders/complex/add.py +0 -44
- mindspore/_extends/graph_kernel/expanders/complex/div.py +0 -62
- mindspore/_extends/graph_kernel/expanders/complex/mul.py +0 -52
- mindspore/_extends/graph_kernel/expanders/complex/real_div.py +0 -62
- mindspore/_extends/graph_kernel/expanders/complex/sub.py +0 -45
- mindspore/_extends/graph_kernel/expanders/conv2d.py +0 -200
- mindspore/_extends/graph_kernel/expanders/dropout_grad.py +0 -30
- mindspore/_extends/graph_kernel/expanders/equal_count.py +0 -50
- mindspore/_extends/graph_kernel/expanders/erfc.py +0 -35
- mindspore/_extends/graph_kernel/expanders/expand_dims.py +0 -50
- mindspore/_extends/graph_kernel/expanders/fused_adam.py +0 -44
- mindspore/_extends/graph_kernel/expanders/fused_adam_weight_decay.py +0 -47
- mindspore/_extends/graph_kernel/expanders/fused_mul_add.py +0 -28
- mindspore/_extends/graph_kernel/expanders/gelu_grad.py +0 -70
- mindspore/_extends/graph_kernel/expanders/gkdropout.py +0 -40
- mindspore/_extends/graph_kernel/expanders/identity.py +0 -25
- mindspore/_extends/graph_kernel/expanders/layernorm.py +0 -93
- mindspore/_extends/graph_kernel/expanders/layernorm_grad.py +0 -113
- mindspore/_extends/graph_kernel/expanders/logsoftmax.py +0 -46
- mindspore/_extends/graph_kernel/expanders/logsoftmax_grad.py +0 -36
- mindspore/_extends/graph_kernel/expanders/matmul.py +0 -80
- mindspore/_extends/graph_kernel/expanders/maximum_grad.py +0 -59
- mindspore/_extends/graph_kernel/expanders/minimum_grad.py +0 -80
- mindspore/_extends/graph_kernel/expanders/oneslike.py +0 -26
- mindspore/_extends/graph_kernel/expanders/reduce_mean.py +0 -43
- mindspore/_extends/graph_kernel/expanders/relu_grad.py +0 -32
- mindspore/_extends/graph_kernel/expanders/sigmoid_cross_entropy_with_logits.py +0 -41
- mindspore/_extends/graph_kernel/expanders/sigmoid_cross_entropy_with_logits_grad.py +0 -35
- mindspore/_extends/graph_kernel/expanders/sigmoid_grad.py +0 -31
- mindspore/_extends/graph_kernel/expanders/slice.py +0 -35
- mindspore/_extends/graph_kernel/expanders/softmax_cross_entropy_with_logits.py +0 -42
- mindspore/_extends/graph_kernel/expanders/softmax_grad_ext.py +0 -41
- mindspore/_extends/graph_kernel/expanders/softsign.py +0 -28
- mindspore/_extends/graph_kernel/expanders/sqrt_grad.py +0 -29
- mindspore/_extends/graph_kernel/expanders/square_sum_all.py +0 -44
- mindspore/_extends/graph_kernel/expanders/square_sum_v1.py +0 -37
- mindspore/_extends/graph_kernel/expanders/squared_difference.py +0 -43
- mindspore/_extends/graph_kernel/expanders/tanh_grad.py +0 -31
- mindspore/_extends/graph_kernel/model/op_infer.py +0 -506
- mindspore/dataset/datapreprocess/__init__.py +0 -20
- mindspore/dataset/datapreprocess/preprocess_imagenet_validate_dataset.py +0 -54
- mindspore/include/api/net.h +0 -142
- mindspore/nn/lr_scheduler.py +0 -262
- mindspore/ops/_grad_experimental/grad_image_ops.py +0 -248
- mindspore/ops/_grad_experimental/grad_linalg_ops.py +0 -181
- mindspore/ops/_grad_experimental/grad_other_ops.py +0 -72
- mindspore/ops/_grad_experimental/grad_scalar_ops.py +0 -112
- mindspore/ops/_grad_experimental/grad_sequence_ops.py +0 -351
- mindspore/ops/_op_impl/_custom_op/flash_attention/attention.py +0 -350
- mindspore/ops/_op_impl/_custom_op/flash_attention/flash_attention_bwd.py +0 -409
- mindspore/ops/_op_impl/_custom_op/flash_attention/flash_attention_fwd.py +0 -578
- mindspore/ops/_op_impl/_custom_op/flash_attention/flash_attention_impl.py +0 -199
- mindspore/ops/_op_impl/_custom_op/flash_attention/tik_ops_utils.py +0 -446
- mindspore/ops/_op_impl/_custom_op/flash_attention/tiling_strategy/__init__.py +0 -0
- mindspore/ops/_op_impl/_custom_op/flash_attention/tiling_strategy/sparse_tiling.py +0 -45
- mindspore/ops/_op_impl/_custom_op/flash_attention/tiling_strategy/strategy.py +0 -67
- mindspore/ops/_op_impl/_custom_op/flash_attention/tiling_strategy/wukong_tiling.py +0 -62
- mindspore/ops/bprop_mindir/BNTrainingReduce_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Broadcast_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Depend_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/DepthwiseConv2dNative_bprop.mindir +0 -138
- mindspore/ops/bprop_mindir/EmbeddingLookup_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Load_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/ScatterNonAliasingAdd_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/SparseGatherV2_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/SparseSoftmaxCrossEntropyWithLogits_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Switch_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/TransShape_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/TupleGetItem_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Unique_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Unstack_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/generate_mindir.py +0 -114
- mindspore/rewrite/node_visitor.py +0 -44
- /mindspore/{ops/_op_impl/_custom_op/flash_attention → _akg/akg/utils/ascend_profilier}/__init__.py +0 -0
- {mindspore-2.1.0.dist-info → mindspore-2.2.11.dist-info}/WHEEL +0 -0
- {mindspore-2.1.0.dist-info → mindspore-2.2.11.dist-info}/top_level.txt +0 -0
mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/custom_aicore_ops_impl/add_dsl.py
ADDED
@@ -0,0 +1,46 @@
+# Copyright 2022 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+"""ascend custom op: add by dsl"""
+import tbe.dsl as tbe
+from tbe import tvm
+from tbe.common.register import register_op_compute
+from tbe.common.utils import para_check
+
+
+@register_op_compute("add_dsl")
+def add_dsl_compute(x1, x2, y, kernel_name="add_dsl"):
+    res = tbe.vadd(x1, x2)
+    return res
+
+
+@para_check.check_op_params(para_check.REQUIRED_INPUT, para_check.REQUIRED_INPUT,
+                            para_check.REQUIRED_OUTPUT, para_check.KERNEL_NAME)
+def add_dsl(x1, x2, y, kernel_name="add_dsl"):
+    """add dsl impl function"""
+    data_x1 = tvm.placeholder(
+        x1.get("shape"), dtype=x1.get("dtype"), name="data_x1")
+    data_x2 = tvm.placeholder(
+        x2.get("shape"), dtype=x2.get("dtype"), name="data_x2")
+
+    res = add_dsl_compute(data_x1, data_x2, y, kernel_name)
+
+    # auto schedule
+    with tvm.target.cce():
+        schedule = tbe.auto_schedule(res)
+
+    # operator build
+    config = {"name": kernel_name,
+              "tensor_list": [data_x1, data_x2, res]}
+    tbe.build(schedule, config)
mindspore/lib/plugin/ascend/custom_aicore_ops/op_impl/ai_core/tbe/custom_aicore_ops_impl/add_tik.py
ADDED
@@ -0,0 +1,51 @@
+# Copyright 2022 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+"""ascend custom op: add by tik"""
+from tbe.common.register import register_op_compute
+from tbe.common.utils import para_check
+from tbe import tik
+
+
+@register_op_compute("AddTik")
+@para_check.check_op_params(para_check.REQUIRED_INPUT, para_check.REQUIRED_INPUT,
+                            para_check.REQUIRED_OUTPUT, para_check.KERNEL_NAME)
+def add_tik(x1, x2, y, kernel_name="add_tik"):
+    """add dsl impl function"""
+    tik_instance = tik.Tik()
+    x1_shape = x1.get("shape")
+    x2_shape = x2.get("shape")
+    y_shape = y.get("shape")
+
+    data_a = tik_instance.Tensor(
+        "float16", x1_shape, name="x1", scope=tik.scope_gm)
+    data_b = tik_instance.Tensor(
+        "float16", x2_shape, name="x2", scope=tik.scope_gm)
+    data_c = tik_instance.Tensor(
+        "float16", y_shape, name="y", scope=tik.scope_gm)
+    data_a_ub = tik_instance.Tensor(
+        "float16", x1_shape, name="data_A_ub", scope=tik.scope_ubuf)
+    data_b_ub = tik_instance.Tensor(
+        "float16", x2_shape, name="data_B_ub", scope=tik.scope_ubuf)
+    data_c_ub = tik_instance.Tensor(
+        "float16", y_shape, name="data_C_ub", scope=tik.scope_ubuf)
+
+    tik_instance.data_move(data_a_ub, data_a, 0, 1, 128 // 16, 0, 0)
+    tik_instance.data_move(data_b_ub, data_b, 0, 1, 128 // 16, 0, 0)
+    tik_instance.vec_add(
+        128, data_c_ub[0], data_a_ub[0], data_b_ub[0], 1, 8, 8, 8)
+    tik_instance.data_move(data_c, data_c_ub, 0, 1, 128 // 16, 0, 0)
+    tik_instance.BuildCCE(kernel_name=kernel_name, inputs=[data_a, data_b], outputs=[data_c])
+
+    return tik_instance
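
The two add kernels above are plain elementwise float16 adds over a fixed 128-element tile; the hard-coded 128 // 16 in the data_move calls is the burst count for 128 float16 values (256 bytes moved in 32-byte blocks). A minimal host-side NumPy sketch of that arithmetic and of the result the kernel is expected to produce (the names below are illustrative and not part of the package):

import numpy as np

ELEMENTS = 128                              # elements per data_move in add_tik
BLOCK_BYTES = 32                            # one burst is a 32-byte block
dtype_size = np.dtype(np.float16).itemsize  # 2 bytes per float16

# 128 fp16 values * 2 B = 256 B = 8 blocks, i.e. the kernel's 128 // 16 bursts.
burst_len = ELEMENTS * dtype_size // BLOCK_BYTES
assert burst_len == 128 // 16 == 8

x1 = np.random.rand(ELEMENTS).astype(np.float16)
x2 = np.random.rand(ELEMENTS).astype(np.float16)
y = x1 + x2                                 # result of the kernel's vec_add
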
@@ -0,0 +1,241 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+# Copyright 2023 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+"""ascend custom op: kv_cache_mgr by tik"""
+
+import functools
+from tbe import tik
+import tbe.common.platform as tbe_platform
+from tbe.common.utils import para_check
+
+
+# 'pylint: disable=unused-argument,unused-variable,too-many-arguments,too-many-locals
+def check_supported(past, cur, index, out, kernel_name="kv_cache_mgr"):
+    """check data type and shape"""
+    # check data type
+    past_dtype = past.get("dtype").lower()
+    cur_dtype = cur.get("dtype").lower()
+    out_dtype = out.get("dtype").lower()
+
+    if past_dtype != cur_dtype or past_dtype != out_dtype:
+        reason = "past_dtype is %s, cur_dtype is %s, out_dtype is %s" % (past_dtype, cur_dtype, out_dtype)
+        return False, reason
+
+    support_dtype_list = ["float32", "int32", "uint32",
+                          "float16", "int16", "uint16",
+                          "int8", "uint8"]
+    if past_dtype not in support_dtype_list:
+        reason = "past_dtype(%s) is not support" % (past_dtype)
+        return False, reason
+
+    index_dtype = index.get("dtype").lower()
+    if index_dtype != "int32":
+        reason = "index_dtype is %s, not int32" % (index_dtype)
+        return False, reason
+
+    # check shape
+    past_shape = past.get("shape")
+    cur_shape = cur.get("shape")
+
+    if len(past_shape) != 4 or len(cur_shape) != 4:
+        reason = "len(past_shape) != 4 or len(cur_shape) != 4 "
+        return False, reason
+
+    # key_past shape: (bs, num_heads, size_per_head, seq_length)
+    # value_past shape: (bs, num_heads, seq_length, size_per_head)
+    # key shape: (bs, num_heads, 1, size_per_head)
+    # value shape: (bs, num_heads, 1, size_per_head)
+
+    if past_shape[0] != cur_shape[0] or past_shape[1] != cur_shape[1]:
+        reason = "past_shape[0] != cur_shape[0] or past_shape[1] != cur_shape[1] "
+        return False, reason
+
+    if past_shape[3] != cur_shape[3]:
+        reason = "past_shape[3] != cur_shape[3]"
+        return False, reason
+
+    return True, ""
+
+
+def ceil_div(dividend, divisor):
+    return (dividend + divisor - 1) // divisor
+
+
+def get_loop_info(total_num, each_loop_num):
+    loop_times = ceil_div(total_num, each_loop_num)
+    last_loop_num = total_num - each_loop_num * (loop_times - 1)
+    return loop_times, last_loop_num
+
+
+def elements_align(index_elements, data_size, align_size):
+    """Get element num align to align_size"""
+    total_size = index_elements * data_size
+    aligned_total_size = (total_size + align_size - 1) // align_size * align_size
+    return aligned_total_size // data_size
+
+
+class TilingHelper:
+    """Tiling parameter"""
+    def __init__(self, past, cur, index, out, kernel_name="kv_cache_mgr"):
+        self.kernel_name = kernel_name
+
+        # sys info
+        self.core_num = tbe_platform.get_soc_spec(tbe_platform.CORE_NUM)
+        self.ub_size = tbe_platform.get_soc_spec(tbe_platform.UB_SIZE)
+
+        self.past_shape = past.get("shape")
+        self.cur_shape = cur.get("shape")
+        self.index_shape = index.get("shape")
+
+        self.gm_type = past.get("dtype").lower()
+        self.ub_type = self.gm_type
+        self.index_ub_type = "int32"
+        self.int32_size = 4
+
+        self.gm_dtype_size = 2
+        if self.gm_type in ["int8", "uint8"]:
+            self.gm_dtype_size = 1
+        elif self.gm_type in ["float16", "int16", "uint16"]:
+            self.gm_dtype_size = 2
+        elif self.gm_type in ["float32", "int32", "uint32"]:
+            self.gm_dtype_size = 4
+
+        # tiling policy
+        self.seq_length = self.past_shape[2]
+        self.size_per_head = self.past_shape[3]
+        self.update_seq_length = self.cur_shape[2]
+
+        self.num_head = self.past_shape[1]
+
+        self.past_elements = functools.reduce(lambda a, b: a * b, self.past_shape)
+        self.cur_elements = functools.reduce(lambda a, b: a * b, self.cur_shape)
+
+        # The `burst` unit is 32B
+        index_elements = functools.reduce(lambda a, b: a * b, self.index_shape)
+        self.index_elements = elements_align(index_elements, self.int32_size, 32)
+
+        # split cur
+        self.cur_bs = self.cur_shape[0] * self.cur_shape[1]
+        self.each_core_bs_num = ceil_div(self.cur_bs, self.core_num)
+        self.core_num, self.last_core_bs_num = get_loop_info(self.cur_bs, self.each_core_bs_num)
+        self.cur_ub_elements = self.each_core_bs_num * self.update_seq_length * self.size_per_head
+        self.last_cure_ub_elements = self.last_core_bs_num * self.update_seq_length * self.size_per_head
+
+
+class KVCacheImpl(TilingHelper):
+    """KVCacheImpl"""
+    def __init__(self, past, cur, index, out, kernel_name):
+        super().__init__(past, cur, index, out, kernel_name)
+        # key_past or value_past shape: (bs, num_heads, seq_length, size_per_head)
+        # batch_valid_length
+        # cur update shape: (bs, num_heads, 1, size_per_head)
+
+        self.tik_inst = tik.Tik(disable_debug=True)
+        self.past_gm = self.tik_inst.Tensor(self.gm_type, (self.past_elements,), name="past_gm", scope=tik.scope_gm)
+        self.cur_gm = self.tik_inst.Tensor(self.gm_type, (self.cur_elements,), name="cur_gm", scope=tik.scope_gm)
+        self.index_gm = self.tik_inst.Tensor(self.index_ub_type, (self.index_elements,), name="index_gm",
+                                             scope=tik.scope_gm)
+        # we use is_atomic_add=True to set the out_gm zeros. But if inplace update out_gm, no need to set this flag.
+        self.out_gm = self.tik_inst.Tensor(self.gm_type, (self.past_elements,), name="out_gm", scope=tik.scope_gm)
+
+    def valid_cur_ub_load(self, core_idx):
+        """KVCacheImpl.valid_cur_ub_load"""
+        cur_ub = self.tik_inst.Tensor(self.ub_type, (self.cur_ub_elements,), name="valid_cur_ub",
+                                      scope=tik.scope_ubuf)
+        cur_gm_offset = core_idx * self.cur_ub_elements
+        with self.tik_inst.if_scope(core_idx != self.core_num -1):
+            self.tik_inst.data_move(cur_ub, self.cur_gm[cur_gm_offset:], 0, 1,
+                                    self.cur_ub_elements * self.gm_dtype_size // 32, 0, 0)
+        with self.tik_inst.else_scope():
+            self.tik_inst.data_move(cur_ub, self.cur_gm[cur_gm_offset:], 0, 1,
+                                    self.last_cure_ub_elements * self.gm_dtype_size // 32, 0, 0)
+        return cur_ub
+
+    def valid_index_ub_load(self):
+        """KVCacheImpl.valid_index_ub_load"""
+        index_ub = self.tik_inst.Tensor(self.index_ub_type, (self.index_elements,), name="valid_index_ub",
+                                        scope=tik.scope_ubuf)
+        self.tik_inst.data_move(index_ub, self.index_gm, 0, 1, self.index_elements * self.int32_size // 32, 0, 0)
+        return index_ub
+
+    def valid_pos_update(self, core_idx, cur_ub, index_ub, each_core_bs_num):
+        """KVCacheImpl.valid_pos_update"""
+        src_bs_stride = self.update_seq_length * self.size_per_head
+        dst_bs_stride = self.seq_length * self.size_per_head
+        burst_len = self.update_seq_length * self.size_per_head * self.gm_dtype_size // 32
+
+        valid_idx = self.tik_inst.Scalar(dtype="int32")
+        with self.tik_inst.for_range(0, each_core_bs_num) as each_core_bs_idx:
+            bs_idx = core_idx * self.each_core_bs_num + each_core_bs_idx
+            # because we fused bs * num_head, we need get the real bs_idx
+            valid_idx.set_as(index_ub[bs_idx // self.num_head])
+            with self.tik_inst.if_scope(valid_idx >= 0):
+                dst_offset = bs_idx * dst_bs_stride + valid_idx * self.size_per_head
+                src_offset = each_core_bs_idx * src_bs_stride
+                if burst_len < 65536:
+                    self.tik_inst.data_move(self.out_gm[dst_offset], cur_ub[src_offset],
+                                            0, 1, burst_len, 0, 0)
+                else:
+                    nburst = 1
+                    each_burst_len = burst_len
+                    while each_burst_len > 65535:
+                        nburst += 1
+                        each_burst_len = burst_len // nburst
+                    self.tik_inst.data_move(self.out_gm[dst_offset], cur_ub[src_offset], 0,
+                                            nburst, each_burst_len, 0, 0)
+
+    # 'pylint: disable=too-many-arguments
+    def compute_each_core(self, core_idx, core_bs_num):
+        """KVCacheImpl.compute_each_core"""
+        index_ub = self.valid_index_ub_load()
+        cur_ub = self.valid_cur_ub_load(core_idx)
+        self.valid_pos_update(core_idx, cur_ub, index_ub, core_bs_num)
+
+    def compute(self):
+        """KVCacheImpl.compute"""
+        if self.each_core_bs_num == self.last_core_bs_num:
+            with self.tik_inst.for_range(0, self.core_num, block_num=self.core_num) as core_index:
+                self.compute_each_core(core_idx=core_index, core_bs_num=self.each_core_bs_num)
+        else:
+            with self.tik_inst.for_range(0, self.core_num, block_num=self.core_num) as core_index:
+                with self.tik_inst.if_scope(core_index < self.core_num - 1):
+                    self.compute_each_core(core_idx=core_index, core_bs_num=self.each_core_bs_num)
+                with self.tik_inst.else_scope():
+                    self.compute_each_core(core_idx=core_index, core_bs_num=self.last_core_bs_num)
+
+        self.tik_inst.BuildCCE(kernel_name=self.kernel_name,
+                               inputs=[self.past_gm, self.cur_gm, self.index_gm],
+                               outputs=[self.out_gm],
+                               )
+        return self.tik_inst
+
+
+# 'pylint: disable = unused-argument
+# 'pylint: disable=too-many-arguments,too-many-locals
+@para_check.check_op_params(para_check.REQUIRED_INPUT, para_check.REQUIRED_INPUT,
+                            para_check.REQUIRED_INPUT, para_check.REQUIRED_OUTPUT,
+                            para_check.KERNEL_NAME)
+def kv_cache_mgr(past, cur, index, out, kernel_name="kv_cache_mgr"):
+    """
+    :param past: key_past or value_past. shape: (bs, num_head, seq_length, size_pre_head)
+    :param cur: key_current or value_current. shape: (bs, num_head, update_seq_length, size_pre_head)
+    :param index: which index to update. shape * len(dtype) need be multiples of 32. Option Input.
+    :param out: output shape: (bs, num_head, seq_length, size_pre_head)
+    :param kernel_name: the name of the op
+    :return:
+    """
+    obj = KVCacheImpl(past, cur, index, out, kernel_name)
+    return obj.compute()
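
kv_cache_mgr scatters the current key/value slice into the cache at the sequence position carried by index, one position per batch item shared by all heads of that item, with negative positions skipped. A rough NumPy reference of that update, assuming the output starts from a copy of past as in the inplace case mentioned in the code comments (shapes and names below are illustrative):

import numpy as np

bs, num_head, seq_length, size_per_head = 2, 4, 16, 8
update_seq_length = 1

past = np.zeros((bs, num_head, seq_length, size_per_head), dtype=np.float16)
cur = np.random.rand(bs, num_head, update_seq_length, size_per_head).astype(np.float16)
index = np.array([3, 7], dtype=np.int32)   # one write position per batch item

out = past.copy()
for b in range(bs):
    if index[b] >= 0:                      # the kernel skips negative indices
        out[b, :, index[b]:index[b] + update_seq_length, :] = cur[b]
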
@@ -0,0 +1,212 @@
+"""
+Copyright 2020 Huawei Technologies Co., Ltd. All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+
+matmul_tik
+"""
+
+from tbe import tik
+from tbe.common.platform import get_soc_spec
+
+DTYPE_SIZE = {
+    'bool': 1,
+    'uint8': 1,
+    'int8': 1,
+    'uint16': 2,
+    'int16': 2,
+    'int24': 3,
+    'uint32': 4,
+    'int32': 4,
+    'float16': 2,
+    'float32': 4,
+    'int48': 6,
+    'int64': 8,
+    'uint64': 8,
+    'float64': 8
+}
+
+
+def MK_TO_K1MK0(tik_instance, mk_input_tensor, k1mk0_tensor, dtype, k1, m, k0):
+    """data move mk to k1mk0"""
+    src_ub = tik_instance.Tensor(dtype, (k1, m, k0), name='src_ub', scope=tik.scope_ubuf)
+
+    # data_move(m, k) ---> (k1, m, k0)
+    with tik_instance.for_range(0, k1) as i:
+        tik_instance.data_move(src_ub[i * m * k0:], mk_input_tensor[i * k0:], 0, m, k0 * DTYPE_SIZE[dtype] // 32,
+                               (k1 - 1) * k0 * DTYPE_SIZE[dtype] // 32, 0)
+
+    tik_instance.data_move(k1mk0_tensor, src_ub, 0, 1, k1 * m * k0 * DTYPE_SIZE[dtype] // 32, 0, 0)
+
+
+def KN_TO_K1NK0(tik_instance, kn_input_tensor, k1nk0_tensor, dtype, k1, n, k0):
+    """data move kn to k1nk0"""
+
+    with tik_instance.for_range(0, k1) as index:
+        k1nk0_ub = tik_instance.Tensor(dtype, (n, k0), tik.scope_ubuf, "k1nk0_ub")
+        src_ub = tik_instance.Tensor(dtype, (k0, n), tik.scope_ubuf, "src_ub")
+        burst_len = k0 * n * DTYPE_SIZE[dtype] // 32
+        tik_instance.data_move(src_ub, kn_input_tensor[index * k0 * n], 0, 1, burst_len, 0, 0)
+        dst_list = [k1nk0_ub[16 * i] for i in range(16)]
+        src_list = [src_ub[n * i] for i in range(16)]
+        rep_times = n // k0
+        dst_rep_stride = k0
+        src_rep_stride = 1
+        tik_instance.vec_trans_scatter(False, False, dst_list, src_list, rep_times, dst_rep_stride, src_rep_stride)
+        tik_instance.data_move(k1nk0_tensor[index * k0 * n], k1nk0_ub, 0, 1, burst_len, 0, 0)
+
+
+def N1MN0_TO_MN(tik_instance, mn_output_tensor, n1mn0_tensor, dtype, n1, m, n0):
+    """data move mn to n1mn0"""
+    src_ub = tik_instance.Tensor(dtype, (m, n1 * n0), name='src_ub', scope=tik.scope_ubuf)
+
+    # data_move(n1, m, n0) ---> (m, n)
+    with tik_instance.for_range(0, n1) as i:
+        tik_instance.data_move(src_ub[i * n0:], n1mn0_tensor[i * m * n0:], 0, m,
+                               n0 * DTYPE_SIZE[dtype] // 32, 0, (n1 - 1) * n0 * DTYPE_SIZE[dtype] // 32)
+
+    tik_instance.data_move(mn_output_tensor, src_ub, 0, 1, m * n1 * n0 * DTYPE_SIZE[dtype] // 32, 0, 0)
+
+
+def matmul_tik_compute(params, kernel_name):
+    """
+    matmul tik compute
+    @param params: matmul data
+    @param kernel_name: kernel name
+    @return: tik instance
+    """
+    tik_instance = tik.Tik()
+    if not isinstance(params, dict):
+        params = params.__dict__
+    m_size, k_size, n_size = params['M'], params['K'], params['N']
+    data_type = params["data_type"]
+    m_tiling_size = int(params["m_tiling_size"])
+    n_tiling_size = int(params["n_tiling_size"])
+    k_tiling_size = int(params['k_tiling_size'])
+
+    m_cycle_times = params["m_cycle_times"]
+    n_cycle_times = params["n_cycle_times"]
+    k_cycle_times = params["k_cycle_times"]
+
+    # Determine the output type
+    if data_type == "float16":
+        if get_soc_spec("SOC_VERSION") in ["SD3403", "OPTG", "Hi3796CV300CS", "TsnsC"]:
+            C_loc_out_type = "float16"
+        else:
+            C_loc_out_type = "float32"
+        K0 = 16
+    else:
+        C_loc_out_type = "int32"
+        K0 = 32
+    block_size = 16
+
+    n_thread_num = params['n_thread_num']
+    m_thread_num = params['m_thread_num']
+    k_thread_num = params['k_thread_num']
+
+    mk_gm_input = tik_instance.Tensor(data_type, (m_size, k_size), name="mk_input_gm", scope=tik.scope_gm)
+    kn_gm_input = tik_instance.Tensor(data_type, (k_size, n_size), name="kn_input_gm", scope=tik.scope_gm)
+
+    k1mk0_workspace = tik_instance.Tensor(data_type, (k_size // K0, m_size, K0), name="k1mk0_workspace",
+                                          scope=tik.scope_gm, is_workspace=True)
+
+    k1nk0_workspace = tik_instance.Tensor(data_type, (k_size // K0, n_size, K0), name="k1nk0_workspace",
+                                          scope=tik.scope_gm, is_workspace=True)
+
+    mn_gm_output = tik_instance.Tensor(C_loc_out_type, (m_size, n_size), tik.scope_gm, name="mn_output_gm")
+    nmk0_workspace = tik_instance.Tensor(C_loc_out_type, (n_size // block_size, m_size, block_size),
+                                         name="nmk0_workspace", scope=tik.scope_gm, is_workspace=True)
+
+    MK_TO_K1MK0(tik_instance, mk_gm_input, k1mk0_workspace, data_type, k_size // K0, m_size, K0)
+    KN_TO_K1NK0(tik_instance, kn_gm_input, k1nk0_workspace, data_type, k_size // K0, n_size, K0)
+
+    # Tiling is realized through the for_range() loop.
+    with tik_instance.for_range(0, 2, block_num=1) as core_id:
+        with tik_instance.for_range(0, n_cycle_times // 2, thread_num=n_thread_num) as n_idx:
+            with tik_instance.for_range(0, m_cycle_times, thread_num=m_thread_num) as m_idx:
+                dst_l0c = tik_instance.Tensor(C_loc_out_type, [n_tiling_size // 16, m_tiling_size, 16], name='dst_l0c',
+                                              scope=tik.scope_cbuf_out)
+                with tik_instance.for_range(0, k_cycle_times,
+                                            thread_num=k_thread_num) as k_idx:
+                    # Calculation result data transfer.
+                    inputa_l1 = tik_instance.Tensor(params['data_type'], [k_tiling_size // K0, m_tiling_size, K0],
+                                                    name="A_tiling_l1", scope=tik.scope_cbuf)
+                    tik_instance.data_move(inputa_l1,
+                                           k1mk0_workspace[k_idx * k_tiling_size // K0, m_idx * m_tiling_size, :],
+                                           0, k_tiling_size // K0, m_tiling_size, m_size - m_tiling_size, 0)
+                    inputb_l1 = tik_instance.Tensor(params["data_type"], [k_tiling_size // K0, n_tiling_size, K0],
+                                                    name="B_tiling_l1", scope=tik.scope_cbuf)
+                    if n_size - n_tiling_size > 65535:
+                        with tik_instance.for_range(0, k_tiling_size // K0) \
+                                as dma_k_idx:
+                            tik_instance.data_move(inputb_l1[dma_k_idx, :, :],
+                                                   k1nk0_workspace[k_idx * k_tiling_size // K0 + dma_k_idx,
+                                                                   (core_id * n_cycle_times // 2 + n_idx)
+                                                                   * n_tiling_size, :],
+                                                   0, 1, n_tiling_size, 0, 0)
+                    else:
+                        tik_instance.data_move(inputb_l1, k1nk0_workspace[k_idx * k_tiling_size // K0,
+                                                                          (core_id * n_cycle_times // 2 + n_idx)
+                                                                          * n_tiling_size, :],
+                                               0, k_tiling_size // K0, n_tiling_size, n_size - n_tiling_size, 0)
+                    # Call matmul API to matrix multiplication calculation.
+                    with tik_instance.if_scope(k_idx == 0):
+                        tik_instance.matmul(dst_l0c, inputa_l1, inputb_l1, m_tiling_size, k_tiling_size, n_tiling_size,
+                                            init_l1out=True)
+                    with tik_instance.else_scope():
+                        tik_instance.matmul(dst_l0c, inputa_l1, inputb_l1, m_tiling_size, k_tiling_size, n_tiling_size,
+                                            init_l1out=False)
+                tik_instance.fixpipe(nmk0_workspace[n_tiling_size // 16 * (core_id * n_cycle_times // 2 + n_idx),
+                                                    m_idx * m_tiling_size, :],
+                                     dst_l0c, n_tiling_size // 16,
+                                     m_tiling_size * 16 * DTYPE_SIZE[C_loc_out_type] // 32,
+                                     (m_size - m_tiling_size) * 16 * DTYPE_SIZE[C_loc_out_type] // 32, 0)
+
+    N1MN0_TO_MN(tik_instance, mn_gm_output, nmk0_workspace, C_loc_out_type, n_size // K0, m_size, K0)
+
+    tik_instance.BuildCCE(kernel_name=kernel_name, inputs=[mk_gm_input, kn_gm_input], outputs=[mn_gm_output])
+    return tik_instance
+
+
+def matmul_tik(input_x1, input_x2, output_y=None, kernel_name="simple_matmul"):
+    """
+    matmul_tik main func
+    Parameters
+    ----------
+    input_x1: input data 1
+    input_x2: input data 2
+    output_y: output dta
+    """
+    shape_a = input_x1.get("ori_shape")
+    shape_b = input_x2.get("ori_shape")
+    m = shape_a[0]
+    k = shape_a[1]
+    n = shape_b[1]
+    data_type = input_x1.get("dtype").lower()
+    params = {
+        'M': m,
+        'K': k,
+        'N': n,
+        'data_type': data_type,
+        'm_tiling_size': 16,
+        'm_cycle_times': 1,
+        'm_thread_num': 1,
+        'n_tiling_size': 64,
+        'n_cycle_times': 16,
+        'n_thread_num': 1,
+        'k_tiling_size': 32,
+        'k_cycle_times': 2,
+        'k_thread_num': 2,
+        'output_y': output_y
+    }
+    return matmul_tik_compute(params, kernel_name)
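
matmul_tik repacks the row-major (M, K) and (K, N) inputs into the fractal layouts consumed by the cube unit (MK_TO_K1MK0 and KN_TO_K1NK0), accumulates partial products over K0-wide blocks, and unpacks the (N1, M, N0) result back to (M, N) via N1MN0_TO_MN. A NumPy sketch of those layout transforms and of the blocked accumulation, offered only as a reading aid (float32 keeps the check exact; the K0 = 16 split mirrors the float16 path):

import numpy as np

M, K, N, K0 = 16, 32, 64, 16
a_mk = np.random.rand(M, K).astype(np.float32)
b_kn = np.random.rand(K, N).astype(np.float32)

# MK_TO_K1MK0: (M, K) -> (K1, M, K0), K split into K1 blocks of K0 columns.
a_k1mk0 = a_mk.reshape(M, K // K0, K0).transpose(1, 0, 2)
# KN_TO_K1NK0: (K, N) -> (K1, N, K0), each K0-row slab transposed to (N, K0).
b_k1nk0 = b_kn.reshape(K // K0, K0, N).transpose(0, 2, 1)

# Blocked accumulation over the K1 dimension, one partial product per block.
c_mn = np.zeros((M, N), dtype=np.float32)
for k1 in range(K // K0):
    c_mn += a_k1mk0[k1] @ b_k1nk0[k1].T

# N1MN0_TO_MN: the (N1, M, N0) result layout maps back to a row-major (M, N).
c_n1mn0 = c_mn.reshape(M, N // K0, K0).transpose(1, 0, 2)
assert np.allclose(c_n1mn0.transpose(1, 0, 2).reshape(M, N), a_mk @ b_kn)
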
@@ -0,0 +1,46 @@
+# Copyright 2022 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+"""ascend custom op: add by dsl"""
+import tbe.dsl as tbe
+from tbe import tvm
+from tbe.common.register import register_op_compute
+from tbe.common.utils import para_check
+
+
+@register_op_compute("add_dsl")
+def add_dsl_compute(x1, x2, y, kernel_name="add_dsl"):
+    res = tbe.vadd(x1, x2)
+    return res
+
+
+@para_check.check_op_params(para_check.REQUIRED_INPUT, para_check.REQUIRED_INPUT,
+                            para_check.REQUIRED_OUTPUT, para_check.KERNEL_NAME)
+def add_dsl(x1, x2, y, kernel_name="add_dsl"):
+    """add dsl impl function"""
+    data_x1 = tvm.placeholder(
+        x1.get("shape"), dtype=x1.get("dtype"), name="data_x1")
+    data_x2 = tvm.placeholder(
+        x2.get("shape"), dtype=x2.get("dtype"), name="data_x2")
+
+    res = add_dsl_compute(data_x1, data_x2, y, kernel_name)
+
+    # auto schedule
+    with tvm.target.cce():
+        schedule = tbe.auto_schedule(res)
+
+    # operator build
+    config = {"name": kernel_name,
+              "tensor_list": [data_x1, data_x2, res]}
+    tbe.build(schedule, config)
@@ -0,0 +1,51 @@
+# Copyright 2022 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+"""ascend custom op: add by tik"""
+from tbe.common.register import register_op_compute
+from tbe.common.utils import para_check
+from tbe import tik
+
+
+@register_op_compute("AddTik")
+@para_check.check_op_params(para_check.REQUIRED_INPUT, para_check.REQUIRED_INPUT,
+                            para_check.REQUIRED_OUTPUT, para_check.KERNEL_NAME)
+def add_tik(x1, x2, y, kernel_name="add_tik"):
+    """add dsl impl function"""
+    tik_instance = tik.Tik()
+    x1_shape = x1.get("shape")
+    x2_shape = x2.get("shape")
+    y_shape = y.get("shape")
+
+    data_a = tik_instance.Tensor(
+        "float16", x1_shape, name="x1", scope=tik.scope_gm)
+    data_b = tik_instance.Tensor(
+        "float16", x2_shape, name="x2", scope=tik.scope_gm)
+    data_c = tik_instance.Tensor(
+        "float16", y_shape, name="y", scope=tik.scope_gm)
+    data_a_ub = tik_instance.Tensor(
+        "float16", x1_shape, name="data_A_ub", scope=tik.scope_ubuf)
+    data_b_ub = tik_instance.Tensor(
+        "float16", x2_shape, name="data_B_ub", scope=tik.scope_ubuf)
+    data_c_ub = tik_instance.Tensor(
+        "float16", y_shape, name="data_C_ub", scope=tik.scope_ubuf)
+
+    tik_instance.data_move(data_a_ub, data_a, 0, 1, 128 // 16, 0, 0)
+    tik_instance.data_move(data_b_ub, data_b, 0, 1, 128 // 16, 0, 0)
+    tik_instance.vec_add(
+        128, data_c_ub[0], data_a_ub[0], data_b_ub[0], 1, 8, 8, 8)
+    tik_instance.data_move(data_c, data_c_ub, 0, 1, 128 // 16, 0, 0)
+    tik_instance.BuildCCE(kernel_name=kernel_name, inputs=[data_a, data_b], outputs=[data_c])
+
+    return tik_instance