mindspore-2.1.0-cp38-none-any.whl → mindspore-2.2.0-cp38-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mindspore/.commit_id +1 -1
- mindspore/__init__.py +4 -1
- mindspore/_akg/akg/build_module.py +5 -6
- mindspore/_akg/akg/composite/build_module.py +49 -16
- mindspore/_akg/akg/composite/split_stitch.py +10 -11
- mindspore/_akg/akg/ms/info_version_adapt.py +67 -1
- mindspore/_akg/akg/tvm/api.py +4 -3
- mindspore/_akg/akg/tvm/autotvm/__init__.py +1 -2
- mindspore/_akg/akg/tvm/autotvm/graph_tuner/base_graph_tuner.py +1 -5
- mindspore/_akg/akg/tvm/autotvm/measure/__init__.py +1 -1
- mindspore/_akg/akg/tvm/autotvm/measure/measure.py +1 -10
- mindspore/_akg/akg/tvm/autotvm/measure/measure_methods.py +1 -372
- mindspore/_akg/akg/tvm/build_module.py +16 -1
- mindspore/_akg/akg/tvm/contrib/graph_runtime.py +0 -53
- mindspore/_akg/akg/tvm/hybrid/parser.py +7 -6
- mindspore/_akg/akg/tvm/ir_builder.py +1 -1
- mindspore/_akg/akg/tvm/module.py +1 -2
- mindspore/_akg/akg/tvm/stmt.py +2 -2
- mindspore/_akg/akg/utils/composite_op_helper.py +9 -10
- mindspore/_akg/akg/utils/kernel_exec.py +58 -260
- mindspore/_akg/akg/utils/result_analysis.py +4 -24
- mindspore/_akg/akg/utils/tbe_codegen_utils.py +198 -0
- mindspore/_c_dataengine.cpython-38-aarch64-linux-gnu.so +0 -0
- mindspore/_c_expression.cpython-38-aarch64-linux-gnu.so +0 -0
- mindspore/_c_mindrecord.cpython-38-aarch64-linux-gnu.so +0 -0
- mindspore/_check_jit_forbidden_api.py +3 -1
- mindspore/_checkparam.py +26 -32
- mindspore/_extends/graph_kernel/__init__.py +0 -1
- mindspore/_extends/graph_kernel/model/model_builder.py +9 -50
- mindspore/_extends/graph_kernel/splitter.py +1 -9
- mindspore/_extends/parallel_compile/akg_compiler/akg_process.py +122 -15
- mindspore/_extends/parallel_compile/akg_compiler/build_tbe_kernel.py +2 -2
- mindspore/_extends/parallel_compile/akg_compiler/tbe_topi.py +4 -2
- mindspore/_extends/parallel_compile/tbe_compiler/tbe_adapter.py +2 -2
- mindspore/_extends/parallel_compile/tbe_compiler/tbe_helper.py +4 -4
- mindspore/_extends/parallel_compile/tbe_compiler/tbe_job.py +1 -1
- mindspore/_extends/parallel_compile/tbe_compiler/tbe_job_manager.py +1 -1
- mindspore/_extends/parse/__init__.py +12 -15
- mindspore/_extends/parse/namespace.py +7 -33
- mindspore/_extends/parse/parser.py +61 -71
- mindspore/_extends/parse/resources.py +1 -1
- mindspore/_extends/parse/standard_method.py +72 -95
- mindspore/_extends/parse/trope.py +1 -1
- mindspore/_extends/remote/kernel_build_server.py +24 -7
- mindspore/_extends/remote/kernel_build_server_akg_v2.py +55 -0
- mindspore/_install_custom.py +43 -0
- mindspore/_mindspore_offline_debug.cpython-38-aarch64-linux-gnu.so +0 -0
- mindspore/amp.py +47 -11
- mindspore/bin/cache_admin +0 -0
- mindspore/bin/cache_server +0 -0
- mindspore/boost/boost.py +1 -8
- mindspore/boost/boost_cell_wrapper.py +3 -2
- mindspore/boost/grad_accumulation.py +1 -1
- mindspore/boost/group_loss_scale_manager.py +8 -7
- mindspore/common/__init__.py +5 -3
- mindspore/common/_jit_fallback_utils.py +6 -0
- mindspore/common/_register_for_adapter.py +2 -0
- mindspore/common/_register_for_tensor.py +2 -2
- mindspore/common/_stub_tensor.py +13 -0
- mindspore/common/_utils.py +13 -0
- mindspore/common/api.py +173 -258
- mindspore/common/auto_dynamic_shape.py +498 -0
- mindspore/common/dtype.py +18 -11
- mindspore/common/dump.py +6 -4
- mindspore/common/initializer.py +14 -14
- mindspore/common/jit_config.py +33 -15
- mindspore/common/lazy_inline.py +126 -7
- mindspore/common/mindir_util.py +101 -0
- mindspore/common/parameter.py +51 -41
- mindspore/common/seed.py +4 -4
- mindspore/common/sparse_tensor.py +13 -14
- mindspore/common/tensor.py +240 -145
- mindspore/communication/__init__.py +7 -4
- mindspore/communication/_comm_helper.py +83 -4
- mindspore/communication/management.py +152 -84
- mindspore/config/op_info.config +13 -2
- mindspore/config/super_bar_config.json +4 -2
- mindspore/context.py +143 -59
- mindspore/dataset/__init__.py +5 -5
- mindspore/dataset/audio/__init__.py +2 -2
- mindspore/dataset/audio/transforms.py +52 -52
- mindspore/dataset/callback/ds_callback.py +16 -2
- mindspore/dataset/core/config.py +68 -51
- mindspore/dataset/engine/cache_client.py +28 -5
- mindspore/dataset/engine/datasets.py +250 -112
- mindspore/dataset/engine/datasets_audio.py +43 -211
- mindspore/dataset/engine/datasets_standard_format.py +11 -35
- mindspore/dataset/engine/datasets_text.py +43 -67
- mindspore/dataset/engine/datasets_user_defined.py +86 -100
- mindspore/dataset/engine/datasets_vision.py +219 -1029
- mindspore/dataset/engine/iterators.py +11 -4
- mindspore/dataset/engine/obs/obs_mindrecord_dataset.py +4 -0
- mindspore/dataset/engine/obs/util.py +3 -0
- mindspore/dataset/engine/samplers.py +1 -1
- mindspore/dataset/engine/validators.py +19 -5
- mindspore/dataset/text/__init__.py +3 -3
- mindspore/dataset/text/transforms.py +101 -127
- mindspore/dataset/text/utils.py +205 -138
- mindspore/dataset/transforms/__init__.py +1 -1
- mindspore/dataset/transforms/py_transforms_util.py +40 -12
- mindspore/dataset/transforms/transforms.py +95 -40
- mindspore/dataset/utils/browse_dataset.py +8 -2
- mindspore/dataset/utils/line_reader.py +17 -19
- mindspore/dataset/vision/__init__.py +3 -3
- mindspore/dataset/vision/c_transforms.py +6 -3
- mindspore/dataset/vision/transforms.py +409 -287
- mindspore/dataset/vision/utils.py +13 -14
- mindspore/dataset/vision/validators.py +11 -1
- mindspore/experimental/map_parameter.py +14 -0
- mindspore/{nn/optim_ex → experimental/optim}/__init__.py +30 -29
- mindspore/{nn/optim_ex → experimental/optim}/adam.py +59 -66
- mindspore/{nn/optim_ex → experimental/optim}/adamw.py +181 -203
- mindspore/experimental/optim/lr_scheduler.py +1427 -0
- mindspore/{nn/optim_ex → experimental/optim}/optimizer.py +252 -259
- mindspore/{nn/optim_ex → experimental/optim}/sgd.py +147 -152
- mindspore/gen_ops.py +273 -0
- mindspore/include/OWNERS +0 -1
- mindspore/include/api/data_type.h +2 -1
- mindspore/include/api/graph.h +0 -15
- mindspore/include/api/kernel.h +2 -0
- mindspore/include/api/kernel_api.h +37 -12
- mindspore/include/api/model.h +0 -14
- mindspore/include/api/types.h +37 -4
- mindspore/include/c_api/ms/abstract.h +67 -0
- mindspore/include/c_api/ms/attribute.h +197 -0
- mindspore/include/c_api/ms/base/handle_types.h +43 -0
- mindspore/include/c_api/ms/base/macros.h +32 -0
- mindspore/include/c_api/ms/base/status.h +33 -0
- mindspore/include/c_api/ms/base/types.h +282 -0
- mindspore/include/c_api/ms/context.h +102 -0
- mindspore/include/c_api/ms/graph.h +160 -0
- mindspore/include/c_api/ms/node.h +606 -0
- mindspore/include/c_api/ms/tensor.h +161 -0
- mindspore/include/c_api/ms/value.h +84 -0
- mindspore/include/dataset/constants.h +6 -5
- mindspore/include/dataset/execute.h +23 -13
- mindspore/include/dataset/text.h +26 -26
- mindspore/include/dataset/transforms.h +13 -13
- mindspore/include/dataset/vision.h +60 -60
- mindspore/include/dataset/vision_ascend.h +5 -6
- mindspore/include/dataset/vision_lite.h +17 -17
- mindspore/include/mindapi/base/type_id.h +1 -0
- mindspore/include/mindapi/base/types.h +1 -0
- mindspore/lib/libdnnl.so.2 +0 -0
- mindspore/lib/libjemalloc.so.2 +0 -0
- mindspore/lib/libmindspore.so +0 -0
- mindspore/lib/libmindspore_backend.so +0 -0
- mindspore/lib/libmindspore_common.so +0 -0
- mindspore/lib/libmindspore_core.so +0 -0
- mindspore/lib/libmindspore_glog.so.0 +0 -0
- mindspore/lib/libmindspore_gpr.so.15 +0 -0
- mindspore/lib/libmindspore_grpc++.so.1 +0 -0
- mindspore/lib/libmindspore_grpc.so.15 +0 -0
- mindspore/lib/libmindspore_shared_lib.so +0 -0
- mindspore/lib/libnnacl.so +0 -0
- mindspore/lib/libopencv_core.so.4.5 +0 -0
- mindspore/lib/libopencv_imgcodecs.so.4.5 +0 -0
- mindspore/lib/libopencv_imgproc.so.4.5 +0 -0
- mindspore/lib/libps_cache.so +0 -0
- mindspore/lib/plugin/ascend/custom_aicpu_ops/op_impl/cpu/aicpu_kernel/impl/libcust_aicpu_kernels.so +0 -0
- mindspore/lib/plugin/ascend/custom_aicpu_ops/op_impl/cpu/aicpu_kernel/impl/libcust_cpu_kernels.so +0 -0
- mindspore/lib/plugin/ascend/custom_aicpu_ops/op_impl/cpu/config/cust_aicpu_kernel.json +9000 -0
- mindspore/lib/plugin/ascend/custom_aicpu_ops/op_proto/libcust_op_proto.so +0 -0
- mindspore/lib/plugin/ascend/libakg.so +0 -0
- mindspore/lib/plugin/ascend/libascend_collective.so +0 -0
- mindspore/lib/plugin/ascend/libdvpp_utils.so +0 -0
- mindspore/lib/plugin/ascend/libhccl_plugin.so +0 -0
- mindspore/lib/plugin/ascend/libmindspore_aicpu_kernels.so +0 -0
- mindspore/lib/plugin/ascend/libmindspore_cpu_kernels.so +0 -0
- mindspore/lib/plugin/cpu/libakg.so +0 -0
- mindspore/lib/plugin/libmindspore_ascend.so.1 +0 -0
- mindspore/lib/plugin/libmindspore_ascend.so.2 +0 -0
- mindspore/mindrecord/tools/imagenet_to_mr.py +1 -1
- mindspore/mindrecord/tools/mnist_to_mr.py +2 -2
- mindspore/nn/__init__.py +0 -2
- mindspore/nn/cell.py +316 -74
- mindspore/nn/dynamic_lr.py +21 -21
- mindspore/nn/layer/activation.py +21 -28
- mindspore/nn/layer/basic.py +15 -13
- mindspore/nn/layer/channel_shuffle.py +1 -1
- mindspore/nn/layer/container.py +271 -9
- mindspore/nn/layer/conv.py +310 -207
- mindspore/nn/layer/dense.py +8 -5
- mindspore/nn/layer/embedding.py +33 -27
- mindspore/nn/layer/flash_attention.py +82 -41
- mindspore/nn/layer/image.py +8 -6
- mindspore/nn/layer/math.py +13 -18
- mindspore/nn/layer/normalization.py +107 -66
- mindspore/nn/layer/padding.py +1 -1
- mindspore/nn/layer/pooling.py +131 -109
- mindspore/nn/layer/rnn_cells.py +22 -17
- mindspore/nn/layer/rnns.py +13 -16
- mindspore/nn/layer/thor_layer.py +1 -1
- mindspore/nn/layer/transformer.py +221 -154
- mindspore/nn/learning_rate_schedule.py +9 -1
- mindspore/nn/loss/loss.py +235 -174
- mindspore/nn/optim/ada_grad.py +2 -1
- mindspore/nn/optim/adadelta.py +1 -0
- mindspore/nn/optim/adafactor.py +2 -1
- mindspore/nn/optim/adam.py +7 -4
- mindspore/nn/optim/adamax.py +3 -2
- mindspore/nn/optim/adasum.py +2 -2
- mindspore/nn/optim/asgd.py +2 -3
- mindspore/nn/optim/ftrl.py +6 -5
- mindspore/nn/optim/lamb.py +7 -4
- mindspore/nn/optim/lars.py +1 -1
- mindspore/nn/optim/lazyadam.py +5 -3
- mindspore/nn/optim/momentum.py +2 -1
- mindspore/nn/optim/optimizer.py +53 -4
- mindspore/nn/optim/proximal_ada_grad.py +3 -4
- mindspore/nn/optim/rmsprop.py +4 -3
- mindspore/nn/optim/rprop.py +23 -12
- mindspore/nn/optim/sgd.py +26 -11
- mindspore/nn/optim/thor.py +9 -7
- mindspore/nn/probability/bijector/bijector.py +5 -5
- mindspore/nn/probability/bijector/power_transform.py +27 -27
- mindspore/nn/probability/bijector/softplus.py +3 -3
- mindspore/nn/probability/distribution/_utils/custom_ops.py +3 -3
- mindspore/nn/probability/distribution/bernoulli.py +5 -5
- mindspore/nn/probability/distribution/beta.py +3 -3
- mindspore/nn/probability/distribution/categorical.py +7 -7
- mindspore/nn/probability/distribution/cauchy.py +0 -1
- mindspore/nn/probability/distribution/distribution.py +3 -3
- mindspore/nn/probability/distribution/gamma.py +3 -3
- mindspore/nn/probability/distribution/geometric.py +4 -4
- mindspore/nn/probability/distribution/gumbel.py +4 -4
- mindspore/nn/probability/distribution/log_normal.py +2 -2
- mindspore/nn/probability/distribution/logistic.py +2 -2
- mindspore/nn/probability/distribution/poisson.py +4 -4
- mindspore/nn/probability/distribution/transformed_distribution.py +3 -3
- mindspore/nn/probability/distribution/uniform.py +6 -6
- mindspore/nn/wrap/cell_wrapper.py +78 -34
- mindspore/nn/wrap/grad_reducer.py +8 -5
- mindspore/nn/wrap/loss_scale.py +105 -42
- mindspore/numpy/array_creations.py +1 -2
- mindspore/numpy/array_ops.py +3 -2
- mindspore/offline_debug/convert_async.py +2 -2
- mindspore/ops/_grad_experimental/__init__.py +0 -5
- mindspore/ops/_grad_experimental/grad_array_ops.py +1 -2
- mindspore/ops/_grad_experimental/grad_comm_ops.py +15 -2
- mindspore/ops/_grad_experimental/grad_debug_ops.py +0 -37
- mindspore/ops/_grad_experimental/grad_implementations.py +10 -0
- mindspore/ops/_grad_experimental/grad_inner_ops.py +2 -216
- mindspore/ops/_grad_experimental/grad_math_ops.py +0 -181
- mindspore/ops/_grad_experimental/grad_sparse.py +15 -0
- mindspore/ops/_op_impl/_custom_op/dsd_back_impl.py +1 -1
- mindspore/ops/_op_impl/_custom_op/flash_attention/attention.py +165 -109
- mindspore/ops/_op_impl/_custom_op/flash_attention/flash_attention_bwd.py +144 -86
- mindspore/ops/_op_impl/_custom_op/flash_attention/flash_attention_fwd.py +172 -187
- mindspore/ops/_op_impl/_custom_op/flash_attention/flash_attention_impl.py +51 -57
- mindspore/ops/_op_impl/_custom_op/flash_attention/tik_ops_utils.py +6 -17
- mindspore/ops/_op_impl/_custom_op/flash_attention/tiling_strategy/wukong_tiling.py +1 -1
- mindspore/ops/_op_impl/aicpu/__init__.py +14 -2
- mindspore/ops/_op_impl/aicpu/bias_add_grad.py +0 -1
- mindspore/ops/_op_impl/aicpu/count_nonzero.py +43 -0
- mindspore/ops/_op_impl/aicpu/eps.py +32 -0
- mindspore/ops/_op_impl/aicpu/gamma.py +2 -2
- mindspore/ops/_op_impl/aicpu/log_uniform_candidate_sampler.py +6 -3
- mindspore/ops/_op_impl/aicpu/lu_unpack_grad.py +0 -1
- mindspore/ops/_op_impl/aicpu/multinomial.py +3 -3
- mindspore/ops/_op_impl/aicpu/parameterized_truncated_normal.py +15 -7
- mindspore/ops/_op_impl/aicpu/random_categorical.py +39 -19
- mindspore/ops/_op_impl/aicpu/random_choice_with_mask.py +5 -2
- mindspore/ops/_op_impl/aicpu/random_poisson.py +103 -52
- mindspore/ops/_op_impl/aicpu/random_shuffle.py +17 -15
- mindspore/ops/_op_impl/aicpu/{sparseaddmm.py → sparse_addmm.py} +2 -2
- mindspore/ops/_op_impl/aicpu/{sparsesparsemaximum.py → sparse_sparse_maximum.py} +4 -4
- mindspore/ops/_op_impl/aicpu/standard_laplace.py +5 -5
- mindspore/ops/_op_impl/aicpu/standard_normal.py +5 -5
- mindspore/ops/_op_impl/aicpu/truncated_normal.py +9 -7
- mindspore/ops/_op_impl/aicpu/uniform.py +5 -3
- mindspore/ops/_op_impl/aicpu/uniform_candidate_sampler.py +8 -4
- mindspore/ops/_op_impl/aicpu/uniform_int.py +5 -5
- mindspore/ops/_op_impl/aicpu/uniform_real.py +4 -4
- mindspore/ops/_op_impl/tbe/__init__.py +4 -4
- mindspore/ops/_op_impl/tbe/inplace_index_add.py +7 -3
- mindspore/ops/_op_impl/tbe/trans_data_ds.py +2 -0
- mindspore/ops/_primitive_cache.py +1 -1
- mindspore/ops/_tracefunc.py +45 -13
- mindspore/ops/_utils/utils.py +4 -1
- mindspore/ops/_vmap/vmap_array_ops.py +3 -3
- mindspore/ops/_vmap/vmap_base.py +3 -3
- mindspore/ops/_vmap/vmap_convolution_ops.py +1 -1
- mindspore/ops/_vmap/vmap_grad_math_ops.py +6 -4
- mindspore/ops/_vmap/vmap_math_ops.py +5 -2
- mindspore/ops/_vmap/vmap_nn_ops.py +61 -7
- mindspore/ops/arg_dtype_cast.py +54 -0
- mindspore/ops/composite/base.py +37 -10
- mindspore/ops/composite/math_ops.py +5 -4
- mindspore/ops/composite/multitype_ops/_compile_utils.py +273 -72
- mindspore/ops/composite/multitype_ops/_constexpr_utils.py +16 -9
- mindspore/ops/composite/multitype_ops/add_impl.py +43 -4
- mindspore/ops/composite/multitype_ops/getitem_impl.py +40 -2
- mindspore/ops/composite/multitype_ops/ones_like_impl.py +6 -0
- mindspore/ops/composite/multitype_ops/setitem_impl.py +2 -1
- mindspore/ops/composite/multitype_ops/zeros_like_impl.py +9 -0
- mindspore/ops/deprecated.py +304 -0
- mindspore/ops/function/__init__.py +4 -1
- mindspore/ops/function/array_func.py +167 -189
- mindspore/ops/function/clip_func.py +81 -13
- mindspore/ops/function/debug_func.py +1 -1
- mindspore/ops/function/grad/grad_func.py +18 -8
- mindspore/ops/function/image_func.py +10 -4
- mindspore/ops/function/linalg_func.py +5 -5
- mindspore/ops/function/math_func.py +575 -386
- mindspore/ops/function/nn_func.py +470 -251
- mindspore/ops/function/random_func.py +86 -56
- mindspore/ops/function/sparse_func.py +1 -1
- mindspore/ops/function/sparse_unary_func.py +14 -12
- mindspore/ops/function/vmap_func.py +6 -5
- mindspore/ops/functional.py +15 -10
- mindspore/ops/op_info_register.py +235 -19
- mindspore/ops/operations/__init__.py +25 -17
- mindspore/ops/operations/_grad_ops.py +52 -7
- mindspore/ops/operations/_inner_ops.py +213 -12
- mindspore/ops/operations/_quant_ops.py +4 -8
- mindspore/ops/operations/_sequence_ops.py +42 -0
- mindspore/ops/operations/array_ops.py +64 -280
- mindspore/ops/operations/comm_ops.py +105 -57
- mindspore/ops/operations/custom_ops.py +10 -3
- mindspore/ops/operations/debug_ops.py +8 -4
- mindspore/ops/operations/image_ops.py +18 -12
- mindspore/ops/operations/math_ops.py +185 -138
- mindspore/ops/operations/nn_ops.py +716 -492
- mindspore/ops/operations/other_ops.py +0 -22
- mindspore/ops/operations/random_ops.py +53 -111
- mindspore/ops/operations/sparse_ops.py +3 -1
- mindspore/ops/primitive.py +24 -18
- mindspore/parallel/_auto_parallel_context.py +68 -8
- mindspore/parallel/_cost_model_context.py +2 -2
- mindspore/parallel/_offload_context.py +17 -3
- mindspore/parallel/_parallel_serialization.py +2 -2
- mindspore/parallel/_ps_context.py +12 -0
- mindspore/parallel/_tensor.py +14 -12
- mindspore/parallel/_transformer/layers.py +5 -3
- mindspore/parallel/_transformer/loss.py +1 -0
- mindspore/parallel/_transformer/moe.py +2 -2
- mindspore/parallel/_transformer/op_parallel_config.py +12 -1
- mindspore/parallel/_transformer/transformer.py +23 -3
- mindspore/parallel/_utils.py +11 -7
- mindspore/parallel/algo_parameter_config.py +85 -5
- mindspore/parallel/checkpoint_transform.py +6 -10
- mindspore/parallel/shard.py +4 -4
- mindspore/profiler/common/struct_type.py +3 -3
- mindspore/profiler/common/util.py +3 -2
- mindspore/profiler/envprofiling.py +1 -1
- mindspore/profiler/parser/aicpu_data_parser.py +5 -3
- mindspore/profiler/parser/ascend_flops_generator.py +2 -2
- mindspore/profiler/parser/ascend_fpbp_generator.py +1 -1
- mindspore/profiler/parser/ascend_hccl_generator.py +17 -12
- mindspore/profiler/parser/ascend_msprof_exporter.py +104 -252
- mindspore/profiler/parser/ascend_msprof_generator.py +8 -8
- mindspore/profiler/parser/ascend_op_generator.py +5 -5
- mindspore/profiler/parser/ascend_steptrace_generator.py +6 -4
- mindspore/profiler/parser/ascend_timeline_generator.py +9 -6
- mindspore/profiler/parser/base_timeline_generator.py +9 -7
- mindspore/profiler/parser/cpu_gpu_timeline_generator.py +14 -10
- mindspore/profiler/parser/flops_parser.py +15 -11
- mindspore/profiler/parser/framework_parser.py +37 -21
- mindspore/profiler/parser/hccl_parser.py +16 -12
- mindspore/profiler/parser/integrator.py +22 -11
- mindspore/profiler/parser/memory_usage_parser.py +2 -2
- mindspore/profiler/parser/minddata_analyzer.py +12 -14
- mindspore/profiler/parser/minddata_pipeline_parser.py +1 -1
- mindspore/profiler/parser/msadvisor_parser.py +8 -4
- mindspore/profiler/parser/op_intermediate_parser.py +5 -2
- mindspore/profiler/parser/optime_parser.py +1 -1
- mindspore/profiler/parser/profiler_info.py +2 -2
- mindspore/profiler/parser/step_trace_parser.py +11 -14
- mindspore/profiler/profiling.py +139 -71
- mindspore/rewrite/api/node.py +102 -19
- mindspore/rewrite/api/node_type.py +5 -1
- mindspore/rewrite/api/scoped_value.py +9 -17
- mindspore/rewrite/api/symbol_tree.py +131 -47
- mindspore/rewrite/ast_helpers/__init__.py +2 -1
- mindspore/rewrite/ast_helpers/ast_finder.py +129 -0
- mindspore/rewrite/ast_helpers/ast_modifier.py +116 -104
- mindspore/rewrite/ast_transformers/flatten_recursive_stmt.py +93 -46
- mindspore/rewrite/common/rewrite_elog.py +5 -1
- mindspore/rewrite/namer.py +33 -24
- mindspore/rewrite/namespace.py +14 -5
- mindspore/{_extends/graph_kernel/expanders/complex → rewrite/node}/__init__.py +9 -9
- mindspore/rewrite/node/call_function.py +79 -0
- mindspore/rewrite/node/cell_container.py +135 -0
- mindspore/rewrite/node/control_flow.py +88 -0
- mindspore/rewrite/{node.py → node/node.py} +273 -234
- mindspore/rewrite/node/node_manager.py +254 -0
- mindspore/rewrite/{topological_manager.py → node/node_topological_manager.py} +13 -46
- mindspore/rewrite/parsers/arguments_parser.py +22 -21
- mindspore/rewrite/parsers/assign_parser.py +216 -221
- mindspore/rewrite/parsers/attribute_parser.py +9 -7
- mindspore/rewrite/parsers/class_def_parser.py +174 -113
- mindspore/rewrite/parsers/constant_parser.py +9 -6
- mindspore/rewrite/parsers/container_parser.py +9 -7
- mindspore/rewrite/parsers/for_parser.py +36 -15
- mindspore/rewrite/parsers/function_def_parser.py +24 -16
- mindspore/rewrite/parsers/if_parser.py +28 -24
- mindspore/rewrite/parsers/module_parser.py +196 -25
- mindspore/rewrite/{parser.py → parsers/parser.py} +4 -2
- mindspore/rewrite/{parser_register.py → parsers/parser_register.py} +1 -1
- mindspore/rewrite/parsers/return_parser.py +6 -6
- mindspore/rewrite/sparsify/sparse_transformer.py +12 -3
- mindspore/rewrite/sparsify/utils.py +1 -1
- mindspore/rewrite/symbol_tree.py +525 -577
- mindspore/rewrite/symbol_tree_builder.py +9 -193
- mindspore/rewrite/symbol_tree_dumper.py +2 -2
- mindspore/run_check/_check_version.py +2 -2
- mindspore/{ops/bprop_mindir → safeguard}/__init__.py +4 -3
- mindspore/safeguard/rewrite_obfuscation.py +517 -0
- mindspore/scipy/linalg.py +1 -1
- mindspore/scipy/optimize/minimize.py +7 -3
- mindspore/train/_utils.py +7 -3
- mindspore/train/amp.py +323 -123
- mindspore/train/anf_ir_pb2.py +14 -2
- mindspore/train/callback/_backup_and_restore.py +2 -12
- mindspore/train/callback/_callback.py +29 -4
- mindspore/train/callback/_checkpoint.py +23 -8
- mindspore/train/callback/_early_stop.py +2 -2
- mindspore/train/callback/_landscape.py +4 -4
- mindspore/train/callback/_loss_monitor.py +2 -2
- mindspore/train/callback/_on_request_exit.py +2 -2
- mindspore/train/callback/_reduce_lr_on_plateau.py +3 -4
- mindspore/train/callback/_summary_collector.py +14 -7
- mindspore/train/callback/_time_monitor.py +58 -5
- mindspore/train/data_sink.py +5 -11
- mindspore/train/dataset_helper.py +83 -57
- mindspore/train/loss_scale_manager.py +2 -2
- mindspore/train/metrics/__init__.py +3 -3
- mindspore/train/metrics/cosine_similarity.py +1 -1
- mindspore/train/metrics/hausdorff_distance.py +3 -2
- mindspore/train/metrics/mean_surface_distance.py +3 -2
- mindspore/train/metrics/metric.py +39 -19
- mindspore/train/metrics/roc.py +2 -2
- mindspore/train/metrics/root_mean_square_surface_distance.py +4 -3
- mindspore/train/mind_ir_pb2.py +85 -36
- mindspore/train/model.py +185 -45
- mindspore/train/serialization.py +390 -150
- mindspore/train/summary/_writer_pool.py +3 -2
- mindspore/train/summary/summary_record.py +14 -10
- mindspore/train/train_thor/convert_utils.py +3 -3
- mindspore/train/train_thor/dataset_helper.py +1 -1
- mindspore/version.py +1 -1
- {mindspore-2.1.0.dist-info → mindspore-2.2.0.dist-info}/METADATA +6 -7
- {mindspore-2.1.0.dist-info → mindspore-2.2.0.dist-info}/RECORD +447 -507
- {mindspore-2.1.0.dist-info → mindspore-2.2.0.dist-info}/entry_points.txt +0 -1
- mindspore/_akg/akg/tvm/contrib/debugger/__init__.py +0 -16
- mindspore/_akg/akg/tvm/contrib/debugger/debug_result.py +0 -274
- mindspore/_akg/akg/tvm/contrib/debugger/debug_runtime.py +0 -259
- mindspore/_akg/akg/tvm/contrib/peak.py +0 -341
- mindspore/_akg/akg/tvm/contrib/rpc.py +0 -25
- mindspore/_akg/akg/tvm/contrib/xcode.py +0 -257
- mindspore/_akg/akg/tvm/exec/__init__.py +0 -17
- mindspore/_akg/akg/tvm/exec/autotvm_log_editor.py +0 -60
- mindspore/_akg/akg/tvm/exec/measure_peak.py +0 -48
- mindspore/_akg/akg/tvm/exec/query_rpc_tracker.py +0 -48
- mindspore/_akg/akg/tvm/exec/rpc_proxy.py +0 -98
- mindspore/_akg/akg/tvm/exec/rpc_server.py +0 -88
- mindspore/_akg/akg/tvm/exec/rpc_tracker.py +0 -62
- mindspore/_akg/akg/tvm/rpc/__init__.py +0 -29
- mindspore/_akg/akg/tvm/rpc/base.py +0 -182
- mindspore/_akg/akg/tvm/rpc/client.py +0 -436
- mindspore/_akg/akg/tvm/rpc/proxy.py +0 -595
- mindspore/_akg/akg/tvm/rpc/server.py +0 -413
- mindspore/_akg/akg/tvm/rpc/tornado_util.py +0 -121
- mindspore/_akg/akg/tvm/rpc/tracker.py +0 -431
- mindspore/_extends/graph_kernel/expander.py +0 -80
- mindspore/_extends/graph_kernel/expanders/__init__.py +0 -54
- mindspore/_extends/graph_kernel/expanders/_utils.py +0 -269
- mindspore/_extends/graph_kernel/expanders/addn.py +0 -33
- mindspore/_extends/graph_kernel/expanders/batchnorm.py +0 -152
- mindspore/_extends/graph_kernel/expanders/batchnorm_grad.py +0 -105
- mindspore/_extends/graph_kernel/expanders/clip_by_norm_no_div_sum.py +0 -33
- mindspore/_extends/graph_kernel/expanders/complex/abs.py +0 -30
- mindspore/_extends/graph_kernel/expanders/complex/add.py +0 -44
- mindspore/_extends/graph_kernel/expanders/complex/div.py +0 -62
- mindspore/_extends/graph_kernel/expanders/complex/mul.py +0 -52
- mindspore/_extends/graph_kernel/expanders/complex/real_div.py +0 -62
- mindspore/_extends/graph_kernel/expanders/complex/sub.py +0 -45
- mindspore/_extends/graph_kernel/expanders/conv2d.py +0 -200
- mindspore/_extends/graph_kernel/expanders/dropout_grad.py +0 -30
- mindspore/_extends/graph_kernel/expanders/equal_count.py +0 -50
- mindspore/_extends/graph_kernel/expanders/erfc.py +0 -35
- mindspore/_extends/graph_kernel/expanders/expand_dims.py +0 -50
- mindspore/_extends/graph_kernel/expanders/fused_adam.py +0 -44
- mindspore/_extends/graph_kernel/expanders/fused_adam_weight_decay.py +0 -47
- mindspore/_extends/graph_kernel/expanders/fused_mul_add.py +0 -28
- mindspore/_extends/graph_kernel/expanders/gelu_grad.py +0 -70
- mindspore/_extends/graph_kernel/expanders/gkdropout.py +0 -40
- mindspore/_extends/graph_kernel/expanders/identity.py +0 -25
- mindspore/_extends/graph_kernel/expanders/layernorm.py +0 -93
- mindspore/_extends/graph_kernel/expanders/layernorm_grad.py +0 -113
- mindspore/_extends/graph_kernel/expanders/logsoftmax.py +0 -46
- mindspore/_extends/graph_kernel/expanders/logsoftmax_grad.py +0 -36
- mindspore/_extends/graph_kernel/expanders/matmul.py +0 -80
- mindspore/_extends/graph_kernel/expanders/maximum_grad.py +0 -59
- mindspore/_extends/graph_kernel/expanders/minimum_grad.py +0 -80
- mindspore/_extends/graph_kernel/expanders/oneslike.py +0 -26
- mindspore/_extends/graph_kernel/expanders/reduce_mean.py +0 -43
- mindspore/_extends/graph_kernel/expanders/relu_grad.py +0 -32
- mindspore/_extends/graph_kernel/expanders/sigmoid_cross_entropy_with_logits.py +0 -41
- mindspore/_extends/graph_kernel/expanders/sigmoid_cross_entropy_with_logits_grad.py +0 -35
- mindspore/_extends/graph_kernel/expanders/sigmoid_grad.py +0 -31
- mindspore/_extends/graph_kernel/expanders/slice.py +0 -35
- mindspore/_extends/graph_kernel/expanders/softmax_cross_entropy_with_logits.py +0 -42
- mindspore/_extends/graph_kernel/expanders/softmax_grad_ext.py +0 -41
- mindspore/_extends/graph_kernel/expanders/softsign.py +0 -28
- mindspore/_extends/graph_kernel/expanders/sqrt_grad.py +0 -29
- mindspore/_extends/graph_kernel/expanders/square_sum_all.py +0 -44
- mindspore/_extends/graph_kernel/expanders/square_sum_v1.py +0 -37
- mindspore/_extends/graph_kernel/expanders/squared_difference.py +0 -43
- mindspore/_extends/graph_kernel/expanders/tanh_grad.py +0 -31
- mindspore/_extends/graph_kernel/model/op_infer.py +0 -506
- mindspore/dataset/datapreprocess/__init__.py +0 -20
- mindspore/dataset/datapreprocess/preprocess_imagenet_validate_dataset.py +0 -54
- mindspore/include/api/net.h +0 -142
- mindspore/nn/lr_scheduler.py +0 -262
- mindspore/ops/_grad_experimental/grad_image_ops.py +0 -248
- mindspore/ops/_grad_experimental/grad_linalg_ops.py +0 -181
- mindspore/ops/_grad_experimental/grad_other_ops.py +0 -72
- mindspore/ops/_grad_experimental/grad_scalar_ops.py +0 -112
- mindspore/ops/_grad_experimental/grad_sequence_ops.py +0 -351
- mindspore/ops/bprop_mindir/BNTrainingReduce_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Broadcast_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Depend_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/DepthwiseConv2dNative_bprop.mindir +0 -138
- mindspore/ops/bprop_mindir/EmbeddingLookup_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Load_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/ScatterNonAliasingAdd_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/SparseGatherV2_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/SparseSoftmaxCrossEntropyWithLogits_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Switch_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/TransShape_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/TupleGetItem_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Unique_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Unstack_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/generate_mindir.py +0 -114
- mindspore/rewrite/node_visitor.py +0 -44
- {mindspore-2.1.0.dist-info → mindspore-2.2.0.dist-info}/WHEEL +0 -0
- {mindspore-2.1.0.dist-info → mindspore-2.2.0.dist-info}/top_level.txt +0 -0
@@ -1,4 +1,4 @@
-# Copyright 2020-
+# Copyright 2020-2023 Huawei Technologies Co., Ltd
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -438,7 +438,10 @@ class Softmax(Primitive):

     Inputs:
         - **logits** (Tensor) - Tensor of shape :math:`(N, *)`, where :math:`*` means, any number of
-          additional dimensions
+          additional dimensions. Supported dtypes:
+
+          - Ascend: float16, float32.
+          - GPU/CPU: float16, float32, float64.

     Outputs:
         Tensor, with the same type and shape as the logits.
@@ -517,7 +520,11 @@ class Softplus(Primitive):
         \text{output} = \log(1 + \exp(\text{x}))

     Inputs:
-        - **input_x** (Tensor) - Tensor of any dimension
+        - **input_x** (Tensor) - Tensor of any dimension.
+          Supported dtypes:
+
+          - GPU/CPU: float16, float32, float64.
+          - Ascend: float16, float32.

     Outputs:
         Tensor, with the same type and shape as the `input_x`.
@@ -626,7 +633,7 @@ class ReLUV3(Primitive):
     Inputs:
         - **input_x** (Tensor) - Tensor of shape :math:`(N, *)`, where :math:`*` means, any number of
           additional dimensions, data type is
-          `number <https://www.mindspore.cn/docs/en/r2.
+          `number <https://www.mindspore.cn/docs/en/r2.2/api_python/mindspore.html#mindspore.dtype>`_.

     Outputs:
         Tensor of shape :math:`(N, *)`, with the same type and shape as the `input_x`.
@@ -659,7 +666,11 @@ class Mish(PrimitiveWithInfer):
     Refer to :func:`mindspore.ops.mish` for more details.

     Inputs:
-        - **x** (Tensor) - The input Tensor
+        - **x** (Tensor) - The input Tensor.
+          Supported dtypes:
+
+          - GPU/CPU: float16, float32, float64.
+          - Ascend: float16, float32.

     Outputs:
         Tensor, with the same type and shape as the `x`.
@@ -745,7 +756,9 @@ class ReLU6(PrimitiveWithCheck):
     Refer to :func:`mindspore.ops.relu6` for more details.

     Inputs:
-        - **input_x** (Tensor) -
+        - **input_x** (Tensor) - Tensor of shape :math:`(N, *)`,
+          where :math:`*` means any number of additional dimensions.
+          Data type must be float16, float32.

     Outputs:
         Tensor, with the same type and shape as the `input_x`.
@@ -1216,54 +1229,6 @@ class InstanceNormV2(Primitive):
         validator.check_bool(is_training, "is_training", self.name)


-class BNTrainingReduce(Primitive):
-    """
-    The BNTrainingReduce interface is deprecated, please use the :class:`mindspore.ops.BatchNorm` instead.
-
-    Supported Platforms:
-        Deprecated
-    """
-
-    @deprecated("1.5", "ops.BatchNorm", False)
-    @prim_attr_register
-    def __init__(self, data_format="NCHW"):
-        """Initialize BNTrainingReduce."""
-        self.init_prim_io_names(inputs=['x'], outputs=['sum', 'square_sum'])
-        self.format = validator.check_string(data_format, ['NCHW', 'NHWC'], 'format', self.name)
-        if context.get_context("device_target") != "GPU" and self.format == "NHWC":
-            raise ValueError(f"For '{self.name}', the 'NHWC' format is only supported in GPU target, "
-                             f"but got the 'data_format' is {self.format} and "
-                             f"the platform is {context.get_context('device_target')}.")
-        self.add_prim_attr('data_format', self.format)
-
-
-class BNTrainingUpdate(Primitive):
-    """
-    The BNTrainingUpdate interface is deprecated, please use the :class:`mindspore.ops.BatchNorm` instead.
-
-    Supported Platforms:
-        Deprecated
-    """
-
-    @deprecated("1.5", "ops.BatchNorm", False)
-    @prim_attr_register
-    def __init__(self, isRef=True, epsilon=1e-5, factor=0.1, data_format="NCHW"):
-        """Initialize BNTrainingUpdate."""
-        self.init_prim_io_names(inputs=['x', 'sum', 'square_sum', 'scale', 'b', 'mean', 'variance'],
-                                outputs=['y', 'running_mean', 'running_variance', 'save_mean', 'save_inv_variance'])
-        validator.check_value_type("isRef", isRef, [bool], self.name)
-        validator.check_value_type("epsilon", epsilon, [float], self.name)
-        validator.check_value_type("factor", factor, [float], self.name)
-        self.epsilon = validator.check_float_range(epsilon, 0, 1, validator.INC_RIGHT, 'epsilon', 'BNTrainingUpdate')
-        self.factor = validator.check_float_range(factor, 0, 1, validator.INC_BOTH, 'factor', 'BNTrainingUpdate')
-        self.format = validator.check_string(data_format, ['NCHW', 'NHWC'], 'format', self.name)
-        if context.get_context("device_target") != "GPU" and self.format == "NHWC":
-            raise ValueError(f"For '{self.name}', the 'NHWC' format is only supported in GPU target, "
-                             f"but got the 'data_format' is {self.format} and "
-                             f"the platform is {context.get_context('device_target')}.")
-        self.add_prim_attr('data_format', self.format)
-
-
 class BatchNorm(PrimitiveWithInfer):
     r"""
     Batch Normalization for input data and updated parameters.
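Editor's note: both removed primitives carry deprecation notes pointing at :class:`mindspore.ops.BatchNorm`. A minimal migration sketch (the shapes and values below are illustrative, not taken from the diff): a single BatchNorm call covers what the BNTrainingReduce/BNTrainingUpdate pair used to produce.

import numpy as np
import mindspore
from mindspore import Tensor, ops

# Illustrative inputs: an (N, C, H, W) feature map plus per-channel parameters.
x = Tensor(np.ones([2, 3, 4, 4]), mindspore.float32)
scale = Tensor(np.ones([3]), mindspore.float32)
bias = Tensor(np.zeros([3]), mindspore.float32)
mean = Tensor(np.zeros([3]), mindspore.float32)
variance = Tensor(np.ones([3]), mindspore.float32)

# One fused BatchNorm call replaces the BNTrainingReduce + BNTrainingUpdate pair.
batch_norm = ops.BatchNorm(is_training=True, epsilon=1e-5, momentum=0.1)
y, batch_mean, batch_var, _, _ = batch_norm(x, scale, bias, mean, variance)
print(y.shape)  # (2, 3, 4, 4)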
@@ -1400,33 +1365,40 @@ class Conv2D(Primitive):
     2D convolution layer.

     Applies a 2D convolution over an input tensor which is typically of shape :math:`(N, C_{in}, H_{in}, W_{in})`,
-    where :math:`N` is batch size, :math:`C` is channel number, :math:`H` is height, :math:`W` is width
-
-
-    For each batch of shape :math:`(C_{in}, H_{in}, W_{in})`, the formula is defined as:
+    where :math:`N` is batch size, :math:`C` is channel number, :math:`H` is feature height, :math:`W` is feature width.
+
+    The output is calculated based on formula:

     .. math::

-
-
-
-
-
-
-
-        :math:`
-
-
-
-
-
-
-        :math:`
-
-
+        \text{out}(N_i, C_{\text{out}_j}) = \text{bias}(C_{\text{out}_j}) +
+        \sum_{k = 0}^{C_{in} - 1} \text{ccor}({\text{weight}(C_{\text{out}_j}, k), \text{X}(N_i, k)})
+
+    where :math:`bias` is the output channel bias, :math:`ccor` is
+    the `cross-correlation <https://en.wikipedia.org/wiki/Cross-correlation>`_,
+    , :math:`weight` is the convolution kernel value and :math:`X` represents the input feature map.
+
+    Here are the indices' meanings:
+    - :math:`i` corresponds to the batch number, ranging from 0 to N-1, where N is the batch size of the input.
+
+    - :math:`j` corresponds to the output channel, ranging from 0 to C_{out}-1, where C_{out} is the number of
+      output channels, which is also equal to the number of kernels.
+
+    - :math:`k` corresponds to the input channel, ranging from 0 to C_{in}-1, where C_{in} is the number of
+      input channels, which is also equal to the number of channels in the convolutional kernels.
+
+    Therefore, in the above formula, :math:`{bias}(C_{out_j})` represents the bias of the :math:`j`-th
+    output channel, :math:`{weight}(C_{out_j}, k)` represents the slice of the :math:`j`-th convolutional
+    kernel in the :math:`k`-th channel, and :math:`{X}(N_i, k)` represents the slice of the :math:`k`-th input
+    channel in the :math:`i`-th batch of the input feature map.
+
+    The shape of the convolutional kernel is given by :math:`(kernel\_size[0], kernel\_size[1])`,
+    where :math:`kernel\_size[0]` and :math:`kernel\_size[1]` are the height and width of the kernel, respectively.
+    If we consider the input and output channels as well as the `group` parameter, the complete kernel shape
+    will be :math:`(C_{out}, C_{in} / \text{group}, \text{kernel_size[0]}, \text{kernel_size[1]})`,
+    where `group` is the number of groups dividing `x`'s input channel when applying group convolution.
+
+    For more details about convolution layer, please refer to `Gradient Based Learning Applied to Document Recognition
     <http://vision.stanford.edu/cs598_spring07/papers/Lecun98.pdf>`_.

     Note:
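Editor's note: as a sanity check of how the new docstring's symbols map onto array indices, here is a hedged NumPy transcription of the cross-correlation formula (naive loops, group=1, stride=1, no padding; the helper name and sizes are ours, not MindSpore's).

import numpy as np

def naive_conv2d(x, weight, bias=None):
    """Direct transcription of the docstring formula (group=1, stride=1, no pad)."""
    n, c_in, h_in, w_in = x.shape
    c_out, _, kh, kw = weight.shape
    h_out, w_out = h_in - kh + 1, w_in - kw + 1
    out = np.zeros((n, c_out, h_out, w_out), dtype=x.dtype)
    for i in range(n):              # batch index i
        for j in range(c_out):      # output channel j
            for k in range(c_in):   # input channel k
                for p in range(h_out):
                    for q in range(w_out):
                        # ccor(weight(C_out_j, k), X(N_i, k)) evaluated at (p, q)
                        out[i, j, p, q] += np.sum(weight[j, k] * x[i, k, p:p + kh, q:q + kw])
            if bias is not None:
                out[i, j] += bias[j]  # bias(C_out_j)
    return out

x = np.random.rand(1, 2, 6, 6).astype(np.float32)
w = np.random.rand(3, 2, 3, 3).astype(np.float32)
print(naive_conv2d(x, w).shape)  # (1, 3, 4, 4)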
@@ -1434,57 +1406,72 @@ class Conv2D(Primitive):
     That is, when `group>1`, condition `in\_channels` = `out\_channels` = `group` must be satisfied.

     Args:
-        out_channel (int):
-        kernel_size (Union[int, tuple[int]]):
-
-            the
-
-
-
-
-
-
-
-
-            Otherwise, the last extra padding will be calculated from the bottom and the right side.
+        out_channel (int): Specifies output channel :math:`C_{out}`.
+        kernel_size (Union[int, tuple[int]]): Specifies the height and width of the 2D convolution kernel.
+            It can be a single int or a tuple of 2 integers. A single int means the value is for both the height
+            and the width. A tuple of 2 ints means the first value is for the height and the other is for the width.
+        mode (int, optional): Modes for different convolutions. The value is currently not used. Default: ``1`` .
+        pad_mode (str, optional): Specifies the padding mode with a padding value of 0. It can be set to:
+            ``"same"`` , ``"valid"`` or ``"pad"`` . Default: ``"valid"`` .
+
+            - ``"same"``: Pad the input around its edges so that the shape of input and output
+              are the same when `stride` is set to ``1``.
+              The amount of padding to is calculated by the operator internally, If the amount is even, it is
+              uniformly distributed around the input, if it is odd, the excess amount goes to the right/bottom side.
               If this mode is set, `pad` must be 0.
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+            - ``"valid"``: No padding is applied to the input, and the output returns the maximum
+              possible height and width. Extra pixels that could not complete a full stride will
+              be discarded. If this mode is set, `pad` must be 0.
+            - ``"pad"``: Pad the input with a specified amount. In this mode, the amount of padding
+              in the height and width directions is determined by the `pad` parameter.
+              If this mode is set, `pad` must be greater than or equal to 0.
+
+        pad (Union(int, tuple[int]), optional): Specifies the amount of padding to apply on input
+            when `pad_mode` is set to ``"pad"``. It can be a single int or a tuple of 4 ints.
+            If `pad` is one integer, the paddings of top, bottom, left and right are the same, equal to `pad`.
+            If `pad` is a tuple with four integers, the paddings of top, bottom, left and right will be equal to pad[0],
+            pad[1], pad[2], and pad[3] accordingly. Default: ``0`` .
+        stride (Union(int, tuple[int]), optional): Specifies the stride of the convolution kernel's movement.
+            It can be a single int or a tuple of two or four ints. A single int means the stride is the same in
+            both the height and width directions. A tuple of two ints indicates the strides in the height and
+            width directions, respectively. For a tuple of four ints, the two ints correspond to (N, C) dimension
+            are treated as 1, and the two correspond to (H, W) dimensions is the step size in the height
+            and width directions respectively. Default: ``1`` .
+        dilation (Union(int, tuple[int]), optional): Specifies the dilation rate to use for dilated convolution.
+            It can be a single int or a tuple of 2 or 4 integers. A single int means the dilation size is the same
+            in both the height and width directions. A tuple of two ints represents the dilation size in
+            the height and width directions, respectively. For a tuple of four ints, the two ints correspond
+            to (N, C) dimension are treated as 1, and the two correspond to (H, W) dimensions is the
+            dilation size in the height and width directions respectively.
+            Assuming :math:`dilation=(d0, d1)`, the convolutional kernel samples the input with a
+            spacing of :math:`d0-1` elements in the height direction and :math:`d1-1` elements in the width direction.
+            The values in the height and width dimensions are in the ranges [1, H] and [1, W], respectively.
+            Default: ``1`` .
+        group (int, optional): Specifies the number of groups dividing `x`'s input channel when applying
+            group convolution. Default: ``1`` .
+        data_format (str, optional): The optional value for data format, is ``'NHWC'`` or ``'NCHW'`` .
+            Default: ``"NCHW"`` .

     Inputs:
-        - **x** (Tensor) -
-
-
+        - **x** (Tensor) - Input tensor of shape :math:`(N, C_{in}, H_{in}, W_{in})` or
+          :math:`(N, H_{in}, W_{in}, C_{in}, )` depending on `data_format` .
+        - **weight** (Tensor) - The convolutional kernel value, it should has shape
+          :math:`(C_{out}, C_{in} / \text{group}, \text{kernel_size[0]}, \text{kernel_size[1]})` .

     Outputs:
-        Tensor, the value that applied 2D convolution. The shape is :math:`(N, C_{out}, H_{out}, W_{out})
+        Tensor, the value that applied 2D convolution. The shape is :math:`(N, C_{out}, H_{out}, W_{out})`
+        or :math:`(N, H_{out}, W_{out}, C_{out}, )`.
+        To see how different pad modes affect the output shape, please refer to
+        :class:`mindspore.nn.Conv2d` for more details.

     Raises:
         TypeError: If `kernel_size`, `stride`, `pad` or `dilation` is neither an int nor a tuple.
         TypeError: If `out_channel` or `group` is not an int.
         ValueError: If `kernel_size`, `stride` or `dilation` is less than 1.
-        ValueError: If `pad_mode` is not one of 'same'
+        ValueError: If `pad_mode` is not one of ``'same'``, ``'valid'`` or ``'pad'``.
         ValueError: If `pad` is a tuple whose length is not equal to 4.
-        ValueError: If `pad_mode` it not equal to 'pad' and `pad` is not equal to (0, 0, 0, 0)
-        ValueError: If `data_format` is neither '
+        ValueError: If `pad_mode` it not equal to ``'pad'`` and `pad` is not equal to ``(0, 0, 0, 0)``.
+        ValueError: If `data_format` is neither ``'NHWC'`` nor ``'NCHW'`` .

     Supported Platforms:
         ``Ascend`` ``GPU`` ``CPU``
@@ -1493,12 +1480,49 @@ class Conv2D(Primitive):
         >>> import mindspore
         >>> import numpy as np
         >>> from mindspore import Tensor, ops
+        >>> # case 1: All parameters use default values.
         >>> x = Tensor(np.ones([10, 32, 32, 32]), mindspore.float32)
         >>> weight = Tensor(np.ones([32, 32, 3, 3]), mindspore.float32)
         >>> conv2d = ops.Conv2D(out_channel=32, kernel_size=3)
         >>> output = conv2d(x, weight)
         >>> print(output.shape)
         (10, 32, 30, 30)
+        >>> # case 2: pad_mode="pad", other parameters being default.
+        >>> x = Tensor(np.ones([10, 32, 32, 32]), mindspore.float32)
+        >>> weight = Tensor(np.ones([32, 32, 3, 3]), mindspore.float32)
+        >>> conv2d = ops.Conv2D(out_channel=32, kernel_size=3, pad_mode="pad", pad=(4, 10, 4, 10))
+        >>> output = conv2d(x, weight)
+        >>> print(output.shape)
+        (10, 32, 44, 44)
+        >>> # case 3: stride=(2, 4), other parameters being default.
+        >>> x = Tensor(np.ones([10, 32, 32, 32]), mindspore.float32)
+        >>> weight = Tensor(np.ones([32, 32, 3, 3]), mindspore.float32)
+        >>> conv2d = ops.Conv2D(out_channel=32, kernel_size=3, stride=(2, 4))
+        >>> output = conv2d(x, weight)
+        >>> print(output.shape)
+        (10, 32, 15, 8)
+        >>> # case 4: dilation=2, other parameters being default.
+        >>> x = Tensor(np.ones([10, 32, 32, 32]), mindspore.float32)
+        >>> weight = Tensor(np.ones([32, 32, 3, 3]), mindspore.float32)
+        >>> conv2d = ops.Conv2D(out_channel=32, kernel_size=3, dilation=2)
+        >>> output = conv2d(x, weight)
+        >>> print(output.shape)
+        (10, 32, 28, 28)
+        >>> # case 5: group=2, other parameters being default.
+        >>> x = Tensor(np.ones([10, 64, 32, 32]), mindspore.float32)
+        >>> weight = Tensor(np.ones([32, 32, 3, 3]), mindspore.float32)
+        >>> conv2d = ops.Conv2D(out_channel=32, kernel_size=3, group=2)
+        >>> output = conv2d(x, weight)
+        >>> print(output.shape)
+        (10, 32, 30, 30)
+        >>> # case 6: All parameters are specified.
+        >>> x = Tensor(np.ones([10, 64, 32, 32]), mindspore.float32)
+        >>> weight = Tensor(np.ones([32, 32, 3, 3]), mindspore.float32)
+        >>> conv2d = ops.Conv2D(out_channel=32, kernel_size=3, pad_mode="pad",
+        ...                     pad=(4, 10, 4, 10), stride=(2, 4), dilation=2, group=2)
+        >>> output = conv2d(x, weight)
+        >>> print(output.shape)
+        (10, 32, 21, 11)
     """

     @prim_attr_register
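Editor's note: the example shapes above follow from the standard convolution output-size rule. A small sketch reproducing them (assuming the usual floor-based formula for pad_mode="pad"/"valid"; the helper name is ours):

def conv_out_dim(size, kernel, stride=1, dilation=1, pad_before=0, pad_after=0):
    # The effective kernel extent grows with dilation; floor-divide by stride.
    effective_kernel = dilation * (kernel - 1) + 1
    return (size + pad_before + pad_after - effective_kernel) // stride + 1

print(conv_out_dim(32, 3, pad_before=4, pad_after=10))               # case 2: 44
print(conv_out_dim(32, 3, stride=2), conv_out_dim(32, 3, stride=4))  # case 3: 15 8
print(conv_out_dim(32, 3, dilation=2))                               # case 4: 28
print(conv_out_dim(32, 3, stride=2, dilation=2, pad_before=4, pad_after=10),
      conv_out_dim(32, 3, stride=4, dilation=2, pad_before=4, pad_after=10))  # case 6: 21 11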
@@ -1779,8 +1803,13 @@ class _Pool(PrimitiveWithInfer):
         out_w = math.ceil(input_w / stride_w)
         out_shape = [batch, channel, out_h, out_w] if self.format == "NCHW" else [batch, out_h, out_w, channel]

-
-
+        is_dynamic_shape = False
+        for in_shape_val in x_shape_norm:
+            if in_shape_val == -1:
+                is_dynamic_shape = True
+
+        for out_shape_val in out_shape:
+            if out_shape_val <= 0 and not is_dynamic_shape:
                 raise ValueError(f"For '{self.name}', the each element of the output shape must be larger than 0, "
                                  f"but got output shape: {out_shape}. The input shape: {x_shape}, "
                                  f"kernel size: {self.kernel_size}, strides: {self.strides}."
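Editor's note: the added lines relax the output-shape validation for dynamic shapes: any -1 in the normalized input shape marks it as dynamic, and the positivity check is then skipped. A standalone paraphrase of that logic (function name is ours, not the actual _Pool code):

def check_pool_out_shape(x_shape_norm, out_shape, name="MaxPool"):
    # A -1 dimension means the size is only known at runtime (dynamic shape).
    is_dynamic_shape = any(dim == -1 for dim in x_shape_norm)
    for out_shape_val in out_shape:
        if out_shape_val <= 0 and not is_dynamic_shape:
            raise ValueError(f"For '{name}', each element of the output shape must "
                             f"be larger than 0, but got output shape: {out_shape}.")

check_pool_out_shape([4, 3, -1, -1], [4, 3, -1, -1])  # dynamic: accepted
check_pool_out_shape([4, 3, 8, 8], [4, 3, 2, 2])      # static and positive: accepted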
@@ -1814,22 +1843,26 @@ class MaxPool(_Pool):
     strides (Union[int, tuple[int]]): The distance of kernel moving, an int number that represents
         not only the height of movement but also the width of movement, or a tuple of two int numbers that
         represent height and width of movement respectively. Default: ``1`` .
-        pad_mode (str):
-            Default: ``"valid"`` .
+        pad_mode (str, optional): Specifies the padding mode with a padding value of 0. It can be set to:
+            ``"same"`` or ``"valid"`` . Default: ``"valid"`` .

-            - ``"same"``:
-
-
-
+            - ``"same"``: Pad the input around its edges so that the shape of input and output
+              are the same when `stride` is set to ``1``.
+              The amount of padding to is calculated by the operator internally, If the amount is even, it is
+              uniformly distributed around the input, if it is odd, the excess amount goes to the right/bottom side.
+            - ``"valid"``: No padding is applied to the input, and the output returns the maximum
+              possible height and width. Extra pixels that could not complete a full stride will
+              be discarded.

-            - ``"valid"``: Adopts the way of discarding. The possible largest height and width of output
-              will be returned without padding. Extra pixels will be discarded.
         data_format (str) : The optional value for data format, is ``'NHWC'`` or ``'NCHW'`` .
             Default: ``'NCHW'`` .

     Inputs:
         - **x** (Tensor) - Tensor of shape :math:`(N, C_{in}, H_{in}, W_{in})`.
-          Supported dtypes:
+          Supported dtypes:
+
+          - CPU: float16, float32, float64.
+          - GPU/Ascend: float16, float32.

     Outputs:
         Tensor, with shape :math:`(N, C_{out}, H_{out}, W_{out})`.
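Editor's note: the rewritten "same"/"valid" descriptions match the usual TF-style pooling rules. A hedged helper (ours, not MindSpore API) showing how each mode determines the output size and where any odd padding amount goes:

import math

def pool_out_dim(size, kernel, stride, pad_mode):
    if pad_mode == "same":
        out = math.ceil(size / stride)
        pad_total = max((out - 1) * stride + kernel - size, 0)
        pad_before = pad_total // 2           # even amount split uniformly...
        pad_after = pad_total - pad_before    # ...odd excess goes right/bottom
        return out, (pad_before, pad_after)
    # "valid": pixels that cannot complete a full stride are discarded
    return (size - kernel) // stride + 1, (0, 0)

print(pool_out_dim(10, 3, 2, "same"))   # (5, (0, 1))
print(pool_out_dim(10, 3, 2, "valid"))  # (4, (0, 0))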
@@ -1887,16 +1920,17 @@ class MaxPoolV1(Primitive):
     strides (Union[int, tuple[int]]): The distance of kernel moving, an integer that represents
         the height and width of movement are both strides, or a tuple of two integers that
         represent height and width of movement, respectively. Default: ``1`` .
-        pad_mode (str):
-            Default: ``"valid"`` .
+        pad_mode (str, optional): Specifies the padding mode with a padding value of 0. It can be set to:
+            ``"same"`` or ``"valid"`` . Default: ``"valid"`` .

-            - ``"same"``:
-
-
-
+            - ``"same"``: Pad the input around its edges so that the shape of input and output
+              are the same when `stride` is set to ``1``.
+              The amount of padding to is calculated by the operator internally, If the amount is even, it is
+              uniformly distributed around the input, if it is odd, the excess amount goes to the right/bottom side.
+            - ``"valid"``: No padding is applied to the input, and the output returns the maximum
+              possible height and width. Extra pixels that could not complete a full stride will
+              be discarded.

-            - ``"valid"``: Adopts the way of discarding. The possible largest height and width of the
-              output will be returned without padding. Extra pixels will be discarded.
         data_format (str) : The optional value for data format, is ``'NCHW'`` or ``'NHWC'`` .
             Default: ``'NCHW'`` .

@@ -1956,56 +1990,6 @@ class MaxPoolV1(Primitive):
         self.add_prim_attr("kernel_size", kernel_size_adapted)
         self.add_prim_attr("strides", strides_adapted)

-
-class MaxPoolWithArgmax(Primitive):
-    r"""
-    :class:`mindspore.ops.MaxPoolWithArgmax` is deprecated from version 2.0 and will be removed in a future version,
-    use :class:`mindspore.ops.MaxPoolWithArgmaxV2` instead.
-
-    Supported Platforms:
-        Deprecated
-
-    Examples:
-        >>> import mindspore
-        >>> import numpy as np
-        >>> from mindspore import Tensor, ops
-        >>> x = Tensor(np.arange(1 * 3 * 3 * 4).reshape((1, 3, 3, 4)), mindspore.float32)
-        >>> maxpool_arg_op = ops.MaxPoolWithArgmax(pad_mode="VALID", kernel_size=2, strides=1)
-        >>> output_tensor, argmax = maxpool_arg_op(x)
-        >>> print(output_tensor)
-        [[[[ 5.  6.  7.]
-           [ 9. 10. 11.]]
-          [[17. 18. 19.]
-           [21. 22. 23.]]
-          [[29. 30. 31.]
-           [33. 34. 35.]]]]
-    """
-
-    @deprecated("2.0", "ops.MaxPoolWithArgmaxV2", False)
-    @prim_attr_register
-    def __init__(self, kernel_size=1, strides=1, pad_mode="valid", data_format="NCHW"):
-        """Initialize MaxPoolWithArgmax."""
-        self.init_prim_io_names(inputs=['x'], outputs=['output', 'mask'])
-        validator.check_value_type('kernel_size', kernel_size, [int, tuple], self.name)
-        validator.check_value_type('strides', strides, [int, tuple], self.name)
-        validator.check_value_type('pad_mode', pad_mode, [str], self.name)
-        self.pad_mode = validator.check_string(pad_mode.upper(), ['VALID', 'SAME'], 'pad_mode', self.name)
-        self.add_prim_attr("pad_mode", self.pad_mode)
-        self.format = validator.check_string(data_format, ['NCHW', 'NHWC'], 'format', self.name)
-        if context.get_context("device_target") != "GPU" and self.format == "NHWC":
-            raise ValueError(f"For '{self.name}', the 'NHWC' format is only supported in GPU target, "
-                             f"but got the 'data_format' is {self.format} and "
-                             f"the platform is {context.get_context('device_target')}.")
-        self.kernel_size = _check_positive_int_or_tuple(
-            "kernel_size", kernel_size, self.name, allow_four=False, ret_four=True)
-        self.kernel_size = (1, self.kernel_size[-2], self.kernel_size[-1], 1)
-        self.add_prim_attr("kernel_size", self.kernel_size)
-
-        self.strides = _check_positive_int_or_tuple("strides", strides, self.name, allow_four=False, ret_four=True)
-        self.strides = (1, self.strides[-2], self.strides[-1], 1)
-        self.add_prim_attr("strides", self.strides)
-
-
 class MaxPool3D(Primitive):
     r"""
     Applies a 3D max pooling over an input Tensor which can be regarded as a composition of 3D planes.
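Editor's note: the removed class's deprecation message points to :class:`mindspore.ops.MaxPoolWithArgmaxV2`. A hedged migration of the example from the deleted docstring (assumption: the V2 primitive takes explicit kernel_size/strides rather than a pad_mode argument, per its 2.x API):

import numpy as np
import mindspore
from mindspore import Tensor, ops

x = Tensor(np.arange(1 * 3 * 3 * 4).reshape((1, 3, 3, 4)), mindspore.float32)
# MaxPoolWithArgmaxV2 replaces the deprecated pad_mode-based MaxPoolWithArgmax.
maxpool_v2 = ops.MaxPoolWithArgmaxV2(kernel_size=2, strides=1)
output_tensor, argmax = maxpool_v2(x)
print(output_tensor.shape)  # (1, 3, 2, 3)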
@@ -2026,19 +2010,21 @@ class MaxPool3D(Primitive):
|
|
|
2026
2010
|
strides (Union[int, tuple[int]]): The distance of kernel moving, an int number that represents
|
|
2027
2011
|
not only the depth, height of movement but also the width of movement,, or a tuple of three int numbers that
|
|
2028
2012
|
represent depth, height and width of movement respectively. Default: ``1`` .
|
|
2029
|
-
pad_mode (str):
|
|
2030
|
-
Default: ``"VALID"`` .
|
|
2031
|
-
|
|
2032
|
-
- ``"SAME"``:
|
|
2033
|
-
|
|
2034
|
-
|
|
2035
|
-
|
|
2036
|
-
|
|
2037
|
-
|
|
2038
|
-
|
|
2039
|
-
|
|
2040
|
-
|
|
2041
|
-
|
|
2013
|
+
pad_mode (str, optional): Specifies the padding mode with a padding value of 0. It can be set to:
|
|
2014
|
+
``"SAME"`` , ``"VALID"`` or ``"PAD"`` . Default: ``"VALID"`` .
|
|
2015
|
+
|
|
2016
|
+
- ``"SAME"``: Pad the input around its depth/height/width dimension so that the shape of input and output
|
|
2017
|
+
are the same when `stride` is set to ``1``.
|
|
2018
|
+
The amount of padding to is calculated by the operator internally. If the amount is even,
|
|
2019
|
+
it isuniformly distributed around the input, if it is odd, the excess amount goes
|
|
2020
|
+
to the front/right/bottom side.
|
|
2021
|
+
If this mode is set, `pad_list` must be 0.
|
|
2022
|
+
- ``"VALID"``: No padding is applied to the input, and the output returns the maximum
|
|
2023
|
+
possible depth, height and width. Extra pixels that could not complete a full stride will
|
|
2024
|
+
be discarded. If this mode is set, `pad_list` must be 0.
|
|
2025
|
+
- ``"PAD"``: Pad the input with a specified amount. In this mode, the amount of padding
|
|
2026
|
+
in the depth, height and width dimension is determined by the `pad_list` parameter.
|
|
2027
|
+
If this mode is set, `pad_list` must be greater than or equal to 0.
|
|
2042
2028
|
|
|
2043
2029
|
pad_list (Union(int, tuple[int])): The pad value to be filled. Default: ``0`` . If `pad` is an integer, the
|
|
2044
2030
|
paddings of head, tail, top, bottom, left and right are the same, equal to pad. If `pad` is a tuple of six
|
|
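
For reference, the ``"VALID"`` behaviour described in this hunk can be exercised with a minimal
doctest-style sketch (the input shape and values are assumed for illustration, not taken from this release):

    >>> import mindspore
    >>> import numpy as np
    >>> from mindspore import Tensor, ops
    >>> x = Tensor(np.arange(1 * 2 * 2 * 2 * 3).reshape((1, 2, 2, 2, 3)), mindspore.float32)
    >>> max_pool3d = ops.MaxPool3D(kernel_size=2, strides=1, pad_mode="VALID")
    >>> output = max_pool3d(x)  # no padding: each spatial dimension shrinks by kernel_size - 1
    >>> print(output.shape)
    (1, 2, 1, 1, 2)
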
@@ -2347,14 +2333,17 @@ class AvgPool(Primitive):
         strides (Union[int, tuple[int]]): The distance of kernel moving, an int number that represents
             the height and width of movement are both strides, or a tuple of two int numbers that
             represent height and width of movement respectively. Default: ``1`` .
-        pad_mode (str, optional):
-            Default: ``
+        pad_mode (str, optional): Specifies the padding mode with a padding value of 0. It can be set to:
+            ``"same"`` or ``"valid"`` . Default: ``"valid"`` .
 
-            - ``
-
+            - ``"same"``: Pad the input around its edges so that the shape of input and output
+              are the same when `stride` is set to ``1``.
+              The amount of padding is calculated by the operator internally. If the amount is even, it is
+              uniformly distributed around the input; if it is odd, the excess amount goes to the right/bottom side.
+            - ``"valid"``: No padding is applied to the input, and the output returns the maximum
+              possible height and width. Extra pixels that could not complete a full stride will
+              be discarded.
 
-            - ``'valid'``: Returns the output of the valid calculation without filling. Redundant pixels that
-              do not satisfy the calculation will be discarded.
         data_format (str, optional): The format of input and output data. It should be ``'NHWC'`` or ``'NCHW'`` .
             Default: ``'NCHW'`` .
 
@@ -2451,16 +2440,17 @@ class AvgPoolV1(Primitive):
         strides (Union[int, tuple[int]]): The distance of kernel moving, an integer that represents
             the height and width of movement are both strides, or a tuple of two integers that
             represent height and width of movement, respectively. Default: ``1`` .
-        pad_mode (str):
-            Default: ``"valid"`` .
+        pad_mode (str, optional): Specifies the padding mode with a padding value of 0. It can be set to:
+            ``"same"`` or ``"valid"`` . Default: ``"valid"`` .
 
-            - ``"same"``:
-              the
-
-
+            - ``"same"``: Pad the input around its edges so that the shape of input and output
+              are the same when `stride` is set to ``1``.
+              The amount of padding is calculated by the operator internally. If the amount is even, it is
+              uniformly distributed around the input; if it is odd, the excess amount goes to the right/bottom side.
+            - ``"valid"``: No padding is applied to the input, and the output returns the maximum
+              possible height and width. Extra pixels that could not complete a full stride will
+              be discarded.
 
-            - ``"valid"``: Adopts the way of discarding. The largest possible height and width of output
-              will be returned without padding. Extra pixels will be discarded.
         data_format (str): The format of input and output data. Should be ``'NHWC'`` or ``'NCHW'`` .
             Default: ``'NCHW'`` .
 
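
The ``"same"``/``"valid"`` contrast for 2D pooling can be checked quickly; a doctest-style sketch
with an assumed input shape:

    >>> import mindspore
    >>> import numpy as np
    >>> from mindspore import Tensor, ops
    >>> x = Tensor(np.arange(1 * 3 * 3 * 4).reshape(1, 3, 3, 4), mindspore.float32)
    >>> print(ops.AvgPool(kernel_size=2, strides=1, pad_mode="valid")(x).shape)  # H, W shrink
    (1, 3, 2, 3)
    >>> print(ops.AvgPool(kernel_size=2, strides=1, pad_mode="same")(x).shape)   # H, W preserved
    (1, 3, 3, 4)
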
@@ -2708,8 +2698,21 @@ class Conv2DTranspose(Conv2DBackpropInput):
     Args:
         out_channel (int): The dimensionality of the output space.
         kernel_size (Union[int, tuple[int]]): The size of the convolution window.
-        pad_mode (str):
-            Default: ``"valid"`` .
+        pad_mode (str, optional): Specifies the padding mode with a padding value of 0. It can be set to:
+            ``"same"`` , ``"valid"`` or ``"pad"`` . Default: ``"valid"`` .
+
+            - ``"same"``: Pad the input around its edges so that the shape of input and output
+              are the same when `stride` is set to ``1``.
+              The amount of padding is calculated by the operator internally. If the amount is even, it is
+              uniformly distributed around the input; if it is odd, the excess amount goes to the right/bottom side.
+              If this mode is set, `pad` must be 0.
+            - ``"valid"``: No padding is applied to the input, and the output returns the maximum
+              possible height and width. Extra pixels that could not complete a full stride will
+              be discarded. If this mode is set, `pad` must be 0.
+            - ``"pad"``: Pad the input with a specified amount. In this mode, the amount of padding
+              in the height and width directions is determined by the `pad` parameter.
+              If this mode is set, `pad` must be greater than or equal to 0.
+
         Please refer to :class:`mindspore.nn.Conv2dTranspose` for more specifications about `pad_mode`.
         pad (Union[int, tuple[int]]): The pad value to be filled. Default: ``0`` . If `pad` is an integer, the paddings
             of top, bottom, left and right are the same, equal to pad. If `pad` is a tuple of four integers,
@@ -2779,9 +2782,13 @@ class BiasAdd(Primitive):
         Default is ``"NCHW"`` .
 
     Inputs:
-        - **input_x** (Tensor) - The input tensor. The shape can be 2-5 dimensions.
+        - **input_x** (Tensor) - The input tensor. The shape can be 2-5 dimensions. Supported dtypes:
+
+          - Ascend/CPU: all Number type.
+          - GPU: float16, float32, int8.
+
         - **bias** (Tensor) - The bias tensor, with shape :math:`(C)`. C must be the same as channel dimension C of
-          `input_x`.
+          `input_x`. It has the same type as `input_x`.
 
     Outputs:
         Tensor, with the same shape and data type as `input_x`.
@@ -2790,7 +2797,7 @@ class BiasAdd(Primitive):
         TypeError: If `data_format` is not a str.
         ValueError: If value of `data_format` is not in the range of ['NHWC', 'NCHW', 'NCDHW'].
         TypeError: If `input_x` or `bias` is not a Tensor.
-        TypeError: If dtype of `input_x`
+        TypeError: If dtype of `input_x` and `bias` is inconsistent.
         TypeError: If dimension of `input_x` is not in the range [2, 5].
 
     Supported Platforms:
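
A minimal doctest-style sketch of the broadcast described above (input values assumed):

    >>> import mindspore
    >>> import numpy as np
    >>> from mindspore import Tensor, ops
    >>> input_x = Tensor(np.arange(6).reshape((2, 3)), mindspore.float32)
    >>> bias = Tensor(np.ones(3), mindspore.float32)  # shape (C,) matches the channel dimension
    >>> output = ops.BiasAdd()(input_x, bias)
    >>> print(output)
    [[1. 2. 3.]
     [4. 5. 6.]]
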
@@ -2820,7 +2827,7 @@ class NLLLoss(Primitive):
     r"""
     Gets the negative log likelihood loss between logits and labels.
 
-    The nll loss with reduction=none can be described as:
+    The nll loss with :math:`reduction = none` can be described as:
 
     .. math::
 
@@ -2831,7 +2838,7 @@ class NLLLoss(Primitive):
     where :math:`x` is the logits, :math:`t` is the labels, :math:`w` is the weight,
     N is the batch size, :math:`c` belonging to [0, C-1] is class index, where :math:`C` is the number of classes.
 
-    If reduction
+    If :math:`reduction \neq none` (default ``'mean'`` ), then
 
     .. math::
 
@@ -2841,8 +2848,13 @@ class NLLLoss(Primitive):
         \end{array}\right.
 
     Args:
-        reduction (str): Apply specific reduction method to the output: ``
-
+        reduction (str, optional): Apply specific reduction method to the output: ``'none'`` , ``'mean'`` ,
+            ``'sum'`` . Default: ``'mean'`` .
+
+            - ``'none'``: no reduction will be applied.
+            - ``'mean'``: compute and return the weighted mean of elements in the output.
+            - ``'sum'``: the output elements will be summed.
+
         ignore_index (int): Specifies a target value that is ignored
             and does not contribute to the input gradient. Default: ``-100`` .
 
@@ -2856,8 +2868,9 @@ class NLLLoss(Primitive):
     Outputs:
         Tuple of 2 tensors composed with `loss` and `total_weight`.
 
-        - **loss** (Tensor) - When `reduction` is 'none' and `logits` is a 2D tensor,
-          Otherwise, the `loss` is a scalar.
+        - **loss** (Tensor) - When `reduction` is ``'none'`` and `logits` is a 2D tensor,
+          the `loss` shape is :math:`(N,)`. Otherwise, the `loss` is a scalar.
+          The data type is the same as that of `logits`.
         - **total_weight** (Tensor) - The `total_weight` is a scalar. The data type is the same with `weight's`.
 
     Raises:
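
A doctest-style sketch of the two-output contract above (logits and labels assumed for illustration):

    >>> import mindspore
    >>> import numpy as np
    >>> from mindspore import Tensor, ops
    >>> logits = Tensor(np.array([[-1.4, -2.3, -3.2], [-2.6, -0.9, -1.3]]), mindspore.float32)
    >>> labels = Tensor(np.array([0, 1]), mindspore.int32)
    >>> weight = Tensor(np.ones(3), mindspore.float32)
    >>> loss, total_weight = ops.NLLLoss(reduction='mean')(logits, labels, weight)
    >>> print(loss.shape, total_weight.shape)  # both scalars when reduction != 'none'
    () ()
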
@@ -3155,6 +3168,10 @@ class SmoothL1Loss(Primitive):
         reduction (str, optional): Apply specific reduction method to the output: ``'none'`` , ``'mean'`` ,
             ``'sum'`` . Default: ``'none'`` .
 
+            - ``'none'``: no reduction will be applied.
+            - ``'mean'``: compute and return the mean of elements in the output.
+            - ``'sum'``: the output elements will be summed.
+
     Inputs:
         - **logits** (Tensor) - Input Tensor of any dimension. Data type must be float16, float32 or float64.
         - **labels** (Tensor) - Ground truth data, has the same shape and dtype as the `logits`.
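
The same three-way `reduction` contract recurs across the loss operators in this diff; a plain NumPy
sketch of the semantics (per-element loss values assumed):

    >>> import numpy as np
    >>> loss = np.array([0.2, 0.5, 0.3])   # what 'none' returns: the per-element losses
    >>> print(loss.mean(), loss.sum())     # what 'mean' and 'sum' reduce to
    0.3333333333333333 1.0
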
@@ -3202,12 +3219,12 @@ class MultiMarginLoss(Primitive):
     Args:
         p (int, optional): The norm degree for pairwise distance. Should be 1 or 2. Default: ``1`` .
         margin (int, optional): A parameter to change pairwise distance. Default: ``1.0`` .
-        reduction (str, optional): Apply specific reduction method to the output: ``
-            ``
+        reduction (str, optional): Apply specific reduction method to the output: ``'none'`` , ``'mean'`` ,
+            ``'sum'`` . Default: ``'mean'`` .
 
-            - ``
-            - ``
-            - ``
+            - ``'none'``: no reduction will be applied.
+            - ``'mean'``: compute and return the weighted mean of elements in the output.
+            - ``'sum'``: the output elements will be summed.
 
     Inputs:
         - **inputs** (Tensor) - Input, with shape :math:`(N, C)`. Data type only support float32, float16
@@ -3218,7 +3235,7 @@ class MultiMarginLoss(Primitive):
           support float16, float32 or float64.
 
     Outputs:
-        Tensor, When `reduction` is 'none'
+        Tensor. When `reduction` is ``'none'``, the shape is :math:`(N,)`.
         Otherwise, it is a scalar. Has the same data type with `inputs`.
 
     Supported Platforms:
@@ -3261,15 +3278,19 @@ class SoftMarginLoss(Primitive):
     where :math:`x.nelement()` is the number of elements of x.
 
     Args:
-        reduction (str): Apply specific reduction method to the output: ``
-
+        reduction (str, optional): Apply specific reduction method to the output: ``'none'`` , ``'mean'`` ,
+            ``'sum'`` . Default: ``'mean'`` .
+
+            - ``'none'``: no reduction will be applied.
+            - ``'mean'``: compute and return the mean of elements in the output.
+            - ``'sum'``: the output elements will be summed.
 
     Inputs:
         - **logits** (Tensor) - Predict data. Data type must be float16 or float32.
         - **labels** (Tensor) - Ground truth data, with the same type and shape as `logits`.
 
     Outputs:
-        Tensor or Scalar, if `reduction` is "none"
+        Tensor or Scalar, if `reduction` is ``"none"``, its shape is the same as `logits`.
         Otherwise, a scalar value will be returned.
 
     Raises:
@@ -3736,26 +3757,28 @@ class LayerNorm(Primitive):
 
     Args:
         begin_norm_axis (int): The begin axis of the `input_x` to apply LayerNorm,
-            the value must be in [-1, rank(
+            the value must be in [-1, rank(input_x)). Default: ``1`` .
         begin_params_axis (int): The begin axis of the parameter input (`gamma`, `beta`) to
-            apply LayerNorm, the value must be in [-1, rank(
-        epsilon (float): A value added to the denominator for numerical stability. Default: ``1e-7`` .
+            apply LayerNorm, the value must be in [-1, rank(input_x)). Default: ``1`` .
+        epsilon (float): A value added to the denominator for numerical stability (:math:`\epsilon`). Default: ``1e-7`` .
 
     Inputs:
         - **input_x** (Tensor) - Tensor of shape :math:`(N, \ldots)`.
           The input of LayerNorm. Supported dtypes: float16, float32, float64.
-        - **gamma** (Tensor) - Tensor of shape :math:`(
+        - **gamma** (Tensor) - Tensor of shape :math:`(P_\text{begin_params_axis}, \ldots, P_\text{rank(input_x)-1})`.
          The learnable parameter :math:`\gamma` as the scale on norm. Supported dtypes: float16, float32, float64.
-        - **beta** (Tensor) - Tensor of shape :math:`(
+        - **beta** (Tensor) - Tensor of shape :math:`(P_\text{begin_params_axis}, \ldots, P_\text{rank(input_x)-1})`.
          The learnable parameter :math:`\beta` as the scale on norm. Supported dtypes: float16, float32, float64.
 
     Outputs:
         tuple[Tensor], tuple of 3 tensors, the normalized input and the updated parameters.
 
         - **output_x** (Tensor) - The normalized input, has the same type and shape as the `input_x`.
-
-
-
+        - **mean** (Tensor) - The first `begin_norm_axis` dimensions of `mean` shape is the same as `input_x`,
+          and the remaining dimensions are 1. Suppose the shape of the `input_x` is :math:`(x_1, x_2, \ldots, x_R)`,
+          the shape of the `mean` is :math:`(x_1, \ldots, x_{begin_params_axis}, 1, \ldots, 1)`
+          (when `begin_params_axis=0`, the shape of `mean` is :math:`(1, \ldots, 1)` ).
+        - **variance** (Tensor) - Shape is the same as `mean` .
 
     Raises:
         TypeError: If `begin_norm_axis` or `begin_params_axis` is not an int.
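
The new `mean`/`variance` shape rule above can be verified with a small doctest-style sketch
(shapes assumed for illustration):

    >>> import mindspore
    >>> import numpy as np
    >>> from mindspore import Tensor, ops
    >>> input_x = Tensor(np.ones([2, 3, 4]), mindspore.float32)
    >>> gamma = Tensor(np.ones([3, 4]), mindspore.float32)   # parameters start at axis 1
    >>> beta = Tensor(np.zeros([3, 4]), mindspore.float32)
    >>> layer_norm = ops.LayerNorm(begin_norm_axis=1, begin_params_axis=1)
    >>> output, mean, variance = layer_norm(input_x, gamma, beta)
    >>> print(output.shape, mean.shape, variance.shape)  # normalized axes collapse to 1
    (2, 3, 4) (2, 1, 1) (2, 1, 1)
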
@@ -3855,38 +3878,6 @@ class L2Normalize(Primitive):
         self.axis = axis
 
 
-class DropoutGenMask(Primitive):
-    """
-    The DropoutGenMask interface is deprecated, please use the :class:`mindspore.ops.Dropout` instead.
-
-    Supported Platforms:
-        Deprecated
-    """
-
-    @deprecated("1.5", "ops.Dropout", False)
-    @prim_attr_register
-    def __init__(self, Seed0=0, Seed1=0):
-        """Initialize DropoutGenMask."""
-        self.init_prim_io_names(inputs=['shape', 'keep_prob'], outputs=['output'])
-        validator.check_value_type("Seed0", Seed0, [int], self.name)
-        validator.check_value_type("Seed1", Seed1, [int], self.name)
-        self.add_prim_attr("side_effect_hidden", True)
-
-
-class DropoutDoMask(Primitive):
-    """
-    The DropoutDoMask interface is deprecated, please use the :class:`mindspore.ops.Dropout` instead.
-
-    Supported Platforms:
-        Deprecated
-    """
-
-    @deprecated("1.5", "ops.Dropout", False)
-    @prim_attr_register
-    def __init__(self):
-        pass
-
-
 class ResizeBilinear(PrimitiveWithInfer):
     r"""
     This API is deprecated, please use the :class:`mindspore.ops.ResizeBilinearV2` instead.
@@ -3927,6 +3918,7 @@ class ResizeBilinear(PrimitiveWithInfer):
     def infer_dtype(self, input_dtype):
         validator.check_tensor_dtype_valid('input_dtype', input_dtype, [mstype.float16, mstype.float32],
                                            self.name)
+        self.add_prim_attr("dtype", input_dtype)
         return input_dtype
 
 
@@ -4025,7 +4017,7 @@ class OneHot(Primitive):
 
     Inputs:
         - **indices** (Tensor) - A tensor of indices. Tensor of shape :math:`(X_0, \ldots, X_n)`.
-          Data type must be
+          Data type must be int32 or int64.
         - **depth** (int) - A scalar defining the depth of the one-hot dimension.
         - **on_value** (Tensor) - A value to fill in output when `indices[j] = i`.
         - **off_value** (Tensor) - A value to fill in output when `indices[j] != i`.
@@ -4036,7 +4028,7 @@ class OneHot(Primitive):
 
     Raises:
         TypeError: If `axis` or `depth` is not an int.
-        TypeError: If dtype of `indices` is not
+        TypeError: If dtype of `indices` is not int32 or int64.
         TypeError: If `indices`, `on_value` or `off_value` is not a Tensor.
        ValueError: If `axis` is not in range [-1, len(indices_shape)].
        ValueError: If `depth` is less than 0.
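
A doctest-style sketch of the input contract above (indices and fill values assumed):

    >>> import mindspore
    >>> import numpy as np
    >>> from mindspore import Tensor, ops
    >>> indices = Tensor(np.array([0, 1, 2]), mindspore.int32)  # must be int32 or int64
    >>> depth = 3
    >>> on_value, off_value = Tensor(1.0, mindspore.float32), Tensor(0.0, mindspore.float32)
    >>> output = ops.OneHot()(indices, depth, on_value, off_value)
    >>> print(output)
    [[1. 0. 0.]
     [0. 1. 0.]
     [0. 0. 1.]]
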
@@ -4065,26 +4057,6 @@ class OneHot(Primitive):
         validator.check_value_type("axis", axis, [int], self.name)
 
 
-class Gelu(PrimitiveWithInfer):
-    """
-    Same as operator GeLU. Gelu will be deprecated in the future.
-    Please use GeLU instead.
-    """
-
-    @deprecated("1.1", "GeLU", True)
-    @prim_attr_register
-    def __init__(self):
-        """Initialize Gelu"""
-        self.init_prim_io_names(inputs=['x'], outputs=['output'])
-
-    def infer_shape(self, input_x):
-        return input_x
-
-    def infer_dtype(self, input_x):
-        validator.check_tensor_dtype_valid("input_x", input_x, (mstype.float16, mstype.float32), self.name)
-        return input_x
-
-
 class GeLU(Primitive):
     r"""
     Gaussian Error Linear Units activation function.
@@ -4131,26 +4103,6 @@ class GeLU(Primitive):
         self.init_prim_io_names(inputs=['x'], outputs=['output'])
 
 
-class FastGelu(PrimitiveWithInfer):
-    """
-    Same as operator FastGeLU. FastGelu will be deprecated in the future.
-    Please use FastGeLU instead.
-    """
-
-    @deprecated("1.1", "FastGeLU", True)
-    @prim_attr_register
-    def __init__(self):
-        """Initialize FastGelu."""
-        self.init_prim_io_names(inputs=['x'], outputs=['output'])
-
-    def infer_shape(self, input_x):
-        return input_x
-
-    def infer_dtype(self, input_x):
-        validator.check_tensor_dtype_valid("input_x", input_x, (mstype.float16, mstype.float32), self.name)
-        return input_x
-
-
 class FastGeLU(Primitive):
     r"""
     Fast Gaussian Error Linear Units activation function.
@@ -4301,19 +4253,24 @@ class LSTM(Primitive):
         bidirectional (bool): Specifies whether it is a bidirectional LSTM.
         dropout (float): If not 0, append `Dropout` layer on the outputs of each
             LSTM layer except the last layer. The range of dropout is [0.0, 1.0].
+        proj_size (int): If `proj_size` > 0, a projection of the corresponding size will be used,
+            which is only supported on CPU now. Default: ``0`` .
 
     Inputs:
         - **input** (Tensor) - Tensor of shape :math:`(seq\_len, batch\_size, input\_size)` or
           :math:`(batch\_size, seq\_len, input\_size)`.
-        - **h** (Tensor) - Tensor of shape :math:`(num\_directions * num\_layers, batch\_size,
+        - **h** (Tensor) - Tensor of shape :math:`(num\_directions * num\_layers, batch\_size, real\_hidden\_size)`.
         - **c** (Tensor) - Tensor of shape :math:`(num\_directions * num\_layers, batch\_size, hidden\_size)`.
         - **w** (Tensor) - A weight Tensor.
 
+        If :math:`proj\_size > 0` , :math:`real\_hidden\_size = proj\_size` , otherwise
+        :math:`real\_hidden\_size = hidden\_size` .
+
     Outputs:
-        Tuple, a tuple contains (
+        Tuple, a tuple contains `(output, h_n, c_n, reserve, state)`.
 
-        - **output** (Tensor) - Tensor of shape :math:`(seq\_len, batch\_size, num\_directions *
-        - **h_n** (Tensor) - Tensor of shape :math:`(num\_directions * num\_layers, batch\_size,
+        - **output** (Tensor) - Tensor of shape :math:`(seq\_len, batch\_size, num\_directions * real\_hidden\_size)`.
+        - **h_n** (Tensor) - Tensor of shape :math:`(num\_directions * num\_layers, batch\_size, real\_hidden\_size)`.
         - **c_n** (Tensor) - Tensor of shape :math:`(num\_directions * num\_layers, batch\_size, hidden\_size)`.
         - **reserve** (Tensor) - Tensor of shape :math:`(r, 1)`.
         - **state** (Tensor) - Random number generator state and its shape is :math:`(s, 1)`.
@@ -4323,6 +4280,7 @@ class LSTM(Primitive):
         TypeError: If `has_bias` or `bidirectional` is not a bool.
         TypeError: If `dropout` is not a float.
         ValueError: If `dropout` is not in range [0.0, 1.0].
+        ValueError: If `proj_size` is not in range [0, `hidden_size`).
 
     Supported Platforms:
         ``GPU`` ``CPU``
@@ -4356,10 +4314,12 @@ class LSTM(Primitive):
     """
 
     @prim_attr_register
-    def __init__(self, input_size, hidden_size, num_layers, has_bias, bidirectional, dropout):
+    def __init__(self, input_size, hidden_size, num_layers, has_bias, bidirectional, dropout, proj_size=0):
         """Initialize LSTM."""
         self.input_size = validator.check_positive_int(input_size, "input_size", self.name)
         self.hidden_size = validator.check_positive_int(hidden_size, "hidden_size", self.name)
+        self.proj_size = validator.check_int_range(proj_size, 0, hidden_size, validator.INC_LEFT,
+                                                   'proj_size', self.name)
         self.num_layers = validator.check_positive_int(num_layers, "num_layers", self.name)
         self.has_bias = validator.check_value_type("has_bias", has_bias, (bool,), self.name)
         self.bidirectional = validator.check_value_type("bidirectional", bidirectional, (bool,), self.name)
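
A plain-Python sketch of the shape bookkeeping introduced by `proj_size` (all sizes assumed for
illustration):

    >>> seq_len, batch_size, num_layers, num_directions = 5, 2, 1, 1
    >>> hidden_size, proj_size = 16, 8                            # proj_size = 0 disables projection
    >>> real_hidden_size = proj_size if proj_size > 0 else hidden_size
    >>> (seq_len, batch_size, num_directions * real_hidden_size)  # output shape
    (5, 2, 8)
    >>> (num_directions * num_layers, batch_size, hidden_size)    # c_n keeps hidden_size
    (1, 2, 16)
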
@@ -4466,8 +4426,12 @@ class BCEWithLogitsLoss(PrimitiveWithInfer):
     :math:`P_c>1` increases the recall, :math:`P_c<1` increases the precision.
 
     Args:
-        reduction (str):
-            ``'
+        reduction (str, optional): Apply specific reduction method to the output: ``'none'`` , ``'mean'`` ,
+            ``'sum'`` . Default: ``'mean'`` .
+
+            - ``'none'``: no reduction will be applied.
+            - ``'mean'``: compute and return the weighted mean of elements in the output.
+            - ``'sum'``: the output elements will be summed.
 
     Inputs:
         - **logits** (Tensor) - Input logits. Data type must be float16 or float32.
@@ -4481,7 +4445,7 @@ class BCEWithLogitsLoss(PrimitiveWithInfer):
           Data type must be float16 or float32.
 
     Outputs:
-        Tensor or Scalar, if `reduction` is 'none'
+        Tensor or Scalar, if `reduction` is ``'none'``, it's a tensor with the same shape and type as input `logits`.
         Otherwise, the output is a scalar.
 
     Raises:
@@ -4489,7 +4453,7 @@ class BCEWithLogitsLoss(PrimitiveWithInfer):
         TypeError: If data type of any input is neither float16 nor float32.
         TypeError: If data type of `reduction` is not string.
         ValueError: If `weight` or `pos_weight` can not be broadcast to a tensor with shape of `logits`.
-        ValueError: If `reduction` is not one of 'none'
+        ValueError: If `reduction` is not one of ``'none'``, ``'mean'`` or ``'sum'``.
 
     Supported Platforms:
         ``Ascend`` ``GPU`` ``CPU``
@@ -4669,9 +4633,15 @@ class MirrorPad(Primitive):
     Pads the input tensor according to the paddings and mode.
 
     Args:
-        mode (str):
+        mode (str, optional): An optional string specifying the pad method.
+            The optional values are ``'REFLECT'`` and ``'SYMMETRIC'`` .
             Default: ``'REFLECT'`` .
 
+            - ``'REFLECT'``: Reflect the value on the edge while omitting the last one.
+              For example, pad [1, 2, 3, 4] with 2 elements on both sides will result in [3, 2, 1, 2, 3, 4, 3, 2].
+            - ``'SYMMETRIC'``: Reflect the value on the edge while repeating the last one.
+              For example, pad [1, 2, 3, 4] with 2 elements on both sides will result in [2, 1, 1, 2, 3, 4, 4, 3].
+
     Inputs:
         - **input_x** (Tensor) - Tensor of shape :math:`(N, *)`, where :math:`*` means any number of
           additional dimensions.
@@ -4683,15 +4653,14 @@ class MirrorPad(Primitive):
           paddings[D, 0] and paddings[D, 1] must be no greater than input_x.dim_size(D)
           (or input_x.dim_size(D) - 1) if mode is SYMMETRIC (if REFLECT, respectively).
 
-
     Outputs:
         Tensor, the tensor after padding.
 
-        - If `mode` is
+        - If `mode` is ``'REFLECT'``, it uses a way of symmetrical copying through the axis of symmetry to fill in.
          If the `input_x` is [[1,2,3], [4,5,6], [7,8,9]] and `paddings` is [[1,1], [2,2]], then the
          `Outputs` is [[6,5,4,5,6,5,4], [3,2,1,2,3,2,1], [6,5,4,5,6,5,4], [9,8,7,8,9,8,7], [6,5,4,5,6,5,4]].
          For a more intuitive understanding, please see the example below.
-        - If `mode` is
+        - If `mode` is ``'SYMMETRIC'``, the filling method is similar to the ``'REFLECT'``. It is also copied
          according to the symmetry axis, except that it includes the symmetry axis. If the `input_x`
          is [[1,2,3], [4,5,6], [7,8,9]] and `paddings` is [[1,1], [2,2]], then the `Outputs` is
          [[2,1,1,2,3,3,2], [2,1,1,2,3,3,2], [5,4,4,5,6,6,5], [8,7,7,8,9,9,8], [8,7,7,8,9,9,8]].
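
The two modes match NumPy's ``reflect`` and ``symmetric`` pad modes, so the matrices quoted above
can be reproduced without the operator itself:

    >>> import numpy as np
    >>> x = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
    >>> print(np.pad(x, ((1, 1), (2, 2)), mode='reflect'))    # 'REFLECT': axis of symmetry excluded
    [[6 5 4 5 6 5 4]
     [3 2 1 2 3 2 1]
     [6 5 4 5 6 5 4]
     [9 8 7 8 9 8 7]
     [6 5 4 5 6 5 4]]
    >>> print(np.pad(x, ((1, 1), (2, 2)), mode='symmetric'))  # 'SYMMETRIC': edge values repeated
    [[2 1 1 2 3 3 2]
     [2 1 1 2 3 3 2]
     [5 4 4 5 6 6 5]
     [8 7 7 8 9 9 8]
     [8 7 7 8 9 9 8]]
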
@@ -5675,7 +5644,7 @@ class KLDivLoss(Primitive):
         - **labels** (Tensor) - The label Tensor which has the same shape and data type as `logits`.
 
     Outputs:
-        Tensor or Scalar, if `reduction` is 'none'
+        Tensor or Scalar, if `reduction` is ``'none'``, then output is a tensor and has the same shape as `logits`.
         Otherwise it is a scalar.
 
     Raises:
@@ -5750,8 +5719,12 @@ class BinaryCrossEntropy(Primitive):
     - The value of :math:`x` must range from 0 to 1.
 
     Args:
-        reduction (str):
-
+        reduction (str, optional): Apply specific reduction method to the output: ``'none'`` , ``'mean'`` ,
+            ``'sum'`` . Default: ``'mean'`` .
+
+            - ``'none'``: no reduction will be applied.
+            - ``'mean'``: compute and return the weighted mean of elements in the output.
+            - ``'sum'``: the output elements will be summed.
 
     Inputs:
         - **logits** (Tensor) - The predictive value whose data type must be float16 or float32,
@@ -5766,7 +5739,7 @@ class BinaryCrossEntropy(Primitive):
 
     Raises:
         TypeError: If dtype of `logits`, `labels` or `weight` (if given) is neither float16 nor float32.
-        ValueError: If `reduction` is not one of 'none'
+        ValueError: If `reduction` is not one of ``'none'``, ``'mean'`` or ``'sum'``.
         ValueError: If shape of `labels` is not the same as `logits` or `weight` (if given).
         TypeError: If `logits`, `labels` or `weight` is not a Tensor.
 
@@ -7173,7 +7146,19 @@ class Dropout(PrimitiveWithCheck):
 
     Outputs:
         - **output** (Tensor) - With the same shape and data type as `x`.
-        - **mask** (Tensor) -
+        - **mask** (Tensor) - The mask applied to `x`.
+
+          - On GPU and CPU, `mask` has the same shape and data type as `x`.
+          - On Ascend, to achieve a better performance, it is denoted as a 1-D Tensor
+            with Uint8 data type. It has shape :math:`(byte\_counts, )` where :math:`byte\_counts` is the
+            number of bytes needed to mask the input `x`. :math:`byte\_counts` is calculated using the
+            following formula:
+
+            .. math::
+
+                byte\_counts = \text{ceil}(\text{cumprod}(x.shape) / 128) * 16
+
+            If shape of `x` is :math:`(2, 3, 4, 5, 6)`, the shape of `mask` will be :math:`(96, )`.
 
     Supported Platforms:
         ``Ascend`` ``GPU`` ``CPU``
@@ -7195,6 +7180,7 @@ class Dropout(PrimitiveWithCheck):
         self.seed0 = validator.check_value_type("Seed0", Seed0, [int], self.name)
         self.seed1 = validator.check_value_type("Seed1", Seed1, [int], self.name)
         self.keep_prob = validator.check_float_range(keep_prob, 0, 1, validator.INC_RIGHT, "keep_prob", self.name)
+        self.add_prim_attr("side_effect_hidden", True)
 
     def check_shape(self, x_shape):
         validator.check_int(len(x_shape), 1, validator.GE, "x_shape", self.name)
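
Checking the mask-size formula above for the quoted shape :math:`(2, 3, 4, 5, 6)` with plain Python
(a sketch of the arithmetic, not an API call):

    >>> import math
    >>> shape = (2, 3, 4, 5, 6)
    >>> math.ceil(math.prod(shape) / 128) * 16  # 720 elements -> ceil(720 / 128) = 6 -> 96 bytes
    96
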
@@ -7402,6 +7388,9 @@ class CTCGreedyDecoder(Primitive):
 
     Refer to :func:`mindspore.ops.ctc_greedy_decoder` for more details.
 
+    Note:
+        On Ascend, 'merge_repeated' cannot be set to false.
+
     Args:
         merge_repeated (bool, optional): If ``True`` , merge repeated classes in output. Default: ``True`` .
 
@@ -7824,6 +7813,10 @@ class LRN(Primitive):
     r"""
     Local Response Normalization.
 
+    .. warning::
+        LRN is deprecated on Ascend due to potential accuracy problems. It's recommended to use other
+        normalization methods, e.g. :class:`mindspore.ops.BatchNorm`.
+
     .. math::
 
         b_{c} = a_{c}\left(k + \frac{\alpha}{n}
@@ -7854,7 +7847,7 @@ class LRN(Primitive):
         TypeError: If `x` is not a Tensor.
 
     Supported Platforms:
-        ``
+        ``GPU`` ``CPU``
 
     Examples:
         >>> import mindspore
@@ -7908,21 +7901,22 @@ class AvgPool3D(Primitive):
         strides (Union[int, tuple[int]]): The distance of kernel moving, an int number that represents
             the depth, height and width of movement are both strides, or a tuple of three int numbers that
             represent depth, height and width of movement respectively. Default: ``1`` .
-        pad_mode (str):
-            Default: ``"valid"`` .
-
-            - ``"same"``:
-
-
-
+        pad_mode (str, optional): Specifies the padding mode with a padding value of 0. It can be set to:
+            ``"same"`` , ``"valid"`` or ``"pad"`` . Default: ``"valid"`` .
+
+            - ``"same"``: Pad the input around its depth/height/width dimension so that the shape of input and output
+              are the same when `stride` is set to ``1``.
+              The amount of padding is calculated by the operator internally. If the amount is even,
+              it is uniformly distributed around the input; if it is odd, the excess amount goes
+              to the front/right/bottom side.
              If this mode is set, `pad` must be 0.
+            - ``"valid"``: No padding is applied to the input, and the output returns the maximum
+              possible depth, height and width. Extra pixels that could not complete a full stride will
+              be discarded. If this mode is set, `pad` must be 0.
+            - ``"pad"``: Pad the input with a specified amount. In this mode, the amount of padding
+              in the depth, height and width dimension is determined by the `pad` parameter.
+              If this mode is set, `pad` must be greater than or equal to 0.
 
-            - ``"valid"``: Adopts the way of discarding. The possible largest depth, height and width of output
-              will be returned without padding. Extra pixels will be discarded. If this mode is set, `pad`
-              must be 0.
-
-            - pad: Implicit paddings on both sides of the input in depth, height, width. The number of `pad` will
-              be padded to the input Tensor borders. `pad` must be greater than or equal to 0.
         pad (Union(int, tuple[int], list[int])): The pad value to be filled. Default: ``0`` . If `pad` is an integer,
             the paddings of head, tail, top, bottom, left and right are the same, equal to pad.
             If `pad` is a tuple of six integers, the padding of head, tail, top, bottom, left and right equal to
@@ -8005,74 +7999,97 @@ class AvgPool3D(Primitive):
 
 class Conv3D(Primitive):
     r"""
-
-
-
-    :math:`
-
+    3D convolution layer.
+
+    Applies a 3D convolution over an input tensor which is typically of shape
+    :math:`(N, C_{in}, D_{in}, H_{in}, W_{in})`,
+    where :math:`N` is batch size, :math:`C` is channel number, :math:`D` is feature depth,
+    :math:`H` is feature height, :math:`W` is feature width.
+
+    The output is calculated based on formula:
 
     .. math::
-
-        \
-        \
-
-    where :math:`
-
-    :math:`
-
-
-    :math:`
-
-
-
-
-    :math:`
-
-
-
-
+
+        \text{out}(N_i, C_{\text{out}_j}) = \text{bias}(C_{\text{out}_j}) +
+        \sum_{k = 0}^{C_{in} - 1} \text{ccor}({\text{weight}(C_{\text{out}_j}, k), \text{X}(N_i, k)})
+
+    where :math:`bias` is the output channel bias, :math:`ccor` is
+    the `cross-correlation <https://en.wikipedia.org/wiki/Cross-correlation>`_,
+    :math:`weight` is the convolution kernel value and :math:`X` represents the input feature map.
+
+    Here are the indices' meanings:
+
+    - :math:`i` corresponds to the batch number, ranging from 0 to N-1, where N is the batch size of the input.
+
+    - :math:`j` corresponds to the output channel, ranging from 0 to C_{out}-1, where C_{out} is the number of
+      output channels, which is also equal to the number of kernels.
+
+    - :math:`k` corresponds to the input channel, ranging from 0 to C_{in}-1, where C_{in} is the number of
+      input channels, which is also equal to the number of channels in the convolutional kernels.
+
+    Therefore, in the above formula, :math:`{bias}(C_{out_j})` represents the bias of the :math:`j`-th
+    output channel, :math:`{weight}(C_{out_j}, k)` represents the slice of the :math:`j`-th convolutional
+    kernel in the :math:`k`-th channel, and :math:`{X}(N_i, k)` represents the slice of the :math:`k`-th input
+    channel in the :math:`i`-th batch of the input feature map.
+
+    The shape of the convolutional kernel is given by
+    :math:`(\text{kernel_size[0]}, \text{kernel_size[1]}, \text{kernel_size[2]})`
+    where :math:`kernel\_size[0]` , :math:`kernel\_size[1]` and :math:`kernel\_size[2]` are the depth,
+    height and width of the kernel, respectively.
+    If we consider the input and output channels as well as the `group` parameter, the complete kernel shape
+    will be :math:`(C_{out}, C_{in} / \text{group}, \text{kernel_size[0]},
+    \text{kernel_size[1]}, \text{kernel_size[2]})`,
+    where `group` is the number of groups dividing `x`'s input channel when applying group convolution.
+
+    For more details about convolution layer, please refer to `Gradient Based Learning Applied to Document Recognition
+    <http://vision.stanford.edu/cs598_spring07/papers/Lecun98.pdf>`_.
 
     Note:
-        On Ascend platform, `
+        1. On Ascend platform, `groups = 1` must be satisfied.
+        2. On Ascend, `dilation` on depth only supports the case of 1.
 
     Args:
-        out_channel (int):
-        kernel_size (Union[int, tuple[int]]): Specifies the depth, height
-
-
-
+        out_channel (int): Specifies output channel :math:`C_{out}`.
+        kernel_size (Union[int, tuple[int]]): Specifies the depth, height and width of the 3D convolution kernel.
+            It can be a single int or a tuple of 3 integers. A single int means the value is for depth, height
+            and the width. A tuple of 3 ints means the first value is for depth and
+            the rest is for the height and width.
         mode (int, optional): Modes for different convolutions. It is currently not used. Default: ``1`` .
         stride (Union[int, tuple[int]], optional): The distance of kernel moving, it can be an int number
             that represents the depth, height and width of movement or a tuple of three int numbers that
             represent depth, height and width movement respectively. Default: ``1`` .
-        pad_mode (str, optional): Specifies padding mode.
-            ``"same"`` , ``"valid"``
-
-            - ``"same"``:
-              the
-
-
+        pad_mode (str, optional): Specifies the padding mode with a padding value of 0. It can be set to:
+            ``"same"`` , ``"valid"`` or ``"pad"`` . Default: ``"valid"`` .
+
+            - ``"same"``: Pad the input around its depth/height/width dimension so that the shape of input and output
+              are the same when `stride` is set to ``1``.
+              The amount of padding is calculated by the operator internally. If the amount is even,
+              it is uniformly distributed around the input; if it is odd, the excess amount goes
+              to the front/right/bottom side.
              If this mode is set, `pad` must be 0.
-
-
-
-
-
-
-
-
-
-            If `pad` is
-
-
-            pad[3], pad[4] and pad[5]
-        dilation (Union[int, tuple[int]], optional):
-
-
-
-
-
-
+            - ``"valid"``: No padding is applied to the input, and the output returns the maximum
+              possible depth, height and width. Extra pixels that could not complete a full stride will
+              be discarded. If this mode is set, `pad` must be 0.
+            - ``"pad"``: Pad the input with a specified amount. In this mode, the amount of padding
+              in the depth, height and width dimension is determined by the `pad` parameter.
+              If this mode is set, `pad` must be greater than or equal to 0.
+
+        pad (Union(int, tuple[int]), optional): Specifies the amount of padding to apply on input
+            when `pad_mode` is set to ``"pad"``. It can be a single int or a tuple of 6 ints.
+            If `pad` is one integer, the paddings of head, tail, top, bottom,
+            left and right are the same, equal to `pad`. If `pad` is a tuple with 6 integers, the
+            paddings of head, tail, top, bottom, left and right is equal to pad[0],
+            pad[1], pad[2], pad[3], pad[4] and pad[5] accordingly. Default: ``0`` .
+        dilation (Union[int, tuple[int]], optional): Specifies the dilation rate to use for dilated convolution.
+            It can be a single int or a tuple of 3 integers. A single int means the dilation size is the same
+            in the depth, height and width directions. A tuple of 3 ints represents the dilation size in
+            the depth, height and width directions, respectively.
+            Assuming :math:`dilation=(d0, d1, d2)`, the convolutional kernel samples the input with a
+            spacing of :math:`d0-1` elements in the depth direction,
+            :math:`d1-1` elements in the height direction, :math:`d2-1` elements in the
+            width direction respectively. The values in the depth, height and width dimensions are in the
+            ranges [1, D], [1, H] and [1, W], respectively.
+            Default: ``1`` .
+        group (int, optional): The number of groups into which the filter is divided. `in_channels`
            and `out_channels` must be divisible by `group`. Default: ``1`` .
        data_format (str, optional): The optional value for data format. Currently only support ``"NCDHW"`` .
 
@@ -8088,7 +8105,7 @@ class Conv3D(Primitive):
     Outputs:
         Tensor, the value that applied 3D convolution. The shape is :math:`(N, C_{out}, D_{out}, H_{out}, W_{out})`.
 
-        `pad_mode` is
+        `pad_mode` is ``"same"``:
 
         .. math::
             \begin{array}{ll} \\
@@ -8097,7 +8114,7 @@ class Conv3D(Primitive):
                 W_{out} = \left \lceil{\frac{W_{in}}{\text{stride[2]}}} \right \rceil \\
             \end{array}
 
-        `pad_mode` is
+        `pad_mode` is ``"valid"``:
 
         .. math::
             \begin{array}{ll} \\
@@ -8109,15 +8126,15 @@ class Conv3D(Primitive):
                 {\text{stride[2]}} + 1} \right \rfloor \\
             \end{array}
 
-        `pad_mode` is
+        `pad_mode` is ``"pad"``:
 
         .. math::
             \begin{array}{ll} \\
-                D_{out} = \left \lfloor{\frac{D_{in} +
+                D_{out} = \left \lfloor{\frac{D_{in} + pad[0] + pad[1] - \text{dilation[0]} \times
                     (\text{kernel_size[0]} - 1) - 1 }{\text{stride[0]}} + 1} \right \rfloor \\
-                H_{out} = \left \lfloor{\frac{H_{in} +
+                H_{out} = \left \lfloor{\frac{H_{in} + pad[2] + pad[3] - \text{dilation[1]} \times
                     (\text{kernel_size[1]} - 1) - 1 }{\text{stride[1]}} + 1} \right \rfloor \\
-                W_{out} = \left \lfloor{\frac{W_{in} +
+                W_{out} = \left \lfloor{\frac{W_{in} + pad[4] + pad[5] - \text{dilation[2]} \times
                     (\text{kernel_size[2]} - 1) - 1 }{\text{stride[2]}} + 1} \right \rfloor \\
             \end{array}
 
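
A quick sanity check of the ``"pad"`` formula against case 4 in the examples that follow
(all values taken from that case, with dilation and stride at their defaults of 1):

    >>> D_in, pad0, pad1, dilation, k, stride = 32, 2, 2, 1, 3, 1
    >>> (D_in + pad0 + pad1 - dilation * (k - 1) - 1) // stride + 1
    34
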
@@ -8138,12 +8155,56 @@ class Conv3D(Primitive):
         >>> import mindspore
         >>> import numpy as np
         >>> from mindspore import Tensor, ops
+        >>> # case 1: specify kernel_size with tuple, all parameters use default values.
         >>> x = Tensor(np.ones([16, 3, 10, 32, 32]), mindspore.float16)
         >>> weight = Tensor(np.ones([32, 3, 4, 3, 3]), mindspore.float16)
         >>> conv3d = ops.Conv3D(out_channel=32, kernel_size=(4, 3, 3))
         >>> output = conv3d(x, weight)
         >>> print(output.shape)
         (16, 32, 7, 30, 30)
+        >>> # case 2: specify kernel_size with int, all parameters use default values.
+        >>> x = Tensor(np.ones([10, 20, 32, 32, 32]), mindspore.float32)
+        >>> weight = Tensor(np.ones([40, 20, 3, 3, 3]), mindspore.float32)
+        >>> conv3d = ops.Conv3D(out_channel=40, kernel_size=3)
+        >>> output = conv3d(x, weight)
+        >>> print(output.shape)
+        (10, 40, 30, 30, 30)
+        >>> # case 3: stride=(1, 2, 3), other parameters being default.
+        >>> x = Tensor(np.ones([10, 20, 32, 32, 32]), mindspore.float32)
+        >>> weight = Tensor(np.ones([40, 20, 3, 3, 3]), mindspore.float32)
+        >>> conv3d = ops.Conv3D(out_channel=40, kernel_size=3, stride=(1, 2, 3))
+        >>> output = conv3d(x, weight)
+        >>> print(output.shape)
+        (10, 40, 30, 15, 10)
+        >>> # case 4: pad_mode="pad", other parameters being default.
+        >>> x = Tensor(np.ones([10, 20, 32, 32, 32]), mindspore.float32)
+        >>> weight = Tensor(np.ones([40, 20, 3, 3, 3]), mindspore.float32)
+        >>> conv3d = ops.Conv3D(out_channel=40, kernel_size=3, pad_mode="pad", pad=2)
+        >>> output = conv3d(x, weight)
+        >>> print(output.shape)
+        (10, 40, 34, 34, 34)
+        >>> # case 5: dilation=(1, 1, 1), other parameters being default.
+        >>> x = Tensor(np.ones([10, 20, 32, 32, 32]), mindspore.float32)
+        >>> weight = Tensor(np.ones([40, 20, 3, 3, 3]), mindspore.float32)
+        >>> conv3d = ops.Conv3D(out_channel=40, kernel_size=3, dilation=(1, 1, 1))
+        >>> output = conv3d(x, weight)
+        >>> print(output.shape)
+        (10, 40, 30, 30, 30)
+        >>> # case 6: group=1, other parameters being default.
+        >>> x = Tensor(np.ones([10, 20, 32, 32, 32]), mindspore.float32)
+        >>> weight = Tensor(np.ones([40, 20, 3, 3, 3]), mindspore.float32)
+        >>> conv3d = ops.Conv3D(out_channel=40, kernel_size=3, group=1)
+        >>> output = conv3d(x, weight)
+        >>> print(output.shape)
+        (10, 40, 30, 30, 30)
+        >>> # case 7: All parameters are specified.
+        >>> x = Tensor(np.ones([10, 20, 32, 32, 32]), mindspore.float32)
+        >>> weight = Tensor(np.ones([40, 20, 3, 3, 3]), mindspore.float32)
+        >>> conv3d = ops.Conv3D(out_channel=40, kernel_size=3, stride=(1, 2, 3), pad_mode="pad",
+        ...                     pad=2, dilation=(1), group=1)
+        >>> output = conv3d(x, weight)
+        >>> print(output.shape)
+        (10, 40, 34, 17, 12)
     """
 
     @prim_attr_register
@@ -8198,12 +8259,8 @@ class Conv3D(Primitive):
         self.add_prim_attr('data_format', self.format)
         self.out_channel = validator.check_positive_int(out_channel, 'out_channel', self.name)
         validator.check_value_type("group", group, (int,), self.name)
-        validator.check_int_range(group, 1, out_channel, validator.INC_BOTH, "group", self.name)
-        device_target = context.get_context("device_target")
         if self.out_channel % group != 0:
             raise ValueError("The argument 'group' should be divisible by 'out_channel'")
-        if device_target == "Ascend" and group != 1:
-            raise ValueError("On Ascend platform, group = 1 must be satisfied.")
 
         self.group = group
         self.add_prim_attr('groups', self.group)
@@ -8218,8 +8275,22 @@ class Conv3DBackpropInput(Primitive):
         out_channel (int): The dimension of the output.
         kernel_size (Union[int, tuple[int]]): The kernel size of the 3D convolution.
         mode (int): Modes for different convolutions. Not currently used.
-        pad_mode (str):
-
+        pad_mode (str, optional): Specifies the padding mode with a padding value of 0. It can be set to:
+            ``"same"`` , ``"valid"`` or ``"pad"`` . Default: ``"valid"`` .
+
+            - ``"same"``: Pad the input around its depth/height/width dimension so that the shape of input and output
+              are the same when `stride` is set to ``1``.
+              The amount of padding is calculated by the operator internally. If the amount is even,
+              it is uniformly distributed around the input; if it is odd, the excess amount goes
+              to the front/right/bottom side.
+              If this mode is set, `pad` must be 0.
+            - ``"valid"``: No padding is applied to the input, and the output returns the maximum
+              possible depth, height and width. Extra pixels that could not complete a full stride will
+              be discarded. If this mode is set, `pad` must be 0.
+            - ``"pad"``: Pad the input with a specified amount. In this mode, the amount of padding
+              in the depth, height and width dimension is determined by the `pad` parameter.
+              If this mode is set, `pad` must be greater than or equal to 0.
+
         pad (Union(int, tuple[int])): The pad value to be filled. Default: ``0`` . If `pad` is an integer, the
             paddings of head, tail, top, bottom, left and right are the same, equal to pad. If `pad` is a
             tuple of four integers, the padding of head, tail, top, bottom, left and right equal to pad[0],
@@ -8443,13 +8514,14 @@ class CTCLossV2(Primitive):
 
     Args:
         blank (int, optional): The blank label. Default: ``0`` .
-        reduction (str, optional): Apply specific reduction method to the output. Currently only support ``'none'
-
+        reduction (str, optional): Apply specific reduction method to the output. Currently only ``'none'``
+            is supported. Default: ``'none'`` .
+
         zero_infinity (bool, optional): If loss is infinite, this parameter determines whether to set that loss
             and its correlated gradient to zero. Default: ``False`` .
 
     Inputs:
-        - **log_probs** (Tensor) - A tensor of shape :math:`(T,
+        - **log_probs** (Tensor) - A tensor of shape :math:`(T, N, C)`, where :math:`T` is input length, :math:`N` is
          batch size and :math:`C` is number of classes (including blank). Supported dtypes: float32, float64.
         - **targets** (Tensor) - A tensor of shape :math:`(N, S)`, where :math:`S` is max target length,
          means the target sequences. Supported dtypes: int32, int64.
@@ -8601,35 +8673,37 @@ class Conv3DTranspose(Primitive):
         Single int means the value is for the depth, height and width of the kernel.
         A tuple of 3 ints means the first value is for the depth, the second value is for the height and the
         other is for the width of the kernel.
-        mode (int): Modes for different convolutions. Default is ``1`` . It is currently not used.
-        pad_mode (str): Specifies padding mode.
-            ``"same"`` , ``"valid"``
-
-            - ``"same"``:
-              the
-
-
+        mode (int, optional): Modes for different convolutions. Default is ``1`` . It is currently not used.
+        pad_mode (str, optional): Specifies the padding mode with a padding value of 0. It can be set to:
+            ``"same"`` , ``"valid"`` or ``"pad"`` . Default: ``"valid"`` .
+
+            - ``"same"``: Pad the input around its depth/height/width dimension so that the shape of input and output
+              are the same when `stride` is set to ``1``.
+              The amount of padding is calculated by the operator internally. If the amount is even,
+              it is uniformly distributed around the input; if it is odd, the excess amount goes
+              to the front/right/bottom side.
              If this mode is set, `pad` must be 0.
-
-
-
-
-
-
-
-
-
-
-
-
-        stride (Union(int, tuple[int])): The distance of kernel moving, an int number that represents
+            - ``"valid"``: No padding is applied to the input, and the output returns the maximum
+              possible depth, height and width. Extra pixels that could not complete a full stride will
+              be discarded. If this mode is set, `pad` must be 0.
+            - ``"pad"``: Pad the input with a specified amount. In this mode, the amount of padding
+              in the depth, height and width dimension is determined by the `pad` parameter.
+              If this mode is set, `pad` must be greater than or equal to 0.
+
+        pad (Union(int, tuple[int]), optional): The pad value to be filled. Default: ``0`` . If `pad` is an integer,
+            the paddings of head, tail, top, bottom, left and right are the same, equal to pad.
+            If `pad` is a tuple of six integers, the padding of head, tail, top, bottom, left and right equal
+            to pad[0], pad[1], pad[2], pad[3], pad[4] and pad[5] correspondingly.
+        stride (Union(int, tuple[int]), optional): The distance of kernel moving, an int number that represents
             the depth, height and width of movement are both strides, or a tuple of three int numbers that
             represent depth, height and width of movement respectively. Default: ``1`` .
-        dilation (Union(int, tuple[int])): Specifies the space to use between kernel elements.
-
+        dilation (Union(int, tuple[int]), optional): Specifies the space to use between kernel elements.
+            Default: ``1`` .
+        group (int, optional): The number of groups into which the filter is divided. `in_channels`
            and `out_channels` must be divisible by `group`. Default: ``1`` .
-        output_padding (Union(int, tuple[int])): Add extra size to each dimension of the output.
-
+        output_padding (Union(int, tuple[int]), optional): Add extra size to each dimension of the output.
+            Default: ``0`` .
+        data_format (str, optional): The optional value for data format. Currently only ``'NCDHW'`` is supported.
            Default: ``'NCDHW'``.
 
     Inputs:
@@ -8794,14 +8868,17 @@ class Dilation2D(Primitive):
|
|
|
8794
8868
|
each sampling location. Its value must be greater or equal to 1 and bounded by
|
|
8795
8869
|
the height and width of the input `x`.
|
|
8796
8870
|
|
|
8797
|
-
pad_mode (str, optional): Specifies padding mode.
|
|
8798
|
-
``"same"``
|
|
8871
|
+
pad_mode (str, optional): Specifies the padding mode with a padding value of 0. It can be set to:
|
|
8872
|
+
``"same"`` or ``"valid"`` . Default: ``"valid"`` .
|
|
8799
8873
|
|
|
8800
|
-
- ``"same"``:
|
|
8801
|
-
the
|
|
8874
|
+
- ``"same"``: Pad the input around its edges so that the shape of input and output
|
|
8875
|
+
are the same when `stride` is set to ``1``.
|
|
8876
|
+
The amount of padding to is calculated by the operator internally, If the amount is even, it is
|
|
8877
|
+
uniformly distributed around the input; if it is odd, the excess amount goes to the right/bottom side.
|
|
8878
|
+
- ``"valid"``: No padding is applied to the input, and the output returns the maximum
|
|
8879
|
+
possible height and width. Extra pixels that could not complete a full stride will
|
|
8880
|
+
be discarded.
|
|
8802
8881
|
|
|
8803
|
-
- ``"valid"``: Adopts the way of discarding. The possible largest height and width of output will be
|
|
8804
|
-
returned without padding. Extra pixels will be discarded.
|
|
8805
8882
|
data_format (str, optional): The value for data format, only ``'NCHW'`` is supported at present.
|
|
8806
8883
|
Default: ``"NCHW"`` .
|
|
8807
8884
|
|
|
@@ -8879,7 +8956,9 @@ class Dilation2D(Primitive):
|
|
|
8879
8956
|
self.pad_mode = validator.check_string(pad_mode, ['VALID', 'SAME', 'valid', 'same'], 'pad_mode', self.name)
|
|
8880
8957
|
self.add_prim_attr('pad_mode', self.pad_mode.upper())
|
|
8881
8958
|
self.stride = _check_format_stride_or_dilation("stride", stride, self.name, self.data_format)
|
|
8882
|
-
|
|
8959
|
+
def is_in_range(x):
|
|
8960
|
+
return 1 <= x <= 255
|
|
8961
|
+
if not is_in_range(self.stride[2]) or not is_in_range(self.stride[3]):
|
|
8883
8962
|
raise ValueError(f'For Dilation2D, size of stride is not supported, '
|
|
8884
8963
|
f'stride should be in the range of [1, 255], '
|
|
8885
8964
|
f'but got stride_h: `{self.stride[2]}`, stride_w: `{self.stride[3]}`.')
|
|
@@ -9418,8 +9497,8 @@ class MultilabelMarginLoss(Primitive):
|
|
|
9418
9497
|
``'sum'`` . Default: ``'mean'`` .
|
|
9419
9498
|
|
|
9420
9499
|
- ``'none'``: no reduction will be applied.
|
|
9421
|
-
- ``'mean'``:
|
|
9422
|
-
- ``'sum'``: the output will be summed.
|
|
9500
|
+
- ``'mean'``: compute and return the mean of elements in the output.
|
|
9501
|
+
- ``'sum'``: the output elements will be summed.
|
|
9423
9502
|
|
|
9424
9503
|
Inputs:
|
|
9425
9504
|
- **x** (Tensor) - Predict data. Tensor of shape :math:`(C)` or :math:`(N, C)`, where :math:`N`
|
|
@@ -9428,7 +9507,7 @@ class MultilabelMarginLoss(Primitive):
|
|
|
9428
9507
|
label targets padded by -1.
|
|
9429
9508
|
|
|
9430
9509
|
Outputs:
|
|
9431
|
-
- **y** (Union[Tensor, Scalar]) - The loss of MultilabelMarginLoss. If `reduction` is "none"
|
|
9510
|
+
- **y** (Union[Tensor, Scalar]) - The loss of MultilabelMarginLoss. If `reduction` is ``"none"``, its shape
|
|
9432
9511
|
is :math:`(N)`. Otherwise, a scalar value will be returned.
|
|
9433
9512
|
- **is_target** (Tensor) - Output tensor for backward input, with the same shape as `target`,
|
|
9434
9513
|
data type must be int32.
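A brief usage sketch, assuming the primitive is exported as `ops.MultilabelMarginLoss`; the values are illustrative:

>>> import numpy as np
>>> import mindspore
>>> from mindspore import Tensor, ops
>>> loss = ops.MultilabelMarginLoss(reduction='mean')
>>> x = Tensor(np.array([[0.1, 0.2, 0.4, 0.8], [0.2, 0.3, 0.5, 0.7]]), mindspore.float32)
>>> target = Tensor(np.array([[1, 2, 0, 3], [2, 3, -1, 1]]), mindspore.int32)
>>> y, is_target = loss(x, target)
>>> print(y.shape, is_target.shape)
() (2, 4)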
|
|
@@ -9694,8 +9773,22 @@ class GridSampler3D(Primitive):
|
|
|
9694
9773
|
Args:
|
|
9695
9774
|
interpolation_mode (str, optional): An optional string specifying the interpolation method.
|
|
9696
9775
|
The optional values are ``"bilinear"`` or ``"nearest"`` . Default: ``"bilinear"`` .
|
|
9776
|
+
|
|
9777
|
+
- ``"nearest"``: Nearest neighbor interpolation. Each output pixel is assigned the value of the
|
|
9778
|
+
nearest input pixel. This method is simple and fast but can result in blocky or pixelated outputs.
|
|
9779
|
+
- ``"bilinear"``: Bilinear interpolation. Each output pixel is a weighted average of the four nearest input
|
|
9780
|
+
pixels, computed using bilinear interpolation. This method produces smoother results compared
|
|
9781
|
+
to nearest neighbor interpolation.
|
|
9782
|
+
|
|
9697
9783
|
padding_mode (str, optional): An optional string specifying the pad method.
|
|
9698
9784
|
The optional values are ``"zeros"`` , ``"border"`` or ``"reflection"`` . Default: ``"zeros"`` .
|
|
9785
|
+
When the sampling grid is outside input's bounds, effects of various padding modes are as follows:
|
|
9786
|
+
|
|
9787
|
+
- ``"zeros"``: Pads the input tensor with zeros.
|
|
9788
|
+
- ``"border"``: Pads the input tensor with the values of the pixels on the border of the tensor.
|
|
9789
|
+
- ``"reflection"``: Pads the input tensor by reflecting the values of the pixels at the
|
|
9790
|
+
boundary of the tensor.
|
|
9791
|
+
|
|
9699
9792
|
align_corners (bool, optional): An optional bool specifying alignment method. If set to ``True`` ,
|
|
9700
9793
|
the extrema (-1 and 1) are considered as referring to
|
|
9701
9794
|
the center points of the input’s corner pixels. If set to ``False`` , they are instead considered as
|
|
@@ -10178,8 +10271,12 @@ class TripletMarginLoss(Primitive):
|
|
|
10178
10271
|
p (int, optional): The norm degree for pairwise distance. Default: ``2`` .
|
|
10179
10272
|
eps (float, optional): Default: ``1e-6`` .
|
|
10180
10273
|
swap (bool, optional): The distance swap. Default: ``False`` .
|
|
10181
|
-
reduction (str, optional): Apply specific reduction method to the
|
|
10182
|
-
|
|
10274
|
+
reduction (str, optional): Apply specific reduction method to the output: ``'none'`` , ``'mean'`` ,
|
|
10275
|
+
``'sum'`` . Default: ``'mean'`` .
|
|
10276
|
+
|
|
10277
|
+
- ``'none'``: no reduction will be applied.
|
|
10278
|
+
- ``'mean'``: compute and return the mean of elements in the output.
|
|
10279
|
+
- ``'sum'``: the output elements will be summed.
|
|
10183
10280
|
|
|
10184
10281
|
Inputs:
|
|
10185
10282
|
- **x** (Tensor) - A sample randomly selected from the training set. Data type must be BasicType.
|
|
@@ -10190,7 +10287,7 @@ class TripletMarginLoss(Primitive):
|
|
|
10190
10287
|
- **margin** (Tensor) - Make a margin between the positive pair and the negative pair.
|
|
10191
10288
|
|
|
10192
10289
|
Outputs:
|
|
10193
|
-
Union[Tensor, Scalar], if `reduction` is "none"
|
|
10290
|
+
Union[Tensor, Scalar], if `reduction` is ``"none"``, its shape is :math:`(N)`.
|
|
10194
10291
|
Otherwise, a scalar value will be returned.
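A brief usage sketch, assuming the primitive is exported as `ops.TripletMarginLoss` (per the platform note below, it runs on GPU); shapes and values are illustrative:

>>> import numpy as np
>>> import mindspore
>>> from mindspore import Tensor, ops
>>> loss = ops.TripletMarginLoss(reduction='mean')
>>> x = Tensor(np.array([[0.3, 0.7], [0.5, 0.5]]), mindspore.float32)
>>> positive = Tensor(np.array([[0.4, 0.6], [0.4, 0.6]]), mindspore.float32)
>>> negative = Tensor(np.array([[0.2, 0.9], [0.3, 0.7]]), mindspore.float32)
>>> margin = Tensor(1.0, mindspore.float32)  # 0-D tensor, as required above
>>> output = loss(x, positive, negative, margin)
>>> print(output.shape)
()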
|
|
10195
10292
|
|
|
10196
10293
|
Raises:
|
|
@@ -10207,7 +10304,7 @@ class TripletMarginLoss(Primitive):
|
|
|
10207
10304
|
is bigger than or equal to 8.
|
|
10208
10305
|
ValueError: If length of shape of `margin` is not 0.
|
|
10209
10306
|
ValueError: If shape of `x`, `positive` and `negative` cannot broadcast.
|
|
10210
|
-
ValueError: If `reduction` is not one of 'none'
|
|
10307
|
+
ValueError: If `reduction` is not one of ``'none'``, ``'mean'``, ``'sum'``.
|
|
10211
10308
|
|
|
10212
10309
|
Supported Platforms:
|
|
10213
10310
|
``GPU``
|
|
@@ -10303,6 +10400,13 @@ class GridSampler2D(Primitive):
|
|
|
10303
10400
|
interpolation_mode (str, optional): An optional string specifying the interpolation method.
|
|
10304
10401
|
The optional values are
|
|
10305
10402
|
``"bilinear"`` or ``"nearest"`` . Default: ``"bilinear"`` .
|
|
10403
|
+
|
|
10404
|
+
- ``"nearest"``: Nearest neighbor interpolation. Each output pixel is assigned the value of the
|
|
10405
|
+
nearest input pixel. This method is simple and fast but can result in blocky or pixelated outputs.
|
|
10406
|
+
- ``"bilinear"``: Bilinear interpolation. Each output pixel is a weighted average of the four nearest input
|
|
10407
|
+
pixels, computed using bilinear interpolation. This method produces smoother results compared
|
|
10408
|
+
to nearest neighbor interpolation.
|
|
10409
|
+
|
|
10306
10410
|
padding_mode (str, optional): An optional string specifying the pad method.
|
|
10307
10411
|
The optional values are ``"zeros"`` , ``"border"`` or ``"reflection"`` . Default: ``"zeros"`` .
|
|
10308
10412
|
When the sampling grid is outside input's bounds, effects of various padding modes are as follows:
|
|
@@ -10317,8 +10421,12 @@ class GridSampler2D(Primitive):
|
|
|
10317
10421
|
and output tensors are aligned. When set to ``False`` , it is not aligned. Default: ``False`` .
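A brief usage sketch, assuming the primitive is exported as `ops.GridSampler2D`; the grid values are illustrative:

>>> import numpy as np
>>> from mindspore import Tensor, ops
>>> sampler = ops.GridSampler2D(interpolation_mode='bilinear', padding_mode='zeros', align_corners=True)
>>> input_x = Tensor(np.arange(16).reshape((2, 2, 2, 2)).astype(np.float32))
>>> grid = Tensor(np.arange(-9, 9, 0.5).reshape((2, 3, 3, 2)).astype(np.float32))
>>> output = sampler(input_x, grid)
>>> print(output.shape)
(2, 2, 3, 3)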
|
|
10318
10422
|
|
|
10319
10423
|
Inputs:
|
|
10320
|
-
- **input_x** (Tensor) - A 4-D tensor with
|
|
10321
|
-
:math:`(N, C, H_{in}, W_{in})`.
|
|
10424
|
+
- **input_x** (Tensor) - A 4-D tensor with shape
|
|
10425
|
+
:math:`(N, C, H_{in}, W_{in})`. Supported dtypes:
|
|
10426
|
+
|
|
10427
|
+
- Ascend: float16, float32.
|
|
10428
|
+
- GPU/CPU: float16, float32, float64.
|
|
10429
|
+
|
|
10322
10430
|
- **grid** (Tensor) - A 4-D tensor whose dtype is the same as `input_x` and whose shape is
|
|
10323
10431
|
:math:`(N, H_{out}, W_{out}, 2)`.
|
|
10324
10432
|
Used to specify the sampling pixel locations normalized by the input spatial
|
|
@@ -10409,7 +10517,7 @@ class UpsampleNearest3D(Primitive):
|
|
|
10409
10517
|
This operator scales up the volumetric input with specified `output_size` or `scales` factors, using nearest
|
|
10410
10518
|
neighbor algorithm.
|
|
10411
10519
|
|
|
10412
|
-
One of `output_size` or `scales` must be given, and can not
|
|
10520
|
+
One of `output_size` or `scales` must be given, and they cannot both be specified at the same time.
|
|
10413
10521
|
|
|
10414
10522
|
Inputs:
|
|
10415
10523
|
- **x** (Tensor) - 5D tensor of shape :math:`(N, C, D_{in}, H_{in}, W_{in})`.
|
|
@@ -11116,46 +11224,45 @@ class Dense(Primitive):
|
|
|
11116
11224
|
Applies the dense connected operator to the input. The operation is implemented as:
|
|
11117
11225
|
|
|
11118
11226
|
.. math::
|
|
11119
|
-
|
|
11227
|
+
output = x w^{T} + b,
|
|
11120
11228
|
|
|
11121
|
-
where :math:`x` is the input tensor, :math
|
|
11122
|
-
and :math
|
|
11123
|
-
|
|
11124
|
-
Args:
|
|
11125
|
-
has_bias (bool): Specifies whether the layer uses a bias vector :math:`\text{b}`. Default: True.
|
|
11229
|
+
where :math:`x` is the input tensor, :math:`w` is a weight matrix with the same data type as :math:`x` ,
|
|
11230
|
+
and :math:`b` is a bias vector with the same data type as :math:`x` (only if `b` is not ``None``).
|
|
11126
11231
|
|
|
11127
11232
|
Inputs:
|
|
11128
|
-
- **x** (
|
|
11129
|
-
- **w** (
|
|
11130
|
-
|
|
11233
|
+
- **x** (Tensor) - The shape must meet the following requirement: :math:`len(x.shape)>0`.
|
|
11234
|
+
- **w** (Tensor) - The shape must meet the following requirements:
|
|
11235
|
+
If :math:`len(x.shape)>1`, :math:`len(w.shape)=2`. If :math:`len(x.shape)=1`, :math:`len(w.shape)=1`.
|
|
11236
|
+
:math:`w.shape[-1]=x.shape[-1]`.
|
|
11237
|
+
- **b** (Union[Tensor, None]) - If `b` is not ``None``, the shape must meet the following requirements:
|
|
11238
|
+
If :math:`len(x.shape)>1`, :math:`len(b.shape)=0` or :math:`len(b.shape)=1` .
|
|
11239
|
+
If :math:`len(b.shape)=1`, :math:`b.shape[0]=w.shape[0]`.
|
|
11240
|
+
If :math:`len(x.shape)=1`, :math:`len(b.shape)=0`.
|
|
11131
11241
|
|
|
11132
11242
|
Outputs:
|
|
11133
|
-
Tensor of shape :math:`(*x.shape[:-1], w.shape[0])`.
|
|
11134
|
-
|
|
11135
|
-
Raises:
|
|
11136
|
-
TypeError: If `has_bias` is not a bool.
|
|
11243
|
+
If :math:`len(x.shape)>1`, Tensor of shape :math:`(*x.shape[:-1], w.shape[0])`.
|
|
11244
|
+
If :math:`len(x.shape)=1`, Tensor of shape :math:`()`.
|
|
11137
11245
|
|
|
11138
11246
|
Supported Platforms:
|
|
11139
|
-
``GPU``
|
|
11247
|
+
``Ascend`` ``GPU`` ``CPU``
|
|
11140
11248
|
|
|
11141
11249
|
Examples:
|
|
11142
|
-
>>>
|
|
11250
|
+
>>> import numpy as np
|
|
11251
|
+
>>> from mindspore import Tensor, ops
|
|
11143
11252
|
>>> x = Tensor(np.random.random((4, 5, 6, 7)).astype(np.float32))
|
|
11144
|
-
>>> weight =
|
|
11145
|
-
>>> bias =
|
|
11146
|
-
>>> dense =
|
|
11253
|
+
>>> weight = Tensor(np.random.random((6, 7)).astype(np.float32))
|
|
11254
|
+
>>> bias = Tensor(np.random.random((6,)).astype(np.float32))
|
|
11255
|
+
>>> dense = ops.Dense()
|
|
11147
11256
|
>>> output = dense(x, weight, bias)
|
|
11148
11257
|
>>> print(output.shape)
|
|
11149
11258
|
(4, 5, 6, 6)
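A complementary sketch for the 1-D case, following the shape rules above (inputs are illustrative):

>>> x1 = Tensor(np.random.random(7).astype(np.float32))
>>> w1 = Tensor(np.random.random(7).astype(np.float32))
>>> b1 = Tensor(np.array(0.5).astype(np.float32))
>>> out1 = dense(x1, w1, b1)
>>> print(out1.shape)
()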
|
|
11150
11259
|
"""
|
|
11151
11260
|
|
|
11152
11261
|
@prim_attr_register
|
|
11153
|
-
def __init__(self
|
|
11262
|
+
def __init__(self):
|
|
11154
11263
|
"""Initialize Dense."""
|
|
11155
11264
|
self.init_prim_io_names(inputs=['x', 'w', 'b'], outputs=["output"])
|
|
11156
|
-
self.has_bias
|
|
11157
|
-
self.has_bias = validator.check_bool(has_bias, "has_bias", "Dense")
|
|
11158
|
-
self.add_prim_attr("has_bias", self.has_bias)
|
|
11265
|
+
self.add_prim_attr("has_bias", True)
|
|
11159
11266
|
|
|
11160
11267
|
|
|
11161
11268
|
class WKV(Primitive):
|
|
@@ -11166,22 +11273,22 @@ class WKV(Primitive):
|
|
|
11166
11273
|
|
|
11167
11274
|
Inputs:
|
|
11168
11275
|
- **w** (Tensor) - The time_first tensor with data type of float32.
|
|
11169
|
-
Input tensor of shape :math:`(
|
|
11276
|
+
Input tensor of shape :math:`(hidden\_size,)`.
|
|
11170
11277
|
- **u** (Tensor) - The time_decay tensor with data type of float32.
|
|
11171
|
-
Input tensor of shape :math:`(
|
|
11278
|
+
Input tensor of shape :math:`(hidden\_size,)`.
|
|
11172
11279
|
- **k** (Tensor) - The key tensor with data type of float32.
|
|
11173
|
-
Input tensor of shape :math:`(
|
|
11280
|
+
Input tensor of shape :math:`(batch\_size, seq\_length, hidden\_size)`.
|
|
11174
11281
|
- **v** (Tensor) - The value tensor with data type of float32.
|
|
11175
|
-
Input tensor of shape :math:`(
|
|
11282
|
+
Input tensor of shape :math:`(batch\_size, seq\_length, hidden\_size)`.
|
|
11176
11283
|
- **sp** (Tensor) - The states_p tensor with data type of float32.
|
|
11177
|
-
Input tensor of shape :math:`(
|
|
11284
|
+
Input tensor of shape :math:`(batch\_size, seq\_length, hidden\_size)`.
|
|
11178
11285
|
- **sq** (Tensor) - The states_q tensor with data type of float32.
|
|
11179
|
-
Input tensor of shape :math:`(
|
|
11286
|
+
Input tensor of shape :math:`(batch\_size, hidden\_size)`.
|
|
11180
11287
|
- **sm** (Tensor) - The states_m tensor with data type of float32.
|
|
11181
|
-
Input tensor of shape :math:`(
|
|
11288
|
+
Input tensor of shape :math:`(batch\_size, hidden\_size)`.
|
|
11182
11289
|
|
|
11183
11290
|
Outputs:
|
|
11184
|
-
Tensor of shape :math:`(
|
|
11291
|
+
Tensor of shape :math:`(batch\_size, seq\_length, hidden\_size)`.
|
|
11185
11292
|
|
|
11186
11293
|
Supported Platforms:
|
|
11187
11294
|
``Ascend``
|
|
@@ -11209,3 +11316,120 @@ class WKV(Primitive):
|
|
|
11209
11316
|
"""Initialize WKV."""
|
|
11210
11317
|
self.init_prim_io_names(inputs=["time_first", "time_decay", "key", "value", "sp", "sq", "sm"],
|
|
11211
11318
|
outputs=["output", "out_sp", "out_sq", "out_sm"])
|
|
11319
|
+
|
|
11320
|
+
|
|
11321
|
+
class PromptFlashAttention(Primitive):
|
|
11322
|
+
r"""
|
|
11323
|
+
The interface for full inference.
|
|
11324
|
+
B -- Batch size
|
|
11325
|
+
S -- Sequence length
|
|
11326
|
+
H -- Hidden size
|
|
11327
|
+
|
|
11328
|
+
.. warning::
|
|
11329
|
+
This is an experimental API that is subject to change or deletion.
|
|
11330
|
+
|
|
11331
|
+
Inputs:
|
|
11332
|
+
- **query** (Tensor) - The query tensor with data type of float16 or float32.
|
|
11333
|
+
Input tensor of shape :math:`(B, S, H)` or :math:`(B, N, S, D)`.
|
|
11334
|
+
- **key** (Tensor) - The key tensor with data type of float16 or float32.
|
|
11335
|
+
Input tensor of shape :math:`(B, S, H)` or :math:`(B, N, S, D)`.
|
|
11336
|
+
- **value** (Tensor) - The value tensor with data type of float16 or float32.
|
|
11337
|
+
Input tensor of shape :math:`(B, S, H)` or :math:`(B, N, S, D)`.
|
|
11338
|
+
- **attn_mask** (Tensor) - The attention mask tensor with data type of float16 or float32.
|
|
11339
|
+
For each element, 0 indicates retention and 1 indicates discard. Input tensor of shape :math:`(B, 1, S, S)`.
|
|
11340
|
+
- **padding_mask** (Tensor) - The padding mask tensor with data type of float16 or float32.
|
|
11341
|
+
- **actual_seq_lengths** (Tensor): Describes the actual sequence length of each input, with data type of int.
|
|
11342
|
+
- **num_heads** (int): The number of heads.
|
|
11343
|
+
- **scale_value** (float): The scale value indicating the scale coefficient, which is used as the scalar of
|
|
11344
|
+
Muls in the calculation. Default: 1.0.
|
|
11345
|
+
- **pre_tokens** (int): Previous tokens. Default: 2147483547.
|
|
11346
|
+
- **next_tokens** (int): Next tokens, which indicate,
|
|
11347
|
+
for the upper triangle, the number of data blocks involved in the calculation. The value 0
|
|
11348
|
+
indicates that the data blocks in the upper triangle are not involved in the calculation. Default: 0.
|
|
11349
|
+
- **input_layout** (str): The data layout of the input qkv, supporting `BSH` and `BNSD`. Default: `BSH`.
|
|
11350
|
+
- **num_key_value_heads** (int): Head numbers of key/value, which are used in the GQA algorithm.
|
|
11351
|
+
The value 0 indicates that the key and value have the same number of heads, in which case `num_heads` is used. Default: 0.
|
|
11352
|
+
|
|
11353
|
+
Outputs:
|
|
11354
|
+
- **attention_out** (Tensor) - Output tensor of shape :math:`(B, S, H)` or :math:`(B, N, S, D)`.
|
|
11355
|
+
|
|
11356
|
+
Supported Platforms:
|
|
11357
|
+
``Ascend910B``
|
|
11358
|
+
"""
|
|
11359
|
+
@prim_attr_register
|
|
11360
|
+
def __init__(self, num_heads, scale_value=1.0, pre_tokens=2147483547, next_tokens=0, input_layout='BSH',
|
|
11361
|
+
num_key_value_heads=0):
|
|
11362
|
+
"""Initialize PromptFlashAttention."""
|
|
11363
|
+
validator.check_value_type('num_heads', num_heads, [int], self.name)
|
|
11364
|
+
validator.check_value_type('scale_value', scale_value, [float], self.name)
|
|
11365
|
+
validator.check_value_type('pre_tokens', pre_tokens, [int], self.name)
|
|
11366
|
+
validator.check_value_type('next_tokens', next_tokens, [int], self.name)
|
|
11367
|
+
validator.check_value_type('input_layout', input_layout, [str], self.name)
|
|
11368
|
+
validator.check_value_type('num_key_value_heads', num_key_value_heads, [int], self.name)
|
|
11369
|
+
self.init_prim_io_names(inputs=["query", "key", "value", "attn_mask", "padding_mask", "actual_seq_lengths"],
|
|
11370
|
+
outputs=["attention_out"])
|
|
11371
|
+
|
|
11372
|
+
|
|
11373
|
+
class FlashAttentionScore(Primitive):
|
|
11374
|
+
r"""
|
|
11375
|
+
FlashAttentionScore.
|
|
11376
|
+
.. warning::
|
|
11377
|
+
This is an experimental API that is subject to change or deletion.
|
|
11378
|
+
B -- Batch size
|
|
11379
|
+
S -- Sequence length
|
|
11380
|
+
H -- Hidden size
|
|
11381
|
+
N -- Num heads
|
|
11382
|
+
D -- Dim size
|
|
11383
|
+
Args:
|
|
11384
|
+
head_num (int): The number of heads.
|
|
11385
|
+
keep_prob (float): The keep probability of dropout. Default: 1.0.
|
|
11386
|
+
scale_value (float): The scale value. Default: 1.0.
|
|
11387
|
+
pre_tokens (int): Previous tokens. Default: 65536.
|
|
11388
|
+
next_tokens (int): Next tokens. Default: 65536.
|
|
11389
|
+
inner_precise (int): Specifies the execution mode, where 0 indicates high precision mode and 1 indicates high
|
|
11390
|
+
performance mode. Default: 0.
|
|
11391
|
+
input_layout (str, optional): Specifies the layout of `query`, the value must be one of ["BSH", "SBH"].
|
|
11392
|
+
Currently, only BSH is supported. Default: "BSH".
|
|
11393
|
+
|
|
11394
|
+
Inputs:
|
|
11395
|
+
- **query** (Tensor) - The query tensor with data type of float16 or float32.
|
|
11396
|
+
Input tensor of shape :math:`(B, S, H)`.
|
|
11397
|
+
- **key** (Tensor) - The key tensor with data type of float16 or float32.
|
|
11398
|
+
Input tensor of shape :math:`(B, S, H)`.
|
|
11399
|
+
- **value** (Tensor) - The value tensor with data type of float16 or float32.
|
|
11400
|
+
Input tensor of shape :math:`(B, S, H)`.
|
|
11401
|
+
- **attn_mask** (Tensor) - The attention mask tensor with data type of float16 or float32.
|
|
11402
|
+
For each element, 0 indicates retention and 1 indicates discard. Input tensor of shape :math:`(B, 1, S, S)`.
|
|
11403
|
+
- **drop_mask** (Tensor) - The dropout mask tensor with data type of UInt8.
|
|
11404
|
+
Input tensor of shape :math:`(B, N, S, S // 8)` or :math:`()`.
|
|
11405
|
+
- **real_shift** (None) - The position embedding code of float16 or float32, not implemented yet.
|
|
11406
|
+
- **padding_mask** (None) - The padding mask of float16 or float32, not implemented yet.
|
|
11407
|
+
|
|
11408
|
+
Outputs:
|
|
11409
|
+
- **attention_out** (Tensor) - :math:`(B, S, H)`
|
|
11410
|
+
- **softmax_max** (Tensor) - :math:`(B, N, S, 16)` when fp16, :math:`(B, N, S, 8)` when fp32
|
|
11411
|
+
- **softmax_sum** (Tensor) - :math:`(B, N, S, 16)` when fp16, :math:`(B, N, S, 8)` when fp32
|
|
11412
|
+
Supported Platforms:
|
|
11413
|
+
``Ascend``
|
|
11414
|
+
"""
|
|
11415
|
+
|
|
11416
|
+
@prim_attr_register
|
|
11417
|
+
def __init__(self, head_num, keep_prob=1.0, scale_value=1.0, pre_tokens=65536, next_tokens=65536, inner_precise=0,
|
|
11418
|
+
input_layout="BSH"):
|
|
11419
|
+
"""Initialize FlashAttentionScore"""
|
|
11420
|
+
validator.check_value_type('head_num', head_num, [int], self.name)
|
|
11421
|
+
validator.check_value_type('keep_prob', keep_prob, [int, float], self.name)
|
|
11422
|
+
validator.check_float(keep_prob, 0.0, validator.GE, "keep_prob", self.name)
|
|
11423
|
+
validator.check_float(keep_prob, 1.0, validator.LE, "keep_prob", self.name)
|
|
11424
|
+
validator.check_value_type('scale_value', scale_value, [float], self.name)
|
|
11425
|
+
validator.check_value_type('pre_tokens', pre_tokens, [int], self.name)
|
|
11426
|
+
validator.check_value_type('next_tokens', next_tokens, [int], self.name)
|
|
11427
|
+
validator.check_value_type('inner_precise', inner_precise, [int], self.name)
|
|
11428
|
+
if inner_precise not in [0, 1]:
|
|
11429
|
+
raise ValueError(f"Attribute 'inner_precise' must be either 0 or 1, but got {inner_precise}")
|
|
11430
|
+
validator.check_value_type('input_layout', input_layout, [str], self.name)
|
|
11431
|
+
if input_layout not in ["BSH"]:
|
|
11432
|
+
raise ValueError(f"Attribute 'input_layout' must be either 'bsh' or 'sbh', but got {input_layout}")
|
|
11433
|
+
self.init_prim_io_names(
|
|
11434
|
+
inputs=['query', 'key', 'value', 'attn_mask', 'drop_mask', 'real_shift', 'padding_mask'],
|
|
11435
|
+
outputs=['attention_out', 'softmax_max', 'softmax_sum'])
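A matching construction sketch for FlashAttentionScore, under the same module-path assumption as above:

>>> from mindspore.ops.operations.nn_ops import FlashAttentionScore  # assumed path
>>> fas = FlashAttentionScore(head_num=8, keep_prob=1.0, input_layout='BSH')
>>> # Expected input shapes: query/key/value (B, S, H), attn_mask (B, 1, S, S),
>>> # drop_mask (B, N, S, S // 8) or (); real_shift and padding_mask are not implemented yet:
>>> # attention_out, softmax_max, softmax_sum = fas(q, k, v, attn_mask, drop_mask, None, None)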
|