mindspore-2.1.0-cp38-none-any.whl → mindspore-2.2.0-cp38-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
- mindspore/.commit_id +1 -1
- mindspore/__init__.py +4 -1
- mindspore/_akg/akg/build_module.py +5 -6
- mindspore/_akg/akg/composite/build_module.py +49 -16
- mindspore/_akg/akg/composite/split_stitch.py +10 -11
- mindspore/_akg/akg/ms/info_version_adapt.py +67 -1
- mindspore/_akg/akg/tvm/api.py +4 -3
- mindspore/_akg/akg/tvm/autotvm/__init__.py +1 -2
- mindspore/_akg/akg/tvm/autotvm/graph_tuner/base_graph_tuner.py +1 -5
- mindspore/_akg/akg/tvm/autotvm/measure/__init__.py +1 -1
- mindspore/_akg/akg/tvm/autotvm/measure/measure.py +1 -10
- mindspore/_akg/akg/tvm/autotvm/measure/measure_methods.py +1 -372
- mindspore/_akg/akg/tvm/build_module.py +16 -1
- mindspore/_akg/akg/tvm/contrib/graph_runtime.py +0 -53
- mindspore/_akg/akg/tvm/hybrid/parser.py +7 -6
- mindspore/_akg/akg/tvm/ir_builder.py +1 -1
- mindspore/_akg/akg/tvm/module.py +1 -2
- mindspore/_akg/akg/tvm/stmt.py +2 -2
- mindspore/_akg/akg/utils/composite_op_helper.py +9 -10
- mindspore/_akg/akg/utils/kernel_exec.py +58 -260
- mindspore/_akg/akg/utils/result_analysis.py +4 -24
- mindspore/_akg/akg/utils/tbe_codegen_utils.py +198 -0
- mindspore/_c_dataengine.cpython-38-aarch64-linux-gnu.so +0 -0
- mindspore/_c_expression.cpython-38-aarch64-linux-gnu.so +0 -0
- mindspore/_c_mindrecord.cpython-38-aarch64-linux-gnu.so +0 -0
- mindspore/_check_jit_forbidden_api.py +3 -1
- mindspore/_checkparam.py +26 -32
- mindspore/_extends/graph_kernel/__init__.py +0 -1
- mindspore/_extends/graph_kernel/model/model_builder.py +9 -50
- mindspore/_extends/graph_kernel/splitter.py +1 -9
- mindspore/_extends/parallel_compile/akg_compiler/akg_process.py +122 -15
- mindspore/_extends/parallel_compile/akg_compiler/build_tbe_kernel.py +2 -2
- mindspore/_extends/parallel_compile/akg_compiler/tbe_topi.py +4 -2
- mindspore/_extends/parallel_compile/tbe_compiler/tbe_adapter.py +2 -2
- mindspore/_extends/parallel_compile/tbe_compiler/tbe_helper.py +4 -4
- mindspore/_extends/parallel_compile/tbe_compiler/tbe_job.py +1 -1
- mindspore/_extends/parallel_compile/tbe_compiler/tbe_job_manager.py +1 -1
- mindspore/_extends/parse/__init__.py +12 -15
- mindspore/_extends/parse/namespace.py +7 -33
- mindspore/_extends/parse/parser.py +61 -71
- mindspore/_extends/parse/resources.py +1 -1
- mindspore/_extends/parse/standard_method.py +72 -95
- mindspore/_extends/parse/trope.py +1 -1
- mindspore/_extends/remote/kernel_build_server.py +24 -7
- mindspore/_extends/remote/kernel_build_server_akg_v2.py +55 -0
- mindspore/_install_custom.py +43 -0
- mindspore/_mindspore_offline_debug.cpython-38-aarch64-linux-gnu.so +0 -0
- mindspore/amp.py +47 -11
- mindspore/bin/cache_admin +0 -0
- mindspore/bin/cache_server +0 -0
- mindspore/boost/boost.py +1 -8
- mindspore/boost/boost_cell_wrapper.py +3 -2
- mindspore/boost/grad_accumulation.py +1 -1
- mindspore/boost/group_loss_scale_manager.py +8 -7
- mindspore/common/__init__.py +5 -3
- mindspore/common/_jit_fallback_utils.py +6 -0
- mindspore/common/_register_for_adapter.py +2 -0
- mindspore/common/_register_for_tensor.py +2 -2
- mindspore/common/_stub_tensor.py +13 -0
- mindspore/common/_utils.py +13 -0
- mindspore/common/api.py +173 -258
- mindspore/common/auto_dynamic_shape.py +498 -0
- mindspore/common/dtype.py +18 -11
- mindspore/common/dump.py +6 -4
- mindspore/common/initializer.py +14 -14
- mindspore/common/jit_config.py +33 -15
- mindspore/common/lazy_inline.py +126 -7
- mindspore/common/mindir_util.py +101 -0
- mindspore/common/parameter.py +51 -41
- mindspore/common/seed.py +4 -4
- mindspore/common/sparse_tensor.py +13 -14
- mindspore/common/tensor.py +240 -145
- mindspore/communication/__init__.py +7 -4
- mindspore/communication/_comm_helper.py +83 -4
- mindspore/communication/management.py +152 -84
- mindspore/config/op_info.config +13 -2
- mindspore/config/super_bar_config.json +4 -2
- mindspore/context.py +143 -59
- mindspore/dataset/__init__.py +5 -5
- mindspore/dataset/audio/__init__.py +2 -2
- mindspore/dataset/audio/transforms.py +52 -52
- mindspore/dataset/callback/ds_callback.py +16 -2
- mindspore/dataset/core/config.py +68 -51
- mindspore/dataset/engine/cache_client.py +28 -5
- mindspore/dataset/engine/datasets.py +250 -112
- mindspore/dataset/engine/datasets_audio.py +43 -211
- mindspore/dataset/engine/datasets_standard_format.py +11 -35
- mindspore/dataset/engine/datasets_text.py +43 -67
- mindspore/dataset/engine/datasets_user_defined.py +86 -100
- mindspore/dataset/engine/datasets_vision.py +219 -1029
- mindspore/dataset/engine/iterators.py +11 -4
- mindspore/dataset/engine/obs/obs_mindrecord_dataset.py +4 -0
- mindspore/dataset/engine/obs/util.py +3 -0
- mindspore/dataset/engine/samplers.py +1 -1
- mindspore/dataset/engine/validators.py +19 -5
- mindspore/dataset/text/__init__.py +3 -3
- mindspore/dataset/text/transforms.py +101 -127
- mindspore/dataset/text/utils.py +205 -138
- mindspore/dataset/transforms/__init__.py +1 -1
- mindspore/dataset/transforms/py_transforms_util.py +40 -12
- mindspore/dataset/transforms/transforms.py +95 -40
- mindspore/dataset/utils/browse_dataset.py +8 -2
- mindspore/dataset/utils/line_reader.py +17 -19
- mindspore/dataset/vision/__init__.py +3 -3
- mindspore/dataset/vision/c_transforms.py +6 -3
- mindspore/dataset/vision/transforms.py +409 -287
- mindspore/dataset/vision/utils.py +13 -14
- mindspore/dataset/vision/validators.py +11 -1
- mindspore/experimental/map_parameter.py +14 -0
- mindspore/{nn/optim_ex → experimental/optim}/__init__.py +30 -29
- mindspore/{nn/optim_ex → experimental/optim}/adam.py +59 -66
- mindspore/{nn/optim_ex → experimental/optim}/adamw.py +181 -203
- mindspore/experimental/optim/lr_scheduler.py +1427 -0
- mindspore/{nn/optim_ex → experimental/optim}/optimizer.py +252 -259
- mindspore/{nn/optim_ex → experimental/optim}/sgd.py +147 -152
- mindspore/gen_ops.py +273 -0
- mindspore/include/OWNERS +0 -1
- mindspore/include/api/data_type.h +2 -1
- mindspore/include/api/graph.h +0 -15
- mindspore/include/api/kernel.h +2 -0
- mindspore/include/api/kernel_api.h +37 -12
- mindspore/include/api/model.h +0 -14
- mindspore/include/api/types.h +37 -4
- mindspore/include/c_api/ms/abstract.h +67 -0
- mindspore/include/c_api/ms/attribute.h +197 -0
- mindspore/include/c_api/ms/base/handle_types.h +43 -0
- mindspore/include/c_api/ms/base/macros.h +32 -0
- mindspore/include/c_api/ms/base/status.h +33 -0
- mindspore/include/c_api/ms/base/types.h +282 -0
- mindspore/include/c_api/ms/context.h +102 -0
- mindspore/include/c_api/ms/graph.h +160 -0
- mindspore/include/c_api/ms/node.h +606 -0
- mindspore/include/c_api/ms/tensor.h +161 -0
- mindspore/include/c_api/ms/value.h +84 -0
- mindspore/include/dataset/constants.h +6 -5
- mindspore/include/dataset/execute.h +23 -13
- mindspore/include/dataset/text.h +26 -26
- mindspore/include/dataset/transforms.h +13 -13
- mindspore/include/dataset/vision.h +60 -60
- mindspore/include/dataset/vision_ascend.h +5 -6
- mindspore/include/dataset/vision_lite.h +17 -17
- mindspore/include/mindapi/base/type_id.h +1 -0
- mindspore/include/mindapi/base/types.h +1 -0
- mindspore/lib/libdnnl.so.2 +0 -0
- mindspore/lib/libjemalloc.so.2 +0 -0
- mindspore/lib/libmindspore.so +0 -0
- mindspore/lib/libmindspore_backend.so +0 -0
- mindspore/lib/libmindspore_common.so +0 -0
- mindspore/lib/libmindspore_core.so +0 -0
- mindspore/lib/libmindspore_glog.so.0 +0 -0
- mindspore/lib/libmindspore_gpr.so.15 +0 -0
- mindspore/lib/libmindspore_grpc++.so.1 +0 -0
- mindspore/lib/libmindspore_grpc.so.15 +0 -0
- mindspore/lib/libmindspore_shared_lib.so +0 -0
- mindspore/lib/libnnacl.so +0 -0
- mindspore/lib/libopencv_core.so.4.5 +0 -0
- mindspore/lib/libopencv_imgcodecs.so.4.5 +0 -0
- mindspore/lib/libopencv_imgproc.so.4.5 +0 -0
- mindspore/lib/libps_cache.so +0 -0
- mindspore/lib/plugin/ascend/custom_aicpu_ops/op_impl/cpu/aicpu_kernel/impl/libcust_aicpu_kernels.so +0 -0
- mindspore/lib/plugin/ascend/custom_aicpu_ops/op_impl/cpu/aicpu_kernel/impl/libcust_cpu_kernels.so +0 -0
- mindspore/lib/plugin/ascend/custom_aicpu_ops/op_impl/cpu/config/cust_aicpu_kernel.json +9000 -0
- mindspore/lib/plugin/ascend/custom_aicpu_ops/op_proto/libcust_op_proto.so +0 -0
- mindspore/lib/plugin/ascend/libakg.so +0 -0
- mindspore/lib/plugin/ascend/libascend_collective.so +0 -0
- mindspore/lib/plugin/ascend/libdvpp_utils.so +0 -0
- mindspore/lib/plugin/ascend/libhccl_plugin.so +0 -0
- mindspore/lib/plugin/ascend/libmindspore_aicpu_kernels.so +0 -0
- mindspore/lib/plugin/ascend/libmindspore_cpu_kernels.so +0 -0
- mindspore/lib/plugin/cpu/libakg.so +0 -0
- mindspore/lib/plugin/libmindspore_ascend.so.1 +0 -0
- mindspore/lib/plugin/libmindspore_ascend.so.2 +0 -0
- mindspore/mindrecord/tools/imagenet_to_mr.py +1 -1
- mindspore/mindrecord/tools/mnist_to_mr.py +2 -2
- mindspore/nn/__init__.py +0 -2
- mindspore/nn/cell.py +316 -74
- mindspore/nn/dynamic_lr.py +21 -21
- mindspore/nn/layer/activation.py +21 -28
- mindspore/nn/layer/basic.py +15 -13
- mindspore/nn/layer/channel_shuffle.py +1 -1
- mindspore/nn/layer/container.py +271 -9
- mindspore/nn/layer/conv.py +310 -207
- mindspore/nn/layer/dense.py +8 -5
- mindspore/nn/layer/embedding.py +33 -27
- mindspore/nn/layer/flash_attention.py +82 -41
- mindspore/nn/layer/image.py +8 -6
- mindspore/nn/layer/math.py +13 -18
- mindspore/nn/layer/normalization.py +107 -66
- mindspore/nn/layer/padding.py +1 -1
- mindspore/nn/layer/pooling.py +131 -109
- mindspore/nn/layer/rnn_cells.py +22 -17
- mindspore/nn/layer/rnns.py +13 -16
- mindspore/nn/layer/thor_layer.py +1 -1
- mindspore/nn/layer/transformer.py +221 -154
- mindspore/nn/learning_rate_schedule.py +9 -1
- mindspore/nn/loss/loss.py +235 -174
- mindspore/nn/optim/ada_grad.py +2 -1
- mindspore/nn/optim/adadelta.py +1 -0
- mindspore/nn/optim/adafactor.py +2 -1
- mindspore/nn/optim/adam.py +7 -4
- mindspore/nn/optim/adamax.py +3 -2
- mindspore/nn/optim/adasum.py +2 -2
- mindspore/nn/optim/asgd.py +2 -3
- mindspore/nn/optim/ftrl.py +6 -5
- mindspore/nn/optim/lamb.py +7 -4
- mindspore/nn/optim/lars.py +1 -1
- mindspore/nn/optim/lazyadam.py +5 -3
- mindspore/nn/optim/momentum.py +2 -1
- mindspore/nn/optim/optimizer.py +53 -4
- mindspore/nn/optim/proximal_ada_grad.py +3 -4
- mindspore/nn/optim/rmsprop.py +4 -3
- mindspore/nn/optim/rprop.py +23 -12
- mindspore/nn/optim/sgd.py +26 -11
- mindspore/nn/optim/thor.py +9 -7
- mindspore/nn/probability/bijector/bijector.py +5 -5
- mindspore/nn/probability/bijector/power_transform.py +27 -27
- mindspore/nn/probability/bijector/softplus.py +3 -3
- mindspore/nn/probability/distribution/_utils/custom_ops.py +3 -3
- mindspore/nn/probability/distribution/bernoulli.py +5 -5
- mindspore/nn/probability/distribution/beta.py +3 -3
- mindspore/nn/probability/distribution/categorical.py +7 -7
- mindspore/nn/probability/distribution/cauchy.py +0 -1
- mindspore/nn/probability/distribution/distribution.py +3 -3
- mindspore/nn/probability/distribution/gamma.py +3 -3
- mindspore/nn/probability/distribution/geometric.py +4 -4
- mindspore/nn/probability/distribution/gumbel.py +4 -4
- mindspore/nn/probability/distribution/log_normal.py +2 -2
- mindspore/nn/probability/distribution/logistic.py +2 -2
- mindspore/nn/probability/distribution/poisson.py +4 -4
- mindspore/nn/probability/distribution/transformed_distribution.py +3 -3
- mindspore/nn/probability/distribution/uniform.py +6 -6
- mindspore/nn/wrap/cell_wrapper.py +78 -34
- mindspore/nn/wrap/grad_reducer.py +8 -5
- mindspore/nn/wrap/loss_scale.py +105 -42
- mindspore/numpy/array_creations.py +1 -2
- mindspore/numpy/array_ops.py +3 -2
- mindspore/offline_debug/convert_async.py +2 -2
- mindspore/ops/_grad_experimental/__init__.py +0 -5
- mindspore/ops/_grad_experimental/grad_array_ops.py +1 -2
- mindspore/ops/_grad_experimental/grad_comm_ops.py +15 -2
- mindspore/ops/_grad_experimental/grad_debug_ops.py +0 -37
- mindspore/ops/_grad_experimental/grad_implementations.py +10 -0
- mindspore/ops/_grad_experimental/grad_inner_ops.py +2 -216
- mindspore/ops/_grad_experimental/grad_math_ops.py +0 -181
- mindspore/ops/_grad_experimental/grad_sparse.py +15 -0
- mindspore/ops/_op_impl/_custom_op/dsd_back_impl.py +1 -1
- mindspore/ops/_op_impl/_custom_op/flash_attention/attention.py +165 -109
- mindspore/ops/_op_impl/_custom_op/flash_attention/flash_attention_bwd.py +144 -86
- mindspore/ops/_op_impl/_custom_op/flash_attention/flash_attention_fwd.py +172 -187
- mindspore/ops/_op_impl/_custom_op/flash_attention/flash_attention_impl.py +51 -57
- mindspore/ops/_op_impl/_custom_op/flash_attention/tik_ops_utils.py +6 -17
- mindspore/ops/_op_impl/_custom_op/flash_attention/tiling_strategy/wukong_tiling.py +1 -1
- mindspore/ops/_op_impl/aicpu/__init__.py +14 -2
- mindspore/ops/_op_impl/aicpu/bias_add_grad.py +0 -1
- mindspore/ops/_op_impl/aicpu/count_nonzero.py +43 -0
- mindspore/ops/_op_impl/aicpu/eps.py +32 -0
- mindspore/ops/_op_impl/aicpu/gamma.py +2 -2
- mindspore/ops/_op_impl/aicpu/log_uniform_candidate_sampler.py +6 -3
- mindspore/ops/_op_impl/aicpu/lu_unpack_grad.py +0 -1
- mindspore/ops/_op_impl/aicpu/multinomial.py +3 -3
- mindspore/ops/_op_impl/aicpu/parameterized_truncated_normal.py +15 -7
- mindspore/ops/_op_impl/aicpu/random_categorical.py +39 -19
- mindspore/ops/_op_impl/aicpu/random_choice_with_mask.py +5 -2
- mindspore/ops/_op_impl/aicpu/random_poisson.py +103 -52
- mindspore/ops/_op_impl/aicpu/random_shuffle.py +17 -15
- mindspore/ops/_op_impl/aicpu/{sparseaddmm.py → sparse_addmm.py} +2 -2
- mindspore/ops/_op_impl/aicpu/{sparsesparsemaximum.py → sparse_sparse_maximum.py} +4 -4
- mindspore/ops/_op_impl/aicpu/standard_laplace.py +5 -5
- mindspore/ops/_op_impl/aicpu/standard_normal.py +5 -5
- mindspore/ops/_op_impl/aicpu/truncated_normal.py +9 -7
- mindspore/ops/_op_impl/aicpu/uniform.py +5 -3
- mindspore/ops/_op_impl/aicpu/uniform_candidate_sampler.py +8 -4
- mindspore/ops/_op_impl/aicpu/uniform_int.py +5 -5
- mindspore/ops/_op_impl/aicpu/uniform_real.py +4 -4
- mindspore/ops/_op_impl/tbe/__init__.py +4 -4
- mindspore/ops/_op_impl/tbe/inplace_index_add.py +7 -3
- mindspore/ops/_op_impl/tbe/trans_data_ds.py +2 -0
- mindspore/ops/_primitive_cache.py +1 -1
- mindspore/ops/_tracefunc.py +45 -13
- mindspore/ops/_utils/utils.py +4 -1
- mindspore/ops/_vmap/vmap_array_ops.py +3 -3
- mindspore/ops/_vmap/vmap_base.py +3 -3
- mindspore/ops/_vmap/vmap_convolution_ops.py +1 -1
- mindspore/ops/_vmap/vmap_grad_math_ops.py +6 -4
- mindspore/ops/_vmap/vmap_math_ops.py +5 -2
- mindspore/ops/_vmap/vmap_nn_ops.py +61 -7
- mindspore/ops/arg_dtype_cast.py +54 -0
- mindspore/ops/composite/base.py +37 -10
- mindspore/ops/composite/math_ops.py +5 -4
- mindspore/ops/composite/multitype_ops/_compile_utils.py +273 -72
- mindspore/ops/composite/multitype_ops/_constexpr_utils.py +16 -9
- mindspore/ops/composite/multitype_ops/add_impl.py +43 -4
- mindspore/ops/composite/multitype_ops/getitem_impl.py +40 -2
- mindspore/ops/composite/multitype_ops/ones_like_impl.py +6 -0
- mindspore/ops/composite/multitype_ops/setitem_impl.py +2 -1
- mindspore/ops/composite/multitype_ops/zeros_like_impl.py +9 -0
- mindspore/ops/deprecated.py +304 -0
- mindspore/ops/function/__init__.py +4 -1
- mindspore/ops/function/array_func.py +167 -189
- mindspore/ops/function/clip_func.py +81 -13
- mindspore/ops/function/debug_func.py +1 -1
- mindspore/ops/function/grad/grad_func.py +18 -8
- mindspore/ops/function/image_func.py +10 -4
- mindspore/ops/function/linalg_func.py +5 -5
- mindspore/ops/function/math_func.py +575 -386
- mindspore/ops/function/nn_func.py +470 -251
- mindspore/ops/function/random_func.py +86 -56
- mindspore/ops/function/sparse_func.py +1 -1
- mindspore/ops/function/sparse_unary_func.py +14 -12
- mindspore/ops/function/vmap_func.py +6 -5
- mindspore/ops/functional.py +15 -10
- mindspore/ops/op_info_register.py +235 -19
- mindspore/ops/operations/__init__.py +25 -17
- mindspore/ops/operations/_grad_ops.py +52 -7
- mindspore/ops/operations/_inner_ops.py +213 -12
- mindspore/ops/operations/_quant_ops.py +4 -8
- mindspore/ops/operations/_sequence_ops.py +42 -0
- mindspore/ops/operations/array_ops.py +64 -280
- mindspore/ops/operations/comm_ops.py +105 -57
- mindspore/ops/operations/custom_ops.py +10 -3
- mindspore/ops/operations/debug_ops.py +8 -4
- mindspore/ops/operations/image_ops.py +18 -12
- mindspore/ops/operations/math_ops.py +185 -138
- mindspore/ops/operations/nn_ops.py +716 -492
- mindspore/ops/operations/other_ops.py +0 -22
- mindspore/ops/operations/random_ops.py +53 -111
- mindspore/ops/operations/sparse_ops.py +3 -1
- mindspore/ops/primitive.py +24 -18
- mindspore/parallel/_auto_parallel_context.py +68 -8
- mindspore/parallel/_cost_model_context.py +2 -2
- mindspore/parallel/_offload_context.py +17 -3
- mindspore/parallel/_parallel_serialization.py +2 -2
- mindspore/parallel/_ps_context.py +12 -0
- mindspore/parallel/_tensor.py +14 -12
- mindspore/parallel/_transformer/layers.py +5 -3
- mindspore/parallel/_transformer/loss.py +1 -0
- mindspore/parallel/_transformer/moe.py +2 -2
- mindspore/parallel/_transformer/op_parallel_config.py +12 -1
- mindspore/parallel/_transformer/transformer.py +23 -3
- mindspore/parallel/_utils.py +11 -7
- mindspore/parallel/algo_parameter_config.py +85 -5
- mindspore/parallel/checkpoint_transform.py +6 -10
- mindspore/parallel/shard.py +4 -4
- mindspore/profiler/common/struct_type.py +3 -3
- mindspore/profiler/common/util.py +3 -2
- mindspore/profiler/envprofiling.py +1 -1
- mindspore/profiler/parser/aicpu_data_parser.py +5 -3
- mindspore/profiler/parser/ascend_flops_generator.py +2 -2
- mindspore/profiler/parser/ascend_fpbp_generator.py +1 -1
- mindspore/profiler/parser/ascend_hccl_generator.py +17 -12
- mindspore/profiler/parser/ascend_msprof_exporter.py +104 -252
- mindspore/profiler/parser/ascend_msprof_generator.py +8 -8
- mindspore/profiler/parser/ascend_op_generator.py +5 -5
- mindspore/profiler/parser/ascend_steptrace_generator.py +6 -4
- mindspore/profiler/parser/ascend_timeline_generator.py +9 -6
- mindspore/profiler/parser/base_timeline_generator.py +9 -7
- mindspore/profiler/parser/cpu_gpu_timeline_generator.py +14 -10
- mindspore/profiler/parser/flops_parser.py +15 -11
- mindspore/profiler/parser/framework_parser.py +37 -21
- mindspore/profiler/parser/hccl_parser.py +16 -12
- mindspore/profiler/parser/integrator.py +22 -11
- mindspore/profiler/parser/memory_usage_parser.py +2 -2
- mindspore/profiler/parser/minddata_analyzer.py +12 -14
- mindspore/profiler/parser/minddata_pipeline_parser.py +1 -1
- mindspore/profiler/parser/msadvisor_parser.py +8 -4
- mindspore/profiler/parser/op_intermediate_parser.py +5 -2
- mindspore/profiler/parser/optime_parser.py +1 -1
- mindspore/profiler/parser/profiler_info.py +2 -2
- mindspore/profiler/parser/step_trace_parser.py +11 -14
- mindspore/profiler/profiling.py +139 -71
- mindspore/rewrite/api/node.py +102 -19
- mindspore/rewrite/api/node_type.py +5 -1
- mindspore/rewrite/api/scoped_value.py +9 -17
- mindspore/rewrite/api/symbol_tree.py +131 -47
- mindspore/rewrite/ast_helpers/__init__.py +2 -1
- mindspore/rewrite/ast_helpers/ast_finder.py +129 -0
- mindspore/rewrite/ast_helpers/ast_modifier.py +116 -104
- mindspore/rewrite/ast_transformers/flatten_recursive_stmt.py +93 -46
- mindspore/rewrite/common/rewrite_elog.py +5 -1
- mindspore/rewrite/namer.py +33 -24
- mindspore/rewrite/namespace.py +14 -5
- mindspore/{_extends/graph_kernel/expanders/complex → rewrite/node}/__init__.py +9 -9
- mindspore/rewrite/node/call_function.py +79 -0
- mindspore/rewrite/node/cell_container.py +135 -0
- mindspore/rewrite/node/control_flow.py +88 -0
- mindspore/rewrite/{node.py → node/node.py} +273 -234
- mindspore/rewrite/node/node_manager.py +254 -0
- mindspore/rewrite/{topological_manager.py → node/node_topological_manager.py} +13 -46
- mindspore/rewrite/parsers/arguments_parser.py +22 -21
- mindspore/rewrite/parsers/assign_parser.py +216 -221
- mindspore/rewrite/parsers/attribute_parser.py +9 -7
- mindspore/rewrite/parsers/class_def_parser.py +174 -113
- mindspore/rewrite/parsers/constant_parser.py +9 -6
- mindspore/rewrite/parsers/container_parser.py +9 -7
- mindspore/rewrite/parsers/for_parser.py +36 -15
- mindspore/rewrite/parsers/function_def_parser.py +24 -16
- mindspore/rewrite/parsers/if_parser.py +28 -24
- mindspore/rewrite/parsers/module_parser.py +196 -25
- mindspore/rewrite/{parser.py → parsers/parser.py} +4 -2
- mindspore/rewrite/{parser_register.py → parsers/parser_register.py} +1 -1
- mindspore/rewrite/parsers/return_parser.py +6 -6
- mindspore/rewrite/sparsify/sparse_transformer.py +12 -3
- mindspore/rewrite/sparsify/utils.py +1 -1
- mindspore/rewrite/symbol_tree.py +525 -577
- mindspore/rewrite/symbol_tree_builder.py +9 -193
- mindspore/rewrite/symbol_tree_dumper.py +2 -2
- mindspore/run_check/_check_version.py +2 -2
- mindspore/{ops/bprop_mindir → safeguard}/__init__.py +4 -3
- mindspore/safeguard/rewrite_obfuscation.py +517 -0
- mindspore/scipy/linalg.py +1 -1
- mindspore/scipy/optimize/minimize.py +7 -3
- mindspore/train/_utils.py +7 -3
- mindspore/train/amp.py +323 -123
- mindspore/train/anf_ir_pb2.py +14 -2
- mindspore/train/callback/_backup_and_restore.py +2 -12
- mindspore/train/callback/_callback.py +29 -4
- mindspore/train/callback/_checkpoint.py +23 -8
- mindspore/train/callback/_early_stop.py +2 -2
- mindspore/train/callback/_landscape.py +4 -4
- mindspore/train/callback/_loss_monitor.py +2 -2
- mindspore/train/callback/_on_request_exit.py +2 -2
- mindspore/train/callback/_reduce_lr_on_plateau.py +3 -4
- mindspore/train/callback/_summary_collector.py +14 -7
- mindspore/train/callback/_time_monitor.py +58 -5
- mindspore/train/data_sink.py +5 -11
- mindspore/train/dataset_helper.py +83 -57
- mindspore/train/loss_scale_manager.py +2 -2
- mindspore/train/metrics/__init__.py +3 -3
- mindspore/train/metrics/cosine_similarity.py +1 -1
- mindspore/train/metrics/hausdorff_distance.py +3 -2
- mindspore/train/metrics/mean_surface_distance.py +3 -2
- mindspore/train/metrics/metric.py +39 -19
- mindspore/train/metrics/roc.py +2 -2
- mindspore/train/metrics/root_mean_square_surface_distance.py +4 -3
- mindspore/train/mind_ir_pb2.py +85 -36
- mindspore/train/model.py +185 -45
- mindspore/train/serialization.py +390 -150
- mindspore/train/summary/_writer_pool.py +3 -2
- mindspore/train/summary/summary_record.py +14 -10
- mindspore/train/train_thor/convert_utils.py +3 -3
- mindspore/train/train_thor/dataset_helper.py +1 -1
- mindspore/version.py +1 -1
- {mindspore-2.1.0.dist-info → mindspore-2.2.0.dist-info}/METADATA +6 -7
- {mindspore-2.1.0.dist-info → mindspore-2.2.0.dist-info}/RECORD +447 -507
- {mindspore-2.1.0.dist-info → mindspore-2.2.0.dist-info}/entry_points.txt +0 -1
- mindspore/_akg/akg/tvm/contrib/debugger/__init__.py +0 -16
- mindspore/_akg/akg/tvm/contrib/debugger/debug_result.py +0 -274
- mindspore/_akg/akg/tvm/contrib/debugger/debug_runtime.py +0 -259
- mindspore/_akg/akg/tvm/contrib/peak.py +0 -341
- mindspore/_akg/akg/tvm/contrib/rpc.py +0 -25
- mindspore/_akg/akg/tvm/contrib/xcode.py +0 -257
- mindspore/_akg/akg/tvm/exec/__init__.py +0 -17
- mindspore/_akg/akg/tvm/exec/autotvm_log_editor.py +0 -60
- mindspore/_akg/akg/tvm/exec/measure_peak.py +0 -48
- mindspore/_akg/akg/tvm/exec/query_rpc_tracker.py +0 -48
- mindspore/_akg/akg/tvm/exec/rpc_proxy.py +0 -98
- mindspore/_akg/akg/tvm/exec/rpc_server.py +0 -88
- mindspore/_akg/akg/tvm/exec/rpc_tracker.py +0 -62
- mindspore/_akg/akg/tvm/rpc/__init__.py +0 -29
- mindspore/_akg/akg/tvm/rpc/base.py +0 -182
- mindspore/_akg/akg/tvm/rpc/client.py +0 -436
- mindspore/_akg/akg/tvm/rpc/proxy.py +0 -595
- mindspore/_akg/akg/tvm/rpc/server.py +0 -413
- mindspore/_akg/akg/tvm/rpc/tornado_util.py +0 -121
- mindspore/_akg/akg/tvm/rpc/tracker.py +0 -431
- mindspore/_extends/graph_kernel/expander.py +0 -80
- mindspore/_extends/graph_kernel/expanders/__init__.py +0 -54
- mindspore/_extends/graph_kernel/expanders/_utils.py +0 -269
- mindspore/_extends/graph_kernel/expanders/addn.py +0 -33
- mindspore/_extends/graph_kernel/expanders/batchnorm.py +0 -152
- mindspore/_extends/graph_kernel/expanders/batchnorm_grad.py +0 -105
- mindspore/_extends/graph_kernel/expanders/clip_by_norm_no_div_sum.py +0 -33
- mindspore/_extends/graph_kernel/expanders/complex/abs.py +0 -30
- mindspore/_extends/graph_kernel/expanders/complex/add.py +0 -44
- mindspore/_extends/graph_kernel/expanders/complex/div.py +0 -62
- mindspore/_extends/graph_kernel/expanders/complex/mul.py +0 -52
- mindspore/_extends/graph_kernel/expanders/complex/real_div.py +0 -62
- mindspore/_extends/graph_kernel/expanders/complex/sub.py +0 -45
- mindspore/_extends/graph_kernel/expanders/conv2d.py +0 -200
- mindspore/_extends/graph_kernel/expanders/dropout_grad.py +0 -30
- mindspore/_extends/graph_kernel/expanders/equal_count.py +0 -50
- mindspore/_extends/graph_kernel/expanders/erfc.py +0 -35
- mindspore/_extends/graph_kernel/expanders/expand_dims.py +0 -50
- mindspore/_extends/graph_kernel/expanders/fused_adam.py +0 -44
- mindspore/_extends/graph_kernel/expanders/fused_adam_weight_decay.py +0 -47
- mindspore/_extends/graph_kernel/expanders/fused_mul_add.py +0 -28
- mindspore/_extends/graph_kernel/expanders/gelu_grad.py +0 -70
- mindspore/_extends/graph_kernel/expanders/gkdropout.py +0 -40
- mindspore/_extends/graph_kernel/expanders/identity.py +0 -25
- mindspore/_extends/graph_kernel/expanders/layernorm.py +0 -93
- mindspore/_extends/graph_kernel/expanders/layernorm_grad.py +0 -113
- mindspore/_extends/graph_kernel/expanders/logsoftmax.py +0 -46
- mindspore/_extends/graph_kernel/expanders/logsoftmax_grad.py +0 -36
- mindspore/_extends/graph_kernel/expanders/matmul.py +0 -80
- mindspore/_extends/graph_kernel/expanders/maximum_grad.py +0 -59
- mindspore/_extends/graph_kernel/expanders/minimum_grad.py +0 -80
- mindspore/_extends/graph_kernel/expanders/oneslike.py +0 -26
- mindspore/_extends/graph_kernel/expanders/reduce_mean.py +0 -43
- mindspore/_extends/graph_kernel/expanders/relu_grad.py +0 -32
- mindspore/_extends/graph_kernel/expanders/sigmoid_cross_entropy_with_logits.py +0 -41
- mindspore/_extends/graph_kernel/expanders/sigmoid_cross_entropy_with_logits_grad.py +0 -35
- mindspore/_extends/graph_kernel/expanders/sigmoid_grad.py +0 -31
- mindspore/_extends/graph_kernel/expanders/slice.py +0 -35
- mindspore/_extends/graph_kernel/expanders/softmax_cross_entropy_with_logits.py +0 -42
- mindspore/_extends/graph_kernel/expanders/softmax_grad_ext.py +0 -41
- mindspore/_extends/graph_kernel/expanders/softsign.py +0 -28
- mindspore/_extends/graph_kernel/expanders/sqrt_grad.py +0 -29
- mindspore/_extends/graph_kernel/expanders/square_sum_all.py +0 -44
- mindspore/_extends/graph_kernel/expanders/square_sum_v1.py +0 -37
- mindspore/_extends/graph_kernel/expanders/squared_difference.py +0 -43
- mindspore/_extends/graph_kernel/expanders/tanh_grad.py +0 -31
- mindspore/_extends/graph_kernel/model/op_infer.py +0 -506
- mindspore/dataset/datapreprocess/__init__.py +0 -20
- mindspore/dataset/datapreprocess/preprocess_imagenet_validate_dataset.py +0 -54
- mindspore/include/api/net.h +0 -142
- mindspore/nn/lr_scheduler.py +0 -262
- mindspore/ops/_grad_experimental/grad_image_ops.py +0 -248
- mindspore/ops/_grad_experimental/grad_linalg_ops.py +0 -181
- mindspore/ops/_grad_experimental/grad_other_ops.py +0 -72
- mindspore/ops/_grad_experimental/grad_scalar_ops.py +0 -112
- mindspore/ops/_grad_experimental/grad_sequence_ops.py +0 -351
- mindspore/ops/bprop_mindir/BNTrainingReduce_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Broadcast_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Depend_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/DepthwiseConv2dNative_bprop.mindir +0 -138
- mindspore/ops/bprop_mindir/EmbeddingLookup_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Load_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/ScatterNonAliasingAdd_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/SparseGatherV2_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/SparseSoftmaxCrossEntropyWithLogits_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Switch_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/TransShape_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/TupleGetItem_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Unique_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Unstack_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/generate_mindir.py +0 -114
- mindspore/rewrite/node_visitor.py +0 -44
- {mindspore-2.1.0.dist-info → mindspore-2.2.0.dist-info}/WHEEL +0 -0
- {mindspore-2.1.0.dist-info → mindspore-2.2.0.dist-info}/top_level.txt +0 -0
mindspore/ops/function/nn_func.py

@@ -27,7 +27,7 @@ from mindspore.ops.operations import nn_ops as NN_OPS
 from mindspore.ops.operations import _sequence_ops as seq
 import mindspore.common.dtype as mstype
 from mindspore.ops.function.math_func import logsumexp
-from mindspore.ops.function.random_func import _get_seed
+from mindspore.ops.function.random_func import _get_seed, _set_prim_op_user_data
 from mindspore.common.tensor import Tensor
 from mindspore._c_expression import Tensor as Tensor_
 from mindspore.ops._primitive_cache import _get_cache_prim
@@ -40,6 +40,7 @@ from mindspore.ops.operations.nn_ops import ChannelShuffle
 from mindspore.ops.operations.nn_ops import TripletMarginLoss
 from mindspore.ops.operations._inner_ops import SiLU
 from mindspore.ops.operations._sequence_ops import TupleToTensor, TensorToTuple, ListToTensor
+from mindspore.common.api import _function_forbid_reuse
 
 slice_ = P.Slice()
 fast_gelu_ = P.FastGeLU()
@@ -232,7 +233,7 @@ def adaptive_avg_pool3d(input, output_size):
 def _check_avgpool_1d_type_and_int(kernel_size, stride, ceil_mode, count_include_pad):
     """Checks the type of avgpool1d input"""
     validator.check_value_type('kernel_size', kernel_size, [int], 'avg_pool1d')
-    validator.check_value_type('stride', stride, …
+    validator.check_value_type('stride', stride, (int, tuple), 'avg_pool1d')
     validator.check_value_type('ceil_mode', ceil_mode, bool, 'avg_pool1d')
     validator.check_value_type('count_include_pad', count_include_pad, bool, 'avg_pool1d')
     validator.check_int(kernel_size, 1, validator.GE, "kernel_size", 'avg_pool1d')
@@ -263,12 +264,10 @@ def avg_pool1d(input_x, kernel_size=1, stride=1, padding=0, ceil_mode=False, count_include_pad=True):
     Args:
         input_x (Tensor): Tensor of shape :math:`(N, C_{in}, L_{in})`.
         kernel_size (int): The size of kernel window used to take the average value. Default: ``1`` .
-        stride (Union(int, tuple[int])): The distance of kernel moving …
-            …
-            …
-            …
-            and right are the same, equal to pad. If `padding` is a tuple of `2` integers, the padding of left and right
-            equal to `padding[0]` and `padding[1]` correspondingly. Default: ``0`` .
+        stride (Union(int, tuple[int])): The distance of kernel moving. `stride` can either be an int
+            number or a tuple of one int number. Default: ``1`` .
+        padding (Union(int, tuple[int])): The pad value to be filled. `padding` can either be an integer
+            or a tuple of one integer. Default: ``0`` .
         ceil_mode (bool): If True, apply ceil instead of floor to compute the output shape. Default: ``False``.
         count_include_pad (bool): If True, include the zero-padding in the averaging calculation. Default: ``True`` .
 
@@ -300,20 +299,25 @@ def avg_pool1d(input_x, kernel_size=1, stride=1, padding=0, ceil_mode=False, count_include_pad=True):
         raise TypeError("For avg_pool1d, the input input_x must be tensor")
 
     if len(input_x.shape) != 3:
-        raise ValueError("For avg_pool1d, input must have 3 dim, but got {…
+        raise ValueError(f"For avg_pool1d, input must have 3 dim, but got {len(input_x.shape)}.")
 
     _check_avgpool_1d_type_and_int(kernel_size, stride, ceil_mode, count_include_pad)
     if isinstance(padding, int):
        check_non_negative_int(padding, 'padding', 'avg_pool1d')
        padding = (0, 0, 0, 0, padding, padding)
     elif isinstance(padding, tuple):
-        if len(padding) != …
-            raise ValueError("For avg_pool1d, padding should be int or tuple of length …
+        if len(padding) != 1:
+            raise ValueError("For avg_pool1d, padding should be int or tuple of length 1.")
         for item in padding:
             check_non_negative_int(item, 'padding', 'avg_pool1d')
-        padding = (0, 0, 0, 0, padding[0], padding[…
+        padding = (0, 0, 0, 0, padding[0], padding[0])
     else:
-        raise TypeError("For avg_pool1d, padding should be int or tuple of length …
+        raise TypeError("For avg_pool1d, padding should be int or tuple of length 1.")
+
+    if isinstance(stride, tuple):
+        if len(stride) != 1:
+            raise ValueError("For avg_pool1d, stride should be int or tuple of length 1.")
+        stride = stride[0]
 
     expand_op = _get_cache_prim(P.ExpandDims)()
     squeeze_op = _get_cache_prim(P.Squeeze)((2, 3))
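
The practical effect of the hunk above: `avg_pool1d` now accepts a one-element tuple for `stride` and normalizes it to an int itself. A minimal sketch against the 2.2.0 API (tensor values are illustrative, not taken from the diff):

    import numpy as np
    import mindspore as ms
    from mindspore import ops

    x = ms.Tensor(np.arange(12, dtype=np.float32).reshape(1, 2, 6))  # (N, C, L)
    # int stride and one-element tuple stride are now interchangeable
    out_int = ops.avg_pool1d(x, kernel_size=2, stride=2)
    out_tup = ops.avg_pool1d(x, kernel_size=2, stride=(2,))
    print(out_int.shape, out_tup.shape)  # (1, 2, 3) (1, 2, 3)
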
@@ -419,7 +423,7 @@ def avg_pool2d(input_x, kernel_size=1, stride=1, padding=0, ceil_mode=False, count_include_pad=True, divisor_override=0):
         ceil_mode (bool): If True, apply ceil instead of floor to compute the output shape. Default: ``False``.
         count_include_pad (bool): If True, include the zero-padding in the averaging calculation. Default: ``True`` .
         divisor_override (int): If specified, it will be used as divisor in the averaging calculation, otherwise
-            `kernel_size` will be used. Default: ``0…
+            `kernel_size` will be used. Default: ``0``, which means not specified.
 
     Returns:
         Tensor, with shape :math:`(N, C_{out}, H_{out}, W_{out})`.
@@ -456,7 +460,7 @@ def avg_pool2d(input_x, kernel_size=1, stride=1, padding=0, ceil_mode=False, count_include_pad=True, divisor_override=0):
         raise TypeError("For avg_pool2d, the input input_x must be tensor")
 
     if len(input_x.shape) != 4:
-        raise ValueError("For avg_pool2d, input must have 4 dim, but got {…
+        raise ValueError(f"For avg_pool2d, input must have 4 dim, but got {len(input_x.shape)}.")
 
     kernel_size = _check_avgpool_2d_kernel_size(kernel_size)
     stride = _check_avgpool_2d_stride(stride)
@@ -527,7 +531,7 @@ def avg_pool3d(input_x, kernel_size=1, stride=1, padding=0, ceil_mode=False, count_include_pad=True, divisor_override=0):
         count_include_pad (bool, optional): If ``True`` , averaging calculation
             will include the zero-padding. Default: ``True`` .
         divisor_override (int, optional): If specified, it will be used as divisor in the averaging calculation,
-            otherwise `kernel_size` will be used. Default: ``0`` .
+            otherwise `kernel_size` will be used. Default: ``0`` , which means not specified.
 
     Returns:
         Tensor, with shape :math:`(N, C, D_{out}, H_{out}, W_{out})`. Has the same data type with `input_x`.
@@ -560,7 +564,7 @@ def avg_pool3d(input_x, kernel_size=1, stride=1, padding=0, ceil_mode=False, count_include_pad=True, divisor_override=0):
         raise TypeError("For avg_pool3d, the input input_x must be tensor")
 
     if len(input_x.shape) != 5:
-        raise ValueError("For avg_pool3d, input must have 5 dim, but got {…
+        raise ValueError(f"For avg_pool3d, input must have 5 dim, but got {len(input_x.shape)}.")
 
     _check_avg_pool3d_padding(padding)
 
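
The clarified `divisor_override` wording in the two pooling hunks above deserves a concrete illustration: with the default ``0`` the kernel area is the divisor, and any other value replaces it. A sketch assuming the documented semantics (values illustrative):

    import numpy as np
    import mindspore as ms
    from mindspore import ops

    x = ms.Tensor(np.ones((1, 1, 4, 4), dtype=np.float32))
    avg = ops.avg_pool2d(x, kernel_size=2, stride=2)                         # divisor = 2*2, windows average to 1.0
    summed = ops.avg_pool2d(x, kernel_size=2, stride=2, divisor_override=1)  # divisor = 1, windows sum to 4.0
    print(avg[0, 0, 0, 0], summed[0, 0, 0, 0])  # 1.0 4.0
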
@@ -637,21 +641,21 @@ def adaptive_max_pool1d(input, output_size):
     x_dtype = _get_cache_prim(P.DType)()(input)
 
     if len(x_in_shape) != 3:
-        raise ValueError("For adaptive_max_pool1d input must have 3 dim, but got {…
+        raise ValueError(f"For adaptive_max_pool1d input must have 3 dim, but got {len(x_in_shape)}.")
     if x_in_shape[2] < output_size:
-        raise ValueError("For adaptive_max_pool1d input's last dimension must be greater or equal to "
-                         "output size {}, but got {…
+        raise ValueError(f"For adaptive_max_pool1d input's last dimension must be greater or equal to "
+                         f"output size {output_size}, but got {x_in_shape[2]}.")
     if x_in_shape[2] % output_size != 0:
-        raise ValueError("For adaptive_max_pool1d input's last dimension must be divisible by "
-                         "output size {}, but got {…
+        raise ValueError(f"For adaptive_max_pool1d input's last dimension must be divisible by "
+                         f"output size {output_size}, but got {x_in_shape[2]}.")
     if is_ascend_backend():
         if x_dtype not in [mstype.float16]:
-            raise TypeError("For adaptive_max_pool1d in Ascend platform, the input dtype must be float16, "
-                            "but got {}.…
+            raise TypeError(f"For adaptive_max_pool1d in Ascend platform, the input dtype must be float16, "
+                            f"but got {x_dtype}.")
     else:
         if x_dtype not in [mstype.float16, mstype.float32]:
-            raise TypeError("For adaptive_max_pool1d, the input dtype must be float16 or float32, "
-                            "but got {}.…
+            raise TypeError(f"For adaptive_max_pool1d, the input dtype must be float16 or float32, "
+                            f"but got {x_dtype}.")
 
     expand_ = _get_cache_prim(P.ExpandDims)()
     squeeze_ = _get_cache_prim(P.Squeeze)(2)
@@ -1147,7 +1151,7 @@ def max_unpool3d(x, indices, kernel_size, stride=None, padding=0, output_size=None):
     return out
 
 
-def binary_cross_entropy_with_logits(logits, label, weight, pos_weight, reduction='mean'):
+def binary_cross_entropy_with_logits(logits, label, weight=None, pos_weight=None, reduction='mean'):
     r"""
     Adds sigmoid activation function to input `logits`, and uses the given logits to compute binary cross entropy
     between the logits and the label.
@@ -1177,7 +1181,7 @@ def binary_cross_entropy_with_logits(logits, label, weight, pos_weight, reduction='mean'):
 
     This operator will multiply the output by the corresponding weight.
     The tensor :math:`weight` assigns different weights to each piece of data in the batch,
-    and the tensor :math:`…
+    and the tensor :math:`pos\_weight` adds corresponding weights to the positive examples of each category.
 
     In addition, it can trade off recall and precision by adding weights to positive examples.
     In the case of multi-label classification the loss can be described as:
@@ -1196,17 +1200,21 @@ def binary_cross_entropy_with_logits(logits, label, weight, pos_weight, reduction='mean'):
         logits (Tensor): Input logits. Data type must be float16 or float32.
         label (Tensor): Ground truth label, has the same shape as `logits`.
             Data type must be float16 or float32.
-        weight (Tensor): A rescaling weight applied to the loss of each batch element. It can be
+        weight (Tensor, optional): A rescaling weight applied to the loss of each batch element. It can be
             broadcast to a tensor with shape of `logits`. Data type must be float16 or float32.
-            …
+            Default: ``None``, `weight` is a Tensor whose value is ``1``.
+        pos_weight (Tensor, optional): A weight of positive examples. Must be a vector with length equal to the
             number of classes. It can be broadcast to a tensor with shape of `logits`.
-            Data type must be float16 or float32.
-        reduction (str): …
-            …
-            …
+            Data type must be float16 or float32. Default: ``None``, `pos_weight` is a Tensor whose value is ``1``.
+        reduction (str, optional): Apply specific reduction method to the output: ``'none'`` , ``'mean'`` ,
+            ``'sum'`` . Default: ``'mean'`` .
+
+            - ``'none'``: no reduction will be applied.
+            - ``'mean'``: compute and return the weighted mean of elements in the output.
+            - ``'sum'``: the output elements will be summed.
 
     Returns:
-        Tensor or Scalar, if `reduction` is 'none' …
+        Tensor or Scalar, if `reduction` is ``'none'``, it's a tensor with the same shape and type as input `logits`.
         Otherwise, the output is a scalar.
 
     Raises:
@@ -1214,7 +1222,7 @@ def binary_cross_entropy_with_logits(logits, label, weight, pos_weight, reduction='mean'):
         TypeError: If data type of input `logits`, `label`, `weight`, `pos_weight` is neither float16 nor float32.
         TypeError: If data type of input `reduction` is not string.
         ValueError: If `weight` or `pos_weight` can not be broadcast to a tensor with shape of `logits`.
-        ValueError: If `reduction` is not one of 'none' …
+        ValueError: If `reduction` is not one of ``'none'``, ``'mean'`` or ``'sum'``.
 
     Supported Platforms:
         ``Ascend`` ``GPU`` ``CPU``
@@ -1232,10 +1240,15 @@ def binary_cross_entropy_with_logits(logits, label, weight, pos_weight, reduction='mean'):
         0.3463612
     """
 
+    if weight is None:
+        weight = ops.ones_like(logits)
+    if pos_weight is None:
+        pos_weight = ops.ones_like(logits)
     bce_with_logits_loss_op = _get_cache_prim(NN_OPS.BCEWithLogitsLoss)(reduction)
     return bce_with_logits_loss_op(logits, label, weight, pos_weight)
 
 
+@_function_forbid_reuse
 def dropout(input, p=0.5, training=True, seed=None):
     r"""
     During training, randomly zeroes some of the elements of the input tensor
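
With `weight` and `pos_weight` optional, the 2.1.0 call that passed explicit all-ones tensors and the new argument-free call are equivalent. A sketch reusing the docstring's example values, which evaluate to 0.3463612:

    import numpy as np
    import mindspore as ms
    from mindspore import ops

    logits = ms.Tensor(np.array([[-0.8, 1.2, 0.7], [-0.1, -0.4, 0.7]]), ms.float32)
    label = ms.Tensor(np.array([[0.3, 0.8, 1.2], [-0.1, 0.2, 0.9]]), ms.float32)
    # 2.2.0: weight/pos_weight may be omitted; both default to all-ones tensors
    loss = ops.binary_cross_entropy_with_logits(logits, label)
    # equivalent explicit call, as required in 2.1.0
    ones = ops.ones_like(logits)
    loss_explicit = ops.binary_cross_entropy_with_logits(logits, label, ones, ones)
    print(loss, loss_explicit)  # both 0.3463612
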
@@ -1275,7 +1288,9 @@ def dropout(input, p=0.5, training=True, seed=None):
         return input
     keep_prob = 1 - p
     seed0, seed1 = _get_seed(seed, "dropout")
-    …
+    dropout_op = P.Dropout(keep_prob=keep_prob, Seed0=seed0, Seed1=seed1)
+    dropout_op = _set_prim_op_user_data(dropout_op, "random_cache", False)
+    out, _ = dropout_op(input)
     return out
 
 
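
The dropout body now constructs its `P.Dropout` primitive explicitly and tags it with `random_cache=False` user data, and the new `@_function_forbid_reuse` decorator appears to keep separately traced calls from sharing a cached primitive's random state (a reading of the diff, not a documented contract). The public call is unchanged:

    import numpy as np
    import mindspore as ms
    from mindspore import ops

    x = ms.Tensor(np.ones((2, 4)), ms.float32)
    out = ops.dropout(x, p=0.5, seed=1)           # kept elements are scaled by 1/(1-p) = 2.0
    print(out.shape)                              # (2, 4)
    print(ops.dropout(x, p=0.5, training=False))  # training=False returns the input unchanged
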
@@ -1820,7 +1835,7 @@ def kl_div(logits, labels, reduction='mean'):
         Its value must be one of ``'none'`` , ``'mean'`` , ``'batchmean'`` or ``'sum'`` . Default: ``'mean'`` .
 
     Returns:
-        Tensor or Scalar, if `reduction` is 'none' …
+        Tensor or Scalar, if `reduction` is ``'none'``, then output is a tensor and has the same shape as `logits`.
         Otherwise, it is a scalar.
 
     Raises:
@@ -2220,7 +2235,9 @@ def interpolate(input,
             One and only one of size and scale_factor can be set to None. Default: ``None`` .
         mode (str): The sampling algorithm.
             One of 'nearest', 'linear' (3D only), 'bilinear' (4D only), 'trilinear' (5D only), 'bicubic' (4D only),
-            'area', 'nearest-exact'(…
+            'area', 'nearest-exact' (matches Scikit-Image and PIL nearest neighbours interpolation algorithms and fixes
+            known issues with `nearest`, 3D and 4D). Default: ``"nearest"`` .
+
         align_corners (bool): If True, rescale input by :math:`(new\_height - 1) / (height - 1)`, which exactly
             aligns the corners of data and resized data. If False, rescale by :math:`new\_height / height`.
             Default: ``None`` .
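
The expanded `mode` documentation can be exercised directly; a shape-only sketch (input values are illustrative):

    import numpy as np
    import mindspore as ms
    from mindspore import ops

    x = ms.Tensor(np.arange(8, dtype=np.float32).reshape(1, 1, 8))  # 3D input: (N, C, L)
    up = ops.interpolate(x, size=16, mode="nearest-exact")  # PIL/Scikit-Image-style nearest neighbour
    print(up.shape)  # (1, 1, 16)
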
@@ -2568,10 +2585,12 @@ def soft_margin_loss(input, target, reduction='mean'):
     Args:
         input (Tensor): Predict data. Data type must be float16 or float32.
         target (Tensor): Ground truth data, with the same type and shape as `logits`.
-        reduction (str, optional): …
-            …
-            …
-            …
+        reduction (str, optional): Apply specific reduction method to the output: ``'none'`` , ``'mean'`` ,
+            ``'sum'`` . Default: ``'mean'`` .
+
+            - ``'none'``: no reduction will be applied.
+            - ``'mean'``: compute and return the mean of elements in the output.
+            - ``'sum'``: the output elements will be summed.
 
     Outputs:
         Tensor or Scalar. If `reduction` is ``'none'``, its shape is the same as `logits`.
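
The newly enumerated reduction modes behave as in the other losses touched by this release; a sketch with illustrative values:

    import numpy as np
    import mindspore as ms
    from mindspore import ops

    logits = ms.Tensor(np.array([[0.3, 0.7], [0.5, 0.5]]), ms.float32)
    labels = ms.Tensor(np.array([[-1.0, 1.0], [1.0, -1.0]]), ms.float32)
    print(ops.soft_margin_loss(logits, labels, reduction='mean').shape)  # () - a scalar
    print(ops.soft_margin_loss(logits, labels, reduction='none').shape)  # (2, 2) - per-element losses
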
@@ -2751,6 +2770,55 @@ def soft_shrink(input, lambd=0.5):
     return soft_shrink_op(input)
 
 
+def softplus(input, beta=1, threshold=20): # pylint:disable=redefined-outer-name
+    r"""
+    Applies softplus function to `input` element-wise.
+
+    The softplus function is shown as follows, x is the element of `input` :
+
+    .. math::
+
+        \text{output} = \frac{1}{beta}\log(1 + \exp(\text{beta * x}))
+
+    When :math:`input * beta > threshold`, the implementation converts to the linear function
+    to ensure numerical stability.
+
+    Args:
+        input (Tensor) - Tensor of any dimension.
+            Supported dtypes:
+
+            - GPU/CPU: float16, float32, float64.
+            - Ascend: float16, float32.
+
+        beta (int, optional) - The :math:`\beta` value in softplus function. Default: ``1`` .
+        threshold (int, optional) - When :math:`input * beta > threshold`, converting softplus to a linear function.
+            Default: ``20`` .
+
+    Returns:
+        Tensor, with the same type and shape as the `input` .
+
+    Raises:
+        TypeError: If `input` is not a Tensor.
+        TypeError: If the dtype of `input` is not float16, float32 or float64.
+
+    Supported Platforms:
+        ``Ascend`` ``GPU`` ``CPU``
+
+    Examples:
+        >>> import mindspore
+        >>> import numpy as np
+        >>> from mindspore import Tensor, ops
+        >>> input = Tensor(np.array([0.1, 0.2, 30, 25]), mindspore.float32)
+        >>> output = ops.softplus(input)
+        >>> print(output)
+        [0.7443967 0.79813886 30. 25.]
+    """
+    softplus_op = _get_cache_prim(P.Softplus)()
+    scaling_input = beta * input
+    op_output = (1 / beta) * softplus_op(scaling_input)
+    return ops.select(input * beta > threshold, input, op_output)
+
+
 def silu(x):
     r"""
     Computes Sigmoid Linear Unit of input element-wise. The SiLU function is defined as:
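
The new `ops.softplus` switches to the identity once `input * beta > threshold`, which is exactly what its docstring example shows (0.1 and 0.2 go through the log-exp branch, 30 and 25 pass through). A quick variation with a non-default `beta`:

    import numpy as np
    import mindspore as ms
    from mindspore import ops

    x = ms.Tensor(np.array([0.1, 0.2, 30.0, 25.0]), ms.float32)
    print(ops.softplus(x))                        # ~[0.744 0.798 30. 25.]
    # beta scales inside the log; elements with x*beta > threshold still fall back to x
    print(ops.softplus(x, beta=2, threshold=20))  # ~[0.399 0.457 30. 25.]
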
@@ -2860,7 +2928,7 @@ def sigmoid(input):
         >>> print(output)
         [0.7310586 0.880797 0.95257413 0.98201376 0.9933072 ]
     """
-    return …
+    return _get_cache_prim(NN_OPS.Sigmoid)()(input)
 
 
 def logsigmoid(x):
@@ -2946,11 +3014,19 @@ def dense(input, weight, bias=None):
         _check_is_tensor("bias", bias, "dense")
     weight = ops.t(weight)
     input = ops.matmul(input, weight)
+    input_shape = input.shape
     if bias is not None:
         input = input + bias
+        _check_dense_add_bias_shape(input_shape, input.shape, bias.shape)
     return input
 
 
+def _check_dense_add_bias_shape(input_shape, output_shape, bias_shape):
+    """Check that the output has the correct shape after adding bias."""
+    if input_shape != output_shape:
+        raise ValueError(f"For dense, the bias shape {bias_shape} does not match the input shape {input_shape}.")
+
+
 @_primexpr
 def check_dense_inputs_same_shape(input1_shape, input2_shape, prim_name=None):
     """check bidense input Tensors' shape"""
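
The new shape guard rejects a `bias` whose broadcast would silently change the matmul result's shape. A sketch of the intended usage (shapes follow the usual (out_features, in_features) weight layout):

    import numpy as np
    import mindspore as ms
    from mindspore import ops

    x = ms.Tensor(np.random.randn(4, 3), ms.float32)
    w = ms.Tensor(np.random.randn(2, 3), ms.float32)  # (out_features, in_features)
    b = ms.Tensor(np.random.randn(2), ms.float32)     # broadcasts without changing the (4, 2) result
    print(ops.dense(x, w, b).shape)  # (4, 2)
    # a bias of shape (3, 1, 2) would broadcast the sum to (3, 4, 2) and now raises ValueError
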
@@ -2965,7 +3041,10 @@ def bidense(input1, input2, weight, bias=None):
     Applies bilinear dense connected layer for `input1` and `input2`. The bilinear dense function is defined as:
 
     .. math::
-        output = …
+        output = x_{1}^{T}Ax_{2} + b
+
+    :math:`x_{1}` represents `input1` , :math:`x_{2}` represents `input2` , :math:`A` represents `weight` ,
+    :math:`b` represents `bias` .
 
     .. warning::
         This is an experimental API that is subject to change or deletion.
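
The restored formula makes the shape contract easy to check: each output feature k computes x1^T W_k x2 + b_k. A sketch assuming the (out_features, in1_features, in2_features) weight layout documented for this API:

    import numpy as np
    import mindspore as ms
    from mindspore import ops

    x1 = ms.Tensor(np.random.randn(5, 3), ms.float32)    # (*, in1_features)
    x2 = ms.Tensor(np.random.randn(5, 4), ms.float32)    # (*, in2_features)
    w = ms.Tensor(np.random.randn(2, 3, 4), ms.float32)  # (out_features, in1_features, in2_features)
    b = ms.Tensor(np.random.randn(2), ms.float32)
    print(ops.bidense(x1, x2, w, b).shape)  # (5, 2)
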
@@ -3391,7 +3470,9 @@ def relu6(x):
     It returns :math:`\min(\max(0,x), 6)` element-wise.
 
     Args:
-        x (Tensor): …
+        x (Tensor): Tensor of shape :math:`(N, *)`,
+            where :math:`*` means any number of additional dimensions.
+            Data type must be float16, float32.
 
     Returns:
         Tensor, with the same dtype and shape as the `x`.
@@ -3528,6 +3609,9 @@ def rrelu(input, lower=1.0 / 8, upper=1.0 / 3):
     _lower = Tensor(lower, mstype.float32)
     _upper = Tensor(upper, mstype.float32)
     _size = input.shape
+    if ops.is_sequence_value_unknown(_size):
+        dyn_shape = _get_cache_prim(P.TensorShape)()
+        _size = dyn_shape(input)
     sign_matrix = _get_cache_prim(P.Sign)()(input)
     negative_filter = sign_matrix.clip(None, 0)
     positive_filter = sign_matrix.clip(0, None)
@@ -3615,11 +3699,10 @@ def cross_entropy(input, target, weight=None, ignore_index=-100, reduction='mean', label_smoothing=0.0):
         l_n = - w_{y_n} \log \frac{\exp(x_{n,y_n})}{\sum_{c=1}^C \exp(x_{n,c})}
         \cdot \mathbb{1}\{y_n \not= \text{ignore_index}\}
 
-    where :math:`x` is the inputs, :math:`y` is the target, :math:`w` is the weight, …
-    …
-    classes.
+    where :math:`x` is the inputs, :math:`y` is the target, :math:`w` is the weight, N is the batch size,
+    :math:`c` belonging to :math:`[0, C-1]` is class index, where :math:`C` is the number of classes.
 
-    If reduction is not …
+    If `reduction` is not ``None`` (default ``'mean'`` ), then
 
     .. math::
 
@@ -3638,11 +3721,10 @@ def cross_entropy(input, target, weight=None, ignore_index=-100, reduction='mean', label_smoothing=0.0):
         \ell(x, y) = L = \{l_1,\dots,l_N\}^\top, \quad
         l_n = - \sum_{c=1}^C w_c \log \frac{\exp(x_{n,c})}{\sum_{i=1}^C \exp(x_{n,i})} y_{n,c}
 
-    where :math:`x` is the inputs, :math:`y` is the target, :math:`w` is the weight, …
-    …
-    classes.
+    where :math:`x` is the inputs, :math:`y` is the target, :math:`w` is the weight, N is the batch size,
+    :math:`c` belonging to :math:`[0, C-1]` is class index, where :math:`C` is the number of classes.
 
-    If reduction is not …
+    If `reduction` is not ``None`` (default ``'mean'`` ), then
 
     .. math::
 
@@ -3658,16 +3740,19 @@ def cross_entropy(input, target, weight=None, ignore_index=-100, reduction='mean', label_smoothing=0.0):
             in case of 2D Loss, or :math:`(N, C, d_1, d_2, ..., d_K)`.
             `input` is expected to be log-probabilities, data type must be float16 or float32.
         target (Tensor): For class indices, tensor of shape :math:`()`, :math:`(N)` or
-            :math:`(N, d_1, d_2, ..., d_K)` , data type must be int32.
-            …
-            data type must be float16 or float32.
+            :math:`(N, d_1, d_2, ..., d_K)` , data type must be int32. For probabilities, tensor of shape :math:`(C,)` ,
+            :math:`(N, C)` or :math:`(N, C, d_1, d_2, ..., d_K)` , data type must be float16 or float32.
         weight (Tensor): A rescaling weight applied to the loss of each batch element.
-            If not None, the shape is :math:`(C,)`,
-            data type must be float16 or float32. Default: ``None`` .
+            If not None, the shape is :math:`(C,)`, data type must be float16 or float32. Default: ``None`` .
         ignore_index (int): Specifies a target value that is ignored
             and does not contribute to the input gradient. Default: ``-100`` .
-        reduction (str): …
-            Default: ``'mean'`` .
+        reduction (str, optional): Apply specific reduction method to the output: ``'none'`` , ``'mean'`` ,
+            ``'sum'`` . Default: ``'mean'`` .
+
+            - ``'none'``: no reduction will be applied.
+            - ``'mean'``: compute and return the weighted mean of elements in the output.
+            - ``'sum'``: the output elements will be summed.
+
         label_smoothing (float): Label smoothing values, a regularization tool used to prevent the model
             from overfitting when calculating Loss. The value range is [0.0, 1.0]. Default value: ``0.0`` .
 
@@ -3678,17 +3763,16 @@ def cross_entropy(input, target, weight=None, ignore_index=-100, reduction='mean', label_smoothing=0.0):
         ``Ascend`` ``GPU`` ``CPU``
 
     Examples:
-        >>> import mindspore
+        >>> import mindspore as ms
         >>> import numpy as np
-        >>> from mindspore import Tensor, ops
         >>> # Case 1: Indices labels
-        >>> inputs = …
-        >>> target = …
-        >>> output = ops.cross_entropy(inputs, target)
+        >>> inputs = ms.Tensor(np.random.randn(3, 5), ms.float32)
+        >>> target = ms.Tensor(np.array([1, 0, 4]), ms.int32)
+        >>> output = ms.ops.cross_entropy(inputs, target)
         >>> # Case 2: Probability labels
-        >>> inputs = …
-        >>> target = …
-        >>> output = ops.cross_entropy(inputs, target)
+        >>> inputs = ms.Tensor(np.random.randn(3, 5), ms.float32)
+        >>> target = ms.Tensor(np.random.randn(3, 5), ms.float32)
+        >>> output = ms.ops.cross_entropy(inputs, target)
     """
     _check_is_tensor('input', input, "cross_entropy_loss")
     _check_is_tensor('target', target, "cross_entropy_loss")
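
Extending the refreshed docstring example with the reduction modes that the Args section now spells out:

    import numpy as np
    import mindspore as ms

    inputs = ms.Tensor(np.random.randn(3, 5), ms.float32)
    target = ms.Tensor(np.array([1, 0, 4]), ms.int32)
    print(ms.ops.cross_entropy(inputs, target).shape)                    # () - 'mean' is the default
    print(ms.ops.cross_entropy(inputs, target, reduction='none').shape)  # (3,) - one loss per sample
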
@@ -3743,7 +3827,7 @@ def nll_loss(inputs, target, weight=None, ignore_index=-100, reduction='mean', label_smoothing=0.0):
     N is the batch size, :math:`c` belonging to :math:`[0, C-1]` is class index, where :math:`C` is the number of
     classes.
 
-    If reduction is not …
+    If `reduction` is not ``None`` (default 'mean'), then
 
     .. math::
 
@@ -3763,8 +3847,13 @@ def nll_loss(inputs, target, weight=None, ignore_index=-100, reduction='mean', l
             The data type must be float16 or float32. Default: ``None`` .
         ignore_index (int): Specifies a target value that is ignored
             and does not contribute to the input gradient. Default: ``-100`` .
-        reduction (str):
-            Default: ``'mean'`` .
+        reduction (str, optional): Apply specific reduction method to the output: ``'none'`` , ``'mean'`` ,
+            ``'sum'`` . Default: ``'mean'`` .
+
+            - ``'none'``: no reduction will be applied.
+            - ``'mean'``: compute and return the weighted mean of elements in the output.
+            - ``'sum'``: the output elements will be summed.
+
         label_smoothing (float): Label smoothing values, a regularization tool used to prevent the model
             from overfitting when calculating Loss. The value range is [0.0, 1.0]. Default value: ``0.0`` .

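Note: `nll_loss` composed with log-softmax gives the usual cross-entropy, which is why the two docstrings share the same `reduction` wording. A hedged NumPy sketch of that identity (names are illustrative):

    import numpy as np

    def log_softmax(x):
        s = x - x.max(axis=1, keepdims=True)
        return s - np.log(np.exp(s).sum(axis=1, keepdims=True))

    def nll_loss_np(log_probs, target, reduction='mean'):
        per_sample = -log_probs[np.arange(len(target)), target]
        return {'none': per_sample, 'mean': per_sample.mean(),
                'sum': per_sample.sum()}[reduction]

    x = np.random.randn(4, 10)
    t = np.array([3, 0, 9, 1])
    # nll_loss(log_softmax(x), t) is the usual cross-entropy of x and t
    print(nll_loss_np(log_softmax(x), t))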
@@ -3858,7 +3947,7 @@ def l1_loss(input, target, reduction='mean'):
     r"""
     Calculate the mean absolute error between the `input` value and the `target` value.

-    Assuming that the :math:`x` and :math:`y` are 1-D Tensor, length :math:`N`, `reduction` is set to "none"
+    Assuming that the :math:`x` and :math:`y` are 1-D Tensor, length :math:`N`, `reduction` is set to ``"none"``,
     then calculate the loss of :math:`x` and :math:`y` without dimensionality reduction.

     The formula is as follows:
@@ -3881,18 +3970,21 @@ def l1_loss(input, target, reduction='mean'):
         input (Tensor): Predicted value, Tensor of any dimension.
         target (Tensor): Target value, usually has the same shape as the `input`.
             If `input` and `target` have different shape, make sure they can broadcast to each other.
-        reduction (str, optional):
-
-
+        reduction (str, optional): Apply specific reduction method to the output: ``'none'`` , ``'mean'`` ,
+            ``'sum'`` . Default: ``'mean'`` .
+
+            - ``'none'``: no reduction will be applied.
+            - ``'mean'``: compute and return the mean of elements in the output.
+            - ``'sum'``: the output elements will be summed.

     Returns:
-        Tensor or Scalar, if `reduction` is "none"
+        Tensor or Scalar, if `reduction` is ``"none"``, return a Tensor with same shape and dtype as `input`.
         Otherwise, a scalar value will be returned.

     Raises:
         TypeError: If `input` is not a Tensor.
         TypeError: If `target` is not a Tensor.
-        ValueError: If `reduction` is not one of "none"
+        ValueError: If `reduction` is not one of ``"none"``, ``"mean"`` or ``"sum"``.

     Supported Platforms:
         ``Ascend`` ``GPU`` ``CPU``
@@ -3948,16 +4040,20 @@ def smooth_l1_loss(input, target, beta=1.0, reduction='none'):
         target (Tensor): Ground truth data, tensor of shape :math:`(N, *)`, same shape and dtype as the `input`.
         beta (float): A parameter used to control the point where the function will change between
             L1 to L2 loss. The value should be greater than zero. Default: ``1.0`` .
-        reduction (str): Apply specific reduction method to the output: ``'none'`` , ``'mean'``
-            Default: ``'none'`` .
+        reduction (str, optional): Apply specific reduction method to the output: ``'none'`` , ``'mean'`` ,
+            ``'sum'`` . Default: ``'none'`` .
+
+            - ``'none'``: no reduction will be applied.
+            - ``'mean'``: compute and return the mean of elements in the output.
+            - ``'sum'``: the output elements will be summed.

     Returns:
-        Tensor, if `reduction` is 'none'
+        Tensor, if `reduction` is ``'none'``, then output is a tensor with the same shape as `input`.
         Otherwise, the shape of output tensor is :math:`(1,)`.

     Raises:
         TypeError: If `beta` is not a float.
-        ValueError: If `reduction` is not one of 'none'
+        ValueError: If `reduction` is not one of ``'none'``, ``'mean'``, ``'sum'``.
         TypeError: If dtype of `input` or `target` is not one of float16, float32, float64.
         ValueError: If `beta` is less than or equal to 0.
         ValueError: If shape of `input` is not the same as `target`.
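Note: `beta` is the switch point between the quadratic and linear regimes, and the loss is continuous there. A small NumPy sketch of the piecewise rule (illustrative, not the operator itself):

    import numpy as np

    def smooth_l1_np(x, y, beta=1.0, reduction='none'):
        d = np.abs(x - y)
        # quadratic below beta, linear above it; continuous at |x - y| == beta
        out = np.where(d < beta, 0.5 * d * d / beta, d - 0.5 * beta)
        if reduction == 'none':
            return out
        return out.mean() if reduction == 'mean' else out.sum()

    x = np.array([2.0, 2.0, 3.0])
    y = np.array([2.0, 0.0, 3.5])
    print(smooth_l1_np(x, y, beta=1.0))  # [0.    1.5   0.125]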
@@ -4072,6 +4168,7 @@ def leaky_relu(input, alpha=0.2):
     select_op = _get_cache_prim(P.Maximum)()
     if alpha > 1:
         select_op = _get_cache_prim(P.Minimum)()
+    alpha = _get_cache_prim(P.Cast)()(F.scalar_to_tensor(alpha), input.dtype)
     return select_op(alpha * input, input)


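Note: the new Cast line converts the Python scalar `alpha` to a tensor of the input's dtype before the multiply, avoiding an implicit promotion (e.g. float16 inputs being widened). A NumPy sketch of the same max/min select logic, assuming equivalent semantics:

    import numpy as np

    def leaky_relu_np(x, alpha=0.2):
        # matching the input dtype up front avoids promoting the whole
        # product to float64 when alpha is a Python float
        a = np.asarray(alpha, dtype=x.dtype)
        # for alpha <= 1, max(alpha*x, x) keeps x on the positive side;
        # for alpha > 1 the roles flip, hence the minimum
        return np.maximum(a * x, x) if alpha <= 1 else np.minimum(a * x, x)

    x = np.array([-1.0, 0.0, 2.0], dtype=np.float32)
    print(leaky_relu_np(x, 0.2))   # [-0.2  0.   2. ]
    print(leaky_relu_np(x, 2.0))   # [-2.   0.   2. ]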
@@ -4158,6 +4255,10 @@ def lrn(x, depth_radius=5, bias=1.0, alpha=1.0, beta=0.5, norm_region="ACROSS_CH
     r"""
     Local Response Normalization.

+    .. warning::
+        lrn is deprecated on Ascend due to potential accuracy problem. It's recommended to use other
+        normalization methods, e.g. :class:`mindspore.ops.batch_norm`.
+
     .. math::

         b_{c} = a_{c}\left(k + \frac{\alpha}{n}
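Note: following the formula in this docstring, each channel is normalized by a squared sum over a window of neighboring channels. A rough NumPy sketch (it assumes `n` is the actual window size, which shrinks at the borders; this is not MindSpore's kernel):

    import numpy as np

    def lrn_np(x, depth_radius=5, bias=1.0, alpha=1.0, beta=0.5):
        # x: (N, C, H, W); normalize each channel by a window of
        # depth_radius channels on either side (ACROSS_CHANNELS)
        n, c, h, w = x.shape
        out = np.empty_like(x)
        for i in range(c):
            lo, hi = max(0, i - depth_radius), min(c, i + depth_radius + 1)
            sq = (x[:, lo:hi] ** 2).sum(axis=1)
            out[:, i] = x[:, i] / (bias + alpha / (hi - lo) * sq) ** beta
        return out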
@@ -4186,7 +4287,7 @@ def lrn(x, depth_radius=5, bias=1.0, alpha=1.0, beta=0.5, norm_region="ACROSS_CH
         TypeError: If `x` is not a Tensor.

     Supported Platforms:
-        ``
+        ``GPU`` ``CPU``

     Examples:
         >>> import mindspore
@@ -4219,7 +4320,11 @@ def mish(x):
     <https://arxiv.org/abs/1908.08681>`_.

     Args:
-        x (Tensor): The input Tensor
+        x (Tensor): The input Tensor.
+            Supported dtypes:
+
+            - GPU/CPU: float16, float32, float64.
+            - Ascend: float16, float32.

     Returns:
         Tensor, with the same type and shape as the `x`.
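Note: per the paper linked above, mish(x) = x * tanh(softplus(x)). A one-liner NumPy sketch (illustrative; a numerically hardened version would clamp the exp for large inputs):

    import numpy as np

    def mish_np(x):
        # mish(x) = x * tanh(softplus(x)), softplus(x) = log(1 + exp(x))
        return x * np.tanh(np.log1p(np.exp(x)))

    x = np.array([-1.0, 0.0, 2.0, 3.0], dtype=np.float32)
    print(mish_np(x))  # ~[-0.3034  0.      1.944   2.9866]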
@@ -4320,10 +4425,40 @@ def _check_type_and_shape_same(param_name1, input_data1, param_name2, input_data


 def margin_ranking_loss(input1, input2, target, margin=0.0, reduction='mean'):
-    """
+    r"""
     MarginRankingLoss creates a criterion that measures the loss.

-
+    Given two tensors :math:`input1`, :math:`input2` and a Tensor label :math:`target` with values 1 or -1,
+    the operation is as follows:
+
+    .. math::
+        \text{loss}(input1, input2, target) = \max(0, -target * (input1 - input2) + \text{margin})
+
+    Args:
+        input1 (Tensor): Tensor of shape :math:`(N, *)` where :math:`*` means any number of additional dimensions.
+        input2 (Tensor): Tensor of shape :math:`(N, *)`, same shape and dtype as `input1`.
+        target (Tensor): Contains value 1 or -1. Suppose the shape of `input1` is
+            :math:`(x_1, x_2, x_3, ..., x_R)`, then the shape of `target` must be :math:`(x_1, x_2, x_3, ..., x_R)`.
+        margin (float, optional): Specify the adjustment factor of the operation. Default: ``0.0`` .
+        reduction (str, optional): Apply specific reduction method to the output: ``'none'`` , ``'mean'`` ,
+            ``'sum'`` . Default: ``'mean'`` .
+
+            - ``'none'``: no reduction will be applied.
+            - ``'mean'``: compute and return the mean of elements in the output.
+            - ``'sum'``: the output elements will be summed.
+
+    Returns:
+        Tensor or Scalar. If `reduction` is ``"none"``, its shape is the same as `target`.
+        Otherwise, a scalar value will be returned.
+
+    Raises:
+        TypeError: If `margin` is not a float.
+        TypeError: If `input1`, `input2` or `target` is not a Tensor.
+        TypeError: If the types of `input1` and `input2` are inconsistent.
+        TypeError: If the types of `input1` and `target` are inconsistent.
+        ValueError: If the shape of `input1` and `input2` are inconsistent.
+        ValueError: If the shape of `input1` and `target` are inconsistent.
+        ValueError: If `reduction` is not one of ``'none'``, ``'mean'``, ``'sum'``.

     Supported Platforms:
         ``Ascend`` ``GPU`` ``CPU``
@@ -4334,7 +4469,7 @@ def margin_ranking_loss(input1, input2, target, margin=0.0, reduction='mean'):
         >>> import numpy as np
         >>> input1 = Tensor(np.array([0.3864, -2.4093, -1.4076]), ms.float32)
         >>> input2 = Tensor(np.array([-0.6012, -1.6681, 1.2928]), ms.float32)
-        >>> target =
+        >>> target = ops.Sign()(Tensor(np.array([-2, -2, 3]), ms.float32))
         >>> output = ops.margin_ranking_loss(input1, input2, target)
         >>> print(output)
         1.2293333
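Note: the doctest value 1.2293333 follows directly from the formula in this docstring; a NumPy re-derivation:

    import numpy as np

    input1 = np.array([0.3864, -2.4093, -1.4076])
    input2 = np.array([-0.6012, -1.6681, 1.2928])
    target = np.sign(np.array([-2, -2, 3]))          # [-1, -1, 1]
    # loss_i = max(0, -target_i * (input1_i - input2_i) + margin)
    loss = np.maximum(0, -target * (input1 - input2) + 0.0)
    print(loss.mean())  # 1.2293333 (matches the doctest output)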
@@ -4375,17 +4510,20 @@ def cosine_embedding_loss(input1, input2, target, margin=0.0, reduction="mean"):
         target (Tensor): Contains value 1 or -1. Suppose the shape of `input1` is
             :math:`(x_1, x_2, x_3, ..., x_R)`, then the shape of `target` must be :math:`(x_1, x_3, x_4, ..., x_R)`.
         margin (float, optional): Should be in [-1.0, 1.0]. Default: 0.0.
-        reduction (str, optional):
-
-
+        reduction (str, optional): Apply specific reduction method to the output: ``'none'`` , ``'mean'`` ,
+            ``'sum'`` . Default: ``'mean'`` .
+
+            - ``'none'``: no reduction will be applied.
+            - ``'mean'``: compute and return the mean of elements in the output.
+            - ``'sum'``: the output elements will be summed.

     Returns:
-        Tensor or Scalar, if `reduction` is "none"
+        Tensor or Scalar, if `reduction` is ``"none"``, its shape is the same as `target`.
         Otherwise, a scalar value will be returned.

     Raises:
         TypeError: If `margin` is not a float.
-        ValueError: If `reduction` is not one of 'none'
+        ValueError: If `reduction` is not one of ``'none'``, ``'mean'``, ``'sum'``.
         ValueError: If `margin` is not in range [-1, 1].

     Supported Platforms:
@@ -4529,14 +4667,24 @@ def grid_sample(input, grid, mode='bilinear', padding_mode='zeros', align_corner

     Args:
         input (Tensor): input with shape of :math:`(N, C, H_{in}, W_{in})` (4-D case) or :math:`(N, C, D_{in},
-            H_{in}, W_{in})` (5-D case) and dtype of
+            H_{in}, W_{in})` (5-D case) and dtype of float32 or float64.
         grid (Tensor): flow-field with shape of :math:`(N, H_{out}, W_{out}, 2)` (4-D case) or :math:`(N, D_{out},
             H_{out}, W_{out}, 3)` (5-D case) and same dtype as `input`.
         mode (str): An optional string specifying the interpolation method. The optional values are
-            'bilinear'
+            ``'bilinear'``, ``'nearest'``. Default: ``'bilinear'`` . Note: `bicubic` is not supported yet. When
             `mode="bilinear"` and the input is 5-D, the interpolation mode used internally will actually
             be trilinear. However, when the input is 4-D, the interpolation mode will legitimately be bilinear.
             Default: ``'bilinear'`` .
+
+            - ``'nearest'``: Nearest neighbor interpolation. Each output pixel is assigned the value of the
+              nearest input pixel. This method is simple and fast but can result in blocky or pixelated outputs.
+            - ``'bilinear'``: Bilinear interpolation. Each output pixel is a weighted average of the four nearest
+              input pixels, computed using bilinear interpolation. This method produces smoother results compared
+              to nearest neighbor interpolation.
+            - ``'trilinear'``: Trilinear interpolation. This is an extension of bilinear interpolation to 3D data.
+              It performs bilinear interpolation in the two spatial dimensions and linear interpolation along
+              the third dimension. It is commonly used for volume or 3D image interpolation.
+
         padding_mode (str): An optional string specifying the pad method. The optional values are "zeros", "border" or
             "reflection". Default: ``'zeros'`` .
         align_corners (bool): An optional bool. If set to `True`, the extrema (-1 and 1) are considered as referring to
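Note: `grid` coordinates are normalized to [-1, 1]; how they map back to pixel indices depends on `align_corners`. A sketch of the usual convention (assumed to match this operator):

    import numpy as np

    def unnormalize(coord, size, align_corners):
        # coord in [-1, 1] -> pixel position in [0, size - 1]
        if align_corners:
            return (coord + 1.0) / 2.0 * (size - 1)
        # half-pixel convention: -1/1 refer to the outer edges of the corner pixels
        return ((coord + 1.0) * size - 1.0) / 2.0

    print(unnormalize(np.array([-1.0, 0.0, 1.0]), 4, True))   # [0.  1.5 3. ]
    print(unnormalize(np.array([-1.0, 0.0, 1.0]), 4, False))  # [-0.5  1.5  3.5]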
@@ -4617,10 +4765,13 @@ def ctc_loss(log_probs, targets, input_lengths, target_lengths, blank=0, reducti
         input_lengths (Union(tuple, Tensor)): Lengths of the input. A tuple or Tensor of shape(N).
         target_lengths (Union(tuple, Tensor)): Lengths of the target. A tuple or Tensor of shape(N).
         blank (int, optional): The blank label. Default: ``0`` .
-        reduction (str, optional):
-            ``'
-
-
+        reduction (str, optional): Apply specific reduction method to the output: ``'none'`` , ``'mean'`` ,
+            ``'sum'`` . Default: ``'mean'`` .
+
+            - ``'none'``: no reduction will be applied.
+            - ``'mean'``: compute and return the mean of elements in the output.
+            - ``'sum'``: the output elements will be summed.
+
         zero_infinity (bool, optional): Whether to set infinite loss and correlation gradient to 0. Default: ``False`` .

     Returns:
@@ -4704,8 +4855,12 @@ def gaussian_nll_loss(x, target, var, full=False, eps=1e-6, reduction='mean'):
         full (bool, optional): Include the constant term in the loss calculation. When :math:`full=True`,
             the constant term will be :math:`const = 0.5*log(2\pi)`. Default: ``False``.
         eps (float, optional): Used to improve the stability of the log function; must be greater than 0.
             Default: ``1e-6`` .
-        reduction (str, optional): Apply specific reduction method to the
-
+        reduction (str, optional): Apply specific reduction method to the output: ``'none'`` , ``'mean'`` ,
+            ``'sum'`` . Default: ``'mean'`` .
+
+            - ``'none'``: no reduction will be applied.
+            - ``'mean'``: compute and return the mean of elements in the output.
+            - ``'sum'``: the output elements will be summed.

     Returns:
         Tensor or Tensor scalar, the computed loss depending on :math:`reduction`.
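Note: a hedged NumPy sketch of the documented loss, with the `eps` clamp and the optional constant term (illustrative only):

    import numpy as np

    def gaussian_nll_np(x, target, var, full=False, eps=1e-6, reduction='mean'):
        v = np.maximum(var, eps)                   # eps keeps log() stable
        loss = 0.5 * (np.log(v) + (x - target) ** 2 / v)
        if full:
            loss = loss + 0.5 * np.log(2 * np.pi)  # the constant term
        if reduction == 'none':
            return loss
        return loss.mean() if reduction == 'mean' else loss.sum()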
@@ -4722,8 +4877,7 @@ def gaussian_nll_loss(x, target, var, full=False, eps=1e-6, reduction='mean'):

     Examples:
         >>> import numpy as np
-        >>> from mindspore import Tensor
-        >>> import mindspore.ops as ops
+        >>> from mindspore import Tensor, ops
         >>> import mindspore.common.dtype as mstype
         >>> arr1 = np.arange(8).reshape((4, 2))
         >>> arr2 = np.array([2, 3, 1, 4, 6, 4, 4, 9]).reshape((4, 2))
@@ -4831,9 +4985,12 @@ def hinge_embedding_loss(inputs, targets, margin=1.0, reduction='mean'):
             Has the same shape as `inputs`, contains -1 or 1.
         margin (float, int): Threshold defined by Hinge Embedding Loss :math:`margin`.
             Represented as :math:`\Delta` in the formula. Default: ``1.0`` .
-        reduction (str):
-            ``'
-
+        reduction (str, optional): Apply specific reduction method to the output: ``'none'`` , ``'mean'`` ,
+            ``'sum'`` . Default: ``'mean'`` .
+
+            - ``'none'``: no reduction will be applied.
+            - ``'mean'``: compute and return the mean of elements in the output.
+            - ``'sum'``: the output elements will be summed.

     Returns:
         Tensor or Tensor scalar, the computed loss depending on :math:`reduction`.
@@ -4843,7 +5000,7 @@ def hinge_embedding_loss(inputs, targets, margin=1.0, reduction='mean'):
         TypeError: If `targets` is not a Tensor.
         TypeError: If `margin` is not a float or int.
         ValueError: If `targets` does not have the same shape as `inputs` or they could not broadcast to each other.
-        ValueError: If `reduction` is not one of 'none'
+        ValueError: If `reduction` is not one of ``'none'``, ``'mean'``, ``'sum'``.

     Supported Platforms:
         ``Ascend`` ``GPU`` ``CPU``
@@ -4889,6 +5046,9 @@ def ctc_greedy_decoder(inputs, sequence_length, merge_repeated=True):
     r"""
     Performs greedy decoding on the logits given in inputs.

+    Note:
+        On Ascend, `merge_repeated` cannot be set to ``False``.
+
     Args:
         inputs (Tensor): The input Tensor must be a 3-D tensor whose shape is
             :math:`(max\_time, batch\_size, num\_classes)`. `num_classes` must be `num_labels + 1` classes,
@@ -5068,74 +5228,87 @@ def _check_conv_iterable_lengths(iterable, dim, iter_name):

 def conv1d(input, weight, bias=None, stride=1, pad_mode="valid", padding=0, dilation=1, groups=1):
     r"""
-    Applies a 1D convolution over an input tensor.
-
-    where :math:`N` is batch size, :math:`
-
-
+    Applies a 1D convolution over an input tensor. The input Tensor is typically
+    of shape :math:`(N, C_{in}, L_{in})`,
+    where :math:`N` is batch size, :math:`C` is channel number, :math:`L` is input sequence width.
+
+    The output is calculated based on formula:

     .. math::

-
+        \text{out}(N_i, C_{\text{out}_j}) = \text{bias}(C_{\text{out}_j}) +
+        \sum_{k = 0}^{C_{in} - 1} \text{ccor}({\text{weight}(C_{\text{out}_j}, k), \text{X}(N_i, k)})
+
+    where :math:`bias` is the output channel bias, :math:`ccor` is
+    the `cross-correlation <https://en.wikipedia.org/wiki/Cross-correlation>`_,
+    :math:`weight` is the convolution kernel value and :math:`X` represents the input feature map.
+
+    Here are the indices' meanings:
+    - :math:`i` corresponds to the batch number, ranging from 0 to N-1, where N is the batch size of the input.
+
+    - :math:`j` corresponds to the output channel, ranging from 0 to C_{out}-1, where C_{out} is the number of
+      output channels, which is also equal to the number of kernels.

-
-
-    from :math:`0` to :math:`C_{out} - 1`, :math:`W_{ij}` corresponds to the :math:`i`-th channel of the :math:`j`-th
-    filter and :math:`out_{j}` corresponds to the :math:`j`-th channel of the output. :math:`W_{j}` is a slice
-    of kernel, and it has shape :math:`(\text{kernal_size})`, where :math:`\text{kernel_size}` is the width of
-    the convolution kernel. The full kernel has shape :math:`(C_{out}, C_{in} / \text{groups}, \text{kernel_size})`,
-    where `groups` is the group number to split the input in the channel dimension.
+    - :math:`k` corresponds to the input channel, ranging from 0 to C_{in}-1, where C_{in} is the number of
+      input channels, which is also equal to the number of channels in the convolutional kernels.

-
-
-
+    Therefore, in the above formula, :math:`{bias}(C_{out_j})` represents the bias of the :math:`j`-th
+    output channel, :math:`{weight}(C_{out_j}, k)` represents the slice of the :math:`j`-th convolutional
+    kernel in the :math:`k`-th channel, and :math:`{X}(N_i, k)` represents the slice of the :math:`k`-th input
+    channel in the :math:`i`-th batch of the input feature map.

-
-    :math:`
-
-
+    The shape of the convolutional kernel is given by :math:`(kernel\_size)`,
+    where :math:`kernel\_size` is the width of the kernel.
+    If we consider the input and output channels as well as the `group` parameter, the complete kernel shape
+    will be :math:`(C_{out}, C_{in} / \text{group}, \text{kernel_size})`,
+    where `group` is the number of groups dividing `x`'s input channel when applying group convolution.

-
-    <http://vision.stanford.edu/cs598_spring07/papers/Lecun98.pdf>`_
-    `ConvNets <http://cs231n.github.io/convolutional-networks/>`_ .
+    For more details about convolution layer, please refer to `Gradient Based Learning Applied to Document Recognition
+    <http://vision.stanford.edu/cs598_spring07/papers/Lecun98.pdf>`_
+    and `ConvNets <http://cs231n.github.io/convolutional-networks/>`_ .

     Note:
         On Ascend platform, only group convolution in depthwise convolution scenarios is supported.
         That is, when `groups>1`, condition `C_{in}` = `C_{out}` = `groups` must be satisfied.

     Args:
-        input (Tensor): Tensor of shape :math:`(N, C_{in},
-        weight (Tensor):
-            :math:`(N, C_{in} / \text{groups}, \text{kernel_size})
-
-        bias (Tensor): Bias Tensor with shape :math:`(C_{out})`.
+        input (Tensor): Input Tensor of shape :math:`(N, C_{in}, L_{in})`.
+        weight (Tensor): The convolutional kernel value, it should have shape
+            :math:`(N, C_{in} / \text{groups}, \text{kernel_size})`.
+        bias (Tensor, optional): Bias Tensor with shape :math:`(C_{out})`.
             When bias is None, zeros will be used. Default: ``None`` .
         stride (Union(int, tuple[int]), optional): The distance of kernel moving, an int number or a tuple of one int
-            that represents width of movement. Default: 1
+            that represents width of movement. Default: ``1``.
         pad_mode (str, optional): Specifies padding mode. The optional values are
             ``"same"`` , ``"valid"`` and ``"pad"`` . Default: ``"valid"`` .

-            - same
+            - ``"same"``: Adopts the way of completion. The height and width of the output will be equal to
              the input `x` divided by stride. The padding will be evenly calculated in left and right possibly.
              Otherwise, the last extra padding will be calculated from the right side.
              If this mode is set, `padding` must be 0.

-            - valid
+            - ``"valid"``: Adopts the way of discarding. The possible largest width of output will be returned
              without padding. Extra pixels will be discarded. If this mode is set, `padding` must be 0.

-            - pad
+            - ``"pad"``: Implicit paddings on both sides of the input `x`.
+              The number of `padding` will be padded to the input
              Tensor borders. `padding` must be greater than or equal to 0.
-        padding (Union(int, tuple[int], list[int]), optional):
+        padding (Union(int, tuple[int], list[int]), optional): Specifies the amount of padding to apply on
+            both sides of `input` when `pad_mode` is set to ``"pad"``. The
             paddings of left and right are the same, equal to padding or padding[0] when padding is a tuple of
             1 integer. Default: ``0`` .
-        dilation (Union(int, tuple[int]), optional):
-
-
-
+        dilation (Union(int, tuple[int]), optional): Specifies the dilation rate to use for dilated convolution.
+            It can be a single int or a tuple of 1 integer.
+            Assuming :math:`dilation=(d0,)`, the convolutional kernel samples the input with a
+            spacing of :math:`d0-1` elements in the width direction.
+            The value should be in the range [1, L].
+            Default: ``1`` .
         groups (int, optional): Splits `input` into groups. Default: ``1`` .

     Returns:
-        Tensor, the value that applied 1D convolution. The shape is :math:`(N, C_{out},
+        Tensor, the value that applied 1D convolution. The shape is :math:`(N, C_{out}, L_{out})`.
+        To see how different pad modes affect the output shape, please refer to
+        :class:`mindspore.nn.Conv1d` for more details.

     Raises:
         TypeError: If `stride`, `padding` or `dilation` is neither an int nor a tuple.
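Note: the formula in this hunk is plain cross-correlation. A NumPy sketch for the simplest case (groups=1, stride=1, no padding, no dilation), with shapes matching the Args above:

    import numpy as np

    def conv1d_np(x, w, b=None):
        # x: (N, C_in, L_in), w: (C_out, C_in, K) -- groups=1, stride=1, 'valid'
        n, c_in, l_in = x.shape
        c_out, _, k = w.shape
        l_out = l_in - k + 1
        out = np.zeros((n, c_out, l_out))
        for j in range(c_out):
            for t in range(l_out):
                # ccor: elementwise product of the kernel with a window, summed
                out[:, j, t] = (x[:, :, t:t + k] * w[j]).sum(axis=(1, 2))
        return out if b is None else out + b[None, :, None]

    x = np.random.randn(2, 3, 8)
    w = np.random.randn(4, 3, 3)
    print(conv1d_np(x, w).shape)  # (2, 4, 6)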
@@ -5204,40 +5377,44 @@ def conv1d(input, weight, bias=None, stride=1, pad_mode="valid", padding=0, dila

 def conv2d(input, weight, bias=None, stride=1, pad_mode="valid", padding=0, dilation=1, groups=1):
     r"""
-    Applies a 2D convolution over an input tensor.
-
-
-
-
+    Applies a 2D convolution over an input tensor. The input tensor is typically of
+    shape :math:`(N, C_{in}, H_{in}, W_{in})`, where :math:`N` is batch size, :math:`C` is
+    channel number, :math:`H` is feature height, :math:`W` is feature width.
+
+    The output is calculated based on formula:

     .. math::

-
+        \text{out}(N_i, C_{\text{out}_j}) = \text{bias}(C_{\text{out}_j}) +
+        \sum_{k = 0}^{C_{in} - 1} \text{ccor}({\text{weight}(C_{\text{out}_j}, k), \text{X}(N_i, k)})

-    where :math:`
-
-
-    filter and :math:`out_{j}` corresponds to the :math:`j`-th channel of the output. :math:`W_{ij}` is a slice
-    of kernel, and it has shape :math:`(\text{kernel_size[0]}, \text{kernel_size[1]})`, where :math:`\text{
-    kernel_size[0]}` and :math:`\text{kernel_size[1]}` are the height and width of the convolution kernel.
-    The full kernel has shape :math:`(C_{out}, C_{in} / \text{groups}, \text{kernel_size[0]}, \text{kernel_size[1]})`,
-    where `groups` is the group number to split the input in the channel dimension.
+    where :math:`bias` is the output channel bias, :math:`ccor` is
+    the `cross-correlation <https://en.wikipedia.org/wiki/Cross-correlation>`_,
+    :math:`weight` is the convolution kernel value and :math:`X` represents the input feature map.

-
-
-    (\text{kernel_size[0]} - 1) \times(\text{dilation[0]} - 1)} {\text { stride[0] }}} \right \rfloor` and
+    Here are the indices' meanings:
+    - :math:`i` corresponds to the batch number, ranging from 0 to N-1, where N is the batch size of the input.

-    :math
-
+    - :math:`j` corresponds to the output channel, ranging from 0 to C_{out}-1, where C_{out} is the number of
+      output channels, which is also equal to the number of kernels.

-
-
-    For output height and width on other `pad_mode`, please refer to formula on `mindspore.nn.Conv2d
-    <https://www.mindspore.cn/docs/en/r2.1/api_python/nn/mindspore.nn.Conv2d.html>`_.
+    - :math:`k` corresponds to the input channel, ranging from 0 to C_{in}-1, where C_{in} is the number of
+      input channels, which is also equal to the number of channels in the convolutional kernels.

-
-
-    `
+    Therefore, in the above formula, :math:`{bias}(C_{out_j})` represents the bias of the :math:`j`-th
+    output channel, :math:`{weight}(C_{out_j}, k)` represents the slice of the :math:`j`-th convolutional
+    kernel in the :math:`k`-th channel, and :math:`{X}(N_i, k)` represents the slice of the :math:`k`-th input
+    channel in the :math:`i`-th batch of the input feature map.
+
+    The shape of the convolutional kernel is given by :math:`(kernel\_size[0], kernel\_size[1])`,
+    where :math:`kernel\_size[0]` and :math:`kernel\_size[1]` are the height and width of the kernel, respectively.
+    If we consider the input and output channels as well as the `group` parameter, the complete kernel shape
+    will be :math:`(C_{out}, C_{in} / \text{group}, \text{kernel_size[0]}, \text{kernel_size[1]})`,
+    where `group` is the number of groups dividing `x`'s input channel when applying group convolution.
+
+    For more details about convolution layer, please refer to `Gradient Based Learning Applied to Document Recognition
+    <http://vision.stanford.edu/cs598_spring07/papers/Lecun98.pdf>`_ and
+    `ConvNets <http://cs231n.github.io/convolutional-networks/>`_.

     Note:
         On Ascend platform, only group convolution in depthwise convolution scenarios is supported.
@@ -5248,7 +5425,7 @@ def conv2d(input, weight, bias=None, stride=1, pad_mode="valid", padding=0, dila
         weight (Tensor): Tensor of shape
             :math:`(N, C_{in} / \text{groups}, \text{kernel_size[0]}, \text{kernel_size[1]})`, then the size of kernel
             is :math:`(\text{kernel_size[0]}, \text{kernel_size[1]})`.
-        bias (Tensor): Bias Tensor with shape :math:`(C_{out})`.
+        bias (Tensor, optional): Bias Tensor with shape :math:`(C_{out})`.
             When bias is ``None`` , zeros will be used. Default: ``None`` .
         stride (Union(int, tuple[int]), optional): The distance of kernel moving, an int number that represents
             the height and width of movement are both strides, or a tuple of two int numbers that
@@ -5278,6 +5455,9 @@ def conv2d(input, weight, bias=None, stride=1, pad_mode="valid", padding=0, dila

     Returns:
         Tensor, the value that applied 2D convolution. The shape is :math:`(N, C_{out}, H_{out}, W_{out})`.
+        To see how different pad modes affect the output shape, please refer to
+        :class:`mindspore.nn.Conv2d` for more details.
+

     Raises:
         TypeError: If `stride`, `padding` or `dilation` is neither an int nor a tuple.
@@ -5421,8 +5601,9 @@ def huber_loss(input, target, reduction='mean', delta=1.0):
     Calculates the error between the predicted value and the target value,
     which has the best of both the loss of l1 and the loss of mse.

-    Assuming that the :math:`x` and :math:`y` are 1-D Tensor, length :math:`N`, the reduction parameter
-    then calculate the loss of :math:`x` and :math:`y` without dimensionality reduction.
+    Assuming that the :math:`x` and :math:`y` are 1-D Tensor, length :math:`N`, the `reduction` parameter
+    is set to ``"none"``, then calculate the loss of :math:`x` and :math:`y` without dimensionality reduction.
+    The formula is as follows:

     .. math::
         \ell(x, y) = L = \{l_1,\dots,l_N\}^\top
@@ -5451,21 +5632,25 @@ def huber_loss(input, target, reduction='mean', delta=1.0):
         target (Tensor): Target value, has same dtype and shape as the `input` in common cases.
             However, when the shape of `target` is different from the shape of `input`,
             they should be broadcast to each other.
-        reduction (str):
-
-
+        reduction (str, optional): Apply specific reduction method to the output: ``'none'`` , ``'mean'`` ,
+            ``'sum'`` . Default: ``'mean'`` .
+
+            - ``'none'``: no reduction will be applied.
+            - ``'mean'``: compute and return the mean of elements in the output.
+            - ``'sum'``: the output elements will be summed.
+
         delta (Union[int, float]): The threshold to change between two types of loss.
             The value must be greater than zero. Default: ``1.0`` .

     Returns:
-        Tensor or Scalar, if `reduction` is "none"
+        Tensor or Scalar, if `reduction` is ``"none"``, return a Tensor with same shape and dtype as `input`.
         Otherwise, a scalar value will be returned.

     Raises:
         TypeError: If `input` or `target` is not a Tensor.
         TypeError: If dtype of `delta` is neither float nor int.
         ValueError: If `delta` is less than or equal to 0.
-        ValueError: If `reduction` is not one of "none"
+        ValueError: If `reduction` is not one of ``"none"``, ``"mean"``, ``"sum"``.
         ValueError: If `input` and `target` have different shapes and cannot be broadcasted to each other.

     Supported Platforms:
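Note: with matching thresholds, Huber loss is the beta-scaled Smooth L1 loss: huber(x, y, delta) == delta * smooth_l1(x, y, beta=delta) elementwise. A NumPy check:

    import numpy as np

    def huber_np(x, y, delta=1.0):
        d = np.abs(x - y)
        return np.where(d < delta, 0.5 * d * d, delta * d - 0.5 * delta ** 2)

    def smooth_l1_np(x, y, beta=1.0):
        d = np.abs(x - y)
        return np.where(d < beta, 0.5 * d * d / beta, d - 0.5 * beta)

    x, y = np.array([1.0, 2.0, 7.0]), np.array([1.0, 4.0, 4.0])
    delta = 2.0
    # elementwise, huber with threshold delta is delta * smooth_l1 with beta=delta
    print(np.allclose(huber_np(x, y, delta), delta * smooth_l1_np(x, y, delta)))  # True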
@@ -5655,15 +5840,20 @@ def bias_add(input_x, bias):
     consistent with the shape of the `input_x` Tensor.

     Args:
-        input_x (Tensor): The input tensor. The shape can be 2-5 dimensions.
-
+        input_x (Tensor): The input tensor. The shape can be 2-5 dimensions. Supported dtypes:
+
+            - Ascend/CPU: all Number type.
+            - GPU: float16, float32, int8.
+
+        bias (Tensor): The bias tensor, with shape :math:`(C)`. C must be the same as channel dimension C of
+            `input_x`. It has the same type as `input_x`.

     Returns:
         Tensor, with the same shape and data type as `input_x`.

     Raises:
         TypeError: If `input_x` or `bias` is not a Tensor.
-        TypeError: If dtype of `input_x`
+        TypeError: If dtype of `input_x` and `bias` is inconsistent.
         TypeError: If dimension of `input_x` is not in the range [2, 5].

     Supported Platforms:
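Note: `bias_add` is a broadcast add along the channel axis (axis 1). A NumPy equivalent for a 4-D input:

    import numpy as np

    x = np.random.randn(2, 3, 4, 4).astype(np.float32)   # (N, C, H, W)
    bias = np.array([0.1, 0.2, 0.3], dtype=x.dtype)      # shape (C,)
    # reshape so the bias broadcasts along the channel dimension only
    out = x + bias.reshape(1, -1, 1, 1)
    print(out.shape)  # (2, 3, 4, 4)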
@@ -5718,11 +5908,12 @@ def binary_cross_entropy(logits, labels, weight=None, reduction='mean'):
             the loss function
             will not consider any sample weights, and each sample will be treated as having equal importance
             when calculating the loss.
-        reduction (str, optional):
-
-
-
-
+        reduction (str, optional): Apply specific reduction method to the output: ``'none'`` , ``'mean'`` ,
+            ``'sum'`` . Default: ``'mean'`` .
+
+            - ``'none'``: no reduction will be applied.
+            - ``'mean'``: compute and return the weighted mean of elements in the output.
+            - ``'sum'``: the output elements will be summed.

     Returns:
         Tensor or Scalar. Returns Tensor that has the same dtype and shape as `logits` if `reduction` is 'none'.
@@ -5731,7 +5922,7 @@ def binary_cross_entropy(logits, labels, weight=None, reduction='mean'):
     Raises:
         TypeError: If `logits`, `labels` or `weight` is not a Tensor.
         TypeError: If dtype of `logits`, `labels` or `weight` (if given) is neither float16 nor float32.
-        ValueError: If `reduction` is not one of 'none'
+        ValueError: If `reduction` is not one of ``'none'``, ``'mean'`` or ``'sum'``.
         ValueError: If shape of `labels` is not the same as `logits` or `weight` (if given).

     Supported Platforms:
@@ -5754,32 +5945,46 @@ def binary_cross_entropy(logits, labels, weight=None, reduction='mean'):

 def conv3d(input, weight, bias=None, stride=1, pad_mode="valid", padding=0, dilation=1, groups=1):
     r"""
-    Applies a 3D convolution over an input tensor. The input tensor is typically of
-    :math:`(N, C_{in}, D_{in}, H_{in}, W_{in})`
-    :math:`
-
-
+    Applies a 3D convolution over an input tensor. The input tensor is typically of
+    shape :math:`(N, C_{in}, D_{in}, H_{in}, W_{in})`, where :math:`N` is batch size, :math:`C`
+    is channel number, :math:`D` is feature depth, :math:`H` is feature height, :math:`W` is feature width.
+
+    The output is calculated based on formula:

     .. math::
-
-        \
-        \
-
-    where :math:`
-
-    :math:`
-
-
-    :math:`
-
-
-
-
-    :math:`
-
-
-
-
+
+        \text{out}(N_i, C_{\text{out}_j}) = \text{bias}(C_{\text{out}_j}) +
+        \sum_{k = 0}^{C_{in} - 1} \text{ccor}({\text{weight}(C_{\text{out}_j}, k), \text{X}(N_i, k)})
+
+    where :math:`bias` is the output channel bias, :math:`ccor` is
+    the `cross-correlation <https://en.wikipedia.org/wiki/Cross-correlation>`_,
+    :math:`weight` is the convolution kernel value and :math:`X` represents the input feature map.
+
+    Here are the indices' meanings:
+    - :math:`i` corresponds to the batch number, ranging from 0 to N-1, where N is the batch size of the input.
+
+    - :math:`j` corresponds to the output channel, ranging from 0 to C_{out}-1, where C_{out} is the number of
+      output channels, which is also equal to the number of kernels.
+
+    - :math:`k` corresponds to the input channel, ranging from 0 to C_{in}-1, where C_{in} is the number of
+      input channels, which is also equal to the number of channels in the convolutional kernels.
+
+    Therefore, in the above formula, :math:`{bias}(C_{out_j})` represents the bias of the :math:`j`-th
+    output channel, :math:`{weight}(C_{out_j}, k)` represents the slice of the :math:`j`-th convolutional
+    kernel in the :math:`k`-th channel, and :math:`{X}(N_i, k)` represents the slice of the :math:`k`-th input
+    channel in the :math:`i`-th batch of the input feature map.
+
+    The shape of the convolutional kernel is given by
+    :math:`(\text{kernel_size[0]}, \text{kernel_size[1]}, \text{kernel_size[2]})`,
+    where :math:`kernel\_size[0]` , :math:`kernel\_size[1]` and :math:`kernel\_size[2]` are the depth,
+    height and width of the kernel, respectively.
+    If we consider the input and output channels as well as the `group` parameter, the complete kernel shape
+    will be :math:`(C_{out}, C_{in} / \text{group}, \text{kernel_size[0]},
+    \text{kernel_size[1]}, \text{kernel_size[2]})`,
+    where `group` is the number of groups dividing `x`'s input channel when applying group convolution.
+
+    For more details about convolution layer, please refer to `Gradient Based Learning Applied to Document Recognition
+    <http://vision.stanford.edu/cs598_spring07/papers/Lecun98.pdf>`_.

     Note:
         1. On Ascend platform, `groups = 1` must be satisfied.
@@ -5790,8 +5995,8 @@ def conv3d(input, weight, bias=None, stride=1, pad_mode="valid", padding=0, dila
         weight (Tensor): Set size of kernel is :math:`(\text{kernel_size[0]}, \text{kernel_size[1]},
             \text{kernel_size[2]})`, then the shape is :math:`(C_{out}, C_{in}, \text{kernel_size[0]},
             \text{kernel_size[1]}, \text{kernel_size[2]})`.
-        bias (Tensor): Bias Tensor with shape :math:`(C_{out})`.
-            ``None`` .
+        bias (Tensor, optional): Bias Tensor with shape :math:`(C_{out})`.
+            When bias is None, zeros will be used. Default: ``None`` .
         stride (Union[int, tuple[int]], optional): The distance of kernel moving,
             it can be an int number that represents
             the depth, height and width of movement or a tuple of three int numbers that
@@ -5799,18 +6004,18 @@ def conv3d(input, weight, bias=None, stride=1, pad_mode="valid", padding=0, dila
         pad_mode (str, optional): Specifies padding mode. The optional values are
             ``"same"`` , ``"valid"`` and ``"pad"`` . Default: ``"valid"`` .

-            - same
+            - ``"same"``: Adopts the way of completion. The depth, height and width of the output will be equal to
              the input `x` divided by stride. The padding will be evenly calculated in head and tail, top and bottom,
              left and right directions possibly.
              Otherwise, the last extra padding will be calculated from the tail, bottom and the right side.
              If this mode is set, `pad` must be 0.

-            - valid
+            - ``"valid"``: Adopts the way of discarding. The possible largest depth, height and width of output
              will be returned without padding. Extra pixels will be discarded. If this mode is set, `pad`
              must be 0.

-            - pad
-              be padded to the input Tensor borders. `pad` must be greater than or equal to 0.
+            - ``"pad"``: Implicit paddings on both sides of the input in depth, height and width.
+              The number of `pad` will be padded to the input Tensor borders. `pad` must be greater than or equal to 0.

         padding (Union[int, tuple[int], list[int]], optional): The pad value to be filled. If `pad` is an integer,
             the paddings of head, tail, top, bottom, left and right are the same, equal to pad.
@@ -5828,7 +6033,7 @@ def conv3d(input, weight, bias=None, stride=1, pad_mode="valid", padding=0, dila
     Returns:
         Tensor, the value that applied 3D convolution. The shape is :math:`(N, C_{out}, D_{out}, H_{out}, W_{out})`.

-        `pad_mode` is
+        `pad_mode` is ``"same"``:

         .. math::
             \begin{array}{ll} \\
@@ -5837,7 +6042,7 @@ def conv3d(input, weight, bias=None, stride=1, pad_mode="valid", padding=0, dila
                 W_{out} = \left \lceil{\frac{W_{in}}{\text{stride[2]}}} \right \rceil \\
             \end{array}

-        `pad_mode` is
+        `pad_mode` is ``"valid"``:

         .. math::
             \begin{array}{ll} \\
@@ -5849,7 +6054,7 @@ def conv3d(input, weight, bias=None, stride=1, pad_mode="valid", padding=0, dila
                 {\text{stride[2]}} + 1} \right \rfloor \\
             \end{array}

-        `pad_mode` is
+        `pad_mode` is ``"pad"``:

         .. math::
             \begin{array}{ll} \\
@@ -6082,7 +6287,7 @@ def glu(x, axis=-1):
         TypeError: If `x` is not a Tensor.

     Supported Platforms:
-        ``Ascend`` ``CPU``
+        ``Ascend`` ``GPU`` ``CPU``

     Examples:
         >>> from mindspore import Tensor, ops
@@ -6128,12 +6333,12 @@ def multi_margin_loss(input, target, p=1, margin=1, weight=None, reduction='mean
         reduction (str, optional): Apply specific reduction method to the output: ``'none'`` , ``'mean'`` ,
             ``'sum'`` . Default: ``'mean'`` .

-            - ``'none'
-            - ``'mean'
-            - ``'sum'
+            - ``'none'``: no reduction will be applied.
+            - ``'mean'``: compute and return the weighted mean of elements in the output.
+            - ``'sum'``: the output elements will be summed.

     Returns:
-        Tensor. If `reduction` is 'none'
+        Tensor. If `reduction` is ``'none'``, returns a Tensor with the same shape as `target`.
         Otherwise, it is a scalar.

     Raises:
@@ -6200,13 +6405,14 @@ def multilabel_margin_loss(input, target, reduction='mean'):
         reduction (str, optional): Apply specific reduction method to the output: ``'none'`` , ``'mean'`` ,
             ``'sum'`` . Default: ``'mean'`` .

-            - ``'none'
-            - ``'mean'
-            - ``'sum'
+            - ``'none'``: no reduction will be applied.
+            - ``'mean'``: compute and return the mean of elements in the output.
+            - ``'sum'``: the output elements will be summed.

     Returns:
-        - **outputs** (Union[Tensor, Scalar]) - The loss of MultilabelMarginLoss.
-          is :math:`(N)`.
+        - **outputs** (Union[Tensor, Scalar]) - The loss of MultilabelMarginLoss.
+          If `reduction` is ``"none"``, its shape is :math:`(N)`.
+          Otherwise, a scalar value will be returned.

     Raises:
         TypeError: If `input` or `target` is not a Tensor.
@@ -6214,7 +6420,7 @@ def multilabel_margin_loss(input, target, reduction='mean'):
         TypeError: If dtype of `target` is not int32.
         ValueError: If length of shape of `input` is neither 1 nor 2.
         ValueError: If shape of `input` is not the same as `target`.
-        ValueError: If `reduction` is not one of 'none'
+        ValueError: If `reduction` is not one of ``'none'``, ``'mean'``, ``'sum'``.

     Supported Platforms:
         ``Ascend`` ``GPU``
@@ -6260,12 +6466,15 @@ def multilabel_soft_margin_loss(input, target, weight=None, reduction='mean'):
         input (Tensor): A tensor of shape :math:`(N, C)` , where N is batch size and C is number of classes.
         target (Tensor): The label target Tensor which has the same shape as `input`.
         weight (Union[Tensor, int, float]): The manual rescaling weight given to each class. Default: ``None``.
-        reduction (str):
-            ``'
-
+        reduction (str, optional): Apply specific reduction method to the output: ``'none'`` , ``'mean'`` ,
+            ``'sum'`` . Default: ``'mean'`` .
+
+            - ``'none'``: no reduction will be applied.
+            - ``'mean'``: compute and return the weighted mean of elements in the output.
+            - ``'sum'``: the output elements will be summed.

     Returns:
-        Tensor, the data type is the same as input, if the reduction is 'none'
+        Tensor, the data type is the same as input, if the `reduction` is ``'none'``,
         its shape is :math:`(N)` , otherwise it is zero.

     Raises:
@@ -6409,15 +6618,15 @@ def gelu(input_x, approximate='none'):

     x_dtype = _get_cache_prim(P.DType)()(input_x)
     if x_dtype not in [mstype.float16, mstype.float32, mstype.float64]:
-        raise TypeError("For gelu, the input dtype must be float16, float32 or float64, "
-                        "but got {}."
+        raise TypeError(f"For gelu, the input dtype must be float16, float32 or float64, "
+                        f"but got {x_dtype}.")
     if approximate == 'tanh':
         output = _get_cache_prim(P.GeLU)()(input_x)
     else:
-        output = _get_cache_prim(P.Sqrt)()(Tensor(2.0))
+        output = _get_cache_prim(P.Sqrt)()(Tensor(2.0, x_dtype))
         output = _get_cache_prim(P.Div)()(input_x, output)
-        output = _get_cache_prim(P.Erf)()(output) + Tensor(1.0)
-        output = input_x * output * Tensor(0.5)
+        output = _get_cache_prim(P.Erf)()(output) + Tensor(1.0, x_dtype)
+        output = input_x * output * Tensor(0.5, x_dtype)

     return output

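Note: the `else` branch computes the exact, erf-based GELU; the change threads `x_dtype` into every constant so float16/float64 inputs keep their dtype instead of being promoted. A NumPy/SciPy sketch of the same math (illustrative):

    import numpy as np
    from scipy.special import erf

    def gelu_exact(x):
        # GELU(x) = x * 0.5 * (1 + erf(x / sqrt(2))) -- mirrors the else branch
        return x * 0.5 * (1.0 + erf(x / np.sqrt(2.0)))

    x = np.array([-1.0, 0.0, 1.0], dtype=np.float32)
    print(gelu_exact(x))  # ~[-0.1587  0.      0.8413]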
@@ -6655,8 +6864,12 @@ def mse_loss(input, target, reduction='mean'):
         target (Tensor): The input label. Tensor of any dimension, same shape as the `input` in common cases.
             However, it supports that the shape of `input` is different from the shape of `target`
             and they should be broadcasted to each other.
-        reduction (str, optional):
-
+        reduction (str, optional): Apply specific reduction method to the output: ``'none'`` , ``'mean'`` ,
+            ``'sum'`` . Default: ``'mean'`` .
+
+            - ``'none'``: no reduction will be applied.
+            - ``'mean'``: compute and return the mean of elements in the output.
+            - ``'sum'``: the output elements will be summed.

     Returns:
         Tensor, loss of type float, the shape is zero if `reduction` is ``'mean'`` or ``'sum'`` ,
@@ -6759,11 +6972,15 @@ def triplet_margin_loss(anchor, positive, negative, margin=1.0, p=2, eps=1e-06,
         eps (float, optional): Add small value to avoid division by zero. Default: ``1e-06``.
         swap (bool, optional): The distance swap changes the negative distance to the distance between positive
             sample and negative sample. Default: ``False`` .
-        reduction (str, optional): Apply specific reduction method to the output: ``'none'`` , ``'mean'`` ,
-            Default: ``'mean'`` .
+        reduction (str, optional): Apply specific reduction method to the output: ``'none'`` , ``'mean'`` ,
+            ``'sum'`` . Default: ``'mean'`` .
+
+            - ``'none'``: no reduction will be applied.
+            - ``'mean'``: compute and return the mean of elements in the output.
+            - ``'sum'``: the output elements will be summed.

     Returns:
-        Tensor. If `reduction` is "none"
+        Tensor. If `reduction` is ``"none"``, its shape is :math:`(N)`. Otherwise, a scalar value will be returned.

     Raises:
         TypeError: If `anchor` or `positive` or `negative` is not a Tensor.
@@ -6776,7 +6993,7 @@ def triplet_margin_loss(anchor, positive, negative, margin=1.0, p=2, eps=1e-06,
             same time.
         ValueError: If the dimension of input `anchor` or `positive` or `negative` is bigger than or equal to 8.
         ValueError: If shape of `anchor`, `positive` and `negative` cannot broadcast.
-        ValueError: If `reduction` is not one of 'none'
+        ValueError: If `reduction` is not one of ``'none'``, ``'mean'``, ``'sum'``.

     Supported Platforms:
         ``GPU``
@@ -6811,7 +7028,7 @@ def linear(x, w, b):
 def _inner_dropout(x, p, training):
     """inner dropout"""
     _dropout = _get_cache_prim(P.Dropout)(1 - p)
-    if p
+    if 0. < p <= 1. and training:
         return _dropout(x)[0]
     return x

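Note: the rewritten guard applies dropout only for a meaningful probability and only in training. A NumPy sketch of inverted dropout with the same guard (illustrative):

    import numpy as np

    def inner_dropout_np(x, p, training):
        # drop only for a meaningful probability and only while training
        if 0.0 < p <= 1.0 and training:
            keep = 1.0 - p
            if keep == 0.0:
                return np.zeros_like(x)    # p == 1 drops everything
            mask = (np.random.rand(*x.shape) < keep).astype(x.dtype)
            return x * mask / keep         # inverted dropout keeps E[out] == x
        return x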
@@ -6864,10 +7081,11 @@ def _in_projection_packed(q, k, v, w, b, k_is_v, q_is_k):
     return linear(q, w_q, b_q), linear(k, w_k, b_k), linear(v, w_v, b_v)


-def _scaled_dot_product_attention(query, key, value, attn_mask, dropout_p, is_causal, is_training):
+def _scaled_dot_product_attention(query, key, value, attn_mask, dropout_p, is_causal, is_training, dtype):
     """scaled dot product attention"""
     embed_size = query.shape[-1]
-
+    embed_size_tensor = scalar_to_tensor_(embed_size, dtype)
+    scaling_factor = embed_size_tensor.sqrt().sqrt()
     query = query / scaling_factor

     if is_causal:
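Note: `embed_size_tensor.sqrt().sqrt()` is embed_size ** 0.25. Assuming the key is scaled by the same factor elsewhere (the conventional formulation), this is equivalent to dividing the attention scores by sqrt(embed_size); a NumPy check of that equivalence:

    import numpy as np

    d = 64
    q = np.random.randn(4, d)
    k = np.random.randn(4, d)
    s = d ** 0.25                       # embed_size.sqrt().sqrt()
    scores_a = (q / s) @ (k / s).T      # scale q and k by d**-0.25 each
    scores_b = (q @ k.T) / np.sqrt(d)   # scale the scores by d**-0.5
    print(np.allclose(scores_a, scores_b))  # True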
@@ -6960,7 +7178,7 @@ def multi_head_attention_forward(query, key, value, embed_dim_to_check, num_head
                                  out_proj_bias, training=True, key_padding_mask=None, attn_mask=None,
                                  use_separate_proj_weight=False, q_proj_weight=None, k_proj_weight=None,
                                  v_proj_weight=None, static_k=None, static_v=None, average_attn_weights=True,
-                                 is_causal=False, k_is_v=False, q_is_k=False):
+                                 is_causal=False, k_is_v=False, q_is_k=False, dtype=mstype.float32):
     """multi head attention forward function"""
     is_batched = _check_qkv_shape(query.ndim, key.ndim, value.ndim)
     if key_padding_mask is not None:
@@ -7117,7 +7335,7 @@ def multi_head_attention_forward(query, key, value, embed_dim_to_check, num_head
     v = v.view((bsz, num_heads, src_len, head_dim))

     attn_output, attn_output_weights = _scaled_dot_product_attention(
-        q, k, v, attn_mask, dropout_p, is_causal, training)
+        q, k, v, attn_mask, dropout_p, is_causal, training, dtype)
     attn_output = attn_output.transpose(2, 0, 1, 3).view((bsz * tgt_len, embed_dim))

     attn_output = linear(attn_output, out_proj_weight, out_proj_bias)
@@ -7260,6 +7478,7 @@ __all__ = [
     'softsign',
     'softshrink',
     'soft_shrink',
+    'softplus',
     'selu',
     'silu',
     'soft_margin_loss',