mindspore-2.1.0-cp38-cp38-win_amd64.whl → mindspore-2.2.11-cp38-cp38-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mindspore/.commit_id +1 -1
- mindspore/Microsoft.VisualStudio.Telemetry.dll +0 -0
- mindspore/Newtonsoft.Json.dll +0 -0
- mindspore/__init__.py +4 -1
- mindspore/_c_dataengine.cp38-win_amd64.pyd +0 -0
- mindspore/_c_expression.cp38-win_amd64.pyd +0 -0
- mindspore/_c_mindrecord.cp38-win_amd64.pyd +0 -0
- mindspore/_check_jit_forbidden_api.py +3 -1
- mindspore/_checkparam.py +23 -29
- mindspore/_extends/graph_kernel/__init__.py +0 -1
- mindspore/_extends/graph_kernel/model/graph_split.py +84 -76
- mindspore/_extends/graph_kernel/model/model_builder.py +9 -50
- mindspore/_extends/graph_kernel/splitter.py +4 -11
- mindspore/_extends/parallel_compile/akg_compiler/akg_process.py +122 -15
- mindspore/_extends/parallel_compile/akg_compiler/build_tbe_kernel.py +84 -67
- mindspore/_extends/parallel_compile/akg_compiler/tbe_topi.py +4 -2
- mindspore/_extends/parallel_compile/akg_compiler/util.py +10 -7
- mindspore/_extends/parallel_compile/tbe_compiler/tbe_adapter.py +2 -2
- mindspore/_extends/parallel_compile/tbe_compiler/tbe_helper.py +6 -5
- mindspore/_extends/parallel_compile/tbe_compiler/tbe_job.py +1 -1
- mindspore/_extends/parallel_compile/tbe_compiler/tbe_job_manager.py +1 -1
- mindspore/_extends/parse/__init__.py +13 -15
- mindspore/_extends/parse/namespace.py +7 -33
- mindspore/_extends/parse/parser.py +67 -72
- mindspore/_extends/parse/resources.py +1 -1
- mindspore/_extends/parse/standard_method.py +86 -106
- mindspore/_extends/parse/trope.py +1 -1
- mindspore/_extends/remote/kernel_build_server.py +25 -7
- mindspore/_extends/remote/kernel_build_server_akg_v2.py +55 -0
- mindspore/_install_custom.py +43 -0
- mindspore/amp.py +47 -11
- mindspore/atlprov.dll +0 -0
- mindspore/boost/boost.py +1 -8
- mindspore/boost/boost_cell_wrapper.py +3 -2
- mindspore/boost/grad_accumulation.py +1 -1
- mindspore/boost/group_loss_scale_manager.py +8 -7
- mindspore/c1.dll +0 -0
- mindspore/c1xx.dll +0 -0
- mindspore/c2.dll +0 -0
- mindspore/common/__init__.py +5 -3
- mindspore/common/_jit_fallback_utils.py +6 -0
- mindspore/common/_register_for_adapter.py +2 -0
- mindspore/common/_register_for_tensor.py +2 -2
- mindspore/common/_stub_tensor.py +13 -0
- mindspore/common/_utils.py +29 -0
- mindspore/common/api.py +174 -259
- mindspore/common/auto_dynamic_shape.py +494 -0
- mindspore/common/dtype.py +18 -11
- mindspore/common/dump.py +6 -4
- mindspore/common/initializer.py +14 -14
- mindspore/common/jit_config.py +33 -15
- mindspore/common/lazy_inline.py +126 -7
- mindspore/common/mindir_util.py +101 -0
- mindspore/common/parameter.py +51 -41
- mindspore/common/seed.py +4 -4
- mindspore/common/sparse_tensor.py +13 -14
- mindspore/common/tensor.py +243 -165
- mindspore/communication/__init__.py +7 -4
- mindspore/communication/_comm_helper.py +83 -4
- mindspore/communication/management.py +152 -84
- mindspore/config/op_info.config +14 -3
- mindspore/context.py +152 -61
- mindspore/dataset/__init__.py +5 -5
- mindspore/dataset/audio/__init__.py +2 -2
- mindspore/dataset/audio/transforms.py +52 -52
- mindspore/dataset/callback/ds_callback.py +16 -2
- mindspore/dataset/core/config.py +68 -51
- mindspore/dataset/engine/cache_client.py +33 -7
- mindspore/dataset/engine/datasets.py +250 -112
- mindspore/dataset/engine/datasets_audio.py +43 -211
- mindspore/dataset/engine/datasets_standard_format.py +16 -35
- mindspore/dataset/engine/datasets_text.py +43 -67
- mindspore/dataset/engine/datasets_user_defined.py +86 -100
- mindspore/dataset/engine/datasets_vision.py +219 -1029
- mindspore/dataset/engine/iterators.py +11 -4
- mindspore/dataset/engine/obs/obs_mindrecord_dataset.py +4 -0
- mindspore/dataset/engine/obs/util.py +3 -0
- mindspore/dataset/engine/samplers.py +1 -1
- mindspore/dataset/engine/validators.py +19 -5
- mindspore/dataset/text/__init__.py +3 -3
- mindspore/dataset/text/transforms.py +101 -127
- mindspore/dataset/text/utils.py +205 -138
- mindspore/dataset/transforms/__init__.py +1 -1
- mindspore/dataset/transforms/py_transforms_util.py +40 -12
- mindspore/dataset/transforms/transforms.py +95 -40
- mindspore/dataset/utils/browse_dataset.py +8 -2
- mindspore/dataset/utils/line_reader.py +17 -19
- mindspore/dataset/vision/__init__.py +3 -3
- mindspore/dataset/vision/c_transforms.py +6 -3
- mindspore/dataset/vision/transforms.py +409 -287
- mindspore/dataset/vision/utils.py +13 -14
- mindspore/dataset/vision/validators.py +11 -1
- mindspore/dnnl.dll +0 -0
- mindspore/dpcmi.dll +0 -0
- mindspore/experimental/map_parameter.py +14 -0
- mindspore/{nn/optim_ex → experimental/optim}/__init__.py +30 -29
- mindspore/{nn/optim_ex → experimental/optim}/adam.py +60 -67
- mindspore/{nn/optim_ex → experimental/optim}/adamw.py +181 -203
- mindspore/experimental/optim/lr_scheduler.py +1427 -0
- mindspore/{nn/optim_ex → experimental/optim}/optimizer.py +252 -259
- mindspore/{nn/optim_ex → experimental/optim}/sgd.py +147 -152
- mindspore/gen_ops.py +273 -0
- mindspore/include/OWNERS +0 -1
- mindspore/include/api/data_type.h +2 -1
- mindspore/include/api/graph.h +0 -15
- mindspore/include/api/kernel.h +2 -0
- mindspore/include/api/kernel_api.h +37 -12
- mindspore/include/api/model.h +17 -14
- mindspore/include/api/status.h +8 -3
- mindspore/include/api/types.h +37 -4
- mindspore/include/c_api/ms/abstract.h +67 -0
- mindspore/include/c_api/ms/attribute.h +197 -0
- mindspore/include/c_api/ms/base/handle_types.h +43 -0
- mindspore/include/c_api/ms/base/macros.h +32 -0
- mindspore/include/c_api/ms/base/status.h +33 -0
- mindspore/include/c_api/ms/base/types.h +282 -0
- mindspore/include/c_api/ms/context.h +102 -0
- mindspore/include/c_api/ms/graph.h +160 -0
- mindspore/include/c_api/ms/node.h +606 -0
- mindspore/include/c_api/ms/tensor.h +161 -0
- mindspore/include/c_api/ms/value.h +84 -0
- mindspore/include/dataset/constants.h +6 -5
- mindspore/include/dataset/execute.h +23 -13
- mindspore/include/dataset/text.h +26 -26
- mindspore/include/dataset/transforms.h +13 -13
- mindspore/include/dataset/vision.h +60 -60
- mindspore/include/dataset/vision_ascend.h +5 -6
- mindspore/include/dataset/vision_lite.h +17 -17
- mindspore/jpeg62.dll +0 -0
- mindspore/mindrecord/tools/imagenet_to_mr.py +1 -1
- mindspore/mindrecord/tools/mnist_to_mr.py +2 -2
- mindspore/mindspore_backend.dll +0 -0
- mindspore/mindspore_common.dll +0 -0
- mindspore/mindspore_core.dll +0 -0
- mindspore/mindspore_glog.dll +0 -0
- mindspore/mindspore_shared_lib.dll +0 -0
- mindspore/msobj140.dll +0 -0
- mindspore/mspdb140.dll +0 -0
- mindspore/mspdbcore.dll +0 -0
- mindspore/mspdbst.dll +0 -0
- mindspore/mspft140.dll +0 -0
- mindspore/msvcdis140.dll +0 -0
- mindspore/msvcp140_1.dll +0 -0
- mindspore/msvcp140_2.dll +0 -0
- mindspore/msvcp140_atomic_wait.dll +0 -0
- mindspore/msvcp140_codecvt_ids.dll +0 -0
- mindspore/nn/__init__.py +0 -2
- mindspore/nn/cell.py +313 -74
- mindspore/nn/dynamic_lr.py +21 -21
- mindspore/nn/layer/activation.py +22 -30
- mindspore/nn/layer/basic.py +15 -13
- mindspore/nn/layer/channel_shuffle.py +1 -1
- mindspore/nn/layer/container.py +271 -9
- mindspore/nn/layer/conv.py +323 -204
- mindspore/nn/layer/dense.py +8 -5
- mindspore/nn/layer/embedding.py +33 -27
- mindspore/nn/layer/flash_attention.py +61 -95
- mindspore/nn/layer/image.py +8 -6
- mindspore/nn/layer/math.py +16 -25
- mindspore/nn/layer/normalization.py +107 -66
- mindspore/nn/layer/padding.py +1 -1
- mindspore/nn/layer/pooling.py +131 -109
- mindspore/nn/layer/rnn_cells.py +27 -22
- mindspore/nn/layer/rnns.py +13 -16
- mindspore/nn/layer/thor_layer.py +1 -1
- mindspore/nn/layer/transformer.py +221 -154
- mindspore/nn/learning_rate_schedule.py +9 -1
- mindspore/nn/loss/loss.py +235 -174
- mindspore/nn/optim/ada_grad.py +2 -1
- mindspore/nn/optim/adadelta.py +1 -0
- mindspore/nn/optim/adafactor.py +2 -1
- mindspore/nn/optim/adam.py +7 -4
- mindspore/nn/optim/adamax.py +3 -2
- mindspore/nn/optim/adasum.py +2 -2
- mindspore/nn/optim/asgd.py +2 -3
- mindspore/nn/optim/ftrl.py +6 -5
- mindspore/nn/optim/lamb.py +7 -4
- mindspore/nn/optim/lars.py +1 -1
- mindspore/nn/optim/lazyadam.py +5 -3
- mindspore/nn/optim/momentum.py +2 -1
- mindspore/nn/optim/optimizer.py +53 -4
- mindspore/nn/optim/proximal_ada_grad.py +3 -4
- mindspore/nn/optim/rmsprop.py +4 -3
- mindspore/nn/optim/rprop.py +23 -12
- mindspore/nn/optim/sgd.py +26 -11
- mindspore/nn/optim/thor.py +9 -7
- mindspore/nn/probability/bijector/bijector.py +5 -5
- mindspore/nn/probability/bijector/power_transform.py +27 -27
- mindspore/nn/probability/bijector/softplus.py +3 -3
- mindspore/nn/probability/distribution/_utils/custom_ops.py +3 -3
- mindspore/nn/probability/distribution/bernoulli.py +5 -5
- mindspore/nn/probability/distribution/beta.py +3 -3
- mindspore/nn/probability/distribution/categorical.py +7 -7
- mindspore/nn/probability/distribution/cauchy.py +0 -1
- mindspore/nn/probability/distribution/distribution.py +3 -3
- mindspore/nn/probability/distribution/gamma.py +3 -3
- mindspore/nn/probability/distribution/geometric.py +4 -4
- mindspore/nn/probability/distribution/gumbel.py +4 -4
- mindspore/nn/probability/distribution/log_normal.py +2 -2
- mindspore/nn/probability/distribution/logistic.py +2 -2
- mindspore/nn/probability/distribution/poisson.py +4 -4
- mindspore/nn/probability/distribution/transformed_distribution.py +3 -3
- mindspore/nn/probability/distribution/uniform.py +6 -6
- mindspore/nn/wrap/__init__.py +4 -2
- mindspore/nn/wrap/cell_wrapper.py +87 -34
- mindspore/nn/wrap/grad_reducer.py +8 -5
- mindspore/nn/wrap/loss_scale.py +105 -42
- mindspore/numpy/array_creations.py +1 -2
- mindspore/numpy/array_ops.py +3 -2
- mindspore/numpy/utils_const.py +5 -5
- mindspore/opencv_core452.dll +0 -0
- mindspore/opencv_imgcodecs452.dll +0 -0
- mindspore/opencv_imgproc452.dll +0 -0
- mindspore/ops/_grad_experimental/__init__.py +0 -5
- mindspore/ops/_grad_experimental/grad_array_ops.py +2 -3
- mindspore/ops/_grad_experimental/grad_comm_ops.py +15 -2
- mindspore/ops/_grad_experimental/grad_debug_ops.py +0 -37
- mindspore/ops/_grad_experimental/grad_implementations.py +11 -1
- mindspore/ops/_grad_experimental/grad_inner_ops.py +2 -216
- mindspore/ops/_grad_experimental/grad_math_ops.py +19 -199
- mindspore/ops/_grad_experimental/grad_sparse.py +15 -0
- mindspore/ops/_grad_experimental/grad_sparse_ops.py +3 -3
- mindspore/ops/_op_impl/_custom_op/dsd_back_impl.py +1 -1
- mindspore/ops/_op_impl/aicpu/__init__.py +14 -2
- mindspore/ops/_op_impl/aicpu/add.py +3 -3
- mindspore/ops/_op_impl/aicpu/bias_add_grad.py +0 -1
- mindspore/ops/_op_impl/aicpu/count_nonzero.py +43 -0
- mindspore/ops/_op_impl/{_custom_op/flash_attention/constants.py → aicpu/eps.py} +18 -27
- mindspore/ops/_op_impl/aicpu/gamma.py +2 -2
- mindspore/ops/_op_impl/aicpu/linear_sum_assignment.py +21 -2
- mindspore/ops/_op_impl/aicpu/log_uniform_candidate_sampler.py +6 -3
- mindspore/ops/_op_impl/aicpu/lu_unpack_grad.py +0 -1
- mindspore/ops/_op_impl/aicpu/multinomial.py +3 -3
- mindspore/ops/_op_impl/aicpu/parameterized_truncated_normal.py +15 -7
- mindspore/ops/_op_impl/aicpu/random_categorical.py +39 -19
- mindspore/ops/_op_impl/aicpu/random_choice_with_mask.py +5 -2
- mindspore/ops/_op_impl/aicpu/random_poisson.py +103 -52
- mindspore/ops/_op_impl/aicpu/random_shuffle.py +17 -15
- mindspore/ops/_op_impl/aicpu/{sparseaddmm.py → sparse_addmm.py} +2 -2
- mindspore/ops/_op_impl/aicpu/{sparsesparsemaximum.py → sparse_sparse_maximum.py} +4 -4
- mindspore/ops/_op_impl/aicpu/standard_laplace.py +5 -5
- mindspore/ops/_op_impl/aicpu/standard_normal.py +5 -5
- mindspore/ops/_op_impl/aicpu/truncated_normal.py +9 -7
- mindspore/ops/_op_impl/aicpu/uniform.py +5 -3
- mindspore/ops/_op_impl/aicpu/uniform_candidate_sampler.py +8 -4
- mindspore/ops/_op_impl/aicpu/uniform_int.py +5 -5
- mindspore/ops/_op_impl/aicpu/uniform_real.py +4 -4
- mindspore/ops/_op_impl/tbe/__init__.py +4 -4
- mindspore/ops/_op_impl/tbe/inplace_index_add.py +7 -3
- mindspore/ops/_op_impl/tbe/trans_data_ds.py +2 -0
- mindspore/ops/_primitive_cache.py +1 -1
- mindspore/ops/_tracefunc.py +45 -13
- mindspore/ops/_utils/utils.py +6 -1
- mindspore/ops/_vmap/vmap_array_ops.py +3 -3
- mindspore/ops/_vmap/vmap_base.py +3 -3
- mindspore/ops/_vmap/vmap_convolution_ops.py +1 -1
- mindspore/ops/_vmap/vmap_grad_math_ops.py +6 -4
- mindspore/ops/_vmap/vmap_math_ops.py +5 -2
- mindspore/ops/_vmap/vmap_nn_ops.py +61 -7
- mindspore/ops/arg_dtype_cast.py +54 -0
- mindspore/ops/composite/base.py +37 -10
- mindspore/ops/composite/math_ops.py +5 -4
- mindspore/ops/composite/multitype_ops/_compile_utils.py +275 -73
- mindspore/ops/composite/multitype_ops/_constexpr_utils.py +16 -9
- mindspore/ops/composite/multitype_ops/add_impl.py +43 -4
- mindspore/ops/composite/multitype_ops/getitem_impl.py +42 -4
- mindspore/ops/composite/multitype_ops/ones_like_impl.py +6 -0
- mindspore/ops/composite/multitype_ops/setitem_impl.py +2 -1
- mindspore/ops/composite/multitype_ops/zeros_like_impl.py +9 -0
- mindspore/ops/deprecated.py +304 -0
- mindspore/ops/function/__init__.py +4 -1
- mindspore/ops/function/array_func.py +174 -193
- mindspore/ops/function/clip_func.py +81 -13
- mindspore/ops/function/debug_func.py +1 -1
- mindspore/ops/function/grad/grad_func.py +18 -9
- mindspore/ops/function/image_func.py +10 -4
- mindspore/ops/function/linalg_func.py +5 -5
- mindspore/ops/function/math_func.py +575 -386
- mindspore/ops/function/nn_func.py +568 -260
- mindspore/ops/function/random_func.py +88 -57
- mindspore/ops/function/sparse_func.py +1 -1
- mindspore/ops/function/sparse_unary_func.py +14 -12
- mindspore/ops/function/vmap_func.py +6 -5
- mindspore/ops/functional.py +15 -10
- mindspore/ops/op_info_register.py +244 -25
- mindspore/ops/operations/__init__.py +31 -19
- mindspore/ops/operations/_grad_ops.py +71 -7
- mindspore/ops/operations/_inner_ops.py +350 -17
- mindspore/ops/operations/_quant_ops.py +4 -8
- mindspore/ops/operations/_sequence_ops.py +42 -0
- mindspore/ops/operations/array_ops.py +68 -282
- mindspore/ops/operations/comm_ops.py +107 -59
- mindspore/ops/operations/custom_ops.py +94 -70
- mindspore/ops/operations/debug_ops.py +8 -4
- mindspore/ops/operations/image_ops.py +18 -12
- mindspore/ops/operations/inner_ops.py +26 -3
- mindspore/ops/operations/math_ops.py +192 -144
- mindspore/ops/operations/nn_ops.py +857 -489
- mindspore/ops/operations/other_ops.py +0 -22
- mindspore/ops/operations/random_ops.py +53 -111
- mindspore/ops/operations/sparse_ops.py +3 -1
- mindspore/ops/primitive.py +24 -18
- mindspore/parallel/_auto_parallel_context.py +68 -8
- mindspore/parallel/_cost_model_context.py +2 -2
- mindspore/parallel/_offload_context.py +17 -3
- mindspore/parallel/_parallel_serialization.py +12 -5
- mindspore/parallel/_ps_context.py +12 -0
- mindspore/parallel/_tensor.py +18 -13
- mindspore/parallel/_transformer/layers.py +5 -3
- mindspore/parallel/_transformer/loss.py +1 -0
- mindspore/parallel/_transformer/moe.py +2 -2
- mindspore/parallel/_transformer/op_parallel_config.py +12 -1
- mindspore/parallel/_transformer/transformer.py +23 -3
- mindspore/parallel/_utils.py +11 -7
- mindspore/parallel/algo_parameter_config.py +85 -5
- mindspore/parallel/checkpoint_transform.py +19 -12
- mindspore/parallel/shard.py +21 -14
- mindspore/pgodb140.dll +0 -0
- mindspore/pgort140.dll +0 -0
- mindspore/profiler/common/struct_type.py +3 -3
- mindspore/profiler/common/util.py +4 -2
- mindspore/profiler/envprofiling.py +1 -1
- mindspore/profiler/parser/aicpu_data_parser.py +5 -3
- mindspore/profiler/parser/ascend_flops_generator.py +2 -2
- mindspore/profiler/parser/ascend_fpbp_generator.py +1 -1
- mindspore/profiler/parser/ascend_hccl_generator.py +249 -12
- mindspore/profiler/parser/ascend_msprof_exporter.py +150 -255
- mindspore/profiler/parser/ascend_msprof_generator.py +204 -17
- mindspore/profiler/parser/ascend_op_generator.py +6 -6
- mindspore/profiler/parser/ascend_steptrace_generator.py +6 -4
- mindspore/profiler/parser/ascend_timeline_generator.py +14 -187
- mindspore/profiler/parser/base_timeline_generator.py +10 -8
- mindspore/profiler/parser/cpu_gpu_timeline_generator.py +16 -12
- mindspore/profiler/parser/flops_parser.py +15 -11
- mindspore/profiler/parser/framework_parser.py +38 -22
- mindspore/profiler/parser/hccl_parser.py +16 -12
- mindspore/profiler/parser/integrator.py +22 -11
- mindspore/profiler/parser/memory_usage_parser.py +2 -2
- mindspore/profiler/parser/minddata_analyzer.py +12 -14
- mindspore/profiler/parser/minddata_pipeline_parser.py +1 -1
- mindspore/profiler/parser/msadvisor_parser.py +8 -4
- mindspore/profiler/parser/op_intermediate_parser.py +5 -2
- mindspore/profiler/parser/optime_parser.py +1 -1
- mindspore/profiler/parser/profiler_info.py +21 -2
- mindspore/profiler/parser/step_trace_parser.py +11 -14
- mindspore/profiler/profiling.py +179 -89
- mindspore/rewrite/api/node.py +102 -19
- mindspore/rewrite/api/node_type.py +5 -1
- mindspore/rewrite/api/pattern_engine.py +1 -1
- mindspore/rewrite/api/scoped_value.py +9 -17
- mindspore/rewrite/api/symbol_tree.py +131 -47
- mindspore/rewrite/ast_helpers/__init__.py +2 -1
- mindspore/rewrite/ast_helpers/ast_finder.py +129 -0
- mindspore/rewrite/ast_helpers/ast_modifier.py +116 -104
- mindspore/rewrite/ast_transformers/flatten_recursive_stmt.py +93 -46
- mindspore/rewrite/common/rewrite_elog.py +5 -1
- mindspore/rewrite/namer.py +33 -24
- mindspore/rewrite/namespace.py +14 -5
- mindspore/{_extends/graph_kernel/expanders/complex → rewrite/node}/__init__.py +9 -9
- mindspore/rewrite/node/call_function.py +79 -0
- mindspore/rewrite/node/cell_container.py +135 -0
- mindspore/rewrite/node/control_flow.py +88 -0
- mindspore/rewrite/{node.py → node/node.py} +273 -234
- mindspore/rewrite/node/node_manager.py +254 -0
- mindspore/rewrite/{topological_manager.py → node/node_topological_manager.py} +13 -46
- mindspore/rewrite/parsers/arguments_parser.py +22 -21
- mindspore/rewrite/parsers/assign_parser.py +216 -221
- mindspore/rewrite/parsers/attribute_parser.py +9 -7
- mindspore/rewrite/parsers/class_def_parser.py +174 -113
- mindspore/rewrite/parsers/constant_parser.py +9 -6
- mindspore/rewrite/parsers/container_parser.py +9 -7
- mindspore/rewrite/parsers/for_parser.py +42 -21
- mindspore/rewrite/parsers/function_def_parser.py +24 -16
- mindspore/rewrite/parsers/if_parser.py +28 -24
- mindspore/rewrite/parsers/module_parser.py +196 -25
- mindspore/rewrite/{parser.py → parsers/parser.py} +4 -2
- mindspore/rewrite/{parser_register.py → parsers/parser_register.py} +1 -1
- mindspore/rewrite/parsers/return_parser.py +6 -6
- mindspore/rewrite/sparsify/sparse_transformer.py +12 -3
- mindspore/rewrite/sparsify/utils.py +1 -1
- mindspore/rewrite/symbol_tree.py +523 -578
- mindspore/rewrite/symbol_tree_builder.py +9 -193
- mindspore/rewrite/symbol_tree_dumper.py +2 -2
- mindspore/run_check/_check_version.py +6 -4
- mindspore/{ops/bprop_mindir → safeguard}/__init__.py +4 -3
- mindspore/safeguard/rewrite_obfuscation.py +541 -0
- mindspore/tbbmalloc.dll +0 -0
- mindspore/tinyxml2.dll +0 -0
- mindspore/train/_utils.py +7 -3
- mindspore/train/amp.py +323 -123
- mindspore/train/anf_ir_pb2.py +14 -2
- mindspore/train/callback/_backup_and_restore.py +2 -12
- mindspore/train/callback/_callback.py +29 -4
- mindspore/train/callback/_checkpoint.py +23 -8
- mindspore/train/callback/_early_stop.py +2 -2
- mindspore/train/callback/_landscape.py +4 -4
- mindspore/train/callback/_loss_monitor.py +2 -2
- mindspore/train/callback/_on_request_exit.py +2 -2
- mindspore/train/callback/_reduce_lr_on_plateau.py +3 -4
- mindspore/train/callback/_summary_collector.py +15 -8
- mindspore/train/callback/_time_monitor.py +58 -5
- mindspore/train/data_sink.py +5 -11
- mindspore/train/dataset_helper.py +84 -57
- mindspore/train/loss_scale_manager.py +2 -2
- mindspore/train/metrics/__init__.py +3 -3
- mindspore/train/metrics/cosine_similarity.py +1 -1
- mindspore/train/metrics/hausdorff_distance.py +3 -2
- mindspore/train/metrics/mean_surface_distance.py +3 -2
- mindspore/train/metrics/metric.py +39 -19
- mindspore/train/metrics/roc.py +2 -2
- mindspore/train/metrics/root_mean_square_surface_distance.py +4 -3
- mindspore/train/mind_ir_pb2.py +85 -36
- mindspore/train/model.py +187 -47
- mindspore/train/serialization.py +487 -161
- mindspore/train/summary/_summary_adapter.py +1 -1
- mindspore/train/summary/_writer_pool.py +3 -2
- mindspore/train/summary/summary_record.py +37 -17
- mindspore/train/train_thor/convert_utils.py +3 -3
- mindspore/train/train_thor/dataset_helper.py +1 -1
- mindspore/turbojpeg.dll +0 -0
- mindspore/vcmeta.dll +0 -0
- mindspore/vcruntime140.dll +0 -0
- mindspore/vcruntime140_1.dll +0 -0
- mindspore/version.py +1 -1
- {mindspore-2.1.0.dist-info → mindspore-2.2.11.dist-info}/METADATA +7 -4
- {mindspore-2.1.0.dist-info → mindspore-2.2.11.dist-info}/RECORD +429 -486
- mindspore/_extends/graph_kernel/expander.py +0 -80
- mindspore/_extends/graph_kernel/expanders/__init__.py +0 -54
- mindspore/_extends/graph_kernel/expanders/_utils.py +0 -269
- mindspore/_extends/graph_kernel/expanders/addn.py +0 -33
- mindspore/_extends/graph_kernel/expanders/batchnorm.py +0 -152
- mindspore/_extends/graph_kernel/expanders/batchnorm_grad.py +0 -105
- mindspore/_extends/graph_kernel/expanders/clip_by_norm_no_div_sum.py +0 -33
- mindspore/_extends/graph_kernel/expanders/complex/abs.py +0 -30
- mindspore/_extends/graph_kernel/expanders/complex/add.py +0 -44
- mindspore/_extends/graph_kernel/expanders/complex/div.py +0 -62
- mindspore/_extends/graph_kernel/expanders/complex/mul.py +0 -52
- mindspore/_extends/graph_kernel/expanders/complex/real_div.py +0 -62
- mindspore/_extends/graph_kernel/expanders/complex/sub.py +0 -45
- mindspore/_extends/graph_kernel/expanders/conv2d.py +0 -200
- mindspore/_extends/graph_kernel/expanders/dropout_grad.py +0 -30
- mindspore/_extends/graph_kernel/expanders/equal_count.py +0 -50
- mindspore/_extends/graph_kernel/expanders/erfc.py +0 -35
- mindspore/_extends/graph_kernel/expanders/expand_dims.py +0 -50
- mindspore/_extends/graph_kernel/expanders/fused_adam.py +0 -44
- mindspore/_extends/graph_kernel/expanders/fused_adam_weight_decay.py +0 -47
- mindspore/_extends/graph_kernel/expanders/fused_mul_add.py +0 -28
- mindspore/_extends/graph_kernel/expanders/gelu_grad.py +0 -70
- mindspore/_extends/graph_kernel/expanders/gkdropout.py +0 -40
- mindspore/_extends/graph_kernel/expanders/identity.py +0 -25
- mindspore/_extends/graph_kernel/expanders/layernorm.py +0 -93
- mindspore/_extends/graph_kernel/expanders/layernorm_grad.py +0 -113
- mindspore/_extends/graph_kernel/expanders/logsoftmax.py +0 -46
- mindspore/_extends/graph_kernel/expanders/logsoftmax_grad.py +0 -36
- mindspore/_extends/graph_kernel/expanders/matmul.py +0 -80
- mindspore/_extends/graph_kernel/expanders/maximum_grad.py +0 -59
- mindspore/_extends/graph_kernel/expanders/minimum_grad.py +0 -80
- mindspore/_extends/graph_kernel/expanders/oneslike.py +0 -26
- mindspore/_extends/graph_kernel/expanders/reduce_mean.py +0 -43
- mindspore/_extends/graph_kernel/expanders/relu_grad.py +0 -32
- mindspore/_extends/graph_kernel/expanders/sigmoid_cross_entropy_with_logits.py +0 -41
- mindspore/_extends/graph_kernel/expanders/sigmoid_cross_entropy_with_logits_grad.py +0 -35
- mindspore/_extends/graph_kernel/expanders/sigmoid_grad.py +0 -31
- mindspore/_extends/graph_kernel/expanders/slice.py +0 -35
- mindspore/_extends/graph_kernel/expanders/softmax_cross_entropy_with_logits.py +0 -42
- mindspore/_extends/graph_kernel/expanders/softmax_grad_ext.py +0 -41
- mindspore/_extends/graph_kernel/expanders/softsign.py +0 -28
- mindspore/_extends/graph_kernel/expanders/sqrt_grad.py +0 -29
- mindspore/_extends/graph_kernel/expanders/square_sum_all.py +0 -44
- mindspore/_extends/graph_kernel/expanders/square_sum_v1.py +0 -37
- mindspore/_extends/graph_kernel/expanders/squared_difference.py +0 -43
- mindspore/_extends/graph_kernel/expanders/tanh_grad.py +0 -31
- mindspore/_extends/graph_kernel/model/op_infer.py +0 -506
- mindspore/dataset/datapreprocess/__init__.py +0 -20
- mindspore/dataset/datapreprocess/preprocess_imagenet_validate_dataset.py +0 -54
- mindspore/include/api/net.h +0 -142
- mindspore/nn/lr_scheduler.py +0 -262
- mindspore/ops/_grad_experimental/grad_image_ops.py +0 -248
- mindspore/ops/_grad_experimental/grad_linalg_ops.py +0 -181
- mindspore/ops/_grad_experimental/grad_other_ops.py +0 -72
- mindspore/ops/_grad_experimental/grad_scalar_ops.py +0 -112
- mindspore/ops/_grad_experimental/grad_sequence_ops.py +0 -351
- mindspore/ops/_op_impl/_custom_op/flash_attention/__init__.py +0 -0
- mindspore/ops/_op_impl/_custom_op/flash_attention/attention.py +0 -350
- mindspore/ops/_op_impl/_custom_op/flash_attention/flash_attention_bwd.py +0 -409
- mindspore/ops/_op_impl/_custom_op/flash_attention/flash_attention_fwd.py +0 -578
- mindspore/ops/_op_impl/_custom_op/flash_attention/flash_attention_impl.py +0 -199
- mindspore/ops/_op_impl/_custom_op/flash_attention/tik_ops_utils.py +0 -446
- mindspore/ops/_op_impl/_custom_op/flash_attention/tiling_strategy/__init__.py +0 -0
- mindspore/ops/_op_impl/_custom_op/flash_attention/tiling_strategy/sparse_tiling.py +0 -45
- mindspore/ops/_op_impl/_custom_op/flash_attention/tiling_strategy/strategy.py +0 -67
- mindspore/ops/_op_impl/_custom_op/flash_attention/tiling_strategy/wukong_tiling.py +0 -62
- mindspore/ops/bprop_mindir/BNTrainingReduce_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Broadcast_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Depend_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/DepthwiseConv2dNative_bprop.mindir +0 -138
- mindspore/ops/bprop_mindir/EmbeddingLookup_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Load_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/ScatterNonAliasingAdd_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/SparseGatherV2_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/SparseSoftmaxCrossEntropyWithLogits_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Switch_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/TransShape_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/TupleGetItem_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Unique_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/Unstack_bprop.mindir +0 -0
- mindspore/ops/bprop_mindir/generate_mindir.py +0 -114
- mindspore/rewrite/node_visitor.py +0 -44
- {mindspore-2.1.0.dist-info → mindspore-2.2.11.dist-info}/WHEEL +0 -0
- {mindspore-2.1.0.dist-info → mindspore-2.2.11.dist-info}/entry_points.txt +0 -0
- {mindspore-2.1.0.dist-info → mindspore-2.2.11.dist-info}/top_level.txt +0 -0
--- mindspore/ops/function/nn_func.py (2.1.0)
+++ mindspore/ops/function/nn_func.py (2.2.11)
@@ -27,7 +27,7 @@ from mindspore.ops.operations import nn_ops as NN_OPS
 from mindspore.ops.operations import _sequence_ops as seq
 import mindspore.common.dtype as mstype
 from mindspore.ops.function.math_func import logsumexp
-from mindspore.ops.function.random_func import _get_seed
+from mindspore.ops.function.random_func import _get_seed, _set_prim_op_user_data
 from mindspore.common.tensor import Tensor
 from mindspore._c_expression import Tensor as Tensor_
 from mindspore.ops._primitive_cache import _get_cache_prim
@@ -40,6 +40,7 @@ from mindspore.ops.operations.nn_ops import ChannelShuffle
 from mindspore.ops.operations.nn_ops import TripletMarginLoss
 from mindspore.ops.operations._inner_ops import SiLU
 from mindspore.ops.operations._sequence_ops import TupleToTensor, TensorToTuple, ListToTensor
+from mindspore.common.api import _function_forbid_reuse

 slice_ = P.Slice()
 fast_gelu_ = P.FastGeLU()
@@ -232,7 +233,7 @@ def adaptive_avg_pool3d(input, output_size):
 def _check_avgpool_1d_type_and_int(kernel_size, stride, ceil_mode, count_include_pad):
     """Checks the type of avgpool1d input"""
     validator.check_value_type('kernel_size', kernel_size, [int], 'avg_pool1d')
-    validator.check_value_type('stride', stride,
+    validator.check_value_type('stride', stride, (int, tuple), 'avg_pool1d')
     validator.check_value_type('ceil_mode', ceil_mode, bool, 'avg_pool1d')
     validator.check_value_type('count_include_pad', count_include_pad, bool, 'avg_pool1d')
     validator.check_int(kernel_size, 1, validator.GE, "kernel_size", 'avg_pool1d')
@@ -263,12 +264,10 @@ def avg_pool1d(input_x, kernel_size=1, stride=1, padding=0, ceil_mode=False, cou
     Args:
         input_x (Tensor): Tensor of shape :math:`(N, C_{in}, L_{in})`.
         kernel_size (int): The size of kernel window used to take the average value. Default: ``1`` .
-        stride (Union(int, tuple[int])): The distance of kernel moving
-
-
-
-            and right are the same, equal to pad. If `padding` is a tuple of `2` integers, the padding of left and right
-            equal to `padding[0]` and `padding[1]` correspondingly. Default: ``0`` .
+        stride (Union(int, tuple[int])): The distance of kernel moving. `stride` can either be an int
+            number or a tuple of one int number. Default: ``1`` .
+        padding (Union(int, tuple[int])): The pad value to be filled. `padding` can either be an integer
+            or a tuple of one integer. Default: ``0`` .
         ceil_mode (bool): If True, apply ceil instead of floor to compute the output shape. Default: ``False``.
         count_include_pad (bool): If True, include the zero-padding in the averaging calculation. Default: ``True`` .

@@ -300,20 +299,25 @@ def avg_pool1d(input_x, kernel_size=1, stride=1, padding=0, ceil_mode=False, cou
         raise TypeError("For avg_pool1d, the input input_x must be tensor")

     if len(input_x.shape) != 3:
-        raise ValueError("For avg_pool1d, input must have 3 dim, but got {
+        raise ValueError(f"For avg_pool1d, input must have 3 dim, but got {len(input_x.shape)}.")

     _check_avgpool_1d_type_and_int(kernel_size, stride, ceil_mode, count_include_pad)
     if isinstance(padding, int):
         check_non_negative_int(padding, 'padding', 'avg_pool1d')
         padding = (0, 0, 0, 0, padding, padding)
     elif isinstance(padding, tuple):
-        if len(padding) !=
-            raise ValueError("For avg_pool1d, padding should be int or tuple of length
+        if len(padding) != 1:
+            raise ValueError("For avg_pool1d, padding should be int or tuple of length 1.")
         for item in padding:
             check_non_negative_int(item, 'padding', 'avg_pool1d')
-        padding = (0, 0, 0, 0, padding[0], padding[
+        padding = (0, 0, 0, 0, padding[0], padding[0])
     else:
-        raise TypeError("For avg_pool1d, padding should be int or tuple of length
+        raise TypeError("For avg_pool1d, padding should be int or tuple of length 1.")
+
+    if isinstance(stride, tuple):
+        if len(stride) != 1:
+            raise ValueError("For avg_pool1d, stride should be int or tuple of length 1.")
+        stride = stride[0]

     expand_op = _get_cache_prim(P.ExpandDims)()
     squeeze_op = _get_cache_prim(P.Squeeze)((2, 3))
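The new block above normalizes a 1-tuple `stride` to a plain int before the pooling primitives are built. A minimal usage sketch of the relaxed signature (input values are illustrative, not taken from the package):

    import numpy as np
    import mindspore as ms
    from mindspore import ops

    x = ms.Tensor(np.random.randn(1, 3, 6), ms.float32)
    # After this change, both calls are accepted and equivalent:
    out_int = ops.avg_pool1d(x, kernel_size=2, stride=2)
    out_tup = ops.avg_pool1d(x, kernel_size=2, stride=(2,))
    print(out_int.shape)  # (1, 3, 3)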
@@ -419,7 +423,7 @@ def avg_pool2d(input_x, kernel_size=1, stride=1, padding=0, ceil_mode=False, cou
         ceil_mode (bool): If True, apply ceil instead of floor to compute the output shape. Default: ``False``.
         count_include_pad (bool): If True, include the zero-padding in the averaging calculation. Default: ``True`` .
         divisor_override (int): If specified, it will be used as divisor in the averaging calculation, otherwise
-            `kernel_size` will be used. Default: ``0
+            `kernel_size` will be used. Default: ``0``, which means not specified.

     Returns:
         Tensor, with shape :math:`(N, C_{out}, H_{out}, W_{out})`.
@@ -456,7 +460,7 @@ def avg_pool2d(input_x, kernel_size=1, stride=1, padding=0, ceil_mode=False, cou
         raise TypeError("For avg_pool2d, the input input_x must be tensor")

     if len(input_x.shape) != 4:
-        raise ValueError("For avg_pool2d, input must have 4 dim, but got {
+        raise ValueError(f"For avg_pool2d, input must have 4 dim, but got {len(input_x.shape)}.")

     kernel_size = _check_avgpool_2d_kernel_size(kernel_size)
     stride = _check_avgpool_2d_stride(stride)
@@ -527,7 +531,7 @@ def avg_pool3d(input_x, kernel_size=1, stride=1, padding=0, ceil_mode=False, cou
         count_include_pad (bool, optional): If ``True`` , averaging calculation
             will include the zero-padding. Default: ``True`` .
         divisor_override (int, optional): If specified, it will be used as divisor in the averaging calculation,
-            otherwise `kernel_size` will be used. Default: ``0`` .
+            otherwise `kernel_size` will be used. Default: ``0`` , which means not specified.

     Returns:
         Tensor, with shape :math:`(N, C, D_{out}, H_{out}, W_{out})`. Has the same data type with `input_x`.
@@ -560,7 +564,7 @@ def avg_pool3d(input_x, kernel_size=1, stride=1, padding=0, ceil_mode=False, cou
         raise TypeError("For avg_pool3d, the input input_x must be tensor")

     if len(input_x.shape) != 5:
-        raise ValueError("For avg_pool3d, input must have 5 dim, but got {
+        raise ValueError(f"For avg_pool3d, input must have 5 dim, but got {len(input_x.shape)}.")

     _check_avg_pool3d_padding(padding)

@@ -637,21 +641,21 @@ def adaptive_max_pool1d(input, output_size):
     x_dtype = _get_cache_prim(P.DType)()(input)

     if len(x_in_shape) != 3:
-        raise ValueError("For adaptive_max_pool1d input must have 3 dim, but got {
+        raise ValueError(f"For adaptive_max_pool1d input must have 3 dim, but got {len(x_in_shape)}.")
     if x_in_shape[2] < output_size:
-        raise ValueError("For adaptive_max_pool1d input's last dimension must be greater or equal to "
-                         "output size {}, but got {
+        raise ValueError(f"For adaptive_max_pool1d input's last dimension must be greater or equal to "
+                         f"output size {output_size}, but got {x_in_shape[2]}.")
     if x_in_shape[2] % output_size != 0:
-        raise ValueError("For adaptive_max_pool1d input's last dimension must be divisible by "
-                         "output size {}, but got {
+        raise ValueError(f"For adaptive_max_pool1d input's last dimension must be divisible by "
+                         f"output size {output_size}, but got {x_in_shape[2]}.")
     if is_ascend_backend():
         if x_dtype not in [mstype.float16]:
-            raise TypeError("For adaptive_max_pool1d in Ascend platform, the input dtype must be float16, "
-                            "but got {}."
+            raise TypeError(f"For adaptive_max_pool1d in Ascend platform, the input dtype must be float16, "
+                            f"but got {x_dtype}.")
     else:
         if x_dtype not in [mstype.float16, mstype.float32]:
-            raise TypeError("For adaptive_max_pool1d, the input dtype must be float16 or float32, "
-                            "but got {}."
+            raise TypeError(f"For adaptive_max_pool1d, the input dtype must be float16 or float32, "
+                            f"but got {x_dtype}.")

     expand_ = _get_cache_prim(P.ExpandDims)()
     squeeze_ = _get_cache_prim(P.Squeeze)(2)
@@ -1147,7 +1151,7 @@ def max_unpool3d(x, indices, kernel_size, stride=None, padding=0, output_size=No
     return out


-def binary_cross_entropy_with_logits(logits, label, weight, pos_weight, reduction='mean'):
+def binary_cross_entropy_with_logits(logits, label, weight=None, pos_weight=None, reduction='mean'):
     r"""
     Adds sigmoid activation function to input `logits`, and uses the given logits to compute binary cross entropy
     between the logits and the label.
@@ -1177,7 +1181,7 @@ def binary_cross_entropy_with_logits(logits, label, weight, pos_weight, reductio

     This operator will multiply the output by the corresponding weight.
     The tensor :math:`weight` assigns different weights to each piece of data in the batch,
-    and the tensor :math:`
+    and the tensor :math:`pos\_weight` adds corresponding weights to the positive examples of each category.

     In addition, it can trade off recall and precision by adding weights to positive examples.
     In the case of multi-label classification the loss can be described as:
@@ -1196,17 +1200,21 @@ def binary_cross_entropy_with_logits(logits, label, weight, pos_weight, reductio
         logits (Tensor): Input logits. Data type must be float16 or float32.
         label (Tensor): Ground truth label, has the same shape as `logits`.
             Data type must be float16 or float32.
-        weight (Tensor): A rescaling weight applied to the loss of each batch element. It can be
+        weight (Tensor, optional): A rescaling weight applied to the loss of each batch element. It can be
             broadcast to a tensor with shape of `logits`. Data type must be float16 or float32.
-
+            Default: ``None``, `weight` is a Tensor whose value is ``1``.
+        pos_weight (Tensor, optional): A weight of positive examples. Must be a vector with length equal to the
             number of classes. It can be broadcast to a tensor with shape of `logits`.
-            Data type must be float16 or float32.
-        reduction (str):
-
-
+            Data type must be float16 or float32. Default: ``None``, `pos_weight` is a Tensor whose value is ``1``.
+        reduction (str, optional): Apply specific reduction method to the output: ``'none'`` , ``'mean'`` ,
+            ``'sum'`` . Default: ``'mean'`` .
+
+            - ``'none'``: no reduction will be applied.
+            - ``'mean'``: compute and return the weighted mean of elements in the output.
+            - ``'sum'``: the output elements will be summed.

     Returns:
-        Tensor or Scalar, if `reduction` is 'none'
+        Tensor or Scalar, if `reduction` is ``'none'``, it's a tensor with the same shape and type as input `logits`.
         Otherwise, the output is a scalar.
@@ -1214,7 +1222,7 @@ def binary_cross_entropy_with_logits(logits, label, weight, pos_weight, reductio
         TypeError: If data type of input `logits`, `label`, `weight`, `pos_weight` is neither float16 nor float32.
         TypeError: If data type of input `reduction` is not string.
         ValueError: If `weight` or `pos_weight` can not be broadcast to a tensor with shape of `logits`.
-        ValueError: If `reduction` is not one of 'none'
+        ValueError: If `reduction` is not one of ``'none'``, ``'mean'`` or ``'sum'``.

     Supported Platforms:
         ``Ascend`` ``GPU`` ``CPU``
@@ -1232,10 +1240,15 @@ def binary_cross_entropy_with_logits(logits, label, weight, pos_weight, reductio
         0.3463612
     """

+    if weight is None:
+        weight = ops.ones_like(logits)
+    if pos_weight is None:
+        pos_weight = ops.ones_like(logits)
     bce_with_logits_loss_op = _get_cache_prim(NN_OPS.BCEWithLogitsLoss)(reduction)
     return bce_with_logits_loss_op(logits, label, weight, pos_weight)


+@_function_forbid_reuse
 def dropout(input, p=0.5, training=True, seed=None):
     r"""
     During training, randomly zeroes some of the elements of the input tensor
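With `weight` and `pos_weight` now defaulting to ``None`` and filled in with `ops.ones_like(logits)`, the loss can be called with logits and labels alone. A sketch of the relaxed calling convention (values illustrative):

    import numpy as np
    import mindspore as ms
    from mindspore import ops

    logits = ms.Tensor(np.array([[-0.8, 1.2, 0.7], [-0.1, -0.4, 0.7]]), ms.float32)
    label = ms.Tensor(np.array([[0., 1., 1.], [0., 0., 1.]]), ms.float32)
    # 2.1.0 required explicit weight and pos_weight tensors; 2.2.11 defaults both to ones.
    loss = ops.binary_cross_entropy_with_logits(logits, label)
    print(loss)  # a scalar, since reduction defaults to 'mean'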
@@ -1275,7 +1288,9 @@ def dropout(input, p=0.5, training=True, seed=None):
         return input
     keep_prob = 1 - p
     seed0, seed1 = _get_seed(seed, "dropout")
-
+    dropout_op = P.Dropout(keep_prob=keep_prob, Seed0=seed0, Seed1=seed1)
+    dropout_op = _set_prim_op_user_data(dropout_op, "random_cache", False)
+    out, _ = dropout_op(input)
     return out

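The rewritten body constructs the `P.Dropout` primitive explicitly so it can be tagged with the ``"random_cache"`` user-data flag; together with the `@_function_forbid_reuse` decorator added earlier, this plausibly keeps the stateful random primitive from being cached and replayed across calls. The public call is unchanged; a quick sketch:

    import numpy as np
    import mindspore as ms
    from mindspore import ops

    x = ms.Tensor(np.ones((2, 4)), ms.float32)
    out = ops.dropout(x, p=0.5, seed=1)  # kept elements are scaled by 1/(1-p)
    print(out.shape)  # (2, 4)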
@@ -1820,7 +1835,7 @@ def kl_div(logits, labels, reduction='mean'):
         Its value must be one of ``'none'`` , ``'mean'`` , ``'batchmean'`` or ``'sum'`` . Default: ``'mean'`` .

     Returns:
-        Tensor or Scalar, if `reduction` is 'none'
+        Tensor or Scalar, if `reduction` is ``'none'``, then output is a tensor and has the same shape as `logits`.
         Otherwise, it is a scalar.

     Raises:
@@ -2220,7 +2235,9 @@ def interpolate(input,
             One and only one of size and scale_factor can be set to None. Default: ``None`` .
         mode (str): The sampling algorithm.
             One of 'nearest', 'linear' (3D only), 'bilinear' (4D only), 'trilinear' (5D only), 'bicubic' (4D only),
-            'area', 'nearest-exact'(
+            'area', 'nearest-exact'(matches Scikit-Image and PIL nearest neighbours interpolation algorithms and fixes
+            knows issues with `nearest`, 3D and 4D). Default: ``"nearest"`` .
+
         align_corners (bool): If True, rescale input by :math:`(new\_height - 1) / (height - 1)`, which exactly
             aligns the corners of data and resized data. If False, rescale by :math:`new\_height / height`.
             Default: ``None`` .
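The expanded `mode` description documents ``'nearest-exact'`` for 3D and 4D inputs. A hedged sketch of a call (assumes the `size`/`mode` keywords of the 2.2 `ops.interpolate` signature shown in this file; not verified against every backend):

    import numpy as np
    import mindspore as ms
    from mindspore import ops

    x = ms.Tensor(np.arange(4, dtype=np.float32).reshape(1, 1, 4))
    out = ops.interpolate(x, size=8, mode='nearest-exact')
    print(out.shape)  # (1, 1, 8)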
@@ -2568,10 +2585,12 @@ def soft_margin_loss(input, target, reduction='mean'):
     Args:
         input (Tensor): Predict data. Data type must be float16 or float32.
         target (Tensor): Ground truth data, with the same type and shape as `logits`.
-        reduction (str, optional):
-
-
-
+        reduction (str, optional): Apply specific reduction method to the output: ``'none'`` , ``'mean'`` ,
+            ``'sum'`` . Default: ``'mean'`` .
+
+            - ``'none'``: no reduction will be applied.
+            - ``'mean'``: compute and return the mean of elements in the output.
+            - ``'sum'``: the output elements will be summed.

     Outputs:
         Tensor or Scalar. If `reduction` is ``'none'``, its shape is the same as `logits`.
@@ -2751,6 +2770,55 @@ def soft_shrink(input, lambd=0.5):
     return soft_shrink_op(input)


+def softplus(input, beta=1, threshold=20):  # pylint:disable=redefined-outer-name
+    r"""
+    Applies softplus function to `input` element-wise.
+
+    The softplus function is shown as follows, x is the element of `input` :
+
+    .. math::
+
+        \text{output} = \frac{1}{beta}\log(1 + \exp(\text{beta * x}))
+
+    When :math:`input * beta > threshold`, the implementation converts to the linear function
+    to ensure numerical stability.
+
+    Args:
+        input (Tensor) - Tensor of any dimension.
+            Supported dtypes:
+
+            - GPU/CPU: float16, float32, float64.
+            - Ascend: float16, float32.
+
+        beta (int, optional) - The :math:`\beta` value in softplus function. Default: ``1`` .
+        threshold (int, optional) - When :math:`input * beta > threshold`, converting softplus to a linear function.
+            Default: ``20`` .
+
+    Returns:
+        Tensor, with the same type and shape as the `input` .
+
+    Raises:
+        TypeError: If `input` is not a Tensor.
+        TypeError: If the dtype of `input` is not float16, float32 or float64.
+
+    Supported Platforms:
+        ``Ascend`` ``GPU`` ``CPU``
+
+    Examples:
+        >>> import mindspore
+        >>> import numpy as np
+        >>> from mindspore import Tensor, ops
+        >>> input = Tensor(np.array([0.1, 0.2, 30, 25]), mindspore.float32)
+        >>> output = ops.softplus(input)
+        >>> print(output)
+        [0.7443967 0.79813886 30. 25.]
+    """
+    softplus_op = _get_cache_prim(P.Softplus)()
+    scaling_input = beta * input
+    op_output = (1 / beta) * softplus_op(scaling_input)
+    return ops.select(input * beta > threshold, input, op_output)
+
+
 def silu(x):
     r"""
     Computes Sigmoid Linear Unit of input element-wise. The SiLU function is defined as:
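The `ops.select` fallback in the new `softplus` is the numerical-stability device its docstring mentions: `exp(beta * x)` overflows for large inputs, while softplus(x) is within rounding error of `x` there. A numpy-only sketch of the failure mode the guard avoids:

    import numpy as np

    x = np.float32(100.0)
    print(np.log1p(np.exp(x)))               # inf: exp(100) overflows float32
    print(np.log1p(np.exp(np.float64(x))))   # ~100.0, so returning x itself is safe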
@@ -2860,7 +2928,7 @@ def sigmoid(input):
     >>> print(output)
     [0.7310586 0.880797 0.95257413 0.98201376 0.9933072 ]
     """
-    return
+    return _get_cache_prim(NN_OPS.Sigmoid)()(input)


 def logsigmoid(x):
@@ -2946,11 +3014,19 @@ def dense(input, weight, bias=None):
         _check_is_tensor("bias", bias, "dense")
     weight = ops.t(weight)
     input = ops.matmul(input, weight)
+    input_shape = input.shape
     if bias is not None:
         input = input + bias
+        _check_dense_add_bias_shape(input_shape, input.shape, bias.shape)
     return input


+def _check_dense_add_bias_shape(input_shape, output_shape, bias_shape):
+    """Check that the output has the correct shape after adding bias."""
+    if input_shape != output_shape:
+        raise ValueError(f"For dense, the bias shape {bias_shape} does not match the input shape {input_shape}.")
+
+
 @_primexpr
 def check_dense_inputs_same_shape(input1_shape, input2_shape, prim_name=None):
     """check bidense input Tensors' shape"""
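`dense` computes `matmul(input, weight.T) + bias`; the new helper rejects a `bias` whose broadcasting would silently enlarge the result. A sketch of the accepted and rejected cases (shapes illustrative):

    import numpy as np
    import mindspore as ms
    from mindspore import ops

    x = ms.Tensor(np.random.randn(4, 8), ms.float32)
    w = ms.Tensor(np.random.randn(3, 8), ms.float32)  # (out_features, in_features)
    y = ops.dense(x, w, ms.Tensor(np.zeros(3), ms.float32))
    print(y.shape)  # (4, 3)
    # A bias of shape (2, 4, 3) would broadcast the (4, 3) product up to (2, 4, 3);
    # the added check raises ValueError instead of returning the inflated result.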
@@ -2965,7 +3041,10 @@ def bidense(input1, input2, weight, bias=None):
     Applies bilinear dense connected layer for `input1` and `input2`. The bilinear dense function is defined as:

     .. math::
-        output =
+        output = x_{1}^{T}Ax_{2} + b
+
+    :math:`x_{1}` represents `input1` , :math:`x_{2}` represents `input2` , :math:`A` represents `weight` ,
+    :math:`b` represents `bias` .

     .. warning::
         This is an experimental API that is subject to change or deletion.
@@ -3391,7 +3470,9 @@ def relu6(x):
     It returns :math:`\min(\max(0,x), 6)` element-wise.

     Args:
-        x (Tensor):
+        x (Tensor): Tensor of shape :math:`(N, *)`,
+            where :math:`*` means any number of additional dimensions.
+            Data type must be float16, float32.

     Returns:
         Tensor, with the same dtype and shape as the `x`.
@@ -3528,6 +3609,9 @@ def rrelu(input, lower=1.0 / 8, upper=1.0 / 3):
     _lower = Tensor(lower, mstype.float32)
     _upper = Tensor(upper, mstype.float32)
     _size = input.shape
+    if ops.is_sequence_value_unknown(_size):
+        dyn_shape = _get_cache_prim(P.TensorShape)()
+        _size = dyn_shape(input)
     sign_matrix = _get_cache_prim(P.Sign)()(input)
     negative_filter = sign_matrix.clip(None, 0)
     positive_filter = sign_matrix.clip(0, None)
@@ -3615,11 +3699,10 @@ def cross_entropy(input, target, weight=None, ignore_index=-100, reduction='mean
         l_n = - w_{y_n} \log \frac{\exp(x_{n,y_n})}{\sum_{c=1}^C \exp(x_{n,c})}
         \cdot \mathbb{1}\{y_n \not= \text{ignore_index}\}

-    where :math:`x` is the inputs, :math:`y` is the target, :math:`w` is the weight,
-
-    classes.
+    where :math:`x` is the inputs, :math:`y` is the target, :math:`w` is the weight, N is the batch size,
+    :math:`c` belonging to :math:`[0, C-1]` is class index, where :math:`C` is the number of classes.

-    If reduction is not
+    If `reduction` is not ``None`` (default ``'mean'`` ), then

     .. math::

@@ -3638,11 +3721,10 @@ def cross_entropy(input, target, weight=None, ignore_index=-100, reduction='mean
         \ell(x, y) = L = \{l_1,\dots,l_N\}^\top, \quad
         l_n = - \sum_{c=1}^C w_c \log \frac{\exp(x_{n,c})}{\sum_{i=1}^C \exp(x_{n,i})} y_{n,c}

-    where :math:`x` is the inputs, :math:`y` is the target, :math:`w` is the weight,
-
-    classes.
+    where :math:`x` is the inputs, :math:`y` is the target, :math:`w` is the weight, N is the batch size,
+    :math:`c` belonging to :math:`[0, C-1]` is class index, where :math:`C` is the number of classes.

-    If reduction is not
+    If `reduction` is not ``None`` (default ``'mean'`` ), then

     .. math::

@@ -3658,16 +3740,19 @@ def cross_entropy(input, target, weight=None, ignore_index=-100, reduction='mean
             in case of 2D Loss, or :math:`(N, C, d_1, d_2, ..., d_K)`.
             `input` is expected to be log-probabilities, data type must be float16 or float32.
         target (Tensor): For class indices, tensor of shape :math:`()`, :math:`(N)` or
-            :math:`(N, d_1, d_2, ..., d_K)` , data type must be int32.
-
-            data type must be float16 or float32.
+            :math:`(N, d_1, d_2, ..., d_K)` , data type must be int32. For probabilities, tensor of shape :math:`(C,)` ,
+            :math:`(N, C)` or :math:`(N, C, d_1, d_2, ..., d_K)` , data type must be float16 or float32.
         weight (Tensor): A rescaling weight applied to the loss of each batch element.
-            If not None, the shape is :math:`(C,)`,
-            data type must be float16 or float32. Default: ``None`` .
+            If not None, the shape is :math:`(C,)`, data type must be float16 or float32. Default: ``None`` .
         ignore_index (int): Specifies a target value that is ignored
             and does not contribute to the input gradient. Default: ``-100`` .
-        reduction (str):
-            Default: ``'mean'`` .
+        reduction (str, optional): Apply specific reduction method to the output: ``'none'`` , ``'mean'`` ,
+            ``'sum'`` . Default: ``'mean'`` .
+
+            - ``'none'``: no reduction will be applied.
+            - ``'mean'``: compute and return the weighted mean of elements in the output.
+            - ``'sum'``: the output elements will be summed.
+
         label_smoothing (float): Label smoothing values, a regularization tool used to prevent the model
             from overfitting when calculating Loss. The value range is [0.0, 1.0]. Default value: ``0.0`` .

@@ -3678,17 +3763,16 @@ def cross_entropy(input, target, weight=None, ignore_index=-100, reduction='mean
         ``Ascend`` ``GPU`` ``CPU``

     Examples:
-        >>> import mindspore
+        >>> import mindspore as ms
         >>> import numpy as np
-        >>> from mindspore import Tensor, ops
         >>> # Case 1: Indices labels
-        >>> inputs =
-        >>> target =
-        >>> output = ops.cross_entropy(inputs, target)
+        >>> inputs = ms.Tensor(np.random.randn(3, 5), ms.float32)
+        >>> target = ms.Tensor(np.array([1, 0, 4]), ms.int32)
+        >>> output = ms.ops.cross_entropy(inputs, target)
         >>> # Case 2: Probability labels
-        >>> inputs =
-        >>> target =
-        >>> output = ops.cross_entropy(inputs, target)
+        >>> inputs = ms.Tensor(np.random.randn(3, 5), ms.float32)
+        >>> target = ms.Tensor(np.random.randn(3, 5), ms.float32)
+        >>> output = ms.ops.cross_entropy(inputs, target)
     """
     _check_is_tensor('input', input, "cross_entropy_loss")
     _check_is_tensor('target', target, "cross_entropy_loss")
@@ -3743,7 +3827,7 @@ def nll_loss(inputs, target, weight=None, ignore_index=-100, reduction='mean', l
     N is the batch size, :math:`c` belonging to :math:`[0, C-1]` is class index, where :math:`C` is the number of
     classes.

-    If reduction is not
+    If `reduction` is not ``None`` (default 'mean'), then

     .. math::

@@ -3763,8 +3847,13 @@ def nll_loss(inputs, target, weight=None, ignore_index=-100, reduction='mean', l
             The data type must be float16 or float32. Default: ``None`` .
         ignore_index (int): Specifies a target value that is ignored
             and does not contribute to the input gradient. Default: ``-100`` .
-        reduction (str):
-            Default: ``'mean'`` .
+        reduction (str, optional): Apply specific reduction method to the output: ``'none'`` , ``'mean'`` ,
+            ``'sum'`` . Default: ``'mean'`` .
+
+            - ``'none'``: no reduction will be applied.
+            - ``'mean'``: compute and return the weighted mean of elements in the output.
+            - ``'sum'``: the output elements will be summed.
+
         label_smoothing (float): Label smoothing values, a regularization tool used to prevent the model
             from overfitting when calculating Loss. The value range is [0.0, 1.0]. Default value: ``0.0`` .

@@ -3858,7 +3947,7 @@ def l1_loss(input, target, reduction='mean'):
     r"""
     Calculate the mean absolute error between the `input` value and the `target` value.

-    Assuming that the :math:`x` and :math:`y` are 1-D Tensor, length :math:`N`, `reduction` is set to "none"
+    Assuming that the :math:`x` and :math:`y` are 1-D Tensor, length :math:`N`, `reduction` is set to ``"none"``,
     then calculate the loss of :math:`x` and :math:`y` without dimensionality reduction.

     The formula is as follows:
@@ -3881,18 +3970,21 @@ def l1_loss(input, target, reduction='mean'):
         input (Tensor): Predicted value, Tensor of any dimension.
         target (Tensor): Target value, usually has the same shape as the `input`.
             If `input` and `target` have different shape, make sure they can broadcast to each other.
-        reduction (str, optional):
-
-
+        reduction (str, optional): Apply specific reduction method to the output: ``'none'`` , ``'mean'`` ,
+            ``'sum'`` . Default: ``'mean'`` .
+
+            - ``'none'``: no reduction will be applied.
+            - ``'mean'``: compute and return the mean of elements in the output.
+            - ``'sum'``: the output elements will be summed.

     Returns:
-        Tensor or Scalar, if `reduction` is "none"
+        Tensor or Scalar, if `reduction` is ``"none"``, return a Tensor with same shape and dtype as `input`.
         Otherwise, a scalar value will be returned.

     Raises:
         TypeError: If `input` is not a Tensor.
         TypeError: If `target` is not a Tensor.
-        ValueError: If `reduction` is not one of "none"
+        ValueError: If `reduction` is not one of ``"none"``, ``"mean"`` or ``"sum"``.

     Supported Platforms:
         ``Ascend`` ``GPU`` ``CPU``
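The reduction triple ('none' / 'mean' / 'sum') documented above recurs across this release's loss functions. For reference, a minimal NumPy sketch of its semantics for l1_loss (an illustration, not the MindSpore kernel; the helper name is invented):

import numpy as np

def l1_loss_ref(x, y, reduction="mean"):
    # Element-wise absolute error, then the selected reduction.
    loss = np.abs(x - y)
    if reduction == "none":
        return loss          # same shape and dtype as the input
    if reduction == "mean":
        return loss.mean()   # scalar
    if reduction == "sum":
        return loss.sum()    # scalar
    raise ValueError("reduction must be one of 'none', 'mean', 'sum'")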
@@ -3948,16 +4040,20 @@ def smooth_l1_loss(input, target, beta=1.0, reduction='none'):
         target (Tensor): Ground truth data, tensor of shape :math:`(N, *)`, same shape and dtype as the `input`.
         beta (float): A parameter used to control the point where the function will change between
             L1 to L2 loss. The value should be greater than zero. Default: ``1.0`` .
-        reduction (str): Apply specific reduction method to the output: ``'none'`` , ``'mean'``
-            Default: ``'none'`` .
+        reduction (str, optional): Apply specific reduction method to the output: ``'none'`` , ``'mean'`` ,
+            ``'sum'`` . Default: ``'none'`` .
+
+            - ``'none'``: no reduction will be applied.
+            - ``'mean'``: compute and return the mean of elements in the output.
+            - ``'sum'``: the output elements will be summed.

     Returns:
-        Tensor, if `reduction` is 'none'
+        Tensor, if `reduction` is ``'none'``, then output is a tensor with the same shape as `input`.
         Otherwise, the shape of output tensor is :math:`(1,)`.

     Raises:
         TypeError: If `beta` is not a float.
-        ValueError: If `reduction` is not one of 'none'
+        ValueError: If `reduction` is not one of ``'none'``, ``'mean'``, ``'sum'``.
         TypeError: If dtype of `input` or `target` is not one of float16, float32, float64.
         ValueError: If `beta` is less than or equal to 0.
         ValueError: If shape of `input` is not the same as `target`.
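As a quick reference for the beta switch point named above, a NumPy sketch of the piecewise definition (assuming the standard beta-parameterized form; not the kernel itself):

import numpy as np

def smooth_l1_ref(x, y, beta=1.0, reduction="none"):
    diff = np.abs(x - y)
    # Quadratic for |x - y| < beta, linear beyond it; continuous at the switch.
    loss = np.where(diff < beta, 0.5 * diff ** 2 / beta, diff - 0.5 * beta)
    if reduction == "mean":
        return loss.mean()
    if reduction == "sum":
        return loss.sum()
    return loss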
@@ -4072,6 +4168,7 @@ def leaky_relu(input, alpha=0.2):
     select_op = _get_cache_prim(P.Maximum)()
     if alpha > 1:
         select_op = _get_cache_prim(P.Minimum)()
+    alpha = _get_cache_prim(P.Cast)()(F.scalar_to_tensor(alpha), input.dtype)
     return select_op(alpha * input, input)

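The new Cast line keeps `alpha` in the input's dtype before the elementwise multiply. The max/min trick itself is easiest to see in plain NumPy (a sketch of the semantics, not the primitive-based code above):

import numpy as np

def leaky_relu_ref(x, alpha=0.2):
    # For alpha <= 1, max(alpha*x, x) leaves x >= 0 unchanged and scales x < 0.
    # For alpha > 1 the inequality flips, so min is used instead.
    return np.maximum(alpha * x, x) if alpha <= 1 else np.minimum(alpha * x, x)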
@@ -4158,6 +4255,10 @@ def lrn(x, depth_radius=5, bias=1.0, alpha=1.0, beta=0.5, norm_region="ACROSS_CH
     r"""
     Local Response Normalization.

+    .. warning::
+        lrn is deprecated on Ascend due to potential accuracy problem. It's recommended to use other
+        normalization methods, e.g. :class:`mindspore.ops.batch_norm`.
+
     .. math::

         b_{c} = a_{c}\left(k + \frac{\alpha}{n}
@@ -4186,7 +4287,7 @@ def lrn(x, depth_radius=5, bias=1.0, alpha=1.0, beta=0.5, norm_region="ACROSS_CH
         TypeError: If `x` is not a Tensor.

     Supported Platforms:
-        ``
+        ``GPU`` ``CPU``

     Examples:
         >>> import mindspore
@@ -4219,7 +4320,11 @@ def mish(x):
     <https://arxiv.org/abs/1908.08681>`_.

     Args:
-        x (Tensor): The input Tensor
+        x (Tensor): The input Tensor.
+            Supported dtypes:
+
+            - GPU/CPU: float16, float32, float64.
+            - Ascend: float16, float32.

     Returns:
         Tensor, with the same type and shape as the `x`.
@@ -4320,10 +4425,40 @@ def _check_type_and_shape_same(param_name1, input_data1, param_name2, input_data


 def margin_ranking_loss(input1, input2, target, margin=0.0, reduction='mean'):
-    """
+    r"""
     MarginRankingLoss creates a criterion that measures the loss.

-
+    Given two tensors :math:`input1`, :math:`input2` and a Tensor label :math:`target` with values 1 or -1,
+    the operation is as follows:
+
+    .. math::
+        \text{loss}(input1, input2, target) = \max(0, -target * (input1 - input2) + \text{margin})
+
+    Args:
+        input1 (Tensor): Tensor of shape :math:`(N, *)` where :math:`*` means, any number of additional dimensions.
+        input2 (Tensor): Tensor of shape :math:`(N, *)`, same shape and dtype as `input1`.
+        target (Tensor): Contains value 1 or -1. Suppose the shape of `input1` is
+            :math:`(x_1, x_2, x_3, ..., x_R)`, then the shape of `target` must be :math:`(x_1, x_2, x_3, ..., x_R)`.
+        margin (float, optional): Specify the adjustment factor of the operation. Default: ``0.0`` .
+        reduction (str, optional): Apply specific reduction method to the output: ``'none'`` , ``'mean'`` ,
+            ``'sum'`` . Default: ``'mean'`` .
+
+            - ``'none'``: no reduction will be applied.
+            - ``'mean'``: compute and return the mean of elements in the output.
+            - ``'sum'``: the output elements will be summed.
+
+    Returns:
+        Tensor or Scalar. If `reduction` is ``"none"``, its shape is the same as `labels`.
+        Otherwise, a scalar value will be returned.
+
+    Raises:
+        TypeError: If `margin` is not a float.
+        TypeError: If `input1`, `input2` or `target` is not a Tensor.
+        TypeError: If the types of `input1` and `input2` are inconsistent.
+        TypeError: If the types of `input1` and `target` are inconsistent.
+        ValueError: If the shape of `input1` and `input2` are inconsistent.
+        ValueError: If the shape of `input1` and `target` are inconsistent.
+        ValueError: If `reduction` is not one of ``'none'``, ``'mean'`` , ``'sum'``.

     Supported Platforms:
         ``Ascend`` ``GPU`` ``CPU``
@@ -4334,7 +4469,7 @@ def margin_ranking_loss(input1, input2, target, margin=0.0, reduction='mean'):
         >>> import numpy as np
         >>> input1 = Tensor(np.array([0.3864, -2.4093, -1.4076]), ms.float32)
         >>> input2 = Tensor(np.array([-0.6012, -1.6681, 1.2928]), ms.float32)
-        >>> target =
+        >>> target = ops.Sign()(Tensor(np.array([-2, -2, 3]), ms.float32))
         >>> output = ops.margin_ranking_loss(input1, input2, target)
         >>> print(output)
         1.2293333
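The doctest value can be reproduced by hand from the formula above: target is sign([-2, -2, 3]) = [-1, -1, 1], and the mean of max(0, -target * (input1 - input2)) over the three elements is about 1.2293. A NumPy check (illustrative, not the operator):

import numpy as np

input1 = np.array([0.3864, -2.4093, -1.4076])
input2 = np.array([-0.6012, -1.6681, 1.2928])
target = np.sign(np.array([-2.0, -2.0, 3.0]))        # [-1., -1., 1.]
loss = np.maximum(0.0, -target * (input1 - input2))  # margin = 0.0
print(loss.mean())                                   # ~1.2293333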
@@ -4375,17 +4510,20 @@ def cosine_embedding_loss(input1, input2, target, margin=0.0, reduction="mean"):
         target (Tensor): Contains value 1 or -1. Suppose the shape of `input1` is
             :math:`(x_1, x_2, x_3, ..., x_R)`, then the shape of `target` must be :math:`(x_1, x_3, x_4, ..., x_R)`.
         margin (float, optional): Should be in [-1.0, 1.0]. Default: 0.0.
-        reduction (str, optional):
-
-
+        reduction (str, optional): Apply specific reduction method to the output: ``'none'`` , ``'mean'`` ,
+            ``'sum'`` . Default: ``'mean'`` .
+
+            - ``'none'``: no reduction will be applied.
+            - ``'mean'``: compute and return the mean of elements in the output.
+            - ``'sum'``: the output elements will be summed.

     Returns:
-        Tensor or Scalar, if `reduction` is "none"
+        Tensor or Scalar, if `reduction` is ``"none"``, its shape is the same as `target`.
         Otherwise, a scalar value will be returned.

     Raises:
         TypeError: If `margin` is not a float.
-        ValueError: If `reduction` is not one of 'none'
+        ValueError: If `reduction` is not one of ``'none'``, ``'mean'``, ``'sum'``.
         ValueError: If `margin` is not in range [-1, 1].

     Supported Platforms:
@@ -4471,6 +4609,19 @@ def max_pool3d(x, kernel_size, stride=None, padding=0, dilation=1, ceil_mode=Fal

         - **output** (Tensor) - Maxpooling result, with shape :math:`(N_{out}, C_{out}, D_{out}, H_{out}, W_{out})`.
           It has the same data type as `x`.
+
+          .. math::
+              D_{out} = \left\lfloor\frac{D_{in} + 2 \times \text{padding}[0] - \text{dilation}[0] \times
+              (\text{kernel_size}[0] - 1) - 1}{\text{stride}[0]} + 1\right\rfloor
+
+          .. math::
+              H_{out} = \left\lfloor\frac{H_{in} + 2 \times \text{padding}[1] - \text{dilation}[1] \times
+              (\text{kernel_size}[1] - 1) - 1}{\text{stride}[1]} + 1\right\rfloor
+
+          .. math::
+              W_{out} = \left\lfloor\frac{W_{in} + 2 \times \text{padding}[2] - \text{dilation}[2] \times
+              (\text{kernel_size}[2] - 1) - 1}{\text{stride}[2]} + 1\right\rfloor
+
         - **argmax** (Tensor) - Index corresponding to the maximum value. Data type is int64. It will be returned
           only when `return_indices` is ``True`` .

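All three output-size formulas share one per-axis pattern; a small helper mirroring it (the helper name is ours, for illustration):

import math

def pooled_dim(size, padding, dilation, kernel_size, stride):
    # floor((size + 2*padding - dilation*(kernel_size - 1) - 1) / stride + 1)
    return math.floor((size + 2 * padding - dilation * (kernel_size - 1) - 1) / stride + 1)

print(pooled_dim(16, 1, 1, 3, 2))  # D_in=16, padding=1, dilation=1, kernel=3, stride=2 -> 8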
@@ -4529,14 +4680,24 @@ def grid_sample(input, grid, mode='bilinear', padding_mode='zeros', align_corner

     Args:
         input (Tensor): input with shape of :math:`(N, C, H_{in}, W_{in})` (4-D case) or :math:`(N, C, D_{in},
-            H_{in}, W_{in})` (5-D case) and dtype of
+            H_{in}, W_{in})` (5-D case) and dtype of float32 or float64.
         grid (Tensor): flow-field with shape of :math:`(N, H_{out}, W_{out}, 2)` (4-D case) or :math:`(N, D_{out},
             H_{out}, W_{out}, 3)` (5-D case) and same dtype as `input`.
         mode (str): An optional string specifying the interpolation method. The optional values are
-            'bilinear'
+            ``'bilinear'``, ``'nearest'``. Default: ``'bilinear'`` . Note: `bicubic` is not supported yet. When
             `mode="bilinear"` and the input is 5-D, the interpolation mode used internally will actually
             be trilinear. However, when the input is 4-D, the interpolation mode will legitimately be bilinear.
             Default: ``'bilinear'`` .
+
+            - ``'nearest'``: Nearest neighbor interpolation. Each output pixel is assigned the value of the
+              nearest input pixel. This method is simple and fast but can result in blocky or pixelated outputs.
+            - ``'bilinear'``: Bilinear interpolation. Each output pixel is a weighted average of the four nearest input
+              pixels, computed using bilinear interpolation. This method produces smoother results compared
+              to nearest neighbor interpolation.
+            - ``'trilinear'``: Trilinear interpolation. This is an extension of bilinear interpolation to 3D data.
+              It performs bilinear interpolation in the two spatial dimensions and linear interpolation along
+              the third dimension. It is commonly used for volume or 3D image interpolation.
+
         padding_mode (str): An optional string specifying the pad method. The optional values are "zeros", "border" or
             "reflection". Default: ``'zeros'`` .
         align_corners (bool): An optional bool. If set to `True`, the extrema (-1 and 1) are considered as referring to
@@ -4617,10 +4778,13 @@ def ctc_loss(log_probs, targets, input_lengths, target_lengths, blank=0, reducti
         input_lengths (Union(tuple, Tensor)): Lengths of the input. A tuple or Tensor of shape(N).
         target_lengths (Union(tuple, Tensor)): Lengths of the target. A tuple or Tensor of shape(N).
         blank (int, optional): The blank label. Default: ``0`` .
-        reduction (str, optional):
-            ``'
-
-
+        reduction (str, optional): Apply specific reduction method to the output: ``'none'`` , ``'mean'`` ,
+            ``'sum'`` . Default: ``'mean'`` .
+
+            - ``'none'``: no reduction will be applied.
+            - ``'mean'``: compute and return the mean of elements in the output.
+            - ``'sum'``: the output elements will be summed.
+
         zero_infinity (bool, optional): Whether to set infinite loss and correlation gradient to 0. Default: ``False`` .

     Returns:
@@ -4704,8 +4868,12 @@ def gaussian_nll_loss(x, target, var, full=False, eps=1e-6, reduction='mean'):
         full (bool, optional): Include the constant term in the loss calculation. When :math:`full=True`,
             the constant term will be :math:`const = 0.5*log(2\pi)`. Default: ``False``.
         eps (float, optional): Used to improve the stability of log function must be greater than 0. Default: ``1e-6`` .
-        reduction (str, optional): Apply specific reduction method to the
-
+        reduction (str, optional): Apply specific reduction method to the output: ``'none'`` , ``'mean'`` ,
+            ``'sum'`` . Default: ``'mean'`` .
+
+            - ``'none'``: no reduction will be applied.
+            - ``'mean'``: compute and return the mean of elements in the output.
+            - ``'sum'``: the output elements will be summed.

     Returns:
         Tensor or Tensor scalar, the computed loss depending on :math:`reduction`.
@@ -4722,8 +4890,7 @@ def gaussian_nll_loss(x, target, var, full=False, eps=1e-6, reduction='mean'):

     Examples:
         >>> import numpy as np
-        >>> from mindspore import Tensor
-        >>> import mindspore.ops as ops
+        >>> from mindspore import Tensor, ops
         >>> import mindspore.common.dtype as mstype
         >>> arr1 = np.arange(8).reshape((4, 2))
         >>> arr2 = np.array([2, 3, 1, 4, 6, 4, 4, 9]).reshape((4, 2))
@@ -4831,9 +4998,12 @@ def hinge_embedding_loss(inputs, targets, margin=1.0, reduction='mean'):
             Has the same shape as `inputs`, contains -1 or 1.
         margin (float, int): Threshold defined by Hinge Embedding Loss :math:`margin`.
             Represented as :math:`\Delta` in the formula. Default: ``1.0`` .
-        reduction (str):
-            ``'
-
+        reduction (str, optional): Apply specific reduction method to the output: ``'none'`` , ``'mean'`` ,
+            ``'sum'`` . Default: ``'mean'`` .
+
+            - ``'none'``: no reduction will be applied.
+            - ``'mean'``: compute and return the mean of elements in the output.
+            - ``'sum'``: the output elements will be summed.

     Returns:
         Tensor or Tensor scalar, the computed loss depending on :math:`reduction`.
@@ -4843,7 +5013,7 @@ def hinge_embedding_loss(inputs, targets, margin=1.0, reduction='mean'):
         TypeError: If `targets` is not a Tensor.
         TypeError: If `margin` is not a float or int.
         ValueError: If `targets` does not have the same shape as `inputs` or they could not broadcast to each other.
-        ValueError: If `reduction` is not one of 'none'
+        ValueError: If `reduction` is not one of ``'none'``, ``'mean'``, ``'sum'``.

     Supported Platforms:
         ``Ascend`` ``GPU`` ``CPU``
@@ -4889,6 +5059,9 @@ def ctc_greedy_decoder(inputs, sequence_length, merge_repeated=True):
     r"""
     Performs greedy decoding on the logits given in inputs.

+    Note:
+        On Ascend, 'merge_repeated' cannot be set to False.
+
     Args:
         inputs (Tensor): The input Tensor must be a 3-D tensor whose shape is
             :math:`(max\_time, batch\_size, num\_classes)`. `num_classes` must be `num_labels + 1` classes,
@@ -5068,74 +5241,87 @@ def _check_conv_iterable_lengths(iterable, dim, iter_name):

 def conv1d(input, weight, bias=None, stride=1, pad_mode="valid", padding=0, dilation=1, groups=1):
     r"""
-    Applies a 1D convolution over an input tensor.
-
-    where :math:`N` is batch size, :math:`
-
-
+    Applies a 1D convolution over an input tensor. The input Tensor is typically
+    of shape :math:`(N, C_{in}, L_{in})`,
+    where :math:`N` is batch size, :math:`C` is channel number, :math:`L` is input sequence width.
+
+    The output is calculated based on formula:

     .. math::

-
+        \text{out}(N_i, C_{\text{out}_j}) = \text{bias}(C_{\text{out}_j}) +
+        \sum_{k = 0}^{C_{in} - 1} \text{ccor}({\text{weight}(C_{\text{out}_j}, k), \text{X}(N_i, k)})
+
+    where :math:`bias` is the output channel bias, :math:`ccor` is
+    the `cross-correlation <https://en.wikipedia.org/wiki/Cross-correlation>`_,
+    :math:`weight` is the convolution kernel value and :math:`X` represents the input feature map.
+
+    Here are the indices' meanings:
+
+    - :math:`i` corresponds to the batch number, ranging from 0 to N-1, where N is the batch size of the input.
+
+    - :math:`j` corresponds to the output channel, ranging from 0 to C_{out}-1, where C_{out} is the number of
+      output channels, which is also equal to the number of kernels.

-
-
-    from :math:`0` to :math:`C_{out} - 1`, :math:`W_{ij}` corresponds to the :math:`i`-th channel of the :math:`j`-th
-    filter and :math:`out_{j}` corresponds to the :math:`j`-th channel of the output. :math:`W_{j}` is a slice
-    of kernel, and it has shape :math:`(\text{kernal_size})`, where :math:`\text{kernel_size}` is the width of
-    the convolution kernel. The full kernel has shape :math:`(C_{out}, C_{in} / \text{groups}, \text{kernel_size})`,
-    where `groups` is the group number to split the input in the channel dimension.
+    - :math:`k` corresponds to the input channel, ranging from 0 to C_{in}-1, where C_{in} is the number of
+      input channels, which is also equal to the number of channels in the convolutional kernels.

-
-
-
+    Therefore, in the above formula, :math:`{bias}(C_{out_j})` represents the bias of the :math:`j`-th
+    output channel, :math:`{weight}(C_{out_j}, k)` represents the slice of the :math:`j`-th convolutional
+    kernel in the :math:`k`-th channel, and :math:`{X}(N_i, k)` represents the slice of the :math:`k`-th input
+    channel in the :math:`i`-th batch of the input feature map.

-
-    :math:`
-
-
+    The shape of the convolutional kernel is given by :math:`(kernel\_size)`,
+    where :math:`kernel\_size` is the width of the kernel.
+    If we consider the input and output channels as well as the `group` parameter, the complete kernel shape
+    will be :math:`(C_{out}, C_{in} / \text{group}, \text{kernel_size})`,
+    where `group` is the number of groups dividing `x`'s input channel when applying group convolution.

-
-    <http://vision.stanford.edu/cs598_spring07/papers/Lecun98.pdf>`_
-    `ConvNets <http://cs231n.github.io/convolutional-networks/>`_ .
+    For more details about convolution layer, please refer to `Gradient Based Learning Applied to Document Recognition
+    <http://vision.stanford.edu/cs598_spring07/papers/Lecun98.pdf>`_
+    and `ConvNets <http://cs231n.github.io/convolutional-networks/>`_ .

     Note:
         On Ascend platform, only group convolution in depthwise convolution scenarios is supported.
         That is, when `groups>1`, condition `C_{in}` = `C_{out}` = `groups` must be satisfied.

     Args:
-        input (Tensor): Tensor of shape :math:`(N, C_{in},
-        weight (Tensor):
-            :math:`(N, C_{in} / \text{groups}, \text{kernel_size})
-
-        bias (Tensor): Bias Tensor with shape :math:`(C_{out})`.
+        input (Tensor): Input Tensor of shape :math:`(N, C_{in}, L_{in})`.
+        weight (Tensor): The convolutional kernel value, it should have shape
+            :math:`(N, C_{in} / \text{groups}, \text{kernel_size})`.
+        bias (Tensor, optional): Bias Tensor with shape :math:`(C_{out})`.
             When bias is None, zeros will be used. Default: ``None`` .
         stride (Union(int, tuple[int]), optional): The distance of kernel moving, an int number or a tuple of one int
-            that represents width of movement. Default: 1
+            that represents width of movement. Default: ``1``.
         pad_mode (str, optional): Specifies padding mode. The optional values are
             ``"same"`` , ``"valid"`` and ``"pad"`` . Default: ``"valid"`` .

-            - same
+            - ``"same"``: Adopts the way of completion. The height and width of the output will be equal to
              the input `x` divided by stride. The padding will be evenly calculated in left and right possibly.
              Otherwise, the last extra padding will be calculated from the right side.
              If this mode is set, `padding` must be 0.

-            - valid
+            - ``"valid"``: Adopts the way of discarding. The possible largest width of output will be returned
              without padding. Extra pixels will be discarded. If this mode is set, `padding` must be 0.

-            - pad
+            - ``"pad"``: Implicit paddings on both sides of the input `x`.
+              The number of `padding` will be padded to the input
              Tensor borders. `padding` must be greater than or equal to 0.
-        padding (Union(int, tuple[int], list[int]), optional):
+        padding (Union(int, tuple[int], list[int]), optional): Specifies the amount of padding to apply on
+            both side of `input` when `pad_mode` is set to ``"pad"``. The
            paddings of left and right are the same, equal to padding or padding[0] when padding is a tuple of
            1 integer. Default: ``0`` .
-        dilation (Union(int, tuple[int]), optional):
-
-
-
+        dilation (Union(int, tuple[int]), optional): Specifies the dilation rate to use for dilated convolution.
+            It can be a single int or a tuple of 1 integer.
+            Assuming :math:`dilation=(d0,)`, the convolutional kernel samples the input with a
+            spacing of :math:`d0-1` elements in the width direction.
+            The value should be in the ranges [1, L].
+            Default: ``1`` .
         groups (int, optional): Splits `input` into groups. Default: ``1`` .

     Returns:
-        Tensor, the value that applied 1D convolution. The shape is :math:`(N, C_{out},
+        Tensor, the value that applied 1D convolution. The shape is :math:`(N, C_{out}, L_{out})`.
+        To see how different pad modes affect the output shape, please refer to
+        :class:`mindspore.nn.Conv1d` for more details.

     Raises:
         TypeError: If `stride`, `padding` or `dilation` is neither an int nor a tuple.
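The rewritten formula is a per-output-channel sum of 1D cross-correlations. A direct NumPy transcription for the groups=1, stride=1, unpadded case (an illustrative reference, not the operator):

import numpy as np

def conv1d_ref(x, weight, bias=None):
    # x: (N, C_in, L_in); weight: (C_out, C_in, kernel_size); bias: (C_out,)
    n, c_in, l_in = x.shape
    c_out, _, k = weight.shape
    out = np.zeros((n, c_out, l_in - k + 1))
    for i in range(n):                  # batch index i
        for j in range(c_out):          # output channel j
            for t in range(out.shape[-1]):
                # ccor(weight(j), X(i)) summed over input channels
                out[i, j, t] = np.sum(x[i, :, t:t + k] * weight[j])
    if bias is not None:
        out += bias.reshape(1, -1, 1)
    return out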
@@ -5204,40 +5390,44 @@ def conv1d(input, weight, bias=None, stride=1, pad_mode="valid", padding=0, dila

 def conv2d(input, weight, bias=None, stride=1, pad_mode="valid", padding=0, dilation=1, groups=1):
     r"""
-    Applies a 2D convolution over an input tensor.
-
-
-
-
+    Applies a 2D convolution over an input tensor. The input tensor is typically of
+    shape :math:`(N, C_{in}, H_{in}, W_{in})`, where :math:`N` is batch size, :math:`C` is
+    channel number, :math:`H` is feature height, :math:`W` is feature width.
+
+    The output is calculated based on formula:

     .. math::

-
+        \text{out}(N_i, C_{\text{out}_j}) = \text{bias}(C_{\text{out}_j}) +
+        \sum_{k = 0}^{C_{in} - 1} \text{ccor}({\text{weight}(C_{\text{out}_j}, k), \text{X}(N_i, k)})
+
+    where :math:`bias` is the output channel bias, :math:`ccor` is
+    the `cross-correlation <https://en.wikipedia.org/wiki/Cross-correlation>`_,
+    :math:`weight` is the convolution kernel value and :math:`X` represents the input feature map.
+
+    Here are the indices' meanings:
+
+    - :math:`i` corresponds to the batch number, ranging from 0 to N-1, where N is the batch size of the input.

-
-
-    from :math:`0` to :math:`C_{out} - 1`, :math:`W_{ij}` corresponds to the :math:`i`-th channel of the :math:`j`-th
-    filter and :math:`out_{j}` corresponds to the :math:`j`-th channel of the output. :math:`W_{ij}` is a slice
-    of kernel, and it has shape :math:`(\text{kernel_size[0]}, \text{kernel_size[1]})`, where :math:`\text{
-    kernel_size[0]}` and :math:`\text{kernel_size[1]}` are the height and width of the convolution kernel.
-    The full kernel has shape :math:`(C_{out}, C_{in} / \text{groups}, \text{kernel_size[0]}, \text{kernel_size[1]})`,
-    where `groups` is the group number to split the input in the channel dimension.
+    - :math:`j` corresponds to the output channel, ranging from 0 to C_{out}-1, where C_{out} is the number of
+      output channels, which is also equal to the number of kernels.

-
-
-    (\text{kernel_size[0]} - 1) \times(\text{dilation[0]} - 1)} {\text { stride[0] }}} \right \rfloor` and
+    - :math:`k` corresponds to the input channel, ranging from 0 to C_{in}-1, where C_{in} is the number of
+      input channels, which is also equal to the number of channels in the convolutional kernels.

-
-
+    Therefore, in the above formula, :math:`{bias}(C_{out_j})` represents the bias of the :math:`j`-th
+    output channel, :math:`{weight}(C_{out_j}, k)` represents the slice of the :math:`j`-th convolutional
+    kernel in the :math:`k`-th channel, and :math:`{X}(N_i, k)` represents the slice of the :math:`k`-th input
+    channel in the :math:`i`-th batch of the input feature map.

-
-    :math:`
-
-
+    The shape of the convolutional kernel is given by :math:`(kernel\_size[0], kernel\_size[1])`,
+    where :math:`kernel\_size[0]` and :math:`kernel\_size[1]` are the height and width of the kernel, respectively.
+    If we consider the input and output channels as well as the `group` parameter, the complete kernel shape
+    will be :math:`(C_{out}, C_{in} / \text{group}, \text{kernel_size[0]}, \text{kernel_size[1]})`,
+    where `group` is the number of groups dividing `x`'s input channel when applying group convolution.

-
-    <http://vision.stanford.edu/cs598_spring07/papers/Lecun98.pdf>`_
-    `ConvNets <http://cs231n.github.io/convolutional-networks/>`_
+    For more details about convolution layer, please refer to `Gradient Based Learning Applied to Document Recognition
+    <http://vision.stanford.edu/cs598_spring07/papers/Lecun98.pdf>`_ and
+    `ConvNets <http://cs231n.github.io/convolutional-networks/>`_.

     Note:
         On Ascend platform, only group convolution in depthwise convolution scenarios is supported.
@@ -5248,7 +5438,7 @@ def conv2d(input, weight, bias=None, stride=1, pad_mode="valid", padding=0, dila
         weight (Tensor): Tensor of shape
             :math:`(N, C_{in} / \text{groups}, \text{kernel_size[0]}, \text{kernel_size[1]})`, then the size of kernel
             is :math:`(\text{kernel_size[0]}, \text{kernel_size[1]})`.
-        bias (Tensor): Bias Tensor with shape :math:`(C_{out})`.
+        bias (Tensor, optional): Bias Tensor with shape :math:`(C_{out})`.
             When bias is ``None`` , zeros will be used. Default: ``None`` .
         stride (Union(int, tuple[int]), optional): The distance of kernel moving, an int number that represents
             the height and width of movement are both strides, or a tuple of two int numbers that
@@ -5278,6 +5468,9 @@ def conv2d(input, weight, bias=None, stride=1, pad_mode="valid", padding=0, dila

     Returns:
         Tensor, the value that applied 2D convolution. The shape is :math:`(N, C_{out}, H_{out}, W_{out})`.
+        To see how different pad modes affect the output shape, please refer to
+        :class:`mindspore.nn.Conv2d` for more details.
+

     Raises:
         TypeError: If `stride`, `padding` or `dilation` is neither an int nor a tuple.
@@ -5421,8 +5614,9 @@ def huber_loss(input, target, reduction='mean', delta=1.0):
     Calculates the error between the predicted value and the target value,
     which has the best of both the loss of l1 and the loss of mse.

-    Assuming that the :math:`x` and :math:`y` are 1-D Tensor, length :math:`N`, the reduction parameter
-    then calculate the loss of :math:`x` and :math:`y` without dimensionality reduction.
+    Assuming that the :math:`x` and :math:`y` are 1-D Tensor, length :math:`N`, the `reduction` parameter
+    is set to ``"none"`` then calculate the loss of :math:`x` and :math:`y` without dimensionality reduction.
+    The formula is as follows:

     .. math::
         \ell(x, y) = L = \{l_1,\dots,l_N\}^\top
@@ -5451,21 +5645,25 @@ def huber_loss(input, target, reduction='mean', delta=1.0):
         target (Tensor): Target value, has same dtype and shape as the `input` in common cases.
             However, when the shape of `target` is different from the shape of `input`,
             and they should be broadcasted to each other.
-        reduction (str):
-
-
+        reduction (str, optional): Apply specific reduction method to the output: ``'none'`` , ``'mean'`` ,
+            ``'sum'`` . Default: ``'mean'`` .
+
+            - ``'none'``: no reduction will be applied.
+            - ``'mean'``: compute and return the mean of elements in the output.
+            - ``'sum'``: the output elements will be summed.
+
         delta (Union[int, float]): The threshold to change between two type of loss.
             The value must be greater than zero. Default: ``1.0`` .

     Returns:
-        Tensor or Scalar, if `reduction` is "none"
+        Tensor or Scalar, if `reduction` is ``"none"``, return a Tensor with same shape and dtype as `input`.
         Otherwise, a scalar value will be returned.

     Raises:
         TypeError: If `input` or `target` is not a Tensor.
         TypeError: If dtype of `delta` is neither float nor int.
         ValueError: If `delta` is less than or equal to 0.
-        ValueError: If `reduction` is not one of "none"
+        ValueError: If `reduction` is not one of ``"none"``, ``"mean"``, ``"sum"``.
         ValueError: If `input` and `target` have different shapes and cannot be broadcasted to each other.

     Supported Platforms:
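For reference, the piecewise Huber definition the docstring describes, in NumPy (a sketch of the delta-parameterized form, not the kernel):

import numpy as np

def huber_ref(x, y, delta=1.0, reduction="mean"):
    diff = np.abs(x - y)
    # MSE-like inside |x - y| < delta, L1-like outside; continuous at delta.
    loss = np.where(diff < delta, 0.5 * diff ** 2, delta * (diff - 0.5 * delta))
    if reduction == "none":
        return loss
    return loss.mean() if reduction == "mean" else loss.sum()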
@@ -5655,15 +5853,20 @@ def bias_add(input_x, bias):
         consistent with the shape of the `input_x` Tensor.

     Args:
-        input_x (Tensor): The input tensor. The shape can be 2-5 dimensions.
-
+        input_x (Tensor): The input tensor. The shape can be 2-5 dimensions. Supported dtypes:
+
+            - Ascend/CPU: all Number type.
+            - GPU: float16, float32, int8.
+
+        bias (Tensor): The bias tensor, with shape :math:`(C)`. C must be the same as channel dimension C of
+            `input_x`. It has the same type as `input_x`.

     Returns:
         Tensor, with the same shape and data type as `input_x`.

     Raises:
         TypeError: If `input_x` or `bias` is not a Tensor.
-        TypeError: If dtype of `input_x`
+        TypeError: If dtype of `input_x` and `bias` is inconsistent.
         TypeError: If dimension of `input_x` is not in the range [2, 5].

     Supported Platforms:
@@ -5718,11 +5921,12 @@ def binary_cross_entropy(logits, labels, weight=None, reduction='mean'):
             the loss function
             will not consider any sample weights, and each sample will be treated as having equal importance
             when calculating the loss.
-        reduction (str, optional):
-
-
-
-
+        reduction (str, optional): Apply specific reduction method to the output: ``'none'`` , ``'mean'`` ,
+            ``'sum'`` . Default: ``'mean'`` .
+
+            - ``'none'``: no reduction will be applied.
+            - ``'mean'``: compute and return the weighted mean of elements in the output.
+            - ``'sum'``: the output elements will be summed.

     Returns:
         Tensor or Scalar. Returns Tensor that has the same dtype and shape as `logits` if `reduction` is 'none'.
@@ -5731,7 +5935,7 @@ def binary_cross_entropy(logits, labels, weight=None, reduction='mean'):
     Raises:
         TypeError: If `logits`, `labels` or `weight` is not a Tensor.
         TypeError: If dtype of `logits`, `labels` or `weight` (if given) is neither float16 nor float32.
-        ValueError: If `reduction` is not one of 'none'
+        ValueError: If `reduction` is not one of ``'none'``, ``'mean'`` or ``'sum'``.
         ValueError: If shape of `labels` is not the same as `logits` or `weight` (if given).

     Supported Platforms:
@@ -5754,32 +5958,46 @@ def binary_cross_entropy(logits, labels, weight=None, reduction='mean'):

 def conv3d(input, weight, bias=None, stride=1, pad_mode="valid", padding=0, dilation=1, groups=1):
     r"""
-    Applies a 3D convolution over an input tensor. The input tensor is typically of
-    :math:`(N, C_{in}, D_{in}, H_{in}, W_{in})`
-    :math:`
-
-
+    Applies a 3D convolution over an input tensor. The input tensor is typically of
+    shape :math:`(N, C_{in}, D_{in}, H_{in}, W_{in})`, where :math:`N` is batch size, :math:`C`
+    is channel number, :math:`D` is feature depth, :math:`H` is feature height, :math:`W` is feature width.
+
+    The output is calculated based on formula:

     .. math::
-
-    \
-    \
-
-    where :math:`
-
-    :math:`
-
-
-    :math:`
-
-
-
-
-    :math:`
-
-
-
-
+
+        \text{out}(N_i, C_{\text{out}_j}) = \text{bias}(C_{\text{out}_j}) +
+        \sum_{k = 0}^{C_{in} - 1} \text{ccor}({\text{weight}(C_{\text{out}_j}, k), \text{X}(N_i, k)})
+
+    where :math:`bias` is the output channel bias, :math:`ccor` is
+    the `cross-correlation <https://en.wikipedia.org/wiki/Cross-correlation>`_,
+    :math:`weight` is the convolution kernel value and :math:`X` represents the input feature map.
+
+    Here are the indices' meanings:
+
+    - :math:`i` corresponds to the batch number, ranging from 0 to N-1, where N is the batch size of the input.
+
+    - :math:`j` corresponds to the output channel, ranging from 0 to C_{out}-1, where C_{out} is the number of
+      output channels, which is also equal to the number of kernels.
+
+    - :math:`k` corresponds to the input channel, ranging from 0 to C_{in}-1, where C_{in} is the number of
+      input channels, which is also equal to the number of channels in the convolutional kernels.
+
+    Therefore, in the above formula, :math:`{bias}(C_{out_j})` represents the bias of the :math:`j`-th
+    output channel, :math:`{weight}(C_{out_j}, k)` represents the slice of the :math:`j`-th convolutional
+    kernel in the :math:`k`-th channel, and :math:`{X}(N_i, k)` represents the slice of the :math:`k`-th input
+    channel in the :math:`i`-th batch of the input feature map.
+
+    The shape of the convolutional kernel is given by
+    :math:`(\text{kernel_size[0]}, \text{kernel_size[1]}, \text{kernel_size[2]})`,
+    where :math:`kernel\_size[0]` , :math:`kernel\_size[1]` and :math:`kernel\_size[2]` are the depth,
+    height and width of the kernel, respectively.
+    If we consider the input and output channels as well as the `group` parameter, the complete kernel shape
+    will be :math:`(C_{out}, C_{in} / \text{group}, \text{kernel_size[0]},
+    \text{kernel_size[1]}, \text{kernel_size[2]})`,
+    where `group` is the number of groups dividing `x`'s input channel when applying group convolution.
+
+    For more details about convolution layer, please refer to `Gradient Based Learning Applied to Document Recognition
+    <http://vision.stanford.edu/cs598_spring07/papers/Lecun98.pdf>`_.

     Note:
         1. On Ascend platform, `groups = 1` must be satisfied.
@@ -5790,8 +6008,8 @@ def conv3d(input, weight, bias=None, stride=1, pad_mode="valid", padding=0, dila
         weight (Tensor): Set size of kernel is :math:`(\text{kernel_size[0]}, \text{kernel_size[1]},
             \text{kernel_size[2]})`, then the shape is :math:`(C_{out}, C_{in}, \text{kernel_size[0]},
             \text{kernel_size[1]}, \text{kernel_size[2]})`.
-        bias (Tensor): Bias Tensor with shape :math:`(C_{out})`.
-            ``None`` .
+        bias (Tensor, optional): Bias Tensor with shape :math:`(C_{out})`.
+            When bias is None, zeros will be used. Default: ``None`` .
         stride (Union[int, tuple[int]], optional): The distance of kernel moving,
             it can be an int number that represents
             the depth, height and width of movement or a tuple of three int numbers that
@@ -5799,18 +6017,18 @@ def conv3d(input, weight, bias=None, stride=1, pad_mode="valid", padding=0, dila
         pad_mode (str, optional): Specifies padding mode. The optional values are
             ``"same"`` , ``"valid"`` and ``"pad"`` . Default: ``"valid"`` .

-            - same
+            - ``"same"``: Adopts the way of completion. The depth, height and width of the output will be equal to
              the input `x` divided by stride. The padding will be evenly calculated in head and tail, top and bottom,
              left and right directions possibly.
              Otherwise, the last extra padding will be calculated from the tail, bottom and the right side.
              If this mode is set, `pad` must be 0.

-            - valid
+            - ``"valid"``: Adopts the way of discarding. The possible largest depth, height and width of output
              will be returned without padding. Extra pixels will be discarded. If this mode is set, `pad`
              must be 0.

-            - pad
-              be padded to the input Tensor borders. `pad` must be greater than or equal to 0.
+            - ``"pad"``: Implicit paddings on both sides of the input in depth, height and width.
+              The number of `pad` will be padded to the input Tensor borders. `pad` must be greater than or equal to 0.

         padding (Union[int, tuple[int], list[int]], optional): The pad value to be filled. If `pad` is an integer,
             the paddings of head, tail, top, bottom, left and right are the same, equal to pad.
@@ -5828,36 +6046,36 @@ def conv3d(input, weight, bias=None, stride=1, pad_mode="valid", padding=0, dila
     Returns:
         Tensor, the value that applied 3D convolution. The shape is :math:`(N, C_{out}, D_{out}, H_{out}, W_{out})`.

-        `pad_mode` is
+        `pad_mode` is ``"same"``:

         .. math::
             \begin{array}{ll} \\
-                D_{out}
-                H_{out}
-                W_{out}
+                D_{out} = \left \lceil{\frac{D_{in}}{\text{stride[0]}}} \right \rceil \\
+                H_{out} = \left \lceil{\frac{H_{in}}{\text{stride[1]}}} \right \rceil \\
+                W_{out} = \left \lceil{\frac{W_{in}}{\text{stride[2]}}} \right \rceil \\
             \end{array}

-        `pad_mode` is
+        `pad_mode` is ``"valid"``:

         .. math::
             \begin{array}{ll} \\
-                D_{out}
+                D_{out} = \left \lfloor{\frac{D_{in} - \text{dilation[0]} \times (\text{kernel_size[0]} - 1) }
                {\text{stride[0]}} + 1} \right \rfloor \\
-                H_{out}
+                H_{out} = \left \lfloor{\frac{H_{in} - \text{dilation[1]} \times (\text{kernel_size[1]} - 1) }
                {\text{stride[1]}} + 1} \right \rfloor \\
-                W_{out}
+                W_{out} = \left \lfloor{\frac{W_{in} - \text{dilation[2]} \times (\text{kernel_size[2]} - 1) }
                {\text{stride[2]}} + 1} \right \rfloor \\
             \end{array}

-        `pad_mode` is
+        `pad_mode` is ``"pad"``:

         .. math::
             \begin{array}{ll} \\
-                D_{out}
+                D_{out} = \left \lfloor{\frac{D_{in} + padding[0] + padding[1] - (\text{dilation[0]} - 1) \times
                \text{kernel_size[0]} - 1 }{\text{stride[0]}} + 1} \right \rfloor \\
-                H_{out}
+                H_{out} = \left \lfloor{\frac{H_{in} + padding[2] + padding[3] - (\text{dilation[1]} - 1) \times
                \text{kernel_size[1]} - 1 }{\text{stride[1]}} + 1} \right \rfloor \\
-                W_{out}
+                W_{out} = \left \lfloor{\frac{W_{in} + padding[4] + padding[5] - (\text{dilation[2]} - 1) \times
                \text{kernel_size[2]} - 1 }{\text{stride[2]}} + 1} \right \rfloor \\
             \end{array}

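A helper that mirrors the three pad-mode formulas above verbatim, one spatial axis at a time (the function name is ours; pad_lo and pad_hi are the two padding entries for that axis):

import math

def conv3d_out_dim(size, kernel_size, stride, dilation, pad_lo, pad_hi, pad_mode):
    if pad_mode == "same":
        return math.ceil(size / stride)
    if pad_mode == "valid":
        return math.floor((size - dilation * (kernel_size - 1)) / stride + 1)
    # "pad": note the docstring's (dilation - 1) * kernel_size term.
    return math.floor((size + pad_lo + pad_hi - (dilation - 1) * kernel_size - 1) / stride + 1)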
@@ -6082,7 +6300,7 @@ def glu(x, axis=-1):
         TypeError: If `x` is not a Tensor.

     Supported Platforms:
-        ``Ascend`` ``CPU``
+        ``Ascend`` ``GPU`` ``CPU``

     Examples:
         >>> from mindspore import Tensor, ops
@@ -6128,12 +6346,12 @@ def multi_margin_loss(input, target, p=1, margin=1, weight=None, reduction='mean
         reduction (str, optional): Apply specific reduction method to the output: ``'none'`` , ``'mean'`` ,
             ``'sum'`` . Default: ``'mean'`` .

-            - ``'none'
-            - ``'mean'
-            - ``'sum'
+            - ``'none'``: no reduction will be applied.
+            - ``'mean'``: compute and return the weighted mean of elements in the output.
+            - ``'sum'``: the output elements will be summed.

     Returns:
-        Tensor. If `reduction` is 'none'
+        Tensor. If `reduction` is ``'none'``, returns a Tensor with the same shape as `target`.
         Otherwise, it is a scalar.

     Raises:
@@ -6200,13 +6418,14 @@ def multilabel_margin_loss(input, target, reduction='mean'):
         reduction (str, optional): Apply specific reduction method to the output: ``'none'`` , ``'mean'`` ,
             ``'sum'`` . Default: ``'mean'`` .

-            - ``'none'
-            - ``'mean'
-            - ``'sum'
+            - ``'none'``: no reduction will be applied.
+            - ``'mean'``: compute and return the mean of elements in the output.
+            - ``'sum'``: the output elements will be summed.

     Returns:
-        - **outputs** (Union[Tensor, Scalar]) - The loss of MultilabelMarginLoss.
-          is :math:`(N)`.
+        - **outputs** (Union[Tensor, Scalar]) - The loss of MultilabelMarginLoss.
+          If `reduction` is ``"none"``, its shape is :math:`(N)`.
+          Otherwise, a scalar value will be returned.

     Raises:
         TypeError: If `input` or `target` is not a Tensor.
@@ -6214,7 +6433,7 @@ def multilabel_margin_loss(input, target, reduction='mean'):
         TypeError: If dtype of `target` is not int32.
         ValueError: If length of shape of `input` is neither 1 nor 2.
         ValueError: If shape of `input` is not the same as `target`.
-        ValueError: If `reduction` is not one of 'none'
+        ValueError: If `reduction` is not one of ``'none'``, ``'mean'``, ``'sum'``.

     Supported Platforms:
         ``Ascend`` ``GPU``
@@ -6260,12 +6479,15 @@ def multilabel_soft_margin_loss(input, target, weight=None, reduction='mean'):
         input (Tensor): A tensor of shape :math:`(N, C)` , where N is batch size and C is number of classes.
         target (Tensor): The label target Tensor which has the same shape as `input`.
         weight (Union[Tensor, int, float]): The manual rescaling weight given to each class. Default: ``None``.
-        reduction (str):
-            ``'
-
+        reduction (str, optional): Apply specific reduction method to the output: ``'none'`` , ``'mean'`` ,
+            ``'sum'`` . Default: ``'mean'`` .
+
+            - ``'none'``: no reduction will be applied.
+            - ``'mean'``: compute and return the weighted mean of elements in the output.
+            - ``'sum'``: the output elements will be summed.

     Returns:
-        Tensor, the data type is the same as input, if the reduction is 'none'
+        Tensor, the data type is the same as input, if the `reduction` is ``'none'``,
         its shape is :math:`(N)` , otherwise it is zero.

     Raises:
@@ -6409,15 +6631,15 @@ def gelu(input_x, approximate='none'):

     x_dtype = _get_cache_prim(P.DType)()(input_x)
     if x_dtype not in [mstype.float16, mstype.float32, mstype.float64]:
-        raise TypeError("For gelu, the input dtype must be float16, float32 or float64, "
-                        "but got {}."
+        raise TypeError(f"For gelu, the input dtype must be float16, float32 or float64, "
+                        f"but got {x_dtype}.")
     if approximate == 'tanh':
         output = _get_cache_prim(P.GeLU)()(input_x)
     else:
-        output = _get_cache_prim(P.Sqrt)()(Tensor(2.0))
+        output = _get_cache_prim(P.Sqrt)()(Tensor(2.0, x_dtype))
         output = _get_cache_prim(P.Div)()(input_x, output)
-        output = _get_cache_prim(P.Erf)()(output) + Tensor(1.0)
-        output = input_x * output * Tensor(0.5)
+        output = _get_cache_prim(P.Erf)()(output) + Tensor(1.0, x_dtype)
+        output = input_x * output * Tensor(0.5, x_dtype)

     return output

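The change threads `x_dtype` into every constant, so a float16 input is no longer promoted by float32 defaults of `Tensor(2.0)` and friends. The non-'tanh' branch builds the exact erf-based GELU; a scalar reference check:

import math

def gelu_ref(x):
    # 0.5 * x * (1 + erf(x / sqrt(2))) -- the formula the branch above computes.
    return 0.5 * x * (1.0 + math.erf(x / math.sqrt(2.0)))

print(round(gelu_ref(1.0), 6))  # 0.841345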
@@ -6655,8 +6877,12 @@ def mse_loss(input, target, reduction='mean'):
         target (Tensor): The input label. Tensor of any dimension, same shape as the `input` in common cases.
             However, it supports that the shape of `input` is different from the shape of `target`
             and they should be broadcasted to each other.
-        reduction (str, optional):
-
+        reduction (str, optional): Apply specific reduction method to the output: ``'none'`` , ``'mean'`` ,
+            ``'sum'`` . Default: ``'mean'`` .
+
+            - ``'none'``: no reduction will be applied.
+            - ``'mean'``: compute and return the mean of elements in the output.
+            - ``'sum'``: the output elements will be summed.

     Returns:
         Tensor, loss of type float, the shape is zero if `reduction` is ``'mean'`` or ``'sum'`` ,
@@ -6759,11 +6985,15 @@ def triplet_margin_loss(anchor, positive, negative, margin=1.0, p=2, eps=1e-06,
         eps (float, optional): Add small value to avoid division by zero. Default: ``1e-06``.
         swap (bool, optional): The distance swap change the negative distance to the distance between positive
             sample and negative sample. Default: ``False`` .
-        reduction (str, optional): Apply specific reduction method to the output: ``'none'`` , ``'mean'`` ,
-            Default: ``'mean'`` .
+        reduction (str, optional): Apply specific reduction method to the output: ``'none'`` , ``'mean'`` ,
+            ``'sum'`` . Default: ``'mean'`` .
+
+            - ``'none'``: no reduction will be applied.
+            - ``'mean'``: compute and return the mean of elements in the output.
+            - ``'sum'``: the output elements will be summed.

     Returns:
-        Tensor. If `reduction` is "none"
+        Tensor. If `reduction` is ``"none"``, its shape is :math:`(N)`. Otherwise, a scalar value will be returned.

     Raises:
         TypeError: If `anchor` or `positive` or `negative` is not a Tensor.
@@ -6776,7 +7006,7 @@ def triplet_margin_loss(anchor, positive, negative, margin=1.0, p=2, eps=1e-06,
             same time.
         ValueError: If the dimension of input `anchor` or `positive` or `negative` is bigger than or equal to 8.
         ValueError: If shape of `anchor`, `positive` and `negative` cannot broadcast.
-        ValueError: If `reduction` is not one of 'none'
+        ValueError: If `reduction` is not one of ``'none'``, ``'mean'``, ``'sum'``.

     Supported Platforms:
         ``GPU``
@@ -6811,7 +7041,7 @@ def linear(x, w, b):
 def _inner_dropout(x, p, training):
     """inner dropout"""
     _dropout = _get_cache_prim(P.Dropout)(1 - p)
-    if p
+    if 0. < p <= 1. and training:
         return _dropout(x)[0]
     return x

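The rewritten guard makes the fast path explicit: dropout is applied only for p in (0, 1] while training; otherwise the input passes through unchanged. A self-contained NumPy sketch of the same control flow (inverted dropout, like P.Dropout with keep_prob = 1 - p):

import numpy as np

def inner_dropout_ref(x, p, training, rng=np.random.default_rng(0)):
    if 0. < p <= 1. and training:
        keep_prob = 1.0 - p
        if keep_prob == 0.0:
            return np.zeros_like(x)            # p == 1 drops everything
        mask = rng.random(x.shape) < keep_prob
        return np.where(mask, x / keep_prob, 0.0)  # scale kept values by 1/keep_prob
    return x  # identity when p == 0 or when evaluating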
@@ -6864,10 +7094,11 @@ def _in_projection_packed(q, k, v, w, b, k_is_v, q_is_k):
     return linear(q, w_q, b_q), linear(k, w_k, b_k), linear(v, w_v, b_v)


-def _scaled_dot_product_attention(query, key, value, attn_mask, dropout_p, is_causal, is_training):
+def _scaled_dot_product_attention(query, key, value, attn_mask, dropout_p, is_causal, is_training, dtype):
     """scaled dot product attention"""
     embed_size = query.shape[-1]
-
+    embed_size_tensor = scalar_to_tensor_(embed_size, dtype)
+    scaling_factor = embed_size_tensor.sqrt().sqrt()
     query = query / scaling_factor

     if is_causal:
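The `sqrt().sqrt()` is the fourth root of the embedding size: scaling the query (and, symmetrically, the key) by d ** -0.25 yields the usual 1/sqrt(d) attention scaling while keeping intermediate magnitudes small, which matters in float16. A NumPy sketch, under the assumption that the key side is scaled the same way before the matmul:

import numpy as np

def scaled_scores_ref(q, k):
    d = q.shape[-1]
    factor = np.sqrt(np.sqrt(float(d)))  # d ** 0.25, i.e. sqrt().sqrt()
    # (q / d**0.25) @ (k / d**0.25).T == (q @ k.T) / sqrt(d)
    return (q / factor) @ (k / factor).swapaxes(-1, -2)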
@@ -6960,7 +7191,7 @@ def multi_head_attention_forward(query, key, value, embed_dim_to_check, num_head
                                  out_proj_bias, training=True, key_padding_mask=None, attn_mask=None,
                                  use_separate_proj_weight=False, q_proj_weight=None, k_proj_weight=None,
                                  v_proj_weight=None, static_k=None, static_v=None, average_attn_weights=True,
-                                 is_causal=False, k_is_v=False, q_is_k=False):
+                                 is_causal=False, k_is_v=False, q_is_k=False, dtype=mstype.float32):
     """multi head attention forward function"""
     is_batched = _check_qkv_shape(query.ndim, key.ndim, value.ndim)
     if key_padding_mask is not None:
@@ -7117,7 +7348,7 @@ def multi_head_attention_forward(query, key, value, embed_dim_to_check, num_head
     v = v.view((bsz, num_heads, src_len, head_dim))

     attn_output, attn_output_weights = _scaled_dot_product_attention(
-        q, k, v, attn_mask, dropout_p, is_causal, training)
+        q, k, v, attn_mask, dropout_p, is_causal, training, dtype)
     attn_output = attn_output.transpose(2, 0, 1, 3).view((bsz * tgt_len, embed_dim))

     attn_output = linear(attn_output, out_proj_weight, out_proj_bias)
@@ -7213,6 +7444,82 @@ def max_pool2d(x, kernel_size, stride=None, padding=0, dilation=1, return_indice
     return out


+def prompt_flash_attention(query, key, value, padding_mask, attn_mask, actual_seq_lengths,
+                           actual_seq_lengths_kv, deq_scale1, quant_scale1,
+                           deq_scale2, quant_scale2, quant_offset2, num_heads, scale_value=1.0, pre_tokens=2147483547,
+                           next_tokens=0, input_layout='BSH',
+                           num_key_value_heads=0, sparse_mode=0):
+    r"""
+    The interface for fully inference.
+    B -- Batch size
+    S -- Sequence length
+    H -- Hidden size
+
+    Note:
+        Only supported on Ascend910B.
+
+    .. warning::
+        This is an experimental API that is subject to change or deletion.
+
+    Inputs:
+        query (Tensor) - The query tensor with data type of float16 or float32.
+            Input tensor of shape :math:`(B, S, H)` / `(B, N, S, D)`.
+        key (Tensor) - The key tensor with data type of float16 or float32.
+            Input tensor of shape :math:`(B, S, H)` / `(B, N, S, D)`.
+        value (Tensor) - The value tensor with data type of float16 or float32.
+            Input tensor of shape :math:`(B, S, H)` / `(B, N, S, D)`.
+        padding_mask (Tensor) - The padding mask tensor with data type of float16 or float32.
+        attn_mask (Tensor) - The attention mask tensor with data type of float16 or float32.
+            For each element, 0 indicates retention and 1 indicates discard. Input tensor of shape :math:`(B, 1, S, S)`.
+        actual_seq_lengths (list[int]): Describe actual sequence length of each input with data type of int.
+        actual_seq_lengths_kv (list[int]): Describe actual sequence length of each input with data type of int.
+        deq_scale1 (Tensor)
+        quant_scale1 (Tensor)
+        deq_scale2 (Tensor)
+        quant_scale2 (Tensor)
+        quant_offset2 (Tensor)
+        num_heads (int): The number of heads.
+        scale_value (float): The scale value indicating the scale coefficient, which is used as the scalar of
+            Muls in the calculation. Default: 1.0.
+        pre_tokens (int): Previous tokens. Default: 2147483547.
+        next_tokens (int): Next tokens, indicating the number of data blocks in the upper triangle involved
+            in the calculation. The value 0 indicates that the data blocks in the upper triangle are not
+            involved in the calculation. Default: 0.
+        input_layout (str): the data layout of the input qkv, support `(BSH)` and `(BNSD)`, Default `BSH`.
+        num_key_value_heads (int): head numbers of key/value which are used in GQA algorithm.
+            The value 0 indicates if the key and value have the same head nums, use numHeads. Default: 0.
+        sparse_mode (int): Default: 0
+
+    Outputs:
+        attention_out (Tensor) - Input tensor of shape :math:`(B, S, H)` / `(B, N, S, D)`.
+
+    Supported Platforms:
+        ``Ascend``
+
+    Examples:
+        >>> from mindspore.ops.function.nn_func import prompt_flash_attention
+        >>> from mindspore import Tensor
+        >>> import numpy as np
+        >>> B = 1
+        >>> N = 16
+        >>> S = 256
+        >>> D = 16
+        >>> query = Tensor(np.ones((B, N, S, D), dtype=np.float16))
+        >>> key = Tensor(np.ones((B, N, S, D), dtype=np.float16))
+        >>> value = Tensor(np.ones((B, N, S, D), dtype=np.float16))
+        >>> out = ops.prompt_flash_attention(query, key, value, None, None, None, None, None, None, None, None,
+        ...                                  None, N, input_layout='BNSD')
+        >>> print(out[0].shape)
+        (1, 16, 256, 16)
+    """
+
+    pfa = _get_cache_prim(NN_OPS.PromptFlashAttention)(num_heads, scale_value, pre_tokens, next_tokens, input_layout,
+                                                       num_key_value_heads, sparse_mode)
+    return pfa(query, key, value, padding_mask, attn_mask, actual_seq_lengths, actual_seq_lengths_kv, deq_scale1,
+               quant_scale1, deq_scale2, quant_scale2, quant_offset2)
+
+
 __all__ = [
     'adaptive_avg_pool1d',
     'adaptive_avg_pool2d',
@@ -7260,6 +7567,7 @@ __all__ = [
     'softsign',
     'softshrink',
     'soft_shrink',
+    'softplus',
     'selu',
     'silu',
     'soft_margin_loss',