mindspore 2.4.10-cp310-cp310-win_amd64.whl → 2.6.0-cp310-cp310-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of mindspore has been flagged as potentially problematic.
- mindspore/.commit_id +1 -1
- mindspore/Microsoft.VisualStudio.Telemetry.dll +0 -0
- mindspore/Newtonsoft.Json.dll +0 -0
- mindspore/__init__.py +13 -6
- mindspore/_c_dataengine.cp310-win_amd64.pyd +0 -0
- mindspore/_c_expression.cp310-win_amd64.pyd +0 -0
- mindspore/_c_mindrecord.cp310-win_amd64.pyd +0 -0
- mindspore/_check_jit_forbidden_api.py +3 -0
- mindspore/_checkparam.py +3 -38
- mindspore/_deprecated/__init__.py +17 -0
- mindspore/_deprecated/jit.py +198 -0
- mindspore/_extends/builtin_operations.py +1 -1
- mindspore/_extends/parallel_compile/akg_compiler/gen_custom_op_files.py +1 -1
- mindspore/_extends/parse/__init__.py +6 -7
- mindspore/_extends/parse/compile_config.py +83 -0
- mindspore/_extends/parse/deprecated/__init__.py +0 -0
- mindspore/_extends/parse/deprecated/deprecated_tensor_method.py +394 -0
- mindspore/_extends/parse/jit_fallback_modules/__init__.py +0 -0
- mindspore/_extends/parse/jit_fallback_modules/check_utils.py +123 -0
- mindspore/_extends/parse/jit_fallback_modules/third_party_modules.py +50 -0
- mindspore/_extends/parse/parser.py +47 -198
- mindspore/_extends/parse/resources.py +1 -5
- mindspore/_extends/parse/standard_method.py +229 -99
- mindspore/_extends/pijit/__init__.py +2 -2
- mindspore/_extends/pijit/pijit_func_white_list.py +17 -12
- mindspore/_extends/pijit/tensor_func_list.py +27 -0
- mindspore/_extends/utils.py +1 -1
- mindspore/amp.py +11 -5
- mindspore/atlprov.dll +0 -0
- mindspore/avcodec-59.dll +0 -0
- mindspore/avdevice-59.dll +0 -0
- mindspore/avfilter-8.dll +0 -0
- mindspore/avformat-59.dll +0 -0
- mindspore/avutil-57.dll +0 -0
- mindspore/boost/__init__.py +2 -2
- mindspore/boost/base.py +3 -7
- mindspore/boost/boost_cell_wrapper.py +138 -43
- mindspore/c1.dll +0 -0
- mindspore/c1xx.dll +0 -0
- mindspore/c2.dll +0 -0
- mindspore/common/__init__.py +6 -3
- mindspore/common/_grad_function.py +56 -0
- mindspore/common/_pijit_context.py +14 -5
- mindspore/common/_register_for_tensor.py +1 -2
- mindspore/common/_stub_tensor.py +30 -14
- mindspore/common/_tensor_cpp_method.py +17 -0
- mindspore/common/_tensor_docs.py +4760 -0
- mindspore/common/api.py +480 -372
- mindspore/common/auto_dynamic_shape.py +41 -44
- mindspore/common/dtype.py +39 -36
- mindspore/common/dump.py +9 -6
- mindspore/common/file_system.py +9 -1
- mindspore/common/generator.py +5 -0
- mindspore/common/hook_handle.py +6 -2
- mindspore/common/initializer.py +13 -10
- mindspore/common/jit_begin_end.py +94 -0
- mindspore/common/jit_config.py +6 -1
- mindspore/common/jit_context.py +76 -0
- mindspore/common/jit_trace.py +378 -0
- mindspore/common/lazy_inline.py +9 -3
- mindspore/common/mindir_util.py +10 -2
- mindspore/common/mutable.py +5 -4
- mindspore/common/parameter.py +135 -52
- mindspore/common/seed.py +2 -2
- mindspore/common/sparse_tensor.py +23 -17
- mindspore/common/tensor.py +975 -1981
- mindspore/communication/__init__.py +7 -5
- mindspore/communication/_comm_helper.py +52 -2
- mindspore/communication/comm_func.py +240 -181
- mindspore/communication/management.py +95 -26
- mindspore/context.py +324 -573
- mindspore/dataset/__init__.py +65 -37
- mindspore/dataset/audio/__init__.py +2 -8
- mindspore/dataset/audio/transforms.py +3 -17
- mindspore/dataset/callback/ds_callback.py +2 -1
- mindspore/dataset/core/config.py +87 -6
- mindspore/dataset/engine/cache_admin.py +3 -3
- mindspore/dataset/engine/cache_client.py +6 -5
- mindspore/dataset/engine/datasets.py +292 -267
- mindspore/dataset/engine/datasets_audio.py +22 -8
- mindspore/dataset/engine/datasets_standard_format.py +46 -27
- mindspore/dataset/engine/datasets_text.py +78 -48
- mindspore/dataset/engine/datasets_user_defined.py +183 -117
- mindspore/dataset/engine/datasets_vision.py +120 -44
- mindspore/dataset/engine/iterators.py +283 -63
- mindspore/dataset/engine/obs/obs_mindrecord_dataset.py +1 -1
- mindspore/dataset/engine/obs/util.py +8 -0
- mindspore/dataset/engine/queue.py +40 -0
- mindspore/dataset/engine/samplers.py +289 -43
- mindspore/dataset/engine/serializer_deserializer.py +3 -2
- mindspore/dataset/engine/validators.py +53 -11
- mindspore/dataset/text/__init__.py +7 -6
- mindspore/dataset/text/transforms.py +6 -5
- mindspore/dataset/text/utils.py +3 -3
- mindspore/dataset/transforms/__init__.py +0 -9
- mindspore/dataset/transforms/py_transforms_util.py +17 -0
- mindspore/dataset/transforms/transforms.py +31 -14
- mindspore/dataset/utils/browse_dataset.py +1 -1
- mindspore/dataset/vision/__init__.py +2 -9
- mindspore/dataset/vision/transforms.py +202 -158
- mindspore/dataset/vision/utils.py +7 -5
- mindspore/dataset/vision/validators.py +1 -2
- mindspore/device_context/__init__.py +21 -0
- mindspore/device_context/ascend/__init__.py +25 -0
- mindspore/device_context/ascend/device.py +72 -0
- mindspore/device_context/ascend/op_debug.py +153 -0
- mindspore/device_context/ascend/op_precision.py +193 -0
- mindspore/device_context/ascend/op_tuning.py +123 -0
- mindspore/{ops_generate/gen_constants.py → device_context/cpu/__init__.py} +6 -17
- mindspore/device_context/cpu/device.py +62 -0
- mindspore/device_context/cpu/op_tuning.py +43 -0
- mindspore/device_context/gpu/__init__.py +21 -0
- mindspore/device_context/gpu/device.py +70 -0
- mindspore/device_context/gpu/op_precision.py +67 -0
- mindspore/device_context/gpu/op_tuning.py +175 -0
- mindspore/device_manager.py +170 -0
- mindspore/dnnl.dll +0 -0
- mindspore/dpcmi.dll +0 -0
- mindspore/experimental/es/embedding_service.py +35 -27
- mindspore/experimental/llm_boost/__init__.py +1 -0
- mindspore/experimental/llm_boost/ascend_native/__init__.py +22 -0
- mindspore/experimental/llm_boost/ascend_native/llama_boost_ascend_native.py +209 -0
- mindspore/experimental/llm_boost/ascend_native/llm_boost.py +52 -0
- mindspore/experimental/llm_boost/atb/boost_base.py +2 -3
- mindspore/experimental/llm_boost/atb/llama_boost.py +6 -1
- mindspore/experimental/llm_boost/register.py +1 -0
- mindspore/experimental/map_parameter.py +4 -4
- mindspore/experimental/optim/adadelta.py +6 -6
- mindspore/experimental/optim/adagrad.py +4 -4
- mindspore/experimental/optim/adam.py +7 -0
- mindspore/experimental/optim/adamax.py +4 -4
- mindspore/experimental/optim/adamw.py +4 -0
- mindspore/experimental/optim/asgd.py +1 -1
- mindspore/experimental/optim/lr_scheduler.py +73 -46
- mindspore/experimental/optim/radam.py +34 -31
- mindspore/experimental/optim/rprop.py +1 -1
- mindspore/experimental/optim/sgd.py +1 -1
- mindspore/hal/contiguous_tensors_handle.py +6 -10
- mindspore/hal/device.py +55 -53
- mindspore/hal/event.py +52 -52
- mindspore/hal/memory.py +179 -120
- mindspore/hal/stream.py +150 -109
- mindspore/include/api/context.h +0 -1
- mindspore/include/dataset/constants.h +7 -4
- mindspore/include/dataset/execute.h +2 -2
- mindspore/jpeg62.dll +0 -0
- mindspore/log.py +50 -0
- mindspore/mindrecord/__init__.py +21 -8
- mindspore/mindrecord/config.py +17 -316
- mindspore/mindrecord/filereader.py +1 -9
- mindspore/mindrecord/filewriter.py +5 -15
- mindspore/mindrecord/mindpage.py +1 -9
- mindspore/mindspore_backend_common.dll +0 -0
- mindspore/mindspore_backend_manager.dll +0 -0
- mindspore/mindspore_common.dll +0 -0
- mindspore/mindspore_core.dll +0 -0
- mindspore/mindspore_dump.dll +0 -0
- mindspore/mindspore_frontend.dll +0 -0
- mindspore/mindspore_glog.dll +0 -0
- mindspore/mindspore_memory_pool.dll +0 -0
- mindspore/mindspore_ms_backend.dll +0 -0
- mindspore/mindspore_ops.dll +0 -0
- mindspore/{mindspore_backend.dll → mindspore_ops_host.dll} +0 -0
- mindspore/mindspore_ops_kernel_common.dll +0 -0
- mindspore/mindspore_profiler.dll +0 -0
- mindspore/mindspore_pyboost.dll +0 -0
- mindspore/mindspore_pynative.dll +0 -0
- mindspore/mindspore_res_manager.dll +0 -0
- mindspore/mindspore_runtime_pipeline.dll +0 -0
- mindspore/mint/__init__.py +798 -761
- mindspore/mint/distributed/__init__.py +70 -4
- mindspore/mint/distributed/distributed.py +2679 -44
- mindspore/mint/linalg/__init__.py +8 -0
- mindspore/mint/nn/__init__.py +743 -22
- mindspore/mint/nn/functional.py +716 -23
- mindspore/mint/nn/layer/__init__.py +21 -4
- mindspore/mint/nn/layer/_functions.py +334 -0
- mindspore/mint/nn/layer/activation.py +276 -1
- mindspore/mint/nn/layer/basic.py +123 -0
- mindspore/mint/nn/layer/conv.py +933 -0
- mindspore/mint/nn/layer/normalization.py +223 -28
- mindspore/mint/nn/layer/padding.py +797 -0
- mindspore/mint/nn/layer/pooling.py +235 -0
- mindspore/mint/optim/__init__.py +3 -1
- mindspore/mint/optim/adam.py +223 -0
- mindspore/mint/optim/adamw.py +26 -19
- mindspore/mint/optim/sgd.py +171 -0
- mindspore/mint/special/__init__.py +2 -1
- mindspore/msobj140.dll +0 -0
- mindspore/mspdb140.dll +0 -0
- mindspore/mspdbcore.dll +0 -0
- mindspore/mspdbst.dll +0 -0
- mindspore/mspft140.dll +0 -0
- mindspore/msvcdis140.dll +0 -0
- mindspore/msvcp140_1.dll +0 -0
- mindspore/msvcp140_2.dll +0 -0
- mindspore/msvcp140_atomic_wait.dll +0 -0
- mindspore/msvcp140_codecvt_ids.dll +0 -0
- mindspore/multiprocessing/__init__.py +5 -0
- mindspore/nn/__init__.py +4 -1
- mindspore/nn/cell.py +1373 -192
- mindspore/nn/dynamic_lr.py +2 -1
- mindspore/nn/layer/activation.py +29 -27
- mindspore/nn/layer/basic.py +51 -35
- mindspore/nn/layer/channel_shuffle.py +3 -3
- mindspore/nn/layer/container.py +1 -1
- mindspore/nn/layer/conv.py +53 -42
- mindspore/nn/layer/embedding.py +12 -11
- mindspore/nn/layer/normalization.py +56 -49
- mindspore/nn/layer/padding.py +4 -3
- mindspore/nn/layer/pooling.py +120 -42
- mindspore/nn/layer/rnn_cells.py +1 -1
- mindspore/nn/layer/rnns.py +2 -1
- mindspore/nn/layer/timedistributed.py +5 -5
- mindspore/nn/layer/transformer.py +59 -36
- mindspore/nn/learning_rate_schedule.py +8 -4
- mindspore/nn/loss/loss.py +58 -55
- mindspore/nn/optim/ada_grad.py +7 -5
- mindspore/nn/optim/adadelta.py +11 -9
- mindspore/nn/optim/adafactor.py +1 -1
- mindspore/nn/optim/adam.py +19 -15
- mindspore/nn/optim/adamax.py +8 -7
- mindspore/nn/optim/adasum.py +5 -5
- mindspore/nn/optim/asgd.py +3 -1
- mindspore/nn/optim/ftrl.py +11 -9
- mindspore/nn/optim/lamb.py +1 -1
- mindspore/nn/optim/lars.py +1 -4
- mindspore/nn/optim/lazyadam.py +12 -10
- mindspore/nn/optim/momentum.py +7 -6
- mindspore/nn/optim/optimizer.py +3 -3
- mindspore/nn/optim/proximal_ada_grad.py +12 -10
- mindspore/nn/optim/rmsprop.py +13 -12
- mindspore/nn/optim/rprop.py +11 -9
- mindspore/nn/optim/sgd.py +9 -6
- mindspore/nn/optim/tft_wrapper.py +5 -2
- mindspore/nn/optim/thor.py +2 -1
- mindspore/nn/probability/bijector/bijector.py +17 -11
- mindspore/nn/probability/bijector/gumbel_cdf.py +5 -5
- mindspore/nn/probability/bijector/invert.py +2 -2
- mindspore/nn/probability/bijector/scalar_affine.py +3 -3
- mindspore/nn/probability/bijector/softplus.py +3 -2
- mindspore/nn/probability/distribution/beta.py +3 -3
- mindspore/nn/probability/distribution/categorical.py +1 -1
- mindspore/nn/probability/distribution/cauchy.py +4 -2
- mindspore/nn/probability/distribution/exponential.py +6 -7
- mindspore/nn/probability/distribution/gamma.py +2 -2
- mindspore/nn/probability/distribution/gumbel.py +2 -2
- mindspore/nn/probability/distribution/half_normal.py +5 -3
- mindspore/nn/probability/distribution/logistic.py +5 -3
- mindspore/nn/probability/distribution/poisson.py +1 -1
- mindspore/nn/probability/distribution/uniform.py +5 -3
- mindspore/nn/reinforcement/_tensors_queue.py +1 -1
- mindspore/nn/reinforcement/tensor_array.py +1 -1
- mindspore/nn/utils/init.py +13 -11
- mindspore/nn/wrap/__init__.py +6 -6
- mindspore/nn/wrap/cell_wrapper.py +181 -122
- mindspore/nn/wrap/grad_reducer.py +45 -36
- mindspore/nn/wrap/loss_scale.py +6 -7
- mindspore/numpy/array_creations.py +63 -65
- mindspore/numpy/array_ops.py +149 -144
- mindspore/numpy/logic_ops.py +41 -42
- mindspore/numpy/math_ops.py +361 -359
- mindspore/numpy/utils.py +17 -18
- mindspore/numpy/utils_const.py +5 -6
- mindspore/opencv_core452.dll +0 -0
- mindspore/opencv_imgcodecs452.dll +0 -0
- mindspore/opencv_imgproc452.dll +0 -0
- mindspore/ops/__init__.py +5 -3
- mindspore/ops/_grad_experimental/grad_comm_ops.py +112 -16
- mindspore/ops/_grad_experimental/grad_debug_ops.py +14 -2
- mindspore/ops/_grad_experimental/grad_inner_ops.py +9 -0
- mindspore/ops/_grad_experimental/grad_math_ops.py +2 -1
- mindspore/ops/_grad_experimental/taylor_rule.py +29 -0
- mindspore/ops/_op_impl/cpu/__init__.py +1 -0
- mindspore/ops/_op_impl/cpu/raise_op.py +28 -0
- mindspore/ops/_register_for_op.py +0 -11
- mindspore/{ops_generate → ops/_utils}/arg_dtype_cast.py +123 -4
- mindspore/{ops_generate → ops/_utils}/arg_handler.py +3 -65
- mindspore/ops/_vmap/vmap_array_ops.py +52 -25
- mindspore/ops/_vmap/vmap_base.py +0 -2
- mindspore/ops/_vmap/vmap_grad_nn_ops.py +21 -14
- mindspore/ops/_vmap/vmap_math_ops.py +15 -16
- mindspore/ops/_vmap/vmap_nn_ops.py +29 -42
- mindspore/ops/auto_generate/__init__.py +4 -3
- mindspore/ops/auto_generate/cpp_create_prim_instance_helper.py +258 -46
- mindspore/ops/auto_generate/gen_extend_func.py +757 -185
- mindspore/ops/auto_generate/gen_ops_def.py +4197 -2243
- mindspore/ops/auto_generate/gen_ops_prim.py +16976 -6055
- mindspore/ops/auto_generate/pyboost_inner_prim.py +221 -87
- mindspore/ops/composite/__init__.py +2 -1
- mindspore/ops/composite/base.py +20 -25
- mindspore/ops/composite/math_ops.py +6 -16
- mindspore/ops/composite/multitype_ops/__init__.py +5 -2
- mindspore/ops/composite/multitype_ops/_compile_utils.py +228 -30
- mindspore/ops/composite/multitype_ops/_constexpr_utils.py +1 -2
- mindspore/ops/composite/multitype_ops/add_impl.py +2 -1
- mindspore/ops/composite/multitype_ops/bitwise_and_impl.py +2 -1
- mindspore/ops/composite/multitype_ops/bitwise_or_impl.py +2 -1
- mindspore/ops/composite/multitype_ops/bitwise_xor_impl.py +2 -1
- mindspore/ops/composite/multitype_ops/div_impl.py +6 -4
- mindspore/ops/composite/multitype_ops/equal_impl.py +4 -3
- mindspore/ops/composite/multitype_ops/floordiv_impl.py +2 -1
- mindspore/ops/composite/multitype_ops/getitem_impl.py +3 -2
- mindspore/ops/composite/multitype_ops/greater_equal_impl.py +4 -3
- mindspore/ops/composite/multitype_ops/greater_impl.py +4 -3
- mindspore/ops/composite/multitype_ops/in_impl.py +2 -1
- mindspore/ops/composite/multitype_ops/invert_impl.py +50 -0
- mindspore/ops/composite/multitype_ops/left_shift_impl.py +2 -1
- mindspore/ops/composite/multitype_ops/less_equal_impl.py +4 -3
- mindspore/ops/composite/multitype_ops/less_impl.py +4 -3
- mindspore/ops/composite/multitype_ops/logic_not_impl.py +3 -2
- mindspore/ops/composite/multitype_ops/logical_and_impl.py +2 -1
- mindspore/ops/composite/multitype_ops/logical_or_impl.py +2 -1
- mindspore/ops/composite/multitype_ops/mod_impl.py +2 -1
- mindspore/ops/composite/multitype_ops/mul_impl.py +3 -2
- mindspore/ops/composite/multitype_ops/negative_impl.py +2 -1
- mindspore/ops/composite/multitype_ops/not_equal_impl.py +2 -1
- mindspore/ops/composite/multitype_ops/not_in_impl.py +2 -1
- mindspore/ops/composite/multitype_ops/ones_like_impl.py +18 -0
- mindspore/ops/composite/multitype_ops/pow_impl.py +2 -30
- mindspore/ops/composite/multitype_ops/right_shift_impl.py +2 -1
- mindspore/ops/composite/multitype_ops/setitem_impl.py +2 -1
- mindspore/ops/composite/multitype_ops/sub_impl.py +2 -1
- mindspore/ops/function/__init__.py +40 -2
- mindspore/ops/function/_add_attr_func.py +58 -0
- mindspore/ops/function/array_func.py +2089 -2403
- mindspore/ops/function/clip_func.py +80 -23
- mindspore/ops/function/debug_func.py +57 -57
- mindspore/ops/function/grad/__init__.py +1 -0
- mindspore/ops/function/grad/grad_func.py +104 -71
- mindspore/ops/function/image_func.py +2 -2
- mindspore/ops/function/linalg_func.py +47 -78
- mindspore/ops/function/math_func.py +4351 -3813
- mindspore/ops/function/nn_func.py +1712 -637
- mindspore/ops/function/other_func.py +159 -1
- mindspore/ops/function/parameter_func.py +18 -84
- mindspore/ops/function/random_func.py +452 -387
- mindspore/ops/function/reshard_func.py +4 -70
- mindspore/ops/function/sparse_func.py +3 -3
- mindspore/ops/function/sparse_unary_func.py +6 -6
- mindspore/ops/function/spectral_func.py +25 -58
- mindspore/ops/function/vmap_func.py +26 -18
- mindspore/ops/functional.py +23 -7
- mindspore/ops/functional_overload.py +1548 -0
- mindspore/ops/op_info_register.py +32 -244
- mindspore/ops/operations/__init__.py +23 -15
- mindspore/ops/operations/_custom_ops_utils.py +235 -0
- mindspore/ops/operations/_embedding_cache_ops.py +4 -4
- mindspore/ops/operations/_grad_ops.py +2 -43
- mindspore/ops/operations/_infer_ops.py +2 -1
- mindspore/ops/operations/_inner_ops.py +43 -84
- mindspore/ops/operations/_ms_kernel.py +4 -10
- mindspore/ops/operations/_rl_inner_ops.py +1 -1
- mindspore/ops/operations/_scalar_ops.py +3 -2
- mindspore/ops/operations/_sequence_ops.py +1 -1
- mindspore/ops/operations/_tensor_array.py +1 -1
- mindspore/ops/operations/array_ops.py +81 -324
- mindspore/ops/operations/comm_ops.py +154 -108
- mindspore/ops/operations/custom_ops.py +298 -87
- mindspore/ops/operations/debug_ops.py +157 -59
- mindspore/ops/operations/inner_ops.py +7 -5
- mindspore/ops/operations/linalg_ops.py +1 -57
- mindspore/ops/operations/manually_defined/_inner.py +1 -1
- mindspore/ops/operations/manually_defined/ops_def.py +928 -180
- mindspore/ops/operations/math_ops.py +32 -234
- mindspore/ops/operations/nn_ops.py +212 -531
- mindspore/ops/operations/other_ops.py +62 -9
- mindspore/ops/operations/random_ops.py +13 -7
- mindspore/ops/operations/reshard_ops.py +1 -1
- mindspore/ops/operations/sparse_ops.py +2 -2
- mindspore/ops/primitive.py +66 -53
- mindspore/ops/tensor_method.py +1895 -0
- mindspore/ops_generate/__init__.py +0 -5
- mindspore/ops_generate/aclnn/__init__.py +0 -0
- mindspore/ops_generate/aclnn/aclnn_kernel_register_auto_cc_generator.py +135 -0
- mindspore/ops_generate/aclnn/gen_aclnn_implement.py +257 -0
- mindspore/ops_generate/api/__init__.py +0 -0
- mindspore/ops_generate/api/add_tensor_docs_generator.py +56 -0
- mindspore/ops_generate/api/cpp_create_prim_instance_helper_generator.py +105 -0
- mindspore/ops_generate/api/functional_map_cpp_generator.py +504 -0
- mindspore/ops_generate/api/functional_overload_py_generator.py +112 -0
- mindspore/ops_generate/api/functions_cc_generator.py +237 -0
- mindspore/ops_generate/api/gen_api.py +103 -0
- mindspore/ops_generate/api/op_api_proto.py +235 -0
- mindspore/ops_generate/api/tensor_func_reg_cpp_generator.py +461 -0
- mindspore/ops_generate/common/__init__.py +0 -0
- mindspore/ops_generate/common/base_generator.py +11 -0
- mindspore/ops_generate/common/gen_constants.py +91 -0
- mindspore/ops_generate/common/gen_utils.py +348 -0
- mindspore/ops_generate/common/op_proto.py +473 -0
- mindspore/ops_generate/common/template.py +523 -0
- mindspore/ops_generate/gen_ops.py +22 -1069
- mindspore/ops_generate/op_def/__init__.py +0 -0
- mindspore/ops_generate/op_def/gen_op_def.py +90 -0
- mindspore/ops_generate/op_def/lite_ops_cpp_generator.py +191 -0
- mindspore/ops_generate/op_def/ops_def_cc_generator.py +296 -0
- mindspore/ops_generate/op_def/ops_def_h_generator.py +74 -0
- mindspore/ops_generate/op_def/ops_name_h_generator.py +83 -0
- mindspore/ops_generate/op_def/ops_primitive_h_generator.py +125 -0
- mindspore/ops_generate/op_def_py/__init__.py +0 -0
- mindspore/ops_generate/op_def_py/gen_op_def_py.py +47 -0
- mindspore/ops_generate/op_def_py/op_def_py_generator.py +132 -0
- mindspore/ops_generate/op_def_py/op_prim_py_generator.py +489 -0
- mindspore/ops_generate/pyboost/__init__.py +0 -0
- mindspore/ops_generate/pyboost/auto_grad_impl_cc_generator.py +139 -0
- mindspore/ops_generate/pyboost/auto_grad_reg_cc_generator.py +93 -0
- mindspore/ops_generate/pyboost/gen_pyboost_func.py +175 -0
- mindspore/ops_generate/pyboost/op_template_parser.py +517 -0
- mindspore/ops_generate/pyboost/pyboost_functions_cpp_generator.py +407 -0
- mindspore/ops_generate/pyboost/pyboost_functions_h_generator.py +100 -0
- mindspore/ops_generate/pyboost/pyboost_functions_py_generator.py +148 -0
- mindspore/ops_generate/pyboost/pyboost_grad_function_cpp_generator.py +155 -0
- mindspore/ops_generate/pyboost/pyboost_inner_prim_generator.py +132 -0
- mindspore/ops_generate/pyboost/pyboost_native_grad_functions_generator.py +272 -0
- mindspore/ops_generate/pyboost/pyboost_op_cpp_code_generator.py +938 -0
- mindspore/ops_generate/pyboost/pyboost_overload_functions_cpp_generator.py +357 -0
- mindspore/ops_generate/{pyboost_utils.py → pyboost/pyboost_utils.py} +179 -36
- mindspore/ops_generate/resources/__init__.py +0 -0
- mindspore/ops_generate/resources/resource_list.py +30 -0
- mindspore/ops_generate/resources/resource_loader.py +36 -0
- mindspore/ops_generate/resources/resource_manager.py +64 -0
- mindspore/ops_generate/resources/yaml_loader.py +88 -0
- mindspore/ops_generate/tensor_py_cc_generator.py +122 -0
- mindspore/parallel/__init__.py +7 -3
- mindspore/parallel/_auto_parallel_context.py +159 -40
- mindspore/parallel/_cell_wrapper.py +132 -15
- mindspore/parallel/_parallel_serialization.py +107 -5
- mindspore/parallel/_ps_context.py +1 -1
- mindspore/parallel/_recovery_context.py +7 -2
- mindspore/parallel/_tensor.py +142 -18
- mindspore/parallel/_utils.py +199 -23
- mindspore/parallel/algo_parameter_config.py +4 -4
- mindspore/parallel/auto_parallel.py +732 -0
- mindspore/parallel/checkpoint_convert.py +159 -0
- mindspore/parallel/checkpoint_transform.py +700 -35
- mindspore/parallel/cluster/process_entity/_api.py +276 -50
- mindspore/parallel/cluster/process_entity/_utils.py +41 -6
- mindspore/parallel/cluster/run.py +21 -4
- mindspore/parallel/function/__init__.py +24 -0
- mindspore/parallel/function/reshard_func.py +258 -0
- mindspore/parallel/nn/__init__.py +25 -0
- mindspore/parallel/nn/parallel_cell_wrapper.py +263 -0
- mindspore/parallel/nn/parallel_grad_reducer.py +169 -0
- mindspore/parallel/parameter_broadcast.py +25 -14
- mindspore/parallel/shard.py +137 -59
- mindspore/parallel/transform_safetensors.py +364 -305
- mindspore/pgodb140.dll +0 -0
- mindspore/pgort140.dll +0 -0
- mindspore/profiler/__init__.py +22 -5
- mindspore/profiler/analysis/__init__.py +0 -0
- mindspore/profiler/analysis/parser/__init__.py +0 -0
- mindspore/profiler/analysis/parser/ascend_cann_parser.py +170 -0
- mindspore/profiler/analysis/parser/base_parser.py +158 -0
- mindspore/profiler/analysis/parser/framework_cann_relation_parser.py +45 -0
- mindspore/profiler/analysis/parser/ms_framework_parser.py +142 -0
- mindspore/profiler/analysis/parser/ms_minddata_parser.py +145 -0
- mindspore/profiler/analysis/parser/timeline_assembly_factory/__init__.py +0 -0
- mindspore/profiler/analysis/parser/timeline_assembly_factory/ascend_timeline_assembler.py +264 -0
- mindspore/profiler/analysis/parser/timeline_assembly_factory/base_timeline_assembler.py +40 -0
- mindspore/profiler/analysis/parser/timeline_assembly_factory/trace_view_container.py +109 -0
- mindspore/profiler/analysis/parser/timeline_creator/__init__.py +0 -0
- mindspore/profiler/analysis/parser/timeline_creator/base_timeline_creator.py +44 -0
- mindspore/profiler/analysis/parser/timeline_creator/cpu_op_timeline_creator.py +90 -0
- mindspore/profiler/analysis/parser/timeline_creator/fwk_timeline_creator.py +76 -0
- mindspore/profiler/analysis/parser/timeline_creator/msprof_timeline_creator.py +103 -0
- mindspore/profiler/analysis/parser/timeline_creator/scope_layer_timeline_creator.py +134 -0
- mindspore/profiler/analysis/parser/timeline_event/__init__.py +0 -0
- mindspore/profiler/analysis/parser/timeline_event/base_event.py +233 -0
- mindspore/profiler/analysis/parser/timeline_event/cpu_op_event.py +47 -0
- mindspore/profiler/analysis/parser/timeline_event/flow_event.py +36 -0
- mindspore/profiler/analysis/parser/timeline_event/fwk_event.py +415 -0
- mindspore/profiler/analysis/parser/timeline_event/msprof_event.py +73 -0
- mindspore/profiler/analysis/parser/timeline_event/scope_layer_event.py +53 -0
- mindspore/profiler/analysis/parser/timeline_event/timeline_event_pool.py +146 -0
- mindspore/profiler/analysis/task_manager.py +131 -0
- mindspore/profiler/analysis/time_converter.py +84 -0
- mindspore/profiler/analysis/viewer/__init__.py +0 -0
- mindspore/profiler/analysis/viewer/ascend_communication_viewer.py +372 -0
- mindspore/profiler/analysis/viewer/ascend_integrate_viewer.py +87 -0
- mindspore/profiler/analysis/viewer/ascend_kernel_details_viewer.py +250 -0
- mindspore/profiler/analysis/viewer/ascend_memory_viewer.py +320 -0
- mindspore/profiler/analysis/viewer/ascend_op_memory_viewer.py +327 -0
- mindspore/profiler/analysis/viewer/ascend_step_trace_time_viewer.py +376 -0
- mindspore/profiler/analysis/viewer/ascend_timeline_viewer.py +58 -0
- mindspore/profiler/analysis/viewer/base_viewer.py +26 -0
- mindspore/profiler/analysis/viewer/ms_dataset_viewer.py +96 -0
- mindspore/profiler/analysis/viewer/ms_minddata_viewer.py +581 -0
- mindspore/profiler/analysis/work_flow.py +73 -0
- mindspore/profiler/common/ascend_msprof_exporter.py +139 -0
- mindspore/profiler/common/command_executor.py +90 -0
- mindspore/profiler/common/constant.py +186 -3
- mindspore/profiler/common/file_manager.py +208 -0
- mindspore/profiler/common/log.py +130 -0
- mindspore/profiler/common/msprof_cmd_tool.py +221 -0
- mindspore/profiler/common/path_manager.py +395 -0
- mindspore/profiler/common/process_bar.py +168 -0
- mindspore/profiler/common/process_pool.py +9 -3
- mindspore/profiler/common/profiler_context.py +500 -0
- mindspore/profiler/common/profiler_info.py +304 -0
- mindspore/profiler/common/profiler_meta_data.py +74 -0
- mindspore/profiler/common/profiler_output_path.py +284 -0
- mindspore/profiler/common/profiler_parameters.py +251 -0
- mindspore/profiler/common/profiler_path_manager.py +179 -0
- mindspore/profiler/common/record_function.py +76 -0
- mindspore/profiler/common/tlv_decoder.py +76 -0
- mindspore/profiler/common/util.py +75 -2
- mindspore/profiler/dynamic_profiler.py +341 -75
- mindspore/profiler/envprofiler.py +163 -0
- mindspore/profiler/experimental_config.py +197 -0
- mindspore/profiler/mstx.py +242 -0
- mindspore/profiler/platform/__init__.py +21 -0
- mindspore/profiler/platform/base_profiler.py +40 -0
- mindspore/profiler/platform/cpu_profiler.py +124 -0
- mindspore/profiler/platform/gpu_profiler.py +74 -0
- mindspore/profiler/platform/npu_profiler.py +335 -0
- mindspore/profiler/profiler.py +1073 -90
- mindspore/profiler/profiler_action_controller.py +187 -0
- mindspore/profiler/profiler_interface.py +118 -0
- mindspore/profiler/schedule.py +243 -0
- mindspore/rewrite/api/node.py +15 -13
- mindspore/rewrite/api/symbol_tree.py +2 -3
- mindspore/run_check/_check_version.py +27 -20
- mindspore/run_check/run_check.py +1 -1
- mindspore/runtime/__init__.py +37 -0
- mindspore/runtime/device.py +27 -0
- mindspore/runtime/event.py +209 -0
- mindspore/runtime/executor.py +177 -0
- mindspore/runtime/memory.py +416 -0
- mindspore/runtime/stream.py +460 -0
- mindspore/runtime/thread_bind_core.py +401 -0
- mindspore/safeguard/rewrite_obfuscation.py +12 -9
- mindspore/swresample-4.dll +0 -0
- mindspore/swscale-6.dll +0 -0
- mindspore/tbbmalloc.dll +0 -0
- mindspore/tinyxml2.dll +0 -0
- mindspore/train/__init__.py +8 -8
- mindspore/train/_utils.py +96 -27
- mindspore/train/amp.py +9 -5
- mindspore/train/callback/__init__.py +2 -2
- mindspore/train/callback/_callback.py +2 -16
- mindspore/train/callback/_checkpoint.py +53 -55
- mindspore/train/callback/_cluster_monitor.py +14 -18
- mindspore/train/callback/_early_stop.py +1 -1
- mindspore/train/callback/_flops_collector.py +103 -68
- mindspore/train/callback/_history.py +8 -5
- mindspore/train/callback/_lambda_callback.py +2 -2
- mindspore/train/callback/_landscape.py +0 -3
- mindspore/train/callback/_loss_monitor.py +2 -1
- mindspore/train/callback/_on_request_exit.py +6 -5
- mindspore/train/callback/_reduce_lr_on_plateau.py +11 -6
- mindspore/train/callback/_summary_collector.py +52 -19
- mindspore/train/callback/_time_monitor.py +2 -1
- mindspore/train/callback/{_tft_register.py → _train_fault_tolerance.py} +228 -108
- mindspore/train/data_sink.py +25 -2
- mindspore/train/dataset_helper.py +15 -16
- mindspore/train/loss_scale_manager.py +8 -7
- mindspore/train/metrics/accuracy.py +3 -3
- mindspore/train/metrics/confusion_matrix.py +9 -9
- mindspore/train/metrics/error.py +3 -3
- mindspore/train/metrics/hausdorff_distance.py +4 -4
- mindspore/train/metrics/mean_surface_distance.py +3 -3
- mindspore/train/metrics/metric.py +0 -12
- mindspore/train/metrics/occlusion_sensitivity.py +4 -2
- mindspore/train/metrics/precision.py +11 -10
- mindspore/train/metrics/recall.py +9 -9
- mindspore/train/metrics/root_mean_square_surface_distance.py +2 -2
- mindspore/train/mind_ir_pb2.py +174 -46
- mindspore/train/model.py +269 -136
- mindspore/train/serialization.py +622 -978
- mindspore/train/summary/_summary_adapter.py +2 -2
- mindspore/train/summary/summary_record.py +2 -3
- mindspore/train/train_thor/model_thor.py +1 -1
- mindspore/turbojpeg.dll +0 -0
- mindspore/utils/__init__.py +6 -3
- mindspore/utils/dryrun.py +140 -0
- mindspore/utils/hooks.py +81 -0
- mindspore/utils/runtime_execution_order_check.py +552 -0
- mindspore/utils/utils.py +138 -4
- mindspore/vcmeta.dll +0 -0
- mindspore/vcruntime140.dll +0 -0
- mindspore/vcruntime140_1.dll +0 -0
- mindspore/version.py +1 -1
- {mindspore-2.4.10.dist-info → mindspore-2.6.0.dist-info}/METADATA +3 -3
- {mindspore-2.4.10.dist-info → mindspore-2.6.0.dist-info}/RECORD +587 -418
- {mindspore-2.4.10.dist-info → mindspore-2.6.0.dist-info}/entry_points.txt +1 -1
- mindspore/_install_custom.py +0 -43
- mindspore/common/_register_for_adapter.py +0 -74
- mindspore/common/_tensor_overload.py +0 -139
- mindspore/mindspore_np_dtype.dll +0 -0
- mindspore/ops/auto_generate/gen_arg_dtype_cast.py +0 -252
- mindspore/ops/auto_generate/gen_arg_handler.py +0 -197
- mindspore/ops/operations/_opaque_predicate_registry.py +0 -41
- mindspore/ops_generate/gen_aclnn_implement.py +0 -263
- mindspore/ops_generate/gen_ops_inner_prim.py +0 -131
- mindspore/ops_generate/gen_pyboost_func.py +0 -1052
- mindspore/ops_generate/gen_utils.py +0 -209
- mindspore/ops_generate/op_proto.py +0 -145
- mindspore/ops_generate/template.py +0 -261
- mindspore/profiler/envprofiling.py +0 -254
- mindspore/profiler/profiling.py +0 -1926
- {mindspore-2.4.10.dist-info → mindspore-2.6.0.dist-info}/WHEEL +0 -0
- {mindspore-2.4.10.dist-info → mindspore-2.6.0.dist-info}/top_level.txt +0 -0
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# Copyright 2020-
|
|
1
|
+
# Copyright 2020-2024 Huawei Technologies Co., Ltd
|
|
2
2
|
#
|
|
3
3
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
4
|
# you may not use this file except in compliance with the License.
|
|
@@ -31,15 +31,18 @@ from mindspore.ops.primitive import PrimitiveWithInfer
|
|
|
31
31
|
from mindspore.ops.primitive import PrimitiveWithCheck
|
|
32
32
|
from mindspore.ops.primitive import prim_attr_register
|
|
33
33
|
from mindspore.run_check._check_version import AscendEnvChecker
|
|
34
|
-
from
|
|
34
|
+
from mindspore._c_expression import pyboost_all_finite
|
|
35
|
+
from mindspore.common._stub_tensor import _convert_stub
|
|
36
|
+
from ..auto_generate import (CeLU, Flatten, LogSoftmax, LogSoftmaxExt, GLU, ReLU, ReLU6, Dense, Tanh,
|
|
35
37
|
Elu, Sigmoid, Softmax, SoftplusExt, HSwish, HSigmoid, AvgPool, BiasAdd,
|
|
36
38
|
NLLLoss, OneHot, GeLU, FastGeLU, PReLU, RmsNorm, IncreFlashAttention, MSELossExt,
|
|
37
39
|
GridSampler3D, GridSampler2D, LayerNorm, LayerNormExt, HShrink, AdamWeightDecay, Dropout,
|
|
38
|
-
ApplyRotaryPosEmb, PagedAttention, PagedAttentionMask, ReshapeAndCache,
|
|
39
|
-
FlashAttentionScore, Embedding, UpsampleNearest1D, UpsampleNearest2D,
|
|
40
|
+
ApplyRotaryPosEmb, GroupTopk, PagedAttention, PagedAttentionMask, ReshapeAndCache,
|
|
41
|
+
FlashAttentionScore, PromptFlashAttention, Embedding, UpsampleNearest1D, UpsampleNearest2D,
|
|
40
42
|
UpsampleNearest3D, UpsampleTrilinear3D,
|
|
41
|
-
UpsampleBilinear2D, UpsampleLinear1D,
|
|
42
|
-
BinaryCrossEntropy, BCEWithLogitsLoss, SoftShrink
|
|
43
|
+
SoftMarginLoss, UpsampleBilinear2D, UpsampleLinear1D,
|
|
44
|
+
BinaryCrossEntropy, BCEWithLogitsLoss, SoftShrink, AdaptiveMaxPool2D,
|
|
45
|
+
SmoothL1Loss)
|
|
43
46
|
from .manually_defined import BatchNorm
|
|
44
47
|
|
|
45
48
|
|
|
@@ -246,78 +249,6 @@ class AdaptiveAvgPool2D(Primitive):
|
|
|
246
249
|
self.add_prim_attr('output_size', self.output_size)
|
|
247
250
|
|
|
248
251
|
|
|
249
|
-
class AdaptiveMaxPool2D(Primitive):
|
|
250
|
-
r"""
|
|
251
|
-
Performs 2D adaptive max pooling on a multi-plane input signal.
|
|
252
|
-
|
|
253
|
-
Refer to :func:`mindspore.ops.adaptive_max_pool2d` for more details.
|
|
254
|
-
|
|
255
|
-
Args:
|
|
256
|
-
output_size (Union[int, tuple]): The target output size. `output_size` can be a tuple :math:`(H, W)`,
|
|
257
|
-
or an int H for :math:`(H, H)`. :math:`H` and :math:`W` can be int or None.
|
|
258
|
-
If it is None, it means the output size is the same as the input size.
|
|
259
|
-
|
|
260
|
-
Inputs:
|
|
261
|
-
- **input_x** (Tensor) - The input of AdaptiveMaxPool2D, which is a 3D or 4D tensor,
|
|
262
|
-
with float16, float32 or float64 data type.
|
|
263
|
-
|
|
264
|
-
Outputs:
|
|
265
|
-
Tensor, with the same type as the `input_x`.
|
|
266
|
-
|
|
267
|
-
Supported Platforms:
|
|
268
|
-
``Ascend`` ``GPU`` ``CPU``
|
|
269
|
-
|
|
270
|
-
Examples:
|
|
271
|
-
>>> # case 1: output_size=(None, 2)
|
|
272
|
-
>>> input_x = Tensor(np.array([[[[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]],
|
|
273
|
-
... [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]],
|
|
274
|
-
... [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]]]]), mindspore.float32)
|
|
275
|
-
>>> adaptive_max_pool_2d = ops.AdaptiveMaxPool2D((None, 2))
|
|
276
|
-
>>> output = adaptive_max_pool_2d(input_x)
|
|
277
|
-
>>> print(output[0])
|
|
278
|
-
[[[[2. 3.]
|
|
279
|
-
[5. 6.]
|
|
280
|
-
[8. 9.]]
|
|
281
|
-
[[2. 3.]
|
|
282
|
-
[5. 6.]
|
|
283
|
-
[8. 9.]]
|
|
284
|
-
[[2. 3.]
|
|
285
|
-
[5. 6.]
|
|
286
|
-
[8. 9.]]]]
|
|
287
|
-
>>> # case 2: output_size=2
|
|
288
|
-
>>> adaptive_max_pool_2d = ops.AdaptiveMaxPool2D(2)
|
|
289
|
-
>>> output = adaptive_max_pool_2d(input_x)
|
|
290
|
-
>>> print(output[0])
|
|
291
|
-
[[[[5. 6.]
|
|
292
|
-
[8. 9.]]
|
|
293
|
-
[[5. 6.]
|
|
294
|
-
[8. 9.]]
|
|
295
|
-
[[5. 6.]
|
|
296
|
-
[8. 9.]]]]
|
|
297
|
-
>>> # case 3: output_size=(1, 2)
|
|
298
|
-
>>> adaptive_max_pool_2d = ops.AdaptiveMaxPool2D((1, 2))
|
|
299
|
-
>>> output = adaptive_max_pool_2d(input_x)
|
|
300
|
-
>>> print(output[0])
|
|
301
|
-
[[[[8. 9.]]
|
|
302
|
-
[[8. 9.]]
|
|
303
|
-
[[8. 9.]]]]
|
|
304
|
-
"""
|
|
305
|
-
|
|
306
|
-
@prim_attr_register
|
|
307
|
-
def __init__(self, output_size):
|
|
308
|
-
"""Initialize AdaptiveMaxPool2D."""
|
|
309
|
-
validator.check_value_type("output_size", output_size, [int, tuple], self.name)
|
|
310
|
-
if isinstance(output_size, tuple):
|
|
311
|
-
validator.check_int(len(output_size), 2, validator.EQ,
|
|
312
|
-
'length of output_size', self.name)
|
|
313
|
-
self.output_size = (output_size, output_size) if isinstance(self.output_size, int) else output_size
|
|
314
|
-
self.output_size = (-1 if self.output_size[0] is None else self.output_size[0],
|
|
315
|
-
-1 if self.output_size[1] is None else self.output_size[1])
|
|
316
|
-
for size in self.output_size:
|
|
317
|
-
validator.check_number("output_size", size, -1, validator.GE, None)
|
|
318
|
-
self.add_prim_attr('output_size', self.output_size)
|
|
319
|
-
|
|
320
|
-
|
|
321
252
|
class AdaptiveMaxPool3D(Primitive):
|
|
322
253
|
r"""
|
|
323
254
|
Performs 3D adaptive max pooling on a multi-plane input signal.
|
|
@@ -612,12 +543,12 @@ class InstanceNorm(PrimitiveWithInfer):
|
|
|
612
543
|
Inputs:
|
|
613
544
|
- **input_x** (Tensor) - The input of InstanceNorm, Tensor of shape :math:`(N, C)`,
|
|
614
545
|
data type: float16 or float32.
|
|
615
|
-
- **gamma** (Parameter) - Scale, Tensor of shape :math:`(C,)`,
|
|
546
|
+
- **gamma** (Union[Parameter, Tensor])) - Scale, Tensor of shape :math:`(C,)`,
|
|
616
547
|
data type: float32.
|
|
617
|
-
- **beta** (Parameter) - Bias, Tensor of shape :math:`(C,)`,
|
|
548
|
+
- **beta** (Union[Parameter, Tensor])) - Bias, Tensor of shape :math:`(C,)`,
|
|
618
549
|
data type: float32.
|
|
619
|
-
- **mean** (Parameter) - Mean value, Tensor of shape :math:`(C,)`, data type: float32.
|
|
620
|
-
- **variance** (Parameter) - Variance value, Tensor of shape :math:`(C,)`, data type: float32.
|
|
550
|
+
- **mean** (Union[Parameter, Tensor])) - Mean value, Tensor of shape :math:`(C,)`, data type: float32.
|
|
551
|
+
- **variance** (Union[Parameter, Tensor])) - Variance value, Tensor of shape :math:`(C,)`, data type: float32.
|
|
621
552
|
|
|
622
553
|
Outputs:
|
|
623
554
|
Tuple of 3 Tensors, the normalized input, the updated parameters.
|
|
@@ -880,13 +811,13 @@ class Conv2D(Primitive):
|
|
|
880
811
|
|
|
881
812
|
Inputs:
|
|
882
813
|
- **x** (Tensor) - Input tensor of shape :math:`(N, C_{in}, H_{in}, W_{in})` or
|
|
883
|
-
:math:`(N, H_{in}, W_{in}, C_{in}
|
|
814
|
+
:math:`(N, H_{in}, W_{in}, C_{in})` depending on `data_format` .
|
|
884
815
|
- **weight** (Tensor) - The convolutional kernel value, it should has shape
|
|
885
816
|
:math:`(C_{out}, C_{in} / \text{group}, \text{kernel_size[0]}, \text{kernel_size[1]})` .
|
|
886
817
|
|
|
887
818
|
Outputs:
|
|
888
819
|
Tensor, the value that applied 2D convolution. The shape is :math:`(N, C_{out}, H_{out}, W_{out})`
|
|
889
|
-
or :math:`(N, H_{out}, W_{out}, C_{out}
|
|
820
|
+
or :math:`(N, H_{out}, W_{out}, C_{out})`.
|
|
890
821
|
To see how different pad modes affect the output shape, please refer to
|
|
891
822
|
:class:`mindspore.nn.Conv2d` for more details.
|
|
892
823
|
|
|
@@ -2052,17 +1983,18 @@ class Conv2DTranspose(Conv2DBackpropInput):
|
|
|
2052
1983
|
If this mode is set, `pad` must be greater than or equal to 0.
|
|
2053
1984
|
|
|
2054
1985
|
Please refer to :class:`mindspore.nn.Conv2dTranspose` for more specifications about `pad_mode`.
|
|
2055
|
-
pad (Union[int, tuple[int]]): The pad value to be filled. Default: ``0`` .
|
|
2056
|
-
|
|
2057
|
-
|
|
2058
|
-
|
|
2059
|
-
|
|
2060
|
-
|
|
2061
|
-
|
|
2062
|
-
|
|
1986
|
+
pad (Union[int, tuple[int]], optional): The pad value to be filled. Default: ``0`` .
|
|
1987
|
+
If `pad` is an integer, the paddings
|
|
1988
|
+
of top, bottom, left and right are the same, equal to pad. If `pad` is a tuple of four integers,
|
|
1989
|
+
the padding of top, bottom, left and right equal to pad[0], pad[1], pad[2], and pad[3]
|
|
1990
|
+
correspondingly.
|
|
1991
|
+
pad_list (Union[str, None], optional): The pad list like (top, bottom, left, right). Default: ``None`` .
|
|
1992
|
+
mode (int, optional): Modes for different convolutions. The value is currently not used. Default: ``1`` .
|
|
1993
|
+
stride (Union[int, tuple[int]], optional): The stride to be applied to the convolution filter. Default: ``1`` .
|
|
1994
|
+
dilation (Union[int, tuple[int]], optional): Specifies the dilation rate to be used for the dilated convolution.
|
|
2063
1995
|
Default: ``1`` .
|
|
2064
|
-
group (int): Splits input into groups. Default: ``1`` .
|
|
2065
|
-
data_format (str): The format of input and output data. It should be ``'NHWC'`` or ``'NCHW'`` .
|
|
1996
|
+
group (int, optional): Splits input into groups. Default: ``1`` .
|
|
1997
|
+
data_format (str, optional): The format of input and output data. It should be ``'NHWC'`` or ``'NCHW'`` .
|
|
2066
1998
|
Default is ``'NCHW'`` .
|
|
2067
1999
|
|
|
2068
2000
|
Inputs:
|
|
@@ -2130,7 +2062,7 @@ class SoftmaxCrossEntropyWithLogits(Primitive):
|
|
|
2130
2062
|
- **labels** (Tensor) - Ground truth labels, with shape :math:`(N, C)`, has the same data type with `logits`.
|
|
2131
2063
|
|
|
2132
2064
|
Outputs:
|
|
2133
|
-
Tuple of 2 tensors(loss, dlogits), the `loss` shape is :math:`(N,)`,
|
|
2065
|
+
Tuple of 2 tensors( `loss` , `dlogits` ), the `loss` shape is :math:`(N,)`,
|
|
2134
2066
|
and the `dlogits` with the same shape as `logits`.
|
|
2135
2067
|
|
|
2136
2068
|
Raises:
|
|
@@ -2164,7 +2096,7 @@ class SparseSoftmaxCrossEntropyWithLogits(Primitive):
|
|
|
2164
2096
|
r"""
|
|
2165
2097
|
Computes the softmax cross-entropy value between logits and sparse encoding labels.
|
|
2166
2098
|
|
|
2167
|
-
Sets input logits as `X`, input label as `Y`, output as `loss`.
|
|
2099
|
+
Sets input logits as `X`, input label as `Y`, output as `loss`. The formula is as follows:
|
|
2168
2100
|
|
|
2169
2101
|
.. math::
|
|
2170
2102
|
\begin{array}{ll} \\
|
|
@@ -2174,7 +2106,7 @@ class SparseSoftmaxCrossEntropyWithLogits(Primitive):
|
|
|
2174
2106
|
\end{array}
|
|
2175
2107
|
|
|
2176
2108
|
Args:
|
|
2177
|
-
is_grad (bool): If ``True`` , this operation returns the computed gradient. Default: ``False`` .
|
|
2109
|
+
is_grad (bool, optional): If ``True`` , this operation returns the computed gradient. Default: ``False`` .
|
|
2178
2110
|
|
|
2179
2111
|
Inputs:
|
|
2180
2112
|
- **logits** (Tensor) - Input logits, with shape :math:`(N, C)`. Data type must be float16 or float32.
|
|
@@ -2182,7 +2114,7 @@ class SparseSoftmaxCrossEntropyWithLogits(Primitive):
|
|
|
2182
2114
|
Data type must be int32 or int64.
|
|
2183
2115
|
|
|
2184
2116
|
Outputs:
|
|
2185
|
-
Tensor, if `is_grad` is False
|
|
2117
|
+
Tensor, if `is_grad` is ``False``, the output tensor is the value of loss;
|
|
2186
2118
|
if `is_grad` is ``True`` , the output tensor is the gradient of input with the same shape as `logits`.
|
|
2187
2119
|
|
|
2188
2120
|
Raises:
|
|
@@ -2281,15 +2213,15 @@ class ApplyMomentum(Primitive):
|
|
|
2281
2213
|
Refer to :class:`mindspore.nn.Momentum` for more details about the formula and usage.
|
|
2282
2214
|
|
|
2283
2215
|
Args:
|
|
2284
|
-
use_locking (bool): Whether to enable a lock to protect the variable and accumulation tensors
|
|
2216
|
+
use_locking (bool, optional): Whether to enable a lock to protect the variable and accumulation tensors
|
|
2285
2217
|
from being updated. Default: ``False`` .
|
|
2286
|
-
use_nesterov (bool): Enable Nesterov momentum. Default: ``False`` .
|
|
2287
|
-
gradient_scale (float): The scale of the gradient. Default: ``1.0`` .
|
|
2218
|
+
use_nesterov (bool, optional): Enable Nesterov momentum. Default: ``False`` .
|
|
2219
|
+
gradient_scale (float, optional): The scale of the gradient. Default: ``1.0`` .
|
|
2288
2220
|
|
|
2289
2221
|
Inputs:
|
|
2290
|
-
- **variable** (Parameter) - Weights to be updated. Data type must be float64, int64, float,
|
|
2291
|
-
int16, int32, int8, uint16, uint32, uint64, uint8, complex64, complex128.
|
|
2292
|
-
- **accumulation** (Parameter) - Accumulated gradient value by moment weight,
|
|
2222
|
+
- **variable** (Union[Parameter, Tensor]) - Weights to be updated. Data type must be float64, int64, float,
|
|
2223
|
+
float16, int16, int32, int8, uint16, uint32, uint64, uint8, complex64, complex128.
|
|
2224
|
+
- **accumulation** (Union[Parameter, Tensor]) - Accumulated gradient value by moment weight,
|
|
2293
2225
|
has the same data type with `variable`.
|
|
2294
2226
|
- **learning_rate** (Union[Number, Tensor]) - The learning rate value, must be a float64, int64, float,
|
|
2295
2227
|
float16, int16, int32, int8, uint16, uint32, uint64, uint8, complex64, complex128 number or
|
|
@@ -2306,7 +2238,7 @@ class ApplyMomentum(Primitive):
|
|
|
2306
2238
|
|
|
2307
2239
|
Raises:
|
|
2308
2240
|
TypeError: If the `use_locking` or `use_nesterov` is not a bool or `gradient_scale` is not a float.
|
|
2309
|
-
TypeError: If the data type of `var`, `accum` and `grad` conversion
|
|
2241
|
+
TypeError: If the data type of `var`, `accum` and `grad` conversion is not supported.
|
|
2310
2242
|
|
|
2311
2243
|
Supported Platforms:
|
|
2312
2244
|
``Ascend`` ``GPU`` ``CPU``
|
|
@@ -2354,55 +2286,6 @@ class ApplyMomentum(Primitive):
|
|
|
2354
2286
|
self.add_prim_attr('side_effect_mem', True)
|
|
2355
2287
|
|
|
2356
2288
|
|
|
2357
|
-
class SmoothL1Loss(Primitive):
|
|
2358
|
-
r"""
|
|
2359
|
-
Calculate the smooth L1 loss, and the L1 loss function has robustness.
|
|
2360
|
-
|
|
2361
|
-
Refer to :func:`mindspore.ops.smooth_l1_loss` for more details.
|
|
2362
|
-
|
|
2363
|
-
Args:
|
|
2364
|
-
beta (float, optional): A parameter used to control the point where the function will change between
|
|
2365
|
-
L1 to L2 loss. The value should be greater than zero. Default: ``1.0`` .
|
|
2366
|
-
reduction (str, optional): Apply specific reduction method to the output: ``'none'`` , ``'mean'`` ,
|
|
2367
|
-
``'sum'`` . Default: ``'none'`` .
|
|
2368
|
-
|
|
2369
|
-
- ``'none'``: no reduction will be applied.
|
|
2370
|
-
- ``'mean'``: compute and return the mean of elements in the output.
|
|
2371
|
-
- ``'sum'``: the output elements will be summed.
|
|
2372
|
-
|
|
2373
|
-
Inputs:
|
|
2374
|
-
- **logits** (Tensor) - Input Tensor of any dimension. Data type must be float16, float32 or float64.
|
|
2375
|
-
- **labels** (Tensor) - Ground truth data, has the same shape and dtype as the `logits`.
|
|
2376
|
-
|
|
2377
|
-
Outputs:
|
|
2378
|
-
Tensor, loss float tensor, same shape and dtype as the `logits`.
|
|
2379
|
-
|
|
2380
|
-
Supported Platforms:
|
|
2381
|
-
``Ascend`` ``GPU`` ``CPU``
|
|
2382
|
-
|
|
2383
|
-
Examples:
|
|
2384
|
-
>>> import mindspore
|
|
2385
|
-
>>> import numpy as np
|
|
2386
|
-
>>> from mindspore import Tensor, ops
|
|
2387
|
-
>>> loss = ops.SmoothL1Loss()
|
|
2388
|
-
>>> logits = Tensor(np.array([1, 2, 3]), mindspore.float32)
|
|
2389
|
-
>>> labels = Tensor(np.array([1, 2, 2]), mindspore.float32)
|
|
2390
|
-
>>> output = loss(logits, labels)
|
|
2391
|
-
>>> print(output)
|
|
2392
|
-
[0. 0. 0.5]
|
|
2393
|
-
"""
|
|
2394
|
-
|
|
2395
|
-
@prim_attr_register
|
|
2396
|
-
def __init__(self, beta=1.0, reduction='none'):
|
|
2397
|
-
"""Initialize SmoothL1Loss."""
|
|
2398
|
-
validator.check_value_type('beta', beta, [float], self.name)
|
|
2399
|
-
validator.check('beta', beta, '', 0, validator.GT, self.name)
|
|
2400
|
-
validator.check_string(
|
|
2401
|
-
reduction, ['none', 'sum', 'mean'], 'reduction', self.name)
|
|
2402
|
-
self.add_prim_attr('sigma', self.beta)
|
|
2403
|
-
self.init_prim_io_names(inputs=['prediction', 'target'], outputs=['output'])
|
|
2404
|
-
|
|
2405
|
-
|
|
2406
2289
|
class MultiMarginLoss(Primitive):
|
|
2407
2290
|
r"""
|
|
2408
2291
|
Creates a loss function that minimizes the hinge loss
|
|
@@ -2470,63 +2353,6 @@ class MultiMarginLoss(Primitive):
|
|
|
2470
2353
|
return super().__call__(x, target, weight)
|
|
2471
2354
|
|
|
2472
2355
|
|
|
2473
|
-
class SoftMarginLoss(Primitive):
|
|
2474
|
-
r"""
|
|
2475
|
-
SoftMarginLoss operation.
|
|
2476
|
-
|
|
2477
|
-
Creates a criterion that optimizes a two-class classification
|
|
2478
|
-
logistic loss between input tensor :math:`x` and target tensor :math:`y`
|
|
2479
|
-
(containing 1 or -1).
|
|
2480
|
-
|
|
2481
|
-
.. math::
|
|
2482
|
-
\text{loss}(x, y) = \sum_i \frac{\log(1 + \exp(-y[i]*x[i]))}{\text{x.nelement}()}
|
|
2483
|
-
|
|
2484
|
-
where :math:`x.nelement()` is the number of elements of x.
|
|
2485
|
-
|
|
2486
|
-
Args:
|
|
2487
|
-
reduction (str, optional): Apply specific reduction method to the output: ``'none'`` , ``'mean'`` ,
|
|
2488
|
-
``'sum'`` . Default: ``'mean'`` .
|
|
2489
|
-
|
|
2490
|
-
- ``'none'``: no reduction will be applied.
|
|
2491
|
-
- ``'mean'``: compute and return the mean of elements in the output.
|
|
2492
|
-
- ``'sum'``: the output elements will be summed.
|
|
2493
|
-
|
|
2494
|
-
Inputs:
|
|
2495
|
-
- **logits** (Tensor) - Predict data. Data type must be float16 or float32.
|
|
2496
|
-
- **labels** (Tensor) - Ground truth data, with the same type and shape as `logits`.
|
|
2497
|
-
|
|
2498
|
-
Outputs:
|
|
2499
|
-
Tensor or Scalar, if `reduction` is ``"none"``, its shape is the same as `logits`.
|
|
2500
|
-
Otherwise, a scalar value will be returned.
|
|
2501
|
-
|
|
2502
|
-
Raises:
|
|
2503
|
-
TypeError: If `logits` or `labels` is not a Tensor.
|
|
2504
|
-
TypeError: If dtype of `logits` or `labels` is neither float16 nor float32.
|
|
2505
|
-
ValueError: If shape of `logits` is not the same as `labels`.
|
|
2506
|
-
ValueError: If `reduction` is not one of ``"none"`` , ``"mean"`` or ``"sum"`` .
|
|
2507
|
-
|
|
2508
|
-
Supported Platforms:
|
|
2509
|
-
``Ascend`` ``GPU``
|
|
2510
|
-
|
|
2511
|
-
Examples:
|
|
2512
|
-
>>> import mindspore
|
|
2513
|
-
>>> import numpy as np
|
|
2514
|
-
>>> from mindspore import Tensor, ops
|
|
2515
|
-
>>> loss = ops.SoftMarginLoss()
|
|
2516
|
-
>>> logits = Tensor(np.array([[0.3, 0.7], [0.5, 0.5]]), mindspore.float32)
|
|
2517
|
-
>>> labels = Tensor(np.array([[-1, 1], [1, -1]]), mindspore.float32)
|
|
2518
|
-
>>> output = loss(logits, labels)
|
|
2519
|
-
>>> print(output)
|
|
2520
|
-
0.6764238
|
|
2521
|
-
"""
|
|
2522
|
-
|
|
2523
|
-
@prim_attr_register
|
|
2524
|
-
def __init__(self, reduction="mean"):
|
|
2525
|
-
"""Initialize SoftMarginLoss"""
|
|
2526
|
-
self.init_prim_io_names(inputs=['predict', 'label'], outputs=['loss'])
|
|
2527
|
-
self.reduction = validator.check_string(reduction, ['none', 'sum', 'mean'], 'reduction', self.name)
|
|
2528
|
-
|
|
2529
|
-
|
|
2530
2356
|
class L2Loss(Primitive):
|
|
2531
2357
|
r"""
|
|
2532
2358
|
Calculates half of the L2 norm, but do not square the result.
|
|
@@ -2790,12 +2616,12 @@ class ApplyRMSProp(PrimitiveWithInfer):
|
|
|
2790
2616
|
:math:`\eta` represents `learning_rate`. :math:`\nabla Q_{i}(w)` represents `grad`.
|
|
2791
2617
|
|
|
2792
2618
|
.. warning::
|
|
2793
|
-
Note that in dense implementation of this algorithm,
|
|
2794
|
-
but in this sparse implementation,
|
|
2795
|
-
in iterations during which
|
|
2619
|
+
Note that in dense implementation of this algorithm, `mean_square` and `moment` will update even if `grad` is 0,
|
|
2620
|
+
but in this sparse implementation, `mean_square` and `moment` will not update
|
|
2621
|
+
in iterations during which `grad` is 0.
|
|
2796
2622
|
|
|
2797
2623
|
Args:
|
|
2798
|
-
use_locking (bool): Whether to enable a lock to protect the variable and accumulation tensors
|
|
2624
|
+
use_locking (bool, optional): Whether to enable a lock to protect the variable and accumulation tensors
|
|
2799
2625
|
from being updated. Default: ``False`` .
|
|
2800
2626
|
|
|
2801
2627
|
Inputs:
|
|
@@ -3453,7 +3279,7 @@ class ComputeAccidentalHits(Primitive):
|
|
|
3453
3279
|
the weight is FLOAT_MAX. FLOAT_MAX indicates the max value in the type of Float
|
|
3454
3280
|
|
|
3455
3281
|
Args:
|
|
3456
|
-
num_true (int): The number of target classes per training example. Default: ``1`` .
|
|
3282
|
+
num_true (int, optional): The number of target classes per training example. Default: ``1`` .
|
|
3457
3283
|
|
|
3458
3284
|
Inputs:
|
|
3459
3285
|
- **true_classes** (Tensor) - The target classes. With data type of int64
|
|
@@ -3610,11 +3436,11 @@ class Adam(Primitive):
|
|
|
3610
3436
|
If ``False`` , update the gradients without using NAG. Default: ``False`` .
|
|
3611
3437
|
|
|
3612
3438
|
Inputs:
|
|
3613
|
-
- **var** (Parameter) - Weights to be updated. The shape is :math:`(N, *)` where :math:`*` means,
|
|
3439
|
+
- **var** (Union[Parameter, Tensor]) - Weights to be updated. The shape is :math:`(N, *)` where :math:`*` means,
|
|
3614
3440
|
any number of additional dimensions. The data type can be float16 or float32.
|
|
3615
|
-
- **m** (Parameter) - The 1st moment vector in the updating formula,
|
|
3441
|
+
- **m** (Union[Parameter, Tensor]) - The 1st moment vector in the updating formula,
|
|
3616
3442
|
the shape should be the same as `var`.
|
|
3617
|
-
- **v** (Parameter) - the 2nd moment vector in the updating formula,
|
|
3443
|
+
- **v** (Union[Parameter, Tensor]) - the 2nd moment vector in the updating formula,
|
|
3618
3444
|
the shape should be the same as `var`.
|
|
3619
3445
|
- **beta1_power** (float) - :math:`beta_1^t(\beta_1^{t})` in the updating formula.
|
|
3620
3446
|
- **beta2_power** (float) - :math:`beta_2^t(\beta_2^{t})` in the updating formula.
|
|
@@ -3785,8 +3611,8 @@ class AdamNoUpdateParam(Primitive):
|
|
|
3785
3611
|
|
|
3786
3612
|
class FusedSparseAdam(Primitive):
|
|
3787
3613
|
r"""
|
|
3788
|
-
Merges the duplicate value of the gradient and then updates parameters by the Adaptive Moment Estimation
|
|
3789
|
-
algorithm. This operator is used when the gradient is sparse.
|
|
3614
|
+
Merges the duplicate value of the gradient and then updates parameters or tensors by the Adaptive Moment Estimation
|
|
3615
|
+
(Adam) algorithm. This operator is used when the gradient is sparse.
|
|
3790
3616
|
|
|
3791
3617
|
The Adam algorithm is proposed in `Adam: A Method for Stochastic Optimization <https://arxiv.org/abs/1412.6980>`_.
|
|
3792
3618
|
|
|
@@ -3819,11 +3645,12 @@ class FusedSparseAdam(Primitive):
|
|
|
3819
3645
|
If ``False`` , update the gradients without using NAG. Default: ``False`` .
|
|
3820
3646
|
|
|
3821
3647
|
Inputs:
|
|
3822
|
-
- **var** (Parameter) - Parameters to be updated with float32 data type. The shape is
|
|
3823
|
-
where :math:`*` means, any number of additional dimensions.
|
|
3824
|
-
- **m** (Parameter) - The 1st moment vector in the updating formula, has the same shape and data
|
|
3825
|
-
|
|
3826
|
-
|
|
3648
|
+
- **var** (Union[Parameter, Tensor]) - Parameters or tensors to be updated with float32 data type. The shape is:
|
|
3649
|
+
math:`(N, *)` where :math:`*` means, any number of additional dimensions.
|
|
3650
|
+
- **m** (Union[Parameter, Tensor]) - The 1st moment vector in the updating formula, has the same shape and data
|
|
3651
|
+
type as `var`.
|
|
3652
|
+
- **v** (Union[Parameter, Tensor]) - The 2nd moment vector in the updating formula, has the same shape and data
|
|
3653
|
+
type as `var`. Mean square gradients, has the same type as `var` with float32 data type.
|
|
3827
3654
|
- **beta1_power** (Tensor) - :math:`beta_1^t` in the updating formula with float32 data type.
|
|
3828
3655
|
The shape is :math:`(1, )`.
|
|
3829
3656
|
- **beta2_power** (Tensor) - :math:`beta_2^t` in the updating formula with float32 data type.
|
|
@@ -3841,7 +3668,7 @@ class FusedSparseAdam(Primitive):
|
|
|
3841
3668
|
- **indices** (Tensor) - Gradient indices with int32 data type and indices.shape[0] = gradient.shape[0].
|
|
3842
3669
|
|
|
3843
3670
|
Outputs:
|
|
3844
|
-
Tuple of 3 Tensors, this operator will update the input parameters directly, the outputs are useless.
|
|
3671
|
+
Tuple of 3 Tensors, this operator will update the input parameters or tensors directly, the outputs are useless.
|
|
3845
3672
|
|
|
3846
3673
|
- **var** (Tensor) - A Tensor with shape :math:`(N, *)`.
|
|
3847
3674
|
- **m** (Tensor) - A Tensor with shape :math:`(1, )`.
|
|
@@ -3911,8 +3738,8 @@ class FusedSparseAdam(Primitive):
|
|
|
3911
3738
|
|
|
3912
3739
|
class FusedSparseLazyAdam(Primitive):
|
|
3913
3740
|
r"""
|
|
3914
|
-
Merges the duplicate value of the gradient and then updates parameters by the Adaptive Moment Estimation
|
|
3915
|
-
algorithm. This operator is used when the gradient is sparse. The behavior is not equivalent to the
|
|
3741
|
+
Merges the duplicate value of the gradient and then updates parameters or tensors by the Adaptive Moment Estimation
|
|
3742
|
+
(Adam) algorithm. This operator is used when the gradient is sparse. The behavior is not equivalent to the
|
|
3916
3743
|
original Adam algorithm, as only the current indices parameters will be updated.
|
|
3917
3744
|
|
|
3918
3745
|
The Adam algorithm is proposed in `Adam: A Method for Stochastic Optimization <https://arxiv.org/abs/1412.6980>`_.
|
|
@@ -3946,11 +3773,12 @@ class FusedSparseLazyAdam(Primitive):
|
|
|
3946
3773
|
If ``False`` , update the gradients without using NAG. Default: ``False`` .
|
|
3947
3774
|
|
|
3948
3775
|
Inputs:
|
|
3949
|
-
- **var** (Parameter) - Parameters to be updated with float32 data type. The shape is
|
|
3950
|
-
where :math:`*` means, any number of additional dimensions.
|
|
3951
|
-
- **m** (Parameter) - The 1st moment vector in the updating formula, has the same shape and data
|
|
3952
|
-
|
|
3953
|
-
|
|
3776
|
+
- **var** (Union[Parameter, Tensor]) - Parameters or tensors to be updated with float32 data type. The shape is:
|
|
3777
|
+
:math:`(N, *)` where :math:`*` means, any number of additional dimensions.
|
|
3778
|
+
- **m** (Union[Parameter, Tensor]) - The 1st moment vector in the updating formula, has the same shape and data
|
|
3779
|
+
type as `var`.
|
|
3780
|
+
- **v** (Union[Parameter, Tensor]) - The 2nd moment vector in the updating formula, has the same shape and data
|
|
3781
|
+
type as `var`. It holds the mean square of gradients, with float32 data type.
|
|
3954
3782
|
- **beta1_power** (Tensor) - :math:`beta_1^t` in the updating formula with float32 data type.
|
|
3955
3783
|
The shape is :math:`(1, )`.
|
|
3956
3784
|
- **beta2_power** (Tensor) - :math:`beta_2^t` in the updating formula with float32 data type.
|
|
@@ -3968,7 +3796,7 @@ class FusedSparseLazyAdam(Primitive):
|
|
|
3968
3796
|
- **indices** (Tensor) - Gradient indices with int32 data type and indices.shape[0] = gradient.shape[0].
|
|
3969
3797
|
|
|
3970
3798
|
Outputs:
|
|
3971
|
-
Tuple of 3 Tensors, this operator will update the input parameters directly, the outputs are useless.
|
|
3799
|
+
Tuple of 3 Tensors, this operator will update the input parameters or tensors directly, the outputs are useless.
|
|
3972
3800
|
|
|
3973
3801
|
- **var** (Tensor) - A Tensor with shape :math:`(N, *)`.
|
|
3974
3802
|
- **m** (Tensor) - A Tensor with shape :math:`(1, )`.
|
|
@@ -4054,17 +3882,18 @@ class FusedSparseFtrl(Primitive):
|
|
|
4054
3882
|
use_locking (bool): Use locks for updating operation if True . Default: ``False`` .
|
|
4055
3883
|
|
|
4056
3884
|
Inputs:
|
|
4057
|
-
- **var** (Parameter) - The variable to be updated. The data type must be float32. The shape is
|
|
4058
|
-
where :math:`*` means, any number of additional dimensions.
|
|
4059
|
-
- **accum** (Parameter) - The accumulation to be updated, must be same type and shape as `var`.
|
|
4060
|
-
- **linear** (Parameter) - the linear coefficient to be updated, must be same type and shape as
|
|
3885
|
+
- **var** (Union[Parameter, Tensor]) - The variable to be updated. The data type must be float32. The shape is
|
|
3886
|
+
:math:`(N, *)` where :math:`*` means, any number of additional dimensions.
|
|
3887
|
+
- **accum** (Union[Parameter, Tensor]) - The accumulation to be updated, must be same type and shape as `var`.
|
|
3888
|
+
- **linear** (Union[Parameter, Tensor]) - The linear coefficient to be updated, must be same type and shape as
|
|
3889
|
+
`var`.
|
|
4061
3890
|
- **grad** (Tensor) - A tensor of the same type as `var` and
|
|
4062
3891
|
grad.shape[1:] = var.shape[1:] if var.shape > 1.
|
|
4063
3892
|
- **indices** (Tensor) - A vector of indices into the first dimension of `var` and `accum`.
|
|
4064
3893
|
The type must be int32 and indices.shape[0] = grad.shape[0].
|
|
4065
3894
|
|
|
4066
3895
|
Outputs:
|
|
4067
|
-
Tuple of 3 Tensor, this operator will update the input parameters directly, the outputs are useless.
|
|
3896
|
+
Tuple of 3 Tensors, this operator will update the input parameters or tensors directly, the outputs are useless.
|
|
4068
3897
|
|
|
4069
3898
|
- **var** (Tensor) - A Tensor with shape :math:`(N, *)`.
|
|
4070
3899
|
- **accum** (Tensor) - A Tensor with shape :math:`(1, )`.
|
|
@@ -4151,9 +3980,10 @@ class FusedSparseProximalAdagrad(Primitive):
|
|
|
4151
3980
|
Default: ``False`` .
|
|
4152
3981
|
|
|
4153
3982
|
Inputs:
|
|
4154
|
-
- **var** (Parameter) - Variable tensor to be updated. The data type must be float32.
|
|
3983
|
+
- **var** (Union[Parameter, Tensor]) - Variable tensor to be updated. The data type must be float32.
|
|
4155
3984
|
The shape is :math:`(N, *)` where :math:`*` means, any number of additional dimensions.
|
|
4156
|
-
- **accum** (Parameter) - Variable tensor to be updated, has the same shape and data type as
|
|
3985
|
+
- **accum** (Union[Parameter, Tensor]) - Variable tensor to be updated, has the same shape and data type as
|
|
3986
|
+
`var`.
|
|
4157
3987
|
- **lr** (Tensor) - The learning rate value. The data type must be float32. The shape is :math:`(1, )`.
|
|
4158
3988
|
- **l1** (Tensor) - l1 regularization strength. The data type must be float32. The shape is :math:`(1, )`.
|
|
4159
3989
|
- **l2** (Tensor) - l2 regularization strength. The data type must be float32. The shape is :math:`(1, )`.
|
|
@@ -4163,7 +3993,7 @@ class FusedSparseProximalAdagrad(Primitive):
|
|
|
4163
3993
|
The type must be int32 and indices.shape[0] = grad.shape[0].
|
|
4164
3994
|
|
|
4165
3995
|
Outputs:
|
|
4166
|
-
Tuple of 2 Tensors, this operator will update the input parameters directly, the outputs are useless.
|
|
3996
|
+
Tuple of 2 Tensors, this operator will update the input parameters or tensors directly, the outputs are useless.
|
|
4167
3997
|
|
|
4168
3998
|
- **var** (Tensor) - A Tensor with shape :math:`(N, *)`.
|
|
4169
3999
|
- **accum** (Tensor) - A Tensor with shape :math:`(1, )`.
|
|
@@ -4254,7 +4084,7 @@ class KLDivLoss(Primitive):
|
|
|
4254
4084
|
or ``'sum'``.
|
|
4255
4085
|
|
|
4256
4086
|
Args:
|
|
4257
|
-
reduction (str): Specifies the reduction to be applied to the output.
|
|
4087
|
+
reduction (str, optional): Specifies the reduction to be applied to the output.
|
|
4258
4088
|
Default: ``'mean'`` .
|
|
4259
4089
|
|
|
4260
4090
|
- ``'none'``: no reduction will be applied.
|
|
@@ -4275,7 +4105,7 @@ class KLDivLoss(Primitive):
|
|
|
4275
4105
|
TypeError: If neither `logits` nor `labels` is a Tensor.
|
|
4276
4106
|
TypeError: If dtype of `logits` or `labels` is not currently supported.
|
|
4277
4107
|
ValueError: If shape of `logits` is not the same as `labels`.
|
|
4278
|
-
RuntimeError: If `logits` or `labels` is a scalar when `reduction` is 'batchmean'
|
|
4108
|
+
RuntimeError: If `logits` or `labels` is a scalar when `reduction` is ``'batchmean'``.
|
|
4279
4109
|
|
|
4280
4110
|
Supported Platforms:
|
|
4281
4111
|
``Ascend`` ``GPU`` ``CPU``
|
|
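Since `reduction` is now documented as optional, a short sketch of calling the primitive with the default ``'mean'`` reduction (values are illustrative only):

import numpy as np
import mindspore as ms
from mindspore import ops, Tensor

kldiv = ops.KLDivLoss(reduction='mean')
# logits are expected to be log-probabilities, labels are probabilities.
logits = Tensor(np.array([0.2, 0.7, 0.1]), ms.float32)
labels = Tensor(np.array([0.0, 1.0, 0.0]), ms.float32)
loss = kldiv(logits, labels)   # scalar Tensor for 'mean'/'sum', same shape as logits for 'none'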
@@ -4342,11 +4172,11 @@ class ApplyAdaMax(Primitive):
|
|
|
4342
4172
|
the relatively highest priority data type.
|
|
4343
4173
|
|
|
4344
4174
|
Inputs:
|
|
4345
|
-
- **var** (Parameter) - Variable to be updated. With float32 or float16 data type.
|
|
4175
|
+
- **var** (Union[Parameter, Tensor]) - Variable to be updated. With float32 or float16 data type.
|
|
4346
4176
|
The shape is :math:`(N, *)` where :math:`*` means, any number of additional dimensions.
|
|
4347
|
-
- **m** (Parameter) - The 1st moment vector in the updating formula, has the same shape as `var`.
|
|
4177
|
+
- **m** (Union[Parameter, Tensor]) - The 1st moment vector in the updating formula, has the same shape as `var`.
|
|
4348
4178
|
With float32 or float16 data type.
|
|
4349
|
-
- **v** (Parameter) - The 2nd moment vector in the updating formula. Mean square gradients
|
|
4179
|
+
- **v** (Union[Parameter, Tensor]) - The 2nd moment vector in the updating formula. Mean square gradients
|
|
4350
4180
|
with the same shape as `var`. With float32 or float16 data type.
|
|
4351
4181
|
- **beta1_power** (Union[Number, Tensor]) - :math:`beta_1^t` in the updating formula, must be a scalar.
|
|
4352
4182
|
With float32 or float16 data type.
|
|
@@ -4362,7 +4192,7 @@ class ApplyAdaMax(Primitive):
|
|
|
4362
4192
|
With float32 or float16 data type.
|
|
4363
4193
|
|
|
4364
4194
|
Outputs:
|
|
4365
|
-
Tuple of 3 Tensor, the updated parameters.
|
|
4195
|
+
Tuple of 3 Tensors, the updated parameters or tensors.
|
|
4366
4196
|
|
|
4367
4197
|
- **var** (Tensor) - The same shape and data type as `var`.
|
|
4368
4198
|
- **m** (Tensor) - The same shape and data type as `m`.
|
|
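For reference, the typical way ApplyAdaMax is wired into a Cell; with this change `var`, `m` and `v` may also be plain Tensors rather than Parameters. A sketch with illustrative shapes, not copied from the package:

import numpy as np
import mindspore as ms
from mindspore import nn, ops, Tensor, Parameter

class AdaMaxNet(nn.Cell):
    def __init__(self):
        super().__init__()
        self.apply_ada_max = ops.ApplyAdaMax()
        self.var = Parameter(Tensor(np.random.rand(2, 2), ms.float32), name="var")
        self.m = Parameter(Tensor(np.random.rand(2, 2), ms.float32), name="m")
        self.v = Parameter(Tensor(np.random.rand(2, 2), ms.float32), name="v")

    def construct(self, beta1_power, lr, beta1, beta2, epsilon, grad):
        # Returns the updated (var, m, v); the inputs are updated in place as well.
        return self.apply_ada_max(self.var, self.m, self.v, beta1_power,
                                  lr, beta1, beta2, epsilon, grad)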
@@ -4456,10 +4286,11 @@ class ApplyAdadelta(Primitive):
|
|
|
4456
4286
|
the relatively highest priority data type.
|
|
4457
4287
|
|
|
4458
4288
|
Inputs:
|
|
4459
|
-
- **var** (Parameter) - Weights to be updated. With float32 or float16 data type.
|
|
4289
|
+
- **var** (Union[Parameter, Tensor]) - Weights to be updated. With float32 or float16 data type.
|
|
4460
4290
|
The shape is :math:`(N, *)` where :math:`*` means, any number of additional dimensions.
|
|
4461
|
-
- **accum** (Parameter) - Accumulation to be updated, has the same shape and data type as `var`.
|
|
4462
|
-
- **accum_update** (Parameter) - Accum_update to be updated, has the same shape and data type as
|
|
4291
|
+
- **accum** (Union[Parameter, Tensor]) - Accumulation to be updated, has the same shape and data type as `var`.
|
|
4292
|
+
- **accum_update** (Union[Parameter, Tensor]) - Accum_update to be updated, has the same shape and data type as
|
|
4293
|
+
`var`.
|
|
4463
4294
|
- **lr** (Union[Number, Tensor]) - Learning rate, must be a scalar. With float32 or float16 data type.
|
|
4464
4295
|
- **rho** (Union[Number, Tensor]) - Decay rate, must be a scalar. With float32 or float16 data type.
|
|
4465
4296
|
- **epsilon** (Union[Number, Tensor]) - A small value added for numerical stability, must be a scalar.
|
|
@@ -4467,7 +4298,7 @@ class ApplyAdadelta(Primitive):
|
|
|
4467
4298
|
- **grad** (Tensor) - Gradients, has the same shape and data type as `var`.
|
|
4468
4299
|
|
|
4469
4300
|
Outputs:
|
|
4470
|
-
Tuple of 3 Tensor, the updated parameters.
|
|
4301
|
+
Tuple of 3 Tensors, the updated parameters or tensors.
|
|
4471
4302
|
|
|
4472
4303
|
- **var** (Tensor) - The same shape and data type as `var`.
|
|
4473
4304
|
- **accum** (Tensor) - The same shape and data type as `accum`.
|
|
@@ -4558,14 +4389,14 @@ class ApplyAdagrad(Primitive):
|
|
|
4558
4389
|
update_slots (bool): If ``True`` , `accum` will be updated. Default: ``True`` .
|
|
4559
4390
|
|
|
4560
4391
|
Inputs:
|
|
4561
|
-
- **var** (Parameter) - Variable to be updated. With float or complex data type.
|
|
4392
|
+
- **var** (Union[Parameter, Tensor]) - Variable to be updated. With float or complex data type.
|
|
4562
4393
|
The shape is :math:`(N, *)` where :math:`*` means, any number of additional dimensions.
|
|
4563
|
-
- **accum** (Parameter) - Accumulation to be updated. The shape must be the same as `var`.
|
|
4394
|
+
- **accum** (Union[Parameter, Tensor]) - Accumulation to be updated. The shape must be the same as `var`.
|
|
4564
4395
|
- **lr** (Union[Number, Tensor]) - The learning rate value, must be a scalar. With float or complex data type.
|
|
4565
4396
|
- **grad** (Tensor) - A tensor for gradient. The shape must be the same as `var`.
|
|
4566
4397
|
|
|
4567
4398
|
Outputs:
|
|
4568
|
-
Tuple of 2 Tensors, the updated parameters.
|
|
4399
|
+
Tuple of 2 Tensors, the updated parameters or tensors.
|
|
4569
4400
|
|
|
4570
4401
|
- **var** (Tensor) - The same shape and data type as `var`.
|
|
4571
4402
|
- **accum** (Tensor) - The same shape and data type as `accum`.
|
|
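A minimal ApplyAdagrad sketch under the same Union[Parameter, Tensor] relaxation; the shapes and learning rate below are illustrative assumptions:

import numpy as np
import mindspore as ms
from mindspore import nn, ops, Tensor, Parameter

class AdagradNet(nn.Cell):
    def __init__(self):
        super().__init__()
        self.apply_adagrad = ops.ApplyAdagrad(update_slots=True)
        self.var = Parameter(Tensor(np.random.rand(2, 2), ms.float32), name="var")
        self.accum = Parameter(Tensor(np.random.rand(2, 2), ms.float32), name="accum")

    def construct(self, lr, grad):
        # Returns the updated (var, accum).
        return self.apply_adagrad(self.var, self.accum, lr, grad)

net = AdagradNet()
out = net(Tensor(0.01, ms.float32), Tensor(np.random.rand(2, 2), ms.float32))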
@@ -4645,15 +4476,15 @@ class ApplyAdagradV2(Primitive):
|
|
|
4645
4476
|
update_slots (bool): If ``True`` , `accum` will be updated. Default: ``True`` .
|
|
4646
4477
|
|
|
4647
4478
|
Inputs:
|
|
4648
|
-
- **var** (Parameter) - Variable to be updated. With float16 or float32 data type.
|
|
4479
|
+
- **var** (Union[Parameter, Tensor]) - Variable to be updated. With float16 or float32 data type.
|
|
4649
4480
|
The shape is :math:`(N, *)` where :math:`*` means, any number of additional dimensions.
|
|
4650
|
-
- **accum** (Parameter) - Accumulation to be updated. The shape must be the same as `var`.
|
|
4481
|
+
- **accum** (Union[Parameter, Tensor]) - Accumulation to be updated. The shape must be the same as `var`.
|
|
4651
4482
|
- **lr** (Union[Number, Tensor]) - The learning rate value, must be a float number or
|
|
4652
4483
|
a scalar tensor with float16 or float32 data type.
|
|
4653
4484
|
- **grad** (Tensor) - A tensor for gradient. The shape must be the same as `var`.
|
|
4654
4485
|
|
|
4655
4486
|
Outputs:
|
|
4656
|
-
Tuple of 2 Tensors, the updated parameters.
|
|
4487
|
+
Tuple of 2 Tensors, the updated parameters or tensors.
|
|
4657
4488
|
|
|
4658
4489
|
- **var** (Tensor) - The same shape and data type as `var`.
|
|
4659
4490
|
- **accum** (Tensor) - The same shape and data type as `accum`.
|
|
@@ -4751,14 +4582,15 @@ class SparseApplyAdagradV2(Primitive):
|
|
|
4751
4582
|
Args:
|
|
4752
4583
|
lr (float): Learning rate.
|
|
4753
4584
|
epsilon (float): A small value added for numerical stability.
|
|
4754
|
-
use_locking (bool): If ``True`` , the `var` and `accum` tensors will be protected from being updated.
|
|
4585
|
+
use_locking (bool, optional): If ``True`` , the `var` and `accum` tensors will be protected from being updated.
|
|
4755
4586
|
Default: ``False`` .
|
|
4756
|
-
update_slots (bool): If ``True`` , the computation logic will be different to `False`.
|
|
4587
|
+
update_slots (bool, optional): If ``True`` , the computation logic differs from that when it is ``False``.
|
|
4588
|
+
Default: ``True`` .
|
|
4757
4589
|
|
|
4758
4590
|
Inputs:
|
|
4759
|
-
- **var** (Parameter) - Variable to be updated. The data type must be float16 or float32.
|
|
4591
|
+
- **var** (Union[Parameter, Tensor]) - Variable to be updated. The data type must be float16 or float32.
|
|
4760
4592
|
The shape is :math:`(N, *)` where :math:`*` means, any number of additional dimensions.
|
|
4761
|
-
- **accum** (Parameter) - Accumulation to be updated. The shape must be the same as `var`.
|
|
4593
|
+
- **accum** (Union[Parameter, Tensor]) - Accumulation to be updated. The shape must be the same as `var`.
|
|
4762
4594
|
- **grad** (Tensor) - Gradients has the same shape as `var` and
|
|
4763
4595
|
:math:`grad.shape[1:] = var.shape[1:]` if var.shape > 1.
|
|
4764
4596
|
- **indices** (Tensor) - A vector of indices into the first dimension of `var` and `accum`.
|
|
@@ -4766,7 +4598,7 @@ class SparseApplyAdagradV2(Primitive):
|
|
|
4766
4598
|
must be unique. Otherwise, the result is unpredictable.
|
|
4767
4599
|
|
|
4768
4600
|
Outputs:
|
|
4769
|
-
Tuple of 2 tensors, the updated parameters.
|
|
4601
|
+
Tuple of 2 tensors, the updated parameters or tensors.
|
|
4770
4602
|
|
|
4771
4603
|
- **var** (Tensor) - The same shape and data type as `var`.
|
|
4772
4604
|
- **accum** (Tensor) - The same shape and data type as `accum`.
|
|
@@ -4842,13 +4674,14 @@ class ApplyProximalAdagrad(Primitive):
|
|
|
4842
4674
|
the relatively highest priority data type.
|
|
4843
4675
|
|
|
4844
4676
|
Args:
|
|
4845
|
-
use_locking (bool): If ``True`` , the var and accumulation tensors will be protected
|
|
4846
|
-
Default: ``False`` .
|
|
4677
|
+
use_locking (bool, optional): If ``True`` , the var and accumulation tensors will be protected
|
|
4678
|
+
from being updated. Default: ``False`` .
|
|
4847
4679
|
|
|
4848
4680
|
Inputs:
|
|
4849
|
-
- **var** (Parameter) - Variable to be updated. The data type must be float16 or float32.
|
|
4681
|
+
- **var** (Union[Parameter, Tensor]) - Variable to be updated. The data type must be float16 or float32.
|
|
4850
4682
|
The shape is :math:`(N, *)` where :math:`*` means, any number of additional dimensions.
|
|
4851
|
-
- **accum** (Parameter) - Accumulation to be updated, must have the same shape and dtype as
|
|
4683
|
+
- **accum** (Union[Parameter, Tensor]) - Accumulation to be updated, must have the same shape and dtype as
|
|
4684
|
+
`var`.
|
|
4852
4685
|
- **lr** (Union[Number, Tensor]) - The learning rate value, must be a scalar. The data type must be
|
|
4853
4686
|
float16 or float32.
|
|
4854
4687
|
- **l1** (Union[Number, Tensor]) - l1 regularization strength, must be a scalar. The data type must be
|
|
@@ -4858,7 +4691,7 @@ class ApplyProximalAdagrad(Primitive):
|
|
|
4858
4691
|
- **grad** (Tensor) - Gradient with the same shape and dtype as `var`.
|
|
4859
4692
|
|
|
4860
4693
|
Outputs:
|
|
4861
|
-
Tuple of 2 Tensors, the updated parameters.
|
|
4694
|
+
Tuple of 2 Tensors, the updated parameters or tensors.
|
|
4862
4695
|
|
|
4863
4696
|
- **var** (Tensor) - The same shape and data type as `var`.
|
|
4864
4697
|
- **accum** (Tensor) - The same shape and data type as `accum`.
|
|
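The ApplyProximalAdagrad call order follows the Inputs list above; a hedged sketch with constants chosen only for illustration:

import numpy as np
import mindspore as ms
from mindspore import nn, ops, Tensor, Parameter

class ProximalAdagradNet(nn.Cell):
    def __init__(self):
        super().__init__()
        self.op = ops.ApplyProximalAdagrad()
        self.var = Parameter(Tensor(np.random.rand(2, 2), ms.float32), name="var")
        self.accum = Parameter(Tensor(np.random.rand(2, 2), ms.float32), name="accum")

    def construct(self, lr, l1, l2, grad):
        # Returns the updated (var, accum).
        return self.op(self.var, self.accum, lr, l1, l2, grad)

net = ProximalAdagradNet()
out = net(Tensor(0.01, ms.float32), Tensor(0.0, ms.float32), Tensor(0.0, ms.float32),
          Tensor(np.random.rand(2, 2), ms.float32))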
@@ -4943,9 +4776,9 @@ class SparseApplyProximalAdagrad(Primitive):
|
|
|
4943
4776
|
Default: ``False`` .
|
|
4944
4777
|
|
|
4945
4778
|
Inputs:
|
|
4946
|
-
- **var** (Parameter) - Variable tensor to be updated. The data type must be float16 or float32.
|
|
4779
|
+
- **var** (Union[Parameter, Tensor]) - Variable tensor to be updated. The data type must be float16 or float32.
|
|
4947
4780
|
The shape is :math:`(N, *)` where :math:`*` means, any number of additional dimensions.
|
|
4948
|
-
- **accum** (
|
|
4781
|
+
- **accum** (Union[Parameter, Tensor]) - Variable tensor to be updated, has the same shape as `var`.
|
|
4949
4782
|
- **lr** (Union[Number, Tensor]) - The learning rate value, must be a float number or
|
|
4950
4783
|
a scalar tensor with float16 or float32 data type. It must be positive.
|
|
4951
4784
|
- **l1** (Union[Number, Tensor]) - l1 regularization strength, must be a float number or
|
|
@@ -4959,7 +4792,7 @@ class SparseApplyProximalAdagrad(Primitive):
|
|
|
4959
4792
|
following types: int32, int64 and :math:`indices.shape[0] = grad.shape[0]`.
|
|
4960
4793
|
|
|
4961
4794
|
Outputs:
|
|
4962
|
-
Tuple of 2 tensors, the updated parameters.
|
|
4795
|
+
Tuple of 2 tensors, the updated parameters or tensors.
|
|
4963
4796
|
|
|
4964
4797
|
- **var** (Tensor) - The same shape and data type as `var`.
|
|
4965
4798
|
- **accum** (Tensor) - The same shape and data type as `accum`.
|
|
@@ -5045,9 +4878,9 @@ class ApplyAddSign(Primitive):
|
|
|
5045
4878
|
the relatively highest priority data type.
|
|
5046
4879
|
|
|
5047
4880
|
Inputs:
|
|
5048
|
-
- **var** (Parameter) - Variable tensor to be updated.
|
|
4881
|
+
- **var** (Union[Parameter, Tensor]) - Variable tensor to be updated.
|
|
5049
4882
|
The shape is :math:`(N, *)` where :math:`*` means, any number of additional dimensions.
|
|
5050
|
-
- **m** (Parameter) - Variable tensor to be updated, has the same data type as `var`.
|
|
4883
|
+
- **m** (Union[Parameter, Tensor]) - Variable tensor to be updated, has the same data type as `var`.
|
|
5051
4884
|
- **lr** (Union[Number, Tensor]) - The learning rate value, must be a scalar.
|
|
5052
4885
|
- **alpha** (Union[Number, Tensor]) - Must be a scalar.
|
|
5053
4886
|
- **sign_decay** (Union[Number, Tensor]) - Must be a scalar.
|
|
@@ -5055,7 +4888,7 @@ class ApplyAddSign(Primitive):
|
|
|
5055
4888
|
- **grad** (Tensor) - A tensor of the same shape as `var`, for the gradient.
|
|
5056
4889
|
|
|
5057
4890
|
Outputs:
|
|
5058
|
-
Tuple of 2 Tensors, the updated parameters.
|
|
4891
|
+
Tuple of 2 Tensors, the updated parameters or tensors.
|
|
5059
4892
|
|
|
5060
4893
|
- **var** (Tensor) - The same shape and data type as `var`.
|
|
5061
4894
|
- **m** (Tensor) - The same shape and data type as `m`.
|
|
@@ -5144,10 +4977,10 @@ class ApplyPowerSign(Primitive):
|
|
|
5144
4977
|
On Ascend, input data type of float64 is currently not supported.
|
|
5145
4978
|
|
|
5146
4979
|
Inputs:
|
|
5147
|
-
- **var** (Parameter) - Variable tensor to be updated. With float64, float32 or float16 data
|
|
5148
|
-
If data type of `var` is float16, all inputs must have the same data type as `var`.
|
|
4980
|
+
- **var** (Union[Parameter, Tensor]) - Variable tensor to be updated. With float64, float32 or float16 data
|
|
4981
|
+
type. If data type of `var` is float16, all inputs must have the same data type as `var`.
|
|
5149
4982
|
The shape is :math:`(N, *)` where :math:`*` means, any number of additional dimensions.
|
|
5150
|
-
- **m** (Parameter) - Variable tensor to be updated, has the same shape as `var`.
|
|
4983
|
+
- **m** (Union[Parameter, Tensor]) - Variable tensor to be updated, has the same shape as `var`.
|
|
5151
4984
|
- **lr** (Union[Number, Tensor]) - The learning rate value, should be a scalar or Tensor
|
|
5152
4985
|
with float64, float32 or float16 data type.
|
|
5153
4986
|
- **logbase** (Union[Number, Tensor]) - Should be a scalar or Tensor with float64, float32 or float16 data type.
|
|
@@ -5158,7 +4991,7 @@ class ApplyPowerSign(Primitive):
|
|
|
5158
4991
|
- **grad** (Tensor) - A tensor of the same shape as `var`, for the gradient.
|
|
5159
4992
|
|
|
5160
4993
|
Outputs:
|
|
5161
|
-
Tuple of 2 Tensors, the updated parameters.
|
|
4994
|
+
Tuple of 2 Tensors, the updated parameters or tensors.
|
|
5162
4995
|
|
|
5163
4996
|
- **var** (Tensor) - The same shape and data type as `var`.
|
|
5164
4997
|
- **m** (Tensor) - The same shape and data type as `m`.
|
|
@@ -5235,7 +5068,7 @@ class ApplyGradientDescent(Primitive):
|
|
|
5235
5068
|
the relatively highest priority data type.
|
|
5236
5069
|
|
|
5237
5070
|
Inputs:
|
|
5238
|
-
- **var** (Parameter) - Variable tensor to be updated. With float32 or float16 data type.
|
|
5071
|
+
- **var** (Union[Parameter, Tensor]) - Variable tensor to be updated. With float32 or float16 data type.
|
|
5239
5072
|
The shape is :math:`(N, *)` where :math:`*` means, any number of additional dimensions.
|
|
5240
5073
|
- **alpha** (Union[Number, Tensor]) - Scaling factor, must be a scalar. With float32 or float16 data type.
|
|
5241
5074
|
- **delta** (Tensor) - A tensor for the change, has the same shape as `var`.
|
|
@@ -5304,7 +5137,7 @@ class ApplyProximalGradientDescent(Primitive):
|
|
|
5304
5137
|
the relatively highest priority data type.
|
|
5305
5138
|
|
|
5306
5139
|
Inputs:
|
|
5307
|
-
- **var** (Parameter) - Variable tensor to be updated. With float32 or float16 data type.
|
|
5140
|
+
- **var** (Union[Parameter, Tensor]) - Variable tensor to be updated. With float32 or float16 data type.
|
|
5308
5141
|
The shape is :math:`(N, *)` where :math:`*` means, any number of additional dimensions.
|
|
5309
5142
|
- **alpha** (Union[Number, Tensor]) - Scaling factor, must be a scalar. With float32 or float16 data type.
|
|
5310
5143
|
- **l1** (Union[Number, Tensor]) - l1 regularization strength, must be a scalar.
|
|
@@ -5448,10 +5281,10 @@ class ApplyFtrl(Primitive):
|
|
|
5448
5281
|
use_locking (bool): Use locks for updating operation if ``True`` . Default: ``False`` .
|
|
5449
5282
|
|
|
5450
5283
|
Inputs:
|
|
5451
|
-
- **var** (Parameter) - The variable to be updated. The data type must be float16 or float32.
|
|
5284
|
+
- **var** (Union[Parameter, Tensor]) - The variable to be updated. The data type must be float16 or float32.
|
|
5452
5285
|
The shape is :math:`(N, *)` where :math:`*` means, any number of additional dimensions.
|
|
5453
|
-
- **accum** (Parameter) - The accumulation to be updated, must be same shape as `var`.
|
|
5454
|
-
- **linear** (Parameter) - The linear coefficient to be updated, must be same shape as `var`.
|
|
5286
|
+
- **accum** (Union[Parameter, Tensor]) - The accumulation to be updated, must be same shape as `var`.
|
|
5287
|
+
- **linear** (Union[Parameter, Tensor]) - The linear coefficient to be updated, must be same shape as `var`.
|
|
5455
5288
|
- **grad** (Tensor) - Gradient. The data type must be float16 or float32.
|
|
5456
5289
|
- **lr** (Union[Number, Tensor]) - The learning rate value, must be positive. Default: ``0.001`` .
|
|
5457
5290
|
It must be a float number or a scalar tensor with float16 or float32 data type.
|
|
@@ -5464,16 +5297,16 @@ class ApplyFtrl(Primitive):
|
|
|
5464
5297
|
Default: ``-0.5`` . It must be a float number or a scalar tensor with float16 or float32 data type.
|
|
5465
5298
|
|
|
5466
5299
|
Outputs:
|
|
5467
|
-
- **var** (Tensor) - Represents the updated `var`. As the input parameters has been updated in-place,
|
|
5468
|
-
value is always zero when the platform is GPU.
|
|
5300
|
+
- **var** (Tensor) - Represents the updated `var`. As the input parameters or tensors have been updated in-place,
|
|
5301
|
+
this value is always zero when the platform is GPU.
|
|
5469
5302
|
|
|
5470
5303
|
Raises:
|
|
5471
5304
|
TypeError: If `use_locking` is not a bool.
|
|
5472
5305
|
TypeError: If dtype of `var`, `grad`, `lr`, `l1`, `l2` or `lr_power` is neither float16 nor float32.
|
|
5473
5306
|
TypeError: If `lr`, `l1`, `l2` or `lr_power` is neither a Number nor a Tensor.
|
|
5474
5307
|
TypeError: If `grad` is not a Tensor.
|
|
5475
|
-
TypeError: If the parameter types of `var`, `accum` and `linear` are inconsistent.
|
|
5476
|
-
TypeError: If the parameter types of `grad`, `lr`, `l1`, `l2`, `lr_power` are inconsistent with `var`
|
|
5308
|
+
TypeError: If the parameter or tensor types of `var`, `accum` and `linear` are inconsistent.
|
|
5309
|
+
TypeError: If the parameter or tensor types of `grad`, `lr`, `l1`, `l2`, `lr_power` are inconsistent with `var`
|
|
5477
5310
|
and the precision is greater than `var`.
|
|
5478
5311
|
|
|
5479
5312
|
Supported Platforms:
|
|
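A sketch of driving ApplyFtrl with the argument order from the Inputs list above; the scalar hyper-parameters mirror the documented defaults and are illustrative only:

import numpy as np
import mindspore as ms
from mindspore import nn, ops, Tensor, Parameter

class FtrlNet(nn.Cell):
    def __init__(self):
        super().__init__()
        self.apply_ftrl = ops.ApplyFtrl(use_locking=False)
        self.var = Parameter(Tensor(np.random.rand(2, 2), ms.float32), name="var")
        self.accum = Parameter(Tensor(np.random.rand(2, 2), ms.float32), name="accum")
        self.linear = Parameter(Tensor(np.random.rand(2, 2), ms.float32), name="linear")

    def construct(self, grad):
        # lr, l1, l2, lr_power may be Numbers or scalar Tensors.
        lr, l1, l2, lr_power = 0.001, 0.0, 0.0, -0.5
        return self.apply_ftrl(self.var, self.accum, self.linear, grad, lr, l1, l2, lr_power)

net = FtrlNet()
out = net(Tensor(np.random.rand(2, 2), ms.float32))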
@@ -5548,10 +5381,10 @@ class SparseApplyFtrl(Primitive):
|
|
|
5548
5381
|
use_locking (bool, optional): Use locks for updating operation if ``True`` . Default: ``False`` .
|
|
5549
5382
|
|
|
5550
5383
|
Inputs:
|
|
5551
|
-
- **var** (Parameter) - The variable to be updated. The data type must be float16 or float32.
|
|
5384
|
+
- **var** (Union[Parameter, Tensor]) - The variable to be updated. The data type must be float16 or float32.
|
|
5552
5385
|
The shape is :math:`(N, *)` where :math:`*` means, any number of additional dimensions.
|
|
5553
|
-
- **accum** (Parameter) - The accumulation to be updated, must be same shape as `var`.
|
|
5554
|
-
- **linear** (Parameter) - The linear coefficient to be updated, must be the same shape as `var`.
|
|
5386
|
+
- **accum** (Union[Parameter, Tensor]) - The accumulation to be updated, must be same shape as `var`.
|
|
5387
|
+
- **linear** (Union[Parameter, Tensor]) - The linear coefficient to be updated, must be the same shape as `var`.
|
|
5555
5388
|
- **grad** (Tensor) - A tensor must meet with :math:`grad.shape[1:] = var.shape[1:]`
|
|
5556
5389
|
if var.shape > 1.
|
|
5557
5390
|
- **indices** (Tensor) - A tensor of indices in the first dimension of `var` and `accum`.
|
|
@@ -5739,7 +5572,7 @@ class Dropout3D(PrimitiveWithInfer):
|
|
|
5739
5572
|
Dropout3D can improve the independence between channel feature maps.
|
|
5740
5573
|
|
|
5741
5574
|
Args:
|
|
5742
|
-
keep_prob (float): The keep probability of a channel, between 0 and 1, e.g. `keep_prob` = 0.8,
|
|
5575
|
+
keep_prob (float, optional): The keep probability of a channel, between 0 and 1, e.g. `keep_prob` = 0.8,
|
|
5743
5576
|
means dropping out 20% of channels. Default: ``0.5`` .
|
|
5744
5577
|
|
|
5745
5578
|
Inputs:
|
|
@@ -5791,12 +5624,14 @@ class CTCLoss(Primitive):
|
|
|
5791
5624
|
such that the length of target series must be less than or equal to the length of input.
|
|
5792
5625
|
|
|
5793
5626
|
Args:
|
|
5794
|
-
preprocess_collapse_repeated (bool): If ``True`` , repeated labels will be collapsed prior to the CTC
|
|
5627
|
+
preprocess_collapse_repeated (bool, optional): If ``True`` , repeated labels will be collapsed prior to the CTC
|
|
5795
5628
|
calculation. Default: ``False`` .
|
|
5796
|
-
ctc_merge_repeated (bool): If ``False`` , during CTC calculation,
|
|
5629
|
+
ctc_merge_repeated (bool, optional): If ``False`` , during CTC calculation,
|
|
5630
|
+
repeated non-blank labels will not be merged
|
|
5797
5631
|
and these labels will be interpreted as individual ones. This is a simplified
|
|
5798
5632
|
version of CTC. Default: ``True`` .
|
|
5799
|
-
ignore_longer_outputs_than_inputs (bool): If ``True`` ,
|
|
5633
|
+
ignore_longer_outputs_than_inputs (bool, optional): If ``True`` ,
|
|
5634
|
+
sequences with longer outputs than inputs will be
|
|
5800
5635
|
ignored. Default: ``False`` .
|
|
5801
5636
|
|
|
5802
5637
|
Inputs:
|
|
@@ -6370,10 +6205,7 @@ class AvgPool3D(Primitive):
|
|
|
6370
6205
|
|
|
6371
6206
|
Typically the input is of shape :math:`(N, C, D_{in}, H_{in}, W_{in})`, AvgPool3D outputs
|
|
6372
6207
|
regional average in the :math:`(D_{in}, H_{in}, W_{in})`-dimension. Given kernel size
|
|
6373
|
-
:math:`ks = (d_{ker}, h_{ker}, w_{ker})` and stride :math:`s = (s_0, s_1, s_2)`, the operation is as follows
|
|
6374
|
-
|
|
6375
|
-
.. warning::
|
|
6376
|
-
"kernel_size" is in the range [1, 255]. "strides" is in the range [1, 63].
|
|
6208
|
+
:math:`ks = (d_{ker}, h_{ker}, w_{ker})` and stride :math:`s = (s_0, s_1, s_2)`, the operation is as follows:
|
|
6377
6209
|
|
|
6378
6210
|
.. math::
|
|
6379
6211
|
\text{output}(N_i, C_j, d, h, w) =
|
|
@@ -6384,12 +6216,13 @@ class AvgPool3D(Primitive):
|
|
|
6384
6216
|
This interface currently does not support Atlas A2 training series products.
|
|
6385
6217
|
|
|
6386
6218
|
Args:
|
|
6387
|
-
kernel_size (Union[int, tuple[int]]): The size of kernel used to take the average value,
|
|
6219
|
+
kernel_size (Union[int, tuple[int]], optional): The size of kernel used to take the average value,
|
|
6388
6220
|
is an int number that represents depth, height and width are both kernel_size, or a tuple
|
|
6389
|
-
of three int numbers that represent depth, height and width respectively.
|
|
6390
|
-
|
|
6221
|
+
of three int numbers that represent depth, height and width respectively.
|
|
6222
|
+
Default: ``1`` . The value range is: [1, 255].
|
|
6223
|
+
strides (Union[int, tuple[int]], optional): The distance of kernel moving, an int number that represents
|
|
6391
6224
|
the depth, height and width of movement are both strides, or a tuple of three int numbers that
|
|
6392
|
-
represent depth, height and width of movement respectively. Default: ``1`` .
|
|
6225
|
+
represent depth, height and width of movement respectively. Default: ``1`` . The value range is: [1, 63].
|
|
6393
6226
|
pad_mode (str, optional): Specifies the padding mode with a padding value of 0. It can be set to:
|
|
6394
6227
|
``"same"`` , ``"valid"`` or ``"pad"`` . Default: ``"valid"`` .
|
|
6395
6228
|
|
|
@@ -6406,16 +6239,18 @@ class AvgPool3D(Primitive):
|
|
|
6406
6239
|
in the depth, height and width dimension is determined by the `pad` parameter.
|
|
6407
6240
|
If this mode is set, `pad` must be greater than or equal to 0.
|
|
6408
6241
|
|
|
6409
|
-
pad (Union(int, tuple[int], list[int])): The pad value to be filled. Default: ``0`` .
|
|
6242
|
+
pad (Union(int, tuple[int], list[int]), optional): The pad value to be filled. Default: ``0`` .
|
|
6243
|
+
If `pad` is an integer,
|
|
6410
6244
|
the paddings of head, tail, top, bottom, left and right are the same, equal to pad.
|
|
6411
6245
|
If `pad` is a tuple of six integers, the padding of head, tail, top, bottom, left and right equal to
|
|
6412
6246
|
pad[0], pad[1], pad[2], pad[3], pad[4] and pad[5] correspondingly.
|
|
6413
|
-
ceil_mode (bool): If ``True`` , ceil instead of floor to compute the output shape.
|
|
6414
|
-
|
|
6247
|
+
ceil_mode (bool, optional): If ``True`` , use ceil instead of floor to compute the output shape.
|
|
6248
|
+
Default: ``False`` .
|
|
6249
|
+
count_include_pad (bool, optional): If ``True`` , averaging calculation will include the zero-padding.
|
|
6415
6250
|
Default: ``True`` .
|
|
6416
|
-
divisor_override (int): If specified, it will be used as divisor in the averaging calculation,
|
|
6251
|
+
divisor_override (int, optional): If specified, it will be used as divisor in the averaging calculation,
|
|
6417
6252
|
otherwise kernel_size will be used. Default: ``0`` .
|
|
6418
|
-
data_format (str)
|
|
6253
|
+
data_format (str, optional): The optional value for data format. Currently only ``'NCDHW'`` is supported.
|
|
6419
6254
|
Default: ``'NCDHW'`` .
|
|
6420
6255
|
|
|
6421
6256
|
Inputs:
|
|
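To make the kernel_size and strides ranges above concrete, a small AvgPool3D sketch in the default 'NCDHW' layout (shapes are illustrative):

import numpy as np
import mindspore as ms
from mindspore import ops, Tensor

# kernel_size must stay within [1, 255] and strides within [1, 63].
avg_pool3d = ops.AvgPool3D(kernel_size=2, strides=1, pad_mode="valid")
x = Tensor(np.random.rand(1, 2, 4, 4, 4), ms.float32)   # (N, C, D, H, W)
y = avg_pool3d(x)                                        # (1, 2, 3, 3, 3) for these settings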
@@ -6599,39 +6434,8 @@ class Conv3D(Primitive):
|
|
|
6599
6434
|
|
|
6600
6435
|
Outputs:
|
|
6601
6436
|
Tensor, the value that applied 3D convolution. The shape is :math:`(N, C_{out}, D_{out}, H_{out}, W_{out})`.
|
|
6602
|
-
|
|
6603
|
-
`
|
|
6604
|
-
|
|
6605
|
-
.. math::
|
|
6606
|
-
\begin{array}{ll} \\
|
|
6607
|
-
D_{out} = \left \lceil{\frac{D_{in}}{\text{stride[0]}}} \right \rceil \\
|
|
6608
|
-
H_{out} = \left \lceil{\frac{H_{in}}{\text{stride[1]}}} \right \rceil \\
|
|
6609
|
-
W_{out} = \left \lceil{\frac{W_{in}}{\text{stride[2]}}} \right \rceil \\
|
|
6610
|
-
\end{array}
|
|
6611
|
-
|
|
6612
|
-
`pad_mode` is ``"valid"``:
|
|
6613
|
-
|
|
6614
|
-
.. math::
|
|
6615
|
-
\begin{array}{ll} \\
|
|
6616
|
-
D_{out} = \left \lfloor{\frac{D_{in} - \text{dilation[0]} \times (\text{kernel_size[0]} - 1) }
|
|
6617
|
-
{\text{stride[0]}} + 1} \right \rfloor \\
|
|
6618
|
-
H_{out} = \left \lfloor{\frac{H_{in} - \text{dilation[1]} \times (\text{kernel_size[1]} - 1) }
|
|
6619
|
-
{\text{stride[1]}} + 1} \right \rfloor \\
|
|
6620
|
-
W_{out} = \left \lfloor{\frac{W_{in} - \text{dilation[2]} \times (\text{kernel_size[2]} - 1) }
|
|
6621
|
-
{\text{stride[2]}} + 1} \right \rfloor \\
|
|
6622
|
-
\end{array}
|
|
6623
|
-
|
|
6624
|
-
`pad_mode` is ``"pad"``:
|
|
6625
|
-
|
|
6626
|
-
.. math::
|
|
6627
|
-
\begin{array}{ll} \\
|
|
6628
|
-
D_{out} = \left \lfloor{\frac{D_{in} + pad[0] + pad[1] - (\text{dilation[0]} - 1) \times
|
|
6629
|
-
\text{kernel_size[0]} - 1 }{\text{stride[0]}} + 1} \right \rfloor \\
|
|
6630
|
-
H_{out} = \left \lfloor{\frac{H_{in} + pad[2] + pad[3] - (\text{dilation[1]} - 1) \times
|
|
6631
|
-
\text{kernel_size[1]} - 1 }{\text{stride[1]}} + 1} \right \rfloor \\
|
|
6632
|
-
W_{out} = \left \lfloor{\frac{W_{in} + pad[4] + pad[5] - (\text{dilation[2]} - 1) \times
|
|
6633
|
-
\text{kernel_size[2]} - 1 }{\text{stride[2]}} + 1} \right \rfloor \\
|
|
6634
|
-
\end{array}
|
|
6437
|
+
To see how different pad modes affect the output shape, please refer to
|
|
6438
|
+
:class:`mindspore.nn.Conv3d` for more details.
|
|
6635
6439
|
|
|
6636
6440
|
Raises:
|
|
6637
6441
|
TypeError: If `out_channel` or `group` is not an int.
|
|
@@ -6908,7 +6712,7 @@ class SparseApplyAdadelta(Primitive):
|
|
|
6908
6712
|
to make the data types consistent. Besides, inputs of 'lr' and 'rho' also support implicit type conversion.
|
|
6909
6713
|
If they have different data types, the lower priority data type will be converted to
|
|
6910
6714
|
relatively highest priority data type.
|
|
6911
|
-
RuntimeError exception will be thrown when the data type conversion of Parameter is required.
|
|
6715
|
+
A RuntimeError exception will be thrown when data type conversion of a Parameter or Tensor is required.
|
|
6912
6716
|
|
|
6913
6717
|
Note:
|
|
6914
6718
|
If there are negative values or values greater than or equal to var.shape[0] in `indices`,
|
|
@@ -6920,11 +6724,11 @@ class SparseApplyAdadelta(Primitive):
|
|
|
6920
6724
|
Default: ``False`` .
|
|
6921
6725
|
|
|
6922
6726
|
Inputs:
|
|
6923
|
-
- **var** (Parameter) - Weights to be updated. With float32 or float16 data type.
|
|
6924
|
-
- **accum** (Parameter) - Accumulation to be updated. Mush have the same shape and dtype as
|
|
6925
|
-
With float32 or float16 data type.
|
|
6926
|
-
- **accum_update** (Parameter) - Accum_update to be updated. Must have the same shape and dtype
|
|
6927
|
-
With float32 or float16 data type.
|
|
6727
|
+
- **var** (Union[Parameter, Tensor]) - Weights to be updated. With float32 or float16 data type.
|
|
6728
|
+
- **accum** (Union[Parameter, Tensor]) - Accumulation to be updated. Must have the same shape and dtype as
|
|
6729
|
+
`var`. With float32 or float16 data type.
|
|
6730
|
+
- **accum_update** (Union[Parameter, Tensor]) - Accum_update to be updated. Must have the same shape and dtype
|
|
6731
|
+
as `var`. With float32 or float16 data type.
|
|
6928
6732
|
- **lr** (Union[float, Tensor]) - Learning rate, must be a scalar. With float32 or float16 data type.
|
|
6929
6733
|
- **rho** (Union[float, Tensor]) - Decay rate, must be a scalar. With float32 or float16 data type.
|
|
6930
6734
|
- **grad** (Tensor) - A tensor for gradient. Must have the same shape and dtype as `var`.
|
|
@@ -6932,7 +6736,7 @@ class SparseApplyAdadelta(Primitive):
|
|
|
6932
6736
|
Must be one of the following types: int32, int64 and indices.shape[0] = grad.shape[0].
|
|
6933
6737
|
|
|
6934
6738
|
Outputs:
|
|
6935
|
-
Tuple of 3 Tensor, the updated parameters.
|
|
6739
|
+
Tuple of 3 Tensors, the updated parameters or tensors.
|
|
6936
6740
|
|
|
6937
6741
|
- **var** (Tensor) - The same shape and data type as `var`.
|
|
6938
6742
|
- **accum** (Tensor) - The same shape and data type as `accum`.
|
|
@@ -7020,9 +6824,9 @@ class CTCLossV2(Primitive):
|
|
|
7020
6824
|
and its correlated gradient to zero. Default: ``False`` .
|
|
7021
6825
|
|
|
7022
6826
|
Inputs:
|
|
7023
|
-
- **log_probs** (Tensor) - A tensor of shape :math:`(T, N, C)`, where :math:`T` is input length, :math:`N` is
|
|
6827
|
+
- **log_probs** (Tensor) - A 3D tensor of shape :math:`(T, N, C)`, where :math:`T` is input length, :math:`N` is
|
|
7024
6828
|
batch size and :math:`C` is number of classes (including blank). Supported dtypes: float32, float64.
|
|
7025
|
-
- **targets** (Tensor) - A tensor of shape :math:`(N, S)`, where :math:`S` is max target length,
|
|
6829
|
+
- **targets** (Tensor) - A 2D tensor of shape :math:`(N, S)`, where :math:`S` is max target length,
|
|
7026
6830
|
means the target sequences. Supported dtypes: int32, int64.
|
|
7027
6831
|
- **input_lengths** (Union(Tuple, Tensor)) - A tuple or Tensor of shape :math:`(N)`.
|
|
7028
6832
|
It means the lengths of the input. Supported dtypes: int32, int64.
|
|
@@ -7093,7 +6897,7 @@ class CTCLossV2Grad(Primitive):
|
|
|
7093
6897
|
|
|
7094
6898
|
Args:
|
|
7095
6899
|
blank (int): The blank label. Default: ``0`` .
|
|
7096
|
-
reduction (
|
|
6900
|
+
reduction (str): Apply specific reduction method to the output. Currently only ``'none'`` is supported.
|
|
7097
6901
|
Default: ``"none"`` .
|
|
7098
6902
|
zero_infinity (bool): Whether to set infinite loss and correlation gradient to zero. Default: ``False`` .
|
|
7099
6903
|
|
|
@@ -7209,12 +7013,15 @@ class Conv3DTranspose(Primitive):
|
|
|
7209
7013
|
Inputs:
|
|
7210
7014
|
- **dout** (Tensor) - The gradients with respect to the output of the convolution.
|
|
7211
7015
|
The shape conforms to the default.
|
|
7212
|
-
data_format :math:`(N, C_{in}, D_{out}, H_{out}, W_{out})`.
|
|
7213
|
-
|
|
7016
|
+
data_format :math:`(N, C_{in}, D_{out}, H_{out}, W_{out})`.
|
|
7017
|
+
Supported dtypes:
|
|
7018
|
+
|
|
7019
|
+
- Ascend: float16.
|
|
7020
|
+
- GPU/CPU: float16, float32.
|
|
7214
7021
|
- **weight** (Tensor) - Set size of kernel is :math:`(K_d, K_h, K_w)`, then the shape is
|
|
7215
7022
|
:math:`(C_{in}, C_{out}//group, K_d, K_h, K_w)`. Where :math:`group` is the Args parameter,
|
|
7216
7023
|
:math:`//` is the symbol for integer division.
|
|
7217
|
-
|
|
7024
|
+
It has the same dtype as `dout`.
|
|
7218
7025
|
- **bias** (Tensor) - Tensor of shape :math:`C_{out}`. Currently, only support none. Default: ``None`` .
|
|
7219
7026
|
|
|
7220
7027
|
Outputs:
|
|
@@ -7500,12 +7307,12 @@ class ApplyAdagradDA(Primitive):
|
|
|
7500
7307
|
Otherwise the behavior is undefined, but may exhibit less contention. Default: ``False`` .
|
|
7501
7308
|
|
|
7502
7309
|
Inputs:
|
|
7503
|
-
- **var** (Parameter) - Variable to be updated. The data type must be float16 or float32.
|
|
7310
|
+
- **var** (Union[Parameter, Tensor]) - Variable to be updated. The data type must be float16 or float32.
|
|
7504
7311
|
The shape is :math:`(N, *)` where :math:`*` means, any number of additional dimensions.
|
|
7505
|
-
- **gradient_accumulator** (Parameter) - The dict of mutable tensor :math:`grad\_accum`.
|
|
7506
|
-
shape as `var`.
|
|
7507
|
-
- **gradient_squared_accumulator** (Parameter) - The dict of mutable tensor :math:`grad\_squared\_accum`.
|
|
7312
|
+
- **gradient_accumulator** (Union[Parameter, Tensor]) - The dict of mutable tensor :math:`grad\_accum`.
|
|
7508
7313
|
Must have the same shape as `var`.
|
|
7314
|
+
- **gradient_squared_accumulator** (Union[Parameter, Tensor]) - The dict of mutable tensor
|
|
7315
|
+
:math:`grad\_squared\_accum`. Must have the same shape as `var`.
|
|
7509
7316
|
- **grad** (Tensor) - A tensor for gradient. Must have the same shape as `var`.
|
|
7510
7317
|
- **lr** ([Number, Tensor]) - Scaling factor. Must be a scalar. With float32 or float16 data type.
|
|
7511
7318
|
- **l1** ([Number, Tensor]) - L1 regularization. Must be a scalar. With float32 or float16 data type.
|
|
@@ -7513,12 +7320,12 @@ class ApplyAdagradDA(Primitive):
|
|
|
7513
7320
|
- **global_step** ([Number, Tensor]) - Training step number. Must be a scalar. With int32 or int64 data type.
|
|
7514
7321
|
|
|
7515
7322
|
Outputs:
|
|
7516
|
-
Tuple of 1 Tensors, the updated parameters.
|
|
7323
|
+
Tuple of 1 Tensor, the updated parameter or tensor.
|
|
7517
7324
|
|
|
7518
7325
|
- **var** (Tensor) - The same shape and data type as `var`.
|
|
7519
7326
|
|
|
7520
7327
|
Raises:
|
|
7521
|
-
TypeError: If `var`, `gradient_accumulator` or `gradient_squared_accumulator`
|
|
7328
|
+
TypeError: If `var`, `gradient_accumulator` or `gradient_squared_accumulator` is neither a Parameter nor a Tensor.
|
|
7522
7329
|
TypeError: If `grad` is not a Tensor.
|
|
7523
7330
|
TypeError: If `lr`, `l1`, `l2` or `global_step` is neither a Number nor a Tensor.
|
|
7524
7331
|
TypeError: If use_locking is not a bool.
|
|
@@ -7564,9 +7371,8 @@ class ApplyAdagradDA(Primitive):
|
|
|
7564
7371
|
>>> global_step = Tensor(2, mstype.int32)
|
|
7565
7372
|
>>> output = net(grad, lr, l1, l2, global_step)
|
|
7566
7373
|
>>> print(output)
|
|
7567
|
-
|
|
7568
|
-
|
|
7569
|
-
[-5.96988888e-04, -1.42478070e-03]]))
|
|
7374
|
+
[[-0.00073906, -0.00136889],
|
|
7375
|
+
[-0.00059699, -0.00142478]]
|
|
7570
7376
|
"""
|
|
7571
7377
|
|
|
7572
7378
|
__mindspore_signature__ = (
|
|
@@ -7612,10 +7418,12 @@ class SparseApplyRMSProp(Primitive):
|
|
|
7612
7418
|
otherwise the behavior is undefined, but may exhibit less contention. Default: ``False`` .
|
|
7613
7419
|
|
|
7614
7420
|
Inputs:
|
|
7615
|
-
- **var** (Parameter) - Variable to be updated. The data type must be float16 or float32.
|
|
7421
|
+
- **var** (Union[Parameter, Tensor]) - Variable to be updated. The data type must be float16 or float32.
|
|
7616
7422
|
The shape is :math:`(N, *)` where :math:`*` means, any number of additional dimensions.
|
|
7617
|
-
- **ms** (Parameter) - The dict of mutable tensor ms. Must have the same shape and dtype as
|
|
7618
|
-
|
|
7423
|
+
- **ms** (Union[Parameter, Tensor]) - The dict of mutable tensor ms. Must have the same shape and dtype as
|
|
7424
|
+
`var`.
|
|
7425
|
+
- **mom** (Union[Parameter, Tensor]) - The dict of mutable tensor mom. Must have the same shape and dtype as
|
|
7426
|
+
`var`.
|
|
7619
7427
|
- **lr** ([Number, Tensor]) - Learning rate. Must be a scalar. With float16 or float32 data type.
|
|
7620
7428
|
- **grad** (Tensor) - A tensor for gradient. Must have the same shape and dtype as `var`.
|
|
7621
7429
|
- **indices** (Tensor) - A tensor of indices in the first dimension of `var`, `ms` and `mom`.
|
|
@@ -7623,7 +7431,7 @@ class SparseApplyRMSProp(Primitive):
|
|
|
7623
7431
|
following types: int32, int64 and indices.shape[0] = var.shape[0].
|
|
7624
7432
|
|
|
7625
7433
|
Outputs:
|
|
7626
|
-
Tuple of 3 Tensors, the updated parameters.
|
|
7434
|
+
Tuple of 3 Tensors, the updated parameters or tensors.
|
|
7627
7435
|
|
|
7628
7436
|
- **var** (Tensor) - The same shape and data type as `var`.
|
|
7629
7437
|
- **ms** (Tensor) - The same shape and data type as `ms`.
|
|
@@ -7729,12 +7537,12 @@ class SparseApplyCenteredRMSProp(Primitive):
|
|
|
7729
7537
|
Default: ``False`` .
|
|
7730
7538
|
|
|
7731
7539
|
Inputs:
|
|
7732
|
-
- **var** (Parameter) - Variable tensor to be updated. The data type must be int8, int16, int32,
|
|
7733
|
-
uint8, uint16, uint32, uint64, float16, float32 or float64.
|
|
7540
|
+
- **var** (Union[Parameter, Tensor]) - Variable tensor to be updated. The data type must be int8, int16, int32,
|
|
7541
|
+
int64, uint8, uint16, uint32, uint64, float16, float32 or float64.
|
|
7734
7542
|
The shape is :math:`(N, *)` where :math:`*` means, any number of additional dimensions.
|
|
7735
|
-
- **mg** (Parameter) - Mean gradients. Must have the same shape and dtype as `var`.
|
|
7736
|
-
- **ms** (Parameter) - Mean square gradients. Must have the same shape and dtype as `var`.
|
|
7737
|
-
- **mom** (Parameter) - Delta of `var`. Must have the same shape and dtype as `var`.
|
|
7543
|
+
- **mg** (Union[Parameter, Tensor]) - Mean gradients. Must have the same shape and dtype as `var`.
|
|
7544
|
+
- **ms** (Union[Parameter, Tensor]) - Mean square gradients. Must have the same shape and dtype as `var`.
|
|
7545
|
+
- **mom** (Union[Parameter, Tensor]) - Delta of `var`. Must have the same shape and dtype as `var`.
|
|
7738
7546
|
- **lr** (Union[Number, Tensor]) - Learning rate. Must be a float number or a scalar tensor.
|
|
7739
7547
|
Must have the same type as `var`.
|
|
7740
7548
|
- **rho** (Union[Number, Tensor]) - Decay rate. Must be a float number or a scalar tensor.
|
|
@@ -7837,8 +7645,9 @@ class ApplyKerasMomentum(Primitive):
|
|
|
7837
7645
|
so in the end, the var you get is actually var + momentum * accum. Default: ``False`` .
|
|
7838
7646
|
|
|
7839
7647
|
Inputs:
|
|
7840
|
-
- **var** (Parameter) - Variable to be updated. With float16 or float32 data type.
|
|
7841
|
-
- **accum** (Parameter) - Must have the same shape and type as `var`. With float16 or float32
|
|
7648
|
+
- **var** (Union[Parameter, Tensor]) - Variable to be updated. With float16 or float32 data type.
|
|
7649
|
+
- **accum** (Union[Parameter, Tensor]) - Must have the same shape and type as `var`. With float16 or float32
|
|
7650
|
+
data type.
|
|
7842
7651
|
- **lr** (Union[Number, Tensor]) - Scaling factor. Must be a scalar. With float16 or float32 data type.
|
|
7843
7652
|
- **grad** (Tensor) - The gradient. Must have the same shape and type as `var`.
|
|
7844
7653
|
With float16 or float32 data type.
|
|
@@ -7989,12 +7798,12 @@ class ApplyAdamWithAmsgrad(Primitive):
|
|
|
7989
7798
|
Default: ``False`` .
|
|
7990
7799
|
|
|
7991
7800
|
Inputs:
|
|
7992
|
-
- **var** (Parameter) - Variable to be updated. The data type can be float16 or float32.
|
|
7993
|
-
- **m** (Parameter) - The 1st moment vector in the updating formula,
|
|
7801
|
+
- **var** (Union[Parameter, Tensor]) - Variable to be updated. The data type can be float16 or float32.
|
|
7802
|
+
- **m** (Union[Parameter, Tensor]) - The 1st moment vector in the updating formula,
|
|
7994
7803
|
the shape and data type value should be the same as `var`.
|
|
7995
|
-
- **v** (Parameter) - the 2nd moment vector in the updating formula,
|
|
7804
|
+
- **v** (Union[Parameter, Tensor]) - The 2nd moment vector in the updating formula,
|
|
7996
7805
|
the shape and data type value should be the same as `var`.
|
|
7997
|
-
- **vhat** (Parameter) - :math:`\hat v_t` in the updating formula,
|
|
7806
|
+
- **vhat** (Union[Parameter, Tensor]) - :math:`\hat v_t` in the updating formula,
|
|
7998
7807
|
the shape and data type value should be the same as `var`.
|
|
7999
7808
|
- **beta1_power** (Union[float, Tensor]) - :math:`beta_1^t(\beta_1^{t})` in the updating formula,
|
|
8000
7809
|
a scalar tensor with float16 or float32 data type.
|
|
@@ -8004,7 +7813,7 @@ class ApplyAdamWithAmsgrad(Primitive):
|
|
|
8004
7813
|
- **grad** (Tensor) - The gradient, has the same shape and data type as `var`.
|
|
8005
7814
|
|
|
8006
7815
|
Outputs:
|
|
8007
|
-
Tuple of 4 Tensors, the updated parameters.
|
|
7816
|
+
Tuple of 4 Tensors, the updated parameters or tensors.
|
|
8008
7817
|
|
|
8009
7818
|
- **var** (Tensor) - The same shape and data type as `var`.
|
|
8010
7819
|
- **m** (Tensor) - The same shape and data type as `m`.
|
|
@@ -8012,7 +7821,7 @@ class ApplyAdamWithAmsgrad(Primitive):
|
|
|
8012
7821
|
- **vhat** (Tensor) - The same shape and data type as `vhat`.
|
|
8013
7822
|
|
|
8014
7823
|
Raises:
|
|
8015
|
-
TypeError: If `var`, `m`, `v`, `vhat`
|
|
7824
|
+
TypeError: If `var`, `m`, `v` or `vhat` is neither a Parameter nor a Tensor.
|
|
8016
7825
|
TypeError: If `beta1_power`, `beta2_power`, `lr` is neither a Number nor a Tensor.
|
|
8017
7826
|
TypeError: If `grad` is not a Tensor.
|
|
8018
7827
|
TypeError: If dtype of `var`, `m`, `v`, `vhat`, `beta1_power`, `beta2_power`,
|
|
@@ -8092,16 +7901,16 @@ class ApplyAdamWithAmsgradV2(Primitive):
|
|
|
8092
7901
|
|
|
8093
7902
|
Args:
|
|
8094
7903
|
use_locking (bool): If ``True`` , updating of the `var`, `m`, and `v` tensors will
|
|
8095
|
-
be protected by a lock; Otherwise
|
|
7904
|
+
be protected by a lock; otherwise some contention may occur.
|
|
8096
7905
|
Default: ``False`` .
|
|
8097
7906
|
|
|
8098
7907
|
Inputs:
|
|
8099
|
-
- **var** (Parameter) - Variable to be updated. The data type can be float16, float32 or float64.
|
|
8100
|
-
- **m** (Parameter) - The 1st moment vector in the updating formula,
|
|
7908
|
+
- **var** (Union[Parameter, Tensor]) - Variable to be updated. The data type can be float16, float32 or float64.
|
|
7909
|
+
- **m** (Union[Parameter, Tensor]) - The 1st moment vector in the updating formula,
|
|
8101
7910
|
the shape should be the same as `var`.
|
|
8102
|
-
- **v** (Parameter) - The 2nd moment vector in the updating formula,
|
|
7911
|
+
- **v** (Union[Parameter, Tensor]) - The 2nd moment vector in the updating formula,
|
|
8103
7912
|
the shape should be the same as `var`.
|
|
8104
|
-
- **vhat** (Parameter) - :math:`\hat v_t` in the updating formula,
|
|
7913
|
+
- **vhat** (Union[Parameter, Tensor]) - :math:`\hat v_t` in the updating formula,
|
|
8105
7914
|
the shape and data type value should be the same as `var`.
|
|
8106
7915
|
- **beta1_power** (Union[float, Tensor]) - :math:`beta_1^t(\beta_1^{t})` in the updating formula,
|
|
8107
7916
|
with float16, float32 or float64 data type.
|
|
@@ -8117,7 +7926,7 @@ class ApplyAdamWithAmsgradV2(Primitive):
|
|
|
8117
7926
|
- **grad** (Tensor) - The gradient, has the same shape as `var`.
|
|
8118
7927
|
|
|
8119
7928
|
Outputs:
|
|
8120
|
-
Tuple of 4 Tensors, the updated parameters.
|
|
7929
|
+
Tuple of 4 Tensors, the updated parameters or tensors.
|
|
8121
7930
|
|
|
8122
7931
|
- **var** (Tensor) - The same shape and data type as `var`.
|
|
8123
7932
|
- **m** (Tensor) - The same shape and data type as `m`.
|
|
@@ -8125,7 +7934,7 @@ class ApplyAdamWithAmsgradV2(Primitive):
|
|
|
8125
7934
|
- **vhat** (Tensor) - The same shape and data type as `vhat`.
|
|
8126
7935
|
|
|
8127
7936
|
Raises:
|
|
8128
|
-
TypeError: If `var`, `m`, `v`, `vhat`
|
|
7937
|
+
TypeError: If `var`, `m`, `v` or `vhat` is neither a Parameter nor a Tensor.
|
|
8129
7938
|
TypeError: If dtype of `var`, `m`, `v`, `vhat`, `beta1_power`, `beta2_power`,
|
|
8130
7939
|
`lr`, `beta1` , `beta2` , `epsilon` or `grad` is not float64, float32 or float16.
|
|
8131
7940
|
RuntimeError: If the data type of `var`, `m`, `v` , `vhat` and `grad` conversion of Parameter is not supported.
|
|
@@ -8640,13 +8449,13 @@ class TripletMarginLoss(Primitive):
|
|
|
8640
8449
|
- **margin** (Tensor) - Make a margin between the positive pair and the negative pair.
|
|
8641
8450
|
|
|
8642
8451
|
Outputs:
|
|
8643
|
-
Union[Tensor, Scalar], if `reduction` is ``"none"``,
|
|
8452
|
+
Union[Tensor, Scalar], if `reduction` is ``"none"``, a Tensor will be returned with a shape of :math:`(N)`.
|
|
8644
8453
|
Otherwise, a scalar value will be returned.
|
|
8645
8454
|
|
|
8646
8455
|
Raises:
|
|
8647
|
-
TypeError: If `x
|
|
8648
|
-
TypeError: If dtype of `x
|
|
8649
|
-
TypeError: If
|
|
8456
|
+
TypeError: If `x`, `positive`, `negative`, or `margin` is not a Tensor.
|
|
8457
|
+
TypeError: If dtype of `x`, `positive`, or `negative` is not BasicType.
|
|
8458
|
+
TypeError: If dtypes of `x`, `positive` and `negative` are not the same.
|
|
8650
8459
|
TypeError: If `margin` is not float32.
|
|
8651
8460
|
TypeError: If `p` is not an int.
|
|
8652
8461
|
TypeError: If `eps` is not a float.
|
|
@@ -8656,7 +8465,7 @@ class TripletMarginLoss(Primitive):
|
|
|
8656
8465
|
ValueError: If the dimension of input `x` or `positive` or `negative`
|
|
8657
8466
|
is bigger than or equal to 8.
|
|
8658
8467
|
ValueError: If length of shape of `margin` is not 0.
|
|
8659
|
-
ValueError: If
|
|
8468
|
+
ValueError: If shapes of `x`, `positive` and `negative` cannot be broadcast.
|
|
8660
8469
|
ValueError: If `reduction` is not one of ``'none'``, ``'mean'``, ``'sum'``.
|
|
8661
8470
|
|
|
8662
8471
|
Supported Platforms:
|
|
@@ -8805,11 +8614,11 @@ class SparseApplyAdagradDA(Primitive):
|
|
|
8805
8614
|
Otherwise the behavior is undefined, but may exhibit less contention. Default: ``False`` .
|
|
8806
8615
|
|
|
8807
8616
|
Inputs:
|
|
8808
|
-
- **var** (Parameter) - Variable to be updated.
|
|
8617
|
+
- **var** (Union[Parameter, Tensor]) - Variable to be updated.
|
|
8809
8618
|
The shape is :math:`(N, *)` where :math:`*` means, any number of additional dimensions.
|
|
8810
|
-
- **grad_accum** (Parameter) - The dict of mutable tensor grad_accum. Must have the same
|
|
8619
|
+
- **grad_accum** (Union[Parameter, Tensor]) - The dict of mutable tensor grad_accum. Must have the same
|
|
8811
8620
|
shape and dtype as `var`.
|
|
8812
|
-
- **grad_square_accum** (Parameter) - The dict of mutable tensor grad_square_accum.
|
|
8621
|
+
- **grad_square_accum** (Union[Parameter, Tensor]) - The dict of mutable tensor grad_square_accum.
|
|
8813
8622
|
Must have the same shape and dtype as `var`.
|
|
8814
8623
|
- **grad** (Tensor) - A tensor of the same type as `var` and grad.shape[1:] = var.shape[1:] if rank(var) > 1.
|
|
8815
8624
|
- **indices** (Tensor) - A tensor of indices in the first dimension of `var` and `accum`.
|
|
@@ -8987,8 +8796,8 @@ class SparseApplyProximalGradientDescent(Primitive):
|
|
|
8987
8796
|
Default: ``False`` .
|
|
8988
8797
|
|
|
8989
8798
|
Inputs:
|
|
8990
|
-
- **var** (Parameter) - Variable tensor to be updated. The data type must be int8, int16, int32,
|
|
8991
|
-
uint8, uint16, uint32, uint64, float16, float32 or float64.
|
|
8799
|
+
- **var** (Union[Parameter, Tensor]) - Variable tensor to be updated. The data type must be int8, int16, int32,
|
|
8800
|
+
int64, uint8, uint16, uint32, uint64, float16, float32 or float64.
|
|
8992
8801
|
The shape is :math:`(N, *)` where :math:`*` means, any number of additional dimensions.
|
|
8993
8802
|
- **alpha** (Union[Number, Tensor]) - Scaling factor. Must be a scalar with same type as `var`.
|
|
8994
8803
|
- **l1** (Union[Number, Tensor]) - L1 regularization. Must be a scalar with same type as `var`.
|
|
@@ -9003,7 +8812,7 @@ class SparseApplyProximalGradientDescent(Primitive):
|
|
|
9003
8812
|
- **var** (Tensor) - Tensor, has the same shape and type as 'var'.
|
|
9004
8813
|
|
|
9005
8814
|
Raises:
|
|
9006
|
-
TypeError: If `var
|
|
8815
|
+
TypeError: If `var` is neither a Parameter nor a Tensor.
|
|
9007
8816
|
TypeError: If `alpha`, `l1`, `l2` is neither a Number nor a Tensor.
|
|
9008
8817
|
TypeError: If `use_locking` is not a bool.
|
|
9009
8818
|
TypeError: If dtype of `var`, `alpha`, `l1`, `l2` or `grad` is not one of int8, int16,
|
|
@@ -9139,51 +8948,6 @@ class NuclearNorm(Primitive):
         validator.check_value_type("keepdim", keepdim, [bool], self.name)
 
 
-class GLU(Primitive):
-    r"""
-    Computes GLU (Gated Linear Unit activation function) of input tensors.
-
-    .. warning::
-        This is an experimental API that is subject to change or deletion.
-
-    Refer to :func:`mindspore.ops.glu` for more details.
-
-    Args:
-        axis (int, optional): Axis on which to split the input.
-            The value of `axis` must be an int within range [-rank(`x`), rank(`x`)).
-            Default: ``-1`` , specifying the last dimension.
-
-    Inputs:
-        - **x** (Tensor) - Input tensor. `x.shape[axis]` must be even.
-
-    Outputs:
-        Tensor, has the same data type with `x`.
-
-    Supported Platforms:
-        ``Ascend`` ``CPU``
-
-    Examples:
-        >>> from mindspore import ops, Tensor
-        >>> from mindspore import dtype as mstype
-        >>> import numpy as np
-        >>> axis = 0
-        >>> x = Tensor(np.array([0.3220, 0.9545, 0.7879, 0.0975, 0.3698,
-        ...                      0.5135, 0.5740, 0.3435, 0.1895, 0.8764,
-        ...                      0.4980, 0.9673, 0.9879, 0.6988, 0.9022,
-        ...                      0.9304, 0.1558, 0.0153, 0.1559, 0.9852]).reshape([2, 2, 5]), mstype.float32)
-        >>> glu = ops.GLU(axis=axis)
-        >>> y = glu(x)
-        >>> print(y)
-        [[[0.20028052 0.6916126 0.57412136 0.06512236 0.26307625]
-          [0.3682598 0.3093122 0.17306386 0.10212085 0.63814086]]]
-    """
-
-    @prim_attr_register
-    def __init__(self, axis=-1):
-        """Initialize GLU"""
-        validator.check_value_type("axis", axis, [int], self.name)
-
-
 class FractionalMaxPoolWithFixedKsize(Primitive):
     r"""
     Applies a 2D fractional max pooling to an input signal composed of multiple input planes.
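The `GLU` primitive class is removed in 2.6.0. Its docstring pointed to the functional :func:`mindspore.ops.glu`, which performs the same split-and-gate computation (split `x` along `axis` into `a` and `b`, return `a * sigmoid(b)`). A minimal sketch, not taken from the diff and assuming that functional interface is still present in 2.6.0:

    >>> # Hedged sketch using the functional ops.glu the removed docstring refers to.
    >>> import numpy as np
    >>> from mindspore import Tensor, ops
    >>> from mindspore import dtype as mstype
    >>> x = Tensor(np.arange(8).reshape(2, 4), mstype.float32)
    >>> y = ops.glu(x, axis=-1)          # halves the last axis: a * sigmoid(b)
    >>> print(y.shape)
    (2, 2)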
@@ -9267,7 +9031,8 @@ class FractionalMaxPoolWithFixedKsize(Primitive):
 class ChannelShuffle(Primitive):
     r"""
     Divide the channels in a tensor of shape :math:`(*, C, H, W)` into :math:`g` group and
-    rearrange them as :math:`(*, \frac
+    rearrange them as :math:`(*, \frac{C}{g}, g, H*W)`, while retaining the original tensor
+    shape in the final output.
 
     .. warning::
         This is an experimental API that is subject to change or deletion.
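The corrected lines spell out the rearrangement `ChannelShuffle` performs. A NumPy-only sketch of that reshape/transpose/reshape, illustrating the formula rather than the MindSpore primitive itself:

    >>> import numpy as np
    >>> N, C, H, W, g = 1, 6, 2, 2, 3
    >>> x = np.arange(N * C * H * W).reshape(N, C, H, W)
    >>> y = x.reshape(N, g, C // g, H, W)    # split the C channels into g groups
    >>> y = y.transpose(0, 2, 1, 3, 4)       # swap the group axis and the per-group axis
    >>> y = y.reshape(N, C, H, W)            # restore the original (*, C, H, W) shape
    >>> print(y[0, :, 0, 0])                 # channel order after shuffling
    [ 0  8 16  4 12 20]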
@@ -9475,93 +9240,6 @@ class WKV(Primitive):
                                 outputs=["output", "out_sp", "out_sq", "out_sm"])
 
 
-class PromptFlashAttention(Primitive):
-    r"""
-    The interface for fully inference.
-    B -- Batch size
-    S -- Sequence length
-    H -- Hidden size
-
-    Note:
-        experiment ops
-
-    .. warning::
-        This is an experimental API that is subject to change or deletion.
-
-    Args:
-        num_heads (int): The number of heads.
-        scale_value (float): The scale value indicating the scale coefficient, which is used as the scalar of
-            Muls in the calculation. Default: 1.0.
-        pre_tokens (int): Previous tokens. Default: 2147483547.
-        next_tokens (int): next tokens. Default: 0.
-            indicate the upper triangle, Indicate the number of data blocks involved in the calculation. The value 0
-            indicates that the data blocks in the upper triangle are not involved in the calculation
-        input_layout (str): the data layout of the input qkv, support `(BSH)` and `(BNSD)`, Default `BSH`.
-        num_key_value_heads (int): head numbers of key/value which are used in GQA algorithm.
-            The value o indicates if the key and value have the same head nums, use numHeads. Default: 0.
-        sparse_mode (int): Default: 0
-        inner_precise (int): 0, float16 high precision. 1, high performance. default 1
-
-    Inputs:
-        - **query** (Tensor) - The query tensor with data type of float16 or float32.
-          Input tensor of shape :math:`(B, S, H)` / `(B, N, S, D)`.
-        - **key** (Tensor) - The key tensor with data type of float16 or float32.
-          Input tensor of shape :math:`(B, S, H)` / `(B, N, S, D)`.
-        - **value** (Tensor) - The value tensor with data type of float16 or float32.
-          Input tensor of shape :math:`(B, S, H)` / `(B, N, S, D)`.
-        - **attn_mask** (Tensor) - The attention mask tensor with data type of float16 or float32.
-          For each element, 0 indicates retention and 1 indicates discard. Input tensor of shape :math:`(B, 1, S, S)`.
-        - **actual_seq_lengths** (Tensor): Describe actual sequence length of each input with data type of int64.
-        - **actual_seq_lengths_kv** (Tensor): Describe actual sequence length of each input with data type of int64.
-        - **pse_shift** (Tensor) - The position encoding tensor with data type of float16 or float32.
-        - **dep_scale1** (Tensor)
-        - **quant_scale1** (Tensor)
-        - **deq_scale2** (Tensor)
-        - **quant_scale2** (Tensor)
-        - **quant_offset2** (Tensor)
-
-    Outputs:
-        - **attention_out** (Tensor) - Input tensor of shape :math:`(B, S, H)` / `(B, N, S, D)`.
-
-    Supported Platforms:
-        ``Ascend``
-
-    Examples:
-        >>> import mindspore.ops.operations.nn_ops as P
-        >>> from mindspore import Tensor
-        >>> import numpy as np
-        >>> B = 1
-        >>> N = 16
-        >>> S = 256
-        >>> D = 16
-        >>> query = Tensor(np.ones((B, N, S, D), dtype=np.float16))
-        >>> key = Tensor(np.ones((B, N, S, D), dtype=np.float16))
-        >>> value = Tensor(np.ones((B, N, S, D), dtype=np.float16))
-        >>> attn_mask = Tensor(np.ones((B, 1, S, S), dtype=np.float16))
-        >>> pfa = P.PromptFlashAttention(N, input_layout='BNSD')
-        >>> out = pfa(query, key, value, attn_mask, None, None, None, None, None, None, None, None)
-        >>> print(out.shape)
-        (1, 16, 256, 16)
-    """
-
-    @prim_attr_register
-    def __init__(self, num_heads, scale_value=1.0, pre_tokens=214748647, next_tokens=0, input_layout='BSH',
-                 num_key_value_heads=0, sparse_mode=0, inner_precise=1):
-        """Initialize PromptFlashAttention."""
-        validator.check_value_type('num_heads', num_heads, [int], self.name)
-        validator.check_value_type('scale_value', scale_value, [float], self.name)
-        validator.check_value_type('pre_tokens', pre_tokens, [int], self.name)
-        validator.check_value_type('next_tokens', next_tokens, [int], self.name)
-        validator.check_value_type('input_layout', input_layout, [str], self.name)
-        validator.check_value_type('num_key_value_heads', num_key_value_heads, [int], self.name)
-        validator.check_value_type('sparse_mode', sparse_mode, [int], self.name)
-        validator.check_value_type('inner_precise', inner_precise, [int], self.name)
-        self.init_prim_io_names(inputs=["query", "key", "value", "attn_mask", "actual_seq_lengths",
-                                        "actual_seq_lengths_kv", "pse_shift", "deq_scale1", "quant_scale1",
-                                        "deq_scale2", "quant_scale2", "quant_offset2"],
-                                outputs=["attention_out"])
-
-
 class AllFinite(Primitive):
     r"""
     Check all gradients is finite.
@@ -9578,3 +9256,6 @@ class AllFinite(Primitive):
             raise RuntimeError(
                 "The version of Ascend AI software package installed "
                 "in the current environment does not support AllFinite.")
+
+    def __call__(self, *args):
+        return _convert_stub(pyboost_all_finite(self, args))
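The added `__call__` routes `AllFinite` through the pyboost execution path, so the primitive can be invoked directly on a tuple of tensors. A hypothetical usage sketch, not taken from the diff: it assumes the import path shown in the removed `PromptFlashAttention` example above, requires an Ascend software package that supports AllFinite, and assumes the result is a scalar bool Tensor that is ``True`` when no element is inf or NaN:

    >>> # Hypothetical sketch; import path and output semantics are assumptions.
    >>> import numpy as np
    >>> from mindspore import Tensor
    >>> from mindspore.ops.operations.nn_ops import AllFinite
    >>> grads = (Tensor(np.ones((2, 2), np.float32)), Tensor(np.zeros((8,), np.float32)))
    >>> all_finite = AllFinite()
    >>> print(all_finite(grads))    # assumption: True when every gradient element is finite

In user code this overflow check is typically reached through the loss-scaling helpers (for example `mindspore.amp.all_finite`) rather than by constructing the primitive directly.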