mindspore 2.4.1__cp311-cp311-win_amd64.whl → 2.5.0__cp311-cp311-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mindspore might be problematic. Click here for more details.
- mindspore/.commit_id +1 -1
- mindspore/Microsoft.VisualStudio.Telemetry.dll +0 -0
- mindspore/Newtonsoft.Json.dll +0 -0
- mindspore/__init__.py +8 -3
- mindspore/_c_dataengine.cp311-win_amd64.pyd +0 -0
- mindspore/_c_expression.cp311-win_amd64.pyd +0 -0
- mindspore/_c_mindrecord.cp311-win_amd64.pyd +0 -0
- mindspore/_checkparam.py +0 -5
- mindspore/_extends/parallel_compile/akg_compiler/gen_custom_op_files.py +1 -1
- mindspore/_extends/parse/compile_config.py +64 -0
- mindspore/_extends/parse/deprecated/__init__.py +0 -0
- mindspore/_extends/parse/deprecated/deprecated_tensor_method.py +375 -0
- mindspore/_extends/parse/parser.py +23 -5
- mindspore/_extends/parse/standard_method.py +123 -27
- mindspore/_extends/pijit/pijit_func_white_list.py +1 -1
- mindspore/amp.py +7 -1
- mindspore/atlprov.dll +0 -0
- mindspore/avcodec-59.dll +0 -0
- mindspore/avdevice-59.dll +0 -0
- mindspore/avfilter-8.dll +0 -0
- mindspore/avformat-59.dll +0 -0
- mindspore/avutil-57.dll +0 -0
- mindspore/boost/boost_cell_wrapper.py +136 -41
- mindspore/c1.dll +0 -0
- mindspore/c1xx.dll +0 -0
- mindspore/c2.dll +0 -0
- mindspore/common/__init__.py +3 -1
- mindspore/common/_register_for_tensor.py +0 -1
- mindspore/common/_stub_tensor.py +25 -4
- mindspore/common/_tensor_cpp_method.py +17 -0
- mindspore/common/_tensor_docs.py +6132 -0
- mindspore/common/api.py +99 -25
- mindspore/common/dtype.py +34 -34
- mindspore/common/dump.py +2 -1
- mindspore/common/file_system.py +8 -1
- mindspore/common/generator.py +2 -0
- mindspore/common/hook_handle.py +3 -1
- mindspore/common/initializer.py +3 -4
- mindspore/common/lazy_inline.py +8 -2
- mindspore/common/mindir_util.py +10 -2
- mindspore/common/parameter.py +30 -27
- mindspore/common/tensor.py +713 -1337
- mindspore/communication/__init__.py +1 -1
- mindspore/communication/_comm_helper.py +10 -0
- mindspore/communication/comm_func.py +215 -173
- mindspore/communication/management.py +23 -20
- mindspore/context.py +292 -193
- mindspore/dataset/__init__.py +23 -19
- mindspore/dataset/callback/ds_callback.py +2 -1
- mindspore/dataset/core/config.py +84 -3
- mindspore/dataset/engine/cache_admin.py +3 -3
- mindspore/dataset/engine/cache_client.py +5 -4
- mindspore/dataset/engine/datasets.py +192 -149
- mindspore/dataset/engine/datasets_audio.py +14 -0
- mindspore/dataset/engine/datasets_standard_format.py +28 -11
- mindspore/dataset/engine/datasets_text.py +38 -1
- mindspore/dataset/engine/datasets_user_defined.py +125 -65
- mindspore/dataset/engine/datasets_vision.py +81 -8
- mindspore/dataset/engine/iterators.py +281 -63
- mindspore/dataset/engine/obs/util.py +8 -0
- mindspore/dataset/engine/queue.py +40 -0
- mindspore/dataset/engine/samplers.py +26 -2
- mindspore/dataset/engine/serializer_deserializer.py +1 -1
- mindspore/dataset/engine/validators.py +43 -11
- mindspore/dataset/transforms/py_transforms_util.py +17 -0
- mindspore/dataset/transforms/transforms.py +29 -12
- mindspore/dataset/vision/validators.py +1 -2
- mindspore/device_context/__init__.py +21 -0
- mindspore/device_context/ascend/__init__.py +25 -0
- mindspore/device_context/ascend/device.py +72 -0
- mindspore/device_context/ascend/op_debug.py +94 -0
- mindspore/device_context/ascend/op_precision.py +193 -0
- mindspore/device_context/ascend/op_tuning.py +127 -0
- mindspore/device_context/cpu/__init__.py +25 -0
- mindspore/device_context/cpu/device.py +62 -0
- mindspore/device_context/cpu/op_tuning.py +43 -0
- mindspore/device_context/gpu/__init__.py +21 -0
- mindspore/device_context/gpu/device.py +70 -0
- mindspore/device_context/gpu/op_precision.py +67 -0
- mindspore/device_context/gpu/op_tuning.py +175 -0
- mindspore/device_manager.py +134 -0
- mindspore/dnnl.dll +0 -0
- mindspore/dpcmi.dll +0 -0
- mindspore/experimental/llm_boost/__init__.py +3 -2
- mindspore/experimental/llm_boost/ascend_native/__init__.py +22 -0
- mindspore/experimental/llm_boost/ascend_native/llama_boost_ascend_native.py +211 -0
- mindspore/experimental/llm_boost/ascend_native/llm_boost.py +52 -0
- mindspore/experimental/llm_boost/atb/boost_base.py +239 -64
- mindspore/experimental/llm_boost/atb/llama_boost.py +52 -30
- mindspore/experimental/llm_boost/atb/qwen_boost.py +47 -24
- mindspore/experimental/llm_boost/register.py +1 -0
- mindspore/experimental/optim/adadelta.py +26 -22
- mindspore/experimental/optim/adam.py +3 -0
- mindspore/experimental/optim/lr_scheduler.py +33 -24
- mindspore/experimental/optim/radam.py +33 -30
- mindspore/hal/device.py +28 -0
- mindspore/hal/event.py +17 -0
- mindspore/hal/memory.py +94 -3
- mindspore/hal/stream.py +91 -6
- mindspore/include/api/context.h +1 -2
- mindspore/include/dataset/constants.h +2 -2
- mindspore/jpeg62.dll +0 -0
- mindspore/log.py +12 -0
- mindspore/mindrecord/__init__.py +1 -1
- mindspore/mindrecord/config.py +17 -316
- mindspore/mindrecord/filereader.py +1 -9
- mindspore/mindrecord/filewriter.py +5 -15
- mindspore/mindrecord/mindpage.py +1 -9
- mindspore/mindspore_backend.dll +0 -0
- mindspore/mindspore_common.dll +0 -0
- mindspore/mindspore_core.dll +0 -0
- mindspore/mindspore_glog.dll +0 -0
- mindspore/mindspore_ops.dll +0 -0
- mindspore/mint/__init__.py +824 -218
- mindspore/mint/distributed/__init__.py +66 -4
- mindspore/mint/distributed/distributed.py +2594 -44
- mindspore/mint/linalg/__init__.py +6 -0
- mindspore/mint/nn/__init__.py +473 -14
- mindspore/mint/nn/functional.py +486 -11
- mindspore/mint/nn/layer/__init__.py +17 -4
- mindspore/mint/nn/layer/_functions.py +330 -0
- mindspore/mint/nn/layer/activation.py +169 -1
- mindspore/mint/nn/layer/basic.py +123 -0
- mindspore/mint/nn/layer/conv.py +727 -0
- mindspore/mint/nn/layer/normalization.py +215 -19
- mindspore/mint/nn/layer/padding.py +797 -0
- mindspore/mint/nn/layer/pooling.py +170 -0
- mindspore/mint/optim/__init__.py +2 -1
- mindspore/mint/optim/adam.py +223 -0
- mindspore/mint/optim/adamw.py +26 -19
- mindspore/mint/special/__init__.py +2 -1
- mindspore/msobj140.dll +0 -0
- mindspore/mspdb140.dll +0 -0
- mindspore/mspdbcore.dll +0 -0
- mindspore/mspdbst.dll +0 -0
- mindspore/mspft140.dll +0 -0
- mindspore/msvcdis140.dll +0 -0
- mindspore/msvcp140_1.dll +0 -0
- mindspore/msvcp140_2.dll +0 -0
- mindspore/msvcp140_atomic_wait.dll +0 -0
- mindspore/msvcp140_codecvt_ids.dll +0 -0
- mindspore/multiprocessing/__init__.py +5 -0
- mindspore/nn/__init__.py +2 -0
- mindspore/nn/cell.py +142 -21
- mindspore/nn/dynamic_lr.py +2 -1
- mindspore/nn/layer/activation.py +6 -6
- mindspore/nn/layer/basic.py +35 -25
- mindspore/nn/layer/channel_shuffle.py +3 -3
- mindspore/nn/layer/conv.py +3 -0
- mindspore/nn/layer/embedding.py +3 -3
- mindspore/nn/layer/normalization.py +8 -7
- mindspore/nn/layer/padding.py +4 -3
- mindspore/nn/layer/pooling.py +55 -23
- mindspore/nn/layer/rnn_cells.py +1 -1
- mindspore/nn/layer/rnns.py +2 -1
- mindspore/nn/layer/timedistributed.py +5 -5
- mindspore/nn/layer/transformer.py +48 -26
- mindspore/nn/learning_rate_schedule.py +5 -3
- mindspore/nn/loss/loss.py +31 -36
- mindspore/nn/optim/ada_grad.py +1 -0
- mindspore/nn/optim/adadelta.py +2 -2
- mindspore/nn/optim/adam.py +1 -1
- mindspore/nn/optim/lars.py +1 -4
- mindspore/nn/optim/optimizer.py +1 -1
- mindspore/nn/optim/rprop.py +2 -2
- mindspore/nn/optim/thor.py +2 -1
- mindspore/nn/utils/__init__.py +22 -0
- mindspore/nn/utils/init.py +73 -0
- mindspore/nn/wrap/cell_wrapper.py +4 -6
- mindspore/nn/wrap/loss_scale.py +3 -4
- mindspore/numpy/array_creations.py +60 -62
- mindspore/numpy/array_ops.py +148 -143
- mindspore/numpy/logic_ops.py +41 -42
- mindspore/numpy/math_ops.py +361 -359
- mindspore/numpy/utils.py +16 -16
- mindspore/numpy/utils_const.py +4 -4
- mindspore/opencv_core452.dll +0 -0
- mindspore/opencv_imgcodecs452.dll +0 -0
- mindspore/opencv_imgproc452.dll +0 -0
- mindspore/ops/__init__.py +2 -1
- mindspore/ops/_grad_experimental/grad_comm_ops.py +107 -8
- mindspore/ops/_grad_experimental/grad_debug_ops.py +6 -1
- mindspore/ops/_grad_experimental/grad_inner_ops.py +9 -0
- mindspore/ops/_grad_experimental/grad_math_ops.py +2 -1
- mindspore/ops/_op_impl/cpu/__init__.py +1 -0
- mindspore/ops/_op_impl/cpu/raise_op.py +28 -0
- mindspore/ops/_vmap/vmap_array_ops.py +20 -19
- mindspore/ops/_vmap/vmap_base.py +0 -2
- mindspore/ops/_vmap/vmap_grad_nn_ops.py +19 -13
- mindspore/ops/_vmap/vmap_math_ops.py +11 -9
- mindspore/ops/_vmap/vmap_nn_ops.py +20 -34
- mindspore/ops/auto_generate/cpp_create_prim_instance_helper.py +149 -12
- mindspore/ops/auto_generate/gen_arg_handler.py +0 -61
- mindspore/ops/auto_generate/gen_extend_func.py +554 -60
- mindspore/ops/auto_generate/gen_ops_def.py +1621 -115
- mindspore/ops/auto_generate/gen_ops_prim.py +8027 -3411
- mindspore/ops/auto_generate/pyboost_inner_prim.py +183 -79
- mindspore/ops/composite/base.py +1 -1
- mindspore/ops/composite/multitype_ops/_compile_utils.py +229 -30
- mindspore/ops/composite/multitype_ops/pow_impl.py +0 -29
- mindspore/ops/function/__init__.py +12 -0
- mindspore/ops/function/array_func.py +561 -159
- mindspore/ops/function/clip_func.py +64 -0
- mindspore/ops/function/debug_func.py +28 -20
- mindspore/ops/function/image_func.py +1 -1
- mindspore/ops/function/linalg_func.py +5 -4
- mindspore/ops/function/math_func.py +1664 -294
- mindspore/ops/function/nn_func.py +988 -317
- mindspore/ops/function/parameter_func.py +3 -56
- mindspore/ops/function/random_func.py +243 -33
- mindspore/ops/function/sparse_unary_func.py +1 -1
- mindspore/ops/functional.py +18 -5
- mindspore/ops/functional_overload.py +897 -0
- mindspore/ops/operations/__init__.py +3 -2
- mindspore/ops/operations/_embedding_cache_ops.py +4 -4
- mindspore/ops/operations/_grad_ops.py +2 -34
- mindspore/ops/operations/_infer_ops.py +2 -1
- mindspore/ops/operations/_inner_ops.py +38 -8
- mindspore/ops/operations/array_ops.py +45 -303
- mindspore/ops/operations/comm_ops.py +23 -17
- mindspore/ops/operations/custom_ops.py +7 -49
- mindspore/ops/operations/debug_ops.py +42 -47
- mindspore/ops/operations/inner_ops.py +6 -4
- mindspore/ops/operations/linalg_ops.py +3 -2
- mindspore/ops/operations/manually_defined/ops_def.py +185 -104
- mindspore/ops/operations/math_ops.py +11 -216
- mindspore/ops/operations/nn_ops.py +153 -310
- mindspore/ops/primitive.py +23 -21
- mindspore/ops/tensor_method.py +1669 -0
- mindspore/ops_generate/aclnn_kernel_register_auto_cc_generator.py +110 -0
- mindspore/ops_generate/add_tensor_docs_generator.py +54 -0
- mindspore/ops_generate/arg_handler.py +0 -61
- mindspore/ops_generate/auto_grad_impl_cc_generator.py +135 -0
- mindspore/ops_generate/auto_grad_reg_cc_generator.py +93 -0
- mindspore/ops_generate/base_generator.py +11 -0
- mindspore/ops_generate/cpp_create_prim_instance_helper_generator.py +108 -0
- mindspore/ops_generate/functional_map_cpp_generator.py +491 -0
- mindspore/ops_generate/functional_overload_py_generator.py +110 -0
- mindspore/ops_generate/functions_cc_generator.py +233 -0
- mindspore/ops_generate/gen_aclnn_implement.py +110 -114
- mindspore/ops_generate/gen_constants.py +157 -3
- mindspore/ops_generate/gen_ops.py +245 -990
- mindspore/ops_generate/gen_pyboost_func.py +97 -998
- mindspore/ops_generate/gen_utils.py +119 -33
- mindspore/ops_generate/lite_ops_cpp_generator.py +155 -0
- mindspore/ops_generate/op_api_proto.py +206 -0
- mindspore/ops_generate/op_def_py_generator.py +131 -0
- mindspore/ops_generate/op_prim_py_generator.py +480 -0
- mindspore/ops_generate/op_proto.py +373 -108
- mindspore/ops_generate/op_template_parser.py +436 -0
- mindspore/ops_generate/ops_def_cc_generator.py +288 -0
- mindspore/ops_generate/ops_def_h_generator.py +74 -0
- mindspore/ops_generate/ops_name_h_generator.py +68 -0
- mindspore/ops_generate/ops_primitive_h_generator.py +81 -0
- mindspore/ops_generate/pyboost_functions_cpp_generator.py +370 -0
- mindspore/ops_generate/pyboost_functions_h_generator.py +68 -0
- mindspore/ops_generate/pyboost_functions_py_generator.py +148 -0
- mindspore/ops_generate/pyboost_grad_function_cpp_generator.py +154 -0
- mindspore/ops_generate/pyboost_inner_prim_generator.py +131 -0
- mindspore/ops_generate/pyboost_native_grad_functions_generator.py +268 -0
- mindspore/ops_generate/pyboost_op_cpp_code_generator.py +851 -0
- mindspore/ops_generate/pyboost_overload_functions_cpp_generator.py +344 -0
- mindspore/ops_generate/pyboost_utils.py +92 -33
- mindspore/ops_generate/template.py +294 -44
- mindspore/ops_generate/tensor_func_reg_cpp_generator.py +422 -0
- mindspore/parallel/__init__.py +3 -3
- mindspore/parallel/_auto_parallel_context.py +44 -34
- mindspore/parallel/_cell_wrapper.py +22 -3
- mindspore/parallel/_parallel_serialization.py +13 -2
- mindspore/parallel/_utils.py +4 -2
- mindspore/parallel/algo_parameter_config.py +1 -1
- mindspore/parallel/checkpoint_transform.py +44 -0
- mindspore/parallel/cluster/process_entity/_api.py +131 -37
- mindspore/parallel/cluster/process_entity/_utils.py +41 -6
- mindspore/parallel/cluster/run.py +20 -3
- mindspore/parallel/parameter_broadcast.py +1 -1
- mindspore/parallel/shard.py +3 -0
- mindspore/parallel/transform_safetensors.py +119 -253
- mindspore/pgodb140.dll +0 -0
- mindspore/pgort140.dll +0 -0
- mindspore/profiler/__init__.py +17 -4
- mindspore/profiler/analysis/__init__.py +0 -0
- mindspore/profiler/analysis/parser/__init__.py +0 -0
- mindspore/profiler/analysis/parser/ascend_cann_parser.py +166 -0
- mindspore/profiler/analysis/parser/base_parser.py +158 -0
- mindspore/profiler/analysis/parser/framework_cann_relation_parser.py +45 -0
- mindspore/profiler/analysis/parser/ms_framework_parser.py +142 -0
- mindspore/profiler/analysis/parser/ms_minddata_parser.py +145 -0
- mindspore/profiler/analysis/parser/timeline_assembly_factory/__init__.py +0 -0
- mindspore/profiler/analysis/parser/timeline_assembly_factory/ascend_timeline_assembler.py +261 -0
- mindspore/profiler/analysis/parser/timeline_assembly_factory/base_timeline_assembler.py +40 -0
- mindspore/profiler/analysis/parser/timeline_assembly_factory/trace_view_container.py +84 -0
- mindspore/profiler/analysis/parser/timeline_creator/__init__.py +0 -0
- mindspore/profiler/analysis/parser/timeline_creator/base_timeline_creator.py +44 -0
- mindspore/profiler/analysis/parser/timeline_creator/cpu_op_timeline_creator.py +90 -0
- mindspore/profiler/analysis/parser/timeline_creator/fwk_timeline_creator.py +76 -0
- mindspore/profiler/analysis/parser/timeline_creator/msprof_timeline_creator.py +103 -0
- mindspore/profiler/analysis/parser/timeline_creator/scope_layer_timeline_creator.py +134 -0
- mindspore/profiler/analysis/parser/timeline_event/__init__.py +0 -0
- mindspore/profiler/analysis/parser/timeline_event/base_event.py +233 -0
- mindspore/profiler/analysis/parser/timeline_event/cpu_op_event.py +47 -0
- mindspore/profiler/analysis/parser/timeline_event/flow_event.py +36 -0
- mindspore/profiler/analysis/parser/timeline_event/fwk_event.py +260 -0
- mindspore/profiler/analysis/parser/timeline_event/msprof_event.py +73 -0
- mindspore/profiler/analysis/parser/timeline_event/scope_layer_event.py +53 -0
- mindspore/profiler/analysis/parser/timeline_event/timeline_event_pool.py +146 -0
- mindspore/profiler/analysis/task_manager.py +131 -0
- mindspore/profiler/analysis/time_converter.py +84 -0
- mindspore/profiler/analysis/viewer/__init__.py +0 -0
- mindspore/profiler/analysis/viewer/ascend_communication_viewer.py +333 -0
- mindspore/profiler/analysis/viewer/ascend_integrate_viewer.py +87 -0
- mindspore/profiler/analysis/viewer/ascend_kernel_details_viewer.py +252 -0
- mindspore/profiler/analysis/viewer/ascend_memory_viewer.py +313 -0
- mindspore/profiler/analysis/viewer/ascend_op_memory_viewer.py +322 -0
- mindspore/profiler/analysis/viewer/ascend_step_trace_time_viewer.py +265 -0
- mindspore/profiler/analysis/viewer/ascend_timeline_viewer.py +58 -0
- mindspore/profiler/analysis/viewer/base_viewer.py +26 -0
- mindspore/profiler/analysis/viewer/ms_dataset_viewer.py +97 -0
- mindspore/profiler/analysis/viewer/ms_minddata_viewer.py +581 -0
- mindspore/profiler/analysis/work_flow.py +73 -0
- mindspore/profiler/common/ascend_msprof_exporter.py +138 -0
- mindspore/profiler/common/command_executor.py +90 -0
- mindspore/profiler/common/constant.py +174 -3
- mindspore/profiler/common/file_manager.py +208 -0
- mindspore/profiler/common/log.py +130 -0
- mindspore/profiler/common/msprof_cmd_tool.py +202 -0
- mindspore/profiler/common/path_manager.py +371 -0
- mindspore/profiler/common/process_bar.py +168 -0
- mindspore/profiler/common/process_pool.py +9 -3
- mindspore/profiler/common/profiler_context.py +476 -0
- mindspore/profiler/common/profiler_info.py +304 -0
- mindspore/profiler/common/profiler_output_path.py +284 -0
- mindspore/profiler/common/profiler_parameters.py +210 -0
- mindspore/profiler/common/profiler_path_manager.py +120 -0
- mindspore/profiler/common/record_function.py +76 -0
- mindspore/profiler/common/tlv_decoder.py +76 -0
- mindspore/profiler/common/util.py +75 -2
- mindspore/profiler/dynamic_profiler.py +270 -37
- mindspore/profiler/envprofiler.py +138 -0
- mindspore/profiler/mstx.py +199 -0
- mindspore/profiler/platform/__init__.py +21 -0
- mindspore/profiler/platform/base_profiler.py +40 -0
- mindspore/profiler/platform/cpu_profiler.py +124 -0
- mindspore/profiler/platform/gpu_profiler.py +74 -0
- mindspore/profiler/platform/npu_profiler.py +309 -0
- mindspore/profiler/profiler.py +580 -93
- mindspore/profiler/profiler_action_controller.py +187 -0
- mindspore/profiler/profiler_interface.py +114 -0
- mindspore/profiler/schedule.py +208 -0
- mindspore/rewrite/api/symbol_tree.py +1 -2
- mindspore/run_check/_check_version.py +18 -13
- mindspore/runtime/__init__.py +37 -0
- mindspore/runtime/device.py +27 -0
- mindspore/runtime/event.py +209 -0
- mindspore/runtime/executor.py +148 -0
- mindspore/runtime/memory.py +392 -0
- mindspore/runtime/stream.py +460 -0
- mindspore/runtime/thread_bind_core.py +401 -0
- mindspore/swresample-4.dll +0 -0
- mindspore/swscale-6.dll +0 -0
- mindspore/tbbmalloc.dll +0 -0
- mindspore/tinyxml2.dll +0 -0
- mindspore/train/__init__.py +2 -2
- mindspore/train/_utils.py +53 -18
- mindspore/train/amp.py +8 -4
- mindspore/train/callback/_checkpoint.py +32 -18
- mindspore/train/callback/_early_stop.py +1 -1
- mindspore/train/callback/_flops_collector.py +105 -69
- mindspore/train/callback/_history.py +1 -1
- mindspore/train/callback/_summary_collector.py +44 -6
- mindspore/train/callback/_tft_register.py +37 -15
- mindspore/train/dataset_helper.py +11 -11
- mindspore/train/metrics/precision.py +4 -5
- mindspore/train/mind_ir_pb2.py +167 -46
- mindspore/train/model.py +13 -14
- mindspore/train/serialization.py +461 -72
- mindspore/train/summary/summary_record.py +1 -2
- mindspore/train/train_thor/model_thor.py +1 -1
- mindspore/turbojpeg.dll +0 -0
- mindspore/utils/__init__.py +4 -2
- mindspore/utils/dryrun.py +138 -0
- mindspore/utils/runtime_execution_order_check.py +550 -0
- mindspore/vcmeta.dll +0 -0
- mindspore/vcruntime140.dll +0 -0
- mindspore/vcruntime140_1.dll +0 -0
- mindspore/version.py +1 -1
- {mindspore-2.4.1.dist-info → mindspore-2.5.0.dist-info}/METADATA +3 -4
- {mindspore-2.4.1.dist-info → mindspore-2.5.0.dist-info}/RECORD +391 -265
- {mindspore-2.4.1.dist-info → mindspore-2.5.0.dist-info}/entry_points.txt +1 -1
- mindspore/common/_tensor_overload.py +0 -139
- mindspore/mindspore_np_dtype.dll +0 -0
- mindspore/profiler/envprofiling.py +0 -254
- mindspore/profiler/profiling.py +0 -1926
- {mindspore-2.4.1.dist-info → mindspore-2.5.0.dist-info}/WHEEL +0 -0
- {mindspore-2.4.1.dist-info → mindspore-2.5.0.dist-info}/top_level.txt +0 -0
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# Copyright 2020-
|
|
1
|
+
# Copyright 2020-2024 Huawei Technologies Co., Ltd
|
|
2
2
|
#
|
|
3
3
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
4
|
# you may not use this file except in compliance with the License.
|
|
@@ -31,15 +31,18 @@ from mindspore.ops.primitive import PrimitiveWithInfer
|
|
|
31
31
|
from mindspore.ops.primitive import PrimitiveWithCheck
|
|
32
32
|
from mindspore.ops.primitive import prim_attr_register
|
|
33
33
|
from mindspore.run_check._check_version import AscendEnvChecker
|
|
34
|
-
from
|
|
34
|
+
from mindspore._c_expression import pyboost_all_finite
|
|
35
|
+
from mindspore.common._stub_tensor import _convert_stub
|
|
36
|
+
from ..auto_generate import (CeLU, Flatten, LogSoftmax, LogSoftmaxExt, GLU, ReLU, ReLU6, Dense, Tanh,
|
|
35
37
|
Elu, Sigmoid, Softmax, SoftplusExt, HSwish, HSigmoid, AvgPool, BiasAdd,
|
|
36
38
|
NLLLoss, OneHot, GeLU, FastGeLU, PReLU, RmsNorm, IncreFlashAttention, MSELossExt,
|
|
37
39
|
GridSampler3D, GridSampler2D, LayerNorm, LayerNormExt, HShrink, AdamWeightDecay, Dropout,
|
|
38
40
|
ApplyRotaryPosEmb, PagedAttention, PagedAttentionMask, ReshapeAndCache,
|
|
39
|
-
FlashAttentionScore, Embedding, UpsampleNearest1D, UpsampleNearest2D,
|
|
41
|
+
FlashAttentionScore, PromptFlashAttention, Embedding, UpsampleNearest1D, UpsampleNearest2D,
|
|
40
42
|
UpsampleNearest3D, UpsampleTrilinear3D,
|
|
41
43
|
UpsampleBilinear2D, UpsampleLinear1D,
|
|
42
|
-
BinaryCrossEntropy, BCEWithLogitsLoss, SoftShrink
|
|
44
|
+
BinaryCrossEntropy, BCEWithLogitsLoss, SoftShrink,
|
|
45
|
+
SmoothL1Loss)
|
|
43
46
|
from .manually_defined import BatchNorm
|
|
44
47
|
|
|
45
48
|
|
|
@@ -612,12 +615,12 @@ class InstanceNorm(PrimitiveWithInfer):
|
|
|
612
615
|
Inputs:
|
|
613
616
|
- **input_x** (Tensor) - The input of InstanceNorm, Tensor of shape :math:`(N, C)`,
|
|
614
617
|
data type: float16 or float32.
|
|
615
|
-
- **gamma** (Parameter) - Scale, Tensor of shape :math:`(C,)`,
|
|
618
|
+
- **gamma** (Union[Parameter, Tensor]) - Scale, Tensor of shape :math:`(C,)`,
|
|
616
619
|
data type: float32.
|
|
617
|
-
- **beta** (Parameter) - Bias, Tensor of shape :math:`(C,)`,
|
|
620
|
+
- **beta** (Union[Parameter, Tensor]) - Bias, Tensor of shape :math:`(C,)`,
|
|
618
621
|
data type: float32.
|
|
619
|
-
- **mean** (Parameter) - Mean value, Tensor of shape :math:`(C,)`, data type: float32.
|
|
620
|
-
- **variance** (Parameter) - Variance value, Tensor of shape :math:`(C,)`, data type: float32.
|
|
622
|
+
- **mean** (Union[Parameter, Tensor]) - Mean value, Tensor of shape :math:`(C,)`, data type: float32.
|
|
623
|
+
- **variance** (Union[Parameter, Tensor]) - Variance value, Tensor of shape :math:`(C,)`, data type: float32.
|
|
621
624
|
|
|
622
625
|
Outputs:
|
|
623
626
|
Tuple of 3 Tensors, the normalized input, the updated parameters.
|
|
@@ -1430,6 +1433,9 @@ class MaxPool3D(Primitive):
|
|
|
1430
1433
|
\max_{l=0, \ldots, d_{ker}-1} \max_{m=0, \ldots, h_{ker}-1} \max_{n=0, \ldots, w_{ker}-1}
|
|
1431
1434
|
\text{input}(N_i, C_j, s_0 \times d + l, s_1 \times h + m, s_2 \times w + n)
|
|
1432
1435
|
|
|
1436
|
+
.. note::
|
|
1437
|
+
For Atlas training series products, this primitive is not supported.
|
|
1438
|
+
|
|
1433
1439
|
Args:
|
|
1434
1440
|
kernel_size (Union[int, tuple[int]]): The size of kernel used to take the maximum value,
|
|
1435
1441
|
is an int number that represents depth, height and width of the kernel, or a tuple
|
|
@@ -2284,9 +2290,9 @@ class ApplyMomentum(Primitive):
|
|
|
2284
2290
|
gradient_scale (float): The scale of the gradient. Default: ``1.0`` .
|
|
2285
2291
|
|
|
2286
2292
|
Inputs:
|
|
2287
|
-
- **variable** (Parameter) - Weights to be updated. Data type must be float64, int64, float,
|
|
2288
|
-
int16, int32, int8, uint16, uint32, uint64, uint8, complex64, complex128.
|
|
2289
|
-
- **accumulation** (Parameter) - Accumulated gradient value by moment weight,
|
|
2293
|
+
- **variable** (Union[Parameter, Tensor]) - Weights to be updated. Data type must be float64, int64, float,
|
|
2294
|
+
float16, int16, int32, int8, uint16, uint32, uint64, uint8, complex64, complex128.
|
|
2295
|
+
- **accumulation** (Union[Parameter, Tensor]) - Accumulated gradient value by moment weight,
|
|
2290
2296
|
has the same data type with `variable`.
|
|
2291
2297
|
- **learning_rate** (Union[Number, Tensor]) - The learning rate value, must be a float64, int64, float,
|
|
2292
2298
|
float16, int16, int32, int8, uint16, uint32, uint64, uint8, complex64, complex128 number or
|
|
@@ -2303,7 +2309,7 @@ class ApplyMomentum(Primitive):
|
|
|
2303
2309
|
|
|
2304
2310
|
Raises:
|
|
2305
2311
|
TypeError: If the `use_locking` or `use_nesterov` is not a bool or `gradient_scale` is not a float.
|
|
2306
|
-
TypeError: If the data type of `var`, `accum` and `grad` conversion
|
|
2312
|
+
TypeError: If the data type of `var`, `accum` and `grad` conversion is not supported.
|
|
2307
2313
|
|
|
2308
2314
|
Supported Platforms:
|
|
2309
2315
|
``Ascend`` ``GPU`` ``CPU``
|
|
@@ -2351,55 +2357,6 @@ class ApplyMomentum(Primitive):
|
|
|
2351
2357
|
self.add_prim_attr('side_effect_mem', True)
|
|
2352
2358
|
|
|
2353
2359
|
|
|
2354
|
-
class SmoothL1Loss(Primitive):
|
|
2355
|
-
r"""
|
|
2356
|
-
Calculate the smooth L1 loss, and the L1 loss function has robustness.
|
|
2357
|
-
|
|
2358
|
-
Refer to :func:`mindspore.ops.smooth_l1_loss` for more details.
|
|
2359
|
-
|
|
2360
|
-
Args:
|
|
2361
|
-
beta (float, optional): A parameter used to control the point where the function will change between
|
|
2362
|
-
L1 to L2 loss. The value should be greater than zero. Default: ``1.0`` .
|
|
2363
|
-
reduction (str, optional): Apply specific reduction method to the output: ``'none'`` , ``'mean'`` ,
|
|
2364
|
-
``'sum'`` . Default: ``'none'`` .
|
|
2365
|
-
|
|
2366
|
-
- ``'none'``: no reduction will be applied.
|
|
2367
|
-
- ``'mean'``: compute and return the mean of elements in the output.
|
|
2368
|
-
- ``'sum'``: the output elements will be summed.
|
|
2369
|
-
|
|
2370
|
-
Inputs:
|
|
2371
|
-
- **logits** (Tensor) - Input Tensor of any dimension. Data type must be float16, float32 or float64.
|
|
2372
|
-
- **labels** (Tensor) - Ground truth data, has the same shape and dtype as the `logits`.
|
|
2373
|
-
|
|
2374
|
-
Outputs:
|
|
2375
|
-
Tensor, loss float tensor, same shape and dtype as the `logits`.
|
|
2376
|
-
|
|
2377
|
-
Supported Platforms:
|
|
2378
|
-
``Ascend`` ``GPU`` ``CPU``
|
|
2379
|
-
|
|
2380
|
-
Examples:
|
|
2381
|
-
>>> import mindspore
|
|
2382
|
-
>>> import numpy as np
|
|
2383
|
-
>>> from mindspore import Tensor, ops
|
|
2384
|
-
>>> loss = ops.SmoothL1Loss()
|
|
2385
|
-
>>> logits = Tensor(np.array([1, 2, 3]), mindspore.float32)
|
|
2386
|
-
>>> labels = Tensor(np.array([1, 2, 2]), mindspore.float32)
|
|
2387
|
-
>>> output = loss(logits, labels)
|
|
2388
|
-
>>> print(output)
|
|
2389
|
-
[0. 0. 0.5]
|
|
2390
|
-
"""
|
|
2391
|
-
|
|
2392
|
-
@prim_attr_register
|
|
2393
|
-
def __init__(self, beta=1.0, reduction='none'):
|
|
2394
|
-
"""Initialize SmoothL1Loss."""
|
|
2395
|
-
validator.check_value_type('beta', beta, [float], self.name)
|
|
2396
|
-
validator.check('beta', beta, '', 0, validator.GT, self.name)
|
|
2397
|
-
validator.check_string(
|
|
2398
|
-
reduction, ['none', 'sum', 'mean'], 'reduction', self.name)
|
|
2399
|
-
self.add_prim_attr('sigma', self.beta)
|
|
2400
|
-
self.init_prim_io_names(inputs=['prediction', 'target'], outputs=['output'])
|
|
2401
|
-
|
|
2402
|
-
|
|
2403
2360
|
class MultiMarginLoss(Primitive):
|
|
2404
2361
|
r"""
|
|
2405
2362
|
Creates a loss function that minimizes the hinge loss
|
|
@@ -3607,11 +3564,11 @@ class Adam(Primitive):
|
|
|
3607
3564
|
If ``False`` , update the gradients without using NAG. Default: ``False`` .
|
|
3608
3565
|
|
|
3609
3566
|
Inputs:
|
|
3610
|
-
- **var** (Parameter) - Weights to be updated. The shape is :math:`(N, *)` where :math:`*` means,
|
|
3567
|
+
- **var** (Union[Parameter, Tensor]) - Weights to be updated. The shape is :math:`(N, *)` where :math:`*` means,
|
|
3611
3568
|
any number of additional dimensions. The data type can be float16 or float32.
|
|
3612
|
-
- **m** (Parameter) - The 1st moment vector in the updating formula,
|
|
3569
|
+
- **m** (Union[Parameter, Tensor]) - The 1st moment vector in the updating formula,
|
|
3613
3570
|
the shape should be the same as `var`.
|
|
3614
|
-
- **v** (Parameter) - the 2nd moment vector in the updating formula,
|
|
3571
|
+
- **v** (Union[Parameter, Tensor]) - the 2nd moment vector in the updating formula,
|
|
3615
3572
|
the shape should be the same as `var`.
|
|
3616
3573
|
- **beta1_power** (float) - :math:`beta_1^t(\beta_1^{t})` in the updating formula.
|
|
3617
3574
|
- **beta2_power** (float) - :math:`beta_2^t(\beta_2^{t})` in the updating formula.
|
|
@@ -3782,8 +3739,8 @@ class AdamNoUpdateParam(Primitive):
|
|
|
3782
3739
|
|
|
3783
3740
|
class FusedSparseAdam(Primitive):
|
|
3784
3741
|
r"""
|
|
3785
|
-
Merges the duplicate value of the gradient and then updates parameters by the Adaptive Moment Estimation
|
|
3786
|
-
algorithm. This operator is used when the gradient is sparse.
|
|
3742
|
+
Merges the duplicate value of the gradient and then updates parameters or tensors by the Adaptive Moment Estimation
|
|
3743
|
+
(Adam) algorithm. This operator is used when the gradient is sparse.
|
|
3787
3744
|
|
|
3788
3745
|
The Adam algorithm is proposed in `Adam: A Method for Stochastic Optimization <https://arxiv.org/abs/1412.6980>`_.
|
|
3789
3746
|
|
|
@@ -3816,11 +3773,12 @@ class FusedSparseAdam(Primitive):
|
|
|
3816
3773
|
If ``False`` , update the gradients without using NAG. Default: ``False`` .
|
|
3817
3774
|
|
|
3818
3775
|
Inputs:
|
|
3819
|
-
- **var** (Parameter) - Parameters to be updated with float32 data type. The shape is
|
|
3820
|
-
where :math:`*` means, any number of additional dimensions.
|
|
3821
|
-
- **m** (Parameter) - The 1st moment vector in the updating formula, has the same shape and data
|
|
3822
|
-
|
|
3823
|
-
|
|
3776
|
+
- **var** (Union[Parameter, Tensor]) - Parameters or tensors to be updated with float32 data type. The shape is:
|
|
3777
|
+
:math:`(N, *)` where :math:`*` means, any number of additional dimensions.
|
|
3778
|
+
- **m** (Union[Parameter, Tensor]) - The 1st moment vector in the updating formula, has the same shape and data
|
|
3779
|
+
type as `var`.
|
|
3780
|
+
- **v** (Union[Parameter, Tensor]) - The 2nd moment vector in the updating formula, has the same shape and data
|
|
3781
|
+
type as `var`. Mean square gradients, has the same type as `var` with float32 data type.
|
|
3824
3782
|
- **beta1_power** (Tensor) - :math:`beta_1^t` in the updating formula with float32 data type.
|
|
3825
3783
|
The shape is :math:`(1, )`.
|
|
3826
3784
|
- **beta2_power** (Tensor) - :math:`beta_2^t` in the updating formula with float32 data type.
|
|
@@ -3838,7 +3796,7 @@ class FusedSparseAdam(Primitive):
|
|
|
3838
3796
|
- **indices** (Tensor) - Gradient indices with int32 data type and indices.shape[0] = gradient.shape[0].
|
|
3839
3797
|
|
|
3840
3798
|
Outputs:
|
|
3841
|
-
Tuple of 3 Tensors, this operator will update the input parameters directly, the outputs are useless.
|
|
3799
|
+
Tuple of 3 Tensors, this operator will update the input parameters or tensors directly, the outputs are useless.
|
|
3842
3800
|
|
|
3843
3801
|
- **var** (Tensor) - A Tensor with shape :math:`(N, *)`.
|
|
3844
3802
|
- **m** (Tensor) - A Tensor with shape :math:`(1, )`.
|
|
@@ -3908,8 +3866,8 @@ class FusedSparseAdam(Primitive):
|
|
|
3908
3866
|
|
|
3909
3867
|
class FusedSparseLazyAdam(Primitive):
|
|
3910
3868
|
r"""
|
|
3911
|
-
Merges the duplicate value of the gradient and then updates parameters by the Adaptive Moment Estimation
|
|
3912
|
-
algorithm. This operator is used when the gradient is sparse. The behavior is not equivalent to the
|
|
3869
|
+
Merges the duplicate value of the gradient and then updates parameters or tensors by the Adaptive Moment Estimation
|
|
3870
|
+
(Adam) algorithm. This operator is used when the gradient is sparse. The behavior is not equivalent to the
|
|
3913
3871
|
original Adam algorithm, as only the current indices parameters will be updated.
|
|
3914
3872
|
|
|
3915
3873
|
The Adam algorithm is proposed in `Adam: A Method for Stochastic Optimization <https://arxiv.org/abs/1412.6980>`_.
|
|
@@ -3943,11 +3901,12 @@ class FusedSparseLazyAdam(Primitive):
|
|
|
3943
3901
|
If ``False`` , update the gradients without using NAG. Default: ``False`` .
|
|
3944
3902
|
|
|
3945
3903
|
Inputs:
|
|
3946
|
-
- **var** (Parameter) - Parameters to be updated with float32 data type. The shape is
|
|
3947
|
-
where :math:`*` means, any number of additional dimensions.
|
|
3948
|
-
- **m** (Parameter) - The 1st moment vector in the updating formula, has the same shape and data
|
|
3949
|
-
|
|
3950
|
-
|
|
3904
|
+
- **var** (Union[Parameter, Tensor]) - Parameters or tensors to be updated with float32 data type. The shape is:
|
|
3905
|
+
:math:`(N, *)` where :math:`*` means, any number of additional dimensions.
|
|
3906
|
+
- **m** (Union[Parameter, Tensor]) - The 1st moment vector in the updating formula, has the same shape and data
|
|
3907
|
+
type as `var`.
|
|
3908
|
+
- **v** (Union[Parameter, Tensor]) - The 2nd moment vector in the updating formula, has the same shape and data
|
|
3909
|
+
type as `var`. Mean square gradients, has the same type as `var` with float32 data type.
|
|
3951
3910
|
- **beta1_power** (Tensor) - :math:`beta_1^t` in the updating formula with float32 data type.
|
|
3952
3911
|
The shape is :math:`(1, )`.
|
|
3953
3912
|
- **beta2_power** (Tensor) - :math:`beta_2^t` in the updating formula with float32 data type.
|
|
@@ -3965,7 +3924,7 @@ class FusedSparseLazyAdam(Primitive):
|
|
|
3965
3924
|
- **indices** (Tensor) - Gradient indices with int32 data type and indices.shape[0] = gradient.shape[0].
|
|
3966
3925
|
|
|
3967
3926
|
Outputs:
|
|
3968
|
-
Tuple of 3 Tensors, this operator will update the input parameters directly, the outputs are useless.
|
|
3927
|
+
Tuple of 3 Tensors, this operator will update the input parameters or tensors directly, the outputs are useless.
|
|
3969
3928
|
|
|
3970
3929
|
- **var** (Tensor) - A Tensor with shape :math:`(N, *)`.
|
|
3971
3930
|
- **m** (Tensor) - A Tensor with shape :math:`(1, )`.
|
|
@@ -4051,17 +4010,18 @@ class FusedSparseFtrl(Primitive):
|
|
|
4051
4010
|
use_locking (bool): Use locks for updating operation if True . Default: ``False`` .
|
|
4052
4011
|
|
|
4053
4012
|
Inputs:
|
|
4054
|
-
- **var** (Parameter) - The variable to be updated. The data type must be float32. The shape is
|
|
4055
|
-
where :math:`*` means, any number of additional dimensions.
|
|
4056
|
-
- **accum** (Parameter) - The accumulation to be updated, must be same type and shape as `var`.
|
|
4057
|
-
- **linear** (Parameter) - the linear coefficient to be updated, must be same type and shape as
|
|
4013
|
+
- **var** (Union[Parameter, Tensor]) - The variable to be updated. The data type must be float32. The shape is:
|
|
4014
|
+
:math:`(N, *)` where :math:`*` means, any number of additional dimensions.
|
|
4015
|
+
- **accum** (Union[Parameter, Tensor]) - The accumulation to be updated, must be same type and shape as `var`.
|
|
4016
|
+
- **linear** (Union[Parameter, Tensor]) - the linear coefficient to be updated, must be same type and shape as
|
|
4017
|
+
`var`.
|
|
4058
4018
|
- **grad** (Tensor) - A tensor of the same type as `var` and
|
|
4059
4019
|
grad.shape[1:] = var.shape[1:] if var.shape > 1.
|
|
4060
4020
|
- **indices** (Tensor) - A vector of indices into the first dimension of `var` and `accum`.
|
|
4061
4021
|
The type must be int32 and indices.shape[0] = grad.shape[0].
|
|
4062
4022
|
|
|
4063
4023
|
Outputs:
|
|
4064
|
-
Tuple of 3 Tensor, this operator will update the input parameters directly, the outputs are useless.
|
|
4024
|
+
Tuple of 3 Tensors, this operator will update the input parameters or tensors directly, the outputs are useless.
|
|
4065
4025
|
|
|
4066
4026
|
- **var** (Tensor) - A Tensor with shape :math:`(N, *)`.
|
|
4067
4027
|
- **accum** (Tensor) - A Tensor with shape :math:`(1, )`.
|
|
@@ -4148,9 +4108,10 @@ class FusedSparseProximalAdagrad(Primitive):
|
|
|
4148
4108
|
Default: ``False`` .
|
|
4149
4109
|
|
|
4150
4110
|
Inputs:
|
|
4151
|
-
- **var** (Parameter) - Variable tensor to be updated. The data type must be float32.
|
|
4111
|
+
- **var** (Union[Parameter, Tensor]) - Variable tensor to be updated. The data type must be float32.
|
|
4152
4112
|
The shape is :math:`(N, *)` where :math:`*` means, any number of additional dimensions.
|
|
4153
|
-
- **accum** (Parameter) - Variable tensor to be updated, has the same shape and data type as
|
|
4113
|
+
- **accum** (Union[Parameter, Tensor]) - Variable tensor to be updated, has the same shape and data type as
|
|
4114
|
+
`var`.
|
|
4154
4115
|
- **lr** (Tensor) - The learning rate value. The data type must be float32. The shape is :math:`(1, )`.
|
|
4155
4116
|
- **l1** (Tensor) - l1 regularization strength. The data type must be float32. The shape is :math:`(1, )`.
|
|
4156
4117
|
- **l2** (Tensor) - l2 regularization strength. The data type must be float32. The shape is :math:`(1, )`.
|
|
@@ -4160,7 +4121,7 @@ class FusedSparseProximalAdagrad(Primitive):
|
|
|
4160
4121
|
The type must be int32 and indices.shape[0] = grad.shape[0].
|
|
4161
4122
|
|
|
4162
4123
|
Outputs:
|
|
4163
|
-
Tuple of 2 Tensors, this operator will update the input parameters directly, the outputs are useless.
|
|
4124
|
+
Tuple of 2 Tensors, this operator will update the input parameters or tensors directly, the outputs are useless.
|
|
4164
4125
|
|
|
4165
4126
|
- **var** (Tensor) - A Tensor with shape :math:`(N, *)`.
|
|
4166
4127
|
- **accum** (Tensor) - A Tensor with shape :math:`(1, )`.
|
|
@@ -4339,11 +4300,11 @@ class ApplyAdaMax(Primitive):
|
|
|
4339
4300
|
the relatively highest priority data type.
|
|
4340
4301
|
|
|
4341
4302
|
Inputs:
|
|
4342
|
-
- **var** (Parameter) - Variable to be updated. With float32 or float16 data type.
|
|
4303
|
+
- **var** (Union[Parameter, Tensor]) - Variable to be updated. With float32 or float16 data type.
|
|
4343
4304
|
The shape is :math:`(N, *)` where :math:`*` means, any number of additional dimensions.
|
|
4344
|
-
- **m** (Parameter) - The 1st moment vector in the updating formula, has the same shape as `var`.
|
|
4305
|
+
- **m** (Union[Parameter, Tensor]) - The 1st moment vector in the updating formula, has the same shape as `var`.
|
|
4345
4306
|
With float32 or float16 data type.
|
|
4346
|
-
- **v** (Parameter) - The 2nd moment vector in the updating formula. Mean square gradients
|
|
4307
|
+
- **v** (Union[Parameter, Tensor]) - The 2nd moment vector in the updating formula. Mean square gradients
|
|
4347
4308
|
with the same shape as `var`. With float32 or float16 data type.
|
|
4348
4309
|
- **beta1_power** (Union[Number, Tensor]) - :math:`beta_1^t` in the updating formula, must be a scalar.
|
|
4349
4310
|
With float32 or float16 data type.
|
|
@@ -4359,7 +4320,7 @@ class ApplyAdaMax(Primitive):
|
|
|
4359
4320
|
With float32 or float16 data type.
|
|
4360
4321
|
|
|
4361
4322
|
Outputs:
|
|
4362
|
-
Tuple of 3 Tensor, the updated parameters.
|
|
4323
|
+
Tuple of 3 Tensors, the updated parameters or tensors.
|
|
4363
4324
|
|
|
4364
4325
|
- **var** (Tensor) - The same shape and data type as `var`.
|
|
4365
4326
|
- **m** (Tensor) - The same shape and data type as `m`.
|
|
@@ -4453,10 +4414,11 @@ class ApplyAdadelta(Primitive):
|
|
|
4453
4414
|
the relatively highest priority data type.
|
|
4454
4415
|
|
|
4455
4416
|
Inputs:
|
|
4456
|
-
- **var** (Parameter) - Weights to be updated. With float32 or float16 data type.
|
|
4417
|
+
- **var** (Union[Parameter, Tensor]) - Weights to be updated. With float32 or float16 data type.
|
|
4457
4418
|
The shape is :math:`(N, *)` where :math:`*` means, any number of additional dimensions.
|
|
4458
|
-
- **accum** (Parameter) - Accumulation to be updated, has the same shape and data type as `var`.
|
|
4459
|
-
- **accum_update** (Parameter) - Accum_update to be updated, has the same shape and data type as
|
|
4419
|
+
- **accum** (Union[Parameter, Tensor]) - Accumulation to be updated, has the same shape and data type as `var`.
|
|
4420
|
+
- **accum_update** (Union[Parameter, Tensor]) - Accum_update to be updated, has the same shape and data type as
|
|
4421
|
+
`var`.
|
|
4460
4422
|
- **lr** (Union[Number, Tensor]) - Learning rate, must be a scalar. With float32 or float16 data type.
|
|
4461
4423
|
- **rho** (Union[Number, Tensor]) - Decay rate, must be a scalar. With float32 or float16 data type.
|
|
4462
4424
|
- **epsilon** (Union[Number, Tensor]) - A small value added for numerical stability, must be a scalar.
|
|
@@ -4464,7 +4426,7 @@ class ApplyAdadelta(Primitive):
|
|
|
4464
4426
|
- **grad** (Tensor) - Gradients, has the same shape and data type as `var`.
|
|
4465
4427
|
|
|
4466
4428
|
Outputs:
|
|
4467
|
-
Tuple of 3 Tensor, the updated parameters.
|
|
4429
|
+
Tuple of 3 Tensors, the updated parameters or tensors.
|
|
4468
4430
|
|
|
4469
4431
|
- **var** (Tensor) - The same shape and data type as `var`.
|
|
4470
4432
|
- **accum** (Tensor) - The same shape and data type as `accum`.
|
|
@@ -4555,14 +4517,14 @@ class ApplyAdagrad(Primitive):
|
|
|
4555
4517
|
update_slots (bool): If ``True`` , `accum` will be updated. Default: ``True`` .
|
|
4556
4518
|
|
|
4557
4519
|
Inputs:
|
|
4558
|
-
- **var** (Parameter) - Variable to be updated. With float or complex data type.
|
|
4520
|
+
- **var** (Union[Parameter, Tensor]) - Variable to be updated. With float or complex data type.
|
|
4559
4521
|
The shape is :math:`(N, *)` where :math:`*` means, any number of additional dimensions.
|
|
4560
|
-
- **accum** (Parameter) - Accumulation to be updated. The shape must be the same as `var`.
|
|
4522
|
+
- **accum** (Union[Parameter, Tensor]) - Accumulation to be updated. The shape must be the same as `var`.
|
|
4561
4523
|
- **lr** (Union[Number, Tensor]) - The learning rate value, must be a scalar. With float or complex data type.
|
|
4562
4524
|
- **grad** (Tensor) - A tensor for gradient. The shape must be the same as `var`.
|
|
4563
4525
|
|
|
4564
4526
|
Outputs:
|
|
4565
|
-
Tuple of 2 Tensors, the updated parameters.
|
|
4527
|
+
Tuple of 2 Tensors, the updated parameters or tensors.
|
|
4566
4528
|
|
|
4567
4529
|
- **var** (Tensor) - The same shape and data type as `var`.
|
|
4568
4530
|
- **accum** (Tensor) - The same shape and data type as `accum`.
|
|
@@ -4642,15 +4604,15 @@ class ApplyAdagradV2(Primitive):
|
|
|
4642
4604
|
update_slots (bool): If ``True`` , `accum` will be updated. Default: ``True`` .
|
|
4643
4605
|
|
|
4644
4606
|
Inputs:
|
|
4645
|
-
- **var** (Parameter) - Variable to be updated. With float16 or float32 data type.
|
|
4607
|
+
- **var** (Union[Parameter, Tensor]) - Variable to be updated. With float16 or float32 data type.
|
|
4646
4608
|
The shape is :math:`(N, *)` where :math:`*` means, any number of additional dimensions.
|
|
4647
|
-
- **accum** (Parameter) - Accumulation to be updated. The shape must be the same as `var`.
|
|
4609
|
+
- **accum** (Union[Parameter, Tensor]) - Accumulation to be updated. The shape must be the same as `var`.
|
|
4648
4610
|
- **lr** (Union[Number, Tensor]) - The learning rate value, must be a float number or
|
|
4649
4611
|
a scalar tensor with float16 or float32 data type.
|
|
4650
4612
|
- **grad** (Tensor) - A tensor for gradient. The shape must be the same as `var`.
|
|
4651
4613
|
|
|
4652
4614
|
Outputs:
|
|
4653
|
-
Tuple of 2 Tensors, the updated parameters.
|
|
4615
|
+
Tuple of 2 Tensors, the updated parameters or tensors.
|
|
4654
4616
|
|
|
4655
4617
|
- **var** (Tensor) - The same shape and data type as `var`.
|
|
4656
4618
|
- **accum** (Tensor) - The same shape and data type as `accum`.
|
|
@@ -4753,16 +4715,17 @@ class SparseApplyAdagradV2(Primitive):
|
|
|
4753
4715
|
update_slots (bool): If ``True`` , the computation logic will be different to `False`. Default: ``True`` .
|
|
4754
4716
|
|
|
4755
4717
|
Inputs:
|
|
4756
|
-
- **var** (Parameter) - Variable to be updated. The data type must be float16 or float32.
|
|
4718
|
+
- **var** (Union[Parameter, Tensor]) - Variable to be updated. The data type must be float16 or float32.
|
|
4757
4719
|
The shape is :math:`(N, *)` where :math:`*` means, any number of additional dimensions.
|
|
4758
|
-
- **accum** (Parameter) - Accumulation to be updated. The shape must be the same as `var`.
|
|
4720
|
+
- **accum** (Union[Parameter, Tensor]) - Accumulation to be updated. The shape must be the same as `var`.
|
|
4759
4721
|
- **grad** (Tensor) - Gradients has the same shape as `var` and
|
|
4760
4722
|
:math:`grad.shape[1:] = var.shape[1:]` if var.shape > 1.
|
|
4761
4723
|
- **indices** (Tensor) - A vector of indices into the first dimension of `var` and `accum`.
|
|
4762
|
-
The type must be int32 and :math:`indices.shape[0] = grad.shape[0]`.
|
|
4724
|
+
The type must be int32 and :math:`indices.shape[0] = grad.shape[0]`. The value of indices
|
|
4725
|
+
must be unique. Otherwise, the result is unpredictable.
|
|
4763
4726
|
|
|
4764
4727
|
Outputs:
|
|
4765
|
-
Tuple of 2 tensors, the updated parameters.
|
|
4728
|
+
Tuple of 2 tensors, the updated parameters or tensors.
|
|
4766
4729
|
|
|
4767
4730
|
- **var** (Tensor) - The same shape and data type as `var`.
|
|
4768
4731
|
- **accum** (Tensor) - The same shape and data type as `accum`.
|
|
@@ -4842,9 +4805,10 @@ class ApplyProximalAdagrad(Primitive):
|
|
|
4842
4805
|
Default: ``False`` .
|
|
4843
4806
|
|
|
4844
4807
|
Inputs:
|
|
4845
|
-
- **var** (Parameter) - Variable to be updated. The data type must be float16 or float32.
|
|
4808
|
+
- **var** (Union[Parameter, Tensor]) - Variable to be updated. The data type must be float16 or float32.
|
|
4846
4809
|
The shape is :math:`(N, *)` where :math:`*` means, any number of additional dimensions.
|
|
4847
|
-
- **accum** (Parameter) - Accumulation to be updated, must have the same shape and dtype as
|
|
4810
|
+
- **accum** (Union[Parameter, Tensor]) - Accumulation to be updated, must have the same shape and dtype as
|
|
4811
|
+
`var`.
|
|
4848
4812
|
- **lr** (Union[Number, Tensor]) - The learning rate value, must be a scalar. The data type must be
|
|
4849
4813
|
float16 or float32.
|
|
4850
4814
|
- **l1** (Union[Number, Tensor]) - l1 regularization strength, must be a scalar. The data type must be
|
|
@@ -4854,7 +4818,7 @@ class ApplyProximalAdagrad(Primitive):
|
|
|
4854
4818
|
- **grad** (Tensor) - Gradient with the same shape and dtype as `var`.
|
|
4855
4819
|
|
|
4856
4820
|
Outputs:
|
|
4857
|
-
Tuple of 2 Tensors, the updated parameters.
|
|
4821
|
+
Tuple of 2 Tensors, the updated parameters or tensors.
|
|
4858
4822
|
|
|
4859
4823
|
- **var** (Tensor) - The same shape and data type as `var`.
|
|
4860
4824
|
- **accum** (Tensor) - The same shape and data type as `accum`.
|
|
@@ -4939,9 +4903,9 @@ class SparseApplyProximalAdagrad(Primitive):
|
|
|
4939
4903
|
Default: ``False`` .
|
|
4940
4904
|
|
|
4941
4905
|
Inputs:
|
|
4942
|
-
- **var** (Parameter) - Variable tensor to be updated. The data type must be float16 or float32.
|
|
4906
|
+
- **var** (Union[Parameter, Tensor]) - Variable tensor to be updated. The data type must be float16 or float32.
|
|
4943
4907
|
The shape is :math:`(N, *)` where :math:`*` means, any number of additional dimensions.
|
|
4944
|
-
- **accum** (
|
|
4908
|
+
- **accum** (Parameter) - Variable tensor to be updated, has the same shape as `var`.
|
|
4945
4909
|
- **lr** (Union[Number, Tensor]) - The learning rate value, must be a float number or
|
|
4946
4910
|
a scalar tensor with float16 or float32 data type. It must be positive.
|
|
4947
4911
|
- **l1** (Union[Number, Tensor]) - l1 regularization strength, must be a float number or
|
|
@@ -4955,7 +4919,7 @@ class SparseApplyProximalAdagrad(Primitive):
|
|
|
4955
4919
|
following types: int32, int64 and :math:`indices.shape[0] = grad.shape[0]`.
|
|
4956
4920
|
|
|
4957
4921
|
Outputs:
|
|
4958
|
-
Tuple of 2 tensors, the updated parameters.
|
|
4922
|
+
Tuple of 2 tensors, the updated parameters or tensors.
|
|
4959
4923
|
|
|
4960
4924
|
- **var** (Tensor) - The same shape and data type as `var`.
|
|
4961
4925
|
- **accum** (Tensor) - The same shape and data type as `accum`.
|
|
@@ -5041,9 +5005,9 @@ class ApplyAddSign(Primitive):
|
|
|
5041
5005
|
the relatively highest priority data type.
|
|
5042
5006
|
|
|
5043
5007
|
Inputs:
|
|
5044
|
-
- **var** (Parameter) - Variable tensor to be updated.
|
|
5008
|
+
- **var** (Union[Parameter, Tensor]) - Variable tensor to be updated.
|
|
5045
5009
|
The shape is :math:`(N, *)` where :math:`*` means, any number of additional dimensions.
|
|
5046
|
-
- **m** (Parameter) - Variable tensor to be updated, has the same data type as `var`.
|
|
5010
|
+
- **m** (Union[Parameter, Tensor]) - Variable tensor to be updated, has the same data type as `var`.
|
|
5047
5011
|
- **lr** (Union[Number, Tensor]) - The learning rate value, must be a scalar.
|
|
5048
5012
|
- **alpha** (Union[Number, Tensor]) - Must be a scalar.
|
|
5049
5013
|
- **sign_decay** (Union[Number, Tensor]) - Must be a scalar.
|
|
@@ -5051,7 +5015,7 @@ class ApplyAddSign(Primitive):
|
|
|
5051
5015
|
- **grad** (Tensor) - A tensor of the same shape as `var`, for the gradient.
|
|
5052
5016
|
|
|
5053
5017
|
Outputs:
|
|
5054
|
-
Tuple of 2 Tensors, the updated parameters.
|
|
5018
|
+
Tuple of 2 Tensors, the updated parameters or tensors.
|
|
5055
5019
|
|
|
5056
5020
|
- **var** (Tensor) - The same shape and data type as `var`.
|
|
5057
5021
|
- **m** (Tensor) - The same shape and data type as `m`.
|
|
@@ -5140,10 +5104,10 @@ class ApplyPowerSign(Primitive):
|
|
|
5140
5104
|
On Ascend, input data type of float64 is currently not supported.
|
|
5141
5105
|
|
|
5142
5106
|
Inputs:
|
|
5143
|
-
- **var** (Parameter) - Variable tensor to be updated. With float64, float32 or float16 data
|
|
5144
|
-
If data type of `var` is float16, all inputs must have the same data type as `var`.
|
|
5107
|
+
- **var** (Union[Parameter, Tensor]) - Variable tensor to be updated. With float64, float32 or float16 data
|
|
5108
|
+
type. If data type of `var` is float16, all inputs must have the same data type as `var`.
|
|
5145
5109
|
The shape is :math:`(N, *)` where :math:`*` means, any number of additional dimensions.
|
|
5146
|
-
- **m** (Parameter) - Variable tensor to be updated, has the same shape as `var`.
|
|
5110
|
+
- **m** (Union[Parameter, Tensor]) - Variable tensor to be updated, has the same shape as `var`.
|
|
5147
5111
|
- **lr** (Union[Number, Tensor]) - The learning rate value, should be a scalar or Tensor
|
|
5148
5112
|
with float64, float32 or float16 data type.
|
|
5149
5113
|
- **logbase** (Union[Number, Tensor]) - Should be a scalar or Tensor with float64, float32 or float16 data type.
|
|
@@ -5154,7 +5118,7 @@ class ApplyPowerSign(Primitive):
|
|
|
5154
5118
|
- **grad** (Tensor) - A tensor of the same shape as `var`, for the gradient.
|
|
5155
5119
|
|
|
5156
5120
|
Outputs:
|
|
5157
|
-
Tuple of 2 Tensors, the updated parameters.
|
|
5121
|
+
Tuple of 2 Tensors, the updated parameters or tensors.
|
|
5158
5122
|
|
|
5159
5123
|
- **var** (Tensor) - The same shape and data type as `var`.
|
|
5160
5124
|
- **m** (Tensor) - The same shape and data type as `m`.
|
|
@@ -5231,7 +5195,7 @@ class ApplyGradientDescent(Primitive):
|
|
|
5231
5195
|
the relatively highest priority data type.
|
|
5232
5196
|
|
|
5233
5197
|
Inputs:
|
|
5234
|
-
- **var** (Parameter) - Variable tensor to be updated. With float32 or float16 data type.
|
|
5198
|
+
- **var** (Union[Parameter, Tensor]) - Variable tensor to be updated. With float32 or float16 data type.
|
|
5235
5199
|
The shape is :math:`(N, *)` where :math:`*` means, any number of additional dimensions.
|
|
5236
5200
|
- **alpha** (Union[Number, Tensor]) - Scaling factor, must be a scalar. With float32 or float16 data type.
|
|
5237
5201
|
- **delta** (Tensor) - A tensor for the change, has the same shape as `var`.
|
|
@@ -5300,7 +5264,7 @@ class ApplyProximalGradientDescent(Primitive):
|
|
|
5300
5264
|
the relatively highest priority data type.
|
|
5301
5265
|
|
|
5302
5266
|
Inputs:
|
|
5303
|
-
- **var** (Parameter) - Variable tensor to be updated. With float32 or float16 data type.
|
|
5267
|
+
- **var** (Union[Parameter, Tensor]) - Variable tensor to be updated. With float32 or float16 data type.
|
|
5304
5268
|
The shape is :math:`(N, *)` where :math:`*` means, any number of additional dimensions.
|
|
5305
5269
|
- **alpha** (Union[Number, Tensor]) - Scaling factor, must be a scalar. With float32 or float16 data type.
|
|
5306
5270
|
- **l1** (Union[Number, Tensor]) - l1 regularization strength, must be a scalar.
|
|
@@ -5444,10 +5408,10 @@ class ApplyFtrl(Primitive):
|
|
|
5444
5408
|
use_locking (bool): Use locks for updating operation if ``True`` . Default: ``False`` .
|
|
5445
5409
|
|
|
5446
5410
|
Inputs:
|
|
5447
|
-
- **var** (Parameter) - The variable to be updated. The data type must be float16 or float32.
|
|
5411
|
+
- **var** (Union[Parameter, Tensor]) - The variable to be updated. The data type must be float16 or float32.
|
|
5448
5412
|
The shape is :math:`(N, *)` where :math:`*` means, any number of additional dimensions.
|
|
5449
|
-
- **accum** (Parameter) - The accumulation to be updated, must be same shape as `var`.
|
|
5450
|
-
- **linear** (Parameter) - The linear coefficient to be updated, must be same shape as `var`.
|
|
5413
|
+
- **accum** (Union[Parameter, Tensor]) - The accumulation to be updated, must be same shape as `var`.
|
|
5414
|
+
- **linear** (Union[Parameter, Tensor]) - The linear coefficient to be updated, must be same shape as `var`.
|
|
5451
5415
|
- **grad** (Tensor) - Gradient. The data type must be float16 or float32.
|
|
5452
5416
|
- **lr** (Union[Number, Tensor]) - The learning rate value, must be positive. Default: ``0.001`` .
|
|
5453
5417
|
It must be a float number or a scalar tensor with float16 or float32 data type.
|
|
@@ -5460,16 +5424,16 @@ class ApplyFtrl(Primitive):
|
|
|
5460
5424
|
Default: ``-0.5`` . It must be a float number or a scalar tensor with float16 or float32 data type.
|
|
5461
5425
|
|
|
5462
5426
|
Outputs:
|
|
5463
|
-
- **var** (Tensor) - Represents the updated `var`. As the input parameters has been updated in-place,
|
|
5464
|
-
value is always zero when the platform is GPU.
|
|
5427
|
+
- **var** (Tensor) - Represents the updated `var`. As the input parameters or tensors have been updated in-place,
|
|
5428
|
+
this value is always zero when the platform is GPU.
|
|
5465
5429
|
|
|
5466
5430
|
Raises:
|
|
5467
5431
|
TypeError: If `use_locking` is not a bool.
|
|
5468
5432
|
TypeError: If dtype of `var`, `grad`, `lr`, `l1`, `l2` or `lr_power` is neither float16 nor float32.
|
|
5469
5433
|
TypeError: If `lr`, `l1`, `l2` or `lr_power` is neither a Number nor a Tensor.
|
|
5470
5434
|
TypeError: If `grad` is not a Tensor.
|
|
5471
|
-
TypeError: If the parameter types of `var`, `accum` and `linear` are inconsistent.
|
|
5472
|
-
TypeError: If the parameter types of `grad`, `lr`, `l1`, `l2`, `lr_power` are inconsistent with `var`
|
|
5435
|
+
TypeError: If the parameter or tensor types of `var`, `accum` and `linear` are inconsistent.
|
|
5436
|
+
TypeError: If the parameter or tensor types of `grad`, `lr`, `l1`, `l2`, `lr_power` are inconsistent with `var`
|
|
5473
5437
|
and the precision is greater than `var`.
|
|
5474
5438
|
|
|
5475
5439
|
Supported Platforms:
|
|
@@ -5544,10 +5508,10 @@ class SparseApplyFtrl(Primitive):
|
|
|
5544
5508
|
use_locking (bool, optional): Use locks for updating operation if ``True`` . Default: ``False`` .
|
|
5545
5509
|
|
|
5546
5510
|
Inputs:
|
|
5547
|
-
- **var** (Parameter) - The variable to be updated. The data type must be float16 or float32.
|
|
5511
|
+
- **var** (Union[Parameter, Tensor]) - The variable to be updated. The data type must be float16 or float32.
|
|
5548
5512
|
The shape is :math:`(N, *)` where :math:`*` means, any number of additional dimensions.
|
|
5549
|
-
- **accum** (Parameter) - The accumulation to be updated, must be same shape as `var`.
|
|
5550
|
-
- **linear** (Parameter) - The linear coefficient to be updated, must be the same shape as `var`.
|
|
5513
|
+
- **accum** (Union[Parameter, Tensor]) - The accumulation to be updated, must be same shape as `var`.
|
|
5514
|
+
- **linear** (Union[Parameter, Tensor]) - The linear coefficient to be updated, must be the same shape as `var`.
|
|
5551
5515
|
- **grad** (Tensor) - A tensor must meet with :math:`grad.shape[1:] = var.shape[1:]`
|
|
5552
5516
|
if var.shape > 1.
|
|
5553
5517
|
- **indices** (Tensor) - A tensor of indices in the first dimension of `var` and `accum`.
|
|
@@ -6904,7 +6868,7 @@ class SparseApplyAdadelta(Primitive):
|
|
|
6904
6868
|
to make the data types consistent. Besides, inputs of 'lr' and 'rho' also support implicit type conversion.
|
|
6905
6869
|
If they have different data types, the lower priority data type will be converted to
|
|
6906
6870
|
relatively highest priority data type.
|
|
6907
|
-
RuntimeError exception will be thrown when the data type conversion of Parameter is required.
|
|
6871
|
+
RuntimeError exception will be thrown when the data type conversion of Parameter or Tensor is required.
|
|
6908
6872
|
|
|
6909
6873
|
Note:
|
|
6910
6874
|
If there are negative values or values greater than or equal to var.shape[0] in `indices`,
|
|
@@ -6916,11 +6880,11 @@ class SparseApplyAdadelta(Primitive):
|
|
|
6916
6880
|
Default: ``False`` .
|
|
6917
6881
|
|
|
6918
6882
|
Inputs:
|
|
6919
|
-
- **var** (Parameter) - Weights to be updated. With float32 or float16 data type.
|
|
6920
|
-
- **accum** (Parameter) - Accumulation to be updated. Mush have the same shape and dtype as
|
|
6921
|
-
With float32 or float16 data type.
|
|
6922
|
-
- **accum_update** (Parameter) - Accum_update to be updated. Must have the same shape and dtype
|
|
6923
|
-
With float32 or float16 data type.
|
|
6883
|
+
- **var** (Union[Parameter, Tensor]) - Weights to be updated. With float32 or float16 data type.
|
|
6884
|
+
- **accum** (Union[Parameter, Tensor]) - Accumulation to be updated. Must have the same shape and dtype as
|
|
6885
|
+
`var`. With float32 or float16 data type.
|
|
6886
|
+
- **accum_update** (Union[Parameter, Tensor]) - Accum_update to be updated. Must have the same shape and dtype
|
|
6887
|
+
as `var`. With float32 or float16 data type.
|
|
6924
6888
|
- **lr** (Union[float, Tensor]) - Learning rate, must be a scalar. With float32 or float16 data type.
|
|
6925
6889
|
- **rho** (Union[float, Tensor]) - Decay rate, must be a scalar. With float32 or float16 data type.
|
|
6926
6890
|
- **grad** (Tensor) - A tensor for gradient. Must have the same shape and dtype as `var`.
|
|
@@ -6928,7 +6892,7 @@ class SparseApplyAdadelta(Primitive):
|
|
|
6928
6892
|
Must be one of the following types: int32, int64 and indices.shape[0] = grad.shape[0].
|
|
6929
6893
|
|
|
6930
6894
|
Outputs:
|
|
6931
|
-
Tuple of 3 Tensor, the updated parameters.
|
|
6895
|
+
Tuple of 3 Tensors, the updated parameters or tensors.
|
|
6932
6896
|
|
|
6933
6897
|
- **var** (Tensor) - The same shape and data type as `var`.
|
|
6934
6898
|
- **accum** (Tensor) - The same shape and data type as `accum`.
|
|
@@ -7158,7 +7122,8 @@ class Conv3DTranspose(Primitive):
|
|
|
7158
7122
|
\times (\text{kernel_size}[2] - 1) + \text{output_padding}[2] + 1
|
|
7159
7123
|
|
|
7160
7124
|
Note:
|
|
7161
|
-
In Ascend, only support :math:`group=1`.
|
|
7125
|
+
- In Ascend, only support :math:`group=1`.
|
|
7126
|
+
- For Atlas A2 training series products, `output_padding` is currently not supported.
|
|
7162
7127
|
|
|
7163
7128
|
Args:
|
|
7164
7129
|
in_channel (int): The channel of the input x.
|
|
@@ -7204,12 +7169,15 @@ class Conv3DTranspose(Primitive):
|
|
|
7204
7169
|
Inputs:
|
|
7205
7170
|
- **dout** (Tensor) - The gradients with respect to the output of the convolution.
|
|
7206
7171
|
The shape conforms to the default.
|
|
7207
|
-
data_format :math:`(N, C_{in}, D_{out}, H_{out}, W_{out})`.
|
|
7208
|
-
|
|
7172
|
+
data_format :math:`(N, C_{in}, D_{out}, H_{out}, W_{out})`.
|
|
7173
|
+
Supported dtypes:
|
|
7174
|
+
|
|
7175
|
+
- Ascend: float16.
|
|
7176
|
+
- GPU/CPU: float16, float32.
|
|
7209
7177
|
- **weight** (Tensor) - Set size of kernel is :math:`(K_d, K_h, K_w)`, then the shape is
|
|
7210
7178
|
:math:`(C_{in}, C_{out}//group, K_d, K_h, K_w)`. Where :math:`group` is the Args parameter,
|
|
7211
7179
|
:math:`//` is the symbol for integer division.
|
|
7212
|
-
|
|
7180
|
+
It has the same dtype as `dout`.
|
|
7213
7181
|
- **bias** (Tensor) - Tensor of shape :math:`C_{out}`. Currently, only support none. Default: ``None`` .
|
|
7214
7182
|
|
|
7215
7183
|
Outputs:
|
|
@@ -7495,12 +7463,12 @@ class ApplyAdagradDA(Primitive):
|
|
|
7495
7463
|
Otherwise the behavior is undefined, but may exhibit less contention. Default: ``False`` .
|
|
7496
7464
|
|
|
7497
7465
|
Inputs:
|
|
7498
|
-
- **var** (Parameter) - Variable to be updated. The data type must be float16 or float32.
|
|
7466
|
+
- **var** (Union[Parameter, Tensor]) - Variable to be updated. The data type must be float16 or float32.
|
|
7499
7467
|
The shape is :math:`(N, *)` where :math:`*` means, any number of additional dimensions.
|
|
7500
|
-
- **gradient_accumulator** (Parameter) - The dict of mutable tensor :math:`grad\_accum`.
|
|
7501
|
-
shape as `var`.
|
|
7502
|
-
- **gradient_squared_accumulator** (Parameter) - The dict of mutable tensor :math:`grad\_squared\_accum`.
|
|
7468
|
+
- **gradient_accumulator** (Union[Parameter, Tensor]) - The dict of mutable tensor :math:`grad\_accum`.
|
|
7503
7469
|
Must have the same shape as `var`.
|
|
7470
|
+
- **gradient_squared_accumulator** (Union[Parameter, Tensor]) - The dict of mutable tensor
|
|
7471
|
+
:math:`grad\_squared\_accum`. Must have the same shape as `var`.
|
|
7504
7472
|
- **grad** (Tensor) - A tensor for gradient. Must have the same shape as `var`.
|
|
7505
7473
|
- **lr** ([Number, Tensor]) - Scaling factor. Must be a scalar. With float32 or float16 data type.
|
|
7506
7474
|
- **l1** ([Number, Tensor]) - L1 regularization. Must be a scalar. With float32 or float16 data type.
|
|
@@ -7508,12 +7476,12 @@ class ApplyAdagradDA(Primitive):
|
|
|
7508
7476
|
- **global_step** ([Number, Tensor]) - Training step number. Must be a scalar. With int32 or int64 data type.
|
|
7509
7477
|
|
|
7510
7478
|
Outputs:
|
|
7511
|
-
Tuple of 1 Tensors, the updated parameters.
|
|
7479
|
+
Tuple of 1 Tensor, the updated parameters or tensors.
|
|
7512
7480
|
|
|
7513
7481
|
- **var** (Tensor) - The same shape and data type as `var`.
|
|
7514
7482
|
|
|
7515
7483
|
Raises:
|
|
7516
|
-
TypeError: If `var`, `gradient_accumulator` or `gradient_squared_accumulator`
|
|
7484
|
+
TypeError: If `var`, `gradient_accumulator` or `gradient_squared_accumulator` is neither a Parameter nor a Tensor.
|
|
7517
7485
|
TypeError: If `grad` is not a Tensor.
|
|
7518
7486
|
TypeError: If `lr`, `l1`, `l2` or `global_step` is neither a Number nor a Tensor.
|
|
7519
7487
|
TypeError: If use_locking is not a bool.
|
|
@@ -7607,10 +7575,12 @@ class SparseApplyRMSProp(Primitive):
|
|
|
7607
7575
|
otherwise the behavior is undefined, but may exhibit less contention. Default: ``False`` .
|
|
7608
7576
|
|
|
7609
7577
|
Inputs:
|
|
7610
|
-
- **var** (Parameter) - Variable to be updated. The data type must be float16 or float32.
|
|
7578
|
+
- **var** (Union[Parameter, Tensor]) - Variable to be updated. The data type must be float16 or float32.
|
|
7611
7579
|
The shape is :math:`(N, *)` where :math:`*` means, any number of additional dimensions.
|
|
7612
|
-
- **ms** (Parameter) - The dict of mutable tensor ms. Must have the same shape and dtype as
|
|
7613
|
-
|
|
7580
|
+
- **ms** (Union[Parameter, Tensor]) - The dict of mutable tensor ms. Must have the same shape and dtype as
|
|
7581
|
+
`var`.
|
|
7582
|
+
- **mom** (Union[Parameter, Tensor]) - The dict of mutable tensor mom. Must have the same shape and dtype as
|
|
7583
|
+
`var`.
|
|
7614
7584
|
- **lr** (Union[Number, Tensor]) - Learning rate. Must be a scalar. With float16 or float32 data type.
|
|
7615
7585
|
- **grad** (Tensor) - A tensor for gradient. Must have the same shape and dtype as `var`.
|
|
7616
7586
|
- **indices** (Tensor) - A tensor of indices in the first dimension of `var`, `ms` and `mom`.
|
|
@@ -7618,7 +7588,7 @@ class SparseApplyRMSProp(Primitive):
|
|
|
7618
7588
|
following types: int32, int64 and indices.shape[0] = var.shape[0].
|
|
7619
7589
|
|
|
7620
7590
|
Outputs:
|
|
7621
|
-
Tuple of 3 Tensors, the updated parameters.
|
|
7591
|
+
Tuple of 3 Tensors, the updated parameters or tensors.
|
|
7622
7592
|
|
|
7623
7593
|
- **var** (Tensor) - The same shape and data type as `var`.
|
|
7624
7594
|
- **ms** (Tensor) - The same shape and data type as `ms`.
|
|
@@ -7724,12 +7694,12 @@ class SparseApplyCenteredRMSProp(Primitive):
|
|
|
7724
7694
|
Default: ``False`` .
|
|
7725
7695
|
|
|
7726
7696
|
Inputs:
|
|
7727
|
-
- **var** (Parameter) - Variable tensor to be updated. The data type must be int8, int16, int32,
|
|
7728
|
-
uint8, uint16, uint32, uint64, float16, float32 or float64.
|
|
7697
|
+
- **var** (Union[Parameter, Tensor]) - Variable tensor to be updated. The data type must be int8, int16, int32,
|
|
7698
|
+
int64, uint8, uint16, uint32, uint64, float16, float32 or float64.
|
|
7729
7699
|
The shape is :math:`(N, *)` where :math:`*` means, any number of additional dimensions.
|
|
7730
|
-
- **mg** (Parameter) - Mean gradients. Must have the same shape and dtype as `var`.
|
|
7731
|
-
- **ms** (Parameter) - Mean square gradients. Must have the same shape and dtype as `var`.
|
|
7732
|
-
- **mom** (Parameter) - Delta of `var`. Must have the same shape and dtype as `var`.
|
|
7700
|
+
- **mg** (Union[Parameter, Tensor]) - Mean gradients. Must have the same shape and dtype as `var`.
|
|
7701
|
+
- **ms** (Union[Parameter, Tensor]) - Mean square gradients. Must have the same shape and dtype as `var`.
|
|
7702
|
+
- **mom** (Union[Parameter, Tensor]) - Delta of `var`. Must have the same shape and dtype as `var`.
|
|
7733
7703
|
- **lr** (Union[Number, Tensor]) - Learning rate. Must be a float number or a scalar tensor.
|
|
7734
7704
|
Must have the same type as `var`.
|
|
7735
7705
|
- **rho** (Union[Number, Tensor]) - Decay rate. Must be a float number or a scalar tensor.
|
|
@@ -7832,8 +7802,9 @@ class ApplyKerasMomentum(Primitive):
|
|
|
7832
7802
|
so in the end, the var you get is actually var + momentum * accum. Default: ``False`` .
|
|
7833
7803
|
|
|
7834
7804
|
Inputs:
|
|
7835
|
-
- **var** (Parameter) - Variable to be updated. With float16 or float32 data type.
|
|
7836
|
-
- **accum** (Parameter) - Must have the same shape and type as `var`. With float16 or float32
|
|
7805
|
+
- **var** (Union[Parameter, Tensor]) - Variable to be updated. With float16 or float32 data type.
|
|
7806
|
+
- **accum** (Union[Parameter, Tensor]) - Must have the same shape and type as `var`. With float16 or float32
|
|
7807
|
+
data type.
|
|
7837
7808
|
- **lr** (Union[Number, Tensor]) - Scaling factor. Must be a scalar. With float16 or float32 data type.
|
|
7838
7809
|
- **grad** (Tensor) - The gradient. Must have the same shape and type as `var`.
|
|
7839
7810
|
With float16 or float32 data type.
|
|
@@ -7984,12 +7955,12 @@ class ApplyAdamWithAmsgrad(Primitive):
|
|
|
7984
7955
|
Default: ``False`` .
|
|
7985
7956
|
|
|
7986
7957
|
Inputs:
|
|
7987
|
-
- **var** (Parameter) - Variable to be updated. The data type can be float16 or float32.
|
|
7988
|
-
- **m** (Parameter) - The 1st moment vector in the updating formula,
|
|
7958
|
+
- **var** (Union[Parameter, Tensor]) - Variable to be updated. The data type can be float16 or float32.
|
|
7959
|
+
- **m** (Union[Parameter, Tensor]) - The 1st moment vector in the updating formula,
|
|
7989
7960
|
the shape and data type value should be the same as `var`.
|
|
7990
|
-
- **v** (Parameter) - the 2nd moment vector in the updating formula,
|
|
7961
|
+
- **v** (Union[Parameter, Tensor]) - the 2nd moment vector in the updating formula,
|
|
7991
7962
|
the shape and data type value should be the same as `var`.
|
|
7992
|
-
- **vhat** (Parameter) - :math:`\hat v_t` in the updating formula,
|
|
7963
|
+
- **vhat** (Union[Parameter, Tensor]) - :math:`\hat v_t` in the updating formula,
|
|
7993
7964
|
the shape and data type value should be the same as `var`.
|
|
7994
7965
|
- **beta1_power** (Union[float, Tensor]) - :math:`beta_1^t(\beta_1^{t})` in the updating formula,
|
|
7995
7966
|
a scalar tensor with float16 or float32 data type.
|
|
@@ -7999,7 +7970,7 @@ class ApplyAdamWithAmsgrad(Primitive):
|
|
|
7999
7970
|
- **grad** (Tensor) - The gradient, has the same shape and data type as `var`.
|
|
8000
7971
|
|
|
8001
7972
|
Outputs:
|
|
8002
|
-
Tuple of 4 Tensors, the updated parameters.
|
|
7973
|
+
Tuple of 4 Tensors, the updated parameters or tensors.
|
|
8003
7974
|
|
|
8004
7975
|
- **var** (Tensor) - The same shape and data type as `var`.
|
|
8005
7976
|
- **m** (Tensor) - The same shape and data type as `m`.
|
|
@@ -8007,7 +7978,7 @@ class ApplyAdamWithAmsgrad(Primitive):
|
|
|
8007
7978
|
- **vhat** (Tensor) - The same shape and data type as `vhat`.
|
|
8008
7979
|
|
|
8009
7980
|
Raises:
|
|
8010
|
-
TypeError: If `var`, `m`, `v` or `vhat` is not a Parameter.
|
|
7981
|
+
TypeError: If `var`, `m`, `v` or `vhat` is neither a Parameter nor a Tensor.
|
|
8011
7982
|
TypeError: If `beta1_power`, `beta2_power`, `lr` is neither a Number nor a Tensor.
|
|
8012
7983
|
TypeError: If `grad` is not a Tensor.
|
|
8013
7984
|
TypeError: If dtype of `var`, `m`, `v`, `vhat`, `beta1_power`, `beta2_power`,
|
|
@@ -8091,12 +8062,12 @@ class ApplyAdamWithAmsgradV2(Primitive):
|
|
|
8091
8062
|
Default: ``False`` .
|
|
8092
8063
|
|
|
8093
8064
|
Inputs:
|
|
8094
|
-
- **var** (Parameter) - Variable to be updated. The data type can be float16, float32 or float64.
|
|
8095
|
-
- **m** (Parameter) - The 1st moment vector in the updating formula,
|
|
8065
|
+
- **var** (Union[Parameter, Tensor]) - Variable to be updated. The data type can be float16, float32 or float64.
|
|
8066
|
+
- **m** (Union[Parameter, Tensor]) - The 1st moment vector in the updating formula,
|
|
8096
8067
|
the shape should be the same as `var`.
|
|
8097
|
-
- **v** (Parameter) - The 2nd moment vector in the updating formula,
|
|
8068
|
+
- **v** (Union[Parameter, Tensor]) - The 2nd moment vector in the updating formula,
|
|
8098
8069
|
the shape should be the same as `var`.
|
|
8099
|
-
- **vhat** (Parameter) - :math:`\hat v_t` in the updating formula,
|
|
8070
|
+
- **vhat** (Union[Parameter, Tensor]) - :math:`\hat v_t` in the updating formula,
|
|
8100
8071
|
the shape and data type value should be the same as `var`.
|
|
8101
8072
|
- **beta1_power** (Union[float, Tensor]) - :math:`beta_1^t(\beta_1^{t})` in the updating formula,
|
|
8102
8073
|
with float16, float32 or float64 data type.
|
|
@@ -8112,7 +8083,7 @@ class ApplyAdamWithAmsgradV2(Primitive):
|
|
|
8112
8083
|
- **grad** (Tensor) - The gradient, has the same shape as `var`.
|
|
8113
8084
|
|
|
8114
8085
|
Outputs:
|
|
8115
|
-
Tuple of 4 Tensors, the updated parameters.
|
|
8086
|
+
Tuple of 4 Tensors, the updated parameters or tensors.
|
|
8116
8087
|
|
|
8117
8088
|
- **var** (Tensor) - The same shape and data type as `var`.
|
|
8118
8089
|
- **m** (Tensor) - The same shape and data type as `m`.
|
|
@@ -8120,7 +8091,7 @@ class ApplyAdamWithAmsgradV2(Primitive):
|
|
|
8120
8091
|
- **vhat** (Tensor) - The same shape and data type as `vhat`.
|
|
8121
8092
|
|
|
8122
8093
|
Raises:
|
|
8123
|
-
TypeError: If `var`, `m`, `v` or `vhat` is not a Parameter.
|
|
8094
|
+
TypeError: If `var`, `m`, `v` or `vhat` is neither a Parameter nor a Tensor.
|
|
8124
8095
|
TypeError: If dtype of `var`, `m`, `v`, `vhat`, `beta1_power`, `beta2_power`,
|
|
8125
8096
|
`lr`, `beta1` , `beta2` , `epsilon` or `grad` is not float64, float32 or float16.
|
|
8126
8097
|
RuntimeError: If the data type of `var`, `m`, `v` , `vhat` and `grad` conversion of Parameter is not supported.
|
|
@@ -8800,11 +8771,11 @@ class SparseApplyAdagradDA(Primitive):
|
|
|
8800
8771
|
Otherwise the behavior is undefined, but may exhibit less contention. Default: ``False`` .
|
|
8801
8772
|
|
|
8802
8773
|
Inputs:
|
|
8803
|
-
- **var** (Parameter) - Variable to be updated.
|
|
8774
|
+
- **var** (Union[Parameter, Tensor]) - Variable to be updated.
|
|
8804
8775
|
The shape is :math:`(N, *)` where :math:`*` means, any number of additional dimensions.
|
|
8805
|
-
- **grad_accum** (Parameter) - The dict of mutable tensor grad_accum. Must have the same
|
|
8776
|
+
- **grad_accum** (Union[Parameter, Tensor]) - The dict of mutable tensor grad_accum. Must have the same
|
|
8806
8777
|
shape and dtype as `var`.
|
|
8807
|
-
- **grad_square_accum** (Parameter) - The dict of mutable tensor grad_square_accum.
|
|
8778
|
+
- **grad_square_accum** (Union[Parameter, Tensor]) - The dict of mutable tensor grad_square_accum.
|
|
8808
8779
|
Must have the same shape and dtype as `var`.
|
|
8809
8780
|
- **grad** (Tensor) - A tensor of the same type as `var` and grad.shape[1:] = var.shape[1:] if rank(var) > 1.
|
|
8810
8781
|
- **indices** (Tensor) - A tensor of indices in the first dimension of `var` and `accum`.
|
|
@@ -8982,8 +8953,8 @@ class SparseApplyProximalGradientDescent(Primitive):
|
|
|
8982
8953
|
Default: ``False`` .
|
|
8983
8954
|
|
|
8984
8955
|
Inputs:
|
|
8985
|
-
- **var** (Parameter) - Variable tensor to be updated. The data type must be int8, int16, int32,
|
|
8986
|
-
uint8, uint16, uint32, uint64, float16, float32 or float64.
|
|
8956
|
+
- **var** (Union[Parameter, Tensor]) - Variable tensor to be updated. The data type must be int8, int16, int32,
|
|
8957
|
+
int64, uint8, uint16, uint32, uint64, float16, float32 or float64.
|
|
8987
8958
|
The shape is :math:`(N, *)` where :math:`*` means, any number of additional dimensions.
|
|
8988
8959
|
- **alpha** (Union[Number, Tensor]) - Scaling factor. Must be a scalar with same type as `var`.
|
|
8989
8960
|
- **l1** (Union[Number, Tensor]) - L1 regularization. Must be a scalar with same type as `var`.
|
|
@@ -8998,7 +8969,7 @@ class SparseApplyProximalGradientDescent(Primitive):
|
|
|
8998
8969
|
- **var** (Tensor) - Tensor, has the same shape and type as 'var'.
|
|
8999
8970
|
|
|
9000
8971
|
Raises:
|
|
9001
|
-
TypeError: If `var` is not a Parameter.
|
|
8972
|
+
TypeError: If `var` is neither a Parameter nor a Tensor.
|
|
9002
8973
|
TypeError: If `alpha`, `l1`, `l2` is neither a Number nor a Tensor.
|
|
9003
8974
|
TypeError: If `use_locking` is not a bool.
|
|
9004
8975
|
TypeError: If dtype of `var`, `alpha`, `l1`, `l2` or `grad` is not one of int8, int16,
|
|
@@ -9134,51 +9105,6 @@ class NuclearNorm(Primitive):
|
|
|
9134
9105
|
validator.check_value_type("keepdim", keepdim, [bool], self.name)
|
|
9135
9106
|
|
|
9136
9107
|
|
|
9137
|
-
class GLU(Primitive):
|
|
9138
|
-
r"""
|
|
9139
|
-
Computes GLU (Gated Linear Unit activation function) of input tensors.
|
|
9140
|
-
|
|
9141
|
-
.. warning::
|
|
9142
|
-
This is an experimental API that is subject to change or deletion.
|
|
9143
|
-
|
|
9144
|
-
Refer to :func:`mindspore.ops.glu` for more details.
|
|
9145
|
-
|
|
9146
|
-
Args:
|
|
9147
|
-
axis (int, optional): Axis on which to split the input.
|
|
9148
|
-
The value of `axis` must be an int within range [-rank(`x`), rank(`x`)).
|
|
9149
|
-
Default: ``-1`` , specifying the last dimension.
|
|
9150
|
-
|
|
9151
|
-
Inputs:
|
|
9152
|
-
- **x** (Tensor) - Input tensor. `x.shape[axis]` must be even.
|
|
9153
|
-
|
|
9154
|
-
Outputs:
|
|
9155
|
-
Tensor, has the same data type with `x`.
|
|
9156
|
-
|
|
9157
|
-
Supported Platforms:
|
|
9158
|
-
``Ascend`` ``CPU``
|
|
9159
|
-
|
|
9160
|
-
Examples:
|
|
9161
|
-
>>> from mindspore import ops, Tensor
|
|
9162
|
-
>>> from mindspore import dtype as mstype
|
|
9163
|
-
>>> import numpy as np
|
|
9164
|
-
>>> axis = 0
|
|
9165
|
-
>>> x = Tensor(np.array([0.3220, 0.9545, 0.7879, 0.0975, 0.3698,
|
|
9166
|
-
... 0.5135, 0.5740, 0.3435, 0.1895, 0.8764,
|
|
9167
|
-
... 0.4980, 0.9673, 0.9879, 0.6988, 0.9022,
|
|
9168
|
-
... 0.9304, 0.1558, 0.0153, 0.1559, 0.9852]).reshape([2, 2, 5]), mstype.float32)
|
|
9169
|
-
>>> glu = ops.GLU(axis=axis)
|
|
9170
|
-
>>> y = glu(x)
|
|
9171
|
-
>>> print(y)
|
|
9172
|
-
[[[0.20028052 0.6916126 0.57412136 0.06512236 0.26307625]
|
|
9173
|
-
[0.3682598 0.3093122 0.17306386 0.10212085 0.63814086]]]
|
|
9174
|
-
"""
|
|
9175
|
-
|
|
9176
|
-
@prim_attr_register
|
|
9177
|
-
def __init__(self, axis=-1):
|
|
9178
|
-
"""Initialize GLU"""
|
|
9179
|
-
validator.check_value_type("axis", axis, [int], self.name)
|
|
9180
|
-
|
|
9181
|
-
|
|
9182
9108
|
class FractionalMaxPoolWithFixedKsize(Primitive):
|
|
9183
9109
|
r"""
|
|
9184
9110
|
Applies a 2D fractional max pooling to an input signal composed of multiple input planes.
|
|
@@ -9262,7 +9188,8 @@ class FractionalMaxPoolWithFixedKsize(Primitive):
|
|
|
9262
9188
|
class ChannelShuffle(Primitive):
|
|
9263
9189
|
r"""
|
|
9264
9190
|
Divide the channels in a tensor of shape :math:`(*, C, H, W)` into :math:`g` group and
|
|
9265
|
-
rearrange them as :math:`(*, \frac
|
|
9191
|
+
rearrange them as :math:`(*, \frac{C}{g}, g, H*W)`, while retaining the original tensor
|
|
9192
|
+
shape in the final output.
|
|
9266
9193
|
|
|
9267
9194
|
.. warning::
|
|
9268
9195
|
This is an experimental API that is subject to change or deletion.
|
|
@@ -9470,93 +9397,6 @@ class WKV(Primitive):
|
|
|
9470
9397
|
outputs=["output", "out_sp", "out_sq", "out_sm"])
|
|
9471
9398
|
|
|
9472
9399
|
|
|
9473
|
-
class PromptFlashAttention(Primitive):
|
|
9474
|
-
r"""
|
|
9475
|
-
The interface for full inference.
|
|
9476
|
-
B -- Batch size
|
|
9477
|
-
S -- Sequence length
|
|
9478
|
-
H -- Hidden size
|
|
9479
|
-
|
|
9480
|
-
Note:
|
|
9481
|
-
experiment ops
|
|
9482
|
-
|
|
9483
|
-
.. warning::
|
|
9484
|
-
This is an experimental API that is subject to change or deletion.
|
|
9485
|
-
|
|
9486
|
-
Args:
|
|
9487
|
-
num_heads (int): The number of heads.
|
|
9488
|
-
scale_value (float): The scale value indicating the scale coefficient, which is used as the scalar of
|
|
9489
|
-
Muls in the calculation. Default: 1.0.
|
|
9490
|
-
pre_tokens (int): Previous tokens. Default: 214748647.
|
|
9491
|
-
next_tokens (int): next tokens. Default: 0.
|
|
9492
|
-
indicate the upper triangle, Indicate the number of data blocks involved in the calculation. The value 0
|
|
9493
|
-
indicates that the data blocks in the upper triangle are not involved in the calculation
|
|
9494
|
-
input_layout (str): the data layout of the input qkv, support `(BSH)` and `(BNSD)`, Default `BSH`.
|
|
9495
|
-
num_key_value_heads (int): head numbers of key/value which are used in GQA algorithm.
|
|
9496
|
-
The value 0 indicates that the key and value use the same number of heads as `num_heads`. Default: 0.
|
|
9497
|
-
sparse_mode (int): Default: 0
|
|
9498
|
-
inner_precise (int): 0, float16 high precision. 1, high performance. default 1
|
|
9499
|
-
|
|
9500
|
-
Inputs:
|
|
9501
|
-
- **query** (Tensor) - The query tensor with data type of float16 or float32.
|
|
9502
|
-
Input tensor of shape :math:`(B, S, H)` / `(B, N, S, D)`.
|
|
9503
|
-
- **key** (Tensor) - The key tensor with data type of float16 or float32.
|
|
9504
|
-
Input tensor of shape :math:`(B, S, H)` / `(B, N, S, D)`.
|
|
9505
|
-
- **value** (Tensor) - The value tensor with data type of float16 or float32.
|
|
9506
|
-
Input tensor of shape :math:`(B, S, H)` / `(B, N, S, D)`.
|
|
9507
|
-
- **attn_mask** (Tensor) - The attention mask tensor with data type of float16 or float32.
|
|
9508
|
-
For each element, 0 indicates retention and 1 indicates discard. Input tensor of shape :math:`(B, 1, S, S)`.
|
|
9509
|
-
- **actual_seq_lengths** (Tensor): Describe actual sequence length of each input with data type of int64.
|
|
9510
|
-
- **actual_seq_lengths_kv** (Tensor): Describe actual sequence length of each input with data type of int64.
|
|
9511
|
-
- **pse_shift** (Tensor) - The position encoding tensor with data type of float16 or float32.
|
|
9512
|
-
- **deq_scale1** (Tensor)
|
|
9513
|
-
- **quant_scale1** (Tensor)
|
|
9514
|
-
- **deq_scale2** (Tensor)
|
|
9515
|
-
- **quant_scale2** (Tensor)
|
|
9516
|
-
- **quant_offset2** (Tensor)
|
|
9517
|
-
|
|
9518
|
-
Outputs:
|
|
9519
|
-
- **attention_out** (Tensor) - Input tensor of shape :math:`(B, S, H)` / `(B, N, S, D)`.
|
|
9520
|
-
|
|
9521
|
-
Supported Platforms:
|
|
9522
|
-
``Ascend``
|
|
9523
|
-
|
|
9524
|
-
Examples:
|
|
9525
|
-
>>> import mindspore.ops.operations.nn_ops as P
|
|
9526
|
-
>>> from mindspore import Tensor
|
|
9527
|
-
>>> import numpy as np
|
|
9528
|
-
>>> B = 1
|
|
9529
|
-
>>> N = 16
|
|
9530
|
-
>>> S = 256
|
|
9531
|
-
>>> D = 16
|
|
9532
|
-
>>> query = Tensor(np.ones((B, N, S, D), dtype=np.float16))
|
|
9533
|
-
>>> key = Tensor(np.ones((B, N, S, D), dtype=np.float16))
|
|
9534
|
-
>>> value = Tensor(np.ones((B, N, S, D), dtype=np.float16))
|
|
9535
|
-
>>> attn_mask = Tensor(np.ones((B, 1, S, S), dtype=np.float16))
|
|
9536
|
-
>>> pfa = P.PromptFlashAttention(N, input_layout='BNSD')
|
|
9537
|
-
>>> out = pfa(query, key, value, attn_mask, None, None, None, None, None, None, None, None)
|
|
9538
|
-
>>> print(out.shape)
|
|
9539
|
-
(1, 16, 256, 16)
|
|
9540
|
-
"""
|
|
9541
|
-
|
|
9542
|
-
@prim_attr_register
|
|
9543
|
-
def __init__(self, num_heads, scale_value=1.0, pre_tokens=214748647, next_tokens=0, input_layout='BSH',
|
|
9544
|
-
num_key_value_heads=0, sparse_mode=0, inner_precise=1):
|
|
9545
|
-
"""Initialize PromptFlashAttention."""
|
|
9546
|
-
validator.check_value_type('num_heads', num_heads, [int], self.name)
|
|
9547
|
-
validator.check_value_type('scale_value', scale_value, [float], self.name)
|
|
9548
|
-
validator.check_value_type('pre_tokens', pre_tokens, [int], self.name)
|
|
9549
|
-
validator.check_value_type('next_tokens', next_tokens, [int], self.name)
|
|
9550
|
-
validator.check_value_type('input_layout', input_layout, [str], self.name)
|
|
9551
|
-
validator.check_value_type('num_key_value_heads', num_key_value_heads, [int], self.name)
|
|
9552
|
-
validator.check_value_type('sparse_mode', sparse_mode, [int], self.name)
|
|
9553
|
-
validator.check_value_type('inner_precise', inner_precise, [int], self.name)
|
|
9554
|
-
self.init_prim_io_names(inputs=["query", "key", "value", "attn_mask", "actual_seq_lengths",
|
|
9555
|
-
"actual_seq_lengths_kv", "pse_shift", "deq_scale1", "quant_scale1",
|
|
9556
|
-
"deq_scale2", "quant_scale2", "quant_offset2"],
|
|
9557
|
-
outputs=["attention_out"])
|
|
9558
|
-
|
|
9559
|
-
|
|
9560
9400
|
class AllFinite(Primitive):
|
|
9561
9401
|
r"""
|
|
9562
9402
|
Check whether all gradients are finite.
|
|
@@ -9573,3 +9413,6 @@ class AllFinite(Primitive):
|
|
|
9573
9413
|
raise RuntimeError(
|
|
9574
9414
|
"The version of Ascend AI software package installed "
|
|
9575
9415
|
"in the current environment does not support AllFinite.")
|
|
9416
|
+
|
|
9417
|
+
def __call__(self, *args):
|
|
9418
|
+
return _convert_stub(pyboost_all_finite(self, args))
|