mindspore-2.4.1-cp310-cp310-win_amd64.whl → mindspore-2.5.0-cp310-cp310-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of mindspore might be problematic.
- mindspore/.commit_id +1 -1
- mindspore/__init__.py +8 -3
- mindspore/_c_dataengine.cp310-win_amd64.pyd +0 -0
- mindspore/_c_expression.cp310-win_amd64.pyd +0 -0
- mindspore/_c_mindrecord.cp310-win_amd64.pyd +0 -0
- mindspore/_checkparam.py +0 -5
- mindspore/_extends/parallel_compile/akg_compiler/gen_custom_op_files.py +1 -1
- mindspore/_extends/parse/compile_config.py +64 -0
- mindspore/_extends/parse/deprecated/__init__.py +0 -0
- mindspore/_extends/parse/deprecated/deprecated_tensor_method.py +375 -0
- mindspore/_extends/parse/parser.py +23 -5
- mindspore/_extends/parse/standard_method.py +123 -27
- mindspore/_extends/pijit/pijit_func_white_list.py +1 -1
- mindspore/amp.py +7 -1
- mindspore/avcodec-59.dll +0 -0
- mindspore/avdevice-59.dll +0 -0
- mindspore/avfilter-8.dll +0 -0
- mindspore/avformat-59.dll +0 -0
- mindspore/avutil-57.dll +0 -0
- mindspore/boost/boost_cell_wrapper.py +136 -41
- mindspore/common/__init__.py +3 -1
- mindspore/common/_register_for_tensor.py +0 -1
- mindspore/common/_stub_tensor.py +25 -4
- mindspore/common/_tensor_cpp_method.py +17 -0
- mindspore/common/_tensor_docs.py +6132 -0
- mindspore/common/api.py +99 -25
- mindspore/common/dtype.py +34 -34
- mindspore/common/dump.py +2 -1
- mindspore/common/file_system.py +8 -1
- mindspore/common/generator.py +2 -0
- mindspore/common/hook_handle.py +3 -1
- mindspore/common/initializer.py +3 -4
- mindspore/common/lazy_inline.py +8 -2
- mindspore/common/mindir_util.py +10 -2
- mindspore/common/parameter.py +30 -27
- mindspore/common/tensor.py +713 -1337
- mindspore/communication/__init__.py +1 -1
- mindspore/communication/_comm_helper.py +10 -0
- mindspore/communication/comm_func.py +215 -173
- mindspore/communication/management.py +23 -20
- mindspore/context.py +292 -193
- mindspore/dataset/__init__.py +23 -19
- mindspore/dataset/callback/ds_callback.py +2 -1
- mindspore/dataset/core/config.py +84 -3
- mindspore/dataset/engine/cache_admin.py +3 -3
- mindspore/dataset/engine/cache_client.py +5 -4
- mindspore/dataset/engine/datasets.py +192 -149
- mindspore/dataset/engine/datasets_audio.py +14 -0
- mindspore/dataset/engine/datasets_standard_format.py +28 -11
- mindspore/dataset/engine/datasets_text.py +38 -1
- mindspore/dataset/engine/datasets_user_defined.py +125 -65
- mindspore/dataset/engine/datasets_vision.py +81 -8
- mindspore/dataset/engine/iterators.py +281 -63
- mindspore/dataset/engine/obs/util.py +8 -0
- mindspore/dataset/engine/queue.py +40 -0
- mindspore/dataset/engine/samplers.py +26 -2
- mindspore/dataset/engine/serializer_deserializer.py +1 -1
- mindspore/dataset/engine/validators.py +43 -11
- mindspore/dataset/transforms/py_transforms_util.py +17 -0
- mindspore/dataset/transforms/transforms.py +29 -12
- mindspore/dataset/vision/validators.py +1 -2
- mindspore/device_context/__init__.py +21 -0
- mindspore/device_context/ascend/__init__.py +25 -0
- mindspore/device_context/ascend/device.py +72 -0
- mindspore/device_context/ascend/op_debug.py +94 -0
- mindspore/device_context/ascend/op_precision.py +193 -0
- mindspore/device_context/ascend/op_tuning.py +127 -0
- mindspore/device_context/cpu/__init__.py +25 -0
- mindspore/device_context/cpu/device.py +62 -0
- mindspore/device_context/cpu/op_tuning.py +43 -0
- mindspore/device_context/gpu/__init__.py +21 -0
- mindspore/device_context/gpu/device.py +70 -0
- mindspore/device_context/gpu/op_precision.py +67 -0
- mindspore/device_context/gpu/op_tuning.py +175 -0
- mindspore/device_manager.py +134 -0
- mindspore/dnnl.dll +0 -0
- mindspore/experimental/llm_boost/__init__.py +3 -2
- mindspore/experimental/llm_boost/ascend_native/__init__.py +22 -0
- mindspore/experimental/llm_boost/ascend_native/llama_boost_ascend_native.py +211 -0
- mindspore/experimental/llm_boost/ascend_native/llm_boost.py +52 -0
- mindspore/experimental/llm_boost/atb/boost_base.py +239 -64
- mindspore/experimental/llm_boost/atb/llama_boost.py +52 -30
- mindspore/experimental/llm_boost/atb/qwen_boost.py +47 -24
- mindspore/experimental/llm_boost/register.py +1 -0
- mindspore/experimental/optim/adadelta.py +26 -22
- mindspore/experimental/optim/adam.py +3 -0
- mindspore/experimental/optim/lr_scheduler.py +33 -24
- mindspore/experimental/optim/radam.py +33 -30
- mindspore/hal/device.py +28 -0
- mindspore/hal/event.py +17 -0
- mindspore/hal/memory.py +94 -3
- mindspore/hal/stream.py +91 -6
- mindspore/include/api/context.h +1 -2
- mindspore/include/dataset/constants.h +2 -2
- mindspore/jpeg62.dll +0 -0
- mindspore/log.py +12 -0
- mindspore/mindrecord/__init__.py +1 -1
- mindspore/mindrecord/config.py +17 -316
- mindspore/mindrecord/filereader.py +1 -9
- mindspore/mindrecord/filewriter.py +5 -15
- mindspore/mindrecord/mindpage.py +1 -9
- mindspore/mindspore_backend.dll +0 -0
- mindspore/mindspore_common.dll +0 -0
- mindspore/mindspore_core.dll +0 -0
- mindspore/mindspore_glog.dll +0 -0
- mindspore/mindspore_ops.dll +0 -0
- mindspore/mint/__init__.py +824 -218
- mindspore/mint/distributed/__init__.py +66 -4
- mindspore/mint/distributed/distributed.py +2594 -44
- mindspore/mint/linalg/__init__.py +6 -0
- mindspore/mint/nn/__init__.py +473 -14
- mindspore/mint/nn/functional.py +486 -11
- mindspore/mint/nn/layer/__init__.py +17 -4
- mindspore/mint/nn/layer/_functions.py +330 -0
- mindspore/mint/nn/layer/activation.py +169 -1
- mindspore/mint/nn/layer/basic.py +123 -0
- mindspore/mint/nn/layer/conv.py +727 -0
- mindspore/mint/nn/layer/normalization.py +215 -19
- mindspore/mint/nn/layer/padding.py +797 -0
- mindspore/mint/nn/layer/pooling.py +170 -0
- mindspore/mint/optim/__init__.py +2 -1
- mindspore/mint/optim/adam.py +223 -0
- mindspore/mint/optim/adamw.py +26 -19
- mindspore/mint/special/__init__.py +2 -1
- mindspore/multiprocessing/__init__.py +5 -0
- mindspore/nn/__init__.py +2 -0
- mindspore/nn/cell.py +142 -21
- mindspore/nn/dynamic_lr.py +2 -1
- mindspore/nn/layer/activation.py +6 -6
- mindspore/nn/layer/basic.py +35 -25
- mindspore/nn/layer/channel_shuffle.py +3 -3
- mindspore/nn/layer/conv.py +3 -0
- mindspore/nn/layer/embedding.py +3 -3
- mindspore/nn/layer/normalization.py +8 -7
- mindspore/nn/layer/padding.py +4 -3
- mindspore/nn/layer/pooling.py +55 -23
- mindspore/nn/layer/rnn_cells.py +1 -1
- mindspore/nn/layer/rnns.py +2 -1
- mindspore/nn/layer/timedistributed.py +5 -5
- mindspore/nn/layer/transformer.py +48 -26
- mindspore/nn/learning_rate_schedule.py +5 -3
- mindspore/nn/loss/loss.py +31 -36
- mindspore/nn/optim/ada_grad.py +1 -0
- mindspore/nn/optim/adadelta.py +2 -2
- mindspore/nn/optim/adam.py +1 -1
- mindspore/nn/optim/lars.py +1 -4
- mindspore/nn/optim/optimizer.py +1 -1
- mindspore/nn/optim/rprop.py +2 -2
- mindspore/nn/optim/thor.py +2 -1
- mindspore/nn/utils/__init__.py +22 -0
- mindspore/nn/utils/init.py +73 -0
- mindspore/nn/wrap/cell_wrapper.py +4 -6
- mindspore/nn/wrap/loss_scale.py +3 -4
- mindspore/numpy/array_creations.py +60 -62
- mindspore/numpy/array_ops.py +148 -143
- mindspore/numpy/logic_ops.py +41 -42
- mindspore/numpy/math_ops.py +361 -359
- mindspore/numpy/utils.py +16 -16
- mindspore/numpy/utils_const.py +4 -4
- mindspore/opencv_core452.dll +0 -0
- mindspore/opencv_imgcodecs452.dll +0 -0
- mindspore/opencv_imgproc452.dll +0 -0
- mindspore/ops/__init__.py +2 -1
- mindspore/ops/_grad_experimental/grad_comm_ops.py +107 -8
- mindspore/ops/_grad_experimental/grad_debug_ops.py +6 -1
- mindspore/ops/_grad_experimental/grad_inner_ops.py +9 -0
- mindspore/ops/_grad_experimental/grad_math_ops.py +2 -1
- mindspore/ops/_op_impl/cpu/__init__.py +1 -0
- mindspore/ops/_op_impl/cpu/raise_op.py +28 -0
- mindspore/ops/_vmap/vmap_array_ops.py +20 -19
- mindspore/ops/_vmap/vmap_base.py +0 -2
- mindspore/ops/_vmap/vmap_grad_nn_ops.py +19 -13
- mindspore/ops/_vmap/vmap_math_ops.py +11 -9
- mindspore/ops/_vmap/vmap_nn_ops.py +20 -34
- mindspore/ops/auto_generate/cpp_create_prim_instance_helper.py +149 -12
- mindspore/ops/auto_generate/gen_arg_handler.py +0 -61
- mindspore/ops/auto_generate/gen_extend_func.py +554 -60
- mindspore/ops/auto_generate/gen_ops_def.py +1621 -115
- mindspore/ops/auto_generate/gen_ops_prim.py +8027 -3411
- mindspore/ops/auto_generate/pyboost_inner_prim.py +183 -79
- mindspore/ops/composite/base.py +1 -1
- mindspore/ops/composite/multitype_ops/_compile_utils.py +229 -30
- mindspore/ops/composite/multitype_ops/pow_impl.py +0 -29
- mindspore/ops/function/__init__.py +12 -0
- mindspore/ops/function/array_func.py +561 -159
- mindspore/ops/function/clip_func.py +64 -0
- mindspore/ops/function/debug_func.py +28 -20
- mindspore/ops/function/image_func.py +1 -1
- mindspore/ops/function/linalg_func.py +5 -4
- mindspore/ops/function/math_func.py +1664 -294
- mindspore/ops/function/nn_func.py +988 -317
- mindspore/ops/function/parameter_func.py +3 -56
- mindspore/ops/function/random_func.py +243 -33
- mindspore/ops/function/sparse_unary_func.py +1 -1
- mindspore/ops/functional.py +18 -5
- mindspore/ops/functional_overload.py +897 -0
- mindspore/ops/operations/__init__.py +3 -2
- mindspore/ops/operations/_embedding_cache_ops.py +4 -4
- mindspore/ops/operations/_grad_ops.py +2 -34
- mindspore/ops/operations/_infer_ops.py +2 -1
- mindspore/ops/operations/_inner_ops.py +38 -8
- mindspore/ops/operations/array_ops.py +45 -303
- mindspore/ops/operations/comm_ops.py +23 -17
- mindspore/ops/operations/custom_ops.py +7 -49
- mindspore/ops/operations/debug_ops.py +42 -47
- mindspore/ops/operations/inner_ops.py +6 -4
- mindspore/ops/operations/linalg_ops.py +3 -2
- mindspore/ops/operations/manually_defined/ops_def.py +185 -104
- mindspore/ops/operations/math_ops.py +11 -216
- mindspore/ops/operations/nn_ops.py +153 -310
- mindspore/ops/primitive.py +23 -21
- mindspore/ops/tensor_method.py +1669 -0
- mindspore/ops_generate/aclnn_kernel_register_auto_cc_generator.py +110 -0
- mindspore/ops_generate/add_tensor_docs_generator.py +54 -0
- mindspore/ops_generate/arg_handler.py +0 -61
- mindspore/ops_generate/auto_grad_impl_cc_generator.py +135 -0
- mindspore/ops_generate/auto_grad_reg_cc_generator.py +93 -0
- mindspore/ops_generate/base_generator.py +11 -0
- mindspore/ops_generate/cpp_create_prim_instance_helper_generator.py +108 -0
- mindspore/ops_generate/functional_map_cpp_generator.py +491 -0
- mindspore/ops_generate/functional_overload_py_generator.py +110 -0
- mindspore/ops_generate/functions_cc_generator.py +233 -0
- mindspore/ops_generate/gen_aclnn_implement.py +110 -114
- mindspore/ops_generate/gen_constants.py +157 -3
- mindspore/ops_generate/gen_ops.py +245 -990
- mindspore/ops_generate/gen_pyboost_func.py +97 -998
- mindspore/ops_generate/gen_utils.py +119 -33
- mindspore/ops_generate/lite_ops_cpp_generator.py +155 -0
- mindspore/ops_generate/op_api_proto.py +206 -0
- mindspore/ops_generate/op_def_py_generator.py +131 -0
- mindspore/ops_generate/op_prim_py_generator.py +480 -0
- mindspore/ops_generate/op_proto.py +373 -108
- mindspore/ops_generate/op_template_parser.py +436 -0
- mindspore/ops_generate/ops_def_cc_generator.py +288 -0
- mindspore/ops_generate/ops_def_h_generator.py +74 -0
- mindspore/ops_generate/ops_name_h_generator.py +68 -0
- mindspore/ops_generate/ops_primitive_h_generator.py +81 -0
- mindspore/ops_generate/pyboost_functions_cpp_generator.py +370 -0
- mindspore/ops_generate/pyboost_functions_h_generator.py +68 -0
- mindspore/ops_generate/pyboost_functions_py_generator.py +148 -0
- mindspore/ops_generate/pyboost_grad_function_cpp_generator.py +154 -0
- mindspore/ops_generate/pyboost_inner_prim_generator.py +131 -0
- mindspore/ops_generate/pyboost_native_grad_functions_generator.py +268 -0
- mindspore/ops_generate/pyboost_op_cpp_code_generator.py +851 -0
- mindspore/ops_generate/pyboost_overload_functions_cpp_generator.py +344 -0
- mindspore/ops_generate/pyboost_utils.py +92 -33
- mindspore/ops_generate/template.py +294 -44
- mindspore/ops_generate/tensor_func_reg_cpp_generator.py +422 -0
- mindspore/parallel/__init__.py +3 -3
- mindspore/parallel/_auto_parallel_context.py +44 -34
- mindspore/parallel/_cell_wrapper.py +22 -3
- mindspore/parallel/_parallel_serialization.py +13 -2
- mindspore/parallel/_utils.py +4 -2
- mindspore/parallel/algo_parameter_config.py +1 -1
- mindspore/parallel/checkpoint_transform.py +44 -0
- mindspore/parallel/cluster/process_entity/_api.py +131 -37
- mindspore/parallel/cluster/process_entity/_utils.py +41 -6
- mindspore/parallel/cluster/run.py +20 -3
- mindspore/parallel/parameter_broadcast.py +1 -1
- mindspore/parallel/shard.py +3 -0
- mindspore/parallel/transform_safetensors.py +119 -253
- mindspore/profiler/__init__.py +17 -4
- mindspore/profiler/analysis/__init__.py +0 -0
- mindspore/profiler/analysis/parser/__init__.py +0 -0
- mindspore/profiler/analysis/parser/ascend_cann_parser.py +166 -0
- mindspore/profiler/analysis/parser/base_parser.py +158 -0
- mindspore/profiler/analysis/parser/framework_cann_relation_parser.py +45 -0
- mindspore/profiler/analysis/parser/ms_framework_parser.py +142 -0
- mindspore/profiler/analysis/parser/ms_minddata_parser.py +145 -0
- mindspore/profiler/analysis/parser/timeline_assembly_factory/__init__.py +0 -0
- mindspore/profiler/analysis/parser/timeline_assembly_factory/ascend_timeline_assembler.py +261 -0
- mindspore/profiler/analysis/parser/timeline_assembly_factory/base_timeline_assembler.py +40 -0
- mindspore/profiler/analysis/parser/timeline_assembly_factory/trace_view_container.py +84 -0
- mindspore/profiler/analysis/parser/timeline_creator/__init__.py +0 -0
- mindspore/profiler/analysis/parser/timeline_creator/base_timeline_creator.py +44 -0
- mindspore/profiler/analysis/parser/timeline_creator/cpu_op_timeline_creator.py +90 -0
- mindspore/profiler/analysis/parser/timeline_creator/fwk_timeline_creator.py +76 -0
- mindspore/profiler/analysis/parser/timeline_creator/msprof_timeline_creator.py +103 -0
- mindspore/profiler/analysis/parser/timeline_creator/scope_layer_timeline_creator.py +134 -0
- mindspore/profiler/analysis/parser/timeline_event/__init__.py +0 -0
- mindspore/profiler/analysis/parser/timeline_event/base_event.py +233 -0
- mindspore/profiler/analysis/parser/timeline_event/cpu_op_event.py +47 -0
- mindspore/profiler/analysis/parser/timeline_event/flow_event.py +36 -0
- mindspore/profiler/analysis/parser/timeline_event/fwk_event.py +260 -0
- mindspore/profiler/analysis/parser/timeline_event/msprof_event.py +73 -0
- mindspore/profiler/analysis/parser/timeline_event/scope_layer_event.py +53 -0
- mindspore/profiler/analysis/parser/timeline_event/timeline_event_pool.py +146 -0
- mindspore/profiler/analysis/task_manager.py +131 -0
- mindspore/profiler/analysis/time_converter.py +84 -0
- mindspore/profiler/analysis/viewer/__init__.py +0 -0
- mindspore/profiler/analysis/viewer/ascend_communication_viewer.py +333 -0
- mindspore/profiler/analysis/viewer/ascend_integrate_viewer.py +87 -0
- mindspore/profiler/analysis/viewer/ascend_kernel_details_viewer.py +252 -0
- mindspore/profiler/analysis/viewer/ascend_memory_viewer.py +313 -0
- mindspore/profiler/analysis/viewer/ascend_op_memory_viewer.py +322 -0
- mindspore/profiler/analysis/viewer/ascend_step_trace_time_viewer.py +265 -0
- mindspore/profiler/analysis/viewer/ascend_timeline_viewer.py +58 -0
- mindspore/profiler/analysis/viewer/base_viewer.py +26 -0
- mindspore/profiler/analysis/viewer/ms_dataset_viewer.py +97 -0
- mindspore/profiler/analysis/viewer/ms_minddata_viewer.py +581 -0
- mindspore/profiler/analysis/work_flow.py +73 -0
- mindspore/profiler/common/ascend_msprof_exporter.py +138 -0
- mindspore/profiler/common/command_executor.py +90 -0
- mindspore/profiler/common/constant.py +174 -3
- mindspore/profiler/common/file_manager.py +208 -0
- mindspore/profiler/common/log.py +130 -0
- mindspore/profiler/common/msprof_cmd_tool.py +202 -0
- mindspore/profiler/common/path_manager.py +371 -0
- mindspore/profiler/common/process_bar.py +168 -0
- mindspore/profiler/common/process_pool.py +9 -3
- mindspore/profiler/common/profiler_context.py +476 -0
- mindspore/profiler/common/profiler_info.py +304 -0
- mindspore/profiler/common/profiler_output_path.py +284 -0
- mindspore/profiler/common/profiler_parameters.py +210 -0
- mindspore/profiler/common/profiler_path_manager.py +120 -0
- mindspore/profiler/common/record_function.py +76 -0
- mindspore/profiler/common/tlv_decoder.py +76 -0
- mindspore/profiler/common/util.py +75 -2
- mindspore/profiler/dynamic_profiler.py +270 -37
- mindspore/profiler/envprofiler.py +138 -0
- mindspore/profiler/mstx.py +199 -0
- mindspore/profiler/platform/__init__.py +21 -0
- mindspore/profiler/platform/base_profiler.py +40 -0
- mindspore/profiler/platform/cpu_profiler.py +124 -0
- mindspore/profiler/platform/gpu_profiler.py +74 -0
- mindspore/profiler/platform/npu_profiler.py +309 -0
- mindspore/profiler/profiler.py +580 -93
- mindspore/profiler/profiler_action_controller.py +187 -0
- mindspore/profiler/profiler_interface.py +114 -0
- mindspore/profiler/schedule.py +208 -0
- mindspore/rewrite/api/symbol_tree.py +1 -2
- mindspore/run_check/_check_version.py +18 -13
- mindspore/runtime/__init__.py +37 -0
- mindspore/runtime/device.py +27 -0
- mindspore/runtime/event.py +209 -0
- mindspore/runtime/executor.py +148 -0
- mindspore/runtime/memory.py +392 -0
- mindspore/runtime/stream.py +460 -0
- mindspore/runtime/thread_bind_core.py +401 -0
- mindspore/swresample-4.dll +0 -0
- mindspore/swscale-6.dll +0 -0
- mindspore/tinyxml2.dll +0 -0
- mindspore/train/__init__.py +2 -2
- mindspore/train/_utils.py +53 -18
- mindspore/train/amp.py +8 -4
- mindspore/train/callback/_checkpoint.py +32 -18
- mindspore/train/callback/_early_stop.py +1 -1
- mindspore/train/callback/_flops_collector.py +105 -69
- mindspore/train/callback/_history.py +1 -1
- mindspore/train/callback/_summary_collector.py +44 -6
- mindspore/train/callback/_tft_register.py +37 -15
- mindspore/train/dataset_helper.py +11 -11
- mindspore/train/metrics/precision.py +4 -5
- mindspore/train/mind_ir_pb2.py +167 -46
- mindspore/train/model.py +13 -14
- mindspore/train/serialization.py +461 -72
- mindspore/train/summary/summary_record.py +1 -2
- mindspore/train/train_thor/model_thor.py +1 -1
- mindspore/turbojpeg.dll +0 -0
- mindspore/utils/__init__.py +4 -2
- mindspore/utils/dryrun.py +138 -0
- mindspore/utils/runtime_execution_order_check.py +550 -0
- mindspore/version.py +1 -1
- {mindspore-2.4.1.dist-info → mindspore-2.5.0.dist-info}/METADATA +3 -4
- {mindspore-2.4.1.dist-info → mindspore-2.5.0.dist-info}/RECORD +368 -242
- {mindspore-2.4.1.dist-info → mindspore-2.5.0.dist-info}/entry_points.txt +1 -1
- mindspore/common/_tensor_overload.py +0 -139
- mindspore/mindspore_np_dtype.dll +0 -0
- mindspore/profiler/envprofiling.py +0 -254
- mindspore/profiler/profiling.py +0 -1926
- {mindspore-2.4.1.dist-info → mindspore-2.5.0.dist-info}/WHEEL +0 -0
- {mindspore-2.4.1.dist-info → mindspore-2.5.0.dist-info}/top_level.txt +0 -0
mindspore/experimental/llm_boost/atb/qwen_boost.py
CHANGED
@@ -15,11 +15,14 @@
 """llm boost"""
 import json
 import mindspore.common.dtype as mstype
-from mindspore.experimental.llm_boost.atb.boost_base import AtbBoostBase
+from mindspore.experimental.llm_boost.atb.boost_base import AtbBoostBase, NormType
 from mindspore._c_expression import LlmBoostBinder
 from mindspore.experimental.llm_boost.register import LlmBoostRegister, LlmBoostType
 
 
+CPP_QWEN_MODEL_CLASS_NAME = "qwen_QwenDecoderModel"
+
+
 @LlmBoostRegister.register(LlmBoostType.BUILDIN, "Qwen")
 class QwenBoost(AtbBoostBase):
     """QwenBoost class"""
@@ -30,9 +33,11 @@ class QwenBoost(AtbBoostBase):
         self.acl_encoder_operation_inputs = [None] * self.in_tensor_length
         self.acl_decoder_operation_inputs = [None] * self.in_tensor_length
         self.atb_encoder_operation = LlmBoostBinder(
-            …
+            self.backend_name, CPP_QWEN_MODEL_CLASS_NAME
+        )
         self.atb_decoder_operation = LlmBoostBinder(
-            …
+            self.backend_name, CPP_QWEN_MODEL_CLASS_NAME
+        )
 
     def init(self):
         """set param"""
@@ -42,24 +47,43 @@ class QwenBoost(AtbBoostBase):
             "withEmbedding": True,
             "isEmbeddingParallel": True,
             "isLmHeadParallel": True,
-            "linearTransposeType": […
+            "linearTransposeType": [
+                [1, -1, -1, 1, 1, -1, 1] for i in range(self.num_layers)
+            ],
             "lmHeadTransposeType": 1,
-            "…
-            "…
+            "enableSwiGLU": not self.need_nz,
+            "normEps": self.config.rms_norm_eps,
+            "normType": NormType.RMS_NORM,
             "numAttentionHeadsPerRank": self.config.num_heads // self.device_num,
             "hiddenSizePerAttentionHead": self.head_dim,
             "numHiddenLayers": self.num_layers,
             "numKeyValueHeadsPerRank": self.n_kv_heads // self.device_num,
             "rank": self.rank_id,
             "worldSize": self.device_num,
-            "backend": …
+            "backend": self.config.communication_backend,
             "packQuantType": [[1, 1] for _ in range(self.num_layers)],
-            "linearQuantType": [
-            …
+            "linearQuantType": [
+                [0, -1, -1, 0, 0, -1, 0] for _ in range(self.num_layers)
+            ],
+            "linearHasBias": [[True, False, False, False]] * self.num_layers,
+            "enableKvQuant": self.kv_quant is not None,
+            "enableLora": False,
+            "isUnpadInputs": True,
+            "enableAddNorm": False,
+        }
+        encoder_param = {
+            **param_dict,
+            "isPrefill": True,
+            "enableLcoc": False,
+            "enableSplitFuse": False,
+        }
+        decoder_param = {
+            **param_dict,
+            "isPrefill": False,
+            "enableLcoc": False,
+            "enableSpeculate": False,
+            "enablePrefixCache": False,
         }
-        encoder_param = {**param_dict, "isPrefill": True, "supportLcoc": False}
-        decoder_param = {**param_dict, "isPrefill": False,
-                         "supportLcoc": False, "supportSpeculate": False}
         self.atb_encoder_operation.init(json.dumps({**encoder_param}))
         self.atb_decoder_operation.init(json.dumps({**decoder_param}))
 
|
|
|
79
103
|
**kwargs
|
|
80
104
|
):
|
|
81
105
|
"""prepare inputs"""
|
|
82
|
-
self.acl_param = json.dumps(
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
106
|
+
self.acl_param = json.dumps(
|
|
107
|
+
{
|
|
108
|
+
"seqLen": seqLen,
|
|
109
|
+
}
|
|
110
|
+
)
|
|
111
|
+
|
|
112
|
+
self.acl_decoder_operation_inputs[0] = input_ids
|
|
113
|
+
self.acl_decoder_operation_inputs[1] = position_ids
|
|
89
114
|
self.acl_decoder_operation_inputs[2] = cos_embed
|
|
90
115
|
self.acl_decoder_operation_inputs[3] = sin_embed
|
|
91
116
|
self.acl_decoder_operation_inputs[4] = attention_mask
|
|
@@ -93,9 +118,7 @@ class QwenBoost(AtbBoostBase):
|
|
|
93
118
|
self.acl_decoder_operation_inputs[6] = slots
|
|
94
119
|
self.acl_decoder_operation_inputs[7] = self.placeholder
|
|
95
120
|
self.acl_decoder_operation_inputs[8] = self.placeholder
|
|
96
|
-
self.acl_decoder_operation_inputs[9] = self.
|
|
97
|
-
|
|
98
|
-
self.acl_decoder_operation_inputs[
|
|
99
|
-
lm_head_indices, mstype.int64)
|
|
100
|
-
self.acl_decoder_operation_inputs[11] = self.placeholder
|
|
121
|
+
self.acl_decoder_operation_inputs[9] = self.placeholder
|
|
122
|
+
self.acl_decoder_operation_inputs[10] = input_lengths
|
|
123
|
+
self.acl_decoder_operation_inputs[11] = lm_head_indices
|
|
101
124
|
return self.acl_decoder_operation_inputs, self.acl_param
|
|
mindspore/experimental/optim/adadelta.py
CHANGED
@@ -37,28 +37,32 @@ class Adadelta(Optimizer):
     Implements Adadelta algorithm.
 
     .. math::
-        …
+        \newcommand{\grad}[2]{\nabla_{#1} f_{#2}(#2_{#2 - 1})}
+        \newcommand{\updateVar}[3]{#1_{#2} \leftarrow #1_{#2 - 1} \rho + #3_{#2} (1 - \rho)}
+
+        \begin{align*}
+            &\rule{150mm}{0.4pt} \\
+            &\textbf{Input}:
+                \gamma \text{ (lr)}, \: \theta_0 \text{ (params)}, \: f(\theta) \text{ (objective)},
+                \: \rho \text{ (decay)}, \: \lambda \text{ (weight decay)} \\
+            &\textbf{Initialize}:
+                \begin{cases}
+                    v_0 \leftarrow 0 \text{ (square avg)} \\
+                    u_0 \leftarrow 0 \text{ (accumulate variables)}
+                \end{cases} \\
+            &\rule{110mm}{0.4pt} \\
+            &\textbf{For } t = 1 \text{ to } \ldots \text{ do}: \\
+            &\quad g_t \leftarrow \grad{\theta}{t} \\
+            &\quad \text{If } \lambda \neq 0: \\
+            &\quad\quad g_t \leftarrow g_t + \lambda \theta_{t - 1} \\
+            &\quad v_t \leftarrow \updateVar{v}{t}{g^2} \\
+            &\quad \Delta x_t \leftarrow \frac{\sqrt{u_{t - 1} + \epsilon}}{\sqrt{v_t + \epsilon}} g_t \\
+            &\quad u_t \leftarrow \updateVar{u}{t}{\Delta x^2} \\
+            &\quad \theta_t \leftarrow \theta_{t - 1} - \gamma \Delta x_t \\
+            &\rule{110mm}{0.4pt} \\
+            &\bf{Return}: \theta_t \\
+            &\rule{110mm}{0.4pt}
+        \end{align*}
 
     .. warning::
         This is an experimental optimizer API that is subject to change.
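
The formula block added above maps one-to-one onto a plain-NumPy sketch of a single update; this illustrates the docstring's pseudocode, not MindSpore's shipped kernel, and the default hyperparameters here are illustrative:

import numpy as np

def adadelta_step(theta, v, u, g, lr=1.0, rho=0.9, eps=1e-6, weight_decay=0.0):
    """One Adadelta update for one NumPy tensor, following the docstring pseudocode."""
    if weight_decay != 0.0:
        g = g + weight_decay * theta
    v = rho * v + (1 - rho) * g * g                # running avg of squared gradients
    dx = np.sqrt(u + eps) / np.sqrt(v + eps) * g   # rescaled step
    u = rho * u + (1 - rho) * dx * dx              # running avg of squared updates
    theta = theta - lr * dx
    return theta, v, u
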
mindspore/experimental/optim/adam.py
CHANGED
@@ -78,6 +78,9 @@ class Adam(Optimizer):
         \end{aligned}
 
     .. warning::
+        The implementation formula of this optimizer interface is not completely consistent with that in the paper.
+        If you want to use an interface that is completely consistent, it is recommended to use
+        :class:`mindspore.mint.optim.Adam`, which currently only supports Ascend.
         This is an experimental optimizer API that is subject to change.
         This module must be used with lr scheduler module in `LRScheduler Class
         <https://www.mindspore.cn/docs/en/master/api_python/mindspore.nn.html#learningrateschedule-class>`_ .
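
The added warning points users who need paper-faithful Adam at mindspore.mint.optim.Adam, introduced in this release as mindspore/mint/optim/adam.py. A minimal switch, assuming its constructor mirrors the existing mint.optim.AdamW signature (verify against the 2.5.0 docs); Ascend-only per the note:

from mindspore import nn
from mindspore.mint import optim

net = nn.Dense(4, 2)  # placeholder network, just to have parameters
# Constructor arguments assumed to mirror mint.optim.AdamW.
optimizer = optim.Adam(net.trainable_params(), lr=1e-3, betas=(0.9, 0.999), eps=1e-8)
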
mindspore/experimental/optim/lr_scheduler.py
CHANGED
@@ -24,7 +24,6 @@ from mindspore.ops import functional as F
 from mindspore.ops import operations as P
 from mindspore import _checkparam as Validator
 
-
 __all__ = ['StepLR', 'LinearLR', 'LRScheduler', 'ExponentialLR', 'PolynomialLR',
            'MultiplicativeLR', 'ConstantLR', 'MultiStepLR', 'LambdaLR', 'SequentialLR', 'ReduceLROnPlateau',
            'CyclicLR', 'CosineAnnealingWarmRestarts', 'CosineAnnealingLR']
@@ -82,6 +81,7 @@ class LRScheduler:
     [Tensor(shape=[], dtype=Float32, value= 0.01)]
     [Tensor(shape=[], dtype=Float32, value= 0.01)]
     """
+
     def __init__(self, optimizer, last_epoch=-1):
         if not isinstance(optimizer, Optimizer):
             raise TypeError('{} is not an Optimizer'.format(
@@ -192,6 +192,7 @@ class StepLR(LRScheduler):
     ...     scheduler.step()
     ...     current_lr = scheduler.get_last_lr()
     """
+
     def __init__(self, optimizer, step_size, gamma=0.1, last_epoch=-1):
         if not isinstance(step_size, int) and not isinstance(step_size, bool):
             raise TypeError(f"For 'StepLR', the 'step_size' must be int, but got {type(step_size)}.")
@@ -297,8 +298,8 @@ class LinearLR(LRScheduler):
         if self.last_epoch > self.total_iters:
             return [lr * 1. for lr in self._last_lr]
 
-        factor = 1. + (self.end_factor - self.start_factor) /
-            …
+        factor = 1. + (self.end_factor - self.start_factor) / \
+            (self.total_iters * self.start_factor + (self.last_epoch - 1) * (self.end_factor - self.start_factor))
         return [lr * factor for lr in self._last_lr]
 
     def _get_closed_form_lr(self):
@@ -419,6 +420,7 @@ class PolynomialLR(LRScheduler):
     [Tensor(shape=[], dtype=Float32, value= 0)]
     [Tensor(shape=[], dtype=Float32, value= 0)]
     """
+
     def __init__(self, optimizer, total_iters=5, power=1.0, last_epoch=-1):
         if not isinstance(power, float):
             raise TypeError(f"For 'PolynomialLR', the 'power' must be float, but got {type(power)}.")
@@ -435,8 +437,8 @@ class PolynomialLR(LRScheduler):
     def get_lr(self):
         if self.last_epoch == 0 or self.last_epoch > self.total_iters:
             return [lr * 1. for lr in self._last_lr]
-        factor = ((1.0 - self.last_epoch / self.total_iters) /
-            …
+        factor = ((1.0 - self.last_epoch / self.total_iters) /
+                  (1.0 - (self.last_epoch - 1) / self.total_iters)) ** self.power
        return [lr * factor for lr in self._last_lr]
 
     def _get_closed_form_lr(self):
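
The restored two-line factor in PolynomialLR.get_lr is the ratio of consecutive closed-form decays, so applying it step by step telescopes back to base_lr * (1 - t/total_iters) ** power. A standalone check with made-up values:

# Standalone check of the PolynomialLR recursive factor (values are illustrative).
total_iters, power, base_lr = 5, 2.0, 0.1

lr = base_lr
for t in range(1, total_iters + 1):
    factor = ((1.0 - t / total_iters) /
              (1.0 - (t - 1) / total_iters)) ** power
    lr *= factor
    closed_form = base_lr * (1.0 - t / total_iters) ** power
    assert abs(lr - closed_form) < 1e-12, (t, lr, closed_form)
print("recursive factor matches closed form")
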
@@ -483,14 +485,16 @@ class LambdaLR(LRScheduler):
     [Tensor(shape=[], dtype=Float32, value= 0.0081)]
     [Tensor(shape=[], dtype=Float32, value= 0.00729)]
     """
+
     def __init__(self, optimizer, lr_lambda, last_epoch=-1):
-        …
-        if len(lr_lambda) != len(optimizer.param_groups):
+        param_groups_length = len(optimizer.param_groups)
+        if isinstance(lr_lambda, (list, tuple)):
+            if len(lr_lambda) != param_groups_length:
                 raise ValueError("Expected {} lr_lambdas, but got {}".format(
-                    …
+                    param_groups_length, len(lr_lambda)))
             self.lr_lambdas = list(lr_lambda)
+        else:
+            self.lr_lambdas = [lr_lambda] * param_groups_length
         super(LambdaLR, self).__init__(optimizer, last_epoch)
 
     def get_lr(self):
@@ -533,14 +537,16 @@ class MultiplicativeLR(LRScheduler):
     [Tensor(shape=[], dtype=Float32, value= 0.009025)]
     [Tensor(shape=[], dtype=Float32, value= 0.00857375)]
     """
+
     def __init__(self, optimizer, lr_lambda, last_epoch=-1):
-        if …
-        …
+        if isinstance(lr_lambda, (list, tuple)):
+            if len(lr_lambda) == len(optimizer.param_groups):
+                self.lr_lambdas = list(lr_lambda)
+            else:
                 raise ValueError("Expected {} lr_lambdas, but got {}".format(
                     len(optimizer.param_groups), len(lr_lambda)))
-        …
+        else:
+            self.lr_lambdas = [lr_lambda] * len(optimizer.param_groups)
         super(MultiplicativeLR, self).__init__(optimizer, last_epoch)
 
     def get_lr(self):
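
Both LambdaLR and MultiplicativeLR above now accept a bare callable and broadcast it to every parameter group, where the old code required a list sized to optimizer.param_groups. A minimal usage sketch; the network and optimizer setup are illustrative:

from mindspore import nn
from mindspore.experimental import optim

net = nn.Dense(4, 2)  # throwaway network, just to have parameters to schedule
optimizer = optim.SGD(net.trainable_params(), lr=0.1)

# A single callable is now normalized to one lambda per parameter group.
scheduler = optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=lambda epoch: 0.9 ** epoch)

for epoch in range(3):
    scheduler.step()
    print(scheduler.get_last_lr())
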
@@ -599,6 +605,7 @@ class MultiStepLR(LRScheduler):
     [Tensor(shape=[], dtype=Float32, value= 0.0005)]
     [Tensor(shape=[], dtype=Float32, value= 0.0005)]
     """
+
     def __init__(self, optimizer, milestones, gamma=0.1, last_epoch=-1):
         Validator.check_value_type('milestones', milestones, [list])
         for milestone in milestones:
@@ -668,6 +675,7 @@ class ConstantLR(LRScheduler):
     [Tensor(shape=[], dtype=Float32, value= 0.05)]
     [Tensor(shape=[], dtype=Float32, value= 0.05)]
     """
+
     def __init__(self, optimizer, factor=1.0 / 3, total_iters=5, last_epoch=-1):
         if factor > 1.0 or factor < 0:
             raise ValueError('Constant multiplicative factor expected to be between 0 and 1.')
@@ -735,6 +743,7 @@ class SequentialLR:
     [Tensor(shape=[], dtype=Float32, value= 0.0729)]
     [Tensor(shape=[], dtype=Float32, value= 0.06561)]
     """
+
     def __init__(self, optimizer, schedulers, milestones, last_epoch=-1):
         for sched_idx in range(len(schedulers)):
             if schedulers[sched_idx].optimizer != optimizer:
@@ -863,6 +872,7 @@ class ReduceLROnPlateau:
     [Tensor(shape=[], dtype=Float32, value= 0.001)]
     [Tensor(shape=[], dtype=Float32, value= 0.0001)]
     """
+
     def __init__(self, optimizer, mode='min', factor=0.1, patience=10,
                  threshold=1e-4, threshold_mode='rel', cooldown=0,
                  min_lr=0, eps=1e-8):
@@ -1053,6 +1063,7 @@ class CyclicLR(LRScheduler):
     [Tensor(shape=[], dtype=Float32, value= 0.01018)]
     [Tensor(shape=[], dtype=Float32, value= 0.010225)]
     """
+
     def __init__(self,
                  optimizer,
                  base_lr,
@@ -1127,12 +1138,12 @@ class CyclicLR(LRScheduler):
     def _triangular_scale_fn(self, x):
         return 1.
 
-    def _triangular2_scale_fn(self, x):
-        return 1 / (2. ** (x - 1))
-
     def _exp_range_scale_fn(self, x):
         return self.gamma ** (x)
 
+    def _triangular2_scale_fn(self, x):
+        return 1 / (2. ** (x - 1))
+
     def get_lr(self):
         cycle = self.floor(1 + self.last_epoch / self.total_step_size)
         x = 1. + self.last_epoch / self.total_step_size - cycle
@@ -1143,13 +1154,9 @@ class CyclicLR(LRScheduler):
         lrs = []
         for base_lr, max_lr in zip(self.base_lrs, self.max_lrs):
             base_height = (max_lr - base_lr) * scale_factor
-            …
-                lr = base_lr + base_height * self.scale_fn(cycle)
-            else:
-                lr = base_lr + base_height * self.scale_fn(self.last_epoch)
+            cycle_or_epoch = cycle if self.scale_mode == 'cycle' else self.last_epoch
+            lr = base_lr + base_height * self.scale_fn(cycle_or_epoch)
             lrs.append(lr)
-
         return lrs
 
 
@@ -1211,6 +1218,7 @@ class CosineAnnealingWarmRestarts(LRScheduler):
     [Tensor(shape=[], dtype=Float32, value= 0.025)]
     [Tensor(shape=[], dtype=Float32, value= 0.00669873)]
     """
+
     def __init__(self, optimizer, T_0, T_mult=1, eta_min=0, last_epoch=-1):
         if T_0 <= 0 or not isinstance(T_0, int):
             raise ValueError("T_0 should be an integer and equal or greater than 0, but got {}".format(T_0))
@@ -1336,6 +1344,7 @@ class CosineAnnealingLR(LRScheduler):
     [Tensor(shape=[], dtype=Float32, value= 0.05)]
     [Tensor(shape=[], dtype=Float32, value= 0)]
     """
+
     def __init__(self, optimizer, T_max, eta_min=0.0, last_epoch=-1):
         if not isinstance(eta_min, (float, int)):
             raise TypeError(f"For 'CosineAnnealingLR', the 'eta_min' must be float or int, but got {type(eta_min)}.")
mindspore/experimental/optim/radam.py
CHANGED
@@ -55,36 +55,39 @@ class RAdam(Optimizer):
     Implements RAdam algorithm.
 
     .. math::
-        …
+        \begin{align*}
+            &\rule{110mm}{0.4pt} \\
+            &\textbf{Input}:
+                \gamma \text{ (lr)}, \: \beta_1, \beta_2 \text{ (betas)}, \: \theta_0 \text{ (params)}, \:f(\theta)
+                \text{ (objective)}, \:
+                \lambda \text{ (weightdecay)}, \: \epsilon \text{ (epsilon)} \\
+            &\textbf{Initialize}:
+                \begin{cases}
+                    m_0 \leftarrow 0 \text{ (first moment)} \\
+                    v_0 \leftarrow 0 \text{ (second moment)} \\
+                    \rho_{\infty} \xleftarrow{\text{def}} \dfrac{2}{1 - \beta_2} - 1
+                \end{cases} \\
+            &\rule{110mm}{0.4pt} \\
+            &\textbf{For } t = 1 \text{ to } \ldots \text{ do}: \\
+            &\quad g_t \leftarrow \nabla_{\theta} f_t(\theta_{t - 1}) \\
+            &\quad \text{If } \lambda \neq 0: \\
+            &\quad\quad g_t \leftarrow g_t + \lambda \theta_{t - 1} \\
+            &\quad m_t \leftarrow \beta_1 m_{t - 1} + (1 - \beta_1) g_t \\
+            &\quad v_t \leftarrow \beta_2 v_{t - 1} + (1 - \beta_2) g_t^2 \\
+            &\quad \widehat{m_t} \leftarrow \dfrac{m_t}{1 - \beta_1^t} \\
+            &\quad \text{Let } \rho_t' = 2 t \beta_2^t /(1 - \beta_2^t) \quad \text{(auxiliary variable)} \\
+            &\quad \rho_t \leftarrow \rho_{\infty} - \rho_t' \\
+            &\quad \text{If } \rho_t > 5: \\
+            &\quad\quad l_t \leftarrow \dfrac{\sqrt{1 - \beta_2^t}}{\sqrt{v_t} + \epsilon} \\
+            &\quad\quad r_t \leftarrow \sqrt{\dfrac{(\rho_t - 4)(\rho_t - 2)\rho_{\infty}}{(\rho_{\infty} - 4)
+                (\rho_{\infty} - 2) \rho_t}} \\
+            &\quad\quad \theta_t \leftarrow \theta_{t - 1} - \gamma \widehat{m_t} r_t l_t \\
+            &\quad \text{Else}: \\
+            &\quad\quad \theta_t \leftarrow \theta_{t - 1} - \gamma \widehat{m_t} \\
+            &\rule{110mm}{0.4pt} \\
+            &\bf{Return}: \theta_t \\
+            &\rule{110mm}{0.4pt}
+        \end{align*}
 
     .. warning::
         This is an experimental optimizer API that is subject to change.
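
As with Adadelta, the restored RAdam block translates directly into a plain-NumPy sketch of a single update, including the rectification branch; this illustrates the docstring pseudocode, not the shipped kernel:

import numpy as np

def radam_step(theta, m, v, g, t, lr=1e-3, betas=(0.9, 0.999), eps=1e-8, weight_decay=0.0):
    """One RAdam update for one NumPy tensor, following the docstring pseudocode."""
    beta1, beta2 = betas
    rho_inf = 2.0 / (1.0 - beta2) - 1.0
    if weight_decay != 0.0:
        g = g + weight_decay * theta
    m = beta1 * m + (1 - beta1) * g
    v = beta2 * v + (1 - beta2) * g * g
    m_hat = m / (1 - beta1 ** t)
    rho_t = rho_inf - 2 * t * beta2 ** t / (1 - beta2 ** t)
    if rho_t > 5.0:  # variance of the adaptive term is tractable: rectify
        l_t = np.sqrt(1 - beta2 ** t) / (np.sqrt(v) + eps)
        r_t = np.sqrt((rho_t - 4) * (rho_t - 2) * rho_inf /
                      ((rho_inf - 4) * (rho_inf - 2) * rho_t))
        theta = theta - lr * m_hat * r_t * l_t
    else:            # fall back to un-adapted SGD-with-momentum step
        theta = theta - lr * m_hat
    return theta, m, v
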
mindspore/hal/device.py
CHANGED
@@ -122,6 +122,9 @@ def is_initialized(device_target):
     """
     Returns whether specified backend is initialized.
 
+    Note:
+        - The api will be deprecated.
+
     Note:
         MindSpore's backends "CPU", "GPU" and "Ascend" will be initialized in the following scenarios:
 
@@ -162,6 +165,12 @@ def is_available(device_target):
     Returns whether specified backend is available.
     All dependent libraries should be successfully loaded if this backend is available.
 
+    Note:
+        - The api will be deprecated.
+        - CPU hardware, please use the interface :func:`mindspore.device_context.cpu.is_available`.
+        - GPU hardware, please use the interface :func:`mindspore.device_context.gpu.is_available`.
+        - Ascend hardware, please use the interface :func:`mindspore.device_context.ascend.is_available`.
+
     Args:
         device_target (str): The device name of backend, should be one of "CPU", "GPU" and "Ascend".
 
@@ -194,6 +203,13 @@ def device_count(device_target=None):
     """
     Returns device count of specified backend.
 
+    Note:
+        - The api will be deprecated.
+        - CPU hardware, please use the interface :func:`mindspore.device_context.cpu.device_count`.
+        - GPU hardware, please use the interface :func:`mindspore.device_context.gpu.device_count`.
+        - Ascend hardware, please use the interface :func:`mindspore.device_context.ascend.device_count`.
+
+
     Note:
         If `device_target` is not specified, get the device count of the current backend set by context.
         For CPU backend, this method always returns 1.
@@ -221,6 +237,9 @@ def get_device_capability(device_id, device_target=None):
     """
     Get specified device's capability.
 
+    Note:
+        - The api will be deprecated.
+
     Note:
         If `device_target` is not specified, get the device capability of the current backend set by context.
 
@@ -253,6 +272,9 @@ def get_device_properties(device_id, device_target=None):
     """
     Get specified device's properties.
 
+    Note:
+        - The api will be deprecated.
+
     Note:
         If `device_target` is not specified, get the device properties of the current backend set by context.
         For Ascend, backend must be initialized before calling this method,
@@ -308,6 +330,9 @@ def get_device_name(device_id, device_target=None):
     """
     Get specified device's name.
 
+    Note:
+        - The api will be deprecated.
+
     Note:
         If `device_target` is not specified, get the device name of the current backend set by context.
         This method always returns "CPU" for CPU backend.
@@ -335,6 +360,9 @@ def get_arch_list(device_target=None):
     """
     Get the architecture list this MindSpore was compiled for.
 
+    Note:
+        - The api will be deprecated.
+
     Note:
         If `device_target` is not specified, get the device name of the current backend set by context.
 
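
Every docstring in mindspore/hal/device.py now carries a deprecation note steering callers to the new mindspore.device_context package added in this release. A migration sketch for the CPU backend, based only on the interface names in the added notes:

import mindspore as ms

# Old hal calls, now flagged for deprecation:
print(ms.hal.is_available("CPU"))
print(ms.hal.device_count("CPU"))

# Per-backend replacements named in the added notes:
print(ms.device_context.cpu.is_available())
print(ms.device_context.cpu.device_count())
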
mindspore/hal/event.py
CHANGED
@@ -17,6 +17,9 @@ from mindspore._c_expression import Event as Event_
 from mindspore._c_expression import Stream as Stream_
 from mindspore._c_expression import current_stream as current_stream_
 from mindspore import _checkparam as Validator
+from mindspore import log as logger
+
+function_event_status = {'Event': False, 'wait': False}
 
 
 class Event(Event_):
@@ -28,6 +31,9 @@ class Event(Event_):
 
     The underlying device events are lazily initialized when the event is first recorded.
 
+    Note:
+        - The api will be deprecated, please use the api :func:`mindspore.runtime.Event` instead.
+
     Args:
         enable_timing (bool, optional): indicates if the event should measure time (default: ``False``)
         blocking (bool, optional): if ``True``, `wait` will be blocking (default: ``False``)
@@ -58,7 +64,13 @@ class Event(Event_):
         [5. 5.]]
        >>> elapsed_time = start.elapsed_time(end)
     """
+
     def __init__(self, enable_timing=False, blocking=False):
+        if not function_event_status['Event']:
+            function_event_status['Event'] = True
+            logger.warning(
+                "WARN_DEPRECATED: The usage of mindspore.hal.Event(enable_timing=True) is deprecated."
+                " Please use mindspore.runtime.Event(enable_timing=True)")
         # pylint: disable=useless-super-delegation
         Validator.check_bool(enable_timing, "enable_timing", "Event")
         Validator.check_bool(blocking, "blocking", "Event")
@@ -118,6 +130,11 @@ class Event(Event_):
         [[4. 4.]
          [4. 4.]]
         """
+        if not function_event_status['wait']:
+            function_event_status['wait'] = True
+            logger.warning(
+                "WARN_DEPRECATED: The usage of mindspore.hal.Event() is deprecated."
+                " Please use mindspore.runtime.Event()")
         if stream is None:
             stream = current_stream_()
         if not isinstance(stream, Stream_):
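
The new module-level guard dict makes each deprecated entry point warn exactly once per process and names the replacement. A migration sketch, assuming mindspore.runtime.Event (added in this release as mindspore/runtime/event.py) keeps the record/synchronize/elapsed_time surface shown in the hal.Event docstring:

import mindspore as ms

start = ms.runtime.Event(enable_timing=True)
end = ms.runtime.Event(enable_timing=True)

start.record()
# ... enqueue device work here ...
end.record()
end.synchronize()
print(start.elapsed_time(end))  # time between the two recorded events
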