mindspore-2.7.0rc1-cp311-cp311-win_amd64.whl → mindspore-2.7.1-cp311-cp311-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: the registry flags this version of mindspore as possibly problematic; see the registry page for further details.
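To reproduce a file-level comparison like the listing below locally, the two wheels can be treated as ordinary zip archives. The following is a minimal sketch using only the Python standard library; the wheel paths are illustrative assumptions, and equal member size is only a heuristic for "unchanged":

import zipfile

# Assumed local filenames; adjust to wherever the wheels were downloaded.
OLD_WHL = "mindspore-2.7.0rc1-cp311-cp311-win_amd64.whl"
NEW_WHL = "mindspore-2.7.1-cp311-cp311-win_amd64.whl"

def members(path):
    """Return {archive member name: uncompressed size} for a wheel (wheels are zip files)."""
    with zipfile.ZipFile(path) as zf:
        return {info.filename: info.file_size for info in zf.infolist()}

old, new = members(OLD_WHL), members(NEW_WHL)
added = sorted(set(new) - set(old))
removed = sorted(set(old) - set(new))
size_changed = sorted(name for name in set(old) & set(new) if old[name] != new[name])

print(f"added: {len(added)}, removed: {len(removed)}, size-changed: {len(size_changed)}")
for name in removed:
    print("-", name)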
- mindspore/.commit_id +1 -1
- mindspore/__init__.py +5 -2
- mindspore/_c_dataengine.cp311-win_amd64.pyd +0 -0
- mindspore/_c_expression.cp311-win_amd64.pyd +0 -0
- mindspore/_c_mindrecord.cp311-win_amd64.pyd +0 -0
- mindspore/_checkparam.py +2 -2
- mindspore/_extends/builtin_operations.py +3 -3
- mindspore/_extends/parallel_compile/akg_compiler/custom.py +1109 -0
- mindspore/_extends/parallel_compile/akg_compiler/gen_custom_op_files.py +1 -1
- mindspore/_extends/parse/__init__.py +3 -3
- mindspore/_extends/parse/compile_config.py +24 -1
- mindspore/_extends/parse/deprecated/deprecated_tensor_method.py +6 -3
- mindspore/_extends/parse/parser.py +28 -22
- mindspore/_extends/parse/resources.py +1 -1
- mindspore/_extends/parse/standard_method.py +23 -2
- mindspore/_extends/parse/trope.py +2 -1
- mindspore/_extends/pijit/pijit_func_white_list.py +9 -27
- mindspore/amp.py +0 -18
- mindspore/avcodec-59.dll +0 -0
- mindspore/avdevice-59.dll +0 -0
- mindspore/avfilter-8.dll +0 -0
- mindspore/avformat-59.dll +0 -0
- mindspore/avutil-57.dll +0 -0
- mindspore/boost/base.py +29 -2
- mindspore/common/__init__.py +18 -12
- mindspore/common/_decorator.py +3 -2
- mindspore/common/_grad_function.py +3 -1
- mindspore/common/_tensor_cpp_method.py +1 -1
- mindspore/common/_tensor_docs.py +371 -96
- mindspore/common/_utils.py +7 -43
- mindspore/common/api.py +434 -135
- mindspore/common/dtype.py +98 -57
- mindspore/common/dump.py +7 -108
- mindspore/common/dynamic_shape/__init__.py +0 -0
- mindspore/common/{auto_dynamic_shape.py → dynamic_shape/auto_dynamic_shape.py} +15 -23
- mindspore/common/dynamic_shape/enable_dynamic.py +197 -0
- mindspore/common/file_system.py +59 -9
- mindspore/common/hook_handle.py +82 -3
- mindspore/common/jit_config.py +5 -1
- mindspore/common/jit_trace.py +27 -12
- mindspore/common/lazy_inline.py +5 -3
- mindspore/common/np_dtype.py +3 -3
- mindspore/common/parameter.py +17 -127
- mindspore/common/recompute.py +4 -13
- mindspore/common/tensor.py +50 -217
- mindspore/communication/_comm_helper.py +11 -1
- mindspore/communication/comm_func.py +138 -4
- mindspore/communication/management.py +85 -1
- mindspore/config/op_info.config +0 -15
- mindspore/context.py +20 -106
- mindspore/dataset/__init__.py +1 -1
- mindspore/dataset/audio/transforms.py +1 -1
- mindspore/dataset/core/config.py +35 -1
- mindspore/dataset/engine/datasets.py +338 -319
- mindspore/dataset/engine/datasets_user_defined.py +38 -22
- mindspore/dataset/engine/datasets_vision.py +1 -1
- mindspore/dataset/engine/validators.py +1 -15
- mindspore/dataset/transforms/c_transforms.py +2 -2
- mindspore/dataset/transforms/transforms.py +3 -3
- mindspore/dataset/vision/__init__.py +1 -1
- mindspore/dataset/vision/py_transforms.py +8 -8
- mindspore/dataset/vision/transforms.py +17 -5
- mindspore/dataset/vision/utils.py +632 -21
- mindspore/device_context/ascend/op_tuning.py +35 -1
- mindspore/dnnl.dll +0 -0
- mindspore/{profiler/common/validator → graph}/__init__.py +9 -1
- mindspore/graph/custom_pass.py +55 -0
- mindspore/include/api/cell.h +28 -4
- mindspore/include/api/cfg.h +24 -7
- mindspore/include/api/context.h +1 -0
- mindspore/include/api/delegate.h +0 -2
- mindspore/include/api/dual_abi_helper.h +100 -19
- mindspore/include/api/graph.h +14 -1
- mindspore/include/api/kernel.h +16 -3
- mindspore/include/api/kernel_api.h +9 -1
- mindspore/include/api/metrics/accuracy.h +9 -0
- mindspore/include/api/model.h +5 -1
- mindspore/include/api/model_group.h +4 -0
- mindspore/include/api/model_parallel_runner.h +2 -0
- mindspore/include/api/status.h +48 -10
- mindspore/include/api/types.h +6 -1
- mindspore/include/dataset/constants.h +9 -0
- mindspore/include/dataset/execute.h +2 -2
- mindspore/jpeg62.dll +0 -0
- mindspore/mindrecord/__init__.py +3 -3
- mindspore/mindrecord/common/exceptions.py +1 -0
- mindspore/mindrecord/config.py +1 -1
- mindspore/{parallel/mpi → mindrecord/core}/__init__.py +4 -1
- mindspore/mindrecord/{shardheader.py → core/shardheader.py} +2 -1
- mindspore/mindrecord/{shardindexgenerator.py → core/shardindexgenerator.py} +1 -1
- mindspore/mindrecord/{shardreader.py → core/shardreader.py} +2 -1
- mindspore/mindrecord/{shardsegment.py → core/shardsegment.py} +2 -2
- mindspore/mindrecord/{shardutils.py → core/shardutils.py} +1 -1
- mindspore/mindrecord/{shardwriter.py → core/shardwriter.py} +1 -1
- mindspore/mindrecord/filereader.py +4 -4
- mindspore/mindrecord/filewriter.py +5 -5
- mindspore/mindrecord/mindpage.py +2 -2
- mindspore/mindrecord/tools/cifar10.py +4 -3
- mindspore/mindrecord/tools/cifar100.py +1 -1
- mindspore/mindrecord/tools/cifar100_to_mr.py +1 -1
- mindspore/mindrecord/tools/cifar10_to_mr.py +6 -6
- mindspore/mindrecord/tools/csv_to_mr.py +1 -1
- mindspore/mindrecord/tools/imagenet_to_mr.py +1 -1
- mindspore/mindrecord/tools/mnist_to_mr.py +1 -1
- mindspore/mindrecord/tools/tfrecord_to_mr.py +1 -1
- mindspore/mindspore_backend_common.dll +0 -0
- mindspore/mindspore_backend_manager.dll +0 -0
- mindspore/mindspore_cluster.dll +0 -0
- mindspore/mindspore_common.dll +0 -0
- mindspore/mindspore_core.dll +0 -0
- mindspore/mindspore_cpu.dll +0 -0
- mindspore/mindspore_dump.dll +0 -0
- mindspore/mindspore_frontend.dll +0 -0
- mindspore/mindspore_glog.dll +0 -0
- mindspore/mindspore_hardware_abstract.dll +0 -0
- mindspore/mindspore_memory_pool.dll +0 -0
- mindspore/mindspore_ms_backend.dll +0 -0
- mindspore/mindspore_ops.dll +0 -0
- mindspore/{mindspore_ops_host.dll → mindspore_ops_cpu.dll} +0 -0
- mindspore/mindspore_profiler.dll +0 -0
- mindspore/mindspore_pyboost.dll +0 -0
- mindspore/mindspore_pynative.dll +0 -0
- mindspore/mindspore_runtime_pipeline.dll +0 -0
- mindspore/mindspore_runtime_utils.dll +0 -0
- mindspore/mindspore_tools.dll +0 -0
- mindspore/mint/__init__.py +15 -10
- mindspore/mint/distributed/__init__.py +4 -0
- mindspore/mint/distributed/distributed.py +392 -69
- mindspore/mint/nn/__init__.py +2 -16
- mindspore/mint/nn/functional.py +4 -110
- mindspore/mint/nn/layer/__init__.py +0 -2
- mindspore/mint/nn/layer/_functions.py +1 -2
- mindspore/mint/nn/layer/activation.py +0 -6
- mindspore/mint/nn/layer/basic.py +0 -47
- mindspore/mint/nn/layer/conv.py +10 -10
- mindspore/mint/nn/layer/normalization.py +11 -16
- mindspore/mint/nn/layer/pooling.py +0 -4
- mindspore/nn/__init__.py +1 -3
- mindspore/nn/cell.py +231 -239
- mindspore/nn/layer/activation.py +4 -2
- mindspore/nn/layer/basic.py +56 -14
- mindspore/nn/layer/container.py +16 -0
- mindspore/nn/layer/embedding.py +4 -169
- mindspore/nn/layer/image.py +1 -1
- mindspore/nn/layer/normalization.py +2 -1
- mindspore/nn/layer/thor_layer.py +4 -85
- mindspore/nn/optim/ada_grad.py +0 -1
- mindspore/nn/optim/adafactor.py +0 -1
- mindspore/nn/optim/adam.py +32 -127
- mindspore/nn/optim/adamax.py +0 -1
- mindspore/nn/optim/asgd.py +0 -1
- mindspore/nn/optim/ftrl.py +8 -102
- mindspore/nn/optim/lamb.py +1 -4
- mindspore/nn/optim/lars.py +0 -3
- mindspore/nn/optim/lazyadam.py +25 -218
- mindspore/nn/optim/momentum.py +5 -43
- mindspore/nn/optim/optimizer.py +6 -55
- mindspore/nn/optim/proximal_ada_grad.py +0 -1
- mindspore/nn/optim/rmsprop.py +0 -1
- mindspore/nn/optim/rprop.py +0 -1
- mindspore/nn/optim/sgd.py +0 -1
- mindspore/nn/optim/tft_wrapper.py +2 -4
- mindspore/nn/optim/thor.py +0 -2
- mindspore/nn/probability/bijector/bijector.py +7 -8
- mindspore/nn/probability/bijector/gumbel_cdf.py +2 -2
- mindspore/nn/probability/bijector/power_transform.py +20 -21
- mindspore/nn/probability/bijector/scalar_affine.py +5 -5
- mindspore/nn/probability/bijector/softplus.py +13 -14
- mindspore/nn/probability/distribution/_utils/utils.py +2 -2
- mindspore/nn/wrap/cell_wrapper.py +39 -5
- mindspore/nn/wrap/grad_reducer.py +4 -89
- mindspore/numpy/array_creations.py +4 -4
- mindspore/numpy/fft.py +9 -9
- mindspore/numpy/utils_const.py +1 -1
- mindspore/{nn/reinforcement → onnx}/__init__.py +5 -8
- mindspore/onnx/onnx_export.py +137 -0
- mindspore/opencv_core4110.dll +0 -0
- mindspore/opencv_imgcodecs4110.dll +0 -0
- mindspore/{opencv_imgproc452.dll → opencv_imgproc4110.dll} +0 -0
- mindspore/ops/__init__.py +2 -0
- mindspore/ops/_grad_experimental/grad_comm_ops.py +38 -2
- mindspore/ops/_grad_experimental/grad_inner_ops.py +0 -9
- mindspore/ops/_op_impl/aicpu/__init__.py +0 -10
- mindspore/ops/_op_impl/cpu/__init__.py +1 -5
- mindspore/ops/_op_impl/cpu/{buffer_append.py → joinedstr_op.py} +8 -8
- mindspore/ops/auto_generate/cpp_create_prim_instance_helper.py +28 -24
- mindspore/ops/auto_generate/gen_extend_func.py +6 -11
- mindspore/ops/auto_generate/gen_ops_def.py +385 -154
- mindspore/ops/auto_generate/gen_ops_prim.py +5676 -5167
- mindspore/ops/communication.py +97 -0
- mindspore/ops/composite/__init__.py +5 -2
- mindspore/ops/composite/base.py +16 -2
- mindspore/ops/composite/multitype_ops/__init__.py +3 -1
- mindspore/ops/composite/multitype_ops/_compile_utils.py +150 -8
- mindspore/ops/composite/multitype_ops/_constexpr_utils.py +1 -1
- mindspore/ops/composite/multitype_ops/add_impl.py +7 -0
- mindspore/ops/composite/multitype_ops/mod_impl.py +27 -0
- mindspore/ops/function/__init__.py +2 -0
- mindspore/ops/function/array_func.py +24 -18
- mindspore/ops/function/comm_func.py +3883 -0
- mindspore/ops/function/debug_func.py +7 -6
- mindspore/ops/function/grad/grad_func.py +4 -12
- mindspore/ops/function/math_func.py +89 -86
- mindspore/ops/function/nn_func.py +92 -313
- mindspore/ops/function/random_func.py +9 -18
- mindspore/ops/functional.py +4 -1
- mindspore/ops/functional_overload.py +377 -30
- mindspore/ops/operations/__init__.py +2 -5
- mindspore/ops/operations/_custom_ops_utils.py +7 -9
- mindspore/ops/operations/_inner_ops.py +12 -50
- mindspore/ops/operations/_rl_inner_ops.py +0 -933
- mindspore/ops/operations/array_ops.py +5 -50
- mindspore/ops/operations/comm_ops.py +95 -17
- mindspore/ops/operations/custom_ops.py +237 -22
- mindspore/ops/operations/debug_ops.py +33 -35
- mindspore/ops/operations/manually_defined/ops_def.py +39 -318
- mindspore/ops/operations/math_ops.py +5 -5
- mindspore/ops/operations/nn_ops.py +3 -3
- mindspore/ops/operations/sparse_ops.py +0 -83
- mindspore/ops/primitive.py +4 -27
- mindspore/ops/tensor_method.py +88 -10
- mindspore/ops_generate/aclnn/aclnn_kernel_register_auto_cc_generator.py +5 -5
- mindspore/ops_generate/aclnn/gen_aclnn_implement.py +8 -8
- mindspore/ops_generate/api/functions_cc_generator.py +53 -4
- mindspore/ops_generate/api/tensor_func_reg_cpp_generator.py +25 -11
- mindspore/ops_generate/common/gen_constants.py +11 -10
- mindspore/ops_generate/common/op_proto.py +18 -1
- mindspore/ops_generate/common/template.py +102 -245
- mindspore/ops_generate/common/template_utils.py +212 -0
- mindspore/ops_generate/gen_custom_ops.py +69 -0
- mindspore/ops_generate/op_def/ops_def_cc_generator.py +78 -7
- mindspore/ops_generate/op_def_py/base_op_prim_py_generator.py +360 -0
- mindspore/ops_generate/op_def_py/custom_op_prim_py_generator.py +140 -0
- mindspore/ops_generate/op_def_py/op_def_py_generator.py +54 -7
- mindspore/ops_generate/op_def_py/op_prim_py_generator.py +5 -312
- mindspore/ops_generate/pyboost/auto_grad_impl_cc_generator.py +74 -17
- mindspore/ops_generate/pyboost/auto_grad_reg_cc_generator.py +22 -5
- mindspore/ops_generate/pyboost/gen_pyboost_func.py +0 -16
- mindspore/ops_generate/pyboost/op_template_parser.py +3 -2
- mindspore/ops_generate/pyboost/pyboost_functions_cpp_generator.py +21 -5
- mindspore/ops_generate/pyboost/pyboost_functions_h_generator.py +2 -2
- mindspore/ops_generate/pyboost/pyboost_functions_impl_cpp_generator.py +30 -10
- mindspore/ops_generate/pyboost/pyboost_grad_function_cpp_generator.py +10 -3
- mindspore/ops_generate/pyboost/pyboost_internal_kernel_info_adapter_generator.py +1 -1
- mindspore/ops_generate/pyboost/pyboost_native_grad_functions_generator.py +19 -9
- mindspore/ops_generate/pyboost/pyboost_op_cpp_code_generator.py +71 -28
- mindspore/ops_generate/pyboost/pyboost_overload_functions_cpp_generator.py +10 -9
- mindspore/ops_generate/pyboost/pyboost_utils.py +27 -16
- mindspore/ops_generate/resources/yaml_loader.py +13 -0
- mindspore/ops_generate/tensor_py_cc_generator.py +2 -2
- mindspore/parallel/_auto_parallel_context.py +5 -15
- mindspore/parallel/_cell_wrapper.py +1 -1
- mindspore/parallel/_parallel_serialization.py +4 -6
- mindspore/parallel/_ps_context.py +2 -2
- mindspore/parallel/_utils.py +34 -17
- mindspore/parallel/auto_parallel.py +23 -9
- mindspore/parallel/checkpoint_transform.py +20 -2
- mindspore/parallel/cluster/process_entity/_api.py +28 -33
- mindspore/parallel/cluster/process_entity/_utils.py +9 -5
- mindspore/parallel/cluster/run.py +5 -3
- mindspore/{experimental/llm_boost/ascend_native → parallel/distributed}/__init__.py +21 -22
- mindspore/parallel/distributed/distributed_data_parallel.py +393 -0
- mindspore/parallel/distributed/flatten_grad_buffer.py +295 -0
- mindspore/parallel/function/reshard_func.py +6 -5
- mindspore/parallel/nn/parallel_cell_wrapper.py +40 -3
- mindspore/parallel/nn/parallel_grad_reducer.py +0 -8
- mindspore/parallel/shard.py +7 -21
- mindspore/parallel/strategy.py +336 -0
- mindspore/parallel/transform_safetensors.py +127 -20
- mindspore/profiler/analysis/viewer/ascend_kernel_details_viewer.py +13 -9
- mindspore/profiler/analysis/viewer/ascend_op_memory_viewer.py +1 -1
- mindspore/profiler/analysis/viewer/ms_minddata_viewer.py +1 -1
- mindspore/profiler/common/constant.py +5 -0
- mindspore/profiler/common/file_manager.py +9 -0
- mindspore/profiler/common/msprof_cmd_tool.py +40 -4
- mindspore/profiler/common/path_manager.py +65 -24
- mindspore/profiler/common/profiler_context.py +27 -14
- mindspore/profiler/common/profiler_info.py +3 -3
- mindspore/profiler/common/profiler_meta_data.py +1 -0
- mindspore/profiler/common/profiler_op_analyse.py +10 -6
- mindspore/profiler/common/profiler_path_manager.py +13 -0
- mindspore/profiler/common/util.py +30 -3
- mindspore/profiler/dynamic_profiler.py +91 -46
- mindspore/profiler/envprofiler.py +30 -5
- mindspore/profiler/experimental_config.py +18 -2
- mindspore/profiler/platform/cpu_profiler.py +10 -4
- mindspore/profiler/platform/npu_profiler.py +34 -7
- mindspore/profiler/profiler.py +193 -145
- mindspore/profiler/profiler_action_controller.py +1 -1
- mindspore/profiler/profiler_interface.py +2 -2
- mindspore/rewrite/symbol_tree/symbol_tree.py +1 -1
- mindspore/run_check/_check_version.py +108 -24
- mindspore/runtime/__init__.py +9 -6
- mindspore/runtime/executor.py +35 -0
- mindspore/runtime/memory.py +113 -0
- mindspore/runtime/thread_bind_core.py +1 -1
- mindspore/swresample-4.dll +0 -0
- mindspore/swscale-6.dll +0 -0
- mindspore/tinyxml2.dll +0 -0
- mindspore/{experimental/llm_boost → tools}/__init__.py +5 -5
- mindspore/tools/data_dump.py +130 -0
- mindspore/tools/sdc_detect.py +91 -0
- mindspore/tools/stress_detect.py +63 -0
- mindspore/train/__init__.py +6 -6
- mindspore/train/_utils.py +8 -21
- mindspore/train/amp.py +6 -7
- mindspore/train/callback/_callback.py +2 -1
- mindspore/train/callback/_checkpoint.py +1 -17
- mindspore/train/callback/_flops_collector.py +10 -6
- mindspore/train/callback/_train_fault_tolerance.py +72 -25
- mindspore/train/data_sink.py +5 -9
- mindspore/train/dataset_helper.py +5 -5
- mindspore/train/model.py +41 -230
- mindspore/train/serialization.py +160 -401
- mindspore/train/train_thor/model_thor.py +2 -2
- mindspore/turbojpeg.dll +0 -0
- mindspore/utils/__init__.py +6 -3
- mindspore/utils/dlpack.py +92 -0
- mindspore/utils/dryrun.py +1 -1
- mindspore/utils/runtime_execution_order_check.py +10 -0
- mindspore/utils/sdc_detect.py +14 -12
- mindspore/utils/stress_detect.py +43 -0
- mindspore/utils/utils.py +152 -16
- mindspore/version.py +1 -1
- {mindspore-2.7.0rc1.dist-info → mindspore-2.7.1.dist-info}/METADATA +3 -2
- {mindspore-2.7.0rc1.dist-info → mindspore-2.7.1.dist-info}/RECORD +330 -344
- mindspore/_extends/remote/kernel_build_server_ascend.py +0 -75
- mindspore/communication/_hccl_management.py +0 -297
- mindspore/experimental/llm_boost/ascend_native/llama_boost_ascend_native.py +0 -207
- mindspore/experimental/llm_boost/ascend_native/llm_boost.py +0 -52
- mindspore/experimental/llm_boost/atb/__init__.py +0 -23
- mindspore/experimental/llm_boost/atb/boost_base.py +0 -385
- mindspore/experimental/llm_boost/atb/llama_boost.py +0 -137
- mindspore/experimental/llm_boost/atb/qwen_boost.py +0 -124
- mindspore/experimental/llm_boost/register.py +0 -130
- mindspore/experimental/llm_boost/utils.py +0 -31
- mindspore/include/OWNERS +0 -7
- mindspore/mindspore_cpu_res_manager.dll +0 -0
- mindspore/mindspore_ops_kernel_common.dll +0 -0
- mindspore/mindspore_res_manager.dll +0 -0
- mindspore/nn/optim/_dist_optimizer_registry.py +0 -111
- mindspore/nn/reinforcement/_batch_read_write.py +0 -142
- mindspore/nn/reinforcement/_tensors_queue.py +0 -152
- mindspore/nn/reinforcement/tensor_array.py +0 -145
- mindspore/opencv_core452.dll +0 -0
- mindspore/opencv_imgcodecs452.dll +0 -0
- mindspore/ops/_op_impl/aicpu/priority_replay_buffer.py +0 -113
- mindspore/ops/_op_impl/aicpu/reservoir_replay_buffer.py +0 -96
- mindspore/ops/_op_impl/aicpu/sparse_cross.py +0 -42
- mindspore/ops/_op_impl/cpu/buffer_get.py +0 -28
- mindspore/ops/_op_impl/cpu/buffer_sample.py +0 -28
- mindspore/ops/_op_impl/cpu/priority_replay_buffer.py +0 -42
- mindspore/ops/operations/_tensor_array.py +0 -359
- mindspore/ops/operations/rl_ops.py +0 -288
- mindspore/parallel/_offload_context.py +0 -275
- mindspore/parallel/_recovery_context.py +0 -115
- mindspore/parallel/_transformer/__init__.py +0 -35
- mindspore/parallel/_transformer/layers.py +0 -765
- mindspore/parallel/_transformer/loss.py +0 -251
- mindspore/parallel/_transformer/moe.py +0 -693
- mindspore/parallel/_transformer/op_parallel_config.py +0 -222
- mindspore/parallel/_transformer/transformer.py +0 -3124
- mindspore/parallel/mpi/_mpi_config.py +0 -116
- mindspore/profiler/common/validator/validate_path.py +0 -84
- mindspore/train/memory_profiling_pb2.py +0 -298
- mindspore/utils/hooks.py +0 -81
- /mindspore/common/{_auto_dynamic.py → dynamic_shape/_auto_dynamic.py} +0 -0
- {mindspore-2.7.0rc1.dist-info → mindspore-2.7.1.dist-info}/WHEEL +0 -0
- {mindspore-2.7.0rc1.dist-info → mindspore-2.7.1.dist-info}/entry_points.txt +0 -0
- {mindspore-2.7.0rc1.dist-info → mindspore-2.7.1.dist-info}/top_level.txt +0 -0
mindspore/experimental/llm_boost/atb/boost_base.py (deleted)
@@ -1,385 +0,0 @@
-# Copyright 2024 Huawei Technologies Co., Ltd
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ============================================================================
-"""boost base class"""
-from enum import Enum
-import numpy as np
-import mindspore as ms
-from mindspore import ops, Tensor
-from mindspore import log as logger
-import mindspore.common.dtype as mstype
-from mindspore._c_expression import _set_format
-from mindspore.common.parameter import Parameter
-from mindspore.experimental.llm_boost.utils import get_real_rank, get_real_group_size
-from mindspore.common.initializer import Zero
-
-FORMAT_NZ = "FRACTAL_NZ"
-BUILDIN_BACKEND_NAME = "ATB"
-
-
-class PositionEmbeddingType(int, Enum):
-    ROPE = 0
-    ALIBI = 1
-    ABSOLUTE = 2
-
-
-class NormType(int, Enum):
-    RMS_NORM = 0
-    LAYER_NORM = 1
-
-
-class AttentionMask:
-    """attention mask"""
-
-    @classmethod
-    def static(cls, max_seq_len, dtype=mstype.float16, need_nz=False):
-        """cache mask"""
-        bias_cache = Tensor(
-            np.tril(np.ones((max_seq_len, max_seq_len), dtype=np.bool_))
-        ).reshape(max_seq_len, max_seq_len)
-        bias_cache = ~bias_cache
-        if dtype == mstype.float16:
-            mask_value = Tensor(np.finfo(np.float32).min, mstype.float16)
-        else:
-            mask_value = Tensor(1)
-        attn_mask = ops.masked_fill(
-            Tensor(np.zeros((max_seq_len, max_seq_len)), dtype=mstype.float16),
-            bias_cache,
-            mask_value,
-        )
-        if need_nz:
-            # ND -> NZ
-            attn_mask = ops.reshape(attn_mask, (1, max_seq_len, max_seq_len))
-            attn_mask = ops.reshape(attn_mask, (1, max_seq_len, max_seq_len // 16, 16))
-            attn_mask = ops.transpose(attn_mask, (0, 2, 1, 3)).contiguous()
-            attn_mask = _set_format(attn_mask, FORMAT_NZ)
-        return attn_mask
-
-
-class AtbBoostBase:
-    """atb boost base class"""
-
-    def __init__(self, config):
-        super().__init__()
-        self.backend_name = BUILDIN_BACKEND_NAME
-        self.is_first_iteration = False
-        self.config = config
-        self.dtype = config.compute_dtype
-        self.num_heads = config.num_heads
-        self.num_kv_heads = config.n_kv_heads if config.n_kv_heads else self.num_heads
-        self.num_layers = config.num_layers
-        self.n_kv_heads = config.n_kv_heads if config.n_kv_heads else config.num_heads
-        self.head_dim = config.hidden_size // self.num_heads
-        self.need_nz = False
-        if hasattr(config, "need_nz"):
-            self.need_nz = config.need_nz
-        self.placeholder = Tensor(np.zeros(1), dtype=self.dtype)
-        self.lm_head_indices_fake = Tensor([0], dtype=mstype.int64)
-        self.position_embedding_type = PositionEmbeddingType.ROPE
-        self.add_norm_enable = True
-        self.max_decode_length = self.config.max_decode_length
-        self.max_base_len = 128
-        self.attn_mask = AttentionMask.static(
-            self.max_base_len, dtype=self.dtype, need_nz=self.need_nz
-        )
-
-        self.cast = ops.Cast()
-        self.reshape = ops.Reshape()
-        self.kv_quant = None
-        self.rank_id = get_real_rank()
-        self.device_num = get_real_group_size()
-        self.ascend_weight = []
-        self.k_caches = []
-        self.v_caches = []
-
-    def _convert_tensor_format_and_dtype(self, tensor, dtype=mstype.float16):
-        tensor = self.cast(tensor, dtype=dtype)
-        if self.need_nz:
-            tensor = _set_format(tensor, FORMAT_NZ)
-        return tensor
-
-    def _convert_qkv_concat_weight(self, param_dict):
-        """convert qkv concat weight"""
-        for i in range(self.num_layers):
-            # qkv weight concat
-            wq_weight_name = f"model.layers.{i}.attention.wq.weight"
-            wk_weight_name = f"model.layers.{i}.attention.wk.weight"
-            wv_weight_name = f"model.layers.{i}.attention.wv.weight"
-            qkv_concat_weight_name = f"model.layers.{i}.attention.w_qkv.weight"
-            if wq_weight_name not in param_dict:
-                break
-            wq_weight = param_dict[wq_weight_name].asnumpy()
-            wk_weight = param_dict[wk_weight_name].asnumpy()
-            wv_weight = param_dict[wv_weight_name].asnumpy()
-            qkv_weight = np.concatenate((wq_weight, wk_weight, wv_weight), 0)
-            param_dict[qkv_concat_weight_name] = Parameter(
-                qkv_weight, name=qkv_concat_weight_name
-            )
-
-            # gate hidden weight concat
-            ffn_gate_weight_name = f"model.layers.{i}.feed_forward.w1.weight"
-            ffn_hidden_weight_name = f"model.layers.{i}.feed_forward.w3.weight"
-            gate_hidden_concat_weight_name = (
-                f"model.layers.{i}.feed_forward.w_gate_hidden.weight"
-            )
-
-            ffn_gate_weight = param_dict[ffn_gate_weight_name].asnumpy()
-            ffn_hidden_weight = param_dict[ffn_hidden_weight_name].asnumpy()
-            gate_hidden_weight = np.concatenate((ffn_gate_weight, ffn_hidden_weight), 0)
-            param_dict[gate_hidden_concat_weight_name] = Parameter(
-                gate_hidden_weight, name=gate_hidden_concat_weight_name
-            )
-
-            param_dict.pop(wq_weight_name)
-            param_dict.pop(wk_weight_name)
-            param_dict.pop(wv_weight_name)
-            param_dict.pop(ffn_gate_weight_name)
-            param_dict.pop(ffn_hidden_weight_name)
-            logger.info(f"transform: {qkv_concat_weight_name}")
-            logger.info(f"transform: {gate_hidden_concat_weight_name}")
-
-        for i in range(self.num_layers):
-            # qkv bias concat
-            wq_bias_name = f"model.layers.{i}.attention.wq.bias"
-            wk_bias_name = f"model.layers.{i}.attention.wk.bias"
-            wv_bias_name = f"model.layers.{i}.attention.wv.bias"
-            qkv_concat_bias_name = f"model.layers.{i}.attention.w_qkv.bias"
-            if wq_bias_name not in param_dict:
-                break
-
-            wq_bias_weight = param_dict[wq_bias_name].asnumpy()
-            wk_bias_weight = param_dict[wk_bias_name].asnumpy()
-            wv_bias_weight = param_dict[wv_bias_name].asnumpy()
-            qkv_bias_weight = np.concatenate(
-                (wq_bias_weight, wk_bias_weight, wv_bias_weight), 0
-            )
-            param_dict[qkv_concat_bias_name] = Parameter(
-                qkv_bias_weight, name=qkv_concat_bias_name
-            )
-
-            param_dict.pop(wq_bias_name)
-            param_dict.pop(wk_bias_name)
-            param_dict.pop(wv_bias_name)
-            logger.info(f"transform: {qkv_concat_bias_name}")
-        return param_dict
-
-    def set_weights(self, parm_dict, dtype=mstype.float16):
-        """set weights for llm boost"""
-        self._convert_qkv_concat_weight(parm_dict)
-        embedding_weight_name = "model.tok_embeddings.embedding_weight"
-        attention_norm_name = "attention_norm"
-        qkv_name = "attention.w_qkv"
-        o_name = "attention.wo"
-        mlp_norm_name = "ffn_norm"
-        mlp_gate_name = "feed_forward.w_gate_hidden"
-        mlp_down_name = "feed_forward.w2"
-        norm_out_name = "model.norm_out"
-        lm_head_name = "lm_head"
-        placeholder = Parameter(Tensor(np.zeros(1), dtype=dtype))
-
-        ascend_weight = []
-        ascend_weight.append(self.cast(parm_dict[embedding_weight_name], dtype))
-        for i in range(self.num_layers):
-            ascend_weight.append(
-                self._convert_tensor_format_and_dtype(
-                    parm_dict[f"model.layers.{i}.{attention_norm_name}.weight"], dtype
-                )
-            )
-            ascend_weight.extend([placeholder] * 3)
-
-            ascend_weight.append(
-                self._convert_tensor_format_and_dtype(
-                    parm_dict[f"model.layers.{i}.{qkv_name}.weight"], dtype
-                )
-            )
-            ascend_weight.append(
-                self._convert_tensor_format_and_dtype(
-                    parm_dict.get(f"model.layers.{i}.{qkv_name}.bias", placeholder),
-                    dtype,
-                )
-            )
-            ascend_weight.extend([placeholder] * 16)
-
-            ascend_weight.append(
-                self._convert_tensor_format_and_dtype(
-                    parm_dict[f"model.layers.{i}.{o_name}.weight"], dtype
-                )
-            )
-            ascend_weight.append(
-                self._convert_tensor_format_and_dtype(
-                    parm_dict.get(f"model.layers.{i}.{o_name}.bias", placeholder), dtype
-                )
-            )
-            ascend_weight.extend([placeholder] * 4)
-
-            ascend_weight.append(
-                self._convert_tensor_format_and_dtype(
-                    parm_dict[f"model.layers.{i}.{mlp_norm_name}.weight"], dtype
-                )
-            )
-            ascend_weight.extend([placeholder] * 3)
-
-            ascend_weight.append(
-                self._convert_tensor_format_and_dtype(
-                    parm_dict[f"model.layers.{i}.{mlp_gate_name}.weight"], dtype
-                )
-            )
-            ascend_weight.append(
-                self._convert_tensor_format_and_dtype(
-                    parm_dict.get(
-                        f"model.layers.{i}.{mlp_gate_name}.bias", placeholder
-                    ),
-                    dtype,
-                )
-            )
-            ascend_weight.extend([placeholder] * 10)
-
-            ascend_weight.append(
-                self._convert_tensor_format_and_dtype(
-                    parm_dict[f"model.layers.{i}.{mlp_down_name}.weight"], dtype
-                )
-            )
-            ascend_weight.append(
-                self._convert_tensor_format_and_dtype(
-                    parm_dict.get(
-                        f"model.layers.{i}.{mlp_down_name}.bias", placeholder
-                    ),
-                    dtype,
-                )
-            )
-            ascend_weight.extend([placeholder] * 4)
-
-        ascend_weight.append(
-            self._convert_tensor_format_and_dtype(
-                parm_dict[f"{norm_out_name}.weight"], dtype
-            )
-        )
-        ascend_weight.append(
-            self._convert_tensor_format_and_dtype(
-                parm_dict[f"{lm_head_name}.weight"], dtype
-            )
-        )
-        self.ascend_weight = ascend_weight
-        self.atb_encoder_operation.set_weights(ascend_weight)
-        self.atb_decoder_operation.set_weights(ascend_weight)
-
-    def set_kvcache(self, k_caches=None, v_caches=None):
-        """set kv_cache for llm boost"""
-        if not k_caches or v_caches:
-            if self.need_nz:
-                kv_shape = (
-                    self.config.num_blocks,
-                    self.num_kv_heads * self.head_dim // self.device_num // 16,
-                    self.config.block_size,
-                    16,
-                )
-                k_caches = [
-                    _set_format(
-                        Parameter(
-                            Tensor(shape=kv_shape, dtype=self.dtype, init=Zero())
-                        ),
-                        FORMAT_NZ,
-                    )
-                    for _ in range(self.num_layers)
-                ]
-                v_caches = [
-                    _set_format(
-                        Parameter(
-                            Tensor(shape=kv_shape, dtype=self.dtype, init=Zero())
-                        ),
-                        FORMAT_NZ,
-                    )
-                    for _ in range(self.num_layers)
-                ]
-            else:
-                kv_shape = (
-                    self.config.num_blocks,
-                    self.config.block_size,
-                    self.num_kv_heads // self.device_num,
-                    self.head_dim,
-                )
-                k_caches = [
-                    Parameter(Tensor(shape=kv_shape, dtype=self.dtype, init=Zero()))
-                    for _ in range(self.num_layers)
-                ]
-                v_caches = [
-                    Parameter(Tensor(shape=kv_shape, dtype=self.dtype, init=Zero()))
-                    for _ in range(self.num_layers)
-                ]
-        self.k_caches = k_caches
-        self.v_caches = v_caches
-        self.atb_encoder_operation.set_kvcache(k_caches, v_caches)
-        self.atb_decoder_operation.set_kvcache(k_caches, v_caches)
-
-    def add_flags(self, is_first_iteration):
-        """add_flags."""
-        self.is_first_iteration = is_first_iteration
-
-    def _execute_operator(self, acl_inputs, acl_param):
-        """execute operator."""
-        if self.is_first_iteration:
-            acl_model_out = self.atb_encoder_operation.forward(acl_inputs, acl_param)
-        else:
-            acl_model_out = self.atb_decoder_operation.forward(acl_inputs, acl_param)
-        acl_hidden_state = acl_model_out[0]
-        return acl_hidden_state
-
-    def forward(self, boost_inputs):
-        r"""
-        LlmBoost forward.
-        """
-        input_ids = boost_inputs.get("input_ids", None)
-        position_ids = boost_inputs.get("position_ids", None)
-        cos_embed = boost_inputs.get("cos_embed", None)
-        sin_embed = boost_inputs.get("sin_embed", None)
-        block_tables = boost_inputs.get("block_tables", None)
-        slot_mapping = boost_inputs.get("slot_mapping", None)
-        batch_valid_length = boost_inputs.get("batch_valid_length", None)
-        lm_head_indices = boost_inputs.get("lm_head_indices", None)
-        seqLen = boost_inputs.get("seq_lens", None)
-        input_ids = self.reshape(input_ids, (-1,))
-        if self.is_first_iteration:
-            attention_mask = self.attn_mask
-        else:
-            if position_ids is None:
-                position_ids = batch_valid_length - 1
-            attention_mask = self.placeholder
-            lm_head_indices = self.lm_head_indices_fake
-
-        if input_ids is not None and input_ids.dtype != mstype.int64:
-            input_ids = self.cast(input_ids, mstype.int64)
-        if position_ids is not None and position_ids.dtype != mstype.int64:
-            position_ids = self.cast(position_ids, mstype.int64)
-        if batch_valid_length is not None and batch_valid_length.dtype != mstype.int32:
-            batch_valid_length = self.cast(batch_valid_length, mstype.int32)
-        if lm_head_indices is not None and lm_head_indices.dtype != mstype.int64:
-            lm_head_indices = self.cast(lm_head_indices, mstype.int64)
-
-        acl_inputs, acl_param = self._prepare_inputs(
-            prefill=self.is_first_iteration,
-            input_ids=input_ids,
-            position_ids=position_ids,
-            cos_embed=cos_embed,
-            sin_embed=sin_embed,
-            attention_mask=attention_mask,
-            block_tables=block_tables,
-            slots=slot_mapping,
-            input_lengths=batch_valid_length,
-            lm_head_indices=lm_head_indices,
-            seqLen=seqLen,
-        )
-        ms.hal.synchronize()
-        logits = self._execute_operator(acl_inputs, acl_param)
-        logits = self.cast(logits, mstype.float32)
-        return logits
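For orientation, the AttentionMask.static helper in the removed file above builds an additive causal mask: positions at or below the diagonal stay zero, positions above it receive a large negative bias before softmax. The following plain-NumPy illustration (not MindSpore code; the sequence length is arbitrary) shows the same idea:

import numpy as np

def causal_additive_mask(max_seq_len, neg=np.finfo(np.float32).min):
    # Lower triangle (including the diagonal) is kept; future positions are masked out.
    keep = np.tril(np.ones((max_seq_len, max_seq_len), dtype=np.bool_))
    mask = np.zeros((max_seq_len, max_seq_len), dtype=np.float32)
    mask[~keep] = neg  # large negative bias so softmax ignores future tokens
    return mask

print(causal_additive_mask(4))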
mindspore/experimental/llm_boost/atb/llama_boost.py (deleted)
@@ -1,137 +0,0 @@
-# Copyright 2024 Huawei Technologies Co., Ltd
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ============================================================================
-"""llm boost"""
-import json
-import mindspore.common.dtype as mstype
-from mindspore.experimental.llm_boost.atb.boost_base import (
-    AtbBoostBase,
-    PositionEmbeddingType,
-    NormType,
-)
-from mindspore._c_expression import LlmBoostBinder
-from mindspore.experimental.llm_boost.register import LlmBoostRegister, LlmBoostType
-
-CPP_LLAMA_MODEL_CLASS_NAME = "llama_LlamaDecoderModel"
-
-
-@LlmBoostRegister.register(LlmBoostType.BUILDIN, "Llama")
-class LlamaBoost(AtbBoostBase):
-    """LlamaBoost class"""
-
-    def __init__(self, config):
-        super().__init__(config)
-        self.in_tensor_length = 13
-        self.acl_encoder_operation_inputs = [None] * self.in_tensor_length
-        self.acl_decoder_operation_inputs = [None] * self.in_tensor_length
-        self.atb_encoder_operation = LlmBoostBinder(
-            self.backend_name, CPP_LLAMA_MODEL_CLASS_NAME
-        )
-        self.atb_decoder_operation = LlmBoostBinder(
-            self.backend_name, CPP_LLAMA_MODEL_CLASS_NAME
-        )
-
-    def init(self):
-        """
-        Initialize the object
-        returns True if object needs input manipulation by mindformers
-        """
-
-        coder_param = {
-            "normEps": self.config.rms_norm_eps,
-            "normType": NormType.RMS_NORM,
-            "numAttentionHeadsPerRank": self.config.num_heads // self.device_num,
-            "hiddenSizePerAttentionHead": self.head_dim,
-            "numHiddenLayers": self.num_layers,
-            "numKeyValueHeadsPerRank": self.n_kv_heads // self.device_num,
-            "skipWordEmbedding": False,
-            "isFA": False,
-            "isBF16": self.dtype == mstype.bfloat16,
-            "packQuantType": [[1, 1] for _ in range(self.num_layers)],
-            "linearQuantType": [
-                [0, -1, -1, 0, 0, -1, 0] for _ in range(self.num_layers)
-            ],
-            "linearTransposeType": [
-                [1, -1, -1, 1, 1, -1, 1] for i in range(self.num_layers)
-            ],
-            "isEmbeddingParallel": False,
-            "isLmHeadParallel": not self.config.parallel_config.vocab_emb_dp,
-            "lmHeadTransposeType": 1,
-            "enableSwiGLU": True,
-            "enablekvQuant": self.kv_quant is not None,
-            "rank": self.rank_id,
-            "worldSize": self.device_num,
-            "backend": self.config.communication_backend,
-            "rankTableFile": "",
-            "positionEmbeddingType": PositionEmbeddingType.ROPE,
-            "hiddenSize": self.config.hidden_size,
-            "gemma": False,
-            "enableAddNorm": False,
-            "enableCompressHead": False,
-            "isUnpadInputs": True,
-        }
-        encoder_param = {
-            **coder_param,
-            "isPrefill": True,
-            "enableLcoc": True,
-            "enableSpeculate": False,
-            "skipWordEmbedding": False,
-            "enableSplitFuse": False,
-        }
-        decoder_param = {
-            **coder_param,
-            "isPrefill": False,
-            "enableLcoc": False,
-            "enableSpeculate": False,
-        }
-        self.atb_encoder_operation.init(json.dumps({**encoder_param}))
-        self.atb_decoder_operation.init(json.dumps({**decoder_param}))
-        return True
-
-    def _prepare_inputs(
-        self,
-        prefill=None,
-        input_ids=None,
-        position_ids=None,
-        cos_embed=None,
-        sin_embed=None,
-        attention_mask=None,
-        block_tables=None,
-        slots=None,
-        input_lengths=None,
-        lm_head_indices=None,
-        seqLen=None,
-        **kwargs
-    ):
-        """prepare inputs"""
-        self.acl_param = json.dumps(
-            {
-                "seqLen": seqLen,
-            }
-        )
-
-        self.acl_decoder_operation_inputs[0] = input_ids
-        self.acl_decoder_operation_inputs[1] = self.placeholder
-        self.acl_decoder_operation_inputs[2] = position_ids
-        self.acl_decoder_operation_inputs[3] = cos_embed
-        self.acl_decoder_operation_inputs[4] = sin_embed
-        self.acl_decoder_operation_inputs[5] = attention_mask
-        self.acl_decoder_operation_inputs[6] = block_tables
-        self.acl_decoder_operation_inputs[7] = slots
-        self.acl_decoder_operation_inputs[8] = self.placeholder
-        self.acl_decoder_operation_inputs[9] = self.placeholder
-        self.acl_decoder_operation_inputs[10] = self.placeholder
-        self.acl_decoder_operation_inputs[11] = input_lengths
-        self.acl_decoder_operation_inputs[12] = lm_head_indices
-        return self.acl_decoder_operation_inputs, self.acl_param
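Both LlamaBoost above and QwenBoost below attach themselves to a registry through the @LlmBoostRegister.register(...) decorator. The registry module itself (mindspore/experimental/llm_boost/register.py) is also deleted in this release, per the file list, and its body is not part of this diff, so the following is only a generic sketch of how a decorator-based registry of that shape typically works, not MindSpore's actual implementation:

# Illustrative reconstruction only; names and behavior are assumptions.
class LlmBoostRegistry:
    def __init__(self):
        self._classes = {}

    def register(self, boost_type, model_name):
        def wrapper(cls):
            # Record the class under a (backend type, model name) key.
            self._classes[(boost_type, model_name)] = cls
            return cls  # the class is returned unchanged, so normal use is unaffected
        return wrapper

    def get(self, boost_type, model_name):
        return self._classes[(boost_type, model_name)]

REGISTRY = LlmBoostRegistry()

@REGISTRY.register("BUILDIN", "Llama")
class DummyLlamaBoost:
    pass

assert REGISTRY.get("BUILDIN", "Llama") is DummyLlamaBoost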
mindspore/experimental/llm_boost/atb/qwen_boost.py (deleted)
@@ -1,124 +0,0 @@
-# Copyright 2024 Huawei Technologies Co., Ltd
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ============================================================================
-"""llm boost"""
-import json
-import mindspore.common.dtype as mstype
-from mindspore.experimental.llm_boost.atb.boost_base import AtbBoostBase, NormType
-from mindspore._c_expression import LlmBoostBinder
-from mindspore.experimental.llm_boost.register import LlmBoostRegister, LlmBoostType
-
-
-CPP_QWEN_MODEL_CLASS_NAME = "qwen_QwenDecoderModel"
-
-
-@LlmBoostRegister.register(LlmBoostType.BUILDIN, "Qwen")
-class QwenBoost(AtbBoostBase):
-    """QwenBoost class"""
-
-    def __init__(self, config):
-        super().__init__(config)
-        self.in_tensor_length = 12
-        self.acl_encoder_operation_inputs = [None] * self.in_tensor_length
-        self.acl_decoder_operation_inputs = [None] * self.in_tensor_length
-        self.atb_encoder_operation = LlmBoostBinder(
-            self.backend_name, CPP_QWEN_MODEL_CLASS_NAME
-        )
-        self.atb_decoder_operation = LlmBoostBinder(
-            self.backend_name, CPP_QWEN_MODEL_CLASS_NAME
-        )
-
-    def init(self):
-        """set param"""
-        param_dict = {
-            "isFA": False,
-            "isBF16": self.dtype == mstype.bfloat16,
-            "withEmbedding": True,
-            "isEmbeddingParallel": True,
-            "isLmHeadParallel": True,
-            "linearTransposeType": [
-                [1, -1, -1, 1, 1, -1, 1] for i in range(self.num_layers)
-            ],
-            "lmHeadTransposeType": 1,
-            "enableSwiGLU": not self.need_nz,
-            "normEps": self.config.rms_norm_eps,
-            "normType": NormType.RMS_NORM,
-            "numAttentionHeadsPerRank": self.config.num_heads // self.device_num,
-            "hiddenSizePerAttentionHead": self.head_dim,
-            "numHiddenLayers": self.num_layers,
-            "numKeyValueHeadsPerRank": self.n_kv_heads // self.device_num,
-            "rank": self.rank_id,
-            "worldSize": self.device_num,
-            "backend": self.config.communication_backend,
-            "packQuantType": [[1, 1] for _ in range(self.num_layers)],
-            "linearQuantType": [
-                [0, -1, -1, 0, 0, -1, 0] for _ in range(self.num_layers)
-            ],
-            "linearHasBias": [[True, False, False, False]] * self.num_layers,
-            "enableKvQuant": self.kv_quant is not None,
-            "enableLora": False,
-            "isUnpadInputs": True,
-            "enableAddNorm": False,
-        }
-        encoder_param = {
-            **param_dict,
-            "isPrefill": True,
-            "enableLcoc": False,
-            "enableSplitFuse": False,
-        }
-        decoder_param = {
-            **param_dict,
-            "isPrefill": False,
-            "enableLcoc": False,
-            "enableSpeculate": False,
-            "enablePrefixCache": False,
-        }
-        self.atb_encoder_operation.init(json.dumps({**encoder_param}))
-        self.atb_decoder_operation.init(json.dumps({**decoder_param}))
-
-    def _prepare_inputs(
-        self,
-        prefill=None,
-        input_ids=None,
-        position_ids=None,
-        cos_embed=None,
-        sin_embed=None,
-        attention_mask=None,
-        block_tables=None,
-        slots=None,
-        input_lengths=None,
-        lm_head_indices=None,
-        seqLen=None,
-        **kwargs
-    ):
-        """prepare inputs"""
-        self.acl_param = json.dumps(
-            {
-                "seqLen": seqLen,
-            }
-        )
-
-        self.acl_decoder_operation_inputs[0] = input_ids
-        self.acl_decoder_operation_inputs[1] = position_ids
-        self.acl_decoder_operation_inputs[2] = cos_embed
-        self.acl_decoder_operation_inputs[3] = sin_embed
-        self.acl_decoder_operation_inputs[4] = attention_mask
-        self.acl_decoder_operation_inputs[5] = block_tables
-        self.acl_decoder_operation_inputs[6] = slots
-        self.acl_decoder_operation_inputs[7] = self.placeholder
-        self.acl_decoder_operation_inputs[8] = self.placeholder
-        self.acl_decoder_operation_inputs[9] = self.placeholder
-        self.acl_decoder_operation_inputs[10] = input_lengths
-        self.acl_decoder_operation_inputs[11] = lm_head_indices
-        return self.acl_decoder_operation_inputs, self.acl_param