mindspore-2.4.1-cp39-cp39-win_amd64.whl → mindspore-2.5.0-cp39-cp39-win_amd64.whl

This diff compares the contents of two publicly released package versions as they appear in their respective public registries, and is provided for informational purposes only.

Potentially problematic release: this version of mindspore might be problematic.

Files changed (395)
  1. mindspore/.commit_id +1 -1
  2. mindspore/Microsoft.VisualStudio.Telemetry.dll +0 -0
  3. mindspore/Newtonsoft.Json.dll +0 -0
  4. mindspore/__init__.py +8 -3
  5. mindspore/_c_dataengine.cp39-win_amd64.pyd +0 -0
  6. mindspore/_c_expression.cp39-win_amd64.pyd +0 -0
  7. mindspore/_c_mindrecord.cp39-win_amd64.pyd +0 -0
  8. mindspore/_checkparam.py +0 -5
  9. mindspore/_extends/parallel_compile/akg_compiler/gen_custom_op_files.py +1 -1
  10. mindspore/_extends/parse/compile_config.py +64 -0
  11. mindspore/_extends/parse/deprecated/__init__.py +0 -0
  12. mindspore/_extends/parse/deprecated/deprecated_tensor_method.py +375 -0
  13. mindspore/_extends/parse/parser.py +23 -5
  14. mindspore/_extends/parse/standard_method.py +123 -27
  15. mindspore/_extends/pijit/pijit_func_white_list.py +1 -1
  16. mindspore/amp.py +7 -1
  17. mindspore/atlprov.dll +0 -0
  18. mindspore/avcodec-59.dll +0 -0
  19. mindspore/avdevice-59.dll +0 -0
  20. mindspore/avfilter-8.dll +0 -0
  21. mindspore/avformat-59.dll +0 -0
  22. mindspore/avutil-57.dll +0 -0
  23. mindspore/boost/boost_cell_wrapper.py +136 -41
  24. mindspore/c1.dll +0 -0
  25. mindspore/c1xx.dll +0 -0
  26. mindspore/c2.dll +0 -0
  27. mindspore/common/__init__.py +3 -1
  28. mindspore/common/_register_for_tensor.py +0 -1
  29. mindspore/common/_stub_tensor.py +25 -4
  30. mindspore/common/_tensor_cpp_method.py +17 -0
  31. mindspore/common/_tensor_docs.py +6132 -0
  32. mindspore/common/api.py +99 -25
  33. mindspore/common/dtype.py +34 -34
  34. mindspore/common/dump.py +2 -1
  35. mindspore/common/file_system.py +8 -1
  36. mindspore/common/generator.py +2 -0
  37. mindspore/common/hook_handle.py +3 -1
  38. mindspore/common/initializer.py +3 -4
  39. mindspore/common/lazy_inline.py +8 -2
  40. mindspore/common/mindir_util.py +10 -2
  41. mindspore/common/parameter.py +30 -27
  42. mindspore/common/tensor.py +713 -1337
  43. mindspore/communication/__init__.py +1 -1
  44. mindspore/communication/_comm_helper.py +10 -0
  45. mindspore/communication/comm_func.py +215 -173
  46. mindspore/communication/management.py +23 -20
  47. mindspore/context.py +292 -193
  48. mindspore/dataset/__init__.py +23 -19
  49. mindspore/dataset/callback/ds_callback.py +2 -1
  50. mindspore/dataset/core/config.py +84 -3
  51. mindspore/dataset/engine/cache_admin.py +3 -3
  52. mindspore/dataset/engine/cache_client.py +5 -4
  53. mindspore/dataset/engine/datasets.py +192 -149
  54. mindspore/dataset/engine/datasets_audio.py +14 -0
  55. mindspore/dataset/engine/datasets_standard_format.py +28 -11
  56. mindspore/dataset/engine/datasets_text.py +38 -1
  57. mindspore/dataset/engine/datasets_user_defined.py +125 -65
  58. mindspore/dataset/engine/datasets_vision.py +81 -8
  59. mindspore/dataset/engine/iterators.py +281 -63
  60. mindspore/dataset/engine/obs/util.py +8 -0
  61. mindspore/dataset/engine/queue.py +40 -0
  62. mindspore/dataset/engine/samplers.py +26 -2
  63. mindspore/dataset/engine/serializer_deserializer.py +1 -1
  64. mindspore/dataset/engine/validators.py +43 -11
  65. mindspore/dataset/transforms/py_transforms_util.py +17 -0
  66. mindspore/dataset/transforms/transforms.py +29 -12
  67. mindspore/dataset/vision/validators.py +1 -2
  68. mindspore/device_context/__init__.py +21 -0
  69. mindspore/device_context/ascend/__init__.py +25 -0
  70. mindspore/device_context/ascend/device.py +72 -0
  71. mindspore/device_context/ascend/op_debug.py +94 -0
  72. mindspore/device_context/ascend/op_precision.py +193 -0
  73. mindspore/device_context/ascend/op_tuning.py +127 -0
  74. mindspore/device_context/cpu/__init__.py +25 -0
  75. mindspore/device_context/cpu/device.py +62 -0
  76. mindspore/device_context/cpu/op_tuning.py +43 -0
  77. mindspore/device_context/gpu/__init__.py +21 -0
  78. mindspore/device_context/gpu/device.py +70 -0
  79. mindspore/device_context/gpu/op_precision.py +67 -0
  80. mindspore/device_context/gpu/op_tuning.py +175 -0
  81. mindspore/device_manager.py +134 -0
  82. mindspore/dnnl.dll +0 -0
  83. mindspore/dpcmi.dll +0 -0
  84. mindspore/experimental/llm_boost/__init__.py +3 -2
  85. mindspore/experimental/llm_boost/ascend_native/__init__.py +22 -0
  86. mindspore/experimental/llm_boost/ascend_native/llama_boost_ascend_native.py +211 -0
  87. mindspore/experimental/llm_boost/ascend_native/llm_boost.py +52 -0
  88. mindspore/experimental/llm_boost/atb/boost_base.py +239 -64
  89. mindspore/experimental/llm_boost/atb/llama_boost.py +52 -30
  90. mindspore/experimental/llm_boost/atb/qwen_boost.py +47 -24
  91. mindspore/experimental/llm_boost/register.py +1 -0
  92. mindspore/experimental/optim/adadelta.py +26 -22
  93. mindspore/experimental/optim/adam.py +3 -0
  94. mindspore/experimental/optim/lr_scheduler.py +33 -24
  95. mindspore/experimental/optim/radam.py +33 -30
  96. mindspore/hal/device.py +28 -0
  97. mindspore/hal/event.py +17 -0
  98. mindspore/hal/memory.py +94 -3
  99. mindspore/hal/stream.py +91 -6
  100. mindspore/include/api/context.h +1 -2
  101. mindspore/include/dataset/constants.h +2 -2
  102. mindspore/jpeg62.dll +0 -0
  103. mindspore/log.py +12 -0
  104. mindspore/mindrecord/__init__.py +1 -1
  105. mindspore/mindrecord/config.py +17 -316
  106. mindspore/mindrecord/filereader.py +1 -9
  107. mindspore/mindrecord/filewriter.py +5 -15
  108. mindspore/mindrecord/mindpage.py +1 -9
  109. mindspore/mindspore_backend.dll +0 -0
  110. mindspore/mindspore_common.dll +0 -0
  111. mindspore/mindspore_core.dll +0 -0
  112. mindspore/mindspore_glog.dll +0 -0
  113. mindspore/mindspore_ops.dll +0 -0
  114. mindspore/mint/__init__.py +824 -218
  115. mindspore/mint/distributed/__init__.py +66 -4
  116. mindspore/mint/distributed/distributed.py +2594 -44
  117. mindspore/mint/linalg/__init__.py +6 -0
  118. mindspore/mint/nn/__init__.py +473 -14
  119. mindspore/mint/nn/functional.py +486 -11
  120. mindspore/mint/nn/layer/__init__.py +17 -4
  121. mindspore/mint/nn/layer/_functions.py +330 -0
  122. mindspore/mint/nn/layer/activation.py +169 -1
  123. mindspore/mint/nn/layer/basic.py +123 -0
  124. mindspore/mint/nn/layer/conv.py +727 -0
  125. mindspore/mint/nn/layer/normalization.py +215 -19
  126. mindspore/mint/nn/layer/padding.py +797 -0
  127. mindspore/mint/nn/layer/pooling.py +170 -0
  128. mindspore/mint/optim/__init__.py +2 -1
  129. mindspore/mint/optim/adam.py +223 -0
  130. mindspore/mint/optim/adamw.py +26 -19
  131. mindspore/mint/special/__init__.py +2 -1
  132. mindspore/msobj140.dll +0 -0
  133. mindspore/mspdb140.dll +0 -0
  134. mindspore/mspdbcore.dll +0 -0
  135. mindspore/mspdbst.dll +0 -0
  136. mindspore/mspft140.dll +0 -0
  137. mindspore/msvcdis140.dll +0 -0
  138. mindspore/msvcp140_1.dll +0 -0
  139. mindspore/msvcp140_2.dll +0 -0
  140. mindspore/msvcp140_atomic_wait.dll +0 -0
  141. mindspore/msvcp140_codecvt_ids.dll +0 -0
  142. mindspore/multiprocessing/__init__.py +5 -0
  143. mindspore/nn/__init__.py +2 -0
  144. mindspore/nn/cell.py +142 -21
  145. mindspore/nn/dynamic_lr.py +2 -1
  146. mindspore/nn/layer/activation.py +6 -6
  147. mindspore/nn/layer/basic.py +35 -25
  148. mindspore/nn/layer/channel_shuffle.py +3 -3
  149. mindspore/nn/layer/conv.py +3 -0
  150. mindspore/nn/layer/embedding.py +3 -3
  151. mindspore/nn/layer/normalization.py +8 -7
  152. mindspore/nn/layer/padding.py +4 -3
  153. mindspore/nn/layer/pooling.py +55 -23
  154. mindspore/nn/layer/rnn_cells.py +1 -1
  155. mindspore/nn/layer/rnns.py +2 -1
  156. mindspore/nn/layer/timedistributed.py +5 -5
  157. mindspore/nn/layer/transformer.py +48 -26
  158. mindspore/nn/learning_rate_schedule.py +5 -3
  159. mindspore/nn/loss/loss.py +31 -36
  160. mindspore/nn/optim/ada_grad.py +1 -0
  161. mindspore/nn/optim/adadelta.py +2 -2
  162. mindspore/nn/optim/adam.py +1 -1
  163. mindspore/nn/optim/lars.py +1 -4
  164. mindspore/nn/optim/optimizer.py +1 -1
  165. mindspore/nn/optim/rprop.py +2 -2
  166. mindspore/nn/optim/thor.py +2 -1
  167. mindspore/nn/utils/__init__.py +22 -0
  168. mindspore/nn/utils/init.py +73 -0
  169. mindspore/nn/wrap/cell_wrapper.py +4 -6
  170. mindspore/nn/wrap/loss_scale.py +3 -4
  171. mindspore/numpy/array_creations.py +60 -62
  172. mindspore/numpy/array_ops.py +148 -143
  173. mindspore/numpy/logic_ops.py +41 -42
  174. mindspore/numpy/math_ops.py +361 -359
  175. mindspore/numpy/utils.py +16 -16
  176. mindspore/numpy/utils_const.py +4 -4
  177. mindspore/opencv_core452.dll +0 -0
  178. mindspore/opencv_imgcodecs452.dll +0 -0
  179. mindspore/opencv_imgproc452.dll +0 -0
  180. mindspore/ops/__init__.py +2 -1
  181. mindspore/ops/_grad_experimental/grad_comm_ops.py +107 -8
  182. mindspore/ops/_grad_experimental/grad_debug_ops.py +6 -1
  183. mindspore/ops/_grad_experimental/grad_inner_ops.py +9 -0
  184. mindspore/ops/_grad_experimental/grad_math_ops.py +2 -1
  185. mindspore/ops/_op_impl/cpu/__init__.py +1 -0
  186. mindspore/ops/_op_impl/cpu/raise_op.py +28 -0
  187. mindspore/ops/_vmap/vmap_array_ops.py +20 -19
  188. mindspore/ops/_vmap/vmap_base.py +0 -2
  189. mindspore/ops/_vmap/vmap_grad_nn_ops.py +19 -13
  190. mindspore/ops/_vmap/vmap_math_ops.py +11 -9
  191. mindspore/ops/_vmap/vmap_nn_ops.py +20 -34
  192. mindspore/ops/auto_generate/cpp_create_prim_instance_helper.py +149 -12
  193. mindspore/ops/auto_generate/gen_arg_handler.py +0 -61
  194. mindspore/ops/auto_generate/gen_extend_func.py +554 -60
  195. mindspore/ops/auto_generate/gen_ops_def.py +1621 -115
  196. mindspore/ops/auto_generate/gen_ops_prim.py +8027 -3411
  197. mindspore/ops/auto_generate/pyboost_inner_prim.py +183 -79
  198. mindspore/ops/composite/base.py +1 -1
  199. mindspore/ops/composite/multitype_ops/_compile_utils.py +229 -30
  200. mindspore/ops/composite/multitype_ops/pow_impl.py +0 -29
  201. mindspore/ops/function/__init__.py +12 -0
  202. mindspore/ops/function/array_func.py +561 -159
  203. mindspore/ops/function/clip_func.py +64 -0
  204. mindspore/ops/function/debug_func.py +28 -20
  205. mindspore/ops/function/image_func.py +1 -1
  206. mindspore/ops/function/linalg_func.py +5 -4
  207. mindspore/ops/function/math_func.py +1664 -294
  208. mindspore/ops/function/nn_func.py +988 -317
  209. mindspore/ops/function/parameter_func.py +3 -56
  210. mindspore/ops/function/random_func.py +243 -33
  211. mindspore/ops/function/sparse_unary_func.py +1 -1
  212. mindspore/ops/functional.py +18 -5
  213. mindspore/ops/functional_overload.py +897 -0
  214. mindspore/ops/operations/__init__.py +3 -2
  215. mindspore/ops/operations/_embedding_cache_ops.py +4 -4
  216. mindspore/ops/operations/_grad_ops.py +2 -34
  217. mindspore/ops/operations/_infer_ops.py +2 -1
  218. mindspore/ops/operations/_inner_ops.py +38 -8
  219. mindspore/ops/operations/array_ops.py +45 -303
  220. mindspore/ops/operations/comm_ops.py +23 -17
  221. mindspore/ops/operations/custom_ops.py +7 -49
  222. mindspore/ops/operations/debug_ops.py +42 -47
  223. mindspore/ops/operations/inner_ops.py +6 -4
  224. mindspore/ops/operations/linalg_ops.py +3 -2
  225. mindspore/ops/operations/manually_defined/ops_def.py +185 -104
  226. mindspore/ops/operations/math_ops.py +11 -216
  227. mindspore/ops/operations/nn_ops.py +153 -310
  228. mindspore/ops/primitive.py +23 -21
  229. mindspore/ops/tensor_method.py +1669 -0
  230. mindspore/ops_generate/aclnn_kernel_register_auto_cc_generator.py +110 -0
  231. mindspore/ops_generate/add_tensor_docs_generator.py +54 -0
  232. mindspore/ops_generate/arg_handler.py +0 -61
  233. mindspore/ops_generate/auto_grad_impl_cc_generator.py +135 -0
  234. mindspore/ops_generate/auto_grad_reg_cc_generator.py +93 -0
  235. mindspore/ops_generate/base_generator.py +11 -0
  236. mindspore/ops_generate/cpp_create_prim_instance_helper_generator.py +108 -0
  237. mindspore/ops_generate/functional_map_cpp_generator.py +491 -0
  238. mindspore/ops_generate/functional_overload_py_generator.py +110 -0
  239. mindspore/ops_generate/functions_cc_generator.py +233 -0
  240. mindspore/ops_generate/gen_aclnn_implement.py +110 -114
  241. mindspore/ops_generate/gen_constants.py +157 -3
  242. mindspore/ops_generate/gen_ops.py +245 -990
  243. mindspore/ops_generate/gen_pyboost_func.py +97 -998
  244. mindspore/ops_generate/gen_utils.py +119 -33
  245. mindspore/ops_generate/lite_ops_cpp_generator.py +155 -0
  246. mindspore/ops_generate/op_api_proto.py +206 -0
  247. mindspore/ops_generate/op_def_py_generator.py +131 -0
  248. mindspore/ops_generate/op_prim_py_generator.py +480 -0
  249. mindspore/ops_generate/op_proto.py +373 -108
  250. mindspore/ops_generate/op_template_parser.py +436 -0
  251. mindspore/ops_generate/ops_def_cc_generator.py +288 -0
  252. mindspore/ops_generate/ops_def_h_generator.py +74 -0
  253. mindspore/ops_generate/ops_name_h_generator.py +68 -0
  254. mindspore/ops_generate/ops_primitive_h_generator.py +81 -0
  255. mindspore/ops_generate/pyboost_functions_cpp_generator.py +370 -0
  256. mindspore/ops_generate/pyboost_functions_h_generator.py +68 -0
  257. mindspore/ops_generate/pyboost_functions_py_generator.py +148 -0
  258. mindspore/ops_generate/pyboost_grad_function_cpp_generator.py +154 -0
  259. mindspore/ops_generate/pyboost_inner_prim_generator.py +131 -0
  260. mindspore/ops_generate/pyboost_native_grad_functions_generator.py +268 -0
  261. mindspore/ops_generate/pyboost_op_cpp_code_generator.py +851 -0
  262. mindspore/ops_generate/pyboost_overload_functions_cpp_generator.py +344 -0
  263. mindspore/ops_generate/pyboost_utils.py +92 -33
  264. mindspore/ops_generate/template.py +294 -44
  265. mindspore/ops_generate/tensor_func_reg_cpp_generator.py +422 -0
  266. mindspore/parallel/__init__.py +3 -3
  267. mindspore/parallel/_auto_parallel_context.py +44 -34
  268. mindspore/parallel/_cell_wrapper.py +22 -3
  269. mindspore/parallel/_parallel_serialization.py +13 -2
  270. mindspore/parallel/_utils.py +4 -2
  271. mindspore/parallel/algo_parameter_config.py +1 -1
  272. mindspore/parallel/checkpoint_transform.py +44 -0
  273. mindspore/parallel/cluster/process_entity/_api.py +131 -37
  274. mindspore/parallel/cluster/process_entity/_utils.py +41 -6
  275. mindspore/parallel/cluster/run.py +20 -3
  276. mindspore/parallel/parameter_broadcast.py +1 -1
  277. mindspore/parallel/shard.py +3 -0
  278. mindspore/parallel/transform_safetensors.py +119 -253
  279. mindspore/pgodb140.dll +0 -0
  280. mindspore/pgort140.dll +0 -0
  281. mindspore/profiler/__init__.py +17 -4
  282. mindspore/profiler/analysis/__init__.py +0 -0
  283. mindspore/profiler/analysis/parser/__init__.py +0 -0
  284. mindspore/profiler/analysis/parser/ascend_cann_parser.py +166 -0
  285. mindspore/profiler/analysis/parser/base_parser.py +158 -0
  286. mindspore/profiler/analysis/parser/framework_cann_relation_parser.py +45 -0
  287. mindspore/profiler/analysis/parser/ms_framework_parser.py +142 -0
  288. mindspore/profiler/analysis/parser/ms_minddata_parser.py +145 -0
  289. mindspore/profiler/analysis/parser/timeline_assembly_factory/__init__.py +0 -0
  290. mindspore/profiler/analysis/parser/timeline_assembly_factory/ascend_timeline_assembler.py +261 -0
  291. mindspore/profiler/analysis/parser/timeline_assembly_factory/base_timeline_assembler.py +40 -0
  292. mindspore/profiler/analysis/parser/timeline_assembly_factory/trace_view_container.py +84 -0
  293. mindspore/profiler/analysis/parser/timeline_creator/__init__.py +0 -0
  294. mindspore/profiler/analysis/parser/timeline_creator/base_timeline_creator.py +44 -0
  295. mindspore/profiler/analysis/parser/timeline_creator/cpu_op_timeline_creator.py +90 -0
  296. mindspore/profiler/analysis/parser/timeline_creator/fwk_timeline_creator.py +76 -0
  297. mindspore/profiler/analysis/parser/timeline_creator/msprof_timeline_creator.py +103 -0
  298. mindspore/profiler/analysis/parser/timeline_creator/scope_layer_timeline_creator.py +134 -0
  299. mindspore/profiler/analysis/parser/timeline_event/__init__.py +0 -0
  300. mindspore/profiler/analysis/parser/timeline_event/base_event.py +233 -0
  301. mindspore/profiler/analysis/parser/timeline_event/cpu_op_event.py +47 -0
  302. mindspore/profiler/analysis/parser/timeline_event/flow_event.py +36 -0
  303. mindspore/profiler/analysis/parser/timeline_event/fwk_event.py +260 -0
  304. mindspore/profiler/analysis/parser/timeline_event/msprof_event.py +73 -0
  305. mindspore/profiler/analysis/parser/timeline_event/scope_layer_event.py +53 -0
  306. mindspore/profiler/analysis/parser/timeline_event/timeline_event_pool.py +146 -0
  307. mindspore/profiler/analysis/task_manager.py +131 -0
  308. mindspore/profiler/analysis/time_converter.py +84 -0
  309. mindspore/profiler/analysis/viewer/__init__.py +0 -0
  310. mindspore/profiler/analysis/viewer/ascend_communication_viewer.py +333 -0
  311. mindspore/profiler/analysis/viewer/ascend_integrate_viewer.py +87 -0
  312. mindspore/profiler/analysis/viewer/ascend_kernel_details_viewer.py +252 -0
  313. mindspore/profiler/analysis/viewer/ascend_memory_viewer.py +313 -0
  314. mindspore/profiler/analysis/viewer/ascend_op_memory_viewer.py +322 -0
  315. mindspore/profiler/analysis/viewer/ascend_step_trace_time_viewer.py +265 -0
  316. mindspore/profiler/analysis/viewer/ascend_timeline_viewer.py +58 -0
  317. mindspore/profiler/analysis/viewer/base_viewer.py +26 -0
  318. mindspore/profiler/analysis/viewer/ms_dataset_viewer.py +97 -0
  319. mindspore/profiler/analysis/viewer/ms_minddata_viewer.py +581 -0
  320. mindspore/profiler/analysis/work_flow.py +73 -0
  321. mindspore/profiler/common/ascend_msprof_exporter.py +138 -0
  322. mindspore/profiler/common/command_executor.py +90 -0
  323. mindspore/profiler/common/constant.py +174 -3
  324. mindspore/profiler/common/file_manager.py +208 -0
  325. mindspore/profiler/common/log.py +130 -0
  326. mindspore/profiler/common/msprof_cmd_tool.py +202 -0
  327. mindspore/profiler/common/path_manager.py +371 -0
  328. mindspore/profiler/common/process_bar.py +168 -0
  329. mindspore/profiler/common/process_pool.py +9 -3
  330. mindspore/profiler/common/profiler_context.py +476 -0
  331. mindspore/profiler/common/profiler_info.py +304 -0
  332. mindspore/profiler/common/profiler_output_path.py +284 -0
  333. mindspore/profiler/common/profiler_parameters.py +210 -0
  334. mindspore/profiler/common/profiler_path_manager.py +120 -0
  335. mindspore/profiler/common/record_function.py +76 -0
  336. mindspore/profiler/common/tlv_decoder.py +76 -0
  337. mindspore/profiler/common/util.py +75 -2
  338. mindspore/profiler/dynamic_profiler.py +270 -37
  339. mindspore/profiler/envprofiler.py +138 -0
  340. mindspore/profiler/mstx.py +199 -0
  341. mindspore/profiler/platform/__init__.py +21 -0
  342. mindspore/profiler/platform/base_profiler.py +40 -0
  343. mindspore/profiler/platform/cpu_profiler.py +124 -0
  344. mindspore/profiler/platform/gpu_profiler.py +74 -0
  345. mindspore/profiler/platform/npu_profiler.py +309 -0
  346. mindspore/profiler/profiler.py +580 -93
  347. mindspore/profiler/profiler_action_controller.py +187 -0
  348. mindspore/profiler/profiler_interface.py +114 -0
  349. mindspore/profiler/schedule.py +208 -0
  350. mindspore/rewrite/api/symbol_tree.py +1 -2
  351. mindspore/run_check/_check_version.py +18 -13
  352. mindspore/runtime/__init__.py +37 -0
  353. mindspore/runtime/device.py +27 -0
  354. mindspore/runtime/event.py +209 -0
  355. mindspore/runtime/executor.py +148 -0
  356. mindspore/runtime/memory.py +392 -0
  357. mindspore/runtime/stream.py +460 -0
  358. mindspore/runtime/thread_bind_core.py +401 -0
  359. mindspore/swresample-4.dll +0 -0
  360. mindspore/swscale-6.dll +0 -0
  361. mindspore/tbbmalloc.dll +0 -0
  362. mindspore/tinyxml2.dll +0 -0
  363. mindspore/train/__init__.py +2 -2
  364. mindspore/train/_utils.py +53 -18
  365. mindspore/train/amp.py +8 -4
  366. mindspore/train/callback/_checkpoint.py +32 -18
  367. mindspore/train/callback/_early_stop.py +1 -1
  368. mindspore/train/callback/_flops_collector.py +105 -69
  369. mindspore/train/callback/_history.py +1 -1
  370. mindspore/train/callback/_summary_collector.py +44 -6
  371. mindspore/train/callback/_tft_register.py +37 -15
  372. mindspore/train/dataset_helper.py +11 -11
  373. mindspore/train/metrics/precision.py +4 -5
  374. mindspore/train/mind_ir_pb2.py +167 -46
  375. mindspore/train/model.py +13 -14
  376. mindspore/train/serialization.py +461 -72
  377. mindspore/train/summary/summary_record.py +1 -2
  378. mindspore/train/train_thor/model_thor.py +1 -1
  379. mindspore/turbojpeg.dll +0 -0
  380. mindspore/utils/__init__.py +4 -2
  381. mindspore/utils/dryrun.py +138 -0
  382. mindspore/utils/runtime_execution_order_check.py +550 -0
  383. mindspore/vcmeta.dll +0 -0
  384. mindspore/vcruntime140.dll +0 -0
  385. mindspore/vcruntime140_1.dll +0 -0
  386. mindspore/version.py +1 -1
  387. {mindspore-2.4.1.dist-info → mindspore-2.5.0.dist-info}/METADATA +3 -4
  388. {mindspore-2.4.1.dist-info → mindspore-2.5.0.dist-info}/RECORD +391 -265
  389. {mindspore-2.4.1.dist-info → mindspore-2.5.0.dist-info}/entry_points.txt +1 -1
  390. mindspore/common/_tensor_overload.py +0 -139
  391. mindspore/mindspore_np_dtype.dll +0 -0
  392. mindspore/profiler/envprofiling.py +0 -254
  393. mindspore/profiler/profiling.py +0 -1926
  394. {mindspore-2.4.1.dist-info → mindspore-2.5.0.dist-info}/WHEEL +0 -0
  395. {mindspore-2.4.1.dist-info → mindspore-2.5.0.dist-info}/top_level.txt +0 -0
mindspore/experimental/llm_boost/atb/boost_base.py

@@ -13,17 +13,32 @@
 # limitations under the License.
 # ============================================================================
 """boost base class"""
+from enum import Enum
 import numpy as np
 import mindspore as ms
 from mindspore import ops, Tensor
+from mindspore import log as logger
 from mindspore.ops import operations as P
 import mindspore.common.dtype as mstype
 from mindspore._c_expression import _set_format
-
 from mindspore.common.parameter import Parameter
 from mindspore.experimental.llm_boost.utils import get_real_rank, get_real_group_size
 from mindspore.common.initializer import Zero
 
+FORMAT_NZ = "FRACTAL_NZ"
+BUILDIN_BACKEND_NAME = "ATB"
+
+
+class PositionEmbeddingType(int, Enum):
+    ROPE = 0
+    ALIBI = 1
+    ABSOLUTE = 2
+
+
+class NormType(int, Enum):
+    RMS_NORM = 0
+    LAYER_NORM = 1
+
 
 class AttentionMask:
     """attention mask"""
@@ -31,30 +46,34 @@ class AttentionMask:
     @classmethod
     def static(cls, max_seq_len, dtype=mstype.float16, need_nz=False):
         """cache mask"""
-        bias_cache = Tensor(np.tril(np.ones((max_seq_len, max_seq_len), dtype=np.bool_))).reshape(max_seq_len,
-                                                                                                  max_seq_len)
+        bias_cache = Tensor(
+            np.tril(np.ones((max_seq_len, max_seq_len), dtype=np.bool_))
+        ).reshape(max_seq_len, max_seq_len)
         bias_cache = ~bias_cache
         if dtype == mstype.float16:
             mask_value = Tensor(np.finfo(np.float32).min, mstype.float16)
         else:
             mask_value = Tensor(1)
-        attn_mask = ops.masked_fill(Tensor(np.zeros(
-            (max_seq_len, max_seq_len)), dtype=mstype.float16), bias_cache, mask_value)
+        attn_mask = ops.masked_fill(
+            Tensor(np.zeros((max_seq_len, max_seq_len)), dtype=mstype.float16),
+            bias_cache,
+            mask_value,
+        )
         if need_nz:
             # ND -> NZ
             attn_mask = ops.reshape(attn_mask, (1, max_seq_len, max_seq_len))
-            attn_mask = ops.reshape(
-                attn_mask, (1, max_seq_len, max_seq_len // 16, 16))
+            attn_mask = ops.reshape(attn_mask, (1, max_seq_len, max_seq_len // 16, 16))
             attn_mask = ops.transpose(attn_mask, (0, 2, 1, 3)).contiguous()
-            attn_mask = _set_format(attn_mask, "FRACTAL_NZ")
+            attn_mask = _set_format(attn_mask, FORMAT_NZ)
         return attn_mask
 
 
-class AtbBoostBase():
+class AtbBoostBase:
     """atb boost base class"""
 
     def __init__(self, config):
         super().__init__()
+        self.backend_name = BUILDIN_BACKEND_NAME
         self.is_first_iteration = False
         self.config = config
         self.dtype = config.compute_dtype
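
Aside (not part of the diff): AttentionMask.static builds a standard causal mask — a lower-triangular boolean matrix marks the visible positions, its inverse marks the future positions, and masked_fill writes a large negative value there so softmax drives those scores to zero. A pure-NumPy sketch of the same construction, for illustration only:

import numpy as np

max_seq_len = 4
# Lower triangle = positions a token may attend to; invert to get the
# future positions that must be masked, mirroring `~bias_cache` above.
visible = np.tril(np.ones((max_seq_len, max_seq_len), dtype=np.bool_))
future = ~visible

# masked_fill equivalent: 0 where visible, float32-min where future.
mask_value = np.finfo(np.float32).min
attn_mask = np.where(future, mask_value, 0.0).astype(np.float32)
print(attn_mask[0])  # [ 0.0e+00 -3.4e+38 -3.4e+38 -3.4e+38]
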
@@ -68,27 +87,97 @@ class AtbBoostBase():
         self.need_nz = config.need_nz
         self.placeholder = Tensor(np.zeros(1), dtype=self.dtype)
         self.lm_head_indices_fake = Tensor([0], dtype=mstype.int64)
-        self.position_embedding_type = "ROPE"
+        self.position_embedding_type = PositionEmbeddingType.ROPE
         self.add_norm_enable = True
         self.max_decode_length = self.config.max_decode_length
         self.max_base_len = 128
         self.attn_mask = AttentionMask.static(
-            self.max_base_len, dtype=self.dtype, need_nz=self.need_nz)
+            self.max_base_len, dtype=self.dtype, need_nz=self.need_nz
+        )
 
         self.cast = P.Cast()
         self.reshape = P.Reshape()
         self.kv_quant = None
         self.rank_id = get_real_rank()
         self.device_num = get_real_group_size()
+        self.ascend_weight = []
+        self.k_caches = []
+        self.v_caches = []
 
     def _convert_tensor_format_and_dtype(self, tensor, dtype=mstype.float16):
        tensor = self.cast(tensor, dtype=dtype)
        if self.need_nz:
-            tensor = _set_format(tensor, "FRACTAL_NZ")
+            tensor = _set_format(tensor, FORMAT_NZ)
        return tensor
 
+    def _convert_qkv_concat_weight(self, param_dict):
+        """convert qkv concat weight"""
+        for i in range(self.num_layers):
+            # qkv weight concat
+            wq_weight_name = f"model.layers.{i}.attention.wq.weight"
+            wk_weight_name = f"model.layers.{i}.attention.wk.weight"
+            wv_weight_name = f"model.layers.{i}.attention.wv.weight"
+            qkv_concat_weight_name = f"model.layers.{i}.attention.w_qkv.weight"
+            if wq_weight_name not in param_dict:
+                break
+            wq_weight = param_dict[wq_weight_name].asnumpy()
+            wk_weight = param_dict[wk_weight_name].asnumpy()
+            wv_weight = param_dict[wv_weight_name].asnumpy()
+            qkv_weight = np.concatenate((wq_weight, wk_weight, wv_weight), 0)
+            param_dict[qkv_concat_weight_name] = Parameter(
+                qkv_weight, name=qkv_concat_weight_name
+            )
+
+            # gate hidden weight concat
+            ffn_gate_weight_name = f"model.layers.{i}.feed_forward.w1.weight"
+            ffn_hidden_weight_name = f"model.layers.{i}.feed_forward.w3.weight"
+            gate_hidden_concat_weight_name = (
+                f"model.layers.{i}.feed_forward.w_gate_hidden.weight"
+            )
+
+            ffn_gate_weight = param_dict[ffn_gate_weight_name].asnumpy()
+            ffn_hidden_weight = param_dict[ffn_hidden_weight_name].asnumpy()
+            gate_hidden_weight = np.concatenate((ffn_gate_weight, ffn_hidden_weight), 0)
+            param_dict[gate_hidden_concat_weight_name] = Parameter(
+                gate_hidden_weight, name=gate_hidden_concat_weight_name
+            )
+
+            param_dict.pop(wq_weight_name)
+            param_dict.pop(wk_weight_name)
+            param_dict.pop(wv_weight_name)
+            param_dict.pop(ffn_gate_weight_name)
+            param_dict.pop(ffn_hidden_weight_name)
+            logger.info(f"transform: {qkv_concat_weight_name}")
+            logger.info(f"transform: {gate_hidden_concat_weight_name}")
+
+        for i in range(self.num_layers):
+            # qkv bias concat
+            wq_bias_name = f"model.layers.{i}.attention.wq.bias"
+            wk_bias_name = f"model.layers.{i}.attention.wk.bias"
+            wv_bias_name = f"model.layers.{i}.attention.wv.bias"
+            qkv_concat_bias_name = f"model.layers.{i}.attention.w_qkv.bias"
+            if wq_bias_name not in param_dict:
+                break
+
+            wq_bias_weight = param_dict[wq_bias_name].asnumpy()
+            wk_bias_weight = param_dict[wk_bias_name].asnumpy()
+            wv_bias_weight = param_dict[wv_bias_name].asnumpy()
+            qkv_bias_weight = np.concatenate(
+                (wq_bias_weight, wk_bias_weight, wv_bias_weight), 0
+            )
+            param_dict[qkv_concat_bias_name] = Parameter(
+                qkv_bias_weight, name=qkv_concat_bias_name
+            )
+
+            param_dict.pop(wq_bias_name)
+            param_dict.pop(wk_bias_name)
+            param_dict.pop(wv_bias_name)
+            logger.info(f"transform: {qkv_concat_bias_name}")
+        return param_dict
+
     def set_weights(self, parm_dict, dtype=mstype.float16):
         """set weights for llm boost"""
+        self._convert_qkv_concat_weight(parm_dict)
         embedding_weight_name = "model.tok_embeddings.embedding_weight"
         attention_norm_name = "attention_norm"
         qkv_name = "attention.w_qkv"
@@ -101,45 +190,88 @@ class AtbBoostBase():
         placeholder = Parameter(Tensor(np.zeros(1), dtype=dtype))
 
         ascend_weight = []
-        ascend_weight.append(
-            self.cast(parm_dict[embedding_weight_name], dtype))
+        ascend_weight.append(self.cast(parm_dict[embedding_weight_name], dtype))
         for i in range(self.num_layers):
-            ascend_weight.append(self._convert_tensor_format_and_dtype(
-                parm_dict[f"model.layers.{i}.{attention_norm_name}.weight"], dtype))
+            ascend_weight.append(
+                self._convert_tensor_format_and_dtype(
+                    parm_dict[f"model.layers.{i}.{attention_norm_name}.weight"], dtype
+                )
+            )
             ascend_weight.extend([placeholder] * 3)
 
             ascend_weight.append(
-                self._convert_tensor_format_and_dtype(parm_dict[f"model.layers.{i}.{qkv_name}.weight"], dtype))
-            ascend_weight.append(self._convert_tensor_format_and_dtype(parm_dict.get(
-                f"model.layers.{i}.{qkv_name}.bias", placeholder), dtype))
+                self._convert_tensor_format_and_dtype(
+                    parm_dict[f"model.layers.{i}.{qkv_name}.weight"], dtype
+                )
+            )
+            ascend_weight.append(
+                self._convert_tensor_format_and_dtype(
+                    parm_dict.get(f"model.layers.{i}.{qkv_name}.bias", placeholder),
+                    dtype,
+                )
+            )
             ascend_weight.extend([placeholder] * 16)
 
             ascend_weight.append(
-                self._convert_tensor_format_and_dtype(parm_dict[f"model.layers.{i}.{o_name}.weight"], dtype))
-            ascend_weight.append(self._convert_tensor_format_and_dtype(parm_dict.get(
-                f"model.layers.{i}.{o_name}.bias", placeholder), dtype))
+                self._convert_tensor_format_and_dtype(
+                    parm_dict[f"model.layers.{i}.{o_name}.weight"], dtype
+                )
+            )
+            ascend_weight.append(
+                self._convert_tensor_format_and_dtype(
+                    parm_dict.get(f"model.layers.{i}.{o_name}.bias", placeholder), dtype
+                )
+            )
             ascend_weight.extend([placeholder] * 4)
 
             ascend_weight.append(
-                self._convert_tensor_format_and_dtype(parm_dict[f"model.layers.{i}.{mlp_norm_name}.weight"], dtype))
+                self._convert_tensor_format_and_dtype(
+                    parm_dict[f"model.layers.{i}.{mlp_norm_name}.weight"], dtype
+                )
+            )
             ascend_weight.extend([placeholder] * 3)
 
             ascend_weight.append(
-                self._convert_tensor_format_and_dtype(parm_dict[f"model.layers.{i}.{mlp_gate_name}.weight"], dtype))
-            ascend_weight.append(self._convert_tensor_format_and_dtype(parm_dict.get(
-                f"model.layers.{i}.{mlp_gate_name}.bias", placeholder), dtype))
+                self._convert_tensor_format_and_dtype(
+                    parm_dict[f"model.layers.{i}.{mlp_gate_name}.weight"], dtype
+                )
+            )
+            ascend_weight.append(
+                self._convert_tensor_format_and_dtype(
+                    parm_dict.get(
+                        f"model.layers.{i}.{mlp_gate_name}.bias", placeholder
+                    ),
+                    dtype,
+                )
+            )
             ascend_weight.extend([placeholder] * 10)
 
             ascend_weight.append(
-                self._convert_tensor_format_and_dtype(parm_dict[f"model.layers.{i}.{mlp_down_name}.weight"], dtype))
-            ascend_weight.append(self._convert_tensor_format_and_dtype(parm_dict.get(
-                f"model.layers.{i}.{mlp_down_name}.bias", placeholder), dtype))
+                self._convert_tensor_format_and_dtype(
+                    parm_dict[f"model.layers.{i}.{mlp_down_name}.weight"], dtype
+                )
+            )
+            ascend_weight.append(
+                self._convert_tensor_format_and_dtype(
+                    parm_dict.get(
+                        f"model.layers.{i}.{mlp_down_name}.bias", placeholder
+                    ),
+                    dtype,
+                )
+            )
             ascend_weight.extend([placeholder] * 4)
 
         ascend_weight.append(
-            self._convert_tensor_format_and_dtype(parm_dict[f"{norm_out_name}.weight"], dtype))
+            self._convert_tensor_format_and_dtype(
+                parm_dict[f"{norm_out_name}.weight"], dtype
+            )
+        )
         ascend_weight.append(
-            self._convert_tensor_format_and_dtype(parm_dict[f"{lm_head_name}.weight"], dtype))
+            self._convert_tensor_format_and_dtype(
+                parm_dict[f"{lm_head_name}.weight"], dtype
+            )
+        )
+        self.ascend_weight = ascend_weight
         self.atb_encoder_operation.set_weights(ascend_weight)
         self.atb_decoder_operation.set_weights(ascend_weight)
 
@@ -147,20 +279,47 @@ class AtbBoostBase():
         """set kv_cache for llm boost"""
         if not k_caches or v_caches:
             if self.need_nz:
-                kv_shape = (self.config.num_blocks, self.num_kv_heads*self.head_dim //
-                            self.device_num // 16, self.config.block_size, 16)
-                k_caches = [_set_format(Parameter(Tensor(
-                    shape=kv_shape, dtype=self.dtype, init=Zero())), "FRACTAL_NZ") for _ in range(self.num_layers)]
-                v_caches = [_set_format(Parameter(Tensor(
-                    shape=kv_shape, dtype=self.dtype, init=Zero())), "FRACTAL_NZ") for _ in range(self.num_layers)]
+                kv_shape = (
+                    self.config.num_blocks,
+                    self.num_kv_heads * self.head_dim // self.device_num // 16,
+                    self.config.block_size,
+                    16,
+                )
+                k_caches = [
+                    _set_format(
+                        Parameter(
+                            Tensor(shape=kv_shape, dtype=self.dtype, init=Zero())
+                        ),
+                        FORMAT_NZ,
+                    )
+                    for _ in range(self.num_layers)
+                ]
+                v_caches = [
+                    _set_format(
+                        Parameter(
+                            Tensor(shape=kv_shape, dtype=self.dtype, init=Zero())
+                        ),
+                        FORMAT_NZ,
+                    )
+                    for _ in range(self.num_layers)
+                ]
             else:
-                kv_shape = (self.config.num_blocks, self.config.block_size,
-                            self.num_kv_heads // self.device_num, self.head_dim)
-                k_caches = [Parameter(Tensor(
-                    shape=kv_shape, dtype=self.dtype, init=Zero())) for _ in range(self.num_layers)]
-                v_caches = [Parameter(Tensor(
-                    shape=kv_shape, dtype=self.dtype, init=Zero())) for _ in range(self.num_layers)]
-
+                kv_shape = (
+                    self.config.num_blocks,
+                    self.config.block_size,
+                    self.num_kv_heads // self.device_num,
+                    self.head_dim,
+                )
+                k_caches = [
+                    Parameter(Tensor(shape=kv_shape, dtype=self.dtype, init=Zero()))
+                    for _ in range(self.num_layers)
+                ]
+                v_caches = [
+                    Parameter(Tensor(shape=kv_shape, dtype=self.dtype, init=Zero()))
+                    for _ in range(self.num_layers)
+                ]
+        self.k_caches = k_caches
+        self.v_caches = v_caches
         self.atb_encoder_operation.set_kvcache(k_caches, v_caches)
         self.atb_decoder_operation.set_kvcache(k_caches, v_caches)
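
Aside (not part of the diff): set_kvcache allocates per-layer paged caches whose shape depends on the device format — plain ND keeps a (blocks, block_size, heads, head_dim) layout, while FRACTAL_NZ tiles the heads*head_dim axis into 16-wide fragments, hence the extra //16 and the trailing 16. Plugging in hypothetical config values shows the two layouts hold the same element count:

# Hypothetical values standing in for config.num_blocks, config.block_size,
# num_kv_heads, head_dim and device_num.
num_blocks, block_size = 128, 16
num_kv_heads, head_dim, device_num = 8, 128, 2

nd_shape = (num_blocks, block_size, num_kv_heads // device_num, head_dim)
nz_shape = (num_blocks, num_kv_heads * head_dim // device_num // 16,
            block_size, 16)

print(nd_shape)  # (128, 16, 4, 128)
print(nz_shape)  # (128, 32, 16, 16) -- same element count, fractal tiling
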
 
@@ -171,11 +330,9 @@
     def _execute_operator(self, acl_inputs, acl_param):
         """execute operator."""
         if self.is_first_iteration:
-            acl_model_out = self.atb_encoder_operation.forward(
-                acl_inputs, acl_param)
+            acl_model_out = self.atb_encoder_operation.forward(acl_inputs, acl_param)
         else:
-            acl_model_out = self.atb_decoder_operation.forward(
-                acl_inputs, acl_param)
+            acl_model_out = self.atb_decoder_operation.forward(acl_inputs, acl_param)
         acl_hidden_state = acl_model_out[0]
         return acl_hidden_state
 
@@ -183,28 +340,46 @@ class AtbBoostBase():
         r"""
         LlmBoost forward.
         """
-        input_ids = boost_inputs["input_ids"]
-        position_ids = boost_inputs["position_ids"]
-        cos_embed = boost_inputs["cos_embed"]
-        sin_embed = boost_inputs["sin_embed"]
-        block_tables = boost_inputs["block_tables"]
-        slot_mapping = boost_inputs["slot_mapping"]
-        batch_valid_length = boost_inputs["batch_valid_length"]
-        lm_head_indices = boost_inputs["lm_head_indices"]
-        seqLen = boost_inputs["seq_lens"]
+        input_ids = boost_inputs.get("input_ids", None)
+        position_ids = boost_inputs.get("position_ids", None)
+        cos_embed = boost_inputs.get("cos_embed", None)
+        sin_embed = boost_inputs.get("sin_embed", None)
+        block_tables = boost_inputs.get("block_tables", None)
+        slot_mapping = boost_inputs.get("slot_mapping", None)
+        batch_valid_length = boost_inputs.get("batch_valid_length", None)
+        lm_head_indices = boost_inputs.get("lm_head_indices", None)
+        seqLen = boost_inputs.get("seq_lens", None)
+        input_ids = self.reshape(input_ids, (-1,))
         if self.is_first_iteration:
             attention_mask = self.attn_mask
         else:
-            position_ids = batch_valid_length - 1
+            if position_ids is None:
+                position_ids = batch_valid_length - 1
             attention_mask = self.placeholder
             lm_head_indices = self.lm_head_indices_fake
 
-        acl_inputs, acl_param = self._prepare_inputs(prefill=self.is_first_iteration, input_ids=input_ids,
-                                                     position_ids=position_ids, cos_embed=cos_embed,
-                                                     sin_embed=sin_embed, attention_mask=attention_mask,
-                                                     block_tables=block_tables, slots=slot_mapping,
-                                                     input_lengths=batch_valid_length, lm_head_indices=lm_head_indices,
-                                                     seqLen=seqLen)
+        if input_ids is not None and input_ids.dtype != mstype.int64:
+            input_ids = self.cast(input_ids, mstype.int64)
+        if position_ids is not None and position_ids.dtype != mstype.int64:
+            position_ids = self.cast(position_ids, mstype.int64)
+        if batch_valid_length is not None and batch_valid_length.dtype != mstype.int32:
+            batch_valid_length = self.cast(batch_valid_length, mstype.int32)
+        if lm_head_indices is not None and lm_head_indices.dtype != mstype.int64:
+            lm_head_indices = self.cast(lm_head_indices, mstype.int64)
+
+        acl_inputs, acl_param = self._prepare_inputs(
+            prefill=self.is_first_iteration,
+            input_ids=input_ids,
+            position_ids=position_ids,
+            cos_embed=cos_embed,
+            sin_embed=sin_embed,
+            attention_mask=attention_mask,
+            block_tables=block_tables,
+            slots=slot_mapping,
+            input_lengths=batch_valid_length,
+            lm_head_indices=lm_head_indices,
+            seqLen=seqLen,
+        )
         ms.hal.synchronize()
         logits = self._execute_operator(acl_inputs, acl_param)
         logits = self.cast(logits, mstype.float32)
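
Aside (not part of the diff): forward now reads boost_inputs defensively with dict.get, flattens input_ids, and normalizes integer dtypes itself — the casts that 2.4.1 performed inside _prepare_inputs, removed in the llama_boost.py hunks below. A hypothetical decode-step input dict using the key names forward reads; shapes and values are illustrative only:

import numpy as np
from mindspore import Tensor

# Single-token decode step for a batch of one. "position_ids" and
# "lm_head_indices" are omitted on purpose: in decode mode forward()
# derives position_ids from batch_valid_length - 1 and substitutes the
# fake lm_head_indices placeholder.
boost_inputs = {
    "input_ids": Tensor(np.array([[11]], dtype=np.int32)),        # recast to int64 inside
    "batch_valid_length": Tensor(np.array([8], dtype=np.int64)),  # recast to int32 inside
    "block_tables": Tensor(np.array([[0]], dtype=np.int32)),
    "slot_mapping": Tensor(np.array([7], dtype=np.int32)),
    "cos_embed": Tensor(np.zeros((1, 128), dtype=np.float16)),    # caller-built rotary tables
    "sin_embed": Tensor(np.zeros((1, 128), dtype=np.float16)),
    "seq_lens": [1],
}
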
mindspore/experimental/llm_boost/atb/llama_boost.py

@@ -15,10 +15,16 @@
 """llm boost"""
 import json
 import mindspore.common.dtype as mstype
-from mindspore.experimental.llm_boost.atb.boost_base import AtbBoostBase
+from mindspore.experimental.llm_boost.atb.boost_base import (
+    AtbBoostBase,
+    PositionEmbeddingType,
+    NormType,
+)
 from mindspore._c_expression import LlmBoostBinder
 from mindspore.experimental.llm_boost.register import LlmBoostRegister, LlmBoostType
 
+CPP_LLAMA_MODEL_CLASS_NAME = "llama_LlamaDecoderModel"
+
 
 @LlmBoostRegister.register(LlmBoostType.BUILDIN, "Llama")
 class LlamaBoost(AtbBoostBase):
@@ -30,14 +36,21 @@ class LlamaBoost(AtbBoostBase):
         self.acl_encoder_operation_inputs = [None] * self.in_tensor_length
         self.acl_decoder_operation_inputs = [None] * self.in_tensor_length
         self.atb_encoder_operation = LlmBoostBinder(
-            "ATB", "llama_parallel_DecoderModel")
+            self.backend_name, CPP_LLAMA_MODEL_CLASS_NAME
+        )
         self.atb_decoder_operation = LlmBoostBinder(
-            "ATB", "llama_parallel_DecoderModel")
+            self.backend_name, CPP_LLAMA_MODEL_CLASS_NAME
+        )
 
     def init(self):
-        """set param"""
+        """
+        Initialize the object
+        returns True if object needs input manipulation by mindformers
+        """
+
         coder_param = {
-            "rmsNormEps": self.config.rms_norm_eps,
+            "normEps": self.config.rms_norm_eps,
+            "normType": NormType.RMS_NORM,
             "numAttentionHeadsPerRank": self.config.num_heads // self.device_num,
             "hiddenSizePerAttentionHead": self.head_dim,
             "numHiddenLayers": self.num_layers,
@@ -46,35 +59,45 @@ class LlamaBoost(AtbBoostBase):
             "isFA": False,
             "isBF16": self.dtype == mstype.bfloat16,
             "packQuantType": [[1, 1] for _ in range(self.num_layers)],
-            "linearQuantType": [[0, -1, -1, 0, 0, -1, 0] for _ in range(self.num_layers)],
-            "linearTransposeType": [[1, -1, -1, 1, 1, -1, 1] for i in range(self.num_layers)],
+            "linearQuantType": [
+                [0, -1, -1, 0, 0, -1, 0] for _ in range(self.num_layers)
+            ],
+            "linearTransposeType": [
+                [1, -1, -1, 1, 1, -1, 1] for i in range(self.num_layers)
+            ],
             "isEmbeddingParallel": False,
             "isLmHeadParallel": not self.config.parallel_config.vocab_emb_dp,
             "lmHeadTransposeType": 1,
-            "supportSwiGLU": True,
-            "kvQuant": self.kv_quant is not None,
+            "enableSwiGLU": True,
+            "enablekvQuant": self.kv_quant is not None,
             "rank": self.rank_id,
             "worldSize": self.device_num,
-            "backend": "lccl",
+            "backend": self.config.communication_backend,
             "rankTableFile": "",
-            "positionEmbeddingType": self.position_embedding_type,
+            "positionEmbeddingType": PositionEmbeddingType.ROPE,
             "hiddenSize": self.config.hidden_size,
             "gemma": False,
-            "enableAddNorm": True,
-            "supportCompressHead": False,
+            "enableAddNorm": False,
+            "enableCompressHead": False,
+            "isUnpadInputs": True,
         }
         encoder_param = {
-            **coder_param, "isPrefill": True,
-            "supportLcoc": True,
-            "supportSpeculate": False,
-            "skipWordEmbedding": False
+            **coder_param,
+            "isPrefill": True,
+            "enableLcoc": True,
+            "enableSpeculate": False,
+            "skipWordEmbedding": False,
+            "enableSplitFuse": False,
         }
         decoder_param = {
-            **coder_param, "isPrefill": False, "supportLcoc": False,
-            "supportSpeculate": False
+            **coder_param,
+            "isPrefill": False,
+            "enableLcoc": False,
+            "enableSpeculate": False,
         }
         self.atb_encoder_operation.init(json.dumps({**encoder_param}))
         self.atb_decoder_operation.init(json.dumps({**decoder_param}))
+        return True
 
     def _prepare_inputs(
         self,
@@ -92,14 +115,15 @@ class LlamaBoost(AtbBoostBase):
         **kwargs
     ):
         """prepare inputs"""
-        self.acl_param = json.dumps({
-            "seqLen": seqLen,
-        })
-        self.acl_decoder_operation_inputs[0] = self.cast(
-            input_ids, mstype.int64)
+        self.acl_param = json.dumps(
+            {
+                "seqLen": seqLen,
+            }
+        )
+
+        self.acl_decoder_operation_inputs[0] = input_ids
         self.acl_decoder_operation_inputs[1] = self.placeholder
-        self.acl_decoder_operation_inputs[2] = self.cast(
-            position_ids, mstype.int32)
+        self.acl_decoder_operation_inputs[2] = position_ids
         self.acl_decoder_operation_inputs[3] = cos_embed
         self.acl_decoder_operation_inputs[4] = sin_embed
         self.acl_decoder_operation_inputs[5] = attention_mask
@@ -108,8 +132,6 @@ class LlamaBoost(AtbBoostBase):
         self.acl_decoder_operation_inputs[8] = self.placeholder
         self.acl_decoder_operation_inputs[9] = self.placeholder
         self.acl_decoder_operation_inputs[10] = self.placeholder
-        self.acl_decoder_operation_inputs[11] = self.cast(
-            input_lengths, mstype.int32)
-        self.acl_decoder_operation_inputs[12] = self.cast(
-            lm_head_indices, mstype.int64)
+        self.acl_decoder_operation_inputs[11] = input_lengths
+        self.acl_decoder_operation_inputs[12] = lm_head_indices
         return self.acl_decoder_operation_inputs, self.acl_param
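
Aside (not part of the diff): back in LlamaBoost.init above, one shared coder_param dict is built and the prefill/decode variants are derived by dict unpacking, where later keys extend or override the copy; the extra {**encoder_param} inside json.dumps makes one more shallow copy and is harmless. A minimal sketch of the merge semantics, with hypothetical keys:

import json

coder_param = {"normType": 0, "enableLcoc": False}

# {**base, ...} copies base, then the phase-specific keys win.
encoder_param = {**coder_param, "isPrefill": True, "enableLcoc": True}
decoder_param = {**coder_param, "isPrefill": False}

print(json.dumps(encoder_param))
# {"normType": 0, "enableLcoc": true, "isPrefill": true}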