mindspore 2.7.0rc1-cp311-cp311-win_amd64.whl → 2.7.1-cp311-cp311-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mindspore might be problematic.

Files changed (370)
  1. mindspore/.commit_id +1 -1
  2. mindspore/__init__.py +5 -2
  3. mindspore/_c_dataengine.cp311-win_amd64.pyd +0 -0
  4. mindspore/_c_expression.cp311-win_amd64.pyd +0 -0
  5. mindspore/_c_mindrecord.cp311-win_amd64.pyd +0 -0
  6. mindspore/_checkparam.py +2 -2
  7. mindspore/_extends/builtin_operations.py +3 -3
  8. mindspore/_extends/parallel_compile/akg_compiler/custom.py +1109 -0
  9. mindspore/_extends/parallel_compile/akg_compiler/gen_custom_op_files.py +1 -1
  10. mindspore/_extends/parse/__init__.py +3 -3
  11. mindspore/_extends/parse/compile_config.py +24 -1
  12. mindspore/_extends/parse/deprecated/deprecated_tensor_method.py +6 -3
  13. mindspore/_extends/parse/parser.py +28 -22
  14. mindspore/_extends/parse/resources.py +1 -1
  15. mindspore/_extends/parse/standard_method.py +23 -2
  16. mindspore/_extends/parse/trope.py +2 -1
  17. mindspore/_extends/pijit/pijit_func_white_list.py +9 -27
  18. mindspore/amp.py +0 -18
  19. mindspore/avcodec-59.dll +0 -0
  20. mindspore/avdevice-59.dll +0 -0
  21. mindspore/avfilter-8.dll +0 -0
  22. mindspore/avformat-59.dll +0 -0
  23. mindspore/avutil-57.dll +0 -0
  24. mindspore/boost/base.py +29 -2
  25. mindspore/common/__init__.py +18 -12
  26. mindspore/common/_decorator.py +3 -2
  27. mindspore/common/_grad_function.py +3 -1
  28. mindspore/common/_tensor_cpp_method.py +1 -1
  29. mindspore/common/_tensor_docs.py +371 -96
  30. mindspore/common/_utils.py +7 -43
  31. mindspore/common/api.py +434 -135
  32. mindspore/common/dtype.py +98 -57
  33. mindspore/common/dump.py +7 -108
  34. mindspore/common/dynamic_shape/__init__.py +0 -0
  35. mindspore/common/{auto_dynamic_shape.py → dynamic_shape/auto_dynamic_shape.py} +15 -23
  36. mindspore/common/dynamic_shape/enable_dynamic.py +197 -0
  37. mindspore/common/file_system.py +59 -9
  38. mindspore/common/hook_handle.py +82 -3
  39. mindspore/common/jit_config.py +5 -1
  40. mindspore/common/jit_trace.py +27 -12
  41. mindspore/common/lazy_inline.py +5 -3
  42. mindspore/common/np_dtype.py +3 -3
  43. mindspore/common/parameter.py +17 -127
  44. mindspore/common/recompute.py +4 -13
  45. mindspore/common/tensor.py +50 -217
  46. mindspore/communication/_comm_helper.py +11 -1
  47. mindspore/communication/comm_func.py +138 -4
  48. mindspore/communication/management.py +85 -1
  49. mindspore/config/op_info.config +0 -15
  50. mindspore/context.py +20 -106
  51. mindspore/dataset/__init__.py +1 -1
  52. mindspore/dataset/audio/transforms.py +1 -1
  53. mindspore/dataset/core/config.py +35 -1
  54. mindspore/dataset/engine/datasets.py +338 -319
  55. mindspore/dataset/engine/datasets_user_defined.py +38 -22
  56. mindspore/dataset/engine/datasets_vision.py +1 -1
  57. mindspore/dataset/engine/validators.py +1 -15
  58. mindspore/dataset/transforms/c_transforms.py +2 -2
  59. mindspore/dataset/transforms/transforms.py +3 -3
  60. mindspore/dataset/vision/__init__.py +1 -1
  61. mindspore/dataset/vision/py_transforms.py +8 -8
  62. mindspore/dataset/vision/transforms.py +17 -5
  63. mindspore/dataset/vision/utils.py +632 -21
  64. mindspore/device_context/ascend/op_tuning.py +35 -1
  65. mindspore/dnnl.dll +0 -0
  66. mindspore/{profiler/common/validator → graph}/__init__.py +9 -1
  67. mindspore/graph/custom_pass.py +55 -0
  68. mindspore/include/api/cell.h +28 -4
  69. mindspore/include/api/cfg.h +24 -7
  70. mindspore/include/api/context.h +1 -0
  71. mindspore/include/api/delegate.h +0 -2
  72. mindspore/include/api/dual_abi_helper.h +100 -19
  73. mindspore/include/api/graph.h +14 -1
  74. mindspore/include/api/kernel.h +16 -3
  75. mindspore/include/api/kernel_api.h +9 -1
  76. mindspore/include/api/metrics/accuracy.h +9 -0
  77. mindspore/include/api/model.h +5 -1
  78. mindspore/include/api/model_group.h +4 -0
  79. mindspore/include/api/model_parallel_runner.h +2 -0
  80. mindspore/include/api/status.h +48 -10
  81. mindspore/include/api/types.h +6 -1
  82. mindspore/include/dataset/constants.h +9 -0
  83. mindspore/include/dataset/execute.h +2 -2
  84. mindspore/jpeg62.dll +0 -0
  85. mindspore/mindrecord/__init__.py +3 -3
  86. mindspore/mindrecord/common/exceptions.py +1 -0
  87. mindspore/mindrecord/config.py +1 -1
  88. mindspore/{parallel/mpi → mindrecord/core}/__init__.py +4 -1
  89. mindspore/mindrecord/{shardheader.py → core/shardheader.py} +2 -1
  90. mindspore/mindrecord/{shardindexgenerator.py → core/shardindexgenerator.py} +1 -1
  91. mindspore/mindrecord/{shardreader.py → core/shardreader.py} +2 -1
  92. mindspore/mindrecord/{shardsegment.py → core/shardsegment.py} +2 -2
  93. mindspore/mindrecord/{shardutils.py → core/shardutils.py} +1 -1
  94. mindspore/mindrecord/{shardwriter.py → core/shardwriter.py} +1 -1
  95. mindspore/mindrecord/filereader.py +4 -4
  96. mindspore/mindrecord/filewriter.py +5 -5
  97. mindspore/mindrecord/mindpage.py +2 -2
  98. mindspore/mindrecord/tools/cifar10.py +4 -3
  99. mindspore/mindrecord/tools/cifar100.py +1 -1
  100. mindspore/mindrecord/tools/cifar100_to_mr.py +1 -1
  101. mindspore/mindrecord/tools/cifar10_to_mr.py +6 -6
  102. mindspore/mindrecord/tools/csv_to_mr.py +1 -1
  103. mindspore/mindrecord/tools/imagenet_to_mr.py +1 -1
  104. mindspore/mindrecord/tools/mnist_to_mr.py +1 -1
  105. mindspore/mindrecord/tools/tfrecord_to_mr.py +1 -1
  106. mindspore/mindspore_backend_common.dll +0 -0
  107. mindspore/mindspore_backend_manager.dll +0 -0
  108. mindspore/mindspore_cluster.dll +0 -0
  109. mindspore/mindspore_common.dll +0 -0
  110. mindspore/mindspore_core.dll +0 -0
  111. mindspore/mindspore_cpu.dll +0 -0
  112. mindspore/mindspore_dump.dll +0 -0
  113. mindspore/mindspore_frontend.dll +0 -0
  114. mindspore/mindspore_glog.dll +0 -0
  115. mindspore/mindspore_hardware_abstract.dll +0 -0
  116. mindspore/mindspore_memory_pool.dll +0 -0
  117. mindspore/mindspore_ms_backend.dll +0 -0
  118. mindspore/mindspore_ops.dll +0 -0
  119. mindspore/{mindspore_ops_host.dll → mindspore_ops_cpu.dll} +0 -0
  120. mindspore/mindspore_profiler.dll +0 -0
  121. mindspore/mindspore_pyboost.dll +0 -0
  122. mindspore/mindspore_pynative.dll +0 -0
  123. mindspore/mindspore_runtime_pipeline.dll +0 -0
  124. mindspore/mindspore_runtime_utils.dll +0 -0
  125. mindspore/mindspore_tools.dll +0 -0
  126. mindspore/mint/__init__.py +15 -10
  127. mindspore/mint/distributed/__init__.py +4 -0
  128. mindspore/mint/distributed/distributed.py +392 -69
  129. mindspore/mint/nn/__init__.py +2 -16
  130. mindspore/mint/nn/functional.py +4 -110
  131. mindspore/mint/nn/layer/__init__.py +0 -2
  132. mindspore/mint/nn/layer/_functions.py +1 -2
  133. mindspore/mint/nn/layer/activation.py +0 -6
  134. mindspore/mint/nn/layer/basic.py +0 -47
  135. mindspore/mint/nn/layer/conv.py +10 -10
  136. mindspore/mint/nn/layer/normalization.py +11 -16
  137. mindspore/mint/nn/layer/pooling.py +0 -4
  138. mindspore/nn/__init__.py +1 -3
  139. mindspore/nn/cell.py +231 -239
  140. mindspore/nn/layer/activation.py +4 -2
  141. mindspore/nn/layer/basic.py +56 -14
  142. mindspore/nn/layer/container.py +16 -0
  143. mindspore/nn/layer/embedding.py +4 -169
  144. mindspore/nn/layer/image.py +1 -1
  145. mindspore/nn/layer/normalization.py +2 -1
  146. mindspore/nn/layer/thor_layer.py +4 -85
  147. mindspore/nn/optim/ada_grad.py +0 -1
  148. mindspore/nn/optim/adafactor.py +0 -1
  149. mindspore/nn/optim/adam.py +32 -127
  150. mindspore/nn/optim/adamax.py +0 -1
  151. mindspore/nn/optim/asgd.py +0 -1
  152. mindspore/nn/optim/ftrl.py +8 -102
  153. mindspore/nn/optim/lamb.py +1 -4
  154. mindspore/nn/optim/lars.py +0 -3
  155. mindspore/nn/optim/lazyadam.py +25 -218
  156. mindspore/nn/optim/momentum.py +5 -43
  157. mindspore/nn/optim/optimizer.py +6 -55
  158. mindspore/nn/optim/proximal_ada_grad.py +0 -1
  159. mindspore/nn/optim/rmsprop.py +0 -1
  160. mindspore/nn/optim/rprop.py +0 -1
  161. mindspore/nn/optim/sgd.py +0 -1
  162. mindspore/nn/optim/tft_wrapper.py +2 -4
  163. mindspore/nn/optim/thor.py +0 -2
  164. mindspore/nn/probability/bijector/bijector.py +7 -8
  165. mindspore/nn/probability/bijector/gumbel_cdf.py +2 -2
  166. mindspore/nn/probability/bijector/power_transform.py +20 -21
  167. mindspore/nn/probability/bijector/scalar_affine.py +5 -5
  168. mindspore/nn/probability/bijector/softplus.py +13 -14
  169. mindspore/nn/probability/distribution/_utils/utils.py +2 -2
  170. mindspore/nn/wrap/cell_wrapper.py +39 -5
  171. mindspore/nn/wrap/grad_reducer.py +4 -89
  172. mindspore/numpy/array_creations.py +4 -4
  173. mindspore/numpy/fft.py +9 -9
  174. mindspore/numpy/utils_const.py +1 -1
  175. mindspore/{nn/reinforcement → onnx}/__init__.py +5 -8
  176. mindspore/onnx/onnx_export.py +137 -0
  177. mindspore/opencv_core4110.dll +0 -0
  178. mindspore/opencv_imgcodecs4110.dll +0 -0
  179. mindspore/{opencv_imgproc452.dll → opencv_imgproc4110.dll} +0 -0
  180. mindspore/ops/__init__.py +2 -0
  181. mindspore/ops/_grad_experimental/grad_comm_ops.py +38 -2
  182. mindspore/ops/_grad_experimental/grad_inner_ops.py +0 -9
  183. mindspore/ops/_op_impl/aicpu/__init__.py +0 -10
  184. mindspore/ops/_op_impl/cpu/__init__.py +1 -5
  185. mindspore/ops/_op_impl/cpu/{buffer_append.py → joinedstr_op.py} +8 -8
  186. mindspore/ops/auto_generate/cpp_create_prim_instance_helper.py +28 -24
  187. mindspore/ops/auto_generate/gen_extend_func.py +6 -11
  188. mindspore/ops/auto_generate/gen_ops_def.py +385 -154
  189. mindspore/ops/auto_generate/gen_ops_prim.py +5676 -5167
  190. mindspore/ops/communication.py +97 -0
  191. mindspore/ops/composite/__init__.py +5 -2
  192. mindspore/ops/composite/base.py +16 -2
  193. mindspore/ops/composite/multitype_ops/__init__.py +3 -1
  194. mindspore/ops/composite/multitype_ops/_compile_utils.py +150 -8
  195. mindspore/ops/composite/multitype_ops/_constexpr_utils.py +1 -1
  196. mindspore/ops/composite/multitype_ops/add_impl.py +7 -0
  197. mindspore/ops/composite/multitype_ops/mod_impl.py +27 -0
  198. mindspore/ops/function/__init__.py +2 -0
  199. mindspore/ops/function/array_func.py +24 -18
  200. mindspore/ops/function/comm_func.py +3883 -0
  201. mindspore/ops/function/debug_func.py +7 -6
  202. mindspore/ops/function/grad/grad_func.py +4 -12
  203. mindspore/ops/function/math_func.py +89 -86
  204. mindspore/ops/function/nn_func.py +92 -313
  205. mindspore/ops/function/random_func.py +9 -18
  206. mindspore/ops/functional.py +4 -1
  207. mindspore/ops/functional_overload.py +377 -30
  208. mindspore/ops/operations/__init__.py +2 -5
  209. mindspore/ops/operations/_custom_ops_utils.py +7 -9
  210. mindspore/ops/operations/_inner_ops.py +12 -50
  211. mindspore/ops/operations/_rl_inner_ops.py +0 -933
  212. mindspore/ops/operations/array_ops.py +5 -50
  213. mindspore/ops/operations/comm_ops.py +95 -17
  214. mindspore/ops/operations/custom_ops.py +237 -22
  215. mindspore/ops/operations/debug_ops.py +33 -35
  216. mindspore/ops/operations/manually_defined/ops_def.py +39 -318
  217. mindspore/ops/operations/math_ops.py +5 -5
  218. mindspore/ops/operations/nn_ops.py +3 -3
  219. mindspore/ops/operations/sparse_ops.py +0 -83
  220. mindspore/ops/primitive.py +4 -27
  221. mindspore/ops/tensor_method.py +88 -10
  222. mindspore/ops_generate/aclnn/aclnn_kernel_register_auto_cc_generator.py +5 -5
  223. mindspore/ops_generate/aclnn/gen_aclnn_implement.py +8 -8
  224. mindspore/ops_generate/api/functions_cc_generator.py +53 -4
  225. mindspore/ops_generate/api/tensor_func_reg_cpp_generator.py +25 -11
  226. mindspore/ops_generate/common/gen_constants.py +11 -10
  227. mindspore/ops_generate/common/op_proto.py +18 -1
  228. mindspore/ops_generate/common/template.py +102 -245
  229. mindspore/ops_generate/common/template_utils.py +212 -0
  230. mindspore/ops_generate/gen_custom_ops.py +69 -0
  231. mindspore/ops_generate/op_def/ops_def_cc_generator.py +78 -7
  232. mindspore/ops_generate/op_def_py/base_op_prim_py_generator.py +360 -0
  233. mindspore/ops_generate/op_def_py/custom_op_prim_py_generator.py +140 -0
  234. mindspore/ops_generate/op_def_py/op_def_py_generator.py +54 -7
  235. mindspore/ops_generate/op_def_py/op_prim_py_generator.py +5 -312
  236. mindspore/ops_generate/pyboost/auto_grad_impl_cc_generator.py +74 -17
  237. mindspore/ops_generate/pyboost/auto_grad_reg_cc_generator.py +22 -5
  238. mindspore/ops_generate/pyboost/gen_pyboost_func.py +0 -16
  239. mindspore/ops_generate/pyboost/op_template_parser.py +3 -2
  240. mindspore/ops_generate/pyboost/pyboost_functions_cpp_generator.py +21 -5
  241. mindspore/ops_generate/pyboost/pyboost_functions_h_generator.py +2 -2
  242. mindspore/ops_generate/pyboost/pyboost_functions_impl_cpp_generator.py +30 -10
  243. mindspore/ops_generate/pyboost/pyboost_grad_function_cpp_generator.py +10 -3
  244. mindspore/ops_generate/pyboost/pyboost_internal_kernel_info_adapter_generator.py +1 -1
  245. mindspore/ops_generate/pyboost/pyboost_native_grad_functions_generator.py +19 -9
  246. mindspore/ops_generate/pyboost/pyboost_op_cpp_code_generator.py +71 -28
  247. mindspore/ops_generate/pyboost/pyboost_overload_functions_cpp_generator.py +10 -9
  248. mindspore/ops_generate/pyboost/pyboost_utils.py +27 -16
  249. mindspore/ops_generate/resources/yaml_loader.py +13 -0
  250. mindspore/ops_generate/tensor_py_cc_generator.py +2 -2
  251. mindspore/parallel/_auto_parallel_context.py +5 -15
  252. mindspore/parallel/_cell_wrapper.py +1 -1
  253. mindspore/parallel/_parallel_serialization.py +4 -6
  254. mindspore/parallel/_ps_context.py +2 -2
  255. mindspore/parallel/_utils.py +34 -17
  256. mindspore/parallel/auto_parallel.py +23 -9
  257. mindspore/parallel/checkpoint_transform.py +20 -2
  258. mindspore/parallel/cluster/process_entity/_api.py +28 -33
  259. mindspore/parallel/cluster/process_entity/_utils.py +9 -5
  260. mindspore/parallel/cluster/run.py +5 -3
  261. mindspore/{experimental/llm_boost/ascend_native → parallel/distributed}/__init__.py +21 -22
  262. mindspore/parallel/distributed/distributed_data_parallel.py +393 -0
  263. mindspore/parallel/distributed/flatten_grad_buffer.py +295 -0
  264. mindspore/parallel/function/reshard_func.py +6 -5
  265. mindspore/parallel/nn/parallel_cell_wrapper.py +40 -3
  266. mindspore/parallel/nn/parallel_grad_reducer.py +0 -8
  267. mindspore/parallel/shard.py +7 -21
  268. mindspore/parallel/strategy.py +336 -0
  269. mindspore/parallel/transform_safetensors.py +127 -20
  270. mindspore/profiler/analysis/viewer/ascend_kernel_details_viewer.py +13 -9
  271. mindspore/profiler/analysis/viewer/ascend_op_memory_viewer.py +1 -1
  272. mindspore/profiler/analysis/viewer/ms_minddata_viewer.py +1 -1
  273. mindspore/profiler/common/constant.py +5 -0
  274. mindspore/profiler/common/file_manager.py +9 -0
  275. mindspore/profiler/common/msprof_cmd_tool.py +40 -4
  276. mindspore/profiler/common/path_manager.py +65 -24
  277. mindspore/profiler/common/profiler_context.py +27 -14
  278. mindspore/profiler/common/profiler_info.py +3 -3
  279. mindspore/profiler/common/profiler_meta_data.py +1 -0
  280. mindspore/profiler/common/profiler_op_analyse.py +10 -6
  281. mindspore/profiler/common/profiler_path_manager.py +13 -0
  282. mindspore/profiler/common/util.py +30 -3
  283. mindspore/profiler/dynamic_profiler.py +91 -46
  284. mindspore/profiler/envprofiler.py +30 -5
  285. mindspore/profiler/experimental_config.py +18 -2
  286. mindspore/profiler/platform/cpu_profiler.py +10 -4
  287. mindspore/profiler/platform/npu_profiler.py +34 -7
  288. mindspore/profiler/profiler.py +193 -145
  289. mindspore/profiler/profiler_action_controller.py +1 -1
  290. mindspore/profiler/profiler_interface.py +2 -2
  291. mindspore/rewrite/symbol_tree/symbol_tree.py +1 -1
  292. mindspore/run_check/_check_version.py +108 -24
  293. mindspore/runtime/__init__.py +9 -6
  294. mindspore/runtime/executor.py +35 -0
  295. mindspore/runtime/memory.py +113 -0
  296. mindspore/runtime/thread_bind_core.py +1 -1
  297. mindspore/swresample-4.dll +0 -0
  298. mindspore/swscale-6.dll +0 -0
  299. mindspore/tinyxml2.dll +0 -0
  300. mindspore/{experimental/llm_boost → tools}/__init__.py +5 -5
  301. mindspore/tools/data_dump.py +130 -0
  302. mindspore/tools/sdc_detect.py +91 -0
  303. mindspore/tools/stress_detect.py +63 -0
  304. mindspore/train/__init__.py +6 -6
  305. mindspore/train/_utils.py +8 -21
  306. mindspore/train/amp.py +6 -7
  307. mindspore/train/callback/_callback.py +2 -1
  308. mindspore/train/callback/_checkpoint.py +1 -17
  309. mindspore/train/callback/_flops_collector.py +10 -6
  310. mindspore/train/callback/_train_fault_tolerance.py +72 -25
  311. mindspore/train/data_sink.py +5 -9
  312. mindspore/train/dataset_helper.py +5 -5
  313. mindspore/train/model.py +41 -230
  314. mindspore/train/serialization.py +160 -401
  315. mindspore/train/train_thor/model_thor.py +2 -2
  316. mindspore/turbojpeg.dll +0 -0
  317. mindspore/utils/__init__.py +6 -3
  318. mindspore/utils/dlpack.py +92 -0
  319. mindspore/utils/dryrun.py +1 -1
  320. mindspore/utils/runtime_execution_order_check.py +10 -0
  321. mindspore/utils/sdc_detect.py +14 -12
  322. mindspore/utils/stress_detect.py +43 -0
  323. mindspore/utils/utils.py +152 -16
  324. mindspore/version.py +1 -1
  325. {mindspore-2.7.0rc1.dist-info → mindspore-2.7.1.dist-info}/METADATA +3 -2
  326. {mindspore-2.7.0rc1.dist-info → mindspore-2.7.1.dist-info}/RECORD +330 -344
  327. mindspore/_extends/remote/kernel_build_server_ascend.py +0 -75
  328. mindspore/communication/_hccl_management.py +0 -297
  329. mindspore/experimental/llm_boost/ascend_native/llama_boost_ascend_native.py +0 -207
  330. mindspore/experimental/llm_boost/ascend_native/llm_boost.py +0 -52
  331. mindspore/experimental/llm_boost/atb/__init__.py +0 -23
  332. mindspore/experimental/llm_boost/atb/boost_base.py +0 -385
  333. mindspore/experimental/llm_boost/atb/llama_boost.py +0 -137
  334. mindspore/experimental/llm_boost/atb/qwen_boost.py +0 -124
  335. mindspore/experimental/llm_boost/register.py +0 -130
  336. mindspore/experimental/llm_boost/utils.py +0 -31
  337. mindspore/include/OWNERS +0 -7
  338. mindspore/mindspore_cpu_res_manager.dll +0 -0
  339. mindspore/mindspore_ops_kernel_common.dll +0 -0
  340. mindspore/mindspore_res_manager.dll +0 -0
  341. mindspore/nn/optim/_dist_optimizer_registry.py +0 -111
  342. mindspore/nn/reinforcement/_batch_read_write.py +0 -142
  343. mindspore/nn/reinforcement/_tensors_queue.py +0 -152
  344. mindspore/nn/reinforcement/tensor_array.py +0 -145
  345. mindspore/opencv_core452.dll +0 -0
  346. mindspore/opencv_imgcodecs452.dll +0 -0
  347. mindspore/ops/_op_impl/aicpu/priority_replay_buffer.py +0 -113
  348. mindspore/ops/_op_impl/aicpu/reservoir_replay_buffer.py +0 -96
  349. mindspore/ops/_op_impl/aicpu/sparse_cross.py +0 -42
  350. mindspore/ops/_op_impl/cpu/buffer_get.py +0 -28
  351. mindspore/ops/_op_impl/cpu/buffer_sample.py +0 -28
  352. mindspore/ops/_op_impl/cpu/priority_replay_buffer.py +0 -42
  353. mindspore/ops/operations/_tensor_array.py +0 -359
  354. mindspore/ops/operations/rl_ops.py +0 -288
  355. mindspore/parallel/_offload_context.py +0 -275
  356. mindspore/parallel/_recovery_context.py +0 -115
  357. mindspore/parallel/_transformer/__init__.py +0 -35
  358. mindspore/parallel/_transformer/layers.py +0 -765
  359. mindspore/parallel/_transformer/loss.py +0 -251
  360. mindspore/parallel/_transformer/moe.py +0 -693
  361. mindspore/parallel/_transformer/op_parallel_config.py +0 -222
  362. mindspore/parallel/_transformer/transformer.py +0 -3124
  363. mindspore/parallel/mpi/_mpi_config.py +0 -116
  364. mindspore/profiler/common/validator/validate_path.py +0 -84
  365. mindspore/train/memory_profiling_pb2.py +0 -298
  366. mindspore/utils/hooks.py +0 -81
  367. /mindspore/common/{_auto_dynamic.py → dynamic_shape/_auto_dynamic.py} +0 -0
  368. {mindspore-2.7.0rc1.dist-info → mindspore-2.7.1.dist-info}/WHEEL +0 -0
  369. {mindspore-2.7.0rc1.dist-info → mindspore-2.7.1.dist-info}/entry_points.txt +0 -0
  370. {mindspore-2.7.0rc1.dist-info → mindspore-2.7.1.dist-info}/top_level.txt +0 -0
mindspore/experimental/llm_boost/atb/boost_base.py
@@ -1,385 +0,0 @@
-# Copyright 2024 Huawei Technologies Co., Ltd
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ============================================================================
-"""boost base class"""
-from enum import Enum
-import numpy as np
-import mindspore as ms
-from mindspore import ops, Tensor
-from mindspore import log as logger
-import mindspore.common.dtype as mstype
-from mindspore._c_expression import _set_format
-from mindspore.common.parameter import Parameter
-from mindspore.experimental.llm_boost.utils import get_real_rank, get_real_group_size
-from mindspore.common.initializer import Zero
-
-FORMAT_NZ = "FRACTAL_NZ"
-BUILDIN_BACKEND_NAME = "ATB"
-
-
-class PositionEmbeddingType(int, Enum):
-    ROPE = 0
-    ALIBI = 1
-    ABSOLUTE = 2
-
-
-class NormType(int, Enum):
-    RMS_NORM = 0
-    LAYER_NORM = 1
-
-
-class AttentionMask:
-    """attention mask"""
-
-    @classmethod
-    def static(cls, max_seq_len, dtype=mstype.float16, need_nz=False):
-        """cache mask"""
-        bias_cache = Tensor(
-            np.tril(np.ones((max_seq_len, max_seq_len), dtype=np.bool_))
-        ).reshape(max_seq_len, max_seq_len)
-        bias_cache = ~bias_cache
-        if dtype == mstype.float16:
-            mask_value = Tensor(np.finfo(np.float32).min, mstype.float16)
-        else:
-            mask_value = Tensor(1)
-        attn_mask = ops.masked_fill(
-            Tensor(np.zeros((max_seq_len, max_seq_len)), dtype=mstype.float16),
-            bias_cache,
-            mask_value,
-        )
-        if need_nz:
-            # ND -> NZ
-            attn_mask = ops.reshape(attn_mask, (1, max_seq_len, max_seq_len))
-            attn_mask = ops.reshape(attn_mask, (1, max_seq_len, max_seq_len // 16, 16))
-            attn_mask = ops.transpose(attn_mask, (0, 2, 1, 3)).contiguous()
-            attn_mask = _set_format(attn_mask, FORMAT_NZ)
-        return attn_mask
-
-
-class AtbBoostBase:
-    """atb boost base class"""
-
-    def __init__(self, config):
-        super().__init__()
-        self.backend_name = BUILDIN_BACKEND_NAME
-        self.is_first_iteration = False
-        self.config = config
-        self.dtype = config.compute_dtype
-        self.num_heads = config.num_heads
-        self.num_kv_heads = config.n_kv_heads if config.n_kv_heads else self.num_heads
-        self.num_layers = config.num_layers
-        self.n_kv_heads = config.n_kv_heads if config.n_kv_heads else config.num_heads
-        self.head_dim = config.hidden_size // self.num_heads
-        self.need_nz = False
-        if hasattr(config, "need_nz"):
-            self.need_nz = config.need_nz
-        self.placeholder = Tensor(np.zeros(1), dtype=self.dtype)
-        self.lm_head_indices_fake = Tensor([0], dtype=mstype.int64)
-        self.position_embedding_type = PositionEmbeddingType.ROPE
-        self.add_norm_enable = True
-        self.max_decode_length = self.config.max_decode_length
-        self.max_base_len = 128
-        self.attn_mask = AttentionMask.static(
-            self.max_base_len, dtype=self.dtype, need_nz=self.need_nz
-        )
-
-        self.cast = ops.Cast()
-        self.reshape = ops.Reshape()
-        self.kv_quant = None
-        self.rank_id = get_real_rank()
-        self.device_num = get_real_group_size()
-        self.ascend_weight = []
-        self.k_caches = []
-        self.v_caches = []
-
-    def _convert_tensor_format_and_dtype(self, tensor, dtype=mstype.float16):
-        tensor = self.cast(tensor, dtype=dtype)
-        if self.need_nz:
-            tensor = _set_format(tensor, FORMAT_NZ)
-        return tensor
-
-    def _convert_qkv_concat_weight(self, param_dict):
-        """convert qkv concat weight"""
-        for i in range(self.num_layers):
-            # qkv weight concat
-            wq_weight_name = f"model.layers.{i}.attention.wq.weight"
-            wk_weight_name = f"model.layers.{i}.attention.wk.weight"
-            wv_weight_name = f"model.layers.{i}.attention.wv.weight"
-            qkv_concat_weight_name = f"model.layers.{i}.attention.w_qkv.weight"
-            if wq_weight_name not in param_dict:
-                break
-            wq_weight = param_dict[wq_weight_name].asnumpy()
-            wk_weight = param_dict[wk_weight_name].asnumpy()
-            wv_weight = param_dict[wv_weight_name].asnumpy()
-            qkv_weight = np.concatenate((wq_weight, wk_weight, wv_weight), 0)
-            param_dict[qkv_concat_weight_name] = Parameter(
-                qkv_weight, name=qkv_concat_weight_name
-            )
-
-            # gate hidden weight concat
-            ffn_gate_weight_name = f"model.layers.{i}.feed_forward.w1.weight"
-            ffn_hidden_weight_name = f"model.layers.{i}.feed_forward.w3.weight"
-            gate_hidden_concat_weight_name = (
-                f"model.layers.{i}.feed_forward.w_gate_hidden.weight"
-            )
-
-            ffn_gate_weight = param_dict[ffn_gate_weight_name].asnumpy()
-            ffn_hidden_weight = param_dict[ffn_hidden_weight_name].asnumpy()
-            gate_hidden_weight = np.concatenate((ffn_gate_weight, ffn_hidden_weight), 0)
-            param_dict[gate_hidden_concat_weight_name] = Parameter(
-                gate_hidden_weight, name=gate_hidden_concat_weight_name
-            )
-
-            param_dict.pop(wq_weight_name)
-            param_dict.pop(wk_weight_name)
-            param_dict.pop(wv_weight_name)
-            param_dict.pop(ffn_gate_weight_name)
-            param_dict.pop(ffn_hidden_weight_name)
-            logger.info(f"transform: {qkv_concat_weight_name}")
-            logger.info(f"transform: {gate_hidden_concat_weight_name}")
-
-        for i in range(self.num_layers):
-            # qkv bias concat
-            wq_bias_name = f"model.layers.{i}.attention.wq.bias"
-            wk_bias_name = f"model.layers.{i}.attention.wk.bias"
-            wv_bias_name = f"model.layers.{i}.attention.wv.bias"
-            qkv_concat_bias_name = f"model.layers.{i}.attention.w_qkv.bias"
-            if wq_bias_name not in param_dict:
-                break
-
-            wq_bias_weight = param_dict[wq_bias_name].asnumpy()
-            wk_bias_weight = param_dict[wk_bias_name].asnumpy()
-            wv_bias_weight = param_dict[wv_bias_name].asnumpy()
-            qkv_bias_weight = np.concatenate(
-                (wq_bias_weight, wk_bias_weight, wv_bias_weight), 0
-            )
-            param_dict[qkv_concat_bias_name] = Parameter(
-                qkv_bias_weight, name=qkv_concat_bias_name
-            )
-
-            param_dict.pop(wq_bias_name)
-            param_dict.pop(wk_bias_name)
-            param_dict.pop(wv_bias_name)
-            logger.info(f"transform: {qkv_concat_bias_name}")
-        return param_dict
-
-    def set_weights(self, parm_dict, dtype=mstype.float16):
-        """set weights for llm boost"""
-        self._convert_qkv_concat_weight(parm_dict)
-        embedding_weight_name = "model.tok_embeddings.embedding_weight"
-        attention_norm_name = "attention_norm"
-        qkv_name = "attention.w_qkv"
-        o_name = "attention.wo"
-        mlp_norm_name = "ffn_norm"
-        mlp_gate_name = "feed_forward.w_gate_hidden"
-        mlp_down_name = "feed_forward.w2"
-        norm_out_name = "model.norm_out"
-        lm_head_name = "lm_head"
-        placeholder = Parameter(Tensor(np.zeros(1), dtype=dtype))
-
-        ascend_weight = []
-        ascend_weight.append(self.cast(parm_dict[embedding_weight_name], dtype))
-        for i in range(self.num_layers):
-            ascend_weight.append(
-                self._convert_tensor_format_and_dtype(
-                    parm_dict[f"model.layers.{i}.{attention_norm_name}.weight"], dtype
-                )
-            )
-            ascend_weight.extend([placeholder] * 3)
-
-            ascend_weight.append(
-                self._convert_tensor_format_and_dtype(
-                    parm_dict[f"model.layers.{i}.{qkv_name}.weight"], dtype
-                )
-            )
-            ascend_weight.append(
-                self._convert_tensor_format_and_dtype(
-                    parm_dict.get(f"model.layers.{i}.{qkv_name}.bias", placeholder),
-                    dtype,
-                )
-            )
-            ascend_weight.extend([placeholder] * 16)
-
-            ascend_weight.append(
-                self._convert_tensor_format_and_dtype(
-                    parm_dict[f"model.layers.{i}.{o_name}.weight"], dtype
-                )
-            )
-            ascend_weight.append(
-                self._convert_tensor_format_and_dtype(
-                    parm_dict.get(f"model.layers.{i}.{o_name}.bias", placeholder), dtype
-                )
-            )
-            ascend_weight.extend([placeholder] * 4)
-
-            ascend_weight.append(
-                self._convert_tensor_format_and_dtype(
-                    parm_dict[f"model.layers.{i}.{mlp_norm_name}.weight"], dtype
-                )
-            )
-            ascend_weight.extend([placeholder] * 3)
-
-            ascend_weight.append(
-                self._convert_tensor_format_and_dtype(
-                    parm_dict[f"model.layers.{i}.{mlp_gate_name}.weight"], dtype
-                )
-            )
-            ascend_weight.append(
-                self._convert_tensor_format_and_dtype(
-                    parm_dict.get(
-                        f"model.layers.{i}.{mlp_gate_name}.bias", placeholder
-                    ),
-                    dtype,
-                )
-            )
-            ascend_weight.extend([placeholder] * 10)
-
-            ascend_weight.append(
-                self._convert_tensor_format_and_dtype(
-                    parm_dict[f"model.layers.{i}.{mlp_down_name}.weight"], dtype
-                )
-            )
-            ascend_weight.append(
-                self._convert_tensor_format_and_dtype(
-                    parm_dict.get(
-                        f"model.layers.{i}.{mlp_down_name}.bias", placeholder
-                    ),
-                    dtype,
-                )
-            )
-            ascend_weight.extend([placeholder] * 4)
-
-        ascend_weight.append(
-            self._convert_tensor_format_and_dtype(
-                parm_dict[f"{norm_out_name}.weight"], dtype
-            )
-        )
-        ascend_weight.append(
-            self._convert_tensor_format_and_dtype(
-                parm_dict[f"{lm_head_name}.weight"], dtype
-            )
-        )
-        self.ascend_weight = ascend_weight
-        self.atb_encoder_operation.set_weights(ascend_weight)
-        self.atb_decoder_operation.set_weights(ascend_weight)
-
-    def set_kvcache(self, k_caches=None, v_caches=None):
-        """set kv_cache for llm boost"""
-        if not k_caches or v_caches:
-            if self.need_nz:
-                kv_shape = (
-                    self.config.num_blocks,
-                    self.num_kv_heads * self.head_dim // self.device_num // 16,
-                    self.config.block_size,
-                    16,
-                )
-                k_caches = [
-                    _set_format(
-                        Parameter(
-                            Tensor(shape=kv_shape, dtype=self.dtype, init=Zero())
-                        ),
-                        FORMAT_NZ,
-                    )
-                    for _ in range(self.num_layers)
-                ]
-                v_caches = [
-                    _set_format(
-                        Parameter(
-                            Tensor(shape=kv_shape, dtype=self.dtype, init=Zero())
-                        ),
-                        FORMAT_NZ,
-                    )
-                    for _ in range(self.num_layers)
-                ]
-            else:
-                kv_shape = (
-                    self.config.num_blocks,
-                    self.config.block_size,
-                    self.num_kv_heads // self.device_num,
-                    self.head_dim,
-                )
-                k_caches = [
-                    Parameter(Tensor(shape=kv_shape, dtype=self.dtype, init=Zero()))
-                    for _ in range(self.num_layers)
-                ]
-                v_caches = [
-                    Parameter(Tensor(shape=kv_shape, dtype=self.dtype, init=Zero()))
-                    for _ in range(self.num_layers)
-                ]
-        self.k_caches = k_caches
-        self.v_caches = v_caches
-        self.atb_encoder_operation.set_kvcache(k_caches, v_caches)
-        self.atb_decoder_operation.set_kvcache(k_caches, v_caches)
-
-    def add_flags(self, is_first_iteration):
-        """add_flags."""
-        self.is_first_iteration = is_first_iteration
-
-    def _execute_operator(self, acl_inputs, acl_param):
-        """execute operator."""
-        if self.is_first_iteration:
-            acl_model_out = self.atb_encoder_operation.forward(acl_inputs, acl_param)
-        else:
-            acl_model_out = self.atb_decoder_operation.forward(acl_inputs, acl_param)
-        acl_hidden_state = acl_model_out[0]
-        return acl_hidden_state
-
-    def forward(self, boost_inputs):
-        r"""
-        LlmBoost forward.
-        """
-        input_ids = boost_inputs.get("input_ids", None)
-        position_ids = boost_inputs.get("position_ids", None)
-        cos_embed = boost_inputs.get("cos_embed", None)
-        sin_embed = boost_inputs.get("sin_embed", None)
-        block_tables = boost_inputs.get("block_tables", None)
-        slot_mapping = boost_inputs.get("slot_mapping", None)
-        batch_valid_length = boost_inputs.get("batch_valid_length", None)
-        lm_head_indices = boost_inputs.get("lm_head_indices", None)
-        seqLen = boost_inputs.get("seq_lens", None)
-        input_ids = self.reshape(input_ids, (-1,))
-        if self.is_first_iteration:
-            attention_mask = self.attn_mask
-        else:
-            if position_ids is None:
-                position_ids = batch_valid_length - 1
-            attention_mask = self.placeholder
-            lm_head_indices = self.lm_head_indices_fake
-
-        if input_ids is not None and input_ids.dtype != mstype.int64:
-            input_ids = self.cast(input_ids, mstype.int64)
-        if position_ids is not None and position_ids.dtype != mstype.int64:
-            position_ids = self.cast(position_ids, mstype.int64)
-        if batch_valid_length is not None and batch_valid_length.dtype != mstype.int32:
-            batch_valid_length = self.cast(batch_valid_length, mstype.int32)
-        if lm_head_indices is not None and lm_head_indices.dtype != mstype.int64:
-            lm_head_indices = self.cast(lm_head_indices, mstype.int64)
-
-        acl_inputs, acl_param = self._prepare_inputs(
-            prefill=self.is_first_iteration,
-            input_ids=input_ids,
-            position_ids=position_ids,
-            cos_embed=cos_embed,
-            sin_embed=sin_embed,
-            attention_mask=attention_mask,
-            block_tables=block_tables,
-            slots=slot_mapping,
-            input_lengths=batch_valid_length,
-            lm_head_indices=lm_head_indices,
-            seqLen=seqLen,
-        )
-        ms.hal.synchronize()
-        logits = self._execute_operator(acl_inputs, acl_param)
-        logits = self.cast(logits, mstype.float32)
-        return logits
mindspore/experimental/llm_boost/atb/llama_boost.py
@@ -1,137 +0,0 @@
-# Copyright 2024 Huawei Technologies Co., Ltd
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ============================================================================
-"""llm boost"""
-import json
-import mindspore.common.dtype as mstype
-from mindspore.experimental.llm_boost.atb.boost_base import (
-    AtbBoostBase,
-    PositionEmbeddingType,
-    NormType,
-)
-from mindspore._c_expression import LlmBoostBinder
-from mindspore.experimental.llm_boost.register import LlmBoostRegister, LlmBoostType
-
-CPP_LLAMA_MODEL_CLASS_NAME = "llama_LlamaDecoderModel"
-
-
-@LlmBoostRegister.register(LlmBoostType.BUILDIN, "Llama")
-class LlamaBoost(AtbBoostBase):
-    """LlamaBoost class"""
-
-    def __init__(self, config):
-        super().__init__(config)
-        self.in_tensor_length = 13
-        self.acl_encoder_operation_inputs = [None] * self.in_tensor_length
-        self.acl_decoder_operation_inputs = [None] * self.in_tensor_length
-        self.atb_encoder_operation = LlmBoostBinder(
-            self.backend_name, CPP_LLAMA_MODEL_CLASS_NAME
-        )
-        self.atb_decoder_operation = LlmBoostBinder(
-            self.backend_name, CPP_LLAMA_MODEL_CLASS_NAME
-        )
-
-    def init(self):
-        """
-        Initialize the object
-        returns True if object needs input manipulation by mindformers
-        """
-
-        coder_param = {
-            "normEps": self.config.rms_norm_eps,
-            "normType": NormType.RMS_NORM,
-            "numAttentionHeadsPerRank": self.config.num_heads // self.device_num,
-            "hiddenSizePerAttentionHead": self.head_dim,
-            "numHiddenLayers": self.num_layers,
-            "numKeyValueHeadsPerRank": self.n_kv_heads // self.device_num,
-            "skipWordEmbedding": False,
-            "isFA": False,
-            "isBF16": self.dtype == mstype.bfloat16,
-            "packQuantType": [[1, 1] for _ in range(self.num_layers)],
-            "linearQuantType": [
-                [0, -1, -1, 0, 0, -1, 0] for _ in range(self.num_layers)
-            ],
-            "linearTransposeType": [
-                [1, -1, -1, 1, 1, -1, 1] for i in range(self.num_layers)
-            ],
-            "isEmbeddingParallel": False,
-            "isLmHeadParallel": not self.config.parallel_config.vocab_emb_dp,
-            "lmHeadTransposeType": 1,
-            "enableSwiGLU": True,
-            "enablekvQuant": self.kv_quant is not None,
-            "rank": self.rank_id,
-            "worldSize": self.device_num,
-            "backend": self.config.communication_backend,
-            "rankTableFile": "",
-            "positionEmbeddingType": PositionEmbeddingType.ROPE,
-            "hiddenSize": self.config.hidden_size,
-            "gemma": False,
-            "enableAddNorm": False,
-            "enableCompressHead": False,
-            "isUnpadInputs": True,
-        }
-        encoder_param = {
-            **coder_param,
-            "isPrefill": True,
-            "enableLcoc": True,
-            "enableSpeculate": False,
-            "skipWordEmbedding": False,
-            "enableSplitFuse": False,
-        }
-        decoder_param = {
-            **coder_param,
-            "isPrefill": False,
-            "enableLcoc": False,
-            "enableSpeculate": False,
-        }
-        self.atb_encoder_operation.init(json.dumps({**encoder_param}))
-        self.atb_decoder_operation.init(json.dumps({**decoder_param}))
-        return True
-
-    def _prepare_inputs(
-        self,
-        prefill=None,
-        input_ids=None,
-        position_ids=None,
-        cos_embed=None,
-        sin_embed=None,
-        attention_mask=None,
-        block_tables=None,
-        slots=None,
-        input_lengths=None,
-        lm_head_indices=None,
-        seqLen=None,
-        **kwargs
-    ):
-        """prepare inputs"""
-        self.acl_param = json.dumps(
-            {
-                "seqLen": seqLen,
-            }
-        )
-
-        self.acl_decoder_operation_inputs[0] = input_ids
-        self.acl_decoder_operation_inputs[1] = self.placeholder
-        self.acl_decoder_operation_inputs[2] = position_ids
-        self.acl_decoder_operation_inputs[3] = cos_embed
-        self.acl_decoder_operation_inputs[4] = sin_embed
-        self.acl_decoder_operation_inputs[5] = attention_mask
-        self.acl_decoder_operation_inputs[6] = block_tables
-        self.acl_decoder_operation_inputs[7] = slots
-        self.acl_decoder_operation_inputs[8] = self.placeholder
-        self.acl_decoder_operation_inputs[9] = self.placeholder
-        self.acl_decoder_operation_inputs[10] = self.placeholder
-        self.acl_decoder_operation_inputs[11] = input_lengths
-        self.acl_decoder_operation_inputs[12] = lm_head_indices
-        return self.acl_decoder_operation_inputs, self.acl_param
mindspore/experimental/llm_boost/atb/qwen_boost.py
@@ -1,124 +0,0 @@
-# Copyright 2024 Huawei Technologies Co., Ltd
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ============================================================================
-"""llm boost"""
-import json
-import mindspore.common.dtype as mstype
-from mindspore.experimental.llm_boost.atb.boost_base import AtbBoostBase, NormType
-from mindspore._c_expression import LlmBoostBinder
-from mindspore.experimental.llm_boost.register import LlmBoostRegister, LlmBoostType
-
-
-CPP_QWEN_MODEL_CLASS_NAME = "qwen_QwenDecoderModel"
-
-
-@LlmBoostRegister.register(LlmBoostType.BUILDIN, "Qwen")
-class QwenBoost(AtbBoostBase):
-    """QwenBoost class"""
-
-    def __init__(self, config):
-        super().__init__(config)
-        self.in_tensor_length = 12
-        self.acl_encoder_operation_inputs = [None] * self.in_tensor_length
-        self.acl_decoder_operation_inputs = [None] * self.in_tensor_length
-        self.atb_encoder_operation = LlmBoostBinder(
-            self.backend_name, CPP_QWEN_MODEL_CLASS_NAME
-        )
-        self.atb_decoder_operation = LlmBoostBinder(
-            self.backend_name, CPP_QWEN_MODEL_CLASS_NAME
-        )
-
-    def init(self):
-        """set param"""
-        param_dict = {
-            "isFA": False,
-            "isBF16": self.dtype == mstype.bfloat16,
-            "withEmbedding": True,
-            "isEmbeddingParallel": True,
-            "isLmHeadParallel": True,
-            "linearTransposeType": [
-                [1, -1, -1, 1, 1, -1, 1] for i in range(self.num_layers)
-            ],
-            "lmHeadTransposeType": 1,
-            "enableSwiGLU": not self.need_nz,
-            "normEps": self.config.rms_norm_eps,
-            "normType": NormType.RMS_NORM,
-            "numAttentionHeadsPerRank": self.config.num_heads // self.device_num,
-            "hiddenSizePerAttentionHead": self.head_dim,
-            "numHiddenLayers": self.num_layers,
-            "numKeyValueHeadsPerRank": self.n_kv_heads // self.device_num,
-            "rank": self.rank_id,
-            "worldSize": self.device_num,
-            "backend": self.config.communication_backend,
-            "packQuantType": [[1, 1] for _ in range(self.num_layers)],
-            "linearQuantType": [
-                [0, -1, -1, 0, 0, -1, 0] for _ in range(self.num_layers)
-            ],
-            "linearHasBias": [[True, False, False, False]] * self.num_layers,
-            "enableKvQuant": self.kv_quant is not None,
-            "enableLora": False,
-            "isUnpadInputs": True,
-            "enableAddNorm": False,
-        }
-        encoder_param = {
-            **param_dict,
-            "isPrefill": True,
-            "enableLcoc": False,
-            "enableSplitFuse": False,
-        }
-        decoder_param = {
-            **param_dict,
-            "isPrefill": False,
-            "enableLcoc": False,
-            "enableSpeculate": False,
-            "enablePrefixCache": False,
-        }
-        self.atb_encoder_operation.init(json.dumps({**encoder_param}))
-        self.atb_decoder_operation.init(json.dumps({**decoder_param}))
-
-    def _prepare_inputs(
-        self,
-        prefill=None,
-        input_ids=None,
-        position_ids=None,
-        cos_embed=None,
-        sin_embed=None,
-        attention_mask=None,
-        block_tables=None,
-        slots=None,
-        input_lengths=None,
-        lm_head_indices=None,
-        seqLen=None,
-        **kwargs
-    ):
-        """prepare inputs"""
-        self.acl_param = json.dumps(
-            {
-                "seqLen": seqLen,
-            }
-        )
-
-        self.acl_decoder_operation_inputs[0] = input_ids
-        self.acl_decoder_operation_inputs[1] = position_ids
-        self.acl_decoder_operation_inputs[2] = cos_embed
-        self.acl_decoder_operation_inputs[3] = sin_embed
-        self.acl_decoder_operation_inputs[4] = attention_mask
-        self.acl_decoder_operation_inputs[5] = block_tables
-        self.acl_decoder_operation_inputs[6] = slots
-        self.acl_decoder_operation_inputs[7] = self.placeholder
-        self.acl_decoder_operation_inputs[8] = self.placeholder
-        self.acl_decoder_operation_inputs[9] = self.placeholder
-        self.acl_decoder_operation_inputs[10] = input_lengths
-        self.acl_decoder_operation_inputs[11] = lm_head_indices
-        return self.acl_decoder_operation_inputs, self.acl_param