mindspore 2.3.0-cp310-cp310-win_amd64.whl → 2.4.0-cp310-cp310-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of mindspore has been flagged for review.

Files changed (308)
  1. mindspore/.commit_id +1 -1
  2. mindspore/Microsoft.VisualStudio.Telemetry.dll +0 -0
  3. mindspore/Newtonsoft.Json.dll +0 -0
  4. mindspore/__init__.py +3 -1
  5. mindspore/_c_dataengine.cp310-win_amd64.pyd +0 -0
  6. mindspore/_c_expression.cp310-win_amd64.pyd +0 -0
  7. mindspore/_c_mindrecord.cp310-win_amd64.pyd +0 -0
  8. mindspore/_checkparam.py +50 -9
  9. mindspore/_extends/parse/compile_config.py +41 -0
  10. mindspore/_extends/parse/parser.py +9 -7
  11. mindspore/_extends/parse/standard_method.py +52 -14
  12. mindspore/_extends/pijit/pijit_func_white_list.py +350 -24
  13. mindspore/amp.py +24 -10
  14. mindspore/atlprov.dll +0 -0
  15. mindspore/avcodec-59.dll +0 -0
  16. mindspore/avdevice-59.dll +0 -0
  17. mindspore/avfilter-8.dll +0 -0
  18. mindspore/avformat-59.dll +0 -0
  19. mindspore/avutil-57.dll +0 -0
  20. mindspore/c1.dll +0 -0
  21. mindspore/c1xx.dll +0 -0
  22. mindspore/c2.dll +0 -0
  23. mindspore/common/__init__.py +6 -4
  24. mindspore/common/_pijit_context.py +190 -0
  25. mindspore/common/_register_for_tensor.py +2 -1
  26. mindspore/common/_tensor_overload.py +139 -0
  27. mindspore/common/api.py +102 -87
  28. mindspore/common/dump.py +5 -6
  29. mindspore/common/generator.py +1 -7
  30. mindspore/common/hook_handle.py +14 -26
  31. mindspore/common/mindir_util.py +2 -2
  32. mindspore/common/parameter.py +46 -13
  33. mindspore/common/recompute.py +39 -9
  34. mindspore/common/sparse_tensor.py +7 -3
  35. mindspore/common/tensor.py +209 -29
  36. mindspore/communication/__init__.py +1 -1
  37. mindspore/communication/_comm_helper.py +38 -3
  38. mindspore/communication/comm_func.py +310 -55
  39. mindspore/communication/management.py +14 -14
  40. mindspore/context.py +123 -22
  41. mindspore/dataset/__init__.py +1 -1
  42. mindspore/dataset/audio/__init__.py +1 -1
  43. mindspore/dataset/core/config.py +7 -0
  44. mindspore/dataset/core/validator_helpers.py +7 -0
  45. mindspore/dataset/engine/cache_client.py +1 -1
  46. mindspore/dataset/engine/datasets.py +72 -44
  47. mindspore/dataset/engine/datasets_audio.py +7 -7
  48. mindspore/dataset/engine/datasets_standard_format.py +53 -3
  49. mindspore/dataset/engine/datasets_text.py +20 -20
  50. mindspore/dataset/engine/datasets_user_defined.py +174 -104
  51. mindspore/dataset/engine/datasets_vision.py +33 -33
  52. mindspore/dataset/engine/iterators.py +29 -0
  53. mindspore/dataset/engine/obs/util.py +7 -0
  54. mindspore/dataset/engine/queue.py +114 -60
  55. mindspore/dataset/engine/serializer_deserializer.py +2 -2
  56. mindspore/dataset/engine/validators.py +34 -14
  57. mindspore/dataset/text/__init__.py +1 -4
  58. mindspore/dataset/transforms/__init__.py +0 -3
  59. mindspore/dataset/utils/line_reader.py +2 -0
  60. mindspore/dataset/vision/__init__.py +1 -4
  61. mindspore/dataset/vision/utils.py +1 -1
  62. mindspore/dataset/vision/validators.py +2 -1
  63. mindspore/dnnl.dll +0 -0
  64. mindspore/dpcmi.dll +0 -0
  65. mindspore/{nn/extend → experimental/es}/__init__.py +4 -11
  66. mindspore/experimental/es/embedding_service.py +883 -0
  67. mindspore/{nn/layer → experimental/es}/embedding_service_layer.py +218 -30
  68. mindspore/experimental/llm_boost/__init__.py +21 -0
  69. mindspore/{nn/extend/layer → experimental/llm_boost/atb}/__init__.py +4 -8
  70. mindspore/experimental/llm_boost/atb/boost_base.py +211 -0
  71. mindspore/experimental/llm_boost/atb/llama_boost.py +115 -0
  72. mindspore/experimental/llm_boost/atb/qwen_boost.py +101 -0
  73. mindspore/experimental/llm_boost/register.py +129 -0
  74. mindspore/experimental/llm_boost/utils.py +31 -0
  75. mindspore/experimental/optim/adamw.py +85 -0
  76. mindspore/experimental/optim/optimizer.py +3 -0
  77. mindspore/hal/__init__.py +3 -3
  78. mindspore/hal/contiguous_tensors_handle.py +175 -0
  79. mindspore/hal/stream.py +18 -0
  80. mindspore/include/api/model_group.h +13 -1
  81. mindspore/include/api/types.h +10 -10
  82. mindspore/include/dataset/config.h +2 -2
  83. mindspore/include/dataset/constants.h +2 -2
  84. mindspore/include/dataset/execute.h +2 -2
  85. mindspore/include/dataset/vision.h +4 -0
  86. mindspore/jpeg62.dll +0 -0
  87. mindspore/log.py +1 -1
  88. mindspore/mindrecord/filewriter.py +68 -51
  89. mindspore/mindspore_backend.dll +0 -0
  90. mindspore/mindspore_common.dll +0 -0
  91. mindspore/mindspore_core.dll +0 -0
  92. mindspore/mindspore_glog.dll +0 -0
  93. mindspore/mindspore_np_dtype.dll +0 -0
  94. mindspore/mindspore_ops.dll +0 -0
  95. mindspore/mint/__init__.py +495 -46
  96. mindspore/mint/distributed/__init__.py +31 -0
  97. mindspore/mint/distributed/distributed.py +254 -0
  98. mindspore/mint/nn/__init__.py +266 -21
  99. mindspore/mint/nn/functional.py +125 -19
  100. mindspore/mint/nn/layer/__init__.py +39 -0
  101. mindspore/mint/nn/layer/activation.py +133 -0
  102. mindspore/mint/nn/layer/normalization.py +477 -0
  103. mindspore/mint/nn/layer/pooling.py +110 -0
  104. mindspore/mint/optim/adamw.py +28 -7
  105. mindspore/mint/special/__init__.py +63 -0
  106. mindspore/msobj140.dll +0 -0
  107. mindspore/mspdb140.dll +0 -0
  108. mindspore/mspdbcore.dll +0 -0
  109. mindspore/mspdbst.dll +0 -0
  110. mindspore/mspft140.dll +0 -0
  111. mindspore/msvcdis140.dll +0 -0
  112. mindspore/msvcp140_1.dll +0 -0
  113. mindspore/msvcp140_2.dll +0 -0
  114. mindspore/msvcp140_atomic_wait.dll +0 -0
  115. mindspore/msvcp140_codecvt_ids.dll +0 -0
  116. mindspore/multiprocessing/__init__.py +2 -1
  117. mindspore/nn/__init__.py +0 -1
  118. mindspore/nn/cell.py +275 -93
  119. mindspore/nn/layer/activation.py +211 -44
  120. mindspore/nn/layer/basic.py +113 -3
  121. mindspore/nn/layer/embedding.py +120 -2
  122. mindspore/nn/layer/normalization.py +101 -5
  123. mindspore/nn/layer/padding.py +34 -48
  124. mindspore/nn/layer/pooling.py +161 -7
  125. mindspore/nn/layer/transformer.py +3 -3
  126. mindspore/nn/loss/__init__.py +2 -2
  127. mindspore/nn/loss/loss.py +84 -6
  128. mindspore/nn/optim/__init__.py +2 -1
  129. mindspore/nn/optim/adadelta.py +1 -1
  130. mindspore/nn/optim/adam.py +1 -1
  131. mindspore/nn/optim/lamb.py +1 -1
  132. mindspore/nn/optim/tft_wrapper.py +127 -0
  133. mindspore/nn/wrap/cell_wrapper.py +12 -23
  134. mindspore/nn/wrap/grad_reducer.py +5 -5
  135. mindspore/nn/wrap/loss_scale.py +17 -3
  136. mindspore/numpy/__init__.py +1 -1
  137. mindspore/numpy/array_creations.py +65 -68
  138. mindspore/numpy/array_ops.py +64 -60
  139. mindspore/numpy/fft.py +610 -75
  140. mindspore/numpy/logic_ops.py +11 -10
  141. mindspore/numpy/math_ops.py +85 -84
  142. mindspore/numpy/utils_const.py +4 -4
  143. mindspore/opencv_core452.dll +0 -0
  144. mindspore/opencv_imgcodecs452.dll +0 -0
  145. mindspore/opencv_imgproc452.dll +0 -0
  146. mindspore/ops/__init__.py +6 -4
  147. mindspore/ops/_grad_experimental/grad_comm_ops.py +47 -3
  148. mindspore/ops/_grad_experimental/grad_math_ops.py +0 -22
  149. mindspore/ops/_vmap/vmap_array_ops.py +2 -4
  150. mindspore/ops/_vmap/vmap_math_ops.py +17 -1
  151. mindspore/ops/_vmap/vmap_nn_ops.py +43 -2
  152. mindspore/ops/auto_generate/cpp_create_prim_instance_helper.py +85 -7
  153. mindspore/ops/auto_generate/gen_arg_dtype_cast.py +2 -0
  154. mindspore/ops/auto_generate/gen_extend_func.py +734 -13
  155. mindspore/ops/auto_generate/gen_ops_def.py +2420 -381
  156. mindspore/ops/auto_generate/gen_ops_prim.py +5196 -1659
  157. mindspore/ops/auto_generate/pyboost_inner_prim.py +176 -56
  158. mindspore/ops/composite/base.py +85 -48
  159. mindspore/ops/composite/multitype_ops/_compile_utils.py +1 -0
  160. mindspore/ops/composite/multitype_ops/not_in_impl.py +2 -2
  161. mindspore/ops/function/__init__.py +22 -0
  162. mindspore/ops/function/array_func.py +490 -153
  163. mindspore/ops/function/debug_func.py +113 -1
  164. mindspore/ops/function/fft_func.py +15 -2
  165. mindspore/ops/function/grad/grad_func.py +3 -2
  166. mindspore/ops/function/math_func.py +558 -207
  167. mindspore/ops/function/nn_func.py +817 -383
  168. mindspore/ops/function/other_func.py +3 -2
  169. mindspore/ops/function/random_func.py +184 -8
  170. mindspore/ops/function/reshard_func.py +13 -11
  171. mindspore/ops/function/sparse_unary_func.py +1 -1
  172. mindspore/ops/function/vmap_func.py +3 -2
  173. mindspore/ops/functional.py +24 -14
  174. mindspore/ops/op_info_register.py +3 -3
  175. mindspore/ops/operations/__init__.py +6 -1
  176. mindspore/ops/operations/_grad_ops.py +2 -76
  177. mindspore/ops/operations/_infer_ops.py +1 -1
  178. mindspore/ops/operations/_inner_ops.py +71 -94
  179. mindspore/ops/operations/array_ops.py +12 -146
  180. mindspore/ops/operations/comm_ops.py +42 -53
  181. mindspore/ops/operations/custom_ops.py +83 -19
  182. mindspore/ops/operations/debug_ops.py +42 -10
  183. mindspore/ops/operations/manually_defined/_inner.py +12 -0
  184. mindspore/ops/operations/manually_defined/ops_def.py +265 -10
  185. mindspore/ops/operations/math_ops.py +12 -223
  186. mindspore/ops/operations/nn_ops.py +20 -114
  187. mindspore/ops/operations/other_ops.py +7 -4
  188. mindspore/ops/operations/random_ops.py +46 -1
  189. mindspore/ops/primitive.py +18 -6
  190. mindspore/ops_generate/arg_dtype_cast.py +2 -0
  191. mindspore/ops_generate/gen_aclnn_implement.py +11 -11
  192. mindspore/ops_generate/gen_constants.py +36 -0
  193. mindspore/ops_generate/gen_ops.py +67 -52
  194. mindspore/ops_generate/gen_ops_inner_prim.py +1 -1
  195. mindspore/ops_generate/gen_pyboost_func.py +131 -47
  196. mindspore/ops_generate/op_proto.py +10 -3
  197. mindspore/ops_generate/pyboost_utils.py +14 -1
  198. mindspore/ops_generate/template.py +43 -21
  199. mindspore/parallel/__init__.py +3 -1
  200. mindspore/parallel/_auto_parallel_context.py +28 -8
  201. mindspore/parallel/_cell_wrapper.py +83 -0
  202. mindspore/parallel/_parallel_serialization.py +47 -19
  203. mindspore/parallel/_tensor.py +81 -11
  204. mindspore/parallel/_utils.py +13 -1
  205. mindspore/parallel/algo_parameter_config.py +5 -5
  206. mindspore/parallel/checkpoint_transform.py +46 -39
  207. mindspore/parallel/cluster/process_entity/__init__.py +1 -1
  208. mindspore/parallel/cluster/process_entity/_api.py +31 -23
  209. mindspore/parallel/cluster/process_entity/_utils.py +2 -27
  210. mindspore/parallel/parameter_broadcast.py +3 -4
  211. mindspore/parallel/shard.py +162 -31
  212. mindspore/parallel/transform_safetensors.py +993 -0
  213. mindspore/pgodb140.dll +0 -0
  214. mindspore/pgort140.dll +0 -0
  215. mindspore/profiler/__init__.py +2 -1
  216. mindspore/profiler/common/constant.py +29 -0
  217. mindspore/profiler/common/registry.py +47 -0
  218. mindspore/profiler/common/util.py +28 -0
  219. mindspore/profiler/dynamic_profiler.py +694 -0
  220. mindspore/profiler/envprofiling.py +17 -19
  221. mindspore/profiler/parser/ascend_analysis/constant.py +18 -0
  222. mindspore/profiler/parser/ascend_analysis/file_manager.py +25 -4
  223. mindspore/profiler/parser/ascend_analysis/function_event.py +43 -19
  224. mindspore/profiler/parser/ascend_analysis/fwk_cann_parser.py +31 -26
  225. mindspore/profiler/parser/ascend_analysis/fwk_file_parser.py +56 -10
  226. mindspore/profiler/parser/ascend_analysis/msprof_timeline_parser.py +55 -8
  227. mindspore/profiler/parser/ascend_analysis/path_manager.py +313 -0
  228. mindspore/profiler/parser/ascend_analysis/profiler_info_parser.py +27 -20
  229. mindspore/profiler/parser/ascend_analysis/trace_event_manager.py +9 -2
  230. mindspore/profiler/parser/ascend_msprof_exporter.py +5 -4
  231. mindspore/profiler/parser/ascend_timeline_generator.py +27 -25
  232. mindspore/profiler/parser/base_timeline_generator.py +19 -25
  233. mindspore/profiler/parser/cpu_gpu_timeline_generator.py +25 -12
  234. mindspore/profiler/parser/framework_parser.py +1 -391
  235. mindspore/profiler/parser/gpu_analysis/__init__.py +14 -0
  236. mindspore/profiler/parser/gpu_analysis/function_event.py +44 -0
  237. mindspore/profiler/parser/gpu_analysis/fwk_file_parser.py +89 -0
  238. mindspore/profiler/parser/gpu_analysis/profiler_info_parser.py +72 -0
  239. mindspore/profiler/parser/memory_usage_parser.py +0 -154
  240. mindspore/profiler/parser/profiler_info.py +78 -6
  241. mindspore/profiler/profiler.py +153 -0
  242. mindspore/profiler/profiling.py +280 -412
  243. mindspore/rewrite/__init__.py +1 -2
  244. mindspore/rewrite/common/namespace.py +4 -4
  245. mindspore/rewrite/symbol_tree/symbol_tree.py +3 -3
  246. mindspore/run_check/_check_version.py +36 -103
  247. mindspore/safeguard/rewrite_obfuscation.py +591 -247
  248. mindspore/swresample-4.dll +0 -0
  249. mindspore/swscale-6.dll +0 -0
  250. mindspore/tbbmalloc.dll +0 -0
  251. mindspore/tinyxml2.dll +0 -0
  252. mindspore/train/__init__.py +4 -3
  253. mindspore/train/_utils.py +28 -2
  254. mindspore/train/amp.py +171 -53
  255. mindspore/train/callback/__init__.py +2 -2
  256. mindspore/train/callback/_callback.py +4 -4
  257. mindspore/train/callback/_checkpoint.py +85 -22
  258. mindspore/train/callback/_cluster_monitor.py +1 -1
  259. mindspore/train/callback/_flops_collector.py +1 -0
  260. mindspore/train/callback/_loss_monitor.py +3 -3
  261. mindspore/train/callback/_on_request_exit.py +134 -31
  262. mindspore/train/callback/_summary_collector.py +5 -5
  263. mindspore/train/callback/_tft_register.py +352 -0
  264. mindspore/train/dataset_helper.py +7 -3
  265. mindspore/train/metrics/metric.py +3 -3
  266. mindspore/train/metrics/roc.py +4 -4
  267. mindspore/train/mind_ir_pb2.py +44 -39
  268. mindspore/train/model.py +134 -58
  269. mindspore/train/serialization.py +336 -112
  270. mindspore/turbojpeg.dll +0 -0
  271. mindspore/utils/__init__.py +21 -0
  272. mindspore/utils/utils.py +60 -0
  273. mindspore/vcmeta.dll +0 -0
  274. mindspore/vcruntime140.dll +0 -0
  275. mindspore/vcruntime140_1.dll +0 -0
  276. mindspore/version.py +1 -1
  277. {mindspore-2.3.0.dist-info → mindspore-2.4.0.dist-info}/METADATA +6 -2
  278. {mindspore-2.3.0.dist-info → mindspore-2.4.0.dist-info}/RECORD +281 -275
  279. mindspore/include/c_api/ms/abstract.h +0 -67
  280. mindspore/include/c_api/ms/attribute.h +0 -197
  281. mindspore/include/c_api/ms/base/handle_types.h +0 -43
  282. mindspore/include/c_api/ms/base/macros.h +0 -32
  283. mindspore/include/c_api/ms/base/status.h +0 -33
  284. mindspore/include/c_api/ms/base/types.h +0 -283
  285. mindspore/include/c_api/ms/context.h +0 -102
  286. mindspore/include/c_api/ms/graph.h +0 -160
  287. mindspore/include/c_api/ms/node.h +0 -606
  288. mindspore/include/c_api/ms/tensor.h +0 -161
  289. mindspore/include/c_api/ms/value.h +0 -84
  290. mindspore/mindspore_shared_lib.dll +0 -0
  291. mindspore/nn/extend/basic.py +0 -140
  292. mindspore/nn/extend/embedding.py +0 -143
  293. mindspore/nn/extend/layer/normalization.py +0 -109
  294. mindspore/nn/extend/pooling.py +0 -117
  295. mindspore/nn/layer/embedding_service.py +0 -531
  296. mindspore/ops/_op_impl/aicpu/strided_slice_v2.py +0 -93
  297. mindspore/ops/_op_impl/aicpu/strided_slice_v2_grad.py +0 -66
  298. mindspore/ops/extend/__init__.py +0 -53
  299. mindspore/ops/extend/array_func.py +0 -218
  300. mindspore/ops/extend/math_func.py +0 -76
  301. mindspore/ops/extend/nn_func.py +0 -308
  302. mindspore/ops/silent_check.py +0 -162
  303. mindspore/profiler/parser/msadvisor_analyzer.py +0 -82
  304. mindspore/profiler/parser/msadvisor_parser.py +0 -240
  305. mindspore/train/callback/_mindio_ttp.py +0 -443
  306. {mindspore-2.3.0.dist-info → mindspore-2.4.0.dist-info}/WHEEL +0 -0
  307. {mindspore-2.3.0.dist-info → mindspore-2.4.0.dist-info}/entry_points.txt +0 -0
  308. {mindspore-2.3.0.dist-info → mindspore-2.4.0.dist-info}/top_level.txt +0 -0

mindspore/experimental/llm_boost/atb/llama_boost.py ADDED
@@ -0,0 +1,115 @@
+ # Copyright 2024 Huawei Technologies Co., Ltd
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ # ============================================================================
+ """llm boost"""
+ import json
+ import mindspore.common.dtype as mstype
+ from mindspore.experimental.llm_boost.atb.boost_base import AtbBoostBase
+ from mindspore._c_expression import LlmBoostBinder
+ from mindspore.experimental.llm_boost.register import LlmBoostRegister, LlmBoostType
+
+
+ @LlmBoostRegister.register(LlmBoostType.BUILDIN, "Llama")
+ class LlamaBoost(AtbBoostBase):
+     """LlamaBoost class"""
+
+     def __init__(self, config):
+         super().__init__(config)
+         self.in_tensor_length = 13
+         self.acl_encoder_operation_inputs = [None] * self.in_tensor_length
+         self.acl_decoder_operation_inputs = [None] * self.in_tensor_length
+         self.atb_encoder_operation = LlmBoostBinder(
+             "ATB", "llama_parallel_DecoderModel")
+         self.atb_decoder_operation = LlmBoostBinder(
+             "ATB", "llama_parallel_DecoderModel")
+
+     def init(self):
+         """set param"""
+         coder_param = {
+             "rmsNormEps": self.config.rms_norm_eps,
+             "numAttentionHeadsPerRank": self.config.num_heads // self.device_num,
+             "hiddenSizePerAttentionHead": self.head_dim,
+             "numHiddenLayers": self.num_layers,
+             "numKeyValueHeadsPerRank": self.n_kv_heads // self.device_num,
+             "skipWordEmbedding": False,
+             "isFA": False,
+             "isBF16": self.dtype == mstype.bfloat16,
+             "packQuantType": [[1, 1] for _ in range(self.num_layers)],
+             "linearQuantType": [[0, -1, -1, 0, 0, -1, 0] for _ in range(self.num_layers)],
+             "linearTransposeType": [[1, -1, -1, 1, 1, -1, 1] for i in range(self.num_layers)],
+             "isEmbeddingParallel": False,
+             "isLmHeadParallel": not self.config.parallel_config.vocab_emb_dp,
+             "lmHeadTransposeType": 1,
+             "supportSwiGLU": True,
+             "kvQuant": self.kv_quant is not None,
+             "rank": self.rank_id,
+             "worldSize": self.device_num,
+             "backend": "lccl",
+             "rankTableFile": "",
+             "positionEmbeddingType": self.position_embedding_type,
+             "hiddenSize": self.config.hidden_size,
+             "gemma": False,
+             "enableAddNorm": True,
+             "supportCompressHead": False,
+         }
+         encoder_param = {
+             **coder_param, "isPrefill": True,
+             "supportLcoc": True,
+             "supportSpeculate": False,
+             "skipWordEmbedding": False
+         }
+         decoder_param = {
+             **coder_param, "isPrefill": False, "supportLcoc": False,
+             "supportSpeculate": False
+         }
+         self.atb_encoder_operation.init(json.dumps({**encoder_param}))
+         self.atb_decoder_operation.init(json.dumps({**decoder_param}))
+
+     def _prepare_inputs(
+             self,
+             prefill=None,
+             input_ids=None,
+             position_ids=None,
+             cos_embed=None,
+             sin_embed=None,
+             attention_mask=None,
+             block_tables=None,
+             slots=None,
+             input_lengths=None,
+             lm_head_indices=None,
+             seqLen=None,
+             **kwargs
+     ):
+         """prepare inputs"""
+         self.acl_param = json.dumps({
+             "seqLen": seqLen,
+         })
+         self.acl_decoder_operation_inputs[0] = self.cast(
+             input_ids, mstype.int64)
+         self.acl_decoder_operation_inputs[1] = self.placeholder
+         self.acl_decoder_operation_inputs[2] = self.cast(
+             position_ids, mstype.int32)
+         self.acl_decoder_operation_inputs[3] = cos_embed
+         self.acl_decoder_operation_inputs[4] = sin_embed
+         self.acl_decoder_operation_inputs[5] = attention_mask
+         self.acl_decoder_operation_inputs[6] = block_tables
+         self.acl_decoder_operation_inputs[7] = slots
+         self.acl_decoder_operation_inputs[8] = self.placeholder
+         self.acl_decoder_operation_inputs[9] = self.placeholder
+         self.acl_decoder_operation_inputs[10] = self.placeholder
+         self.acl_decoder_operation_inputs[11] = self.cast(
+             input_lengths, mstype.int32)
+         self.acl_decoder_operation_inputs[12] = self.cast(
+             lm_head_indices, mstype.int64)
+         return self.acl_decoder_operation_inputs, self.acl_param

mindspore/experimental/llm_boost/atb/qwen_boost.py ADDED
@@ -0,0 +1,101 @@
+ # Copyright 2024 Huawei Technologies Co., Ltd
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ # ============================================================================
+ """llm boost"""
+ import json
+ import mindspore.common.dtype as mstype
+ from mindspore.experimental.llm_boost.atb.boost_base import AtbBoostBase
+ from mindspore._c_expression import LlmBoostBinder
+ from mindspore.experimental.llm_boost.register import LlmBoostRegister, LlmBoostType
+
+
+ @LlmBoostRegister.register(LlmBoostType.BUILDIN, "Qwen")
+ class QwenBoost(AtbBoostBase):
+     """QwenBoost class"""
+
+     def __init__(self, config):
+         super().__init__(config)
+         self.in_tensor_length = 12
+         self.acl_encoder_operation_inputs = [None] * self.in_tensor_length
+         self.acl_decoder_operation_inputs = [None] * self.in_tensor_length
+         self.atb_encoder_operation = LlmBoostBinder(
+             "ATB", "qwen_DecoderModel")
+         self.atb_decoder_operation = LlmBoostBinder(
+             "ATB", "qwen_DecoderModel")
+
+     def init(self):
+         """set param"""
+         param_dict = {
+             "isFA": False,
+             "isBF16": self.dtype == mstype.bfloat16,
+             "withEmbedding": True,
+             "isEmbeddingParallel": True,
+             "isLmHeadParallel": True,
+             "linearTransposeType": [[1, -1, -1, 1, 1, -1, 1] for i in range(self.num_layers)],
+             "lmHeadTransposeType": 1,
+             "supportSwiGLU": not self.need_nz,
+             "rmsNormEps": self.config.rms_norm_eps,
+             "numAttentionHeadsPerRank": self.config.num_heads // self.device_num,
+             "hiddenSizePerAttentionHead": self.head_dim,
+             "numHiddenLayers": self.num_layers,
+             "numKeyValueHeadsPerRank": self.n_kv_heads // self.device_num,
+             "rank": self.rank_id,
+             "worldSize": self.device_num,
+             "backend": "lccl",
+             "packQuantType": [[1, 1] for _ in range(self.num_layers)],
+             "linearQuantType": [[0, -1, -1, 0, 0, -1, 0] for _ in range(self.num_layers)],
+             "kvQuant": self.kv_quant is not None,
+         }
+         encoder_param = {**param_dict, "isPrefill": True, "supportLcoc": False}
+         decoder_param = {**param_dict, "isPrefill": False,
+                          "supportLcoc": False, "supportSpeculate": False}
+         self.atb_encoder_operation.init(json.dumps({**encoder_param}))
+         self.atb_decoder_operation.init(json.dumps({**decoder_param}))
+
+     def _prepare_inputs(
+             self,
+             prefill=None,
+             input_ids=None,
+             position_ids=None,
+             cos_embed=None,
+             sin_embed=None,
+             attention_mask=None,
+             block_tables=None,
+             slots=None,
+             input_lengths=None,
+             lm_head_indices=None,
+             seqLen=None,
+             **kwargs
+     ):
+         """prepare inputs"""
+         self.acl_param = json.dumps({
+             "seqLen": seqLen,
+         })
+         self.acl_decoder_operation_inputs[0] = self.cast(
+             input_ids, mstype.int64)
+         self.acl_decoder_operation_inputs[1] = self.cast(
+             position_ids, mstype.int32)
+         self.acl_decoder_operation_inputs[2] = cos_embed
+         self.acl_decoder_operation_inputs[3] = sin_embed
+         self.acl_decoder_operation_inputs[4] = attention_mask
+         self.acl_decoder_operation_inputs[5] = block_tables
+         self.acl_decoder_operation_inputs[6] = slots
+         self.acl_decoder_operation_inputs[7] = self.placeholder
+         self.acl_decoder_operation_inputs[8] = self.placeholder
+         self.acl_decoder_operation_inputs[9] = self.cast(
+             input_lengths, mstype.int32)
+         self.acl_decoder_operation_inputs[10] = self.cast(
+             lm_head_indices, mstype.int64)
+         self.acl_decoder_operation_inputs[11] = self.placeholder
+         return self.acl_decoder_operation_inputs, self.acl_param

mindspore/experimental/llm_boost/register.py ADDED
@@ -0,0 +1,129 @@
+ # Copyright 2024 Huawei Technologies Co., Ltd
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ # ============================================================================
+ """LlmBoostRegister"""
+ import inspect
+
+
+ class LlmBoostType:
+     """Class module type for vision pretrain"""
+
+     def __init__(self):
+         pass
+
+     BUILDIN = 'BuildIn'
+
+
+ class LlmBoostRegister:
+     """
+     Module class factory.
+     """
+
+     def __init__(self):
+         pass
+
+     registry = {}
+
+     @classmethod
+     def register(cls, boost_type=LlmBoostType.BUILDIN, alias=None):
+         """Register class into registry
+         Args:
+             boost_type:
+                 boost type name, default LlmBoostType.BUILDIN
+             alias (str) : model_name
+
+         Returns:
+             wrapper
+         """
+
+         def wrapper(register_class):
+             """Register-Class with wrapper function.
+
+             Args:
+                 register_class : class need to register
+
+             Returns:
+                 wrapper of register_class
+             """
+             model_name = alias if alias is not None else register_class.__name__
+             if boost_type not in cls.registry:
+                 cls.registry[boost_type] = {model_name: register_class}
+             else:
+                 cls.registry[boost_type][model_name] = register_class
+             return register_class
+
+         return wrapper
+
+     @classmethod
+     def is_exist(cls, boost_type, model_name=None):
+         """Determine whether class name is in the current type group.
+
+         Args:
+             boost_type : Module type
+             model_name : model name
+
+         Returns:
+             True/False
+         """
+         if not model_name:
+             return boost_type in cls.registry
+         registered = boost_type in cls.registry and model_name in cls.registry.get(
+             boost_type)
+         return registered
+
+     @classmethod
+     def get_cls(cls, boost_type, model_name=None):
+         """Get class
+
+         Args:
+             boost_type : Module type
+             model_name : model name
+
+         Returns:
+             register_class
+         """
+         if not cls.is_exist(boost_type, model_name):
+             raise ValueError("Can't find class type {} class name {} \
+                 in class registry".format(boost_type, model_name))
+
+         if not model_name:
+             raise ValueError(
+                 "Can't find model. model name = {}".format(model_name))
+         register_class = cls.registry.get(boost_type).get(model_name)
+         return register_class
+
+     @classmethod
+     def get_instance(cls, boost_type=LlmBoostType.BUILDIN, model_name=None, **kwargs):
+         """Get instance.
+         Args:
+             boost_type : module type
+             model_name : model type
+         Returns:
+             object : The constructed object
+         """
+         if model_name is None:
+             raise ValueError("Class name cannot be None.")
+
+         if isinstance(model_name, str):
+             obj_cls = cls.get_cls(boost_type, model_name)
+         elif inspect.isclass(model_name):
+             obj_cls = model_name
+         else:
+             raise ValueError("Can't find boost type {} model name {} \
+                 in class registry.".format(boost_type, model_name))
+
+         try:
+             return obj_cls(**kwargs)
+         except Exception as e:
+             raise type(e)('{}: {}'.format(obj_cls.__name__, e))
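
The registry added above is a plain decorator-based factory: register() files a class under a (boost_type, model_name) key in a class-level dict, and get_instance() looks the class up and constructs it with the given kwargs. A minimal round-trip sketch using only the APIs from this hunk (DemoBoost and its config argument are hypothetical stand-ins for a real boost class):

    from mindspore.experimental.llm_boost.register import LlmBoostRegister, LlmBoostType

    @LlmBoostRegister.register(LlmBoostType.BUILDIN, "Demo")
    class DemoBoost:
        """Hypothetical boost class; the real ones derive from AtbBoostBase."""
        def __init__(self, config=None):
            self.config = config

    # The decorator filed DemoBoost under ('BuildIn', 'Demo').
    assert LlmBoostRegister.is_exist(LlmBoostType.BUILDIN, "Demo")
    boost = LlmBoostRegister.get_instance(LlmBoostType.BUILDIN, "Demo", config={})

This is the lookup path LlamaBoost and QwenBoost go through: each registers itself at import time, and callers resolve a boost backend by model name instead of importing the class directly.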

mindspore/experimental/llm_boost/utils.py ADDED
@@ -0,0 +1,31 @@
+ # Copyright 2024 Huawei Technologies Co., Ltd
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ # ============================================================================
+ """LlmBoostRegister"""
+ import os
+ from mindspore.communication import get_group_size, get_rank
+
+
+ def get_real_rank():
+     try:
+         return get_rank()
+     except RuntimeError:
+         return int(os.getenv("RANK_ID", "0"))
+
+
+ def get_real_group_size():
+     try:
+         return get_group_size()
+     except RuntimeError:
+         return int(os.getenv("RANK_SIZE", "1"))

mindspore/experimental/optim/adamw.py CHANGED
@@ -20,10 +20,13 @@ from mindspore.common.parameter import Parameter
  from mindspore.common.tensor import Tensor
  import mindspore.common.dtype as mstype
  from mindspore.experimental.optim.optimizer import Optimizer
+ from mindspore import _checkparam as validator
+ from mindspore.ops import auto_generate as gen
  from mindspore import ops
  from mindspore import jit

  _adamw_opt = C.MultitypeFuncGraph("adamw_opt")
+ _speed_adamw_opt = C.MultitypeFuncGraph("speed_adamw_opt")

  op_mul = P.Mul()
  op_pow = P.Pow()
@@ -32,6 +35,30 @@ op_maximum = P.Maximum()
  hyper_map = C.HyperMap()


+ @_speed_adamw_opt.register("Function", "Float", "Float", "Tensor", "Float", "Float", "Bool", "Bool", "Tensor", "Tensor",
+                            "Tensor", "Tensor", "Tensor", "Tensor")
+ def _run_speed_adamw_opt(opt, beta1, beta2, lr, eps, weight_decay, amsgrad, maximize, bias_correction1,
+                          bias_correction2, parameters, grads, exp_avg, exp_avg_sq):
+     """Apply adamw optimizer to the weight parameter."""
+     success = True
+     opt(parameters, exp_avg, exp_avg_sq, bias_correction1, bias_correction2, lr, weight_decay, beta1, beta2, eps,
+         grads, None, amsgrad, maximize)
+     return success
+
+
+ def _check_param_value(betas, eps, weight_decay, lr, amsgrad, maximize, prim_name):
+     """Check the type of inputs."""
+     validator.check_value_type('betas', betas, [tuple], prim_name)
+     validator.check("betas size", len(betas), "", [2], validator.IN, prim_name)
+     validator.check_value_type("betas[0]", betas[0], [float], prim_name)
+     validator.check_value_type("betas[1]", betas[1], [float], prim_name)
+     validator.check_value_type("eps", eps, [float], prim_name)
+     validator.check_value_type("weight_decay", weight_decay, [float], prim_name)
+     validator.check_value_type("lr", lr, [float], prim_name)
+     validator.check_value_type("amsgrad", amsgrad, [bool], prim_name)
+     validator.check_value_type("maximize", maximize, [bool], prim_name)
+
+
  @jit
  def prepare_func(lr, weight_decay, state_step, beta1, beta2):
      weight_decay_new = 1 - lr * weight_decay
@@ -203,3 +230,61 @@ class AdamW(Optimizer):
                         grads, start_id, end_id)

          return True
+
+
+ class SpeedAdamW(Optimizer):
+     r"""
+     Implements Adam Weight Decay algorithm.
+     """
+
+     def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), eps=1e-8,
+                  weight_decay=1e-2, amsgrad=False, *, maximize=False):
+         _check_param_value(betas, eps, weight_decay, lr, amsgrad, maximize, self.cls_name)
+         if lr < 0.0:
+             raise ValueError("Invalid learning rate: {}".format(lr))
+         if eps < 0.0:
+             raise ValueError("Invalid epsilon value: {}".format(eps))
+         if not 0.0 <= betas[0] < 1.0:
+             raise ValueError("Invalid beta parameter at index 0: {}".format(betas[0]))
+         if not 0.0 <= betas[1] < 1.0:
+             raise ValueError("Invalid beta parameter at index 1: {}".format(betas[1]))
+         if weight_decay < 0.0:
+             raise ValueError("Invalid weight_decay value: {}".format(weight_decay))
+
+         defaults = dict(lr=lr, betas=betas, eps=eps,
+                         weight_decay=weight_decay, amsgrad=amsgrad,
+                         maximize=maximize)
+         self.max_v_group = True
+         super(SpeedAdamW, self).__init__(params, defaults)
+
+         self.exp_avg = self.parameters.clone(prefix="exp_avg", init='zeros')
+         self.exp_avg_sq = self.parameters.clone(prefix="exp_avg_sq", init='zeros')
+         self.state_step = Parameter(Tensor([0], mstype.float32), "state_step")
+         self.increase_tensor = Tensor(1, mstype.float32)
+         self.assignadd = P.AssignAdd()
+         self.adamw_opt = gen.ApplyAdamW()
+
+     def construct(self, gradients):
+         self.assignadd(self.state_step, self.increase_tensor)
+         for group_id, group in enumerate(self.param_groups):
+             beta1, beta2 = group['betas']
+             maximize = group.get("maximize")
+             start_id = self.group_start_id[group_id]
+             end_id = self.group_start_id[group_id + 1]
+             lr = group.get("lr")
+             grads = tuple(gradients[start_id: end_id])
+
+             bias_correction1 = float(beta1) ** (float(self.state_step) - 1.0)
+             bias_correction2 = float(beta2) ** (float(self.state_step) - 1.0)
+
+             # ApplyAdamW currently only supports amsgrad=False
+             if group.get("amsgrad"):
+                 raise ValueError("For SpeedAdamW, the value of amsgrad can only be False.")
+
+             self.hyper_map(F.partial(_speed_adamw_opt, self.adamw_opt, beta1, beta2, lr,
+                                      group.get("eps"), group.get("weight_decay"),
+                                      group.get("amsgrad"), maximize, bias_correction1, bias_correction2),
+                            self.parameters[start_id: end_id], grads, self.exp_avg[start_id: end_id],
+                            self.exp_avg_sq[start_id: end_id])
+
+         return True
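
SpeedAdamW keeps AdamW's constructor signature but dispatches each parameter group to the fused ApplyAdamW primitive, and per the check in construct() it rejects amsgrad=True. A hedged construction sketch (the network is a placeholder; in a real train step the gradients would come from something like ops.value_and_grad):

    import mindspore.nn as nn
    from mindspore.experimental.optim.adamw import SpeedAdamW

    net = nn.Dense(4, 2)  # placeholder network
    opt = SpeedAdamW(net.trainable_params(), lr=1e-3, betas=(0.9, 0.999),
                     weight_decay=1e-2, amsgrad=False)  # amsgrad=True raises ValueError
    # in a train step: grads = grad_fn(inputs, labels); opt(grads)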

mindspore/experimental/optim/optimizer.py CHANGED
@@ -143,6 +143,9 @@ class Optimizer(Cell):
          self.lrs.append(lr)
          param_group["lr"] = lr
          param_group["weight_decay"] = weight_decay
+         if "amsgrad" in param_group and param_group.get("amsgrad") and hasattr(self, 'max_v_group'):
+             param_items = ParameterTuple(tuple(param_group.get("params")))
+             param_group["max_exp_avg_sq"] = param_items.clone(prefix="max_exp_avg_sq", init='zeros')
          self.param_groups.append(param_group)
          self.group_start_id.append(self.group_start_id[-1] + len(param_group.get("params")))

mindspore/hal/__init__.py CHANGED
@@ -22,7 +22,7 @@ resources at the Python layer. Currently, these interfaces take effect only in P
  from mindspore.hal.device import is_initialized, is_available, device_count, get_device_capability,\
      get_device_properties, get_device_name, get_arch_list
  from mindspore.hal.stream import Stream, synchronize, set_cur_stream, current_stream, default_stream,\
-     StreamCtx
+     communication_stream, StreamCtx
  from mindspore.hal.event import Event
  from mindspore.hal.memory import memory_stats, memory_reserved, max_memory_reserved, empty_cache,\
      reset_peak_memory_stats, memory_summary, memory_allocated,\
@@ -31,8 +31,8 @@ from mindspore.hal.memory import memory_stats, memory_reserved, max_memory_reser
  __all__ = [
      "is_initialized", "is_available", "device_count", "get_device_capability",
      "get_device_properties", "get_device_name", "get_arch_list",
-     "Event", "Stream", "synchronize", "set_cur_stream", "current_stream", "default_stream", "StreamCtx",
-     "memory_stats", "memory_reserved", "max_memory_reserved", "empty_cache", "reset_peak_memory_stats",
+     "Event", "communication_stream", "Stream", "synchronize", "set_cur_stream", "current_stream", "default_stream",
+     "StreamCtx", "memory_stats", "memory_reserved", "max_memory_reserved", "empty_cache", "reset_peak_memory_stats",
      "memory_summary", "memory_allocated", "max_memory_allocated", "reset_max_memory_reserved",
      "reset_max_memory_allocated"
  ]
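
The only API change here is the new communication_stream export from mindspore.hal.stream. Assuming it mirrors default_stream() as a zero-argument accessor returning a Stream (an assumption, not verified against the 2.4.0 docs), it would slot into the existing StreamCtx pattern:

    import mindspore as ms

    comm_stream = ms.hal.communication_stream()  # assumed zero-arg accessor
    with ms.hal.StreamCtx(comm_stream):
        # ops launched here would be issued on the communication stream
        pass
    ms.hal.synchronize()  # wait for all streams to finish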