mindspore 2.6.0__cp311-cp311-win_amd64.whl → 2.7.0__cp311-cp311-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mindspore might be problematic. Click here for more details.

Files changed (455) hide show
  1. mindspore/.commit_id +1 -1
  2. mindspore/Microsoft.VisualStudio.Telemetry.dll +0 -0
  3. mindspore/Newtonsoft.Json.dll +0 -0
  4. mindspore/__init__.py +2 -2
  5. mindspore/_c_dataengine.cp311-win_amd64.pyd +0 -0
  6. mindspore/_c_expression.cp311-win_amd64.pyd +0 -0
  7. mindspore/_c_mindrecord.cp311-win_amd64.pyd +0 -0
  8. mindspore/_checkparam.py +42 -11
  9. mindspore/_extends/builtin_operations.py +3 -3
  10. mindspore/{_deprecated → _extends/optimize}/__init__.py +9 -3
  11. mindspore/_extends/optimize/cell_utils.py +96 -0
  12. mindspore/_extends/parallel_compile/akg_compiler/custom.py +1109 -0
  13. mindspore/_extends/parallel_compile/akg_compiler/gen_custom_op_files.py +1 -1
  14. mindspore/_extends/parse/__init__.py +3 -3
  15. mindspore/_extends/parse/compile_config.py +44 -22
  16. mindspore/_extends/parse/deprecated/deprecated_tensor_method.py +1 -2
  17. mindspore/_extends/parse/parser.py +64 -83
  18. mindspore/_extends/parse/resources.py +39 -0
  19. mindspore/_extends/parse/standard_method.py +47 -14
  20. mindspore/_extends/parse/trope.py +8 -1
  21. mindspore/_extends/pijit/__init__.py +1 -2
  22. mindspore/_extends/pijit/pijit_func_white_list.py +2 -5
  23. mindspore/amp.py +4 -22
  24. mindspore/atlprov.dll +0 -0
  25. mindspore/avcodec-59.dll +0 -0
  26. mindspore/avdevice-59.dll +0 -0
  27. mindspore/avfilter-8.dll +0 -0
  28. mindspore/avformat-59.dll +0 -0
  29. mindspore/avutil-57.dll +0 -0
  30. mindspore/boost/adasum.py +1 -1
  31. mindspore/boost/boost_cell_wrapper.py +4 -4
  32. mindspore/c1.dll +0 -0
  33. mindspore/c1xx.dll +0 -0
  34. mindspore/c2.dll +0 -0
  35. mindspore/common/__init__.py +43 -12
  36. mindspore/common/_grad_function.py +2 -1
  37. mindspore/common/_pijit_context.py +28 -7
  38. mindspore/common/_stub_tensor.py +1 -209
  39. mindspore/common/_tensor_cpp_method.py +1 -1
  40. mindspore/common/_tensor_docs.py +177 -52
  41. mindspore/common/_utils.py +9 -1
  42. mindspore/common/api.py +338 -208
  43. mindspore/common/dtype.py +108 -57
  44. mindspore/common/dump.py +11 -16
  45. mindspore/common/dynamic_shape/__init__.py +0 -0
  46. mindspore/common/{auto_dynamic_shape.py → dynamic_shape/auto_dynamic_shape.py} +17 -23
  47. mindspore/common/dynamic_shape/enable_dynamic.py +197 -0
  48. mindspore/common/file_system.py +59 -9
  49. mindspore/common/generator.py +2 -3
  50. mindspore/common/hook_handle.py +33 -5
  51. mindspore/common/jit_config.py +1 -1
  52. mindspore/common/jit_trace.py +84 -105
  53. mindspore/common/np_dtype.py +3 -3
  54. mindspore/common/parameter.py +27 -29
  55. mindspore/common/recompute.py +5 -7
  56. mindspore/common/sparse_tensor.py +0 -3
  57. mindspore/common/symbol.py +0 -1
  58. mindspore/common/tensor.py +84 -133
  59. mindspore/communication/_comm_helper.py +46 -4
  60. mindspore/communication/management.py +79 -7
  61. mindspore/context.py +47 -38
  62. mindspore/dataset/__init__.py +1 -1
  63. mindspore/dataset/audio/transforms.py +1 -1
  64. mindspore/dataset/core/config.py +38 -4
  65. mindspore/dataset/engine/datasets.py +350 -322
  66. mindspore/dataset/engine/datasets_user_defined.py +69 -23
  67. mindspore/dataset/engine/iterators.py +2 -2
  68. mindspore/dataset/engine/obs/config_loader.py +2 -2
  69. mindspore/dataset/engine/obs/obs_mindrecord_dataset.py +8 -0
  70. mindspore/dataset/transforms/c_transforms.py +2 -2
  71. mindspore/dataset/transforms/py_transforms.py +7 -3
  72. mindspore/dataset/transforms/transforms.py +10 -6
  73. mindspore/dataset/vision/__init__.py +1 -1
  74. mindspore/dataset/vision/py_transforms.py +8 -8
  75. mindspore/dataset/vision/transforms.py +17 -5
  76. mindspore/dataset/vision/utils.py +632 -21
  77. mindspore/dataset/vision/validators.py +1 -0
  78. mindspore/device_context/ascend/device.py +1 -1
  79. mindspore/device_context/ascend/op_tuning.py +35 -1
  80. mindspore/device_context/gpu/__init__.py +2 -2
  81. mindspore/device_context/gpu/device.py +1 -1
  82. mindspore/device_context/gpu/op_precision.py +4 -2
  83. mindspore/device_context/gpu/op_tuning.py +6 -3
  84. mindspore/device_manager.py +16 -9
  85. mindspore/dnnl.dll +0 -0
  86. mindspore/dpcmi.dll +0 -0
  87. mindspore/experimental/llm_boost/ascend_native/llama_boost_ascend_native.py +5 -4
  88. mindspore/experimental/llm_boost/atb/boost_base.py +2 -3
  89. mindspore/experimental/optim/adadelta.py +13 -20
  90. mindspore/experimental/optim/adagrad.py +15 -22
  91. mindspore/experimental/optim/adam.py +17 -24
  92. mindspore/experimental/optim/adamax.py +14 -22
  93. mindspore/experimental/optim/adamw.py +28 -34
  94. mindspore/experimental/optim/asgd.py +15 -25
  95. mindspore/experimental/optim/lr_scheduler.py +27 -45
  96. mindspore/experimental/optim/nadam.py +14 -24
  97. mindspore/experimental/optim/optimizer.py +13 -23
  98. mindspore/experimental/optim/radam.py +18 -24
  99. mindspore/experimental/optim/rmsprop.py +14 -25
  100. mindspore/experimental/optim/rprop.py +15 -26
  101. mindspore/experimental/optim/sgd.py +9 -19
  102. mindspore/hal/__init__.py +4 -4
  103. mindspore/hal/contiguous_tensors_handle.py +2 -2
  104. mindspore/hal/memory.py +1 -0
  105. mindspore/include/api/cell.h +65 -5
  106. mindspore/include/api/cfg.h +24 -7
  107. mindspore/include/api/context.h +1 -0
  108. mindspore/include/api/delegate.h +10 -2
  109. mindspore/include/api/dual_abi_helper.h +100 -19
  110. mindspore/include/api/graph.h +14 -1
  111. mindspore/include/api/kernel.h +16 -3
  112. mindspore/include/api/kernel_api.h +9 -1
  113. mindspore/include/api/metrics/accuracy.h +9 -0
  114. mindspore/include/api/model.h +8 -1
  115. mindspore/include/api/model_group.h +4 -0
  116. mindspore/include/api/model_parallel_runner.h +2 -0
  117. mindspore/include/api/status.h +48 -10
  118. mindspore/include/api/types.h +8 -3
  119. mindspore/include/c_api/model_c.h +0 -58
  120. mindspore/include/c_api/tensor_c.h +0 -26
  121. mindspore/include/dataset/constants.h +9 -0
  122. mindspore/include/dataset/vision_ascend.h +1 -1
  123. mindspore/jpeg62.dll +0 -0
  124. mindspore/mindrecord/tools/cifar10.py +61 -11
  125. mindspore/mindrecord/tools/cifar10_to_mr.py +5 -0
  126. mindspore/mindspore_backend_common.dll +0 -0
  127. mindspore/mindspore_backend_manager.dll +0 -0
  128. mindspore/mindspore_common.dll +0 -0
  129. mindspore/mindspore_core.dll +0 -0
  130. mindspore/mindspore_cpu_res_manager.dll +0 -0
  131. mindspore/mindspore_dump.dll +0 -0
  132. mindspore/mindspore_frontend.dll +0 -0
  133. mindspore/mindspore_glog.dll +0 -0
  134. mindspore/mindspore_memory_pool.dll +0 -0
  135. mindspore/mindspore_ms_backend.dll +0 -0
  136. mindspore/mindspore_ops.dll +0 -0
  137. mindspore/mindspore_ops_host.dll +0 -0
  138. mindspore/mindspore_ops_kernel_common.dll +0 -0
  139. mindspore/mindspore_profiler.dll +0 -0
  140. mindspore/mindspore_pyboost.dll +0 -0
  141. mindspore/mindspore_pynative.dll +0 -0
  142. mindspore/mindspore_res_manager.dll +0 -0
  143. mindspore/mindspore_runtime_pipeline.dll +0 -0
  144. mindspore/mint/__init__.py +4 -44
  145. mindspore/mint/distributed/__init__.py +5 -0
  146. mindspore/mint/distributed/distributed.py +425 -19
  147. mindspore/mint/nn/__init__.py +1 -1
  148. mindspore/mint/nn/functional.py +53 -6
  149. mindspore/mint/nn/layer/_functions.py +163 -294
  150. mindspore/mint/nn/layer/activation.py +8 -6
  151. mindspore/mint/nn/layer/conv.py +125 -101
  152. mindspore/mint/nn/layer/normalization.py +11 -25
  153. mindspore/mint/optim/adam.py +19 -18
  154. mindspore/mint/optim/adamw.py +14 -8
  155. mindspore/mint/optim/sgd.py +5 -5
  156. mindspore/msobj140.dll +0 -0
  157. mindspore/mspdb140.dll +0 -0
  158. mindspore/mspdbcore.dll +0 -0
  159. mindspore/mspdbst.dll +0 -0
  160. mindspore/mspft140.dll +0 -0
  161. mindspore/msvcdis140.dll +0 -0
  162. mindspore/msvcp140_1.dll +0 -0
  163. mindspore/msvcp140_2.dll +0 -0
  164. mindspore/msvcp140_atomic_wait.dll +0 -0
  165. mindspore/msvcp140_codecvt_ids.dll +0 -0
  166. mindspore/nn/cell.py +488 -620
  167. mindspore/nn/grad/cell_grad.py +11 -12
  168. mindspore/nn/layer/activation.py +36 -36
  169. mindspore/nn/layer/basic.py +74 -77
  170. mindspore/nn/layer/channel_shuffle.py +4 -4
  171. mindspore/nn/layer/combined.py +4 -2
  172. mindspore/nn/layer/conv.py +86 -85
  173. mindspore/nn/layer/dense.py +9 -7
  174. mindspore/nn/layer/embedding.py +50 -52
  175. mindspore/nn/layer/image.py +38 -40
  176. mindspore/nn/layer/math.py +111 -112
  177. mindspore/nn/layer/normalization.py +56 -44
  178. mindspore/nn/layer/pooling.py +58 -63
  179. mindspore/nn/layer/rnn_cells.py +33 -33
  180. mindspore/nn/layer/rnns.py +56 -56
  181. mindspore/nn/layer/thor_layer.py +74 -73
  182. mindspore/nn/layer/transformer.py +11 -1
  183. mindspore/nn/learning_rate_schedule.py +20 -20
  184. mindspore/nn/loss/loss.py +79 -81
  185. mindspore/nn/optim/adam.py +2 -4
  186. mindspore/nn/optim/adasum.py +2 -2
  187. mindspore/nn/optim/lamb.py +1 -3
  188. mindspore/nn/optim/optimizer.py +1 -1
  189. mindspore/nn/optim/tft_wrapper.py +2 -3
  190. mindspore/nn/optim/thor.py +2 -2
  191. mindspore/nn/probability/distribution/_utils/utils.py +2 -2
  192. mindspore/nn/probability/distribution/exponential.py +2 -1
  193. mindspore/nn/probability/distribution/poisson.py +2 -1
  194. mindspore/nn/sparse/sparse.py +3 -3
  195. mindspore/nn/wrap/cell_wrapper.py +73 -42
  196. mindspore/nn/wrap/grad_reducer.py +37 -52
  197. mindspore/nn/wrap/loss_scale.py +72 -74
  198. mindspore/numpy/array_creations.py +7 -7
  199. mindspore/numpy/fft.py +1 -1
  200. mindspore/numpy/math_ops.py +1 -1
  201. mindspore/numpy/utils_const.py +1 -1
  202. mindspore/opencv_core452.dll +0 -0
  203. mindspore/opencv_imgcodecs452.dll +0 -0
  204. mindspore/opencv_imgproc452.dll +0 -0
  205. mindspore/ops/_grad_experimental/grad_comm_ops.py +51 -13
  206. mindspore/ops/_grad_experimental/grad_debug_ops.py +14 -0
  207. mindspore/ops/_grad_experimental/grad_inner_ops.py +0 -9
  208. mindspore/ops/_op_impl/cpu/__init__.py +1 -0
  209. mindspore/{experimental/es/__init__.py → ops/_op_impl/cpu/joinedstr_op.py} +12 -6
  210. mindspore/ops/_vmap/vmap_array_ops.py +6 -13
  211. mindspore/ops/_vmap/vmap_nn_ops.py +8 -16
  212. mindspore/ops/auto_generate/cpp_create_prim_instance_helper.py +29 -10
  213. mindspore/ops/auto_generate/gen_extend_func.py +5 -55
  214. mindspore/ops/auto_generate/gen_ops_def.py +753 -273
  215. mindspore/ops/auto_generate/gen_ops_prim.py +1687 -958
  216. mindspore/ops/auto_generate/pyboost_inner_prim.py +31 -1
  217. mindspore/ops/composite/__init__.py +10 -0
  218. mindspore/ops/composite/base.py +9 -5
  219. mindspore/ops/composite/multitype_ops/__init__.py +12 -1
  220. mindspore/ops/composite/multitype_ops/_compile_utils.py +132 -108
  221. mindspore/ops/composite/multitype_ops/_constexpr_utils.py +1 -1
  222. mindspore/ops/composite/multitype_ops/add_impl.py +70 -2
  223. mindspore/ops/composite/multitype_ops/div_impl.py +49 -0
  224. mindspore/ops/composite/multitype_ops/floordiv_impl.py +29 -0
  225. mindspore/ops/composite/multitype_ops/getitem_impl.py +11 -0
  226. mindspore/ops/composite/multitype_ops/mod_impl.py +5 -3
  227. mindspore/ops/composite/multitype_ops/mul_impl.py +49 -0
  228. mindspore/ops/composite/multitype_ops/setitem_impl.py +57 -0
  229. mindspore/ops/composite/multitype_ops/sub_impl.py +34 -0
  230. mindspore/ops/composite/multitype_ops/zeros_like_impl.py +14 -0
  231. mindspore/ops/function/__init__.py +4 -1
  232. mindspore/ops/function/_add_attr_func.py +11 -6
  233. mindspore/ops/function/array_func.py +17 -100
  234. mindspore/ops/function/debug_func.py +8 -5
  235. mindspore/ops/function/grad/grad_func.py +5 -13
  236. mindspore/ops/function/math_func.py +65 -399
  237. mindspore/ops/function/nn_func.py +44 -61
  238. mindspore/ops/function/other_func.py +4 -1
  239. mindspore/ops/function/random_func.py +31 -4
  240. mindspore/ops/functional.py +2 -3
  241. mindspore/ops/functional_overload.py +486 -18
  242. mindspore/ops/op_info_register.py +21 -0
  243. mindspore/ops/operations/__init__.py +5 -2
  244. mindspore/ops/operations/_custom_ops_utils.py +675 -8
  245. mindspore/ops/operations/_inner_ops.py +14 -18
  246. mindspore/ops/operations/_sequence_ops.py +1 -1
  247. mindspore/ops/operations/array_ops.py +4 -50
  248. mindspore/ops/operations/comm_ops.py +186 -41
  249. mindspore/ops/operations/custom_ops.py +244 -175
  250. mindspore/ops/operations/debug_ops.py +55 -4
  251. mindspore/ops/operations/image_ops.py +13 -13
  252. mindspore/ops/operations/manually_defined/ops_def.py +27 -28
  253. mindspore/ops/operations/math_ops.py +8 -9
  254. mindspore/ops/operations/nn_ops.py +6 -7
  255. mindspore/ops/primitive.py +9 -20
  256. mindspore/ops/tensor_method.py +52 -11
  257. mindspore/ops_generate/api/cpp_create_prim_instance_helper_generator.py +1 -1
  258. mindspore/ops_generate/api/functional_map_cpp_generator.py +10 -9
  259. mindspore/ops_generate/api/functions_cc_generator.py +58 -10
  260. mindspore/ops_generate/api/tensor_func_reg_cpp_generator.py +1 -1
  261. mindspore/ops_generate/common/base_generator.py +14 -0
  262. mindspore/ops_generate/common/gen_constants.py +7 -2
  263. mindspore/ops_generate/common/gen_utils.py +0 -19
  264. mindspore/ops_generate/common/op_proto.py +11 -4
  265. mindspore/ops_generate/common/template.py +88 -11
  266. mindspore/ops_generate/gen_ops.py +1 -1
  267. mindspore/ops_generate/op_def/lite_ops_cpp_generator.py +4 -4
  268. mindspore/ops_generate/op_def/ops_name_h_generator.py +0 -3
  269. mindspore/ops_generate/op_def/ops_primitive_h_generator.py +0 -4
  270. mindspore/ops_generate/op_def_py/op_prim_py_generator.py +5 -2
  271. mindspore/ops_generate/pyboost/auto_grad_impl_cc_generator.py +49 -8
  272. mindspore/ops_generate/pyboost/auto_grad_reg_cc_generator.py +2 -2
  273. mindspore/ops_generate/pyboost/gen_pyboost_func.py +31 -16
  274. mindspore/ops_generate/pyboost/op_template_parser.py +98 -72
  275. mindspore/ops_generate/pyboost/pyboost_functions_cpp_generator.py +70 -273
  276. mindspore/ops_generate/pyboost/pyboost_functions_h_generator.py +14 -6
  277. mindspore/ops_generate/pyboost/pyboost_functions_impl_cpp_generator.py +316 -0
  278. mindspore/ops_generate/pyboost/pyboost_functions_py_generator.py +1 -1
  279. mindspore/ops_generate/pyboost/pyboost_grad_function_cpp_generator.py +5 -3
  280. mindspore/ops_generate/pyboost/pyboost_inner_prim_generator.py +1 -1
  281. mindspore/ops_generate/pyboost/pyboost_internal_functions_cpp_generator.py +76 -0
  282. mindspore/ops_generate/pyboost/pyboost_internal_functions_h_generator.py +76 -0
  283. mindspore/ops_generate/pyboost/pyboost_internal_kernel_info_adapter_generator.py +125 -0
  284. mindspore/ops_generate/pyboost/pyboost_native_grad_functions_generator.py +4 -3
  285. mindspore/ops_generate/pyboost/pyboost_op_cpp_code_generator.py +348 -61
  286. mindspore/ops_generate/pyboost/pyboost_overload_functions_cpp_generator.py +1 -1
  287. mindspore/ops_generate/pyboost/pyboost_utils.py +118 -9
  288. mindspore/ops_generate/tensor_py_cc_generator.py +1 -24
  289. mindspore/parallel/_auto_parallel_context.py +9 -17
  290. mindspore/parallel/_cell_wrapper.py +106 -40
  291. mindspore/parallel/_parallel_serialization.py +4 -3
  292. mindspore/parallel/_ps_context.py +4 -6
  293. mindspore/parallel/_tensor.py +167 -12
  294. mindspore/parallel/_transformer/moe.py +1 -1
  295. mindspore/parallel/_transformer/transformer.py +17 -12
  296. mindspore/parallel/_utils.py +5 -11
  297. mindspore/parallel/auto_parallel.py +33 -12
  298. mindspore/parallel/checkpoint_convert.py +3 -3
  299. mindspore/parallel/checkpoint_transform.py +5 -1
  300. mindspore/parallel/cluster/process_entity/_api.py +88 -49
  301. mindspore/parallel/cluster/process_entity/_utils.py +95 -7
  302. mindspore/parallel/cluster/run.py +48 -7
  303. mindspore/parallel/function/__init__.py +8 -1
  304. mindspore/parallel/function/reshard_func.py +7 -6
  305. mindspore/parallel/nn/__init__.py +15 -2
  306. mindspore/parallel/nn/parallel_cell_wrapper.py +50 -14
  307. mindspore/parallel/nn/parallel_grad_reducer.py +7 -14
  308. mindspore/parallel/shard.py +9 -23
  309. mindspore/parallel/transform_safetensors.py +468 -174
  310. mindspore/pgodb140.dll +0 -0
  311. mindspore/pgort140.dll +0 -0
  312. mindspore/profiler/__init__.py +2 -1
  313. mindspore/profiler/analysis/parser/timeline_assembly_factory/ascend_timeline_assembler.py +7 -7
  314. mindspore/profiler/analysis/parser/timeline_assembly_factory/base_timeline_assembler.py +3 -0
  315. mindspore/profiler/analysis/parser/timeline_assembly_factory/trace_view_container.py +3 -0
  316. mindspore/profiler/analysis/parser/timeline_creator/cpu_op_timeline_creator.py +3 -3
  317. mindspore/profiler/analysis/parser/timeline_creator/fwk_timeline_creator.py +3 -3
  318. mindspore/profiler/analysis/parser/timeline_creator/msprof_timeline_creator.py +4 -4
  319. mindspore/profiler/analysis/parser/timeline_creator/scope_layer_timeline_creator.py +3 -3
  320. mindspore/profiler/analysis/parser/timeline_event/fwk_event.py +4 -1
  321. mindspore/profiler/analysis/parser/timeline_event/timeline_event_pool.py +2 -1
  322. mindspore/profiler/analysis/task_manager.py +1 -1
  323. mindspore/profiler/analysis/viewer/ascend_communication_viewer.py +5 -1
  324. mindspore/profiler/analysis/viewer/ascend_integrate_viewer.py +2 -1
  325. mindspore/profiler/analysis/viewer/ascend_kernel_details_viewer.py +10 -9
  326. mindspore/profiler/analysis/viewer/ascend_op_memory_viewer.py +43 -23
  327. mindspore/profiler/analysis/viewer/ascend_step_trace_time_viewer.py +3 -2
  328. mindspore/profiler/analysis/viewer/ms_minddata_viewer.py +9 -5
  329. mindspore/profiler/analysis/viewer/ms_operator_details_viewer.py +132 -0
  330. mindspore/profiler/common/constant.py +16 -0
  331. mindspore/profiler/common/msprof_cmd_tool.py +2 -2
  332. mindspore/profiler/common/path_manager.py +9 -0
  333. mindspore/profiler/common/profiler_context.py +50 -29
  334. mindspore/profiler/common/profiler_info.py +0 -16
  335. mindspore/profiler/common/profiler_meta_data.py +1 -0
  336. mindspore/profiler/common/profiler_op_analyse.py +239 -0
  337. mindspore/profiler/common/profiler_output_path.py +23 -8
  338. mindspore/profiler/common/profiler_parameters.py +128 -35
  339. mindspore/profiler/dynamic_profile/__init__.py +0 -0
  340. mindspore/profiler/dynamic_profile/dynamic_monitor_proxy.py +39 -0
  341. mindspore/profiler/dynamic_profile/dynamic_profiler_config_context.py +666 -0
  342. mindspore/profiler/dynamic_profile/dynamic_profiler_utils.py +62 -0
  343. mindspore/profiler/dynamic_profiler.py +374 -338
  344. mindspore/profiler/envprofiler.py +42 -12
  345. mindspore/profiler/experimental_config.py +112 -7
  346. mindspore/profiler/mstx.py +33 -12
  347. mindspore/profiler/platform/__init__.py +2 -3
  348. mindspore/profiler/platform/cpu_profiler.py +10 -4
  349. mindspore/profiler/platform/npu_profiler.py +30 -20
  350. mindspore/profiler/profiler.py +218 -154
  351. mindspore/profiler/profiler_action_controller.py +65 -77
  352. mindspore/profiler/profiler_interface.py +2 -2
  353. mindspore/profiler/schedule.py +10 -4
  354. mindspore/rewrite/common/config.py +1 -0
  355. mindspore/rewrite/common/namer.py +1 -0
  356. mindspore/rewrite/common/namespace.py +1 -0
  357. mindspore/rewrite/node/node.py +31 -11
  358. mindspore/rewrite/parsers/assign_parser.py +1 -1
  359. mindspore/rewrite/symbol_tree/symbol_tree.py +2 -2
  360. mindspore/run_check/_check_version.py +7 -10
  361. mindspore/runtime/__init__.py +8 -6
  362. mindspore/runtime/event.py +10 -4
  363. mindspore/runtime/executor.py +87 -45
  364. mindspore/runtime/memory.py +22 -30
  365. mindspore/runtime/thread_bind_core.py +299 -165
  366. mindspore/safeguard/rewrite_obfuscation.py +12 -13
  367. mindspore/swresample-4.dll +0 -0
  368. mindspore/swscale-6.dll +0 -0
  369. mindspore/tbbmalloc.dll +0 -0
  370. mindspore/tinyxml2.dll +0 -0
  371. mindspore/train/_utils.py +9 -5
  372. mindspore/train/amp.py +43 -23
  373. mindspore/train/callback/__init__.py +5 -5
  374. mindspore/train/callback/_callback.py +2 -1
  375. mindspore/train/callback/_checkpoint.py +4 -14
  376. mindspore/train/callback/_flops_collector.py +11 -7
  377. mindspore/train/callback/_landscape.py +0 -1
  378. mindspore/train/callback/_train_fault_tolerance.py +72 -18
  379. mindspore/train/data_sink.py +15 -6
  380. mindspore/train/dataset_helper.py +14 -5
  381. mindspore/train/model.py +49 -47
  382. mindspore/train/serialization.py +168 -126
  383. mindspore/train/summary/summary_record.py +13 -2
  384. mindspore/train/train_thor/model_thor.py +2 -2
  385. mindspore/turbojpeg.dll +0 -0
  386. mindspore/utils/__init__.py +3 -2
  387. mindspore/utils/dryrun.py +0 -6
  388. mindspore/utils/runtime_execution_order_check.py +162 -78
  389. mindspore/utils/sdc_detect.py +68 -0
  390. mindspore/utils/utils.py +14 -17
  391. mindspore/vcmeta.dll +0 -0
  392. mindspore/vcruntime140.dll +0 -0
  393. mindspore/vcruntime140_1.dll +0 -0
  394. mindspore/version.py +1 -1
  395. {mindspore-2.6.0.dist-info → mindspore-2.7.0.dist-info}/METADATA +5 -4
  396. {mindspore-2.6.0.dist-info → mindspore-2.7.0.dist-info}/RECORD +400 -439
  397. mindspore/_deprecated/jit.py +0 -198
  398. mindspore/_extends/remote/kernel_build_server_ascend.py +0 -75
  399. mindspore/communication/_hccl_management.py +0 -297
  400. mindspore/experimental/es/embedding_service.py +0 -891
  401. mindspore/experimental/es/embedding_service_layer.py +0 -581
  402. mindspore/profiler/common/validator/__init__.py +0 -14
  403. mindspore/profiler/common/validator/validate_path.py +0 -84
  404. mindspore/profiler/parser/__init__.py +0 -14
  405. mindspore/profiler/parser/aicpu_data_parser.py +0 -272
  406. mindspore/profiler/parser/ascend_analysis/__init__.py +0 -14
  407. mindspore/profiler/parser/ascend_analysis/constant.py +0 -71
  408. mindspore/profiler/parser/ascend_analysis/file_manager.py +0 -180
  409. mindspore/profiler/parser/ascend_analysis/function_event.py +0 -185
  410. mindspore/profiler/parser/ascend_analysis/fwk_cann_parser.py +0 -136
  411. mindspore/profiler/parser/ascend_analysis/fwk_file_parser.py +0 -131
  412. mindspore/profiler/parser/ascend_analysis/msprof_timeline_parser.py +0 -104
  413. mindspore/profiler/parser/ascend_analysis/path_manager.py +0 -313
  414. mindspore/profiler/parser/ascend_analysis/profiler_info_parser.py +0 -123
  415. mindspore/profiler/parser/ascend_analysis/tlv_decoder.py +0 -86
  416. mindspore/profiler/parser/ascend_analysis/trace_event_manager.py +0 -75
  417. mindspore/profiler/parser/ascend_cluster_generator.py +0 -116
  418. mindspore/profiler/parser/ascend_communicate_generator.py +0 -314
  419. mindspore/profiler/parser/ascend_flops_generator.py +0 -116
  420. mindspore/profiler/parser/ascend_fpbp_generator.py +0 -82
  421. mindspore/profiler/parser/ascend_hccl_generator.py +0 -271
  422. mindspore/profiler/parser/ascend_integrate_generator.py +0 -42
  423. mindspore/profiler/parser/ascend_memory_generator.py +0 -185
  424. mindspore/profiler/parser/ascend_msprof_exporter.py +0 -282
  425. mindspore/profiler/parser/ascend_msprof_generator.py +0 -187
  426. mindspore/profiler/parser/ascend_op_generator.py +0 -334
  427. mindspore/profiler/parser/ascend_steptrace_generator.py +0 -94
  428. mindspore/profiler/parser/ascend_timeline_generator.py +0 -545
  429. mindspore/profiler/parser/base_timeline_generator.py +0 -483
  430. mindspore/profiler/parser/container.py +0 -229
  431. mindspore/profiler/parser/cpu_gpu_timeline_generator.py +0 -697
  432. mindspore/profiler/parser/flops_parser.py +0 -531
  433. mindspore/profiler/parser/framework_enum.py +0 -111
  434. mindspore/profiler/parser/framework_parser.py +0 -464
  435. mindspore/profiler/parser/framework_struct.py +0 -61
  436. mindspore/profiler/parser/gpu_analysis/__init__.py +0 -14
  437. mindspore/profiler/parser/gpu_analysis/function_event.py +0 -44
  438. mindspore/profiler/parser/gpu_analysis/fwk_file_parser.py +0 -89
  439. mindspore/profiler/parser/gpu_analysis/profiler_info_parser.py +0 -72
  440. mindspore/profiler/parser/hccl_parser.py +0 -573
  441. mindspore/profiler/parser/hwts_log_parser.py +0 -122
  442. mindspore/profiler/parser/integrator.py +0 -526
  443. mindspore/profiler/parser/memory_usage_parser.py +0 -277
  444. mindspore/profiler/parser/minddata_analyzer.py +0 -800
  445. mindspore/profiler/parser/minddata_parser.py +0 -186
  446. mindspore/profiler/parser/minddata_pipeline_parser.py +0 -299
  447. mindspore/profiler/parser/op_intermediate_parser.py +0 -149
  448. mindspore/profiler/parser/optime_parser.py +0 -250
  449. mindspore/profiler/parser/profiler_info.py +0 -213
  450. mindspore/profiler/parser/step_trace_parser.py +0 -666
  451. mindspore/utils/hooks.py +0 -81
  452. /mindspore/common/{_auto_dynamic.py → dynamic_shape/_auto_dynamic.py} +0 -0
  453. {mindspore-2.6.0.dist-info → mindspore-2.7.0.dist-info}/WHEEL +0 -0
  454. {mindspore-2.6.0.dist-info → mindspore-2.7.0.dist-info}/entry_points.txt +0 -0
  455. {mindspore-2.6.0.dist-info → mindspore-2.7.0.dist-info}/top_level.txt +0 -0
@@ -250,11 +250,11 @@ def add_ext(input, other, alpha=1):
250
250
  input (Union[Tensor, number.Number, bool]): The first input is a number.Number or
251
251
  a bool or a tensor whose data type is
252
252
  `number <https://www.mindspore.cn/docs/en/master/api_python/mindspore/mindspore.dtype.html>`_ or
253
- `bool_ <https://www.mindspore.cn/docs/en/master/api_python/mindspore/mindspore.dtype.html>`_.
253
+ `bool <https://www.mindspore.cn/docs/en/master/api_python/mindspore/mindspore.dtype.html>`_.
254
254
  other (Union[Tensor, number.Number, bool]): The second input, is a number.Number or
255
255
  a bool or a tensor whose data type is
256
256
  `number <https://www.mindspore.cn/docs/en/master/api_python/mindspore/mindspore.dtype.html>`_ or
257
- `bool_ <https://www.mindspore.cn/docs/en/master/api_python/mindspore/mindspore.dtype.html>`_.
257
+ `bool <https://www.mindspore.cn/docs/en/master/api_python/mindspore/mindspore.dtype.html>`_.
258
258
  alpha (number.Number): A scaling factor applied to `other`, default 1.
259
259
 
260
260
  Returns:
@@ -312,7 +312,7 @@ def add(input, other):
312
312
 
313
313
  Note:
314
314
  - The two inputs can not be bool type at the same time,
315
- [True, Tensor(True, bool\_), Tensor(np.array([True]), bool\_)] are all considered bool type.
315
+ [True, Tensor(True), Tensor(np.array([True]))] are all considered bool type.
316
316
  - Support broadcast, support implicit type conversion and type promotion.
317
317
  - When the input is a tensor, the dimension should be greater than or equal to 1.
318
318
 
@@ -442,8 +442,7 @@ def apply_rotary_pos_emb_(query, key, cos, sin, position_ids, cos_format=0):
442
442
  r"""
443
443
 
444
444
  """
445
- apply_rotary_pos_emb_op = _get_cache_prim(ApplyRotaryPosEmb)(cos_format)
446
- return apply_rotary_pos_emb_op(query, key, cos, sin, position_ids)
445
+ return apply_rotary_pos_emb_impl(query, key, cos, sin, position_ids, cos_format)
447
446
 
448
447
 
449
448
  def argmax_ext(input, dim=None, keepdim=False):
@@ -1183,7 +1182,7 @@ def broadcast_to(input, shape):
1183
1182
 
1184
1183
  Args:
1185
1184
  input (Tensor): The input tensor.
1186
- shape (tuple): The target shape.
1185
+ shape (tuple[int]): The target shape.
1187
1186
 
1188
1187
  Returns:
1189
1188
  Tensor
@@ -1827,7 +1826,7 @@ def correlate(a, v, pad_mode='valid'):
1827
1826
 
1828
1827
  Note:
1829
1828
  - `correlate` is currently only used in `mindscience` scientific computing scenarios and
1830
- dose not support other usage scenarios.
1829
+ does not support other usage scenarios.
1831
1830
  - `correlate` is not supported on Windows platform yet.
1832
1831
 
1833
1832
  Args:
@@ -1986,6 +1985,112 @@ def count_nonzero(input, dim=None):
1986
1985
  return count_nonzero_op(input, dim)
1987
1986
 
1988
1987
 
1988
+ def cross_entropy_loss_grad(grad_loss, log_prob, target, weight=None, grad_zloss=None, lse_for_zloss=None, reduction='mean', ignore_index=-100, label_smoothing=0.0, lse_square_scale_for_zloss=0.0):
1989
+ r"""
1990
+
1991
+ """
1992
+ return cross_entropy_loss_grad_op(grad_loss, log_prob, target, weight, grad_zloss, lse_for_zloss, reduction, ignore_index, label_smoothing, lse_square_scale_for_zloss)
1993
+
1994
+
1995
+ def cross_entropy_loss(input, target, weight=None, reduction='mean', ignore_index=-100, label_smoothing=0.0, lse_square_scale_for_zloss=0.0, return_zloss=False):
1996
+ r"""
1997
+ Computes the cross entropy loss between input and target.
1998
+
1999
+ Assume there are :math:`C` classes, with class indices in the range :math:`[0, C)`;
2000
+ the loss with reduction=none can be described as:
2001
+
2002
+ .. math::
2003
+
2004
+ \ell(x, y) = L = \{l_1,\dots,l_N\}^\top, \quad
2005
+ l_n = - w_{y_n} \log \frac{\exp(x_{n,y_n})}{\sum_{c=1}^C \exp(x_{n,c})}
2006
+ \cdot \mathbb{1}\{y_n \not= \text{ignore_index}\}
2007
+
2008
+ where :math:`x` is the inputs, :math:`y` is the target, :math:`w` is the weight, :math:`N` is the batch size,
2009
+ :math:`c` belonging to :math:`[0, C-1]` is class index, where :math:`C` is the number of classes.
2010
+
2011
+ If `reduction` is not ``'none'`` (default ``'mean'`` ), then
2012
+
2013
+ .. math::
2014
+
2015
+ \ell(x, y) = \begin{cases}
2016
+ \sum_{n=1}^N \frac{1}{\sum_{n=1}^N w_{y_n} \cdot \mathbb{1}\{y_n \not= \text{ignore_index}\}} l_n, &
2017
+ \text{if reduction} = \text{'mean',}\\
2018
+ \sum_{n=1}^N l_n, &
2019
+ \text{if reduction} = \text{'sum'.}
2020
+ \end{cases}
2021
+
2022
+ .. warning::
2023
+ This is an experimental API that is subject to change or deletion.
2024
+
2025
+ Inputs:
2026
+ - **input** (Tensor) - Tensor of shape :math:`(N, C)` where `C = number of classes`, data type must be bfloat16, float16 or float32.
2027
+ - **target** (Tensor) - For class indices, tensor of shape :math:`(N)`, data type must be int64. The value must be in range [0, C).
2028
+ - **weight** (Tensor, optional) - A rescaling weight applied to the loss of each class.
2029
+ If not None, the shape is :math:`(C,)`, data type must be float32. Default: ``None`` .
2030
+ - **reduction** (str, optional) - Apply specific reduction method to the output: ``'none'`` , ``'mean'`` ,
2031
+ ``'sum'`` . Default: ``'mean'`` .
2032
+
2033
+ - ``'none'``: no reduction will be applied.
2034
+ - ``'mean'``: compute and return the weighted mean of elements in the output.
2035
+ - ``'sum'``: the output elements will be summed.
2036
+
2037
+ - **ignore_index** (int, optional) - Specifies a target value that is ignored and does not contribute to the input
2038
+ gradient. When set to negative values, no target value is ignored. It should be int64.
2039
+ Default: ``-100`` .
2040
+ - **label_smoothing** (float, optional) - Label smoothing values, a regularization tool used to prevent the model
2041
+ from overfitting when calculating Loss. This value must be 0.0 currently. Default: ``0.0`` .
2042
+ - **lse_square_scale_for_zloss** (float, optional) - The value range is [0.0, 1.0), not enabled for now, can only be 0.0. Default: ``0.0`` .
2043
+ - **return_zloss** (bool, optional) - Not enabled for now, can only be ``False``. Default: ``False`` .
2044
+
2045
+ Outputs:
2046
+ A tuple consisting of 4 Tensors.
2047
+
2048
+ - **loss** (Tensor) - loss between `input` and `target`, the dtype is the same as `input`.
2049
+
2050
+ - If `reduction` is ``'none'`` , the shape is :math:`(N,)` .
2051
+ - If `reduction` is ``'sum'`` or ``'mean'``, the shape is :math:`(1,)` .
2052
+
2053
+ - **log_prob** (Tensor) - the shape is :math:`(N, C)` with the same dtype as `input`.
2054
+ - **zloss** (Tensor) - the shape is :math:`(N,)` if `return_zloss` is True, or the shape is :math:`(0,)` with the same dtype as `input`. This parameter is disabled for now.
2055
+ - **lse_for_zloss** (Tensor) - the shape is :math:`(N,)` if `lse_square_scale_for_zloss` is not 0.0, or the shape is :math:`(0,)` with the same dtype as `input`. This parameter is disabled for now.
2056
+
2057
+
2058
+ Raises:
2059
+ ValueError: If `reduction` is not one of ``'none'``, ``'mean'`` or ``'sum'``.
2060
+ TypeError: If `input`, `target` or `weight` is not a Tensor.
2061
+
2062
+ Supported Platforms:
2063
+ ``Ascend``
2064
+
2065
+ Examples:
2066
+ >>> import mindspore
2067
+ >>> import numpy as np
2068
+ >>> from mindspore import Tensor, nn, ops
2069
+ >>>
2070
+ >>>
2071
+ >>> class Net(nn.Cell):
2072
+ ... def __init__(self):
2073
+ ... super(Net, self).__init__()
2074
+ ... self.cross_entropy_loss = ops.auto_generate.CrossEntropyLoss()
2075
+ ...
2076
+ ... def construct(self, input, target, weight):
2077
+ ... result = self.cross_entropy_loss(input, target, weight)
2078
+ ... return result
2079
+ ...
2080
+ >>>
2081
+ >>> net = Net()
2082
+ >>> input = Tensor(np.array([[0.2, 0.7, 0.1], [0.2, 0.7, 0.1]]), mindspore.float32)
2083
+ >>> target = Tensor(np.array([0, 1]), mindspore.int64)
2084
+ >>> weight = Tensor(np.array([1, 0.5, 0.5]), mindspore.float32)
2085
+ >>> output = net(input, target, weight)
2086
+ >>> print(output[:2])
2087
+ (Tensor(shape=[1], dtype=Float32, value= [ 1.10128295e+00]), Tensor(shape=[2, 3], dtype=Float32, value=
2088
+ [[-1.26794958e+00, -7.67949641e-01, -1.36794960e+00],
2089
+ [-1.26794958e+00, -7.67949641e-01, -1.36794960e+00]]))
2090
+ """
2091
+ return cross_entropy_loss_op(input, target, weight, reduction, ignore_index, label_smoothing, lse_square_scale_for_zloss, return_zloss)
2092
+
2093
+
1989
2094
  def cummax(input, axis):
1990
2095
  r"""
1991
2096
  Return the cumulative maximum values and their indices along the given axis of the tensor.
@@ -2037,7 +2142,7 @@ def cummin_ext(input, dim):
2037
2142
  \end{array}
2038
2143
 
2039
2144
  .. note::
2040
- O2 mode is not supported in Ascend.
2145
+ GE backend is not supported in Ascend.
2041
2146
 
2042
2147
  Args:
2043
2148
  input (Tensor): The input Tensor. The dimension must be greater than 0.
@@ -2117,61 +2222,6 @@ def cumsum_ext(input, dim, dtype=None):
2117
2222
  return cumsum_ext_op(input, dim, dtype)
2118
2223
 
2119
2224
 
2120
- def decoder_k_v_cache(cache, update, valid_seq_len, batch_index, seq_len_axis, new_max_seq_len, cur_max_seq_len):
2121
- r"""
2122
- The DecoderKVCache is used for decoding the KVCache of transformer network.
2123
-
2124
- Args:
2125
- cache (Tensor): The cahe tensor with data type of int8, uint8, int16, uint16, float16, float32 and int32.
2126
- When format is BHSD, cache tensor of shape
2127
- :math:`(batch\_size, num\_head, max\_seq\_length, size\_pre\_head)`.
2128
- When format is BSD, cache tensor of shape
2129
- :math:`(batch\_size, max\_seq\_length, hidden\_size)`.
2130
- update (Tensor]): The tensor which is used to update the cache tensor. Same data type as cache tensor.
2131
- When format is BHSD, update tensor of shape
2132
- :math:`(batch\_size, num\_head, update\_seq\_length, size\_pre\_head)`.
2133
- When format is BSD, update tensor of shape
2134
- :math:`(batch\_size, update\_seq\_length, hidden\_size)`.
2135
- valid_seq_len (Tensor): The valid_seq_len tensor with data type of int64.
2136
- Valid_seq_len tensor of shape :math:`(batch\_size)`.
2137
- batch_index (Tensor): The batch_index tensor with data type of int64.
2138
- Batch_index tensor of shape :math:`(batch\_size)`. Indicate that which batch of cache tensor is going to be update. Not abel for now.
2139
- seq_len_axis (Tensor): The seq_len_axis indicate which axis is seq_eln, set to '1' or '2'. Not able for now.
2140
- new_max_seq_len (Tensor): The new_max_seq_len tensor with data type of int64.
2141
- New_max_seq_len tensor of shape :math:`(1)`.
2142
- Indicate that user want to change the shape of cache tensor from
2143
- :math:`(batch\_size, num_head, max\_seq\_length, hidden\_size)`. to
2144
- :math:`(batch\_size * max\_seq\_length / new\_max\_seq\_length, num_head, new\_max\_seq\_length, hidden\_size)`.
2145
- to update the cache tensor. This will not real change the shape of `cache` tensor.
2146
- cur_max_seq_len (Tensor): The new_max_seq_len tensor with data type of int64.
2147
- Cur_max_seq_len tensor of shape :math:`(1)`. Keep the current seq_len of cache tensor. Not abel for now.
2148
-
2149
- Outputs:
2150
- With same data type and same shape as `cache` tensor.
2151
-
2152
- Supported Platforms:
2153
- ``Ascend``
2154
-
2155
- Examples:
2156
- >>> from mindspore.ops.operations import _inner_ops
2157
- >>> b = 4
2158
- >>> h = 40
2159
- >>> max_s = 1024
2160
- >>> s = 1
2161
- >>> d = 128
2162
- >>> cache = Tensor(np.random.randn(b, h, max_s, d).astype(np.float16))
2163
- >>> update = Tensor(np.random.randn(b, h, s, d).astype(np.float16))
2164
- >>> valid_seq_len = Tensor(np.random.randint(-1, s, size=b).astype(np.int64))
2165
- >>> batch_index = Tensor(np.random.choice(np.arange(-1, b), size=b, replace=False).astype(np.int64))
2166
- >>> new_max_seq_len = Tensor(np.random.randn(1).astype(np.int64))
2167
- >>> cur_max_seq_len = Tensor(np.random.randn(1).astype(np.int64))
2168
- >>> decoder_kv_cache = _inner_ops.DecoderKVCache()
2169
- >>> output = decoder_kv_cache(cache, update, valid_seq_len, batch_index, Tensor(2), new_max_seq_len, cur_max_seq_len)
2170
- >>> print(cache)
2171
- """
2172
- return decoder_k_v_cache_op(cache, update, valid_seq_len, batch_index, seq_len_axis, new_max_seq_len, cur_max_seq_len)
2173
-
2174
-
2175
2225
  def dense(input, weight, bias=None):
2176
2226
  r"""
2177
2227
  Applies the dense connected operation to the `input`. The dense function is defined as:
@@ -3053,7 +3103,7 @@ def fft2(input, s=None, dim=(-2, -1), norm=None):
3053
3103
 
3054
3104
  Note:
3055
3105
  - `fft2` is currently only used in `mindscience` scientific computing scenarios and
3056
- dose not support other usage scenarios.
3106
+ does not support other usage scenarios.
3057
3107
  - `fft2` is not supported on Windows platform yet.
3058
3108
 
3059
3109
  Args:
@@ -3117,7 +3167,7 @@ def fftfreq(n, d=1.0, dtype=None):
3117
3167
 
3118
3168
  Note:
3119
3169
  - `fftfreq` is currently only used in `mindscience` scientific computing scenarios and
3120
- dose not support other usage scenarios.
3170
+ does not support other usage scenarios.
3121
3171
  - `fftfreq` is not supported on Windows platform yet.
3122
3172
 
3123
3173
  Args:
@@ -3150,7 +3200,7 @@ def fftn(input, s=None, dim=None, norm=None):
3150
3200
 
3151
3201
  Note:
3152
3202
  - `fftn` is currently only used in `mindscience` scientific computing scenarios and
3153
- dose not support other usage scenarios.
3203
+ does not support other usage scenarios.
3154
3204
  - `fftn` is not supported on Windows platform yet.
3155
3205
 
3156
3206
  Args:
@@ -3210,7 +3260,7 @@ def fftshift(input, dim=None):
3210
3260
 
3211
3261
  Note:
3212
3262
  - `fftshift` is currently only used in `mindscience` scientific computing scenarios and
3213
- dose not support other usage scenarios.
3263
+ does not support other usage scenarios.
3214
3264
  - `fftshift` is not supported on Windows platform yet.
3215
3265
 
3216
3266
  Args:
@@ -3246,7 +3296,7 @@ def fft(input, n=None, dim=-1, norm=None):
3246
3296
 
3247
3297
  Note:
3248
3298
  - `fft` is currently only used in `mindscience` scientific computing scenarios and
3249
- dose not support other usage scenarios.
3299
+ does not support other usage scenarios.
3250
3300
  - `fft` is not supported on Windows platform yet.
3251
3301
 
3252
3302
  Args:
@@ -3505,11 +3555,11 @@ def frac_ext(input):
3505
3555
  return frac_op(input)
3506
3556
 
3507
3557
 
3508
- def fused_add_topk_div(x, add_num, group_num, group_topk, n, k, activate_type=0, is_norm=True, scale=2.5):
3558
+ def fused_add_topk_div(x, add_num, group_num, group_topk, n, k, activate_type=0, is_norm=True, scale=2.5, mapping_num=None, mapping_table=None, enable_expert_mapping=False):
3509
3559
  r"""
3510
3560
 
3511
3561
  """
3512
- return fused_add_topk_div_op(x, add_num, group_num, group_topk, n, k, activate_type, is_norm, scale)
3562
+ return fused_add_topk_div_op(x, add_num, group_num, group_topk, n, k, activate_type, is_norm, scale, mapping_num, mapping_table, enable_expert_mapping)
3513
3563
 
3514
3564
 
3515
3565
  def gather_d(x, dim, index):
@@ -3582,7 +3632,7 @@ def gather(input_params, input_indices, axis, batch_dims=0):
3582
3632
  - The value of input_indices must be in the range of `[0, input_param.shape[axis])`.
3583
3633
  On CPU and GPU, an error is raised if an out-of-bounds index is found. On Ascend, the results may be
3584
3634
  undefined.
3585
- - The data type of input_params cannot be `mindspore.bool_` .
3635
+ - The data type of input_params cannot be `mindspore.bool` .
3586
3636
  - The shape of returned tensor is :math:`input\_params.shape[:axis] + input\_indices.shape[batch\_dims:] + input\_params.shape[axis + 1:]` .
3587
3637
 
3588
3638
  Args:
@@ -3785,7 +3835,7 @@ def hfft2(input, s=None, dim=(-2, -1), norm=None):
3785
3835
 
3786
3836
  Note:
3787
3837
  - `hfft2` is currently only used in `mindscience` scientific computing scenarios and
3788
- dose not support other usage scenarios.
3838
+ does not support other usage scenarios.
3789
3839
  - `hfft2` is not supported on Windows platform yet.
3790
3840
 
3791
3841
  Args:
@@ -3846,7 +3896,7 @@ def hfftn(input, s=None, dim=None, norm=None):
3846
3896
 
3847
3897
  Note:
3848
3898
  - `hfftn` is currently only used in `mindscience` scientific computing scenarios and
3849
- dose not support other usage scenarios.
3899
+ does not support other usage scenarios.
3850
3900
  - `hfftn` is not supported on Windows platform yet.
3851
3901
 
3852
3902
  Args:
@@ -3907,7 +3957,7 @@ def hfft(input, n=None, dim=-1, norm=None):
3907
3957
 
3908
3958
  Note:
3909
3959
  - `hfft` is currently only used in `mindscience` scientific computing scenarios and
3910
- dose not support other usage scenarios.
3960
+ does not support other usage scenarios.
3911
3961
  - `hfft` is not supported on Windows platform yet.
3912
3962
 
3913
3963
  Args:
@@ -4168,7 +4218,7 @@ def ifft2(input, s=None, dim=(-2, -1), norm=None):
4168
4218
 
4169
4219
  Note:
4170
4220
  - `ifft2` is currently only used in `mindscience` scientific computing scenarios and
4171
- dose not support other usage scenarios.
4221
+ does not support other usage scenarios.
4172
4222
  - `ifft2` is not supported on Windows platform yet.
4173
4223
 
4174
4224
  Args:
@@ -4228,7 +4278,7 @@ def ifftn(input, s=None, dim=None, norm=None):
4228
4278
 
4229
4279
  Note:
4230
4280
  - `ifftn` is currently only used in `mindscience` scientific computing scenarios and
4231
- dose not support other usage scenarios.
4281
+ does not support other usage scenarios.
4232
4282
  - `ifftn` is not supported on Windows platform yet.
4233
4283
 
4234
4284
  Args:
@@ -4288,7 +4338,7 @@ def ifftshift(input, dim=None):
4288
4338
 
4289
4339
  Note:
4290
4340
  - `ifftshift` is currently only used in `mindscience` scientific computing scenarios and
4291
- dose not support other usage scenarios.
4341
+ does not support other usage scenarios.
4292
4342
  - `ifftshift` is not supported on Windows platform yet.
4293
4343
 
4294
4344
  Args:
@@ -4324,7 +4374,7 @@ def ifft(input, n=None, dim=-1, norm=None):
4324
4374
 
4325
4375
  Note:
4326
4376
  - `ifft` is currently only used in `mindscience` scientific computing scenarios and
4327
- dose not support other usage scenarios.
4377
+ does not support other usage scenarios.
4328
4378
  - `ifft` is not supported on Windows platform yet.
4329
4379
 
4330
4380
  Args:
@@ -4380,7 +4430,7 @@ def ihfft2(input, s=None, dim=(-2, -1), norm=None):
4380
4430
 
4381
4431
  Note:
4382
4432
  - `ihfft2` is currently only used in `mindscience` scientific computing scenarios and
4383
- dose not support other usage scenarios.
4433
+ does not support other usage scenarios.
4384
4434
  - `ihfft2` is not supported on Windows platform yet.
4385
4435
 
4386
4436
  Args:
@@ -4441,7 +4491,7 @@ def ihfftn(input, s=None, dim=None, norm=None):
4441
4491
 
4442
4492
  Note:
4443
4493
  - `ihfftn` is currently only used in `mindscience` scientific computing scenarios and
4444
- dose not support other usage scenarios.
4494
+ does not support other usage scenarios.
4445
4495
  - `ihfftn` is not supported on Windows platform yet.
4446
4496
 
4447
4497
  Args:
@@ -4502,7 +4552,7 @@ def ihfft(input, n=None, dim=-1, norm=None):
4502
4552
 
4503
4553
  Note:
4504
4554
  - `ihfft` is currently only used in `mindscience` scientific computing scenarios and
4505
- dose not support other usage scenarios.
4555
+ does not support other usage scenarios.
4506
4556
  - `ihfft` is not supported on Windows platform yet.
4507
4557
 
4508
4558
  Args:
@@ -4623,56 +4673,6 @@ def unfold_ext(input, kernel_size, dilation=1, padding=0, stride=1):
4623
4673
  return im2col_ext_op(input, kernel_size, dilation, padding, stride)
4624
4674
 
4625
4675
 
4626
- def index_add_ext(input, dim, index, source, alpha=1):
4627
- r"""
4628
- Accumulate the elements of `alpha` times `source` into the `input` by adding to the index in the order given in `index`. For example, if ``dim == 0`` , ``index[i] == j`` , and ``alpha = -1`` , then the `i` th row of `source` is subtracted from the `j` th row of `input` . The `dim` th dimension of `source` must have the same size as the length of `index` , and all other dimensions must match `input`, or an error will be raised. For a 3-D tensor, the output is defined as follows:
4629
-
4630
- .. math::
4631
- \begin{array}{ll}
4632
- input[index[i],\ :,\ :]\ +=\ alpha * source[i,\ :,\ :] \qquad \#if\ dim == 0 \\
4633
- input[:,\ \ index[i],\ :]\ +=\ alpha * source[:,\ \ i,\ :] \qquad \#if\ dim == 1 \\
4634
- input[:,\ :,\ \ index[i]]\ +=\ alpha * source[:,\ :,\ \ i] \qquad\#if\ dim == 2 \\
4635
- \end{array}
4636
-
4637
- .. warning::
4638
- This is an experimental API that is subject to change or deletion.
4639
-
4640
- Args:
4641
- input (Tensor): The input Tensor.
4642
- dim (int): The dimension along which to index.
4643
- index (Tensor): Add the value of "input Tensor" and `source` along the dimension of the `dim` according to the specified index value, with data type int32. The `index` must be 1D with the same size as the size of `source` in the `dim` dimension. The values of `index` should be in [0, b), where the b is the size of "input Tensor" in the `dim` dimension.
4644
- source (Tensor): The input tensor with the value to add. Must have same data type as "input Tensor". The shape must be the same as "input Tensor" except the `dim` th dimension.
4645
- alpha (number, optional): The scalar multiplier for source. Default: ``1``.
4646
-
4647
- Returns:
4648
- Tensor, has the same shape and dtype as `input`.
4649
-
4650
- Raises:
4651
- TypeError: If neither `index` nor `source` is a Tensor.
4652
- ValueError: If the value of `dim` is out of the dimension range of `source` shape.
4653
- ValueError: If `index` rank is not the same as `source` rank.
4654
- ValueError: If shape of `index` is not 1D or size of `index` is not equal to dimension of source[dim].
4655
- ValueError: If the shape of `source` is not the same as that of `input` except the `dim` axis.
4656
-
4657
- Supported Platforms:
4658
- ``Ascend``
4659
-
4660
- Examples:
4661
- >>> import numpy as np
4662
- >>> import mindspore
4663
- >>> from mindspore import Tensor, ops
4664
- >>> x = Tensor(np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]), mindspore.float32)
4665
- >>> index = Tensor(np.array([0, 2]), mindspore.int32)
4666
- >>> y = Tensor(np.array([[0.5, 1.0], [1.0, 1.5], [2.0, 2.5]]), mindspore.float32)
4667
- >>> output = ops.auto_generate.index_add_ext(x, 1, index, y, alpha=1)
4668
- >>> print(output)
4669
- [[ 1.5 2. 4. ]
4670
- [ 5. 5. 7.5]
4671
- [ 9. 8. 11.5]]
4672
- """
4673
- return index_add_ext_op(input, dim, index, source, alpha)
4674
-
4675
-
4676
4676
  def index_fill_scalar(input, dim, index, value):
4677
4677
  r"""
4678
4678
 
@@ -4728,7 +4728,7 @@ def index(input, indices):
4728
4728
  [2 6 5]
4729
4729
  >>> input2 = Tensor(np.arange(4 * 3 * 3).reshape(4, 3, 3), mindspore.int32)
4730
4730
  >>> indices3 = Tensor(np.array([1, 0]), mindspore.int32)
4731
- >>> indices4 = Tensor(np.array([1, 1, 0]), mindspore.bool_)
4731
+ >>> indices4 = Tensor(np.array([1, 1, 0]), mindspore.bool)
4732
4732
  >>> output2 = ops.auto_generate.index(input2, [indices3, indices4])
4733
4733
  >>> print(output2)
4734
4734
  [[ 9 10 11]
@@ -4783,6 +4783,13 @@ def index_select_ext(input, dim, index):
4783
4783
  return index_select_op(input, dim, index)
4784
4784
 
4785
4785
 
4786
+ def inner_moe_token_unpermute(permuted_tokens, sorted_indices, probs=None, padded_mode=False, restore_shape=None):
4787
+ r"""
4788
+
4789
+ """
4790
+ return inner_moe_token_unpermute_op(permuted_tokens, sorted_indices, probs, padded_mode, restore_shape)
4791
+
4792
+
4786
4793
  def inplace_adds_ext(input, other, alpha=1):
4787
4794
  r"""
4788
4795
 
@@ -4797,6 +4804,20 @@ def inplace_add_ext(input, other, alpha=1):
4797
4804
  return inplace_add_ext_op(input, other, alpha)
4798
4805
 
4799
4806
 
4807
+ def inplace_bernoulli_scalar(input, p, seed, offset):
4808
+ r"""
4809
+
4810
+ """
4811
+ return inplace_bernoulli_scalar_op(input, p, seed, offset)
4812
+
4813
+
4814
+ def inplace_bernoulli_tensor(input, p, seed, offset):
4815
+ r"""
4816
+
4817
+ """
4818
+ return inplace_bernoulli_tensor_op(input, p, seed, offset)
4819
+
4820
+
4800
4821
  def inplace_clamp_scalar(input, min=None, max=None):
4801
4822
  r"""
4802
4823
 
@@ -4811,11 +4832,11 @@ def inplace_clamp_tensor(input, min=None, max=None):
4811
4832
  return inplace_clamp_tensor_op(input, min, max)
4812
4833
 
4813
4834
 
4814
- def inplace_copy(input, src):
4835
+ def inplace_copy(input, src, non_blocking=False):
4815
4836
  r"""
4816
4837
 
4817
4838
  """
4818
- return inplace_copy_op(input, src)
4839
+ return inplace_copy_op(input, src, non_blocking)
4819
4840
 
4820
4841
 
4821
4842
  def divmod_scalar_(input, other, rounding_mode=None):
@@ -5090,6 +5111,51 @@ def masked_fill_tensor_(input, mask, value):
5090
5111
  return inplace_masked_fill_tensor_op(input, mask, value)
5091
5112
 
5092
5113
 
5114
+ def matmul_add_(x, weight, C):
5115
+ r"""
5116
+ Fusion Operator of Transpose, Matmul, and InplaceAdd.
5117
+
5118
+ .. warning::
5119
+ - This is an experimental API that is subject to change or deletion.
5120
+ - This API is only supported in Atlas A2 training series for now.
5121
+ - This API is only supported on GRAPH mode.
5122
+
5123
+ Args:
5124
+ x (Tensor): Matrix A in matrix multiplication, with shape :math:`(k, m)` or :math:`(batch, k, m)`,
5125
+ whose type should be float16 or bfloat16.
5126
+ weight (Tensor): Matrix B in matrix multiplication, with shape :math:`(k, n)` or :math:`(batch, k, n)`,
5127
+ whose type should be float16 or bfloat16.
5128
+ C (Tensor): A Tensor acting as both input and output, with type of float32.
5129
+ Its shape should be :math:`(m, n)` or :math:`(batch, m, n)`.
5130
+
5131
+ Returns:
5132
+ Tensor, has the same shape and data type as `C`.
5133
+
5134
+ Raises:
5135
+ TypeError: If the dtype of `weight` is not the same as `x`.
5136
+ ValueError: If the ranks of `x` , `weight` and `C` are not the same.
5137
+
5138
+ Supported Platforms:
5139
+ ``Ascend``
5140
+
5141
+ Examples:
5142
+ >>> import mindspore
5143
+ >>> import numpy as np
5144
+ >>> from mindspore import Tensor, ops, nn, context
5145
+ >>> context.set_context(mode=context.GRAPH_MODE, jit_config={"jit_level": "O0"})
5146
+ >>> class Net(nn.Cell):
5147
+ ... def construct(self, x, weight, C):
5148
+ ... return ops.auto_generate.inplace_matmul_add_op(x, weight, C)
5149
+ >>> x = Tensor(np.random.randn(10, 20), mindspore.float16)
5150
+ >>> weight = Tensor(np.random.randn(10, 8), mindspore.float16)
5151
+ >>> C = Tensor(np.random.randn(20, 8), mindspore.float32)
5152
+ >>> output = Net()(x, weight, C)
5153
+ >>> print(output.shape)
5154
+ (20, 8)
5155
+ """
5156
+ return inplace_matmul_add_op(x, weight, C)
5157
+
5158
+
5093
5159
  def inplace_muls(input, other):
5094
5160
  r"""
5095
5161
 
@@ -5118,6 +5184,52 @@ def inplace_scatter_add(input, dim, index, src):
5118
5184
  return inplace_scatter_add_op(input, dim, index, src)
5119
5185
 
5120
5186
 
5187
+ def inplace_silu(input):
5188
+ r"""
5189
+ Computes Sigmoid Linear Unit of input element-wise. The SiLU function is defined as:
5190
+
5191
+ .. math::
5192
+
5193
+ \text{SiLU}(x) = x * \sigma(x),
5194
+
5195
+ where :math:`x` is an element of the input, :math:`\sigma(x)` is Sigmoid function.
5196
+
5197
+ .. math::
5198
+
5199
+ \sigma(x_i) = \frac{1}{1 + \exp(-x_i)},
5200
+
5201
+ SiLU Function Graph:
5202
+
5203
+ .. image:: ../images/SiLU.png
5204
+ :align: center
5205
+
5206
+ Args:
5207
+ input (Tensor): `input` is :math:`x` in the preceding formula. Input with the data type
5208
+ float16 or float32.
5209
+ inplace (bool, optional): If it is ``True``, enable the in place update function.
5210
+ Default value: ``False``.
5211
+
5212
+ Returns:
5213
+ Tensor, with the same type and shape as the `input`.
5214
+
5215
+ Raises:
5216
+ TypeError: If dtype of `input` is neither float16 nor float32.
5217
+
5218
+ Supported Platforms:
5219
+ ``Ascend`` ``GPU`` ``CPU``
5220
+
5221
+ Examples:
5222
+ >>> import mindspore
5223
+ >>> from mindspore import Tensor, mint
5224
+ >>> import numpy as np
5225
+ >>> input = Tensor(np.array([-1, 2, -3, 2, -1]), mindspore.float16)
5226
+ >>> output = mint.nn.functional.silu(input, inplace=True)
5227
+ >>> print(output)
5228
+ [-0.269 1.762 -0.1423 1.762 -0.269]
5229
+ """
5230
+ return inplace_silu_op(input)
5231
+
5232
+
5121
5233
  def inplace_stop_gradient(input):
5122
5234
  r"""
5123
5235
 
@@ -5159,9 +5271,6 @@ def inplace_threshold(input, threshold, value):
5159
5271
  \text{value}, &\text{ otherwise }
5160
5272
  \end{cases}
5161
5273
 
5162
- .. warning::
5163
- This is an experimental API that is subject to change or deletion.
5164
-
5165
5274
  Args:
5166
5275
  input (Tensor): The input Tensor.
5167
5276
  threshold (Union[int, float]): The value of the threshold.
@@ -5202,7 +5311,7 @@ def irfft2(input, s=None, dim=(-2, -1), norm=None):
5202
5311
 
5203
5312
  Note:
5204
5313
  - `irfft2` is currently only used in `mindscience` scientific computing scenarios and
5205
- dose not support other usage scenarios.
5314
+ does not support other usage scenarios.
5206
5315
  - `irfft2` is not supported on Windows platform yet.
5207
5316
 
5208
5317
  Args:
@@ -5260,7 +5369,7 @@ def irfftn(input, s=None, dim=None, norm=None):
5260
5369
 
5261
5370
  Note:
5262
5371
  - `irfftn` is currently only used in `mindscience` scientific computing scenarios and
5263
- dose not support other usage scenarios.
5372
+ does not support other usage scenarios.
5264
5373
  - `irfftn` is not supported on Windows platform yet.
5265
5374
 
5266
5375
  Args:
@@ -5319,7 +5428,7 @@ def irfft(input, n=None, dim=-1, norm=None):
5319
5428
 
5320
5429
  Note:
5321
5430
  - `irfft` is currently only used in `mindscience` scientific computing scenarios and
5322
- dose not support other usage scenarios.
5431
+ does not support other usage scenarios.
5323
5432
  - `irfft` is not supported on Windows platform yet.
5324
5433
 
5325
5434
  Args:
@@ -5502,6 +5611,13 @@ def kthvalue(input, k, dim=-1, keepdim=False):
5502
5611
  return kthvalue_op(input, k, dim, keepdim)
5503
5612
 
5504
5613
 
5614
+ def kv_scale_cache(key_scale, value_scale, key_value_scale_cache, batch_valid_length, cache_mode):
5615
+ r"""
5616
+
5617
+ """
5618
+ return kv_scale_cache_op(key_scale, value_scale, key_value_scale_cache, batch_valid_length, cache_mode)
5619
+
5620
+
5505
5621
  def l1_loss_ext(input, target, reduction='mean'):
5506
5622
  r"""
5507
5623
  Calculate the mean absolute error between the `input` value and the `target` value.
@@ -6157,7 +6273,7 @@ def masked_fill(input_x, mask, value):
6157
6273
  Examples:
6158
6274
  >>> import mindspore
6159
6275
  >>> input_x = mindspore.tensor([1., 2., 3., 4.], mindspore.float32)
6160
- >>> mask = mindspore.tensor([True, True, False, True], mindspore.bool_)
6276
+ >>> mask = mindspore.tensor([True, True, False, True], mindspore.bool)
6161
6277
  >>> output = mindspore.ops.masked_fill(input_x, mask, 0.5)
6162
6278
  >>> print(output)
6163
6279
  [0.5 0.5 3. 0.5]
@@ -6165,6 +6281,13 @@ def masked_fill(input_x, mask, value):
6165
6281
  return masked_fill_op(input_x, mask, value)
6166
6282
 
6167
6283
 
6284
+ def masked_scatter(input, mask, source):
6285
+ r"""
6286
+
6287
+ """
6288
+ return masked_scatter_op(input, mask, source)
6289
+
6290
+
6168
6291
  def masked_select(input, mask):
6169
6292
  r"""
6170
6293
  Return a new 1-D tensor which indexes the `input` tensor according to the boolean `mask`.
@@ -6184,7 +6307,7 @@ def masked_select(input, mask):
6184
6307
  Examples:
6185
6308
  >>> import mindspore
6186
6309
  >>> x = mindspore.tensor([1, 2, 3, 4], mindspore.int64)
6187
- >>> mask = mindspore.tensor([1, 0, 1, 0], mindspore.bool_)
6310
+ >>> mask = mindspore.tensor([1, 0, 1, 0], mindspore.bool)
6188
6311
  >>> output = mindspore.ops.masked_select(x, mask)
6189
6312
  >>> print(output)
6190
6313
  [1 3]
@@ -6561,6 +6684,20 @@ def mish_ext(input):
6561
6684
  return mish_ext_op(input)
6562
6685
 
6563
6686
 
6687
+ def mla(query, q_rope, kv_cache, k_rope, block_tables, attn_mask=None, deq_scale_qk=None, deq_scale_pv=None, q_seq_lens=None, context_lens=None, head_num=32, scale_value=0.0, kv_head_num=1, mask_mode='MASK_NONE', is_ring=0):
6688
+ r"""
6689
+
6690
+ """
6691
+ return mla_op(query, q_rope, kv_cache, k_rope, block_tables, attn_mask, deq_scale_qk, deq_scale_pv, q_seq_lens, context_lens, head_num, scale_value, kv_head_num, mask_mode, is_ring)
6692
+
6693
+
6694
+ def mla_preprocess(input1, gamma1, beta1, quant_scale1, quant_offset1, wdqkv, bias1, gamma2, beta2, quant_scale2, quant_offset2, gamma3, sin1, cos1, sin2, cos2, key_cache, slot_mapping, wuq, bias2, slot_wuk, de_scale1, de_scale2, ctkv_scale, qnope_scale, krope_cache, param_cache_mode=0):
6695
+ r"""
6696
+
6697
+ """
6698
+ return mla_preprocess_op(input1, gamma1, beta1, quant_scale1, quant_offset1, wdqkv, bias1, gamma2, beta2, quant_scale2, quant_offset2, gamma3, sin1, cos1, sin2, cos2, key_cache, slot_mapping, wuq, bias2, slot_wuk, de_scale1, de_scale2, ctkv_scale, qnope_scale, krope_cache, param_cache_mode)
6699
+
6700
+
6564
6701
  def mm_ext(input, mat2):
6565
6702
  r"""
6566
6703
  Returns the matrix product of two arrays.
@@ -6605,6 +6742,254 @@ def mm_ext(input, mat2):
6605
6742
  return mm_ext_op(input, mat2)
6606
6743
 
6607
6744
 
6745
+ def moe_distribute_combine(expand_x, expert_ids, expand_idx, ep_send_counts, expert_scales, ep_world_size, ep_rank_id, moe_expert_num, tp_send_counts=None, x_active_mask=None, activate_scale=None, weight_scale=None, group_list=None, expand_scales=None, group_ep=None, group_tp=None, tp_world_size=0, tp_rank_id=0, expert_shard_type=0, shared_expert_num=0, shared_export_rank_num=0, global_bs=0, out_dtype=0, common_quant_mode=0, group_list_type=0):
6746
+ r"""
6747
+ Parallel communication for Mixture of Experts (MoE). When Tensor Parallelism (TP) communication exists,
6748
+ it first performs ReduceScatter communication followed by Expert Parallelism (EP) AllToAllV communication.
6749
+ Otherwise, only EP AllToAllV communication is performed. Finally multiply the received data by weight and
6750
+ add them up.
6751
+
6752
+ Notes:
6753
+ This function must be used in conjunction with function `moe_distribute_dispatch`.
6754
+ - A: Maximum tokens to dispatch per rank:
6755
+ - For shared experts: A = BS * ep_world_size * shared_expert_num / shared_expert_rank_num
6756
+ - For MoE experts:
6757
+ - When global_bs = 0: A >= BS * ep_world_size * min(local_expert_num, K)
6758
+ - When global_bs != 0: A >= global_bs * min(local_expert_num, K)
6759
+ - H (hidden size): Dimension of each token's hidden state
6760
+ - Ascend 910B: 0 < H <= 7168, must be multiple of 32
6761
+ - Ascend 910_93: H = 7168
6762
+ - BS (batch sequence size): Number of tokens processed per rank
6763
+ - Ascend 910B: 0 < BS <= 256
6764
+ - Ascend 910_93: 0 < BS <= 512
6765
+ - K: Number of experts selected per token (0 < K <= 8 and K <= moe_expert_num)
6766
+ - server_num: Number of server nodes (supports 2, 4, 8)
6767
+ - local_expert_num: Number of experts per rank:
6768
+ - Shared expert ranks: local_expert_num = 1
6769
+ - MoE expert ranks: local_expert_num = moe_expert_num / (ep_world_size - shared_expert_rank_num)
6770
+ (TP communication not supported when local_expert_num > 1)
6771
+
6772
+ Inputs:
6773
+ - **expand_x** (Tensor) - Expanded token features. 2D tensor [A, H] with dtype matching input.
6774
+ Supported dtypes: float16, bfloat16, int8. Format: ND, non-contiguous allowed.
6775
+ - **expert_ids** (Tensor) - Top-K expert indices for each token. 2D int32 tensor with shape [BS, K].
6776
+ Format: ND, non-contiguous allowed.
6777
+ - **expand_idx** (Tensor) - Token counts per expert; it is an output of the dispatch operation.
6778
+ 1D int32 tensor [BS*K]. Format: ND, non-contiguous allowed.
6779
+ - **ep_send_counts** (Tensor) - Tokens that each EP rank needs to send; it is the `ep_recv_counts` output of the dispatch operation.
6780
+ - Ascend 910B: 1D int32 tensor [moe_expert_num + 2 * global_bs * K * server_num]
6781
+ - Ascend 910_93: 1D int32 tensor [ep_world_size * max(tp_world_size,1) * local_expert_num]
6782
+ Format: ND, non-contiguous allowed.
6783
+ - **expert_scales** (Tensor) - Top-K expert weights per token.
6784
+ - **ep_world_size** (int) - EP domain size.
6785
+ - Ascend 910B: Supports 16, 32, 64.
6786
+ - Ascend 910_93: Supports 8, 16, 32, 64, 128, 144, 256, 288.
6787
+ - **ep_rank_id** (int) - Local rank ID in EP domain [0, ep_world_size), must be unique per domain.
6788
+ - **moe_expert_num** (int) - Number of MoE experts (0 < moe_expert_num <= 256),
6789
+ must satisfy moe_expert_num % (ep_world_size-shared_expert_rank_num) = 0.
6790
+ - **tp_send_counts** (Tensor) - Tokens that each TP rank needs to send (when TP exists). It's the output of dispatch operation. Default: ``None``.
6791
+ - Ascend 910B: Not supported.
6792
+ - Ascend 910_93: 1D int32 tensor [tp_world_size] when TP exists. Format: ND, non-contiguous allowed.
6793
+ - **x_active_mask** (Tensor) - Reserved parameter. Default: ``None``.
6794
+ - **activate_scale** (Tensor) - Reserved parameter. Default: ``None``.
6795
+ - **weight_scale** (Tensor) - Reserved parameter. Default: ``None``.
6796
+ - **group_list** (Tensor) - Reserved parameter. Default: ``None``.
6797
+ - **expand_scales** (Tensor) - Output of dispatch operation. Default: ``None``.
6798
+ - Ascend 910B: 1D float32 tensor [A]. Format: ND, non-contiguous allowed.
6799
+ - Ascend 910_93: Unsupported.
6800
+ - **group_ep** (str) - EP communication domain name (string length 1-127), must differ from group_tp. Default: ``None``.
6801
+ - **group_tp** (str) - TP communication domain name. Default: ``None``.
6802
+ - Ascend 910B: Unsupported (pass empty string).
6803
+ - Ascend 910_93: When TP communication exists, string length 1-127, must differ from group_ep.
6804
+ - **tp_world_size** (int) - TP domain size. Default: ``0``.
6805
+ - Ascend 910B: Unsupported (pass 0).
6806
+ - Ascend 910_93: 0/1 means no TP communication; only 2 supported when TP exists.
6807
+ - **tp_rank_id** (int) - Local rank ID in TP domain. Default: ``0``.
6808
+ - Ascend 910B: Unsupported (pass 0).
6809
+ - Ascend 910_93: [0,1], unique per domain; pass 0 when no TP communication.
6810
+ - **expert_shard_type** (int) - Shared expert distribution type. Default: ``0``.
6811
+ - Ascend 910B: Unsupported (pass 0).
6812
+ - Ascend 910_93: Currently only 0 (shared experts precede MoE experts).
6813
+ - **shared_expert_num** (int) - Number of shared experts. Default: ``0``.
6814
+ - Ascend 910B: Unsupported (pass 0).
6815
+ - Ascend 910_93: Currently 0 (none) or 1 (one shared expert).
6816
+ - **shared_export_rank_num** (int) - Number of ranks hosting shared experts (referred to as shared_expert_rank_num in the formulas of this description). Default: ``0``.
6817
+ - Ascend 910B: Unsupported (pass 0).
6818
+ - Ascend 910_93: [0, ep_world_size-1), must satisfy ep_world_size % shared_expert_rank_num = 0 when non-zero.
6819
+ - **global_bs** (int) - Global batch size across EP domain. Default: ``0``.
6820
+ - Ascend 910B: 256*ep_world_size when BS varies per rank; 0 or BS*ep_world_size when uniform.
6821
+ - Ascend 910_93: 0 or BS*ep_world_size.
6822
+ - **out_dtype** (int) - Specify the type of output x. Reserved parameter (pass 0 in current version). Default: ``0``.
6823
+ - **common_quant_mode** (int) - Communication quantification type. Reserved parameter (pass 0 in current version). Default: ``0``.
6824
+ - **group_list_type** (int) - The format of group_list. Reserved parameter (pass 0 in current version). Default: ``0``.
6825
+
6826
+ Outputs:
6827
+ - **x** (Tensor) - Processed tokens. 2D tensor [BS, H] with dtype matching input `expand_x`.
6828
+
6829
+ Raises:
6830
+ TypeError: If input dtypes don't match specifications.
6831
+ ValueError: If input values violate constraints (e.g., invalid expert indices).
6832
+ RuntimeError: If communication domain configuration is invalid.
6833
+
6834
+ Supported Platforms:
6835
+ ``Ascend``
6836
+
6837
+ Examples:
6838
+ >>> # EP-only communication example (Ascend 910B)
6839
+ >>> import mindspore as ms
6840
+ >>> from mindspore import Tensor
6841
+ >>> from mindspore import ops
6842
+ >>> from mindspore.communication import init, get_rank, GlobalComm
6843
+ >>> from mindspore.ops.auto_generate import moe_distribute_dispatch, moe_distribute_combine
6844
+ >>> import numpy as np
6845
+ >>> bs = 8
6846
+ >>> h = 7168
6847
+ >>> k = 8
6848
+ >>> ep_world_size = 16
6849
+ >>> moe_expert_num = 16
6850
+ >>> global_bs = bs * ep_world_size
6851
+ >>> x = Tensor(np.random.randn(bs, h), ms.float16)
6852
+ >>> expert_ids = Tensor(np.random.randint(0, moe_expert_num, (bs, k)), ms.int32)
6853
+ >>> expert_scales = Tensor(np.random.randn(bs, k), ms.float32)
6854
+ >>> init()
6855
+ >>> rank_id = get_rank()
6856
+ >>> expand_x, _, expand_idx, _, ep_recv_count, _, expand_scale = moe_distribute_dispatch(
6857
+ ... x, expert_ids, ep_world_size, rank_id, moe_expert_num, expert_scales=expert_scales,
6858
+ ... group_ep=GlobalComm.WORLD_COMM_GROUP)
6859
+ >>> out_x = moe_distribute_combine(
6860
+ ... expand_x, expert_ids, expand_idx, ep_recv_count, expert_scales, ep_world_size, rank_id,
6861
+ ... moe_expert_num, group_ep=GlobalComm.WORLD_COMM_GROUP)
6862
+ >>> print(out_x.shape)
6863
+ (8, 7168)
6864
+ """
6865
+ return moe_distribute_combine_op(expand_x, expert_ids, expand_idx, ep_send_counts, expert_scales, ep_world_size, ep_rank_id, moe_expert_num, tp_send_counts, x_active_mask, activate_scale, weight_scale, group_list, expand_scales, group_ep, group_tp, tp_world_size, tp_rank_id, expert_shard_type, shared_expert_num, shared_export_rank_num, global_bs, out_dtype, common_quant_mode, group_list_type)
6866
+
6867
+
6868
+ def moe_distribute_dispatch(x, expert_ids, ep_world_size, ep_rank_id, moe_expert_num, expert_scales=None, scales=None, x_active_mask=None, group_ep=None, group_tp=None, tp_world_size=0, tp_rank_id=0, expert_shard_type=0, shared_expert_num=0, shared_expert_rank_num=0, quant_mode=0, global_bs=0, expert_token_nums_type=0):
6869
+ r"""
6870
+ Performs token data quantization (optional) and parallel communication for Mixture of Experts (MoE).
6871
+ When Tensor Parallelism (TP) communication exists, it first performs Expert Parallelism (EP) AllToAllV
6872
+ communication followed by TP AllGatherV communication. Otherwise, only EP AllToAllV communication is performed.
6873
+
6874
+ Notes:
6875
+ - A: Maximum tokens to dispatch per rank:
6876
+ - For shared experts: A = BS * ep_world_size * shared_expert_num / shared_expert_rank_num
6877
+ - For MoE experts:
6878
+ - When global_bs = 0: A >= BS * ep_world_size * min(local_expert_num, K)
6879
+ - When global_bs != 0: A >= global_bs * min(local_expert_num, K)
6880
+ - H (hidden size): Dimension of each token's hidden state
6881
+ - Ascend 910B: 0 < H <= 7168, must be multiple of 32
6882
+ - Ascend 910_93: H = 7168
6883
+ - BS (batch sequence size): Number of tokens processed per rank
6884
+ - Ascend 910B: 0 < BS <= 256
6885
+ - Ascend 910_93: 0 < BS <= 512
6886
+ - K: Number of experts selected per token (0 < K <= 8 and K <= moe_expert_num)
6887
+ - server_num: Number of server nodes (supports 2, 4, 8)
6888
+ - local_expert_num: Number of experts per rank:
6889
+ - Shared expert ranks: local_expert_num = 1
6890
+ - MoE expert ranks: local_expert_num = moe_expert_num / (ep_world_size - shared_expert_rank_num)
6891
+ (TP communication not supported when local_expert_num > 1)
6892
+
6893
+ Inputs:
6894
+ - **x** (Tensor) - Input token data to be sent. 2D tensor with shape [BS, H].
6895
+ Supported dtypes: float16, bfloat16. Format: ND, non-contiguous allowed.
6896
+ - **expert_ids** (Tensor) - Top-K expert indices for each token. 2D int32 tensor with shape [BS, K].
6897
+ Format: ND, non-contiguous allowed.
6898
+ - **ep_world_size** (int64) - EP domain size.
6899
+ - Ascend 910B: Supports 16, 32, 64.
6900
+ - Ascend 910_93: Supports 8, 16, 32, 64, 128, 144, 256, 288.
6901
+ - **ep_rank_id** (int64) - Local rank ID in EP domain [0, ep_world_size), must be unique per domain.
6902
+ - **moe_expert_num** (int64) - Number of MoE experts (0 < moe_expert_num <= 256),
6903
+ must satisfy moe_expert_num % (ep_world_size-shared_expert_rank_num) = 0.
6904
+ - **expert_scales** (Tensor) - Top-K expert weights per token.
6905
+ - Ascend 910B: 2D float32 tensor [BS, K], ND format, non-contiguous allowed.
6906
+ - Ascend 910_93: Unsupported (pass ``None``).
6907
+ - **scales** (Tensor) - Expert weights. 2D float32 tensor with shape [shared_expert_num + moe_expert_num, H].
6908
+ Pass ``None`` for non-quantized scenarios. Format: ND, non-contiguous allowed.
6909
+ Note: On Ascend 910B, must be ``None`` when HCCL_INTRA_PCIE_ENABLE=1 and HCCL_INTRA_ROCE_ENABLE=0.
6910
+ - **x_active_mask** (Tensor) - Reserved parameter (pass ``None`` in current version).
6911
+ - **group_ep** (str) - EP communication domain name (string length 1-127), must differ from group_tp.
6912
+ - **group_tp** (str) - TP communication domain name.
6913
+ - Ascend 910B: Unsupported (pass empty string).
6914
+ - Ascend 910_93: When TP communication exists, string length 1-127, must differ from group_ep.
6915
+ - **tp_world_size** (int64) - TP domain size.
6916
+ - Ascend 910B: Unsupported (pass 0).
6917
+ - Ascend 910_93: 0/1 means no TP communication; only 2 supported when TP exists.
6918
+ - **tp_rank_id** (int64) - Local rank ID in TP domain.
6919
+ - Ascend 910B: Unsupported (pass 0).
6920
+ - Ascend 910_93: [0,1], unique per domain; pass 0 when no TP communication.
6921
+ - **expert_shard_type** (int64) - Shared expert distribution type.
6922
+ - Ascend 910B: Unsupported (pass 0).
6923
+ - Ascend 910_93: Currently only 0 (shared experts precede MoE experts).
6924
+ - **shared_expert_num** (int64) - Number of shared experts.
6925
+ - Ascend 910B: Unsupported (pass 0).
6926
+ - Ascend 910_93: Currently 0 (none) or 1 (one shared expert).
6927
+ - **shared_expert_rank_num** (int64) - Number of ranks hosting shared experts.
6928
+ - Ascend 910B: Unsupported (pass 0).
6929
+ - Ascend 910_93: [0, ep_world_size-1), must satisfy ep_world_size % shared_expert_rank_num = 0 when non-zero.
6930
+ - **quant_mode** (int64) - Quantization mode: 0 (none), 2 (dynamic quantization).
6931
+ - **global_bs** (int64) - Global batch size across EP domain.
6932
+ - Ascend 910B: 256*ep_world_size when BS varies per rank; 0 or BS*ep_world_size when uniform.
6933
+ - Ascend 910_93: 0 or BS*ep_world_size.
6934
+ - **expert_token_nums_type** (int64) - Semantic meaning of expert_token_nums output:
6935
+ 0 (prefix sums), 1 (raw counts).
6936
+
6937
+ Outputs:
6938
+ - **expand_x** (Tensor) - Expanded token features. 2D tensor [A, H] with dtype matching input.
6939
+ Supported dtypes: float16, bfloat16, int8. Format: ND, non-contiguous allowed.
6940
+ - **dynamic_scales** (Tensor) - Dynamic quantization scales (when quant_mode=2).
6941
+ 1D float32 tensor [A]. Format: ND, non-contiguous allowed.
6942
+ - **expand_idx** (Tensor) - Token counts per expert for combine operation.
6943
+ 1D int32 tensor [BS*K]. Format: ND, non-contiguous allowed.
6944
+ - **expert_token_nums** (Tensor) - Tokens received per expert.
6945
+ 1D int64 tensor [local_expert_num]. Format: ND, non-contiguous allowed.
6946
+ - **ep_recv_counts** (Tensor) - Tokens received from each EP rank.
6947
+ - Ascend 910B: 1D int32 tensor [moe_expert_num + 2 * global_bs * K * server_num]
6948
+ - Ascend 910_93: 1D int32 tensor [ep_world_size * max(tp_world_size,1) * local_expert_num]
6949
+ Format: ND, non-contiguous allowed.
6950
+ - **tp_recv_counts** (Tensor) - Tokens received from each TP rank (when TP exists).
6951
+ - Ascend 910B: Not supported.
6952
+ - Ascend 910_93: 1D int32 tensor [tp_world_size] when TP exists. Format: ND, non-contiguous allowed.
6953
+ - **expand_scales** (Tensor) - Output token weights for combine operation.
6954
+ - Ascend 910B: 1D float32 tensor [A]. Format: ND, non-contiguous allowed.
6955
+ - Ascend 910_93: Unsupported.
6956
+
6957
+ Raises:
6958
+ TypeError: If input dtypes don't match specifications.
6959
+ ValueError: If input values violate constraints (e.g., invalid expert indices).
6960
+ RuntimeError: If communication domain configuration is invalid.
6961
+
6962
+ Supported Platforms:
6963
+ ``Ascend``
6964
+
6965
+ Examples:
6966
+ >>> # EP-only communication example (Ascend 910B)
6967
+ >>> import mindspore as ms
6968
+ >>> from mindspore import Tensor
6969
+ >>> from mindspore import ops
6970
+ >>> from mindspore.communication import init, get_rank, GlobalComm
6971
+ >>> from mindspore.ops.auto_generate import moe_distribute_dispatch
6972
+ >>> import numpy as np
6973
+ >>> bs = 8
6974
+ >>> h = 7168
6975
+ >>> k = 8
6976
+ >>> ep_world_size = 16
6977
+ >>> moe_expert_num = 16
6978
+ >>> global_bs = bs * ep_world_size
6979
+ >>> x = Tensor(np.random.randn(bs, h), ms.float16)
6980
+ >>> expert_ids = Tensor(np.random.randint(0, moe_expert_num, (bs, k)), ms.int32)
6981
+ >>> expert_scales = Tensor(np.random.randn(bs, k), ms.float32)
6982
+ >>> init()
6983
+ >>> rank_id = get_rank()
6984
+ >>> out = moe_distribute_dispatch(
6985
+ ... x, expert_ids, ep_world_size, rank_id, moe_expert_num, expert_scales=expert_scales,
6986
+ ... group_ep=GlobalComm.WORLD_COMM_GROUP)
6987
+ >>> print(out[0].shape) # expand_x
6988
+ (128, 7168)
6989
+ """
6990
+ return moe_distribute_dispatch_op(x, expert_ids, ep_world_size, ep_rank_id, moe_expert_num, expert_scales, scales, x_active_mask, group_ep, group_tp, tp_world_size, tp_rank_id, expert_shard_type, shared_expert_num, shared_expert_rank_num, quant_mode, global_bs, expert_token_nums_type)
6991
+
6992
+
6608
6993
  def moe_token_permute_grad(permuted_tokens_grad, sorted_indices, num_topk=1, padded_mode=False):
6609
6994
  r"""
6610
6995
 
@@ -6618,11 +7003,10 @@ def moe_token_permute(tokens, indices, num_out_tokens=None, padded_mode=False):
6618
7003
 
6619
7004
  .. warning::
6620
7005
  - It is only supported on Atlas A2 Training Series Products.
6621
- - The input `tokens` only supports the bfloat16 data type in the current version.
6622
7006
  - When `indices` is 2-D, the size of the second dim must be less than or equal to 512.
6623
7007
 
6624
7008
  Args:
6625
- tokens (Tensor): The input token tensor to be permuted. The dtype is bfloat16.
7009
+ tokens (Tensor): The input token tensor to be permuted. The dtype is bfloat16, float16 or float32.
6626
7010
  The shape is :math:`(num\_tokens, hidden\_size)` , where `num_tokens` and `hidden_size` are positive integers.
6627
7011
  indices (Tensor): The tensor specifies indices used to permute the tokens. The dtype is int32 or int64.
6628
7012
  The shape is :math:`(num\_tokens, topk)` or :math:`(num\_tokens,)`, where `num_tokens` and `topk` are positive integers.
@@ -6638,7 +7022,6 @@ def moe_token_permute(tokens, indices, num_out_tokens=None, padded_mode=False):
6638
7022
 
6639
7023
  Raises:
6640
7024
  TypeError: If `tokens` or `indices` is not a Tensor.
6641
- TypeError: If dtype of `tokens` is not bfloat16.
6642
7025
  TypeError: If dtype of `indices` is not int32 or int64.
6643
7026
  TypeError: If specified `num_out_tokens` is not an integer.
6644
7027
  TypeError: If specified `padded_mode` is not a bool.
@@ -6680,60 +7063,6 @@ def moe_token_unpermute_grad(permuted_tokens, unpermuted_tokens_grad, sorted_ind
6680
7063
  return moe_token_unpermute_grad_op(permuted_tokens, unpermuted_tokens_grad, sorted_indices, probs, padded_mode, restore_shape)
6681
7064
 
6682
7065
 
6683
- def moe_token_unpermute(permuted_tokens, sorted_indices, probs=None, padded_mode=False, restore_shape=None):
6684
- r"""
6685
- Unpermute a tensor of permuted tokens based on sorted indices, and optionally merge the tokens with their corresponding probabilities.
6686
-
6687
- .. warning::
6688
- - It is only supported on Atlas A2 Training Series Products.
6689
- - The inputs `permuted_tokens` and `probs` only support the bfloat16 data type in the current version.
6690
- - `sorted_indices` must not have duplicate values, otherwise the result is undefined.
6691
-
6692
- Args:
6693
- permuted_tokens (Tensor): The tensor of permuted tokens to be unpermuted.
6694
- The shape is :math:`[num\_tokens * topk, hidden\_size]` , where `num_tokens`, `topk` and `hidden_size` are positive integers.
6695
- sorted_indices (Tensor): The tensor of sorted indices used to unpermute the tokens.
6696
- The shape is :math:`[num\_tokens * topk,]`, where `num_tokens` and `topk` are positive integers.
6697
- It only supports the int32 data type.
6698
- probs (Tensor, optional): The tensor of probabilities corresponding to the permuted tokens.
6699
- If provided, the unpermuted tokens will be merged with their respective probabilities.
6700
- The shape is :math:`[num\_tokens, topk]`, where `num_tokens` and `topk` are positive integers. Default: ``None`` .
6701
- padded_mode (bool, optional): If ``True``, indicating the indices are padded to denote selected tokens per expert. Default: ``False`` .
6702
- restore_shape (Union[tuple[int], list[int]], optional): The input shape before permutation, only used in padding mode. Default: ``None`` .
6703
-
6704
- Returns:
6705
- Tensor, with the same dtype as `permuted_tokens`. If `padded_mode` is ``False``, the shape will be [`num_tokens`, `hidden_size`].
6706
- If `padded_mode` is ``True``, the shape will be specified by `restore_shape`.
6707
-
6708
- Raises:
6709
- TypeError: If `permuted_tokens` is not a Tensor.
6710
- ValueError: Only supported when `padded_mode` is ``False``.
6711
-
6712
- Supported Platforms:
6713
- ``Ascend``
6714
-
6715
- Examples:
6716
- >>> import mindspore
6717
- >>> from mindspore import Tensor, ops
6718
- >>> permuted_token = Tensor([
6719
- ... [1, 1, 1],
6720
- ... [0, 0, 0],
6721
- ... [0, 0, 0],
6722
- ... [3, 3, 3],
6723
- ... [2, 2, 2],
6724
- ... [1, 1, 1],
6725
- ... [2, 2, 2],
6726
- ... [3, 3, 3]], dtype=mindspore.bfloat16)
6727
- >>> sorted_indices = Tensor([0, 6, 7, 5, 3, 1, 2, 4], dtype=mindspore.int32)
6728
- >>> out = ops.moe_token_unpermute(permuted_token, sorted_indices)
6729
- >>> out.shape
6730
- (8, 3)
6731
-
6732
-
6733
- """
6734
- return moe_token_unpermute_op(permuted_tokens, sorted_indices, probs, padded_mode, restore_shape)
6735
-
6736
-
6737
7066
  def mse_loss_ext(input, target, reduction='mean'):
6738
7067
  r"""
6739
7068
  Calculates the mean squared error between the predicted value and the label value.
@@ -6797,7 +7126,7 @@ def mul(input, other):
6797
7126
  - When the two inputs have different shapes,
6798
7127
  they must be able to broadcast to a common shape.
6799
7128
  - The two inputs can not be bool type at the same time,
6800
- [True, Tensor(True, bool\_), Tensor(np.array([True]), bool\_)] are all considered bool type.
7129
+ [True, Tensor(True), Tensor(np.array([True]))] are all considered bool type.
6801
7130
  - Support implicit type conversion and type promotion.
6802
7131
 
6803
7132
  Args:
@@ -7193,8 +7522,10 @@ def prelu(input, weight):
7193
7522
  :align: center
7194
7523
 
7195
7524
  .. note::
7196
- Channel dim is the 2nd dim of input. When input has dims < 2, then there is
7197
- no channel dim and the number of channels = 1.
7525
+ - Channel dim is the 2nd dim of input. When input has dims < 2, then there is
7526
+ no channel dim and the number of channels = 1.
7527
+ - In GE mode, the rank of the input tensor must be greater than 1;
7528
+ otherwise, an error will be triggered.
7198
7529
 
7199
7530
  Args:
7200
7531
  input (Tensor): The input Tensor of the activation function.
@@ -7307,65 +7638,6 @@ def prod_ext(input, dim=None, keepdim=False, dtype=None):
7307
7638
  return prod_ext_op(input, dim, keepdim, dtype)
7308
7639
 
7309
7640
 
7310
- def prompt_k_v_cache(cache, update, valid_seq_len, batch_index, seq_len_axis, new_max_seq_len, cur_max_seq_len, align_mode='LEFT'):
7311
- r"""
7312
- The PromptKVCache is used for prefill the KVCache of transformer network.
7313
-
7314
- Args:
7315
- cache (Tensor): The cahe tensor with data type of int8, uint8, int16, uint16, float16, float32 and int32.
7316
- When format is BHSD, cache tensor of shape
7317
- :math:`(cache\_batch\_size, num\_head, max\_seq\_length, size\_pre\_head)`.
7318
- When format is BSD, cache tensor of shape
7319
- :math:`(cache\_batch\_size, max\_seq\_length, hidden\_size)`.
7320
- update (Tensor]): The tensor which is used to update the cache tensor. Same data type as cache tensor.
7321
- When format is BHSD, cache tensor of shape
7322
- :math:`(update\_batch\_size, num\_head, max\_seq\_length, size\_pre\_head)`.
7323
- When format is BSD, cache tensor of shape
7324
- :math:`(update\_batch\_size, max\_seq\_length, hidden\_size)`.
7325
- valid_seq_len (Tensor): The valid_seq_len tensor with data type of int64.
7326
- Valid_seq_len tensor of shape :math:`(update\_batch\_size)`.
7327
- batch_index (Tensor): The batch_index tensor with data type of int64.
7328
- Batch_index tensor of shape :math:`(update\_batch\_size)`. Indicate that which batch of cache tensor is going to be update.
7329
- seq_len_axis (Tensor): The seq_len_axis indicate which axis is seq_eln, set to '1' or '2'. Not able for now.
7330
- new_max_seq_len (Tensor): The new_max_seq_len tensor with data type of int64.
7331
- New_max_seq_len tensor of shape :math:`(1)`.
7332
- Indicate that user want to change the shape of cache tensor from
7333
- :math:`(batch\_size, num_head, max\_seq\_length, hidden\_size)` to
7334
- :math:`(batch\_size * max\_seq\_length / new\_max\_seq\_length, num_head, new\_max\_seq\_length, hidden\_size)`
7335
- to update the cache tensor. This will not real change the shape of `cache` tensor. Not able for now.
7336
- cur_max_seq_len (Tensor): The new_max_seq_len tensor with data type of int64.
7337
- Cur_max_seq_len tensor of shape :math:`(1)`. Keep the current seq_len of cache tensor. Not abel for now.
7338
- align_mode (str): indicate which axis is seq_len. Default: left.
7339
-
7340
-
7341
- Outputs:
7342
- With same data type and same shape as `cache` tensor.
7343
-
7344
- Supported Platforms:
7345
- ``Ascend``
7346
-
7347
- Examples:
7348
- >>> from mindspore import Tensor
7349
- >>> from mindspore.ops.operations import _inner_ops
7350
- >>> b = 4
7351
- >>> h = 40
7352
- >>> max_s = 1024
7353
- >>> s = 256
7354
- >>> d = 128
7355
- >>> cache = Tensor(np.random.randn(b, h, max_s, d).astype(np.float16))
7356
- >>> update = Tensor(np.random.randn(b, h, s, d).astype(np.float16))
7357
- >>> valid_seq_len = Tensor(np.random.randint(-1, s, size=ub).astype(np.int64))
7358
- >>> batch_index = Tensor(np.random.choice(np.arange(-1, b), size=ub, replace=False).astype(np.int64))
7359
- >>> new_max_seq_len = Tensor(np.random.randn(1).astype(np.int64))
7360
- >>> cur_max_seq_len = Tensor(np.random.randn(1).astype(np.int64))
7361
- >>> prompt_kv_cache = _inner_ops.PromptKVCache(0)
7362
- >>> output = prompt_kv_cache(cache, update, valid_seq_len, batch_index, Tensor(2), new_max_seq_len, cur_max_seq_len)
7363
- >>> print(cache)
7364
- """
7365
- prompt_k_v_cache_op = _get_cache_prim(PromptKVCache)(align_mode)
7366
- return prompt_k_v_cache_op(cache, update, valid_seq_len, batch_index, seq_len_axis, new_max_seq_len, cur_max_seq_len)
7367
-
7368
-
7369
7641
  def randperm(n, seed=0, offset=0, dtype=mstype.int64):
7370
7642
  r"""
7371
7643
  Generates random permutation of integers from 0 to n-1.
@@ -7701,7 +7973,7 @@ def rfft2(input, s=None, dim=(-2, -1), norm=None):
7701
7973
 
7702
7974
  Note:
7703
7975
  - `rfft2` is currently only used in `mindscience` scientific computing scenarios and
7704
- dose not support other usage scenarios.
7976
+ does not support other usage scenarios.
7705
7977
  - `rfft2` is not supported on Windows platform yet.
7706
7978
 
7707
7979
  Args:
@@ -7762,7 +8034,7 @@ def rfftfreq(n, d=1.0, dtype=None):
7762
8034
 
7763
8035
  Note:
7764
8036
  - `rfftfreq` is currently only used in `mindscience` scientific computing scenarios and
7765
- dose not support other usage scenarios.
8037
+ does not support other usage scenarios.
7766
8038
  - `rfftfreq` is not supported on Windows platform yet.
7767
8039
 
7768
8040
  Args:
@@ -7795,7 +8067,7 @@ def rfftn(input, s=None, dim=None, norm=None):
7795
8067
 
7796
8068
  Note:
7797
8069
  - `rfftn` is currently only used in `mindscience` scientific computing scenarios and
7798
- dose not support other usage scenarios.
8070
+ does not support other usage scenarios.
7799
8071
  - `rfftn` is not supported on Windows platform yet.
7800
8072
 
7801
8073
  Args:
@@ -7855,7 +8127,7 @@ def rfft(input, n=None, dim=-1, norm=None):
7855
8127
 
7856
8128
  Note:
7857
8129
  - `rfft` is currently only used in `mindscience` scientific computing scenarios and
7858
- dose not support other usage scenarios.
8130
+ does not support other usage scenarios.
7859
8131
  - `rfft` is not supported on Windows platform yet.
7860
8132
 
7861
8133
  Args:
@@ -7898,6 +8170,78 @@ def rfft(input, n=None, dim=-1, norm=None):
7898
8170
  return rfft_op(input, n, dim, norm)
7899
8171
 
7900
8172
 
8173
+ def ring_attention_update(prev_attn_out, prev_softmax_max, prev_softmax_sum, cur_attn_out, cur_softmax_max, cur_softmax_sum, actual_seq_qlen=None, layout='SBH'):
8174
+ r"""
8175
+ The RingAttentionUpdate operator updates the output of two FlashAttention operations based on their respective softmax max and softmax sum values.
8176
+
8177
+ - S: Sequence length
8178
+ - B: Batch dimension
8179
+ - H: Hidden layer size, equal to N * D
8180
+ - T: time, equal to B*S
8181
+ - N: Number of attention heads
8182
+ - D: Head dimension
8183
+
8184
+ .. warning::
8185
+ - It is only supported on Atlas A2 Training Series Products.
8186
+ - This is an experimental API that is subject to change or deletion.
8187
+ - When `layout` is ``"TND"``, the last dimension of `prev_attn_out` must be a multiple of 64.
8188
+ - When `layout` is ``"TND"``, `actual_seq_qlen` is mandatory.
8189
+ - When `layout` is ``"TND"``, N x D must satisfy the constraint:
8190
+ (AlignUp(NxD, 64)x(DataSizex6+8))+(AlignUp(Nx8, 64)x56) <= 192x1024.
8191
+ DataSize is 4 bytes when `prev_attn_out` dtype is float32, 2 bytes when dtype is float16 / bfloat16.
8192
+ - When `layout` is ``"TND"``, if `actual_seq_qlen` is not a non-decreasing sequence from 0 to T, the result is undefined.
8193
+
8194
+ Args:
8195
+ prev_attn_out (Tensor): Output of the first FlashAttention operation. The dtype is float16, float32, bfloat16.
8196
+ The shape is :math:`(S, B, H)` or :math:`(T, N, D)`.
8197
+ prev_softmax_max (Tensor): The max values from the first FlashAttention softmax computation. The dtype is float32.
8198
+ The shape is :math:`(B, N, S, 8)` or :math:`(T, N, 8)`. The last dimension contains 8 identical values, which must be positive.
8199
+ prev_softmax_sum (Tensor): The sum values from the first FlashAttention softmax computation.
8200
+ It has the same shape and dtype as `prev_softmax_max`.
8201
+ cur_attn_out (Tensor): Output of the second FlashAttention operation. It has the same shape and dtype as `prev_attn_out`.
8202
+ cur_softmax_max (Tensor): The max values from the second FlashAttention softmax computation. It has the same shape and dtype as `prev_softmax_max`.
8203
+ cur_softmax_sum (Tensor): The sum values from the second FlashAttention softmax computation. It has the same shape and dtype as `prev_softmax_max`.
8204
+ actual_seq_qlen (Tensor, optional): Cumulative sequence length, starting from 0. Required if `layout` is ``"TND"``. Does not take effect if `layout` is ``"SBH"``.
8205
+ The tensor must be 1D and contain non-decreasing integer values starting from 0 to T. Default: ``None``.
8206
+ layout (str, optional): Indicates the input layout, currently supports ``"TND"`` and ``"SBH"``. Default: ``"SBH"``.
8207
+
8208
+ Returns:
8209
+ tuple (Tensor), tuple of 3 tensors.
8210
+
8211
+ - **attn_out** (Tensor) - The updated attention out, with the same shape and dtype as `prev_attn_out`.
8212
+ - **softmax_max** (Tensor) - The updated softmax max values, with the same shape and dtype as `prev_softmax_max`.
8213
+ - **softmax_sum** (Tensor) - The updated softmax sum values, with the same shape and dtype as `prev_softmax_max`.
8214
+
8215
+ Raises:
8216
+ RuntimeError: If `layout` is ``"TND"``, and `prev_attn_out`'s last dimension is not aligned to 64.
8217
+ RuntimeError: If `layout` is ``"TND"``, and `actual_seq_qlen` is not provided.
8218
+ RuntimeError: If `layout` is ``"TND"``, and `actual_seq_qlen` is not a non-decreasing sequence from 0 to T.
8219
+ RuntimeError: If `layout` is ``"TND"``, and `prev_attn_out` exceeds the size constraints.
8220
+
8221
+ Supported Platforms:
8222
+ ``Ascend``
8223
+
8224
+ Examples:
8225
+ >>> import numpy as np
8226
+ >>> import mindspore
8227
+ >>> from mindspore import Tensor, ops
8228
+ >>> np.random.seed(123)
8229
+ >>> S, B, H, N= 4, 6, 16, 8
8230
+ >>> prev_attn_out = np.random.uniform(-1.0, 1.0, size=(S, B, H)).astype(np.float32)
8231
+ >>> prev_softmax_max = np.random.uniform(-1.0, 1.0, size=(B, N, S, 8)).astype(np.float32)
8232
+ >>> prev_softmax_sum = np.random.uniform(-1.0, 1.0, size=(B, N, S, 8)).astype(np.float32)
8233
+ >>> cur_attn_out = np.random.uniform(-1.0, 1.0, size=(S, B, H)).astype(np.float32)
8234
+ >>> cur_softmax_max = np.random.uniform(-1.0, 1.0, size=(B, N, S, 8)).astype(np.float32)
8235
+ >>> cur_softmax_sum = np.random.uniform(-1.0, 1.0, size=(B, N, S, 8)).astype(np.float32)
8236
+ >>> inputs_np = [prev_attn_out, prev_softmax_max, prev_softmax_sum, cur_attn_out, cur_softmax_max, cur_softmax_sum]
8237
+ >>> inputs_ms = [Tensor(item) for item in inputs_np]
8238
+ >>> out = ops.ring_attention_update(*inputs_ms)
8239
+ >>> print(out[0].shape)
8240
+ (4, 6, 16)
8241
+ """
8242
+ return ring_attention_update_op(prev_attn_out, prev_softmax_max, prev_softmax_sum, cur_attn_out, cur_softmax_max, cur_softmax_sum, actual_seq_qlen, layout)
8243
+
8244
+
7901
8245
  def rms_norm(x, gamma, epsilon=1e-6):
7902
8246
  r"""
7903
8247
  The RmsNorm(Root Mean Square Layer Normalization) operator is a normalization operation. Compared to
@@ -8093,7 +8437,7 @@ def scalar_cast(input_x, input_y):
8093
8437
 
8094
8438
  Args:
8095
8439
  input_x (scalar): The input scalar. Only constant value is allowed.
8096
- input_y (mindspore.dtype): The type to be cast. Only constant value is allowed. And the value should only be mindspore.int64, mindspore.float64, or mindspore.bool_.
8440
+ input_y (mindspore.dtype): The type to be cast. Only constant value is allowed. And the value should only be mindspore.int64, mindspore.float64, or mindspore.bool.
8097
8441
 
8098
8442
  Returns:
8099
8443
  Scalar. The type is the same as the python type corresponding to `input_y`.
@@ -8603,6 +8947,58 @@ def sin(input):
8603
8947
  return sin_op(input)
8604
8948
 
8605
8949
 
8950
+ def smooth_l1_loss(prediction, target, beta=1.0, reduction='none'):
8951
+ r"""
8952
+ Calculate the smooth L1 loss. The L1 loss function is robust to outliers.
8953
+
8954
+ Refer to :func:`mindspore.ops.smooth_l1_loss` for more details.
8955
+
8956
+ .. warning::
8957
+ This API has poor performance on CPU and it is recommended to run it on the Ascend/GPU.
8958
+
8959
+ Args:
8960
+ beta (number, optional): A parameter used to control the point where the function will change between
8961
+ L1 and L2 loss. Default: ``1.0`` .
8962
+
8963
+ - Ascend: The value should be equal to or greater than zero.
8964
+ - CPU/GPU: The value should be greater than zero.
8965
+ reduction (str, optional): Apply specific reduction method to the output: ``'none'`` , ``'mean'`` ,
8966
+ ``'sum'`` . Default: ``'none'`` .
8967
+
8968
+ - ``'none'``: no reduction will be applied.
8969
+ - ``'mean'``: compute and return the mean of elements in the output.
8970
+ - ``'sum'``: the output elements will be summed.
8971
+
8972
+ Inputs:
8973
+ - **logits** (Tensor) - Input Tensor of any dimension. Supported dtypes:
8974
+
8975
+ - Ascend: float16, float32, bfloat16.
8976
+ - CPU/GPU: float16, float32, float64.
8977
+ - **labels** (Tensor) - Ground truth data.
8978
+
8979
+ - CPU/Ascend: has the same shape as the `logits`, `logits` and `labels` comply with the implicit type conversion rules to make the data types consistent.
8980
+ - GPU: has the same shape and dtype as the `logits`.
8981
+
8982
+ Outputs:
8983
+ Tensor, if `reduction` is ``'none'``, then output is a tensor with the same shape as `logits`. Otherwise the shape of output tensor is :math:`()`.
8984
+
8985
+ Supported Platforms:
8986
+ ``Ascend`` ``GPU`` ``CPU``
8987
+
8988
+ Examples:
8989
+ >>> import mindspore
8990
+ >>> import numpy as np
8991
+ >>> from mindspore import Tensor, ops
8992
+ >>> loss = ops.SmoothL1Loss()
8993
+ >>> logits = Tensor(np.array([1, 2, 3]), mindspore.float32)
8994
+ >>> labels = Tensor(np.array([1, 2, 2]), mindspore.float32)
8995
+ >>> output = loss(logits, labels)
8996
+ >>> print(output)
8997
+ [0. 0. 0.5]
8998
+ """
8999
+ return smooth_l1_loss_impl(prediction, target, beta, reduction)
9000
+
9001
+
8606
9002
  def softplus_ext(input, beta=1, threshold=20):
8607
9003
  r"""
8608
9004
  Applies softplus function to `input` element-wise.
@@ -8784,7 +9180,7 @@ def solve_triangular(a, b, trans=0, lower=False, unit_diagonal=False):
8784
9180
 
8785
9181
  Note:
8786
9182
  - `solve_triangular` is currently only used in `mindscience` scientific computing scenarios and
8787
- dose not support other usage scenarios.
9183
+ does not support other usage scenarios.
8788
9184
  - `solve_triangular` is not supported on Windows platform yet.
8789
9185
 
8790
9186
  Args:
@@ -9062,11 +9458,11 @@ def sub_ext(input, other, alpha=1):
9062
9458
  input (Union[Tensor, number.Number, bool]): The first input is a number.Number or
9063
9459
  a bool or a tensor whose data type is
9064
9460
  `number <https://www.mindspore.cn/docs/en/master/api_python/mindspore/mindspore.dtype.html>`_ or
9065
- `bool_ <https://www.mindspore.cn/docs/en/master/api_python/mindspore/mindspore.dtype.html>`_.
9461
+ `bool <https://www.mindspore.cn/docs/en/master/api_python/mindspore/mindspore.dtype.html>`_.
9066
9462
  other (Union[Tensor, number.Number, bool]): The second input, is a number.Number or
9067
9463
  a bool or a tensor whose data type is
9068
9464
  `number <https://www.mindspore.cn/docs/en/master/api_python/mindspore/mindspore.dtype.html>`_ or
9069
- `bool_ <https://www.mindspore.cn/docs/en/master/api_python/mindspore/mindspore.dtype.html>`_.
9465
+ `bool <https://www.mindspore.cn/docs/en/master/api_python/mindspore/mindspore.dtype.html>`_.
9070
9466
  alpha (number.Number): A scaling factor applied to `other`, default 1.
9071
9467
 
9072
9468
  Returns:
@@ -9111,7 +9507,7 @@ def sub(input, other):
9111
9507
  Note:
9112
9508
  - When the two inputs have different shapes, they must be able to broadcast to a common shape.
9113
9509
  - The two inputs can not be bool type at the same time,
9114
- [True, Tensor(True, bool\_), Tensor(np.array([True]), bool\_)] are all considered bool type.
9510
+ [True, Tensor(True), Tensor(np.array([True]))] are all considered bool type.
9115
9511
  - Support implicit type conversion and type promotion.
9116
9512
 
9117
9513
  Args:
@@ -9215,9 +9611,6 @@ def swiglu(input, dim=-1):
9215
9611
  Computes SwiGLU (Swish-Gated Linear Unit activation function) of input tensor.
9216
9612
  SwiGLU is a variant of the :class:`mindspore.ops.GLU` activation function, it is defined as:
9217
9613
 
9218
- .. warning::
9219
- This is an experimental API that is subject to change or deletion.
9220
-
9221
9614
  .. math::
9222
9615
  {SwiGLU}(a, b)= Swish(a) \otimes b
9223
9616
 
@@ -9225,6 +9618,9 @@ def swiglu(input, dim=-1):
9225
9618
  Swish(a)=a :math:`\sigma` (a), :math:`\sigma` is the :func:`mindspore.ops.sigmoid` activation function
9226
9619
  and :math:`\otimes` is the Hadamard product.
9227
9620
 
9621
+ .. warning::
9622
+ Only supported on Atlas A2 training series.
9623
+
9228
9624
  Args:
9229
9625
  input (Tensor): Tensor to be split. It has shape :math:`(\ast_1, N, \ast_2)`
9230
9626
  where `*` means, any number of additional dimensions. :math:`N` must be divisible by 2.
@@ -9457,6 +9853,30 @@ def topk_ext(input, k, dim=-1, largest=True, sorted=True):
9457
9853
  return topk_ext_op(input, k, dim, largest, sorted)
9458
9854
 
9459
9855
 
9856
+ def topprouter(input, capacity, expert_num, drop_type=0, threshold=0.0, router_prob=0.0):
9857
+ r"""
9858
+ TopPRouter implementation in MOE.
9859
+
9860
+ Inputs:
9861
+ - **input** (Tensor) - Input Tensor of 3D, supporting types: [int32, int64].
9862
+ - **capacity** (Int64) - The maximum number of tokens each expert can handle.
9863
+ - **expert_num** (Int64) - The number of experts.
9864
+ - **drop_type** (Int64) - S-Drop/K-Drop, 0 means S-Drop, 1 means K-Drop, default 0.
9865
+ - **threshold** (float32) - Expert threshold, default 0.
9866
+ - **router_prob** (Tensor) - Topk prob Tensor of 2D, supporting types:[float32], default 0.
9867
+
9868
+ Outputs:
9869
+ tuple(Tensor), tuple of 2 tensors, `dispatch_index` and `combine_index`.
9870
+
9871
+ - dispatch_index (Tensor) - Token ID processed by each expert.
9872
+ - combine_index (Tensor) - The combine index of each token.
9873
+
9874
+ Supported Platforms:
9875
+ ``Ascend``
9876
+ """
9877
+ return topprouter_op(input, capacity, expert_num, drop_type, threshold, router_prob)
9878
+
9879
+
9460
9880
  def trace_ext(input):
9461
9881
  r"""
9462
9882
  Returns a new tensor that is the sum of the `input` main trace.
@@ -10478,8 +10898,68 @@ def quant_batch_matmul(x1, x2, scale, offset=None, bias=None, pertokenScaleOptio
10478
10898
  return quant_batch_matmul_impl(x1, x2, scale, offset, bias, pertokenScaleOptional, transpose_x1, transpose_x2, dtype)
10479
10899
 
10480
10900
 
10901
+ def quant_matmul(x1, x2, scale, offset=None, pertoken_scale=None, bias=None, output_dtype=None, x1_dtype=None, x2_dtype=None, pertoken_scale_dtype=None, scale_dtype=None, group_sizes=None):
10902
+ r"""
10903
+
10904
+ """
10905
+ return quant_matmul_op(x1, x2, scale, offset, pertoken_scale, bias, output_dtype, x1_dtype, x2_dtype, pertoken_scale_dtype, scale_dtype, group_sizes)
10906
+
10907
+
10481
10908
  def weight_quant_batch_matmul(x, weight, antiquant_scale, antiquant_offset=None, quant_scale=None, quant_offset=None, bias=None, transpose_x=False, transpose_weight=False, antiquant_group_size=0):
10482
10909
  r"""
10483
10910
 
10484
10911
  """
10485
10912
  return weight_quant_batch_matmul_impl(x, weight, antiquant_scale, antiquant_offset, quant_scale, quant_offset, bias, transpose_x, transpose_weight, antiquant_group_size)
10913
+
10914
+
10915
+ def moe_token_unpermute(permuted_tokens, sorted_indices, probs=None, padded_mode=False, restore_shape=None):
10916
+ r"""
10917
+ Unpermute a tensor of permuted tokens based on sorted indices, and optionally merge the tokens with their corresponding probabilities.
10918
+
10919
+ .. warning::
10920
+ - It is only supported on Atlas A2 Training Series Products.
10921
+ - `sorted_indices` must not have duplicate values, otherwise the result is undefined.
10922
+
10923
+ Args:
10924
+ permuted_tokens (Tensor): The tensor of permuted tokens to be unpermuted.
10925
+ The shape is :math:`[num\_tokens * topk, hidden\_size]` , where `num_tokens`, `topk` and `hidden_size` are positive integers.
10926
+ sorted_indices (Tensor): The tensor of sorted indices used to unpermute the tokens.
10927
+ The shape is :math:`[num\_tokens * topk,]`, where `num_tokens` and `topk` are positive integers.
10928
+ It only supports the int32 data type.
10929
+ probs (Tensor, optional): The tensor of probabilities corresponding to the permuted tokens.
10930
+ If provided, the unpermuted tokens will be merged with their respective probabilities.
10931
+ The shape is :math:`[num\_tokens, topk]`, where `num_tokens` and `topk` are positive integers. Default: ``None`` .
10932
+ padded_mode (bool, optional): If ``True``, indicates that the indices are padded to denote selected tokens per expert. Default: ``False`` .
10933
+ restore_shape (Union[tuple[int], list[int]], optional): The input shape before permutation, only used in padding mode. Default: ``None`` .
10934
+
10935
+ Returns:
10936
+ Tensor, with the same dtype as `permuted_tokens`. If `padded_mode` is ``False``, the shape will be [`num_tokens`, `hidden_size`].
10937
+ If `padded_mode` is ``True``, the shape will be specified by `restore_shape`.
10938
+
10939
+ Raises:
10940
+ TypeError: If `permuted_tokens` is not a Tensor.
10941
+ ValueError: If `padded_mode` is not ``False``. Only `padded_mode` set to ``False`` is supported.
10942
+
10943
+ Supported Platforms:
10944
+ ``Ascend``
10945
+
10946
+ Examples:
10947
+ >>> import mindspore
10948
+ >>> from mindspore import Tensor, ops
10949
+ >>> permuted_token = Tensor([
10950
+ ... [1, 1, 1],
10951
+ ... [0, 0, 0],
10952
+ ... [0, 0, 0],
10953
+ ... [3, 3, 3],
10954
+ ... [2, 2, 2],
10955
+ ... [1, 1, 1],
10956
+ ... [2, 2, 2],
10957
+ ... [3, 3, 3]], dtype=mindspore.bfloat16)
10958
+ >>> sorted_indices = Tensor([0, 6, 7, 5, 3, 1, 2, 4], dtype=mindspore.int32)
10959
+ >>> out = ops.moe_token_unpermute(permuted_token, sorted_indices)
10960
+ >>> out.shape
10961
+ (8, 3)
10962
+
10963
+
10964
+ """
10965
+ return moe_token_unpermute_op(permuted_tokens, sorted_indices, probs, padded_mode, restore_shape)