mindspore 2.6.0rc1__cp311-cp311-win_amd64.whl → 2.7.0__cp311-cp311-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

Files changed (458)
  1. mindspore/.commit_id +1 -1
  2. mindspore/Microsoft.VisualStudio.Telemetry.dll +0 -0
  3. mindspore/Newtonsoft.Json.dll +0 -0
  4. mindspore/__init__.py +2 -2
  5. mindspore/_c_dataengine.cp311-win_amd64.pyd +0 -0
  6. mindspore/_c_expression.cp311-win_amd64.pyd +0 -0
  7. mindspore/_c_mindrecord.cp311-win_amd64.pyd +0 -0
  8. mindspore/_checkparam.py +42 -11
  9. mindspore/_extends/builtin_operations.py +3 -3
  10. mindspore/{_deprecated → _extends/optimize}/__init__.py +9 -3
  11. mindspore/_extends/optimize/cell_utils.py +96 -0
  12. mindspore/_extends/parallel_compile/akg_compiler/custom.py +1109 -0
  13. mindspore/_extends/parallel_compile/akg_compiler/gen_custom_op_files.py +1 -1
  14. mindspore/_extends/parse/__init__.py +3 -3
  15. mindspore/_extends/parse/compile_config.py +44 -22
  16. mindspore/_extends/parse/deprecated/deprecated_tensor_method.py +1 -2
  17. mindspore/_extends/parse/parser.py +65 -84
  18. mindspore/_extends/parse/resources.py +39 -0
  19. mindspore/_extends/parse/standard_method.py +58 -14
  20. mindspore/_extends/parse/trope.py +8 -1
  21. mindspore/_extends/pijit/__init__.py +1 -2
  22. mindspore/_extends/pijit/pijit_func_white_list.py +2 -5
  23. mindspore/amp.py +4 -22
  24. mindspore/atlprov.dll +0 -0
  25. mindspore/avcodec-59.dll +0 -0
  26. mindspore/avdevice-59.dll +0 -0
  27. mindspore/avfilter-8.dll +0 -0
  28. mindspore/avformat-59.dll +0 -0
  29. mindspore/avutil-57.dll +0 -0
  30. mindspore/boost/adasum.py +1 -1
  31. mindspore/boost/boost_cell_wrapper.py +4 -4
  32. mindspore/c1.dll +0 -0
  33. mindspore/c1xx.dll +0 -0
  34. mindspore/c2.dll +0 -0
  35. mindspore/common/__init__.py +43 -12
  36. mindspore/common/_grad_function.py +2 -1
  37. mindspore/common/_pijit_context.py +28 -7
  38. mindspore/common/_stub_tensor.py +1 -209
  39. mindspore/common/_tensor_cpp_method.py +1 -1
  40. mindspore/common/_tensor_docs.py +178 -53
  41. mindspore/common/_utils.py +9 -1
  42. mindspore/common/api.py +377 -203
  43. mindspore/common/dtype.py +108 -57
  44. mindspore/common/dump.py +11 -16
  45. mindspore/common/dynamic_shape/__init__.py +0 -0
  46. mindspore/common/{auto_dynamic_shape.py → dynamic_shape/auto_dynamic_shape.py} +17 -23
  47. mindspore/common/dynamic_shape/enable_dynamic.py +197 -0
  48. mindspore/common/file_system.py +59 -9
  49. mindspore/common/generator.py +5 -3
  50. mindspore/common/hook_handle.py +33 -5
  51. mindspore/common/jit_config.py +1 -1
  52. mindspore/common/jit_trace.py +84 -105
  53. mindspore/common/np_dtype.py +3 -3
  54. mindspore/common/parameter.py +27 -29
  55. mindspore/common/recompute.py +5 -7
  56. mindspore/common/sparse_tensor.py +0 -3
  57. mindspore/common/symbol.py +0 -1
  58. mindspore/common/tensor.py +117 -131
  59. mindspore/communication/_comm_helper.py +46 -4
  60. mindspore/communication/management.py +79 -7
  61. mindspore/context.py +67 -55
  62. mindspore/dataset/__init__.py +1 -1
  63. mindspore/dataset/audio/transforms.py +1 -1
  64. mindspore/dataset/core/config.py +38 -4
  65. mindspore/dataset/engine/datasets.py +350 -322
  66. mindspore/dataset/engine/datasets_user_defined.py +70 -24
  67. mindspore/dataset/engine/iterators.py +2 -2
  68. mindspore/dataset/engine/obs/config_loader.py +2 -2
  69. mindspore/dataset/engine/obs/obs_mindrecord_dataset.py +8 -0
  70. mindspore/dataset/transforms/c_transforms.py +2 -2
  71. mindspore/dataset/transforms/py_transforms.py +7 -3
  72. mindspore/dataset/transforms/transforms.py +10 -6
  73. mindspore/dataset/vision/__init__.py +1 -1
  74. mindspore/dataset/vision/py_transforms.py +8 -8
  75. mindspore/dataset/vision/transforms.py +17 -5
  76. mindspore/dataset/vision/utils.py +632 -21
  77. mindspore/dataset/vision/validators.py +1 -0
  78. mindspore/device_context/ascend/device.py +1 -1
  79. mindspore/device_context/ascend/op_tuning.py +35 -1
  80. mindspore/device_context/gpu/__init__.py +2 -2
  81. mindspore/device_context/gpu/device.py +1 -1
  82. mindspore/device_context/gpu/op_precision.py +4 -2
  83. mindspore/device_context/gpu/op_tuning.py +6 -3
  84. mindspore/device_manager.py +16 -9
  85. mindspore/dnnl.dll +0 -0
  86. mindspore/dpcmi.dll +0 -0
  87. mindspore/experimental/llm_boost/ascend_native/llama_boost_ascend_native.py +3 -4
  88. mindspore/experimental/llm_boost/atb/boost_base.py +2 -3
  89. mindspore/experimental/optim/adadelta.py +13 -20
  90. mindspore/experimental/optim/adagrad.py +15 -22
  91. mindspore/experimental/optim/adam.py +17 -24
  92. mindspore/experimental/optim/adamax.py +14 -22
  93. mindspore/experimental/optim/adamw.py +28 -34
  94. mindspore/experimental/optim/asgd.py +15 -25
  95. mindspore/experimental/optim/lr_scheduler.py +27 -45
  96. mindspore/experimental/optim/nadam.py +14 -24
  97. mindspore/experimental/optim/optimizer.py +13 -23
  98. mindspore/experimental/optim/radam.py +18 -24
  99. mindspore/experimental/optim/rmsprop.py +14 -25
  100. mindspore/experimental/optim/rprop.py +15 -26
  101. mindspore/experimental/optim/sgd.py +9 -19
  102. mindspore/hal/__init__.py +4 -4
  103. mindspore/hal/contiguous_tensors_handle.py +2 -2
  104. mindspore/hal/memory.py +27 -7
  105. mindspore/include/api/cell.h +65 -5
  106. mindspore/include/api/cfg.h +24 -7
  107. mindspore/include/api/context.h +1 -0
  108. mindspore/include/api/delegate.h +10 -2
  109. mindspore/include/api/dual_abi_helper.h +100 -19
  110. mindspore/include/api/graph.h +14 -1
  111. mindspore/include/api/kernel.h +16 -3
  112. mindspore/include/api/kernel_api.h +9 -1
  113. mindspore/include/api/metrics/accuracy.h +9 -0
  114. mindspore/include/api/model.h +8 -1
  115. mindspore/include/api/model_group.h +4 -0
  116. mindspore/include/api/model_parallel_runner.h +2 -0
  117. mindspore/include/api/status.h +48 -10
  118. mindspore/include/api/types.h +8 -3
  119. mindspore/include/c_api/model_c.h +0 -58
  120. mindspore/include/c_api/tensor_c.h +0 -26
  121. mindspore/include/dataset/constants.h +9 -0
  122. mindspore/include/dataset/vision_ascend.h +1 -1
  123. mindspore/jpeg62.dll +0 -0
  124. mindspore/mindrecord/tools/cifar10.py +61 -11
  125. mindspore/mindrecord/tools/cifar10_to_mr.py +5 -0
  126. mindspore/mindspore_backend_common.dll +0 -0
  127. mindspore/mindspore_backend_manager.dll +0 -0
  128. mindspore/mindspore_common.dll +0 -0
  129. mindspore/mindspore_core.dll +0 -0
  130. mindspore/mindspore_cpu_res_manager.dll +0 -0
  131. mindspore/mindspore_dump.dll +0 -0
  132. mindspore/mindspore_frontend.dll +0 -0
  133. mindspore/mindspore_glog.dll +0 -0
  134. mindspore/mindspore_memory_pool.dll +0 -0
  135. mindspore/mindspore_ms_backend.dll +0 -0
  136. mindspore/mindspore_ops.dll +0 -0
  137. mindspore/mindspore_ops_host.dll +0 -0
  138. mindspore/mindspore_ops_kernel_common.dll +0 -0
  139. mindspore/mindspore_profiler.dll +0 -0
  140. mindspore/mindspore_pyboost.dll +0 -0
  141. mindspore/mindspore_pynative.dll +0 -0
  142. mindspore/mindspore_res_manager.dll +0 -0
  143. mindspore/mindspore_runtime_pipeline.dll +0 -0
  144. mindspore/mint/__init__.py +6 -46
  145. mindspore/mint/distributed/__init__.py +5 -0
  146. mindspore/mint/distributed/distributed.py +429 -23
  147. mindspore/mint/nn/__init__.py +1 -1
  148. mindspore/mint/nn/functional.py +53 -6
  149. mindspore/mint/nn/layer/_functions.py +163 -294
  150. mindspore/mint/nn/layer/activation.py +8 -6
  151. mindspore/mint/nn/layer/conv.py +140 -104
  152. mindspore/mint/nn/layer/normalization.py +11 -25
  153. mindspore/mint/optim/adam.py +19 -18
  154. mindspore/mint/optim/adamw.py +14 -8
  155. mindspore/mint/optim/sgd.py +5 -5
  156. mindspore/msobj140.dll +0 -0
  157. mindspore/mspdb140.dll +0 -0
  158. mindspore/mspdbcore.dll +0 -0
  159. mindspore/mspdbst.dll +0 -0
  160. mindspore/mspft140.dll +0 -0
  161. mindspore/msvcdis140.dll +0 -0
  162. mindspore/msvcp140_1.dll +0 -0
  163. mindspore/msvcp140_2.dll +0 -0
  164. mindspore/msvcp140_atomic_wait.dll +0 -0
  165. mindspore/msvcp140_codecvt_ids.dll +0 -0
  166. mindspore/nn/cell.py +491 -623
  167. mindspore/nn/grad/cell_grad.py +11 -12
  168. mindspore/nn/layer/activation.py +36 -36
  169. mindspore/nn/layer/basic.py +74 -77
  170. mindspore/nn/layer/channel_shuffle.py +4 -4
  171. mindspore/nn/layer/combined.py +4 -2
  172. mindspore/nn/layer/conv.py +117 -110
  173. mindspore/nn/layer/dense.py +9 -7
  174. mindspore/nn/layer/embedding.py +50 -52
  175. mindspore/nn/layer/image.py +38 -40
  176. mindspore/nn/layer/math.py +111 -112
  177. mindspore/nn/layer/normalization.py +56 -44
  178. mindspore/nn/layer/pooling.py +58 -63
  179. mindspore/nn/layer/rnn_cells.py +33 -33
  180. mindspore/nn/layer/rnns.py +56 -56
  181. mindspore/nn/layer/thor_layer.py +74 -73
  182. mindspore/nn/layer/transformer.py +11 -1
  183. mindspore/nn/learning_rate_schedule.py +20 -20
  184. mindspore/nn/loss/loss.py +79 -81
  185. mindspore/nn/optim/adam.py +4 -6
  186. mindspore/nn/optim/adasum.py +2 -2
  187. mindspore/nn/optim/asgd.py +2 -0
  188. mindspore/nn/optim/lamb.py +1 -3
  189. mindspore/nn/optim/optimizer.py +1 -1
  190. mindspore/nn/optim/tft_wrapper.py +2 -3
  191. mindspore/nn/optim/thor.py +2 -2
  192. mindspore/nn/probability/distribution/_utils/utils.py +2 -2
  193. mindspore/nn/probability/distribution/exponential.py +2 -1
  194. mindspore/nn/probability/distribution/poisson.py +2 -1
  195. mindspore/nn/sparse/sparse.py +3 -3
  196. mindspore/nn/wrap/cell_wrapper.py +73 -42
  197. mindspore/nn/wrap/grad_reducer.py +37 -52
  198. mindspore/nn/wrap/loss_scale.py +72 -74
  199. mindspore/numpy/array_creations.py +7 -7
  200. mindspore/numpy/fft.py +1 -1
  201. mindspore/numpy/math_ops.py +5 -5
  202. mindspore/numpy/utils_const.py +1 -1
  203. mindspore/opencv_core452.dll +0 -0
  204. mindspore/opencv_imgcodecs452.dll +0 -0
  205. mindspore/opencv_imgproc452.dll +0 -0
  206. mindspore/ops/_grad_experimental/grad_comm_ops.py +51 -13
  207. mindspore/ops/_grad_experimental/grad_debug_ops.py +14 -0
  208. mindspore/ops/_grad_experimental/grad_inner_ops.py +0 -9
  209. mindspore/ops/_op_impl/cpu/__init__.py +1 -0
  210. mindspore/{experimental/es/__init__.py → ops/_op_impl/cpu/joinedstr_op.py} +12 -6
  211. mindspore/ops/_vmap/vmap_array_ops.py +31 -13
  212. mindspore/ops/_vmap/vmap_nn_ops.py +8 -16
  213. mindspore/ops/auto_generate/cpp_create_prim_instance_helper.py +54 -13
  214. mindspore/ops/auto_generate/gen_extend_func.py +27 -145
  215. mindspore/ops/auto_generate/gen_ops_def.py +1027 -347
  216. mindspore/ops/auto_generate/gen_ops_prim.py +2341 -1117
  217. mindspore/ops/auto_generate/pyboost_inner_prim.py +31 -1
  218. mindspore/ops/composite/__init__.py +10 -0
  219. mindspore/ops/composite/base.py +9 -5
  220. mindspore/ops/composite/multitype_ops/__init__.py +12 -1
  221. mindspore/ops/composite/multitype_ops/_compile_utils.py +133 -109
  222. mindspore/ops/composite/multitype_ops/_constexpr_utils.py +1 -1
  223. mindspore/ops/composite/multitype_ops/add_impl.py +70 -2
  224. mindspore/ops/composite/multitype_ops/div_impl.py +49 -0
  225. mindspore/ops/composite/multitype_ops/floordiv_impl.py +29 -0
  226. mindspore/ops/composite/multitype_ops/getitem_impl.py +11 -0
  227. mindspore/ops/composite/multitype_ops/mod_impl.py +5 -3
  228. mindspore/ops/composite/multitype_ops/mul_impl.py +49 -0
  229. mindspore/ops/composite/multitype_ops/setitem_impl.py +57 -0
  230. mindspore/ops/composite/multitype_ops/sub_impl.py +34 -0
  231. mindspore/ops/composite/multitype_ops/zeros_like_impl.py +14 -0
  232. mindspore/ops/function/__init__.py +4 -1
  233. mindspore/ops/function/_add_attr_func.py +11 -6
  234. mindspore/ops/function/array_func.py +19 -102
  235. mindspore/ops/function/debug_func.py +8 -5
  236. mindspore/ops/function/grad/grad_func.py +5 -13
  237. mindspore/ops/function/math_func.py +77 -572
  238. mindspore/ops/function/nn_func.py +46 -94
  239. mindspore/ops/function/other_func.py +4 -1
  240. mindspore/ops/function/random_func.py +44 -5
  241. mindspore/ops/function/vmap_func.py +2 -1
  242. mindspore/ops/functional.py +4 -4
  243. mindspore/ops/functional_overload.py +594 -18
  244. mindspore/ops/op_info_register.py +21 -0
  245. mindspore/ops/operations/__init__.py +16 -11
  246. mindspore/ops/operations/_custom_ops_utils.py +689 -34
  247. mindspore/ops/operations/_inner_ops.py +14 -18
  248. mindspore/ops/operations/_sequence_ops.py +1 -1
  249. mindspore/ops/operations/array_ops.py +5 -51
  250. mindspore/ops/operations/comm_ops.py +186 -41
  251. mindspore/ops/operations/custom_ops.py +303 -177
  252. mindspore/ops/operations/debug_ops.py +59 -4
  253. mindspore/ops/operations/image_ops.py +13 -13
  254. mindspore/ops/operations/manually_defined/ops_def.py +27 -28
  255. mindspore/ops/operations/math_ops.py +8 -9
  256. mindspore/ops/operations/nn_ops.py +8 -40
  257. mindspore/ops/primitive.py +9 -20
  258. mindspore/ops/tensor_method.py +63 -15
  259. mindspore/ops_generate/api/cpp_create_prim_instance_helper_generator.py +1 -1
  260. mindspore/ops_generate/api/functional_map_cpp_generator.py +10 -9
  261. mindspore/ops_generate/api/functions_cc_generator.py +58 -10
  262. mindspore/ops_generate/api/tensor_func_reg_cpp_generator.py +1 -1
  263. mindspore/ops_generate/common/base_generator.py +14 -0
  264. mindspore/ops_generate/common/gen_constants.py +8 -3
  265. mindspore/ops_generate/common/gen_utils.py +0 -19
  266. mindspore/ops_generate/common/op_proto.py +11 -4
  267. mindspore/ops_generate/common/template.py +88 -11
  268. mindspore/ops_generate/gen_ops.py +1 -1
  269. mindspore/ops_generate/op_def/lite_ops_cpp_generator.py +4 -4
  270. mindspore/ops_generate/op_def/ops_def_cc_generator.py +0 -3
  271. mindspore/ops_generate/op_def/ops_name_h_generator.py +0 -3
  272. mindspore/ops_generate/op_def/ops_primitive_h_generator.py +0 -4
  273. mindspore/ops_generate/op_def_py/op_prim_py_generator.py +5 -2
  274. mindspore/ops_generate/pyboost/auto_grad_impl_cc_generator.py +49 -8
  275. mindspore/ops_generate/pyboost/auto_grad_reg_cc_generator.py +2 -2
  276. mindspore/ops_generate/pyboost/gen_pyboost_func.py +31 -16
  277. mindspore/ops_generate/pyboost/op_template_parser.py +98 -72
  278. mindspore/ops_generate/pyboost/pyboost_functions_cpp_generator.py +70 -273
  279. mindspore/ops_generate/pyboost/pyboost_functions_h_generator.py +14 -6
  280. mindspore/ops_generate/pyboost/pyboost_functions_impl_cpp_generator.py +316 -0
  281. mindspore/ops_generate/pyboost/pyboost_functions_py_generator.py +1 -1
  282. mindspore/ops_generate/pyboost/pyboost_grad_function_cpp_generator.py +5 -3
  283. mindspore/ops_generate/pyboost/pyboost_inner_prim_generator.py +1 -1
  284. mindspore/ops_generate/pyboost/pyboost_internal_functions_cpp_generator.py +76 -0
  285. mindspore/ops_generate/pyboost/pyboost_internal_functions_h_generator.py +76 -0
  286. mindspore/ops_generate/pyboost/pyboost_internal_kernel_info_adapter_generator.py +125 -0
  287. mindspore/ops_generate/pyboost/pyboost_native_grad_functions_generator.py +4 -3
  288. mindspore/ops_generate/pyboost/pyboost_op_cpp_code_generator.py +348 -61
  289. mindspore/ops_generate/pyboost/pyboost_overload_functions_cpp_generator.py +1 -1
  290. mindspore/ops_generate/pyboost/pyboost_utils.py +118 -9
  291. mindspore/ops_generate/tensor_py_cc_generator.py +1 -24
  292. mindspore/parallel/_auto_parallel_context.py +16 -23
  293. mindspore/parallel/_cell_wrapper.py +113 -45
  294. mindspore/parallel/_parallel_serialization.py +4 -3
  295. mindspore/parallel/_ps_context.py +4 -6
  296. mindspore/parallel/_tensor.py +167 -12
  297. mindspore/parallel/_transformer/moe.py +1 -1
  298. mindspore/parallel/_transformer/transformer.py +17 -12
  299. mindspore/parallel/_utils.py +5 -11
  300. mindspore/parallel/auto_parallel.py +35 -14
  301. mindspore/parallel/checkpoint_convert.py +3 -3
  302. mindspore/parallel/checkpoint_transform.py +13 -7
  303. mindspore/parallel/cluster/process_entity/_api.py +88 -49
  304. mindspore/parallel/cluster/process_entity/_utils.py +95 -7
  305. mindspore/parallel/cluster/run.py +48 -7
  306. mindspore/parallel/function/__init__.py +8 -1
  307. mindspore/parallel/function/reshard_func.py +12 -12
  308. mindspore/parallel/nn/__init__.py +15 -2
  309. mindspore/parallel/nn/parallel_cell_wrapper.py +50 -14
  310. mindspore/parallel/nn/parallel_grad_reducer.py +7 -14
  311. mindspore/parallel/shard.py +10 -25
  312. mindspore/parallel/transform_safetensors.py +469 -174
  313. mindspore/pgodb140.dll +0 -0
  314. mindspore/pgort140.dll +0 -0
  315. mindspore/profiler/__init__.py +2 -1
  316. mindspore/profiler/analysis/parser/timeline_assembly_factory/ascend_timeline_assembler.py +7 -7
  317. mindspore/profiler/analysis/parser/timeline_assembly_factory/base_timeline_assembler.py +3 -0
  318. mindspore/profiler/analysis/parser/timeline_assembly_factory/trace_view_container.py +12 -6
  319. mindspore/profiler/analysis/parser/timeline_creator/cpu_op_timeline_creator.py +3 -3
  320. mindspore/profiler/analysis/parser/timeline_creator/fwk_timeline_creator.py +3 -3
  321. mindspore/profiler/analysis/parser/timeline_creator/msprof_timeline_creator.py +4 -4
  322. mindspore/profiler/analysis/parser/timeline_creator/scope_layer_timeline_creator.py +3 -3
  323. mindspore/profiler/analysis/parser/timeline_event/fwk_event.py +4 -1
  324. mindspore/profiler/analysis/parser/timeline_event/timeline_event_pool.py +2 -1
  325. mindspore/profiler/analysis/task_manager.py +1 -1
  326. mindspore/profiler/analysis/viewer/ascend_communication_viewer.py +5 -1
  327. mindspore/profiler/analysis/viewer/ascend_integrate_viewer.py +2 -1
  328. mindspore/profiler/analysis/viewer/ascend_kernel_details_viewer.py +10 -9
  329. mindspore/profiler/analysis/viewer/ascend_op_memory_viewer.py +43 -23
  330. mindspore/profiler/analysis/viewer/ascend_step_trace_time_viewer.py +3 -2
  331. mindspore/profiler/analysis/viewer/ms_minddata_viewer.py +9 -5
  332. mindspore/profiler/analysis/viewer/ms_operator_details_viewer.py +132 -0
  333. mindspore/profiler/common/constant.py +16 -0
  334. mindspore/profiler/common/msprof_cmd_tool.py +2 -2
  335. mindspore/profiler/common/path_manager.py +9 -0
  336. mindspore/profiler/common/profiler_context.py +50 -29
  337. mindspore/profiler/common/profiler_info.py +0 -16
  338. mindspore/profiler/common/profiler_meta_data.py +1 -0
  339. mindspore/profiler/common/profiler_op_analyse.py +239 -0
  340. mindspore/profiler/common/profiler_output_path.py +23 -8
  341. mindspore/profiler/common/profiler_parameters.py +128 -35
  342. mindspore/profiler/dynamic_profile/__init__.py +0 -0
  343. mindspore/profiler/dynamic_profile/dynamic_monitor_proxy.py +39 -0
  344. mindspore/profiler/dynamic_profile/dynamic_profiler_config_context.py +666 -0
  345. mindspore/profiler/dynamic_profile/dynamic_profiler_utils.py +62 -0
  346. mindspore/profiler/dynamic_profiler.py +374 -338
  347. mindspore/profiler/envprofiler.py +42 -12
  348. mindspore/profiler/experimental_config.py +112 -7
  349. mindspore/profiler/mstx.py +33 -12
  350. mindspore/profiler/platform/__init__.py +2 -3
  351. mindspore/profiler/platform/cpu_profiler.py +10 -4
  352. mindspore/profiler/platform/npu_profiler.py +30 -20
  353. mindspore/profiler/profiler.py +218 -154
  354. mindspore/profiler/profiler_action_controller.py +65 -77
  355. mindspore/profiler/profiler_interface.py +2 -2
  356. mindspore/profiler/schedule.py +10 -4
  357. mindspore/rewrite/common/config.py +1 -0
  358. mindspore/rewrite/common/namer.py +1 -0
  359. mindspore/rewrite/common/namespace.py +1 -0
  360. mindspore/rewrite/node/node.py +31 -11
  361. mindspore/rewrite/parsers/assign_parser.py +1 -1
  362. mindspore/rewrite/symbol_tree/symbol_tree.py +2 -2
  363. mindspore/run_check/_check_version.py +7 -10
  364. mindspore/runtime/__init__.py +8 -6
  365. mindspore/runtime/event.py +10 -4
  366. mindspore/runtime/executor.py +87 -45
  367. mindspore/runtime/memory.py +31 -32
  368. mindspore/runtime/thread_bind_core.py +299 -165
  369. mindspore/safeguard/rewrite_obfuscation.py +12 -13
  370. mindspore/swresample-4.dll +0 -0
  371. mindspore/swscale-6.dll +0 -0
  372. mindspore/tbbmalloc.dll +0 -0
  373. mindspore/tinyxml2.dll +0 -0
  374. mindspore/train/_utils.py +17 -7
  375. mindspore/train/amp.py +43 -23
  376. mindspore/train/callback/__init__.py +5 -5
  377. mindspore/train/callback/_callback.py +2 -1
  378. mindspore/train/callback/_checkpoint.py +4 -14
  379. mindspore/train/callback/_flops_collector.py +11 -7
  380. mindspore/train/callback/_landscape.py +0 -1
  381. mindspore/train/callback/_train_fault_tolerance.py +98 -21
  382. mindspore/train/data_sink.py +15 -6
  383. mindspore/train/dataset_helper.py +14 -5
  384. mindspore/train/model.py +133 -69
  385. mindspore/train/serialization.py +168 -126
  386. mindspore/train/summary/summary_record.py +13 -2
  387. mindspore/train/train_thor/model_thor.py +2 -2
  388. mindspore/turbojpeg.dll +0 -0
  389. mindspore/utils/__init__.py +3 -2
  390. mindspore/utils/dryrun.py +0 -6
  391. mindspore/utils/runtime_execution_order_check.py +163 -77
  392. mindspore/utils/sdc_detect.py +68 -0
  393. mindspore/utils/utils.py +14 -17
  394. mindspore/vcmeta.dll +0 -0
  395. mindspore/vcruntime140.dll +0 -0
  396. mindspore/vcruntime140_1.dll +0 -0
  397. mindspore/version.py +1 -1
  398. {mindspore-2.6.0rc1.dist-info → mindspore-2.7.0.dist-info}/METADATA +5 -4
  399. {mindspore-2.6.0rc1.dist-info → mindspore-2.7.0.dist-info}/RECORD +403 -442
  400. mindspore/_deprecated/jit.py +0 -198
  401. mindspore/_extends/remote/kernel_build_server_ascend.py +0 -75
  402. mindspore/communication/_hccl_management.py +0 -297
  403. mindspore/experimental/es/embedding_service.py +0 -891
  404. mindspore/experimental/es/embedding_service_layer.py +0 -581
  405. mindspore/profiler/common/validator/__init__.py +0 -14
  406. mindspore/profiler/common/validator/validate_path.py +0 -84
  407. mindspore/profiler/parser/__init__.py +0 -14
  408. mindspore/profiler/parser/aicpu_data_parser.py +0 -272
  409. mindspore/profiler/parser/ascend_analysis/__init__.py +0 -14
  410. mindspore/profiler/parser/ascend_analysis/constant.py +0 -71
  411. mindspore/profiler/parser/ascend_analysis/file_manager.py +0 -180
  412. mindspore/profiler/parser/ascend_analysis/function_event.py +0 -185
  413. mindspore/profiler/parser/ascend_analysis/fwk_cann_parser.py +0 -136
  414. mindspore/profiler/parser/ascend_analysis/fwk_file_parser.py +0 -131
  415. mindspore/profiler/parser/ascend_analysis/msprof_timeline_parser.py +0 -104
  416. mindspore/profiler/parser/ascend_analysis/path_manager.py +0 -313
  417. mindspore/profiler/parser/ascend_analysis/profiler_info_parser.py +0 -123
  418. mindspore/profiler/parser/ascend_analysis/tlv_decoder.py +0 -86
  419. mindspore/profiler/parser/ascend_analysis/trace_event_manager.py +0 -75
  420. mindspore/profiler/parser/ascend_cluster_generator.py +0 -116
  421. mindspore/profiler/parser/ascend_communicate_generator.py +0 -314
  422. mindspore/profiler/parser/ascend_flops_generator.py +0 -116
  423. mindspore/profiler/parser/ascend_fpbp_generator.py +0 -82
  424. mindspore/profiler/parser/ascend_hccl_generator.py +0 -271
  425. mindspore/profiler/parser/ascend_integrate_generator.py +0 -42
  426. mindspore/profiler/parser/ascend_memory_generator.py +0 -185
  427. mindspore/profiler/parser/ascend_msprof_exporter.py +0 -282
  428. mindspore/profiler/parser/ascend_msprof_generator.py +0 -187
  429. mindspore/profiler/parser/ascend_op_generator.py +0 -334
  430. mindspore/profiler/parser/ascend_steptrace_generator.py +0 -94
  431. mindspore/profiler/parser/ascend_timeline_generator.py +0 -545
  432. mindspore/profiler/parser/base_timeline_generator.py +0 -483
  433. mindspore/profiler/parser/container.py +0 -229
  434. mindspore/profiler/parser/cpu_gpu_timeline_generator.py +0 -697
  435. mindspore/profiler/parser/flops_parser.py +0 -531
  436. mindspore/profiler/parser/framework_enum.py +0 -111
  437. mindspore/profiler/parser/framework_parser.py +0 -464
  438. mindspore/profiler/parser/framework_struct.py +0 -61
  439. mindspore/profiler/parser/gpu_analysis/__init__.py +0 -14
  440. mindspore/profiler/parser/gpu_analysis/function_event.py +0 -44
  441. mindspore/profiler/parser/gpu_analysis/fwk_file_parser.py +0 -89
  442. mindspore/profiler/parser/gpu_analysis/profiler_info_parser.py +0 -72
  443. mindspore/profiler/parser/hccl_parser.py +0 -573
  444. mindspore/profiler/parser/hwts_log_parser.py +0 -122
  445. mindspore/profiler/parser/integrator.py +0 -526
  446. mindspore/profiler/parser/memory_usage_parser.py +0 -277
  447. mindspore/profiler/parser/minddata_analyzer.py +0 -800
  448. mindspore/profiler/parser/minddata_parser.py +0 -186
  449. mindspore/profiler/parser/minddata_pipeline_parser.py +0 -299
  450. mindspore/profiler/parser/op_intermediate_parser.py +0 -149
  451. mindspore/profiler/parser/optime_parser.py +0 -250
  452. mindspore/profiler/parser/profiler_info.py +0 -213
  453. mindspore/profiler/parser/step_trace_parser.py +0 -666
  454. mindspore/utils/hooks.py +0 -81
  455. /mindspore/common/{_auto_dynamic.py → dynamic_shape/_auto_dynamic.py} +0 -0
  456. {mindspore-2.6.0rc1.dist-info → mindspore-2.7.0.dist-info}/WHEEL +0 -0
  457. {mindspore-2.6.0rc1.dist-info → mindspore-2.7.0.dist-info}/entry_points.txt +0 -0
  458. {mindspore-2.6.0rc1.dist-info → mindspore-2.7.0.dist-info}/top_level.txt +0 -0
@@ -201,9 +201,9 @@ def adaptive_avg_pool1d(input, output_size):
201
201
 
202
202
  Examples:
203
203
  >>> import mindspore
204
- >>> from mindspore import Tensor, mint
204
+ >>> from mindspore import Tensor, ops
205
205
  >>> input = Tensor([[2,3],[3,4]],dtype=mindspore.float16)
206
- >>> output = mint.nn.functional.adaptive_avg_pool1d(input, 3)
206
+ >>> output = ops.auto_generate.adaptive_avg_pool1d(input, 3)
207
207
  >>> print(output)
208
208
  [[2. 2.5 3. ]
209
209
  [3. 3.5 4. ]]
@@ -250,11 +250,11 @@ def add_ext(input, other, alpha=1):
250
250
  input (Union[Tensor, number.Number, bool]): The first input is a number.Number or
251
251
  a bool or a tensor whose data type is
252
252
  `number <https://www.mindspore.cn/docs/en/master/api_python/mindspore/mindspore.dtype.html>`_ or
253
- `bool_ <https://www.mindspore.cn/docs/en/master/api_python/mindspore/mindspore.dtype.html>`_.
253
+ `bool <https://www.mindspore.cn/docs/en/master/api_python/mindspore/mindspore.dtype.html>`_.
254
254
  other (Union[Tensor, number.Number, bool]): The second input, is a number.Number or
255
255
  a bool or a tensor whose data type is
256
256
  `number <https://www.mindspore.cn/docs/en/master/api_python/mindspore/mindspore.dtype.html>`_ or
257
- `bool_ <https://www.mindspore.cn/docs/en/master/api_python/mindspore/mindspore.dtype.html>`_.
257
+ `bool <https://www.mindspore.cn/docs/en/master/api_python/mindspore/mindspore.dtype.html>`_.
258
258
  alpha (number.Number): A scaling factor applied to `other`, default 1.
259
259
 
260
260
  Returns:
@@ -312,7 +312,7 @@ def add(input, other):
312
312
 
313
313
  Note:
314
314
  - The two inputs can not be bool type at the same time,
315
- [True, Tensor(True, bool\_), Tensor(np.array([True]), bool\_)] are all considered bool type.
315
+ [True, Tensor(True), Tensor(np.array([True]))] are all considered bool type.
316
316
  - Support broadcast, support implicit type conversion and type promotion.
317
317
  - When the input is a tensor, the dimension should be greater than or equal to 1.
318
318
 
@@ -442,8 +442,7 @@ def apply_rotary_pos_emb_(query, key, cos, sin, position_ids, cos_format=0):
442
442
  r"""
443
443
 
444
444
  """
445
- apply_rotary_pos_emb_op = _get_cache_prim(ApplyRotaryPosEmb)(cos_format)
446
- return apply_rotary_pos_emb_op(query, key, cos, sin, position_ids)
445
+ return apply_rotary_pos_emb_impl(query, key, cos, sin, position_ids, cos_format)
447
446
 
448
447
 
449
448
  def argmax_ext(input, dim=None, keepdim=False):
@@ -527,9 +526,9 @@ def argmin_ext(input, dim=None, keepdim=False):
527
526
  Examples:
528
527
  >>> import numpy as np
529
528
  >>> from mindspore import Tensor
530
- >>> from mindspore import mint
529
+ >>> from mindspore import ops
531
530
  >>> x = Tensor(np.array([[1, 20, 5], [67, 8, 9], [130, 24, 15]]).astype(np.float32))
532
- >>> output = mint.argmin(x, dim=-1)
531
+ >>> output = ops.auto_generate.argmin_ext(x, dim=-1)
533
532
  >>> print(output)
534
533
  [0 1 2]
535
534
  """
@@ -566,14 +565,13 @@ def argsort_ext(input, dim=-1, descending=False, stable=False):
566
565
  Examples:
567
566
  >>> import mindspore
568
567
  >>> import numpy as np
569
- >>> from mindspore import Tensor
570
- >>> import mindspore.mint as mint
568
+ >>> from mindspore import Tensor, ops
571
569
  >>> x = Tensor(np.array([[8, 2, 1], [5, 9, 3], [4, 6, 7]]), mindspore.float16)
572
- >>> sort = mint.argsort(x)
570
+ >>> sort = ops.auto_generate.argsort_ext(x)
573
571
  >>> print(sort)
574
572
  [[2 1 0]
575
- [2 0 1]
576
- [0 1 2]]
573
+ [2 0 1]
574
+ [0 1 2]]
577
575
  """
578
576
  return argsort_op(input, dim, descending, stable)
579
577
 
@@ -819,7 +817,7 @@ def atan2_ext(input, other):
819
817
  >>> from mindspore import Tensor, ops
820
818
  >>> input = Tensor(np.array([0, 1]), mindspore.float32)
821
819
  >>> other = Tensor(np.array([1, 1]), mindspore.float32)
822
- >>> output = mint.atan2(input, other)
820
+ >>> output = ops.auto_generate.atan2_ext(input, other)
823
821
  >>> print(output)
824
822
  [0. 0.7853982]
825
823
  """
@@ -979,9 +977,9 @@ def avg_pool1d_ext(input, kernel_size, stride=None, padding=0, ceil_mode=False,
979
977
  Examples:
980
978
  >>> import mindspore
981
979
  >>> import numpy as np
982
- >>> from mindspore import Tensor, mint
980
+ >>> from mindspore import Tensor, ops
983
981
  >>> input_x = Tensor(np.random.randint(0, 10, [1, 3, 6]), mindspore.float32)
984
- >>> output = mint.nn.functional.avg_pool1d(input_x, kernel_size=6, stride=1)
982
+ >>> output = ops.auto_generate.avg_pool1d_ext(input_x, kernel_size=6, stride=1)
985
983
  >>> print(output.shape)
986
984
  (1, 3, 1)
987
985
  """
@@ -1086,14 +1084,14 @@ def bincount_ext(input, weights=None, minlength=0):
1086
1084
  ``Ascend``
1087
1085
 
1088
1086
  Examples:
1089
- >>> from mindspore import mint, Tensor
1090
- >>> print(mint.bincount(Tensor(np.arange(5))))
1087
+ >>> from mindspore import ops, Tensor
1088
+ >>> print(ops.auto_generate.bincount_ext(Tensor(np.arange(5))))
1091
1089
  [1 1 1 1 1]
1092
- >>> print(mint.bincount(Tensor(np.array([0, 1, 1, 3, 2, 1, 7]))))
1090
+ >>> print(ops.auto_generate.bincount_ext(Tensor(np.array([0, 1, 1, 3, 2, 1, 7]))))
1093
1091
  [1 3 1 1 0 0 0 1]
1094
1092
  >>> w = Tensor(np.array([0.3, 0.5, 0.2, 0.7, 1., -0.6])) # weights
1095
1093
  >>> x = Tensor(np.array([0, 1, 1, 2, 2, 2]))
1096
- >>> print(mint.bincount(x, weights=w, minlength=5))
1094
+ >>> print(ops.auto_generate.bincount_ext(x, weights=w, minlength=5))
1097
1095
  [0.3 0.7 1.1 0. 0. ]
1098
1096
  """
1099
1097
  return bincount_ext_op(input, weights, minlength)
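
The bincount_ext example above uses np without showing its import; as a self-contained sanity check of the same weighted counting, the equivalent NumPy call below reproduces the documented output (illustration only, not the MindSpore kernel):

import numpy as np

x = np.array([0, 1, 1, 2, 2, 2])
w = np.array([0.3, 0.5, 0.2, 0.7, 1.0, -0.6])
# Each bin sums the weights of the elements that fall into it,
# padded to at least `minlength` bins.
print(np.bincount(x, weights=w, minlength=5))  # ~[0.3 0.7 1.1 0.  0. ], matching the expected output above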
@@ -1184,7 +1182,7 @@ def broadcast_to(input, shape):
1184
1182
 
1185
1183
  Args:
1186
1184
  input (Tensor): The input tensor.
1187
- shape (tuple): The target shape.
1185
+ shape (tuple[int]): The target shape.
1188
1186
 
1189
1187
  Returns:
1190
1188
  Tensor
@@ -1209,6 +1207,84 @@ def broadcast_to(input, shape):
1209
1207
  """
1210
1208
  return broadcast_to_impl(input, shape)
1211
1209
 
1210
+
1211
+ def broadcast_to_view(input, shape):
1212
+ r"""
1213
+ Broadcasts input tensor to a given shape. The dim of input shape must be smaller
1214
+ than or equal to that of target shape. Suppose input shape is :math:`(x_1, x_2, ..., x_m)`,
1215
+ target shape is :math:`(*, y_1, y_2, ..., y_m)`, where :math:`*` means any additional dimension.
1216
+ The broadcast rules are as follows:
1217
+
1218
+ Compare the value of :math:`x_m` and :math:`y_m`, :math:`x_{m-1}` and :math:`y_{m-1}`, ...,
1219
+ :math:`x_1` and :math:`y_1` consecutively and
1220
+ decide whether these shapes are broadcastable and what the broadcast result is.
1221
+
1222
+ If the value pairs at a specific dim are equal, then that value goes right into that dim of output shape.
1223
+ With an input shape :math:`(2, 3)`, target shape :math:`(2, 3)` , the inferred output shape is :math:`(2, 3)`.
1224
+
1225
+ If the value pairs are unequal, there are three cases:
1226
+
1227
+ Case 1: If the value of the target shape in the dimension is -1, the value of the
1228
+ output shape in the dimension is the value of the corresponding input shape in the dimension.
1229
+ With an input shape :math:`(3, 3)`, target
1230
+ shape :math:`(-1, 3)`, the output shape is :math:`(3, 3)`.
1231
+
1232
+ Case 2: If the value of target shape in the dimension is not -1, but the corresponding
1233
+ value in the input shape is 1, then the corresponding value of the output shape
1234
+ is that of the target shape. With an input shape :math:`(1, 3)`, target
1235
+ shape :math:`(8, 3)`, the output shape is :math:`(8, 3)`.
1236
+
1237
+ Case 3: If the corresponding values of the two shapes do not satisfy the above cases,
1238
+ it means that broadcasting from the input shape to the target shape is not supported.
1239
+
1240
+ So far we got the last m dims of the outshape, now focus on the first :math:`*` dims, there are
1241
+ two cases:
1242
+
1243
+ If the first :math:`*` dims of output shape does not have -1 in it, then fill the input
1244
+ shape with ones until their length are the same, and then refer to
1245
+ Case 2 mentioned above to calculate the output shape. With target shape :math:`(3, 1, 4, 1, 5, 9)`,
1246
+ input shape :math:`(1, 5, 9)`, the filled input shape will be :math:`(1, 1, 1, 1, 5, 9)` and thus the
1247
+ output shape is :math:`(3, 1, 4, 1, 5, 9)`.
1248
+
1249
+ If the first :math:`*` dims of output shape have -1 in it, it implies this -1 is corresponding to
1250
+ a non-existing dim so they're not broadcastable. With target shape :math:`(3, -1, 4, 1, 5, 9)`,
1251
+ input shape :math:`(1, 5, 9)`, instead of operating the dim-filling process first, it raises errors directly.
1252
+
1253
+ Args:
1254
+ input (Tensor): The input Tensor.
1255
+ shape (tuple): The target shape to broadcast. Can be fully specified, or have -1 in one position
1256
+ where it will be substituted by the input tensor's shape in that position, see example.
1257
+
1258
+ Returns:
1259
+ Tensor, with the given `shape` and the same data type as `input`.
1260
+
1261
+ Raises:
1262
+ TypeError: If `shape` is not a tuple.
1263
+ ValueError: If the target and input shapes are incompatible, or if a - 1 in the target shape is in an invalid
1264
+ location.
1265
+
1266
+ Supported Platforms:
1267
+ ``Ascend``
1268
+
1269
+ Examples:
1270
+ >>> import numpy as np
1271
+ >>> from mindspore import Tensor
1272
+ >>> from mindspore.ops.auto_generate import BroadcastToView
1273
+ >>> shape = (2, 3)
1274
+ >>> x = Tensor(np.array([1, 2, 3]).astype(np.float32))
1275
+ >>> output = BroadcastToView()(x, shape)
1276
+ >>> print(output)
1277
+ [[1. 2. 3.]
1278
+ [1. 2. 3.]]
1279
+ >>> shape = (-1, 2)
1280
+ >>> x = Tensor(np.array([[1], [2]]).astype(np.float32))
1281
+ >>> output = BroadcastToView()(x, shape)
1282
+ >>> print(output)
1283
+ [[1. 1.]
1284
+ [2. 2.]]
1285
+ """
1286
+ return broadcast_to_view_op(input, shape)
1287
+
1212
1288
  cast_op=Cast()
1213
1289
 
1214
1290
  def cast(input, dtype):
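
As a minimal sketch of the broadcast rule documented for broadcast_to_view above, the pure-Python helper below infers the output shape, including the -1 handling; the name infer_broadcast_shape is made up for illustration and is not part of MindSpore:

def infer_broadcast_shape(input_shape, target_shape):
    extra = len(target_shape) - len(input_shape)
    if extra < 0:
        raise ValueError("input rank must not exceed target rank")
    # A -1 in the leading (newly added) dimensions has no matching input dim.
    if any(t == -1 for t in target_shape[:extra]):
        raise ValueError("-1 in a leading dimension is not broadcastable")
    padded = (1,) * extra + tuple(input_shape)   # fill missing leading dims with ones
    out = []
    for x, y in zip(padded, target_shape):
        if y == -1 or y == x:        # -1 keeps the input's size; equal sizes pass through
            out.append(x)
        elif x == 1:                 # size-1 dims stretch to the target size
            out.append(y)
        else:
            raise ValueError(f"cannot broadcast {x} to {y}")
    return tuple(out)

print(infer_broadcast_shape((3,), (2, 3)))                    # (2, 3)
print(infer_broadcast_shape((2, 1), (-1, 2)))                 # (2, 2)
print(infer_broadcast_shape((1, 5, 9), (3, 1, 4, 1, 5, 9)))   # (3, 1, 4, 1, 5, 9)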
@@ -1750,7 +1826,7 @@ def correlate(a, v, pad_mode='valid'):
1750
1826
 
1751
1827
  Note:
1752
1828
  - `correlate` is currently only used in `mindscience` scientific computing scenarios and
1753
- dose not support other usage scenarios.
1829
+ does not support other usage scenarios.
1754
1830
  - `correlate` is not supported on Windows platform yet.
1755
1831
 
1756
1832
  Args:
@@ -1909,6 +1985,112 @@ def count_nonzero(input, dim=None):
1909
1985
  return count_nonzero_op(input, dim)
1910
1986
 
1911
1987
 
1988
+ def cross_entropy_loss_grad(grad_loss, log_prob, target, weight=None, grad_zloss=None, lse_for_zloss=None, reduction='mean', ignore_index=-100, label_smoothing=0.0, lse_square_scale_for_zloss=0.0):
1989
+ r"""
1990
+
1991
+ """
1992
+ return cross_entropy_loss_grad_op(grad_loss, log_prob, target, weight, grad_zloss, lse_for_zloss, reduction, ignore_index, label_smoothing, lse_square_scale_for_zloss)
1993
+
1994
+
1995
+ def cross_entropy_loss(input, target, weight=None, reduction='mean', ignore_index=-100, label_smoothing=0.0, lse_square_scale_for_zloss=0.0, return_zloss=False):
1996
+ r"""
1997
+ Computes the cross entropy loss between input and target.
1998
+
1999
+ Assume the number of classes :math:`C` in the range :math:`[0, C)`,
2000
+ the loss with reduction=none can be described as:
2001
+
2002
+ .. math::
2003
+
2004
+ \ell(x, y) = L = \{l_1,\dots,l_N\}^\top, \quad
2005
+ l_n = - w_{y_n} \log \frac{\exp(x_{n,y_n})}{\sum_{c=1}^C \exp(x_{n,c})}
2006
+ \cdot \mathbb{1}\{y_n \not= \text{ignore_index}\}
2007
+
2008
+ where :math:`x` is the inputs, :math:`y` is the target, :math:`w` is the weight, :math:`N` is the batch size,
2009
+ :math:`c` belonging to :math:`[0, C-1]` is class index, where :math:`C` is the number of classes.
2010
+
2011
+ If `reduction` is not ``None`` (default ``'mean'`` ), then
2012
+
2013
+ .. math::
2014
+
2015
+ \ell(x, y) = \begin{cases}
2016
+ \sum_{n=1}^N \frac{1}{\sum_{n=1}^N w_{y_n} \cdot \mathbb{1}\{y_n \not= \text{ignore_index}\}} l_n, &
2017
+ \text{if reduction} = \text{'mean',}\\
2018
+ \sum_{n=1}^N l_n, &
2019
+ \text{if reduction} = \text{'sum'.}
2020
+ \end{cases}
2021
+
2022
+ .. warning::
2023
+ This is an experimental API that is subject to change or deletion.
2024
+
2025
+ Inputs:
2026
+ - **input** (Tensor) - Tensor of shape of :math:`(N, C)` where `C = number of classes`, data type must be bfloat16, float16 or float32.
2027
+ - **target** (Tensor) - For class indices, tensor of shape :math:`(N)`, data type must be int64. The value must be in range [0, C).
2028
+ - **weight** (Tensor, optional) - A rescaling weight applied to the loss of each batch element.
2029
+ If not None, the shape is :math:`(C,)`, data type must be float32. Default: ``None`` .
2030
+ - **reduction** (str, optional) - Apply specific reduction method to the output: ``'none'`` , ``'mean'`` ,
2031
+ ``'sum'`` . Default: ``'mean'`` .
2032
+
2033
+ - ``'none'``: no reduction will be applied.
2034
+ - ``'mean'``: compute and return the weighted mean of elements in the output.
2035
+ - ``'sum'``: the output elements will be summed.
2036
+
2037
+ - **ignore_index** (int, optional) - Specifies a target value that is ignored and does not contribute to the input
2038
+ gradient. When set to negative values, no target value is ignored. It should be int64.
2039
+ Default: ``-100`` .
2040
+ - **label_smoothing** (float, optional) - Label smoothing values, a regularization tool used to prevent the model
2041
+ from overfitting when calculating Loss. This value must be 0.0 currently. Default: ``0.0`` .
2042
+ - **lse_square_scale_for_zloss** (float, optional) - The value range is [0.0, 1.0), not enabled for now, can only be 0.0. Default: ``0.0`` .
2043
+ - **return_zloss** (float, optional) - Not enabled for now, can only be ``False``. Default: ``False`` .
2044
+
2045
+ Outputs:
2046
+ A tuple consisting of 4 Tensors.
2047
+
2048
+ - **loss** (Tensor) - loss between `input` and `target`, the dtype is the same as `input`.
2049
+
2050
+ - If `reduction` is ``'none'`` , the shape is :math:`(N,)` .
2051
+ - If `reduction` is ``'sum'` or ``'mean'`, the shape is :math:`(1,)` .
2052
+
2053
+ - **log_prob** (Tensor) - the shape is :math:`(N, C)` with the same dtype as `input`.
2054
+ - **zloss** (Tensor) - the shape is :math:`(N,)` if `return_zloss` is True, or the shape is :math:`(0,)` with the same dtype as `input`. This parameter is disabled for now.
2055
+ - **lse_for_zloss** (Tensor) - the shape is :math:`(N,)` if `lse_square_scale_for_zloss` is not 0.0, or the shape is :math:`(0,)` with the same dtype as `input`. This parameter is disabled for now.
2056
+
2057
+
2058
+ Raises:
2059
+ ValueError: If `reduction` is not one of ``'none'``, ``'mean'`` or ``'sum'``.
2060
+ TypeError: If `input`, `target` or `weight` is not a Tensor.
2061
+
2062
+ Supported Platforms:
2063
+ ``Ascend``
2064
+
2065
+ Examples:
2066
+ >>> import mindspore
2067
+ >>> import numpy as np
2068
+ >>> from mindspore import Tensor, nn, ops
2069
+ >>>
2070
+ >>>
2071
+ >>> class Net(nn.Cell):
2072
+ ... def __init__(self):
2073
+ ... super(Net, self).__init__()
2074
+ ... self.cross_entropy_loss = ops.auto_generate.CrossEntropyLoss()
2075
+ ...
2076
+ ... def construct(self, input, target, weight):
2077
+ ... result = self.cross_entropy_loss(input, target, weight)
2078
+ ... return result
2079
+ ...
2080
+ >>>
2081
+ >>> net = Net()
2082
+ >>> input = Tensor(np.array([[0.2, 0.7, 0.1], [0.2, 0.7, 0.1]]), mindspore.float32)
2083
+ >>> target = Tensor(np.array([0, 1]), mindspore.int64)
2084
+ >>> weight = Tensor(np.array([1, 0.5, 0.5]), mindspore.float32)
2085
+ >>> output = net(input, target, weight)
2086
+ >>> print(output[:2])
2087
+ (Tensor(shape=[1], dtype=Float32, value= [ 1.10128295e+00]), Tensor(shape=[2, 3], dtype=Float32, value=
2088
+ [[-1.26794958e+00, -7.67949641e-01, -1.36794960e+00],
2089
+ [-1.26794958e+00, -7.67949641e-01, -1.36794960e+00]]))
2090
+ """
2091
+ return cross_entropy_loss_op(input, target, weight, reduction, ignore_index, label_smoothing, lse_square_scale_for_zloss, return_zloss)
2092
+
2093
+
1912
2094
  def cummax(input, axis):
1913
2095
  r"""
1914
2096
  Return the cumulative maximum values and their indices along the given axis of the tensor.
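
To make the reduction='mean' formula documented for cross_entropy_loss above concrete, the NumPy sketch below reproduces the loss and log_prob values from the docstring example; it mirrors the math only (ignore_index handling omitted) and is not the fused Ascend kernel:

import numpy as np

def cross_entropy_mean(x, target, weight):
    # log-softmax over the class dimension
    log_prob = x - np.log(np.exp(x).sum(axis=1, keepdims=True))
    w = weight[target]                                    # per-sample weight w_{y_n}
    losses = -w * log_prob[np.arange(len(target)), target]
    return losses.sum() / w.sum(), log_prob               # weighted mean, as in the formula above

x = np.array([[0.2, 0.7, 0.1], [0.2, 0.7, 0.1]], dtype=np.float32)
target = np.array([0, 1])
weight = np.array([1.0, 0.5, 0.5], dtype=np.float32)
loss, log_prob = cross_entropy_mean(x, target, weight)
print(float(loss))   # ~1.10128, matching the Tensor value printed in the example above
print(log_prob)      # ~[[-1.268 -0.768 -1.368], [-1.268 -0.768 -1.368]]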
@@ -1960,7 +2142,7 @@ def cummin_ext(input, dim):
1960
2142
  \end{array}
1961
2143
 
1962
2144
  .. note::
1963
- O2 mode is not supported in Ascend.
2145
+ GE backend is not supported in Ascend.
1964
2146
 
1965
2147
  Args:
1966
2148
  input (Tensor): The input Tensor, The dimension must be greater than 0.
@@ -2040,61 +2222,6 @@ def cumsum_ext(input, dim, dtype=None):
2040
2222
  return cumsum_ext_op(input, dim, dtype)
2041
2223
 
2042
2224
 
2043
- def decoder_k_v_cache(cache, update, valid_seq_len, batch_index, seq_len_axis, new_max_seq_len, cur_max_seq_len):
2044
- r"""
2045
- The DecoderKVCache is used for decoding the KVCache of transformer network.
2046
-
2047
- Args:
2048
- cache (Tensor): The cahe tensor with data type of int8, uint8, int16, uint16, float16, float32 and int32.
2049
- When format is BHSD, cache tensor of shape
2050
- :math:`(batch\_size, num\_head, max\_seq\_length, size\_pre\_head)`.
2051
- When format is BSD, cache tensor of shape
2052
- :math:`(batch\_size, max\_seq\_length, hidden\_size)`.
2053
- update (Tensor]): The tensor which is used to update the cache tensor. Same data type as cache tensor.
2054
- When format is BHSD, update tensor of shape
2055
- :math:`(batch\_size, num\_head, update\_seq\_length, size\_pre\_head)`.
2056
- When format is BSD, update tensor of shape
2057
- :math:`(batch\_size, update\_seq\_length, hidden\_size)`.
2058
- valid_seq_len (Tensor): The valid_seq_len tensor with data type of int64.
2059
- Valid_seq_len tensor of shape :math:`(batch\_size)`.
2060
- batch_index (Tensor): The batch_index tensor with data type of int64.
2061
- Batch_index tensor of shape :math:`(batch\_size)`. Indicate that which batch of cache tensor is going to be update. Not abel for now.
2062
- seq_len_axis (Tensor): The seq_len_axis indicate which axis is seq_eln, set to '1' or '2'. Not able for now.
2063
- new_max_seq_len (Tensor): The new_max_seq_len tensor with data type of int64.
2064
- New_max_seq_len tensor of shape :math:`(1)`.
2065
- Indicate that user want to change the shape of cache tensor from
2066
- :math:`(batch\_size, num_head, max\_seq\_length, hidden\_size)`. to
2067
- :math:`(batch\_size * max\_seq\_length / new\_max\_seq\_length, num_head, new\_max\_seq\_length, hidden\_size)`.
2068
- to update the cache tensor. This will not real change the shape of `cache` tensor.
2069
- cur_max_seq_len (Tensor): The new_max_seq_len tensor with data type of int64.
2070
- Cur_max_seq_len tensor of shape :math:`(1)`. Keep the current seq_len of cache tensor. Not abel for now.
2071
-
2072
- Outputs:
2073
- With same data type and same shape as `cache` tensor.
2074
-
2075
- Supported Platforms:
2076
- ``Ascend``
2077
-
2078
- Examples:
2079
- >>> from mindspore.ops.operations import _inner_ops
2080
- >>> b = 4
2081
- >>> h = 40
2082
- >>> max_s = 1024
2083
- >>> s = 1
2084
- >>> d = 128
2085
- >>> cache = Tensor(np.random.randn(b, h, max_s, d).astype(np.float16))
2086
- >>> update = Tensor(np.random.randn(b, h, s, d).astype(np.float16))
2087
- >>> valid_seq_len = Tensor(np.random.randint(-1, s, size=b).astype(np.int64))
2088
- >>> batch_index = Tensor(np.random.choice(np.arange(-1, b), size=b, replace=False).astype(np.int64))
2089
- >>> new_max_seq_len = Tensor(np.random.randn(1).astype(np.int64))
2090
- >>> cur_max_seq_len = Tensor(np.random.randn(1).astype(np.int64))
2091
- >>> decoder_kv_cache = _inner_ops.DecoderKVCache()
2092
- >>> output = decoder_kv_cache(cache, update, valid_seq_len, batch_index, Tensor(2), new_max_seq_len, cur_max_seq_len)
2093
- >>> print(cache)
2094
- """
2095
- return decoder_k_v_cache_op(cache, update, valid_seq_len, batch_index, seq_len_axis, new_max_seq_len, cur_max_seq_len)
2096
-
2097
-
2098
2225
  def dense(input, weight, bias=None):
2099
2226
  r"""
2100
2227
  Applies the dense connected operation to the `input`. The dense function is defined as:
@@ -2233,9 +2360,9 @@ def diag_ext(input, diagonal=0):
2233
2360
  ``Ascend``
2234
2361
 
2235
2362
  Examples:
2236
- >>> from mindspore import Tensor, mint
2363
+ >>> from mindspore import Tensor, ops
2237
2364
  >>> input = Tensor([1, 2, 3, 4]).astype('int32')
2238
- >>> output = mint.diag(input)
2365
+ >>> output = ops.auto_generate.diag_ext(input)
2239
2366
  >>> print(output)
2240
2367
  [[1 0 0 0]
2241
2368
  [0 2 0 0]
@@ -2331,10 +2458,10 @@ def dot(input, other):
2331
2458
 
2332
2459
  Examples:
2333
2460
  >>> import mindspore
2334
- >>> from mindspore import Tensor, mint
2461
+ >>> from mindspore import Tensor, ops
2335
2462
  >>> x = Tensor([2.0, 3.0], mindspore.float32)
2336
2463
  >>> y = Tensor([2.0, 1.0], mindspore.float32)
2337
- >>> output = mint.dot(x, y)
2464
+ >>> output = ops.auto_generate.dot(x, y)
2338
2465
  >>> print(output)
2339
2466
  7.0
2340
2467
  >>> print(output.dtype)
@@ -2781,6 +2908,46 @@ def expand_dims(input_x, axis):
2781
2908
  return expand_dims_op(input_x, axis)
2782
2909
 
2783
2910
 
2911
+ def expand_dims_view(input, dim):
2912
+ r"""
2913
+ Adds an additional dimension to `input_x` at the given axis, the dimension
2914
+ of `input_x` should be greater than or equal to 1.
2915
+
2916
+ Note:
2917
+ If the specified axis is a negative number, the index is counted
2918
+ backward from the end and starts at 1.
2919
+
2920
+ Args:
2921
+ input_x (Tensor): The shape of tensor is :math:`(x_1, x_2, ..., x_R)`.
2922
+ axis (int): Specifies the dimension index at which to expand
2923
+ the shape of `input_x`. The value of axis must be in the range
2924
+ `[-input_x.ndim-1, input_x.ndim]`. Only constant value is allowed.
2925
+
2926
+ Returns:
2927
+ Tensor, the shape of tensor is :math:`(1, x_1, x_2, ..., x_R)` if the
2928
+ value of `axis` is 0. It has the same data type as `input_x`.
2929
+
2930
+ Raises:
2931
+ TypeError: If `axis` is not an int.
2932
+ ValueError: If `axis` is not in the valid range :math:`[-a.ndim-1, a.ndim]`.
2933
+
2934
+ Supported Platforms:
2935
+ ``Ascend``
2936
+
2937
+ Examples:
2938
+ >>> import mindspore
2939
+ >>> import numpy as np
2940
+ >>> from mindspore import Tensor, ops
2941
+ >>> from mindspore.ops.auto_generate import ExpandDimsView
2942
+ >>> input_tensor = Tensor(np.array([[2, 2], [2, 2]]), mindspore.float32)
2943
+ >>> output = ExpandDimsView()(input_tensor, 0)
2944
+ >>> print(output)
2945
+ [[[2. 2.]
2946
+ [2. 2.]]]
2947
+ """
2948
+ return expand_dims_view_op(input, dim)
2949
+
2950
+
2784
2951
  def expm1(input):
2785
2952
  r"""
2786
2953
  Compute exponential of the input tensor, then minus 1, element-wise.
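
The expand_dims_view behaviour documented above (inserting a size-1 dimension at the given position) matches NumPy's expand_dims; a quick NumPy illustration, not the MindSpore view kernel:

import numpy as np

x = np.array([[2., 2.], [2., 2.]], dtype=np.float32)
print(np.expand_dims(x, 0).shape)    # (1, 2, 2), the shape shown in the example above
print(np.expand_dims(x, -1).shape)   # (2, 2, 1): a negative dim counts from the end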
@@ -2936,7 +3103,7 @@ def fft2(input, s=None, dim=(-2, -1), norm=None):
2936
3103
 
2937
3104
  Note:
2938
3105
  - `fft2` is currently only used in `mindscience` scientific computing scenarios and
2939
- dose not support other usage scenarios.
3106
+ does not support other usage scenarios.
2940
3107
  - `fft2` is not supported on Windows platform yet.
2941
3108
 
2942
3109
  Args:
@@ -3000,7 +3167,7 @@ def fftfreq(n, d=1.0, dtype=None):
3000
3167
 
3001
3168
  Note:
3002
3169
  - `fftfreq` is currently only used in `mindscience` scientific computing scenarios and
3003
- dose not support other usage scenarios.
3170
+ does not support other usage scenarios.
3004
3171
  - `fftfreq` is not supported on Windows platform yet.
3005
3172
 
3006
3173
  Args:
@@ -3033,7 +3200,7 @@ def fftn(input, s=None, dim=None, norm=None):
3033
3200
 
3034
3201
  Note:
3035
3202
  - `fftn` is currently only used in `mindscience` scientific computing scenarios and
3036
- dose not support other usage scenarios.
3203
+ does not support other usage scenarios.
3037
3204
  - `fftn` is not supported on Windows platform yet.
3038
3205
 
3039
3206
  Args:
@@ -3093,7 +3260,7 @@ def fftshift(input, dim=None):
3093
3260
 
3094
3261
  Note:
3095
3262
  - `fftshift` is currently only used in `mindscience` scientific computing scenarios and
3096
- dose not support other usage scenarios.
3263
+ does not support other usage scenarios.
3097
3264
  - `fftshift` is not supported on Windows platform yet.
3098
3265
 
3099
3266
  Args:
@@ -3129,7 +3296,7 @@ def fft(input, n=None, dim=-1, norm=None):
3129
3296
 
3130
3297
  Note:
3131
3298
  - `fft` is currently only used in `mindscience` scientific computing scenarios and
3132
- dose not support other usage scenarios.
3299
+ does not support other usage scenarios.
3133
3300
  - `fft` is not supported on Windows platform yet.
3134
3301
 
3135
3302
  Args:
@@ -3388,6 +3555,13 @@ def frac_ext(input):
3388
3555
  return frac_op(input)
3389
3556
 
3390
3557
 
3558
+ def fused_add_topk_div(x, add_num, group_num, group_topk, n, k, activate_type=0, is_norm=True, scale=2.5, mapping_num=None, mapping_table=None, enable_expert_mapping=False):
3559
+ r"""
3560
+
3561
+ """
3562
+ return fused_add_topk_div_op(x, add_num, group_num, group_topk, n, k, activate_type, is_norm, scale, mapping_num, mapping_table, enable_expert_mapping)
3563
+
3564
+
3391
3565
  def gather_d(x, dim, index):
3392
3566
  r"""
3393
3567
  Gathers elements along an axis specified by dim.
@@ -3458,7 +3632,7 @@ def gather(input_params, input_indices, axis, batch_dims=0):
3458
3632
  - The value of input_indices must be in the range of `[0, input_param.shape[axis])`.
3459
3633
  On CPU and GPU, an error is raised if an out of bound indice is found. On Ascend, the results may be
3460
3634
  undefined.
3461
- - The data type of input_params cannot be `mindspore.bool_` .
3635
+ - The data type of input_params cannot be `mindspore.bool` .
3462
3636
  - The shape of returned tensor is :math:`input\_params.shape[:axis] + input\_indices.shape[batch\_dims:] + input\_params.shape[axis + 1:]` .
3463
3637
 
3464
3638
  Args:
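
The gather note above gives the output-shape rule input_params.shape[:axis] + input_indices.shape[batch_dims:] + input_params.shape[axis + 1:]; a quick plain-Python check of that formula with assumed example shapes:

# Hypothetical shapes chosen only to exercise the rule quoted above.
params_shape, indices_shape = (4, 3, 5), (4, 2)
axis, batch_dims = 2, 1
out_shape = params_shape[:axis] + indices_shape[batch_dims:] + params_shape[axis + 1:]
print(out_shape)  # (4, 3, 2)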
@@ -3579,20 +3753,6 @@ def geqrf(input):
3579
3753
  return geqrf_op(input)
3580
3754
 
3581
3755
 
3582
- def gmm_backward(grad, x, weight, group_list=None):
3583
- r"""
3584
-
3585
- """
3586
- return gmm_backward_op(grad, x, weight, group_list)
3587
-
3588
-
3589
- def gmm_v2_backward(grad, x, weight, group_list=None, group_list_type=0):
3590
- r"""
3591
-
3592
- """
3593
- return gmm_v2_backward_op(grad, x, weight, group_list, group_list_type)
3594
-
3595
-
3596
3756
  def greater_equal(input, other):
3597
3757
  r"""
3598
3758
  Compute the value of :math:`input >= other` element-wise.
@@ -3675,7 +3835,7 @@ def hfft2(input, s=None, dim=(-2, -1), norm=None):
3675
3835
 
3676
3836
  Note:
3677
3837
  - `hfft2` is currently only used in `mindscience` scientific computing scenarios and
3678
- dose not support other usage scenarios.
3838
+ does not support other usage scenarios.
3679
3839
  - `hfft2` is not supported on Windows platform yet.
3680
3840
 
3681
3841
  Args:
@@ -3736,7 +3896,7 @@ def hfftn(input, s=None, dim=None, norm=None):
3736
3896
 
3737
3897
  Note:
3738
3898
  - `hfftn` is currently only used in `mindscience` scientific computing scenarios and
3739
- dose not support other usage scenarios.
3899
+ does not support other usage scenarios.
3740
3900
  - `hfftn` is not supported on Windows platform yet.
3741
3901
 
3742
3902
  Args:
@@ -3797,7 +3957,7 @@ def hfft(input, n=None, dim=-1, norm=None):
3797
3957
 
3798
3958
  Note:
3799
3959
  - `hfft` is currently only used in `mindscience` scientific computing scenarios and
3800
- dose not support other usage scenarios.
3960
+ does not support other usage scenarios.
3801
3961
  - `hfft` is not supported on Windows platform yet.
3802
3962
 
3803
3963
  Args:
@@ -4058,7 +4218,7 @@ def ifft2(input, s=None, dim=(-2, -1), norm=None):
4058
4218
 
4059
4219
  Note:
4060
4220
  - `ifft2` is currently only used in `mindscience` scientific computing scenarios and
4061
- dose not support other usage scenarios.
4221
+ does not support other usage scenarios.
4062
4222
  - `ifft2` is not supported on Windows platform yet.
4063
4223
 
4064
4224
  Args:
@@ -4118,7 +4278,7 @@ def ifftn(input, s=None, dim=None, norm=None):
4118
4278
 
4119
4279
  Note:
4120
4280
  - `ifftn` is currently only used in `mindscience` scientific computing scenarios and
4121
- dose not support other usage scenarios.
4281
+ does not support other usage scenarios.
4122
4282
  - `ifftn` is not supported on Windows platform yet.
4123
4283
 
4124
4284
  Args:
@@ -4178,7 +4338,7 @@ def ifftshift(input, dim=None):
4178
4338
 
4179
4339
  Note:
4180
4340
  - `ifftshift` is currently only used in `mindscience` scientific computing scenarios and
4181
- dose not support other usage scenarios.
4341
+ does not support other usage scenarios.
4182
4342
  - `ifftshift` is not supported on Windows platform yet.
4183
4343
 
4184
4344
  Args:
@@ -4214,7 +4374,7 @@ def ifft(input, n=None, dim=-1, norm=None):
4214
4374
 
4215
4375
  Note:
4216
4376
  - `ifft` is currently only used in `mindscience` scientific computing scenarios and
4217
- dose not support other usage scenarios.
4377
+ does not support other usage scenarios.
4218
4378
  - `ifft` is not supported on Windows platform yet.
4219
4379
 
4220
4380
  Args:
@@ -4270,7 +4430,7 @@ def ihfft2(input, s=None, dim=(-2, -1), norm=None):
4270
4430
 
4271
4431
  Note:
4272
4432
  - `ihfft2` is currently only used in `mindscience` scientific computing scenarios and
4273
- dose not support other usage scenarios.
4433
+ does not support other usage scenarios.
4274
4434
  - `ihfft2` is not supported on Windows platform yet.
4275
4435
 
4276
4436
  Args:
@@ -4331,7 +4491,7 @@ def ihfftn(input, s=None, dim=None, norm=None):
4331
4491
 
4332
4492
  Note:
4333
4493
  - `ihfftn` is currently only used in `mindscience` scientific computing scenarios and
4334
- dose not support other usage scenarios.
4494
+ does not support other usage scenarios.
4335
4495
  - `ihfftn` is not supported on Windows platform yet.
4336
4496
 
4337
4497
  Args:
@@ -4392,7 +4552,7 @@ def ihfft(input, n=None, dim=-1, norm=None):
4392
4552
 
4393
4553
  Note:
4394
4554
  - `ihfft` is currently only used in `mindscience` scientific computing scenarios and
4395
- dose not support other usage scenarios.
4555
+ does not support other usage scenarios.
4396
4556
  - `ihfft` is not supported on Windows platform yet.
4397
4557
 
4398
4558
  Args:
@@ -4513,56 +4673,6 @@ def unfold_ext(input, kernel_size, dilation=1, padding=0, stride=1):
4513
4673
  return im2col_ext_op(input, kernel_size, dilation, padding, stride)
4514
4674
 
4515
4675
 
4516
- def index_add_ext(input, dim, index, source, alpha=1):
4517
- r"""
4518
- Accumulate the elements of `alpha` times `source` into the `input` by adding to the index in the order given in `index`. For example, if ``dim == 0`` , ``index[i] == j`` , and ``alpha = -1`` , then the `i` th row of `source` is subtracted from the `j` th row of `input` . The `dim` th dimension of `source` must have the same size as the length of `index` , and all other dimensions must match `input`, or an error will be raised. For a 3-D tensor, the output is defined as follows:
4519
-
4520
- .. math::
4521
- \begin{array}{ll}
4522
- input[index[i],\ :,\ :]\ +=\ alpha * source[i,\ :,\ :] \qquad \#if\ dim == 0 \\
4523
- input[:,\ \ index[i],\ :]\ +=\ alpha * source[:,\ \ i,\ :] \qquad \#if\ dim == 1 \\
4524
- input[:,\ :,\ \ index[i]]\ +=\ alpha * source[:,\ :,\ \ i] \qquad\#if\ dim == 2 \\
4525
- \end{array}
4526
-
4527
- .. warning::
4528
- This is an experimental API that is subject to change or deletion.
4529
-
4530
- Args:
4531
- input (Tensor): The input Tensor.
4532
- dim (int): The dimension along which to index.
4533
- index (Tensor): Add the value of "input Tensor" and `source` along the dimension of the `dim` according to the specified index value, with data type int32. The `index` must be 1D with the same size as the size of `source` in the `dim` dimension. The values of `index` should be in [0, b), where the b is the size of "input Tensor" in the `dim` dimension.
4534
- source (Tensor): The input tensor with the value to add. Must have same data type as "input Tensor". The shape must be the same as "input Tensor" except the `dim` th dimension.
4535
- alpha (number, optional): The scalar multiplier for source. Default: ``1``.
4536
-
4537
- Returns:
4538
- Tensor, has the same shape and dtype as `input`.
4539
-
4540
- Raises:
4541
- TypeError: If neither `index` nor `source` is a Tensor.
4542
- ValueError: If the value of `dim` is out of the dimension range of `source` shape.
4543
- ValueError: If `index` rank is not the same as `source` rank.
4544
- ValueError: If shape of `index` is not 1D or size of `index` is not equal to dimension of source[dim].
4545
- ValueError: If the shape of `source` is not the same as that of `input` except the `dim` axis.
4546
-
4547
- Supported Platforms:
4548
- ``Ascend``
4549
-
4550
- Examples:
4551
- >>> import numpy as np
4552
- >>> import mindspore
4553
- >>> from mindspore import Tensor, ops
4554
- >>> x = Tensor(np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]), mindspore.float32)
4555
- >>> index = Tensor(np.array([0, 2]), mindspore.int32)
4556
- >>> y = Tensor(np.array([[0.5, 1.0], [1.0, 1.5], [2.0, 2.5]]), mindspore.float32)
4557
- >>> output = ops.auto_generate.index_add_ext(x, 1, index, y, alpha=1)
4558
- >>> print(output)
4559
- [[ 1.5 2. 4. ]
4560
- [ 5. 5. 7.5]
4561
- [ 9. 8. 11.5]]
4562
- """
4563
- return index_add_ext_op(input, dim, index, source, alpha)
4564
-
4565
-
4566
4676
  def index_fill_scalar(input, dim, index, value):
4567
4677
  r"""
4568
4678
 
@@ -4618,7 +4728,7 @@ def index(input, indices):
4618
4728
  [2 6 5]
4619
4729
  >>> input2 = Tensor(np.arange(4 * 3 * 3).reshape(4, 3, 3), mindspore.int32)
4620
4730
  >>> indices3 = Tensor(np.array([1, 0]), mindspore.int32)
4621
- >>> indices4 = Tensor(np.array([1, 1, 0]), mindspore.bool_)
4731
+ >>> indices4 = Tensor(np.array([1, 1, 0]), mindspore.bool)
4622
4732
  >>> output2 = ops.auto_generate.index(input2, [indices3, indices4])
4623
4733
  >>> print(output2)
4624
4734
  [[ 9 10 11]
@@ -4673,6 +4783,13 @@ def index_select_ext(input, dim, index):
4673
4783
  return index_select_op(input, dim, index)
4674
4784
 
4675
4785
 
4786
+ def inner_moe_token_unpermute(permuted_tokens, sorted_indices, probs=None, padded_mode=False, restore_shape=None):
4787
+ r"""
4788
+
4789
+ """
4790
+ return inner_moe_token_unpermute_op(permuted_tokens, sorted_indices, probs, padded_mode, restore_shape)
4791
+
4792
+
4676
4793
  def inplace_adds_ext(input, other, alpha=1):
4677
4794
  r"""
4678
4795
 
@@ -4687,6 +4804,20 @@ def inplace_add_ext(input, other, alpha=1):
4687
4804
  return inplace_add_ext_op(input, other, alpha)
4688
4805
 
4689
4806
 
4807
+ def inplace_bernoulli_scalar(input, p, seed, offset):
4808
+ r"""
4809
+
4810
+ """
4811
+ return inplace_bernoulli_scalar_op(input, p, seed, offset)
4812
+
4813
+
4814
+ def inplace_bernoulli_tensor(input, p, seed, offset):
4815
+ r"""
4816
+
4817
+ """
4818
+ return inplace_bernoulli_tensor_op(input, p, seed, offset)
4819
+
4820
+
4690
4821
  def inplace_clamp_scalar(input, min=None, max=None):
4691
4822
  r"""
4692
4823
 
@@ -4701,11 +4832,11 @@ def inplace_clamp_tensor(input, min=None, max=None):
4701
4832
  return inplace_clamp_tensor_op(input, min, max)
4702
4833
 
4703
4834
 
4704
- def inplace_copy(input, src):
4835
+ def inplace_copy(input, src, non_blocking=False):
4705
4836
  r"""
4706
4837
 
4707
4838
  """
4708
- return inplace_copy_op(input, src)
4839
+ return inplace_copy_op(input, src, non_blocking)
4709
4840
 
4710
4841
 
4711
4842
  def divmod_scalar_(input, other, rounding_mode=None):
@@ -4903,9 +5034,9 @@ def inplace_hardtanh(input, min_val=-1, max_val=1):
4903
5034
 
4904
5035
  Examples:
4905
5036
  >>> import mindspore
4906
- >>> from mindspore import Tensor, mint
5037
+ >>> from mindspore import Tensor, ops
4907
5038
  >>> x = Tensor([-1, -2, 0, 2, 1], mindspore.float16)
4908
- >>> mint.hardtanh_(x, min_val=-1.0, max_val=1.0)
5039
+ >>> ops.auto_generate.inplace_hardtanh(x, min_val=-1.0, max_val=1.0)
4909
5040
  >>> print(x)
4910
5041
  [-1. -1. 0. 1. 1.]
4911
5042
  """
@@ -4980,6 +5111,51 @@ def masked_fill_tensor_(input, mask, value):
4980
5111
  return inplace_masked_fill_tensor_op(input, mask, value)
4981
5112
 
4982
5113
 
5114
+ def matmul_add_(x, weight, C):
5115
+ r"""
5116
+ Fusion Operator of Transpose, Matmul, and InplaceAdd.
5117
+
5118
+ .. warning::
5119
+ - This is an experimental API that is subject to change or deletion.
5120
+ - This API is only supported in Atlas A2 training series for now.
5121
+         - This API is only supported in GRAPH mode.
5122
+
5123
+ Args:
5124
+ x (Tensor): Matrix A in matrix multiplication, with shape :math:`(k, m)` or :math:`(batch, k, m)`,
5125
+ whose type should be float16 or bfloat16.
5126
+ weight (Tensor): Matrix B in matrix multiplication, with shape :math:`(k, n)` or :math:`(batch, k, n)`,
5127
+ whose type should be float16 or bfloat16.
5128
+ C (Tensor): A Tensor acting as both input and output, with type of float32.
5129
+             Its shape should be :math:`(m, n)` or :math:`(batch, m, n)`.
5130
+
5131
+ Returns:
5132
+ Tensor, has the same shape and data type as `C`.
5133
+
5134
+ Raises:
5135
+ TypeError: If the dtype of `weight` is not the same as `x`.
5136
+ ValueError: If the ranks of `x` , `weight` and `C` are not the same.
5137
+
5138
+ Supported Platforms:
5139
+ ``Ascend``
5140
+
5141
+ Examples:
5142
+ >>> import mindspore
5143
+ >>> import numpy as np
5144
+ >>> from mindspore import Tensor, ops, nn, context
5145
+ >>> context.set_context(mode=context.GRAPH_MODE, jit_config={"jit_level": "O0"})
5146
+ >>> class Net(nn.Cell):
5147
+ ... def construct(self, x, weight, C):
5148
+ ... return ops.auto_generate.inplace_matmul_add_op(x, weight, C)
5149
+ >>> x = Tensor(np.random.randn(10, 20), mindspore.float16)
5150
+ >>> weight = Tensor(np.random.randn(10, 8), mindspore.float16)
5151
+ >>> C = Tensor(np.random.randn(20, 8), mindspore.float32)
5152
+ >>> output = Net()(x, weight, C)
5153
+ >>> print(output.shape)
5154
+ (20, 8)
5155
+ """
5156
+ return inplace_matmul_add_op(x, weight, C)
5157
+
5158
+
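Judging from the documented shapes (`x` is :math:`(k, m)`, `weight` is :math:`(k, n)`, `C` is :math:`(m, n)`), the fused Transpose + Matmul + InplaceAdd appears to compute `C += x.T @ weight` in a single kernel. Below is a minimal NumPy sketch of that shape contract, for illustration only; the array names and dtypes are illustrative, not part of the API.

```python
import numpy as np

# Shapes taken from the docstring example: x is (k, m), weight is (k, n), C is (m, n).
k, m, n = 10, 20, 8
x = np.random.randn(k, m).astype(np.float16)
weight = np.random.randn(k, n).astype(np.float16)
C = np.random.randn(m, n).astype(np.float32)

# Reference result the fusion is expected to accumulate into C:
# transpose x, multiply by weight, then add in place.
C += x.astype(np.float32).T @ weight.astype(np.float32)
print(C.shape)  # (20, 8), matching the docstring example
```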
4983
5159
  def inplace_muls(input, other):
4984
5160
  r"""
4985
5161
 
@@ -5008,21 +5184,67 @@ def inplace_scatter_add(input, dim, index, src):
5008
5184
  return inplace_scatter_add_op(input, dim, index, src)
5009
5185
 
5010
5186
 
5011
- def inplace_stop_gradient(input):
5187
+ def inplace_silu(input):
5012
5188
  r"""
5013
-
5014
- """
5015
- return inplace_stop_gradient_op(input)
5189
+ Computes Sigmoid Linear Unit of input element-wise. The SiLU function is defined as:
5016
5190
 
5191
+ .. math::
5017
5192
 
5018
- def sub_tensor_(input, other, alpha=1):
5019
- r"""
5020
-
5021
- """
5022
- return inplace_sub_ext_op(input, other, alpha)
5193
+ \text{SiLU}(x) = x * \sigma(x),
5023
5194
 
5195
+ where :math:`x` is an element of the input, :math:`\sigma(x)` is Sigmoid function.
5024
5196
 
5025
- def sub_scalar_(input, other, alpha=1):
5197
+ .. math::
5198
+
5199
+ \text{sigma}(x_i) = \frac{1}{1 + \exp(-x_i)},
5200
+
5201
+ SiLU Function Graph:
5202
+
5203
+ .. image:: ../images/SiLU.png
5204
+ :align: center
5205
+
5206
+ Args:
5207
+ input (Tensor): `input` is :math:`x` in the preceding formula. Input with the data type
5208
+ float16 or float32.
5209
+ inplace (bool, optional): If it is ``True``, enable the in place update function.
5210
+ Default value: ``False``.
5211
+
5212
+ Returns:
5213
+ Tensor, with the same type and shape as the `input`.
5214
+
5215
+ Raises:
5216
+ TypeError: If dtype of `input` is neither float16 nor float32.
5217
+
5218
+ Supported Platforms:
5219
+ ``Ascend`` ``GPU`` ``CPU``
5220
+
5221
+ Examples:
5222
+ >>> import mindspore
5223
+ >>> from mindspore import Tensor, mint
5224
+ >>> import numpy as np
5225
+ >>> input = Tensor(np.array([-1, 2, -3, 2, -1]), mindspore.float16)
5226
+ >>> output = mint.nn.functional.silu(input, inplace=True)
5227
+ >>> print(output)
5228
+ [-0.269 1.762 -0.1423 1.762 -0.269]
5229
+ """
5230
+ return inplace_silu_op(input)
5231
+
5232
+
5233
+ def inplace_stop_gradient(input):
5234
+ r"""
5235
+
5236
+ """
5237
+ return inplace_stop_gradient_op(input)
5238
+
5239
+
5240
+ def sub_tensor_(input, other, alpha=1):
5241
+ r"""
5242
+
5243
+ """
5244
+ return inplace_sub_ext_op(input, other, alpha)
5245
+
5246
+
5247
+ def sub_scalar_(input, other, alpha=1):
5026
5248
  r"""
5027
5249
 
5028
5250
  """
@@ -5049,9 +5271,6 @@ def inplace_threshold(input, threshold, value):
5049
5271
  \text{value}, &\text{ otherwise }
5050
5272
  \end{cases}
5051
5273
 
5052
- .. warning::
5053
- This is an experimental API that is subject to change or deletion.
5054
-
5055
5274
  Args:
5056
5275
  input (Tensor): The input Tensor.
5057
5276
  threshold (Union[int, float]): The value of the threshold.
@@ -5092,7 +5311,7 @@ def irfft2(input, s=None, dim=(-2, -1), norm=None):
5092
5311
 
5093
5312
  Note:
5094
5313
  - `irfft2` is currently only used in `mindscience` scientific computing scenarios and
5095
- dose not support other usage scenarios.
5314
+ does not support other usage scenarios.
5096
5315
  - `irfft2` is not supported on Windows platform yet.
5097
5316
 
5098
5317
  Args:
@@ -5150,7 +5369,7 @@ def irfftn(input, s=None, dim=None, norm=None):
5150
5369
 
5151
5370
  Note:
5152
5371
  - `irfftn` is currently only used in `mindscience` scientific computing scenarios and
5153
- dose not support other usage scenarios.
5372
+ does not support other usage scenarios.
5154
5373
  - `irfftn` is not supported on Windows platform yet.
5155
5374
 
5156
5375
  Args:
@@ -5209,7 +5428,7 @@ def irfft(input, n=None, dim=-1, norm=None):
5209
5428
 
5210
5429
  Note:
5211
5430
  - `irfft` is currently only used in `mindscience` scientific computing scenarios and
5212
- dose not support other usage scenarios.
5431
+ does not support other usage scenarios.
5213
5432
  - `irfft` is not supported on Windows platform yet.
5214
5433
 
5215
5434
  Args:
@@ -5376,12 +5595,12 @@ def kthvalue(input, k, dim=-1, keepdim=False):
5376
5595
  Examples:
5377
5596
  >>> import mindspore
5378
5597
  >>> import numpy as np
5379
- >>> from mindspore import Tensor, mint
5598
+ >>> from mindspore import Tensor, ops
5380
5599
  >>> input_x = Tensor(np.array([[1.01, 2.02, 3.03], [1.04, 2.05, 3.06]]), mindspore.float32)
5381
- >>> out = mint.kthvalue(input_x, 2, 1, False)
5600
+ >>> out = ops.auto_generate.kthvalue(input_x, 2, 1, False)
5382
5601
  >>> print(out)
5383
5602
  (Tensor(shape=[2], dtype=Float32, value= [ 2.01999998e+00, 2.04999995e+00]), Tensor(shape=[2], dtype=Int64, value= [1, 1]))
5384
- >>> out1 = mint.kthvalue(input_x, 2, 1, True)
5603
+ >>> out1 = ops.auto_generate.kthvalue(input_x, 2, 1, True)
5385
5604
  >>> print(out1)
5386
5605
  (Tensor(shape=[2, 1], dtype=Float32, value=
5387
5606
  [[ 2.01999998e+00],
@@ -5392,6 +5611,13 @@ def kthvalue(input, k, dim=-1, keepdim=False):
5392
5611
  return kthvalue_op(input, k, dim, keepdim)
5393
5612
 
5394
5613
 
5614
+ def kv_scale_cache(key_scale, value_scale, key_value_scale_cache, batch_valid_length, cache_mode):
5615
+ r"""
5616
+
5617
+ """
5618
+ return kv_scale_cache_op(key_scale, value_scale, key_value_scale_cache, batch_valid_length, cache_mode)
5619
+
5620
+
5395
5621
  def l1_loss_ext(input, target, reduction='mean'):
5396
5622
  r"""
5397
5623
  Calculate the mean absolute error between the `input` value and the `target` value.
@@ -5669,9 +5895,9 @@ def linalg_qr(A, mode='reduced'):
5669
5895
  Examples:
5670
5896
  >>> import mindspore
5671
5897
  >>> import numpy as np
5672
- >>> from mindspore import Tensor, mint
5898
+ >>> from mindspore import Tensor, ops
5673
5899
  >>> x = Tensor(np.array([[1.0, 1.0, 2.0, 4.0], [1.0, 1.0, 2.0, 4.0]]), mindspore.float32)
5674
- >>> output = mint.linalg.qr(x)
5900
+ >>> output = ops.auto_generate.linalg_qr(x)
5675
5901
  >>> print(output)
5676
5902
  (Tensor(shape=[2, 2], dtype=Float32, value=
5677
5903
  [[-7.07106829e-01, -7.07106769e-01],
@@ -5713,9 +5939,9 @@ def log10_ext(input):
5713
5939
  Examples:
5714
5940
  >>> import mindspore
5715
5941
  >>> import numpy as np
5716
- >>> from mindspore import Tensor, mint
5942
+ >>> from mindspore import Tensor, ops
5717
5943
  >>> x = Tensor(np.array([3.0, 5.0, 7.0]), mindspore.float32)
5718
- >>> output = mint.log10(x)
5944
+ >>> output = ops.auto_generate.log10_ext(x)
5719
5945
  >>> print(output)
5720
5946
  [0.47712136 0.69897 0.845098 ]
5721
5947
  """
@@ -5775,9 +6001,9 @@ def log2_ext(input):
5775
6001
  Examples:
5776
6002
  >>> import mindspore
5777
6003
  >>> import numpy as np
5778
- >>> from mindspore import Tensor, mint
6004
+ >>> from mindspore import Tensor, ops
5779
6005
  >>> x = Tensor(np.array([3.0, 5.0, 7.0]), mindspore.float32)
5780
- >>> output = mint.log2(x)
6006
+ >>> output = ops.auto_generate.log2_ext(x)
5781
6007
  >>> print(output)
5782
6008
  [1.5849625 2.321928 2.807355 ]
5783
6009
  """
@@ -5810,10 +6036,10 @@ def logaddexp2(input, other):
5810
6036
 
5811
6037
  Examples:
5812
6038
  >>> import numpy as np
5813
- >>> from mindspore import Tensor, mint
6039
+ >>> from mindspore import Tensor, ops
5814
6040
  >>> x1 = Tensor(np.array([1, 2, 3]).astype(np.float16))
5815
6041
  >>> x2 = Tensor(np.array(2).astype(np.float16))
5816
- >>> output = mint.logaddexp2(x1, x2)
6042
+ >>> output = ops.auto_generate.logaddexp2(x1, x2)
5817
6043
  >>> print(output)
5818
6044
  [2.586 3. 3.586]
5819
6045
  """
@@ -6047,7 +6273,7 @@ def masked_fill(input_x, mask, value):
6047
6273
  Examples:
6048
6274
  >>> import mindspore
6049
6275
  >>> input_x = mindspore.tensor([1., 2., 3., 4.], mindspore.float32)
6050
- >>> mask = mindspore.tensor([True, True, False, True], mindspore.bool_)
6276
+ >>> mask = mindspore.tensor([True, True, False, True], mindspore.bool)
6051
6277
  >>> output = mindspore.ops.masked_fill(input_x, mask, 0.5)
6052
6278
  >>> print(output)
6053
6279
  [0.5 0.5 3. 0.5]
@@ -6055,6 +6281,13 @@ def masked_fill(input_x, mask, value):
6055
6281
  return masked_fill_op(input_x, mask, value)
6056
6282
 
6057
6283
 
6284
+ def masked_scatter(input, mask, source):
6285
+ r"""
6286
+
6287
+ """
6288
+ return masked_scatter_op(input, mask, source)
6289
+
6290
+
6058
6291
  def masked_select(input, mask):
6059
6292
  r"""
6060
6293
  Return a new 1-D tensor which indexes the `input` tensor according to the boolean `mask`.
@@ -6074,7 +6307,7 @@ def masked_select(input, mask):
6074
6307
  Examples:
6075
6308
  >>> import mindspore
6076
6309
  >>> x = mindspore.tensor([1, 2, 3, 4], mindspore.int64)
6077
- >>> mask = mindspore.tensor([1, 0, 1, 0], mindspore.bool_)
6310
+ >>> mask = mindspore.tensor([1, 0, 1, 0], mindspore.bool)
6078
6311
  >>> output = mindspore.ops.masked_select(x, mask)
6079
6312
  >>> print(output)
6080
6313
  [1 3]
@@ -6451,6 +6684,20 @@ def mish_ext(input):
6451
6684
  return mish_ext_op(input)
6452
6685
 
6453
6686
 
6687
+ def mla(query, q_rope, kv_cache, k_rope, block_tables, attn_mask=None, deq_scale_qk=None, deq_scale_pv=None, q_seq_lens=None, context_lens=None, head_num=32, scale_value=0.0, kv_head_num=1, mask_mode='MASK_NONE', is_ring=0):
6688
+ r"""
6689
+
6690
+ """
6691
+ return mla_op(query, q_rope, kv_cache, k_rope, block_tables, attn_mask, deq_scale_qk, deq_scale_pv, q_seq_lens, context_lens, head_num, scale_value, kv_head_num, mask_mode, is_ring)
6692
+
6693
+
6694
+ def mla_preprocess(input1, gamma1, beta1, quant_scale1, quant_offset1, wdqkv, bias1, gamma2, beta2, quant_scale2, quant_offset2, gamma3, sin1, cos1, sin2, cos2, key_cache, slot_mapping, wuq, bias2, slot_wuk, de_scale1, de_scale2, ctkv_scale, qnope_scale, krope_cache, param_cache_mode=0):
6695
+ r"""
6696
+
6697
+ """
6698
+ return mla_preprocess_op(input1, gamma1, beta1, quant_scale1, quant_offset1, wdqkv, bias1, gamma2, beta2, quant_scale2, quant_offset2, gamma3, sin1, cos1, sin2, cos2, key_cache, slot_mapping, wuq, bias2, slot_wuk, de_scale1, de_scale2, ctkv_scale, qnope_scale, krope_cache, param_cache_mode)
6699
+
6700
+
6454
6701
  def mm_ext(input, mat2):
6455
6702
  r"""
6456
6703
  Returns the matrix product of two arrays.
@@ -6495,6 +6742,254 @@ def mm_ext(input, mat2):
6495
6742
  return mm_ext_op(input, mat2)
6496
6743
 
6497
6744
 
6745
+ def moe_distribute_combine(expand_x, expert_ids, expand_idx, ep_send_counts, expert_scales, ep_world_size, ep_rank_id, moe_expert_num, tp_send_counts=None, x_active_mask=None, activate_scale=None, weight_scale=None, group_list=None, expand_scales=None, group_ep=None, group_tp=None, tp_world_size=0, tp_rank_id=0, expert_shard_type=0, shared_expert_num=0, shared_export_rank_num=0, global_bs=0, out_dtype=0, common_quant_mode=0, group_list_type=0):
6746
+ r"""
6747
+ Parallel communication for Mixture of Experts (MoE). When Tensor Parallelism (TP) communication exists,
6748
+         it first performs ReduceScatter communication followed by Expert Parallelism (EP) AllToAllV communication.
6749
+         Otherwise, only EP AllToAllV communication is performed. Finally, the received data is multiplied by the
6750
+         corresponding weights and summed.
6751
+
6752
+ Notes:
6753
+ This function must be used in conjunction with function `moe_distribute_dispatch`.
6754
+ - A: Maximum tokens to dispatch per rank:
6755
+ - For shared experts: A = BS * ep_world_size * shared_expert_num / shared_expert_rank_num
6756
+ - For MoE experts:
6757
+ - When global_bs = 0: A >= BS * ep_world_size * min(local_expert_num, K)
6758
+ - When global_bs != 0: A >= global_bs * min(local_expert_num, K)
6759
+ - H (hidden size): Dimension of each token's hidden state
6760
+ - Ascend 910B: 0 < H <= 7168, must be multiple of 32
6761
+ - Ascend 910_93: H = 7168
6762
+ - BS (batch sequence size): Number of tokens processed per rank
6763
+ - Ascend 910B: 0 < BS <= 256
6764
+ - Ascend 910_93: 0 < BS <= 512
6765
+ - K: Number of experts selected per token (0 < K <= 8 and K <= moe_expert_num)
6766
+ - server_num: Number of server nodes (supports 2, 4, 8)
6767
+ - local_expert_num: Number of experts per rank:
6768
+ - Shared expert ranks: local_expert_num = 1
6769
+ - MoE expert ranks: local_expert_num = moe_expert_num / (ep_world_size - shared_expert_rank_num)
6770
+ (TP communication not supported when localExpertNum > 1)
6771
+
6772
+ Inputs:
6773
+ - **expand_x** (Tensor) - Expanded token features. 2D tensor [A, H] with dtype matching input.
6774
+ Supported dtypes: float16, bfloat16, int8. Format: ND, non-contiguous allowed.
6775
+ - **expert_ids** (Tensor) - Top-K expert indices for each token. 2D int32 tensor with shape [BS, K].
6776
+ Format: ND, non-contiguous allowed.
6777
+             - **expand_idx** (Tensor) - Token counts per expert; it is the output of the dispatch operation.
6778
+ 1D int32 tensor [BS*K]. Format: ND, non-contiguous allowed.
6779
+             - **ep_send_counts** (Tensor) - Tokens that each EP rank needs to send; it is the output of the dispatch operation.
6780
+ - Ascend 910B: 1D int32 tensor [moe_expert_num + 2 * global_bs * K * server_num]
6781
+ - Ascend 910_93: 1D int32 tensor [ep_world_size * max(tp_world_size,1) * local_expert_num]
6782
+ Format: ND, non-contiguous allowed.
6783
+ - **expert_scales** (Tensor) - Top-K expert weights per token.
6784
+ - **ep_world_size** (int) - EP domain size.
6785
+ - Ascend 910B: Supports 16, 32, 64.
6786
+ - Ascend 910_93: Supports 8, 16, 32, 64, 128, 144, 256, 288.
6787
+ - **ep_rank_id** (int) - Local rank ID in EP domain [0, ep_world_size), must be unique per domain.
6788
+ - **moe_expert_num** (int) - Number of MoE experts (0 < moe_expert_num <= 256),
6789
+ must satisfy moe_expert_num % (ep_world_size-shared_expert_rank_num) = 0.
6790
+ - **tp_send_counts** (Tensor) - Tokens that each TP rank needs to send (when TP exists). It's the output of dispatch operation. Default: ``None``.
6791
+ - Ascend 910B: Not supported.
6792
+ - Ascend 910_93: 1D int32 tensor [tp_world_size] when TP exists. Format: ND, non-contiguous allowed.
6793
+ - **x_active_mask** (Tensor) - Reserved parameter. Default: ``None``.
6794
+ - **activate_scale** (Tensor) - Reserved parameter. Default: ``None``.
6795
+ - **weight_scale** (Tensor) - Reserved parameter. Default: ``None``.
6796
+ - **group_list** (Tensor) - Reserved parameter. Default: ``None``.
6797
+ - **expand_scales** (Tensor) - Output of dispatch operation. Default: ``None``.
6798
+ - Ascend 910B: 1D float32 tensor [A]. Format: ND, non-contiguous allowed.
6799
+ - Ascend 910_93: Unsupported.
6800
+ - **group_ep** (str) - EP communication domain name (string length 1-127), must differ from group_tp. Default: ``None``.
6801
+ - **group_tp** (str) - TP communication domain name. Default: ``None``.
6802
+ - Ascend 910B: Unsupported (pass empty string).
6803
+ - Ascend 910_93: When TP communication exists, string length 1-127, must differ from group_ep.
6804
+ - **tp_world_size** (int) - TP domain size. Default: ``0``.
6805
+ - Ascend 910B: Unsupported (pass 0).
6806
+ - Ascend 910_93: 0/1 means no TP communication; only 2 supported when TP exists.
6807
+ - **tp_rank_id** (int) - Local rank ID in TP domain. Default: ``0``.
6808
+ - Ascend 910B: Unsupported (pass 0).
6809
+ - Ascend 910_93: [0,1], unique per domain; pass 0 when no TP communication.
6810
+ - **expert_shard_type** (int) - Shared expert distribution type. Default: ``0``.
6811
+ - Ascend 910B: Unsupported (pass 0).
6812
+ - Ascend 910_93: Currently only 0 (shared experts precede MoE experts).
6813
+ - **shared_expert_num** (int) - Number of shared experts. Default: ``0``.
6814
+ - Ascend 910B: Unsupported (pass 0).
6815
+ - Ascend 910_93: Currently 0 (none) or 1 (one shared expert).
6816
+ - **shared_expert_rank_num** (int) - Number of ranks hosting shared experts. Default: ``0``.
6817
+ - Ascend 910B: Unsupported (pass 0).
6818
+ - Ascend 910_93: [0, ep_world_size-1), must satisfy ep_world_size % shared_expert_rank_num = 0 when non-zero.
6819
+ - **global_bs** (int) - Global batch size across EP domain. Default: ``0``.
6820
+ - Ascend 910B: 256*ep_world_size when BS varies per rank; 0 or BS*ep_world_size when uniform.
6821
+ - Ascend 910_93: 0 or BS*ep_world_size.
6822
+ - **out_dtype** (int) - Specify the type of output x. Reserved parameter (pass 0 in current version). Default: ``0``.
6823
+ - **common_quant_mode** (int) - Communication quantification type. Reserved parameter (pass 0 in current version). Default: ``0``.
6824
+ - **group_list_type** (int) - The format of group_list. Reserved parameter (pass 0 in current version). Default: ``0``.
6825
+
6826
+ Outputs:
6827
+ - **x** (Tensor) - Processed tokens. 2D tensor [BS, H] with dtype matching input `expand_x`.
6828
+
6829
+ Raises:
6830
+ TypeError: If input dtypes don't match specifications.
6831
+ ValueError: If input values violate constraints (e.g., invalid expert indices).
6832
+ RuntimeError: If communication domain configuration is invalid.
6833
+
6834
+ Supported Platforms:
6835
+ ``Ascend``
6836
+
6837
+ Examples:
6838
+ >>> # EP-only communication example (Ascend 910B)
6839
+ >>> import mindspore as ms
6840
+ >>> from mindspore import Tensor
6841
+ >>> from mindspore import ops
6842
+ >>> from mindspore.communication import init, get_rank, GlobalComm
6843
+ >>> from mindspore.ops.auto_generate import moe_distribute_dispatch, moe_distribute_combine
6844
+ >>> import numpy as np
6845
+ >>> bs = 8
6846
+ >>> h = 7168
6847
+ >>> k = 8
6848
+ >>> ep_world_size = 16
6849
+ >>> moe_expert_num = 16
6850
+ >>> global_bs = bs * ep_world_size
6851
+ >>> x = Tensor(np.random.randn(bs, h), ms.float16)
6852
+ >>> expert_ids = Tensor(np.random.randint(0, moe_expert_num, (bs, k)), ms.int32)
6853
+ >>> expert_scales = Tensor(np.random.randn(bs, k), ms.float32)
6854
+ >>> init()
6855
+ >>> rank_id = get_rank()
6856
+ >>> expand_x, _, expand_idx, _, ep_recv_count, _, expand_scale = moe_distribute_dispatch(
6857
+ ... x, expert_ids, expert_scales, ep_world_size, rank_id, moe_expert_num,
6858
+ ... group_ep=GlobalComm.WORLD_COMM_GROUP)
6859
+ >>> out_x = moe_distribute_combine(
6860
+ ... expand_x, expert_ids, expand_idx, ep_recv_count, expert_scales, ep_world_size, rank_id,
6861
+ ... moe_expert_num, group_ep=GlobalComm.WORLD_COMM_GROUP)
6862
+ >>> print(out_x.shape)
6863
+ (8, 7168)
6864
+ """
6865
+ return moe_distribute_combine_op(expand_x, expert_ids, expand_idx, ep_send_counts, expert_scales, ep_world_size, ep_rank_id, moe_expert_num, tp_send_counts, x_active_mask, activate_scale, weight_scale, group_list, expand_scales, group_ep, group_tp, tp_world_size, tp_rank_id, expert_shard_type, shared_expert_num, shared_export_rank_num, global_bs, out_dtype, common_quant_mode, group_list_type)
6866
+
6867
+
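The final "multiplied by the corresponding weights and summed" step is the usual top-K combine: each token's output is the weighted sum of the expert outputs it was routed to. A schematic NumPy sketch under that reading; the dense `(BS, K, H)` layout and names below are hypothetical, while the operator works on the flattened, permuted buffers described above.

```python
import numpy as np

bs, k, h = 4, 2, 8
expert_out = np.random.randn(bs, k, h).astype(np.float32)  # per-token results from the K selected experts
expert_scales = np.random.rand(bs, k).astype(np.float32)   # top-K weights per token

# Weighted sum over the K selected experts gives the combined token output (BS, H).
combined = (expert_scales[:, :, None] * expert_out).sum(axis=1)
print(combined.shape)  # (4, 8)
```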
6868
+ def moe_distribute_dispatch(x, expert_ids, ep_world_size, ep_rank_id, moe_expert_num, expert_scales=None, scales=None, x_active_mask=None, group_ep=None, group_tp=None, tp_world_size=0, tp_rank_id=0, expert_shard_type=0, shared_expert_num=0, shared_expert_rank_num=0, quant_mode=0, global_bs=0, expert_token_nums_type=0):
6869
+ r"""
6870
+ Performs token data quantization (optional) and parallel communication for Mixture of Experts (MoE).
6871
+ When Tensor Parallelism (TP) communication exists, it first performs Expert Parallelism (EP) AllToAllV
6872
+ communication followed by TP AllGatherV communication. Otherwise, only EP AllToAllV communication is performed.
6873
+
6874
+ Notes:
6875
+ - A: Maximum tokens to dispatch per rank:
6876
+ - For shared experts: A = BS * ep_world_size * shared_expert_num / shared_expert_rank_num
6877
+ - For MoE experts:
6878
+ - When global_bs = 0: A >= BS * ep_world_size * min(local_expert_num, K)
6879
+ - When global_bs != 0: A >= global_bs * min(local_expert_num, K)
6880
+ - H (hidden size): Dimension of each token's hidden state
6881
+ - Ascend 910B: 0 < H <= 7168, must be multiple of 32
6882
+ - Ascend 910_93: H = 7168
6883
+ - BS (batch sequence size): Number of tokens processed per rank
6884
+ - Ascend 910B: 0 < BS <= 256
6885
+ - Ascend 910_93: 0 < BS <= 512
6886
+ - K: Number of experts selected per token (0 < K <= 8 and K <= moe_expert_num)
6887
+ - server_num: Number of server nodes (supports 2, 4, 8)
6888
+ - local_expert_num: Number of experts per rank:
6889
+ - Shared expert ranks: local_expert_num = 1
6890
+ - MoE expert ranks: local_expert_num = moe_expert_num / (ep_world_size - shared_expert_rank_num)
6891
+ (TP communication not supported when localExpertNum > 1)
6892
+
6893
+ Inputs:
6894
+ - **x** (Tensor) - Input token data to be sent. 2D tensor with shape [BS, H].
6895
+ Supported dtypes: float16, bfloat16. Format: ND, non-contiguous allowed.
6896
+ - **expert_ids** (Tensor) - Top-K expert indices for each token. 2D int32 tensor with shape [BS, K].
6897
+ Format: ND, non-contiguous allowed.
6898
+ - **ep_world_size** (int64) - EP domain size.
6899
+ - Ascend 910B: Supports 16, 32, 64.
6900
+ - Ascend 910_93: Supports 8, 16, 32, 64, 128, 144, 256, 288.
6901
+ - **ep_rank_id** (int64) - Local rank ID in EP domain [0, ep_world_size), must be unique per domain.
6902
+ - **moe_expert_num** (int64) - Number of MoE experts (0 < moe_expert_num <= 256),
6903
+ must satisfy moe_expert_num % (ep_world_size-shared_expert_rank_num) = 0.
6904
+ - **expert_scales** (Tensor) - Top-K expert weights per token.
6905
+ - Ascend 910B: 2D float32 tensor [BS, K], ND format, non-contiguous allowed.
6906
+ - Ascend 910_93: Unsupported (pass nullptr).
6907
+ - **scales** (Tensor) - Expert weights. 2D float32 tensor with shape [shared_expert_num + moe_expert_num, H].
6908
+ Pass nullptr for non-quantized scenarios. Format: ND, non-contiguous allowed.
6909
+ Note: On Ascend 910B, must be nullptr when HCCL_INTRA_PCIE_ENABLE=1 and HCCL_INTRA_ROCE_ENABLE=0.
6910
+ - **x_active_mask** (Tensor) - Reserved parameter (pass nullptr in current version).
6911
+ - **group_ep** (str) - EP communication domain name (string length 1-127), must differ from group_tp.
6912
+ - **group_tp** (str) - TP communication domain name.
6913
+ - Ascend 910B: Unsupported (pass empty string).
6914
+ - Ascend 910_93: When TP communication exists, string length 1-127, must differ from group_ep.
6915
+ - **tp_world_size** (int64) - TP domain size.
6916
+ - Ascend 910B: Unsupported (pass 0).
6917
+ - Ascend 910_93: 0/1 means no TP communication; only 2 supported when TP exists.
6918
+ - **tp_rank_id** (int64) - Local rank ID in TP domain.
6919
+ - Ascend 910B: Unsupported (pass 0).
6920
+ - Ascend 910_93: [0,1], unique per domain; pass 0 when no TP communication.
6921
+ - **expert_shard_type** (int64) - Shared expert distribution type.
6922
+ - Ascend 910B: Unsupported (pass 0).
6923
+ - Ascend 910_93: Currently only 0 (shared experts precede MoE experts).
6924
+ - **shared_expert_num** (int64) - Number of shared experts.
6925
+ - Ascend 910B: Unsupported (pass 0).
6926
+ - Ascend 910_93: Currently 0 (none) or 1 (one shared expert).
6927
+ - **shared_expert_rank_num** (int64) - Number of ranks hosting shared experts.
6928
+ - Ascend 910B: Unsupported (pass 0).
6929
+ - Ascend 910_93: [0, ep_world_size-1), must satisfy ep_world_size % shared_expert_rank_num = 0 when non-zero.
6930
+ - **quant_mode** (int64) - Quantization mode: 0 (none), 2 (dynamic quantization).
6931
+ - **global_bs** (int64) - Global batch size across EP domain.
6932
+ - Ascend 910B: 256*ep_world_size when BS varies per rank; 0 or BS*ep_world_size when uniform.
6933
+ - Ascend 910_93: 0 or BS*ep_world_size.
6934
+ - **expert_token_nums_type** (int64) - Semantic meaning of expert_token_nums output:
6935
+ 0 (prefix sums), 1 (raw counts).
6936
+
6937
+ Outputs:
6938
+ - **expand_x** (Tensor) - Expanded token features. 2D tensor [A, H] with dtype matching input.
6939
+ Supported dtypes: float16, bfloat16, int8. Format: ND, non-contiguous allowed.
6940
+ - **dynamic_scales** (Tensor) - Dynamic quantization scales (when quant_mode=2).
6941
+ 1D float32 tensor [A]. Format: ND, non-contiguous allowed.
6942
+ - **expand_idx** (Tensor) - Token counts per expert for combine operation.
6943
+ 1D int32 tensor [BS*K]. Format: ND, non-contiguous allowed.
6944
+ - **expert_token_nums** (Tensor) - Tokens received per expert.
6945
+ 1D int64 tensor [local_expert_num]. Format: ND, non-contiguous allowed.
6946
+ - **ep_recv_counts** (Tensor) - Tokens received from each EP rank.
6947
+ - Ascend 910B: 1D int32 tensor [moe_expert_num + 2 * global_bs * K * server_num]
6948
+ - Ascend 910_93: 1D int32 tensor [ep_world_size * max(tp_world_size,1) * local_expert_num]
6949
+ Format: ND, non-contiguous allowed.
6950
+ - **tp_recv_counts** (Tensor) - Tokens received from each TP rank (when TP exists).
6951
+ - Ascend 910B: Not supported.
6952
+ - Ascend 910_93: 1D int32 tensor [tp_world_size] when TP exists. Format: ND, non-contiguous allowed.
6953
+ - **expand_scales** (Tensor) - Output token weights for combine operation.
6954
+ - Ascend 910B: 1D float32 tensor [A]. Format: ND, non-contiguous allowed.
6955
+ - Ascend 910_93: Unsupported.
6956
+
6957
+ Raises:
6958
+ TypeError: If input dtypes don't match specifications.
6959
+ ValueError: If input values violate constraints (e.g., invalid expert indices).
6960
+ RuntimeError: If communication domain configuration is invalid.
6961
+
6962
+ Supported Platforms:
6963
+ ``Ascend``
6964
+
6965
+ Examples:
6966
+ >>> # EP-only communication example (Ascend 910B)
6967
+ >>> import mindspore as ms
6968
+ >>> from mindspore import Tensor
6969
+ >>> from mindspore import ops
6970
+ >>> from mindspore.communication import init, get_rank, GlobalComm
6971
+ >>> from mindspore.ops.auto_generate import moe_distribute_dispatch
6972
+ >>> import numpy as np
6973
+ >>> bs = 8
6974
+ >>> h = 7168
6975
+ >>> k = 8
6976
+ >>> ep_world_size = 16
6977
+ >>> moe_expert_num = 16
6978
+ >>> global_bs = bs * ep_world_size
6979
+ >>> x = Tensor(np.random.randn(bs, h), ms.float16)
6980
+ >>> expert_ids = Tensor(np.random.randint(0, moe_expert_num, (bs, k)), ms.int32)
6981
+ >>> expert_scales = Tensor(np.random.randn(bs, k), ms.float32)
6982
+ >>> init()
6983
+ >>> rank_id = get_rank()
6984
+ >>> out = moe_distribute_dispatch(
6985
+ ... x, expert_ids, ep_world_size, rank_id, moe_expert_num, expert_scales=expert_scales,
6986
+ ... group_ep=GlobalComm.WORLD_COMM_GROUP)
6987
+ >>> print(out[0].shape) # expand_x
6988
+ (128, 7168)
6989
+ """
6990
+ return moe_distribute_dispatch_op(x, expert_ids, ep_world_size, ep_rank_id, moe_expert_num, expert_scales, scales, x_active_mask, group_ep, group_tp, tp_world_size, tp_rank_id, expert_shard_type, shared_expert_num, shared_expert_rank_num, quant_mode, global_bs, expert_token_nums_type)
6991
+
6992
+
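The `(128, 7168)` shape printed in the example is consistent with the capacity note above: with no shared-expert ranks, local_expert_num = moe_expert_num / ep_world_size = 1, and with global_bs = 0 the per-rank capacity is A >= BS * ep_world_size * min(local_expert_num, K). A quick check of that arithmetic, with the values copied from the example:

```python
bs, ep_world_size, moe_expert_num, k = 8, 16, 16, 8
local_expert_num = moe_expert_num // ep_world_size     # 1, since there are no shared-expert ranks
a = bs * ep_world_size * min(local_expert_num, k)      # 8 * 16 * 1
print(a)  # 128 -> expand_x has shape (A, H) = (128, 7168)
```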
6498
6993
  def moe_token_permute_grad(permuted_tokens_grad, sorted_indices, num_topk=1, padded_mode=False):
6499
6994
  r"""
6500
6995
 
@@ -6508,11 +7003,10 @@ def moe_token_permute(tokens, indices, num_out_tokens=None, padded_mode=False):
6508
7003
 
6509
7004
  .. warning::
6510
7005
  - It is only supported on Atlas A2 Training Series Products.
6511
- - The input `tokens` only supports the bfloat16 data type in the current version.
6512
7006
  - When `indices` is 2-D, the size of the second dim must be less than or equal to 512.
6513
7007
 
6514
7008
  Args:
6515
- tokens (Tensor): The input token tensor to be permuted. The dtype is bfloat16.
7009
+ tokens (Tensor): The input token tensor to be permuted. The dtype is bfloat16, float16 or float32.
6516
7010
  The shape is :math:`(num\_tokens, hidden\_size)` , where `num_tokens` and `hidden_size` are positive integers.
6517
7011
  indices (Tensor): The tensor specifies indices used to permute the tokens. The dtype is int32 or int64.
6518
7012
  The shape is :math:`(num\_tokens, topk)` or :math:`(num\_tokens,)`, where `num_tokens` and `topk` are positive integers.
@@ -6528,7 +7022,6 @@ def moe_token_permute(tokens, indices, num_out_tokens=None, padded_mode=False):
6528
7022
 
6529
7023
  Raises:
6530
7024
  TypeError: If `tokens` or `indices` is not a Tensor.
6531
- TypeError: If dtype of `tokens` is not bfloat16.
6532
7025
  TypeError: If dtype of `indices` is not int32 or int64.
6533
7026
  TypeError: If specified `num_out_tokens` is not an integer.
6534
7027
  TypeError: If specified `padded_mode` is not a bool.
@@ -6570,60 +7063,6 @@ def moe_token_unpermute_grad(permuted_tokens, unpermuted_tokens_grad, sorted_ind
6570
7063
  return moe_token_unpermute_grad_op(permuted_tokens, unpermuted_tokens_grad, sorted_indices, probs, padded_mode, restore_shape)
6571
7064
 
6572
7065
 
6573
- def moe_token_unpermute(permuted_tokens, sorted_indices, probs=None, padded_mode=False, restore_shape=None):
6574
- r"""
6575
- Unpermute a tensor of permuted tokens based on sorted indices, and optionally merge the tokens with their corresponding probabilities.
6576
-
6577
- .. warning::
6578
- - It is only supported on Atlas A2 Training Series Products.
6579
- - The inputs `permuted_tokens` and `probs` only support the bfloat16 data type in the current version.
6580
- - `sorted_indices` must not have duplicate values, otherwise the result is undefined.
6581
-
6582
- Args:
6583
- permuted_tokens (Tensor): The tensor of permuted tokens to be unpermuted.
6584
- The shape is :math:`[num\_tokens * topk, hidden\_size]` , where `num_tokens`, `topk` and `hidden_size` are positive integers.
6585
- sorted_indices (Tensor): The tensor of sorted indices used to unpermute the tokens.
6586
- The shape is :math:`[num\_tokens * topk,]`, where `num_tokens` and `topk` are positive integers.
6587
- It only supports the int32 data type.
6588
- probs (Tensor, optional): The tensor of probabilities corresponding to the permuted tokens.
6589
- If provided, the unpermuted tokens will be merged with their respective probabilities.
6590
- The shape is :math:`[num\_tokens, topk]`, where `num_tokens` and `topk` are positive integers. Default: ``None`` .
6591
- padded_mode (bool, optional): If ``True``, indicating the indices are padded to denote selected tokens per expert. Default: ``False`` .
6592
- restore_shape (Union[tuple[int], list[int]], optional): The input shape before permutation, only used in padding mode. Default: ``None`` .
6593
-
6594
- Returns:
6595
- Tensor, with the same dtype as `permuted_tokens`. If `padded_mode` is ``False``, the shape will be [`num_tokens`, `hidden_size`].
6596
- If `padded_mode` is ``True``, the shape will be specified by `restore_shape`.
6597
-
6598
- Raises:
6599
- TypeError: If `permuted_tokens` is not a Tensor.
6600
- ValueError: Only supported when `padded_mode` is ``False``.
6601
-
6602
- Supported Platforms:
6603
- ``Ascend``
6604
-
6605
- Examples:
6606
- >>> import mindspore
6607
- >>> from mindspore import Tensor, ops
6608
- >>> permuted_token = Tensor([
6609
- ... [1, 1, 1],
6610
- ... [0, 0, 0],
6611
- ... [0, 0, 0],
6612
- ... [3, 3, 3],
6613
- ... [2, 2, 2],
6614
- ... [1, 1, 1],
6615
- ... [2, 2, 2],
6616
- ... [3, 3, 3]], dtype=mindspore.bfloat16)
6617
- >>> sorted_indices = Tensor([0, 6, 7, 5, 3, 1, 2, 4], dtype=mindspore.int32)
6618
- >>> out = ops.moe_token_unpermute(permuted_token, sorted_indices)
6619
- >>> out.shape
6620
- (8, 3)
6621
-
6622
-
6623
- """
6624
- return moe_token_unpermute_op(permuted_tokens, sorted_indices, probs, padded_mode, restore_shape)
6625
-
6626
-
6627
7066
  def mse_loss_ext(input, target, reduction='mean'):
6628
7067
  r"""
6629
7068
  Calculates the mean squared error between the predicted value and the label value.
@@ -6687,7 +7126,7 @@ def mul(input, other):
6687
7126
  - When the two inputs have different shapes,
6688
7127
  they must be able to broadcast to a common shape.
6689
7128
  - The two inputs can not be bool type at the same time,
6690
- [True, Tensor(True, bool\_), Tensor(np.array([True]), bool\_)] are all considered bool type.
7129
+ [True, Tensor(True), Tensor(np.array([True]))] are all considered bool type.
6691
7130
  - Support implicit type conversion and type promotion.
6692
7131
 
6693
7132
  Args:
@@ -6746,10 +7185,10 @@ def mv(input, vec):
6746
7185
  Examples:
6747
7186
  >>> import mindspore
6748
7187
  >>> import numpy as np
6749
- >>> from mindspore import Tensor, mint
7188
+ >>> from mindspore import Tensor, ops
6750
7189
  >>> input = Tensor(np.array([[3., 4.], [1., 6.], [1., 3.]]).astype(np.float32))
6751
7190
  >>> vec = Tensor(np.array([1., 2.]).astype(np.float32))
6752
- >>> output = mint.mv(input, vec)
7191
+ >>> output = ops.auto_generate.mv(input, vec)
6753
7192
  >>> print(output)
6754
7193
  [11. 13. 7.]
6755
7194
  """
@@ -6822,14 +7261,14 @@ def narrow(input, dim, start, length):
6822
7261
 
6823
7262
  Examples:
6824
7263
  >>> import mindspore
6825
- >>> from mindspore import mint
7264
+ >>> from mindspore import ops
6826
7265
  >>> from mindspore import Tensor
6827
7266
  >>> x = Tensor([[1, 2, 3], [4, 5, 6], [7, 8, 9]], mindspore.int32)
6828
- >>> output = mint.narrow(x, 0, 0, 2)
7267
+ >>> output = ops.auto_generate.narrow(x, 0, 0, 2)
6829
7268
  >>> print(output)
6830
7269
  [[ 1 2 3]
6831
7270
  [ 4 5 6]]
6832
- >>> output = mint.narrow(x, 1, 1, 2)
7271
+ >>> output = ops.auto_generate.narrow(x, 1, 1, 2)
6833
7272
  >>> print(output)
6834
7273
  [[ 2 3]
6835
7274
  [ 5 6]
@@ -6838,6 +7277,47 @@ def narrow(input, dim, start, length):
6838
7277
  return narrow_op(input, dim, start, length)
6839
7278
 
6840
7279
 
7280
+ def narrow_view(input, dim, start, length):
7281
+ r"""
7282
+ Obtains a tensor of a specified length at a specified start position along a specified axis.
7283
+
7284
+ Args:
7285
+ input (Tensor): the tensor to narrow.
7286
+ dim (int): the axis along which to narrow.
7287
+         start (Union[int, Tensor[int]]): the index at which to start narrowing along `dim`.
7288
+         length (int): the length of the narrowed section.
7289
+
7290
+ Returns:
7291
+         output (Tensor) - The narrowed tensor.
7292
+
7293
+ Raises:
7294
+         ValueError: If the rank of `input` is 0.
7295
+         ValueError: If the value of `dim` is out of the range [-input.ndim, input.ndim).
7296
+         ValueError: If the value of `start` is out of the range [-input.shape[dim], input.shape[dim]].
7297
+         ValueError: If the value of `length` is out of the range [0, input.shape[dim]-start].
7298
+
7299
+ Supported Platforms:
7300
+ ``Ascend``
7301
+
7302
+ Examples:
7303
+ >>> import mindspore
7304
+ >>> from mindspore import ops
7305
+ >>> from mindspore.ops.auto_generate import NarrowView
7306
+ >>> from mindspore import Tensor
7307
+ >>> x = Tensor([[1, 2, 3], [4, 5, 6], [7, 8, 9]], mindspore.int32)
7308
+ >>> output = NarrowView()(x, 0, 0, 2)
7309
+ >>> print(output)
7310
+ [[ 1 2 3]
7311
+ [ 4 5 6]]
7312
+ >>> output = NarrowView()(x, 1, 1, 2)
7313
+ >>> print(output)
7314
+ [[ 2 3]
7315
+ [ 5 6]
7316
+ [ 8 9]]
7317
+ """
7318
+ return narrow_view_op(input, dim, start, length)
7319
+
7320
+
6841
7321
  def neg(input):
6842
7322
  r"""
6843
7323
  Returns a tensor with negative values of the input tensor element-wise.
@@ -7042,8 +7522,10 @@ def prelu(input, weight):
7042
7522
  :align: center
7043
7523
 
7044
7524
  .. note::
7045
- Channel dim is the 2nd dim of input. When input has dims < 2, then there is
7046
- no channel dim and the number of channels = 1.
7525
+ - Channel dim is the 2nd dim of input. When input has dims < 2, then there is
7526
+ no channel dim and the number of channels = 1.
7527
+ - In GE mode, the rank of the input tensor must be greater than 1;
7528
+ otherwise, an error will be triggered.
7047
7529
 
7048
7530
  Args:
7049
7531
  input (Tensor): The input Tensor of the activation function.
@@ -7156,65 +7638,6 @@ def prod_ext(input, dim=None, keepdim=False, dtype=None):
7156
7638
  return prod_ext_op(input, dim, keepdim, dtype)
7157
7639
 
7158
7640
 
7159
- def prompt_k_v_cache(cache, update, valid_seq_len, batch_index, seq_len_axis, new_max_seq_len, cur_max_seq_len, align_mode='LEFT'):
7160
- r"""
7161
- The PromptKVCache is used for prefill the KVCache of transformer network.
7162
-
7163
- Args:
7164
- cache (Tensor): The cahe tensor with data type of int8, uint8, int16, uint16, float16, float32 and int32.
7165
- When format is BHSD, cache tensor of shape
7166
- :math:`(cache\_batch\_size, num\_head, max\_seq\_length, size\_pre\_head)`.
7167
- When format is BSD, cache tensor of shape
7168
- :math:`(cache\_batch\_size, max\_seq\_length, hidden\_size)`.
7169
- update (Tensor]): The tensor which is used to update the cache tensor. Same data type as cache tensor.
7170
- When format is BHSD, cache tensor of shape
7171
- :math:`(update\_batch\_size, num\_head, max\_seq\_length, size\_pre\_head)`.
7172
- When format is BSD, cache tensor of shape
7173
- :math:`(update\_batch\_size, max\_seq\_length, hidden\_size)`.
7174
- valid_seq_len (Tensor): The valid_seq_len tensor with data type of int64.
7175
- Valid_seq_len tensor of shape :math:`(update\_batch\_size)`.
7176
- batch_index (Tensor): The batch_index tensor with data type of int64.
7177
- Batch_index tensor of shape :math:`(update\_batch\_size)`. Indicate that which batch of cache tensor is going to be update.
7178
- seq_len_axis (Tensor): The seq_len_axis indicate which axis is seq_eln, set to '1' or '2'. Not able for now.
7179
- new_max_seq_len (Tensor): The new_max_seq_len tensor with data type of int64.
7180
- New_max_seq_len tensor of shape :math:`(1)`.
7181
- Indicate that user want to change the shape of cache tensor from
7182
- :math:`(batch\_size, num_head, max\_seq\_length, hidden\_size)` to
7183
- :math:`(batch\_size * max\_seq\_length / new\_max\_seq\_length, num_head, new\_max\_seq\_length, hidden\_size)`
7184
- to update the cache tensor. This will not real change the shape of `cache` tensor. Not able for now.
7185
- cur_max_seq_len (Tensor): The new_max_seq_len tensor with data type of int64.
7186
- Cur_max_seq_len tensor of shape :math:`(1)`. Keep the current seq_len of cache tensor. Not abel for now.
7187
- align_mode (str): indicate which axis is seq_len. Default: left.
7188
-
7189
-
7190
- Outputs:
7191
- With same data type and same shape as `cache` tensor.
7192
-
7193
- Supported Platforms:
7194
- ``Ascend``
7195
-
7196
- Examples:
7197
- >>> from mindspore import Tensor
7198
- >>> from mindspore.ops.operations import _inner_ops
7199
- >>> b = 4
7200
- >>> h = 40
7201
- >>> max_s = 1024
7202
- >>> s = 256
7203
- >>> d = 128
7204
- >>> cache = Tensor(np.random.randn(b, h, max_s, d).astype(np.float16))
7205
- >>> update = Tensor(np.random.randn(b, h, s, d).astype(np.float16))
7206
- >>> valid_seq_len = Tensor(np.random.randint(-1, s, size=ub).astype(np.int64))
7207
- >>> batch_index = Tensor(np.random.choice(np.arange(-1, b), size=ub, replace=False).astype(np.int64))
7208
- >>> new_max_seq_len = Tensor(np.random.randn(1).astype(np.int64))
7209
- >>> cur_max_seq_len = Tensor(np.random.randn(1).astype(np.int64))
7210
- >>> prompt_kv_cache = _inner_ops.PromptKVCache(0)
7211
- >>> output = prompt_kv_cache(cache, update, valid_seq_len, batch_index, Tensor(2), new_max_seq_len, cur_max_seq_len)
7212
- >>> print(cache)
7213
- """
7214
- prompt_k_v_cache_op = _get_cache_prim(PromptKVCache)(align_mode)
7215
- return prompt_k_v_cache_op(cache, update, valid_seq_len, batch_index, seq_len_axis, new_max_seq_len, cur_max_seq_len)
7216
-
7217
-
7218
7641
  def randperm(n, seed=0, offset=0, dtype=mstype.int64):
7219
7642
  r"""
7220
7643
  Generates random permutation of integers from 0 to n-1.
@@ -7550,7 +7973,7 @@ def rfft2(input, s=None, dim=(-2, -1), norm=None):
7550
7973
 
7551
7974
  Note:
7552
7975
  - `rfft2` is currently only used in `mindscience` scientific computing scenarios and
7553
- dose not support other usage scenarios.
7976
+ does not support other usage scenarios.
7554
7977
  - `rfft2` is not supported on Windows platform yet.
7555
7978
 
7556
7979
  Args:
@@ -7611,7 +8034,7 @@ def rfftfreq(n, d=1.0, dtype=None):
7611
8034
 
7612
8035
  Note:
7613
8036
  - `rfftfreq` is currently only used in `mindscience` scientific computing scenarios and
7614
- dose not support other usage scenarios.
8037
+ does not support other usage scenarios.
7615
8038
  - `rfftfreq` is not supported on Windows platform yet.
7616
8039
 
7617
8040
  Args:
@@ -7644,7 +8067,7 @@ def rfftn(input, s=None, dim=None, norm=None):
7644
8067
 
7645
8068
  Note:
7646
8069
  - `rfftn` is currently only used in `mindscience` scientific computing scenarios and
7647
- dose not support other usage scenarios.
8070
+ does not support other usage scenarios.
7648
8071
  - `rfftn` is not supported on Windows platform yet.
7649
8072
 
7650
8073
  Args:
@@ -7704,7 +8127,7 @@ def rfft(input, n=None, dim=-1, norm=None):
7704
8127
 
7705
8128
  Note:
7706
8129
  - `rfft` is currently only used in `mindscience` scientific computing scenarios and
7707
- dose not support other usage scenarios.
8130
+ does not support other usage scenarios.
7708
8131
  - `rfft` is not supported on Windows platform yet.
7709
8132
 
7710
8133
  Args:
@@ -7747,6 +8170,78 @@ def rfft(input, n=None, dim=-1, norm=None):
7747
8170
  return rfft_op(input, n, dim, norm)
7748
8171
 
7749
8172
 
8173
+ def ring_attention_update(prev_attn_out, prev_softmax_max, prev_softmax_sum, cur_attn_out, cur_softmax_max, cur_softmax_sum, actual_seq_qlen=None, layout='SBH'):
8174
+ r"""
8175
+ The RingAttentionUpdate operator updates the output of two FlashAttention operations based on their respective softmax max and softmax sum values.
8176
+
8177
+ - S: Sequence length
8178
+ - B: Batch dimension
8179
+     - H: Hidden layer size, equal to N * D
8180
+     - T: time, equal to B*S
8181
+ - N: Number of attention heads
8182
+ - D: Head dimension
8183
+
8184
+ .. warning::
8185
+ - It is only supported on Atlas A2 Training Series Products.
8186
+ - This is an experimental API that is subject to change or deletion.
8187
+ - When `layout` is ``"TND"``, the last dimension of `prev_attn_out` must be a multiple of 64.
8188
+ - When `layout` is ``"TND"``, `actual_seq_qlen` is mandatory.
8189
+ - When `layout` is ``"TND"``, N x D must satisfy the constraint:
8190
+ (AlignUp(NxD, 64)x(DataSizex6+8))+(AlignUp(Nx8, 64)x56) <= 192x1024.
8191
+ DataSize is 4 bytes when `prev_attn_out` dtype is float32, 2 bytes when dtype is float16 / bfloat16.
8192
+ - When `layout` is ``"TND"``, if `actual_seq_qlen` is not a non-decreasing sequence from 0 to T, the result is undefined.
8193
+
8194
+ Args:
8195
+ prev_attn_out (Tensor): Output of the first FlashAttention operation. The dtype is float16, float32, bfloat16.
8196
+ The shape is :math:`(S, B, H)` or :math:`(T, N, D)`.
8197
+         prev_softmax_max (Tensor): The max values from the first FlashAttention softmax computation. The dtype is float32.
8198
+ The shape is :math:`(B, N, S, 8)` or :math:`(T, N, 8)`. The last dimension contains 8 identical values, which must be positive.
8199
+ prev_softmax_sum (Tensor): The sum values from the first FlashAttention softmax computation.
8200
+ It has the same shape and dtype as `prev_softmax_max`.
8201
+ cur_attn_out (Tensor): Output of the second FlashAttention operation. It has the same shape and dtype as `prev_attn_out`.
8202
+ cur_softmax_max (Tensor): The max values from the second FlashAttention softmax computation. It has the same shape and dtype as `prev_softmax_max`.
8203
+         cur_softmax_sum (Tensor): The sum values from the second FlashAttention softmax computation. It has the same shape and dtype as `prev_softmax_max`.
8204
+ actual_seq_qlen (Tensor, optional): Cumulative sequence length, starting from 0. Required if `layout` is ``"TND"``. Does not take effect if `layout` is ``"SBH"``.
8205
+ The tensor must be 1D and contain non-decreasing integer values starting from 0 to T. Default: ``None``.
8206
+ layout (str, optional): Indicates the input layout, currently support ``"TND"`` and ``"SBH"``. Default: ``"SBH"``.
8207
+
8208
+ Returns:
8209
+ tuple (Tensor), tuple of 3 tensors.
8210
+
8211
+ - **attn_out** (Tensor) - The updated attention out, with the same shape and dtype as `prev_attn_out`.
8212
+ - **softmax_max** (Tensor) - The updated softmax max values, with the same shape and dtype as `prev_softmax_max`.
8213
+ - **softmax_sum** (Tensor) - The updated softmax sum values, with the same shape and dtype as `prev_softmax_max`.
8214
+
8215
+ Raises:
8216
+ RuntimeError: If `layout` is ``"TND"``, and `prev_attn_out`'s last dimension is not aligned to 64.
8217
+ RuntimeError: If `layout` is ``"TND"``, and `actual_seq_qlen` is not provided.
8218
+ RuntimeError: If `layout` is ``"TND"``, and `actual_seq_qlen` is not a non-decreasing sequence from 0 to T.
8219
+ RuntimeError: If `layout` is ``"TND"``, and `prev_attn_out` exceeds the size constraints.
8220
+
8221
+ Supported Platforms:
8222
+ ``Ascend``
8223
+
8224
+ Examples:
8225
+ >>> import numpy as np
8226
+ >>> import mindspore
8227
+ >>> from mindspore import Tensor, ops
8228
+ >>> np.random.seed(123)
8229
+ >>> S, B, H, N= 4, 6, 16, 8
8230
+ >>> prev_attn_out = np.random.uniform(-1.0, 1.0, size=(S, B, H)).astype(np.float32)
8231
+ >>> prev_softmax_max = np.random.uniform(-1.0, 1.0, size=(B, N, S, 8)).astype(np.float32)
8232
+ >>> prev_softmax_sum = np.random.uniform(-1.0, 1.0, size=(B, N, S, 8)).astype(np.float32)
8233
+ >>> cur_attn_out = np.random.uniform(-1.0, 1.0, size=(S, B, H)).astype(np.float32)
8234
+ >>> cur_softmax_max = np.random.uniform(-1.0, 1.0, size=(B, N, S, 8)).astype(np.float32)
8235
+ >>> cur_softmax_sum = np.random.uniform(-1.0, 1.0, size=(B, N, S, 8)).astype(np.float32)
8236
+ >>> inputs_np = [prev_attn_out, prev_softmax_max, prev_softmax_sum, cur_attn_out, cur_softmax_max, cur_softmax_sum]
8237
+ >>> inputs_ms = [Tensor(item) for item in inputs_np]
8238
+ >>> out = ops.ring_attention_update(*inputs_ms)
8239
+ >>> print(out[0].shape)
8240
+ (4, 6, 16)
8241
+ """
8242
+ return ring_attention_update_op(prev_attn_out, prev_softmax_max, prev_softmax_sum, cur_attn_out, cur_softmax_max, cur_softmax_sum, actual_seq_qlen, layout)
8243
+
8244
+
7750
8245
  def rms_norm(x, gamma, epsilon=1e-6):
7751
8246
  r"""
7752
8247
  The RmsNorm(Root Mean Square Layer Normalization) operator is a normalization operation. Compared to
@@ -7942,7 +8437,7 @@ def scalar_cast(input_x, input_y):
7942
8437
 
7943
8438
  Args:
7944
8439
  input_x (scalar): The input scalar. Only constant value is allowed.
7945
- input_y (mindspore.dtype): The type to be cast. Only constant value is allowed. And the value should only be mindspore.int64, mindspore.float64, or mindspore.bool_.
8440
+ input_y (mindspore.dtype): The type to be cast. Only constant value is allowed. And the value should only be mindspore.int64, mindspore.float64, or mindspore.bool.
7946
8441
 
7947
8442
  Returns:
7948
8443
  Scalar. The type is the same as the python type corresponding to `input_y`.
@@ -8077,7 +8572,7 @@ def select_v2(condition, input, other):
8077
8572
  return select_v2_op(condition, input, other)
8078
8573
 
8079
8574
 
8080
- def select_ext(input, dim, index):
8575
+ def select_ext_view(input, dim, index):
8081
8576
  r"""
8082
8577
  Slices the input tensor along the selected dimension at the given index.
8083
8578
 
@@ -8099,15 +8594,14 @@ def select_ext(input, dim, index):
8099
8594
  ``Ascend``
8100
8595
 
8101
8596
  Examples:
8102
- >>> import mindspore
8103
- >>> from mindspore import Tensor, mint
8597
+ >>> from mindspore import Tensor, ops
8104
8598
  >>> input = Tensor([[2, 3, 4, 5],[3, 2, 4, 5]])
8105
- >>> y = mint.select(input, 0, 0)
8599
+ >>> y = ops.auto_generate.select_ext_view(input, 0, 0)
8106
8600
  >>> print(y)
8107
8601
  [2 3 4 5]
8108
8602
 
8109
8603
  """
8110
- return select_ext_op(input, dim, index)
8604
+ return select_ext_view_op(input, dim, index)
8111
8605
 
8112
8606
 
8113
8607
  def select(condition, input, other):
@@ -8195,13 +8689,13 @@ def selu_ext(input):
8195
8689
 
8196
8690
  Examples:
8197
8691
  >>> import mindspore
8198
- >>> from mindspore import Tensor, mint
8692
+ >>> from mindspore import Tensor, ops
8199
8693
  >>> import numpy as np
8200
8694
  >>> input = Tensor(np.array([[-1.0, 4.0, -8.0], [2.0, -5.0, 9.0]]), mindspore.float32)
8201
- >>> output = mint.nn.functional.selu(input)
8695
+ >>> output = ops.auto_generate.selu_ext(input)
8202
8696
  >>> print(output)
8203
8697
  [[-1.1113307 4.202804 -1.7575096]
8204
- [ 2.101402 -1.7462534 9.456309 ]]
8698
+ [ 2.101402 -1.7462534 9.456309 ]]
8205
8699
  """
8206
8700
  return selu_ext_op(input)
8207
8701
 
@@ -8453,6 +8947,58 @@ def sin(input):
8453
8947
  return sin_op(input)
8454
8948
 
8455
8949
 
8950
+ def smooth_l1_loss(prediction, target, beta=1.0, reduction='none'):
8951
+ r"""
8952
+     Calculate the smooth L1 loss. The smooth L1 loss function is robust to outliers.
8953
+
8954
+ Refer to :func:`mindspore.ops.smooth_l1_loss` for more details.
8955
+
8956
+ .. warning::
8957
+ This API has poor performance on CPU and it is recommended to run it on the Ascend/GPU.
8958
+
8959
+ Args:
8960
+ beta (number, optional): A parameter used to control the point where the function will change between
8961
+ L1 to L2 loss. Default: ``1.0`` .
8962
+
8963
+ - Ascend: The value should be equal to or greater than zero.
8964
+ - CPU/GPU: The value should be greater than zero.
8965
+ reduction (str, optional): Apply specific reduction method to the output: ``'none'`` , ``'mean'`` ,
8966
+ ``'sum'`` . Default: ``'none'`` .
8967
+
8968
+ - ``'none'``: no reduction will be applied.
8969
+ - ``'mean'``: compute and return the mean of elements in the output.
8970
+ - ``'sum'``: the output elements will be summed.
8971
+
8972
+ Inputs:
8973
+ - **logits** (Tensor) - Input Tensor of any dimension. Supported dtypes:
8974
+
8975
+ - Ascend: float16, float32, bfloat16.
8976
+ - CPU/GPU: float16, float32, float64.
8977
+ - **labels** (Tensor) - Ground truth data.
8978
+
8979
+           - CPU/Ascend: has the same shape as `logits`; `logits` and `labels` comply with the implicit type conversion rules to make the data types consistent.
8980
+ - GPU: has the same shape and dtype as the `logits`.
8981
+
8982
+ Outputs:
8983
+ Tensor, if `reduction` is ``'none'``, then output is a tensor with the same shape as `logits`. Otherwise the shape of output tensor is :math:`()`.
8984
+
8985
+ Supported Platforms:
8986
+ ``Ascend`` ``GPU`` ``CPU``
8987
+
8988
+ Examples:
8989
+ >>> import mindspore
8990
+ >>> import numpy as np
8991
+ >>> from mindspore import Tensor, ops
8992
+ >>> loss = ops.SmoothL1Loss()
8993
+ >>> logits = Tensor(np.array([1, 2, 3]), mindspore.float32)
8994
+ >>> labels = Tensor(np.array([1, 2, 2]), mindspore.float32)
8995
+ >>> output = loss(logits, labels)
8996
+ >>> print(output)
8997
+ [0. 0. 0.5]
8998
+ """
8999
+ return smooth_l1_loss_impl(prediction, target, beta, reduction)
9000
+
9001
+
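The example output [0. 0. 0.5] follows from the piecewise definition of smooth L1 loss: 0.5 * d**2 / beta when |d| < beta, and |d| - 0.5 * beta otherwise, where d = prediction - target. A small NumPy check of that formula, independent of the package code:

import numpy as np

def smooth_l1_reference(prediction, target, beta=1.0):
    # Quadratic near zero, linear in the tails; 'none' reduction.
    d = np.abs(prediction - target)
    return np.where(d < beta, 0.5 * d ** 2 / beta, d - 0.5 * beta)

print(smooth_l1_reference(np.array([1.0, 2.0, 3.0]), np.array([1.0, 2.0, 2.0])))
# [0.  0.  0.5]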
  def softplus_ext(input, beta=1, threshold=20):
  r"""
  Applies softplus function to `input` element-wise.
@@ -8634,7 +9180,7 @@ def solve_triangular(a, b, trans=0, lower=False, unit_diagonal=False):
 
  Note:
  - `solve_triangular` is currently only used in `mindscience` scientific computing scenarios and
- dose not support other usage scenarios.
+ does not support other usage scenarios.
  - `solve_triangular` is not supported on Windows platform yet.
 
  Args:
@@ -8912,11 +9458,11 @@ def sub_ext(input, other, alpha=1):
  input (Union[Tensor, number.Number, bool]): The first input is a number.Number or
  a bool or a tensor whose data type is
  `number <https://www.mindspore.cn/docs/en/master/api_python/mindspore/mindspore.dtype.html>`_ or
- `bool_ <https://www.mindspore.cn/docs/en/master/api_python/mindspore/mindspore.dtype.html>`_.
+ `bool <https://www.mindspore.cn/docs/en/master/api_python/mindspore/mindspore.dtype.html>`_.
  other (Union[Tensor, number.Number, bool]): The second input is a number.Number or
  a bool or a tensor whose data type is
  `number <https://www.mindspore.cn/docs/en/master/api_python/mindspore/mindspore.dtype.html>`_ or
- `bool_ <https://www.mindspore.cn/docs/en/master/api_python/mindspore/mindspore.dtype.html>`_.
+ `bool <https://www.mindspore.cn/docs/en/master/api_python/mindspore/mindspore.dtype.html>`_.
  alpha (number.Number): A scaling factor applied to `other`, default 1.
 
  Returns:
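Since `alpha` is described above as a scaling factor applied to `other`, the natural reading is `input - alpha * other` (the usual extended-subtract convention); the NumPy sketch below encodes that assumed behaviour for illustration only.

import numpy as np

def sub_ext_reference(x, other, alpha=1):
    # Assumed semantics: subtract `other` scaled by `alpha` from `x`.
    return np.asarray(x) - alpha * np.asarray(other)

print(sub_ext_reference([4.0, 5.0, 6.0], [1.0, 2.0, 3.0], alpha=2))
# [2. 1. 0.]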
@@ -8961,7 +9507,7 @@ def sub(input, other):
  Note:
  - When the two inputs have different shapes, they must be able to broadcast to a common shape.
  - The two inputs can not be bool type at the same time,
- [True, Tensor(True, bool\_), Tensor(np.array([True]), bool\_)] are all considered bool type.
+ [True, Tensor(True), Tensor(np.array([True]))] are all considered bool type.
  - Support implicit type conversion and type promotion.
 
  Args:
@@ -9065,9 +9611,6 @@ def swiglu(input, dim=-1):
  Computes SwiGLU (Swish-Gated Linear Unit activation function) of input tensor.
  SwiGLU is a variant of the :class:`mindspore.ops.GLU` activation function; it is defined as:
 
- .. warning::
- This is an experimental API that is subject to change or deletion.
-
  .. math::
  {SwiGLU}(a, b)= Swish(a) \otimes b
 
@@ -9075,6 +9618,9 @@ def swiglu(input, dim=-1):
  Swish(a)=a :math:`\sigma` (a), :math:`\sigma` is the :func:`mindspore.ops.sigmoid` activation function
  and :math:`\otimes` is the Hadamard product.
 
+ .. warning::
+ Only supported on Atlas A2 training series.
+
  Args:
  input (Tensor): Tensor to be split. It has shape :math:`(\ast_1, N, \ast_2)`
  where `*` means any number of additional dimensions. :math:`N` must be divisible by 2.
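Reading the definition above as code: the axis of size N is split into two halves a and b, and the result is Swish(a) * b. The NumPy sketch below assumes the conventional first-half/second-half split along `dim` and is illustrative only, not the package implementation.

import numpy as np

def swiglu_reference(x, dim=-1):
    # Split the axis of size N (N divisible by 2) into halves a and b.
    a, b = np.split(x, 2, axis=dim)
    swish_a = a * (1.0 / (1.0 + np.exp(-a)))  # Swish(a) = a * sigmoid(a)
    return swish_a * b                        # Hadamard product with b

x = np.arange(8, dtype=np.float32).reshape(2, 4)
print(swiglu_reference(x).shape)  # (2, 2)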
@@ -9307,6 +9853,30 @@ def topk_ext(input, k, dim=-1, largest=True, sorted=True):
  return topk_ext_op(input, k, dim, largest, sorted)
 
 
+ def topprouter(input, capacity, expert_num, drop_type=0, threshold=0.0, router_prob=0.0):
+ r"""
+ TopPRouter implementation in MoE (Mixture of Experts).
+
+ Inputs:
+ - **x** (Tensor) - 3-D input Tensor, supported dtypes: [int32, int64].
+ - **capacity** (Int64) - The maximum number of tokens each expert can handle.
+ - **expert_num** (Int64) - The number of experts.
+ - **drop_type** (Int64) - Drop strategy: 0 means S-Drop, 1 means K-Drop. Default 0.
+ - **threshold** (float32) - Expert threshold. Default 0.
+ - **router_prob** (Tensor) - 2-D top-k probability Tensor, supported dtypes: [float32]. Default 0.
+
+ Outputs:
+ tuple(Tensor), tuple of 2 tensors, `dispatch_index` and `combine_index`.
+
+ - dispatch_index (Tensor) - Token ID processed by each expert.
+ - combine_index (Tensor) - The combine index of each token.
+
+ Supported Platforms:
+ ``Ascend``
+ """
+ return topprouter_op(input, capacity, expert_num, drop_type, threshold, router_prob)
+
+
  def trace_ext(input):
  r"""
  Returns a new tensor containing the sum of the elements on the main diagonal of `input`.
@@ -9372,7 +9942,7 @@ def trace(input):
  return trace_op(input)
 
 
- def transpose_ext(input, dim0, dim1):
+ def transpose_ext_view(input, dim0, dim1):
  r"""
  Interchange two axes of a tensor.
 
@@ -9397,14 +9967,13 @@ def transpose_ext(input, dim0, dim1):
 
  Examples:
  >>> import numpy as np
- >>> from mindspore import mint
- >>> from mindspore import Tensor
- >>> input = Tensor(np.ones((2,3,4), dtype=np.float32))
- >>> output = mint.transpose(input, 0, 2)
+ >>> from mindspore import Tensor, ops
+ >>> input = Tensor(np.ones((2, 3, 4), dtype=np.float32))
+ >>> output = ops.auto_generate.transpose_ext_view(input, 0, 2)
  >>> print(output.shape)
  (4, 3, 2)
  """
- return transpose_ext_op(input, dim0, dim1)
+ return transpose_ext_view_op(input, dim0, dim1)
 
 
  def transpose(input, input_perm):
@@ -9440,6 +10009,57 @@ def transpose(input, input_perm):
  return transpose_op(input, input_perm)
 
 
+ def transpose_view(input, input_perm):
+ r"""
+ Permutes the dimensions of the input tensor according to input permutation.
+
+ For a 1-D array this has no effect, as a transposed vector is simply the same vector.
+ To convert a 1-D array into a 2-D column vector please refer to :func:`mindspore.ops.expand_dims`.
+ For a 2-D array, this is a standard matrix transpose. For an n-D array, the order of the elements
+ in `input_perm` indicates how the axes are permuted (see Examples).
+ If `input_perm` reverses the axis order and `input`.shape is :math:`(i[0], i[1], ... i[n-2], i[n-1])`,
+ then the output shape is :math:`(i[n-1], i[n-2], ... i[1], i[0])`.
+
+ Note:
+ On GPU and CPU, if the value of `input_perm` is negative, its actual value is `input_perm[i] + rank(input)`.
+ Negative value of `input_perm` is not supported on Ascend.
+
+ Args:
+ input (Tensor): The shape of tensor is :math:`(x_1, x_2, ..., x_R)`.
+ input_perm (tuple[int]): The permutation to be converted. The elements in `input_perm` are composed of
+ the indexes of each dimension of `input`. The length of `input_perm` and the shape of `input` must be
+ the same. Only constant value is allowed. Must be in the range [-rank(input), rank(input)).
+
+ Returns:
+ Tensor, the type of output tensor is the same as `input` and the shape of output tensor is decided by the
+ shape of `input` and the value of `input_perm`.
+
+ Raises:
+ TypeError: If `input_perm` is not a tuple.
+ ValueError: If length of shape of `input` is not equal to length of shape of `input_perm`.
+ ValueError: If the same element exists in `input_perm`.
+
+ Supported Platforms:
+ ``Ascend``
+
+ Examples:
+ >>> import mindspore
+ >>> import numpy as np
+ >>> from mindspore import Tensor, ops
+ >>> input = Tensor(np.array([[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]]), mindspore.float32)
+ >>> input_perm = (0, 2, 1)
+ >>> output = ops.TransposeView()(input, input_perm)
+ >>> print(output)
+ [[[ 1. 4.]
+ [ 2. 5.]
+ [ 3. 6.]]
+ [[ 7. 10.]
+ [ 8. 11.]
+ [ 9. 12.]]]
+ """
+ return transpose_view_op(input, input_perm)
+
+
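A NumPy analogue of the permutation semantics above, including the GPU/CPU note that a negative entry p in `input_perm` acts as p + rank(input); this is an illustrative sketch, not the package implementation.

import numpy as np

def transpose_view_reference(x, input_perm):
    # Normalize negative axes as the GPU/CPU note describes: p -> p + rank(x).
    perm = tuple(p + x.ndim if p < 0 else p for p in input_perm)
    return np.transpose(x, perm)

x = np.array([[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]], dtype=np.float32)
print(transpose_view_reference(x, (0, 2, 1)).shape)  # (2, 3, 2)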
  def triangular_solve(b, A, upper=True, transpose=False, unitriangular=False):
  r"""
  Solves a system of equations with a square upper or lower triangular invertible matrix `A` and multiple right-hand sides `b`.
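For context, "solving a triangular system" here means finding x such that A @ x = b with A triangular; the SciPy sketch below shows the same mathematical operation and does not mirror the MindSpore argument order or return value.

import numpy as np
from scipy.linalg import solve_triangular

# Conceptual reference only: solve A @ x = b for an upper-triangular A.
A = np.array([[2.0, 1.0],
              [0.0, 3.0]])
b = np.array([[5.0], [6.0]])
x = solve_triangular(A, b, lower=False)
print(np.allclose(A @ x, b))  # True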
@@ -10278,8 +10898,68 @@ def quant_batch_matmul(x1, x2, scale, offset=None, bias=None, pertokenScaleOptio
  return quant_batch_matmul_impl(x1, x2, scale, offset, bias, pertokenScaleOptional, transpose_x1, transpose_x2, dtype)
 
 
+ def quant_matmul(x1, x2, scale, offset=None, pertoken_scale=None, bias=None, output_dtype=None, x1_dtype=None, x2_dtype=None, pertoken_scale_dtype=None, scale_dtype=None, group_sizes=None):
+ r"""
+
+ """
+ return quant_matmul_op(x1, x2, scale, offset, pertoken_scale, bias, output_dtype, x1_dtype, x2_dtype, pertoken_scale_dtype, scale_dtype, group_sizes)
+
+
  def weight_quant_batch_matmul(x, weight, antiquant_scale, antiquant_offset=None, quant_scale=None, quant_offset=None, bias=None, transpose_x=False, transpose_weight=False, antiquant_group_size=0):
  r"""
 
  """
  return weight_quant_batch_matmul_impl(x, weight, antiquant_scale, antiquant_offset, quant_scale, quant_offset, bias, transpose_x, transpose_weight, antiquant_group_size)
+
+
+ def moe_token_unpermute(permuted_tokens, sorted_indices, probs=None, padded_mode=False, restore_shape=None):
+ r"""
+ Unpermutes a tensor of permuted tokens based on sorted indices, and optionally merges the tokens with their corresponding probabilities.
+
+ .. warning::
+ - It is only supported on Atlas A2 Training Series Products.
+ - `sorted_indices` must not have duplicate values; otherwise the result is undefined.
+
+ Args:
+ permuted_tokens (Tensor): The tensor of permuted tokens to be unpermuted.
+ The shape is :math:`[num\_tokens * topk, hidden\_size]` , where `num_tokens`, `topk` and `hidden_size` are positive integers.
+ sorted_indices (Tensor): The tensor of sorted indices used to unpermute the tokens.
+ The shape is :math:`[num\_tokens * topk,]`, where `num_tokens` and `topk` are positive integers.
+ It only supports the int32 data type.
+ probs (Tensor, optional): The tensor of probabilities corresponding to the permuted tokens.
+ If provided, the unpermuted tokens will be merged with their respective probabilities.
+ The shape is :math:`[num\_tokens, topk]`, where `num_tokens` and `topk` are positive integers. Default: ``None`` .
+ padded_mode (bool, optional): If ``True``, the indices are padded to denote selected tokens per expert. Default: ``False`` .
+ restore_shape (Union[tuple[int], list[int]], optional): The input shape before permutation, only used in padding mode. Default: ``None`` .
+
+ Returns:
+ Tensor, with the same dtype as `permuted_tokens`. If `padded_mode` is ``False``, the shape will be [`num_tokens`, `hidden_size`].
+ If `padded_mode` is ``True``, the shape will be specified by `restore_shape`.
+
+ Raises:
+ TypeError: If `permuted_tokens` is not a Tensor.
+ ValueError: If `padded_mode` is not ``False``; only the non-padded mode is currently supported.
+
+ Supported Platforms:
+ ``Ascend``
+
+ Examples:
+ >>> import mindspore
+ >>> from mindspore import Tensor, ops
+ >>> permuted_token = Tensor([
+ ... [1, 1, 1],
+ ... [0, 0, 0],
+ ... [0, 0, 0],
+ ... [3, 3, 3],
+ ... [2, 2, 2],
+ ... [1, 1, 1],
+ ... [2, 2, 2],
+ ... [3, 3, 3]], dtype=mindspore.bfloat16)
+ >>> sorted_indices = Tensor([0, 6, 7, 5, 3, 1, 2, 4], dtype=mindspore.int32)
+ >>> out = ops.moe_token_unpermute(permuted_token, sorted_indices)
+ >>> out.shape
+ (8, 3)
+
+ """
+ return moe_token_unpermute_op(permuted_tokens, sorted_indices, probs, padded_mode, restore_shape)