mindspore 2.6.0__cp311-cp311-win_amd64.whl → 2.7.0rc1__cp311-cp311-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mindspore might be problematic. Click here for more details.

Files changed (403)
  1. mindspore/.commit_id +1 -1
  2. mindspore/Microsoft.VisualStudio.Telemetry.dll +0 -0
  3. mindspore/Newtonsoft.Json.dll +0 -0
  4. mindspore/__init__.py +1 -1
  5. mindspore/_c_dataengine.cp311-win_amd64.pyd +0 -0
  6. mindspore/_c_expression.cp311-win_amd64.pyd +0 -0
  7. mindspore/_c_mindrecord.cp311-win_amd64.pyd +0 -0
  8. mindspore/_checkparam.py +40 -9
  9. mindspore/{_deprecated → _extends/optimize}/__init__.py +9 -3
  10. mindspore/_extends/optimize/cell_utils.py +96 -0
  11. mindspore/_extends/parse/__init__.py +2 -2
  12. mindspore/_extends/parse/compile_config.py +44 -22
  13. mindspore/_extends/parse/deprecated/deprecated_tensor_method.py +1 -1
  14. mindspore/_extends/parse/parser.py +36 -61
  15. mindspore/_extends/parse/resources.py +39 -0
  16. mindspore/_extends/parse/standard_method.py +32 -13
  17. mindspore/_extends/parse/trope.py +8 -1
  18. mindspore/_extends/pijit/__init__.py +1 -2
  19. mindspore/amp.py +4 -4
  20. mindspore/atlprov.dll +0 -0
  21. mindspore/avcodec-59.dll +0 -0
  22. mindspore/avdevice-59.dll +0 -0
  23. mindspore/avfilter-8.dll +0 -0
  24. mindspore/avformat-59.dll +0 -0
  25. mindspore/avutil-57.dll +0 -0
  26. mindspore/boost/adasum.py +1 -1
  27. mindspore/boost/boost_cell_wrapper.py +4 -4
  28. mindspore/c1.dll +0 -0
  29. mindspore/c1xx.dll +0 -0
  30. mindspore/c2.dll +0 -0
  31. mindspore/common/__init__.py +27 -2
  32. mindspore/common/_grad_function.py +2 -1
  33. mindspore/common/_pijit_context.py +28 -7
  34. mindspore/common/_stub_tensor.py +1 -209
  35. mindspore/common/_tensor_cpp_method.py +1 -1
  36. mindspore/common/_tensor_docs.py +76 -15
  37. mindspore/common/api.py +193 -112
  38. mindspore/common/dtype.py +21 -11
  39. mindspore/common/dump.py +10 -15
  40. mindspore/common/generator.py +2 -3
  41. mindspore/common/hook_handle.py +11 -2
  42. mindspore/common/jit_config.py +1 -1
  43. mindspore/common/jit_trace.py +84 -105
  44. mindspore/common/parameter.py +26 -12
  45. mindspore/common/recompute.py +3 -3
  46. mindspore/common/sparse_tensor.py +0 -3
  47. mindspore/common/symbol.py +0 -1
  48. mindspore/common/tensor.py +48 -83
  49. mindspore/communication/_comm_helper.py +46 -4
  50. mindspore/communication/management.py +79 -7
  51. mindspore/context.py +38 -23
  52. mindspore/dataset/core/config.py +3 -3
  53. mindspore/dataset/engine/datasets.py +20 -7
  54. mindspore/dataset/engine/datasets_user_defined.py +32 -2
  55. mindspore/dataset/engine/iterators.py +2 -2
  56. mindspore/dataset/engine/obs/config_loader.py +2 -2
  57. mindspore/dataset/engine/obs/obs_mindrecord_dataset.py +8 -0
  58. mindspore/dataset/transforms/py_transforms.py +7 -3
  59. mindspore/dataset/transforms/transforms.py +7 -3
  60. mindspore/dataset/vision/validators.py +1 -0
  61. mindspore/device_context/ascend/device.py +1 -1
  62. mindspore/device_context/gpu/__init__.py +2 -2
  63. mindspore/device_context/gpu/device.py +1 -1
  64. mindspore/device_context/gpu/op_precision.py +4 -2
  65. mindspore/device_context/gpu/op_tuning.py +6 -3
  66. mindspore/device_manager.py +16 -9
  67. mindspore/dnnl.dll +0 -0
  68. mindspore/dpcmi.dll +0 -0
  69. mindspore/experimental/llm_boost/ascend_native/llama_boost_ascend_native.py +3 -5
  70. mindspore/experimental/llm_boost/atb/boost_base.py +2 -3
  71. mindspore/experimental/optim/adadelta.py +13 -20
  72. mindspore/experimental/optim/adagrad.py +15 -22
  73. mindspore/experimental/optim/adam.py +17 -24
  74. mindspore/experimental/optim/adamax.py +14 -22
  75. mindspore/experimental/optim/adamw.py +28 -34
  76. mindspore/experimental/optim/asgd.py +15 -25
  77. mindspore/experimental/optim/lr_scheduler.py +27 -45
  78. mindspore/experimental/optim/nadam.py +14 -24
  79. mindspore/experimental/optim/optimizer.py +13 -23
  80. mindspore/experimental/optim/radam.py +18 -24
  81. mindspore/experimental/optim/rmsprop.py +14 -25
  82. mindspore/experimental/optim/rprop.py +15 -26
  83. mindspore/experimental/optim/sgd.py +9 -19
  84. mindspore/hal/__init__.py +4 -4
  85. mindspore/hal/contiguous_tensors_handle.py +2 -2
  86. mindspore/hal/memory.py +1 -0
  87. mindspore/include/api/cell.h +37 -1
  88. mindspore/include/api/delegate.h +10 -0
  89. mindspore/include/api/model.h +3 -0
  90. mindspore/include/api/types.h +2 -2
  91. mindspore/include/c_api/model_c.h +0 -58
  92. mindspore/include/c_api/tensor_c.h +0 -26
  93. mindspore/include/dataset/vision_ascend.h +1 -1
  94. mindspore/jpeg62.dll +0 -0
  95. mindspore/mindrecord/tools/cifar10.py +60 -11
  96. mindspore/mindrecord/tools/cifar10_to_mr.py +5 -0
  97. mindspore/mindspore_backend_common.dll +0 -0
  98. mindspore/mindspore_backend_manager.dll +0 -0
  99. mindspore/mindspore_common.dll +0 -0
  100. mindspore/mindspore_core.dll +0 -0
  101. mindspore/mindspore_cpu_res_manager.dll +0 -0
  102. mindspore/mindspore_dump.dll +0 -0
  103. mindspore/mindspore_frontend.dll +0 -0
  104. mindspore/mindspore_glog.dll +0 -0
  105. mindspore/mindspore_memory_pool.dll +0 -0
  106. mindspore/mindspore_ms_backend.dll +0 -0
  107. mindspore/mindspore_ops.dll +0 -0
  108. mindspore/mindspore_ops_host.dll +0 -0
  109. mindspore/mindspore_ops_kernel_common.dll +0 -0
  110. mindspore/mindspore_profiler.dll +0 -0
  111. mindspore/mindspore_pyboost.dll +0 -0
  112. mindspore/mindspore_pynative.dll +0 -0
  113. mindspore/mindspore_res_manager.dll +0 -0
  114. mindspore/mindspore_runtime_pipeline.dll +0 -0
  115. mindspore/mint/__init__.py +4 -44
  116. mindspore/mint/distributed/__init__.py +1 -0
  117. mindspore/mint/distributed/distributed.py +208 -5
  118. mindspore/mint/nn/__init__.py +1 -1
  119. mindspore/mint/nn/functional.py +53 -6
  120. mindspore/mint/nn/layer/_functions.py +164 -294
  121. mindspore/mint/nn/layer/activation.py +8 -6
  122. mindspore/mint/nn/layer/conv.py +122 -98
  123. mindspore/mint/nn/layer/normalization.py +8 -22
  124. mindspore/mint/optim/adam.py +19 -18
  125. mindspore/mint/optim/adamw.py +14 -8
  126. mindspore/mint/optim/sgd.py +5 -5
  127. mindspore/msobj140.dll +0 -0
  128. mindspore/mspdb140.dll +0 -0
  129. mindspore/mspdbcore.dll +0 -0
  130. mindspore/mspdbst.dll +0 -0
  131. mindspore/mspft140.dll +0 -0
  132. mindspore/msvcdis140.dll +0 -0
  133. mindspore/msvcp140_1.dll +0 -0
  134. mindspore/msvcp140_2.dll +0 -0
  135. mindspore/msvcp140_atomic_wait.dll +0 -0
  136. mindspore/msvcp140_codecvt_ids.dll +0 -0
  137. mindspore/nn/cell.py +325 -499
  138. mindspore/nn/grad/cell_grad.py +11 -12
  139. mindspore/nn/layer/activation.py +32 -34
  140. mindspore/nn/layer/basic.py +67 -64
  141. mindspore/nn/layer/channel_shuffle.py +4 -4
  142. mindspore/nn/layer/combined.py +4 -2
  143. mindspore/nn/layer/conv.py +86 -85
  144. mindspore/nn/layer/dense.py +9 -7
  145. mindspore/nn/layer/embedding.py +50 -52
  146. mindspore/nn/layer/image.py +37 -39
  147. mindspore/nn/layer/math.py +111 -112
  148. mindspore/nn/layer/normalization.py +56 -44
  149. mindspore/nn/layer/pooling.py +58 -63
  150. mindspore/nn/layer/rnn_cells.py +33 -33
  151. mindspore/nn/layer/rnns.py +56 -56
  152. mindspore/nn/layer/thor_layer.py +74 -73
  153. mindspore/nn/layer/transformer.py +11 -1
  154. mindspore/nn/learning_rate_schedule.py +20 -20
  155. mindspore/nn/loss/loss.py +79 -81
  156. mindspore/nn/optim/adam.py +1 -1
  157. mindspore/nn/optim/adasum.py +2 -2
  158. mindspore/nn/optim/optimizer.py +1 -1
  159. mindspore/nn/optim/thor.py +2 -2
  160. mindspore/nn/probability/distribution/exponential.py +2 -1
  161. mindspore/nn/probability/distribution/poisson.py +2 -1
  162. mindspore/nn/sparse/sparse.py +3 -3
  163. mindspore/nn/wrap/cell_wrapper.py +34 -37
  164. mindspore/nn/wrap/grad_reducer.py +37 -37
  165. mindspore/nn/wrap/loss_scale.py +72 -74
  166. mindspore/numpy/array_creations.py +5 -5
  167. mindspore/numpy/fft.py +1 -1
  168. mindspore/numpy/math_ops.py +1 -1
  169. mindspore/opencv_core452.dll +0 -0
  170. mindspore/opencv_imgcodecs452.dll +0 -0
  171. mindspore/opencv_imgproc452.dll +0 -0
  172. mindspore/ops/_grad_experimental/grad_comm_ops.py +51 -13
  173. mindspore/ops/_grad_experimental/grad_debug_ops.py +14 -0
  174. mindspore/ops/_vmap/vmap_array_ops.py +6 -13
  175. mindspore/ops/_vmap/vmap_nn_ops.py +8 -16
  176. mindspore/ops/auto_generate/cpp_create_prim_instance_helper.py +17 -8
  177. mindspore/ops/auto_generate/gen_extend_func.py +1 -51
  178. mindspore/ops/auto_generate/gen_ops_def.py +463 -257
  179. mindspore/ops/auto_generate/gen_ops_prim.py +1127 -885
  180. mindspore/ops/auto_generate/pyboost_inner_prim.py +31 -1
  181. mindspore/ops/composite/__init__.py +10 -0
  182. mindspore/ops/composite/base.py +8 -4
  183. mindspore/ops/composite/multitype_ops/__init__.py +12 -1
  184. mindspore/ops/composite/multitype_ops/_compile_utils.py +132 -108
  185. mindspore/ops/composite/multitype_ops/add_impl.py +70 -2
  186. mindspore/ops/composite/multitype_ops/div_impl.py +49 -0
  187. mindspore/ops/composite/multitype_ops/floordiv_impl.py +29 -0
  188. mindspore/ops/composite/multitype_ops/getitem_impl.py +11 -0
  189. mindspore/ops/composite/multitype_ops/mod_impl.py +5 -3
  190. mindspore/ops/composite/multitype_ops/mul_impl.py +49 -0
  191. mindspore/ops/composite/multitype_ops/setitem_impl.py +57 -0
  192. mindspore/ops/composite/multitype_ops/sub_impl.py +34 -0
  193. mindspore/ops/composite/multitype_ops/zeros_like_impl.py +14 -0
  194. mindspore/ops/function/__init__.py +3 -1
  195. mindspore/ops/function/_add_attr_func.py +11 -6
  196. mindspore/ops/function/array_func.py +7 -94
  197. mindspore/ops/function/debug_func.py +4 -3
  198. mindspore/ops/function/grad/grad_func.py +1 -1
  199. mindspore/ops/function/math_func.py +21 -367
  200. mindspore/ops/function/nn_func.py +26 -41
  201. mindspore/ops/function/other_func.py +4 -1
  202. mindspore/ops/function/random_func.py +31 -4
  203. mindspore/ops/functional.py +0 -2
  204. mindspore/ops/functional_overload.py +463 -6
  205. mindspore/ops/op_info_register.py +21 -0
  206. mindspore/ops/operations/__init__.py +5 -2
  207. mindspore/ops/operations/_custom_ops_utils.py +675 -8
  208. mindspore/ops/operations/_inner_ops.py +3 -6
  209. mindspore/ops/operations/_sequence_ops.py +1 -1
  210. mindspore/ops/operations/comm_ops.py +185 -26
  211. mindspore/ops/operations/custom_ops.py +235 -172
  212. mindspore/ops/operations/debug_ops.py +55 -4
  213. mindspore/ops/operations/image_ops.py +13 -13
  214. mindspore/ops/operations/manually_defined/ops_def.py +15 -16
  215. mindspore/ops/operations/math_ops.py +3 -4
  216. mindspore/ops/operations/nn_ops.py +5 -6
  217. mindspore/ops/primitive.py +6 -10
  218. mindspore/ops/tensor_method.py +36 -4
  219. mindspore/ops_generate/api/cpp_create_prim_instance_helper_generator.py +1 -1
  220. mindspore/ops_generate/api/functional_map_cpp_generator.py +10 -9
  221. mindspore/ops_generate/api/functions_cc_generator.py +58 -10
  222. mindspore/ops_generate/api/tensor_func_reg_cpp_generator.py +1 -1
  223. mindspore/ops_generate/common/base_generator.py +14 -0
  224. mindspore/ops_generate/common/gen_constants.py +7 -2
  225. mindspore/ops_generate/common/gen_utils.py +0 -19
  226. mindspore/ops_generate/common/op_proto.py +11 -4
  227. mindspore/ops_generate/common/template.py +88 -11
  228. mindspore/ops_generate/gen_ops.py +1 -1
  229. mindspore/ops_generate/op_def/lite_ops_cpp_generator.py +4 -4
  230. mindspore/ops_generate/op_def/ops_name_h_generator.py +0 -3
  231. mindspore/ops_generate/op_def/ops_primitive_h_generator.py +0 -4
  232. mindspore/ops_generate/op_def_py/op_prim_py_generator.py +5 -2
  233. mindspore/ops_generate/pyboost/auto_grad_impl_cc_generator.py +49 -8
  234. mindspore/ops_generate/pyboost/auto_grad_reg_cc_generator.py +2 -2
  235. mindspore/ops_generate/pyboost/gen_pyboost_func.py +31 -0
  236. mindspore/ops_generate/pyboost/op_template_parser.py +98 -72
  237. mindspore/ops_generate/pyboost/pyboost_functions_cpp_generator.py +70 -273
  238. mindspore/ops_generate/pyboost/pyboost_functions_h_generator.py +14 -6
  239. mindspore/ops_generate/pyboost/pyboost_functions_impl_cpp_generator.py +316 -0
  240. mindspore/ops_generate/pyboost/pyboost_functions_py_generator.py +1 -1
  241. mindspore/ops_generate/pyboost/pyboost_grad_function_cpp_generator.py +5 -3
  242. mindspore/ops_generate/pyboost/pyboost_inner_prim_generator.py +1 -1
  243. mindspore/ops_generate/pyboost/pyboost_internal_functions_cpp_generator.py +76 -0
  244. mindspore/ops_generate/pyboost/pyboost_internal_functions_h_generator.py +76 -0
  245. mindspore/ops_generate/pyboost/pyboost_internal_kernel_info_adapter_generator.py +125 -0
  246. mindspore/ops_generate/pyboost/pyboost_native_grad_functions_generator.py +4 -3
  247. mindspore/ops_generate/pyboost/pyboost_op_cpp_code_generator.py +348 -61
  248. mindspore/ops_generate/pyboost/pyboost_overload_functions_cpp_generator.py +1 -1
  249. mindspore/ops_generate/pyboost/pyboost_utils.py +118 -9
  250. mindspore/ops_generate/tensor_py_cc_generator.py +1 -24
  251. mindspore/parallel/_auto_parallel_context.py +4 -2
  252. mindspore/parallel/_cell_wrapper.py +106 -40
  253. mindspore/parallel/_parallel_serialization.py +1 -1
  254. mindspore/parallel/_ps_context.py +4 -6
  255. mindspore/parallel/_tensor.py +167 -12
  256. mindspore/parallel/_transformer/moe.py +1 -1
  257. mindspore/parallel/_transformer/transformer.py +13 -8
  258. mindspore/parallel/auto_parallel.py +12 -5
  259. mindspore/parallel/checkpoint_convert.py +3 -3
  260. mindspore/parallel/checkpoint_transform.py +3 -1
  261. mindspore/parallel/cluster/process_entity/_api.py +84 -48
  262. mindspore/parallel/cluster/process_entity/_utils.py +95 -7
  263. mindspore/parallel/cluster/run.py +43 -4
  264. mindspore/parallel/function/__init__.py +8 -1
  265. mindspore/parallel/function/reshard_func.py +1 -1
  266. mindspore/parallel/nn/__init__.py +15 -2
  267. mindspore/parallel/nn/parallel_cell_wrapper.py +9 -10
  268. mindspore/parallel/nn/parallel_grad_reducer.py +7 -6
  269. mindspore/parallel/shard.py +2 -2
  270. mindspore/parallel/transform_safetensors.py +462 -174
  271. mindspore/pgodb140.dll +0 -0
  272. mindspore/pgort140.dll +0 -0
  273. mindspore/profiler/__init__.py +2 -1
  274. mindspore/profiler/analysis/parser/timeline_assembly_factory/ascend_timeline_assembler.py +7 -7
  275. mindspore/profiler/analysis/parser/timeline_assembly_factory/base_timeline_assembler.py +3 -0
  276. mindspore/profiler/analysis/parser/timeline_assembly_factory/trace_view_container.py +3 -0
  277. mindspore/profiler/analysis/parser/timeline_creator/cpu_op_timeline_creator.py +3 -3
  278. mindspore/profiler/analysis/parser/timeline_creator/fwk_timeline_creator.py +3 -3
  279. mindspore/profiler/analysis/parser/timeline_creator/msprof_timeline_creator.py +4 -4
  280. mindspore/profiler/analysis/parser/timeline_creator/scope_layer_timeline_creator.py +3 -3
  281. mindspore/profiler/analysis/parser/timeline_event/fwk_event.py +4 -1
  282. mindspore/profiler/analysis/parser/timeline_event/timeline_event_pool.py +2 -1
  283. mindspore/profiler/analysis/task_manager.py +1 -1
  284. mindspore/profiler/analysis/viewer/ascend_communication_viewer.py +5 -1
  285. mindspore/profiler/analysis/viewer/ascend_integrate_viewer.py +2 -1
  286. mindspore/profiler/analysis/viewer/ascend_op_memory_viewer.py +42 -22
  287. mindspore/profiler/analysis/viewer/ascend_step_trace_time_viewer.py +3 -2
  288. mindspore/profiler/analysis/viewer/ms_minddata_viewer.py +9 -5
  289. mindspore/profiler/analysis/viewer/ms_operator_details_viewer.py +132 -0
  290. mindspore/profiler/common/constant.py +16 -0
  291. mindspore/profiler/common/profiler_context.py +25 -27
  292. mindspore/profiler/common/profiler_info.py +0 -16
  293. mindspore/profiler/common/profiler_op_analyse.py +235 -0
  294. mindspore/profiler/common/profiler_output_path.py +23 -8
  295. mindspore/profiler/common/profiler_parameters.py +128 -35
  296. mindspore/profiler/dynamic_profile/__init__.py +0 -0
  297. mindspore/profiler/dynamic_profile/dynamic_monitor_proxy.py +39 -0
  298. mindspore/profiler/dynamic_profile/dynamic_profiler_config_context.py +666 -0
  299. mindspore/profiler/dynamic_profile/dynamic_profiler_utils.py +62 -0
  300. mindspore/profiler/dynamic_profiler.py +305 -314
  301. mindspore/profiler/envprofiler.py +12 -7
  302. mindspore/profiler/experimental_config.py +96 -6
  303. mindspore/profiler/mstx.py +33 -12
  304. mindspore/profiler/platform/__init__.py +2 -3
  305. mindspore/profiler/platform/npu_profiler.py +29 -19
  306. mindspore/profiler/profiler.py +35 -19
  307. mindspore/profiler/profiler_action_controller.py +64 -76
  308. mindspore/profiler/schedule.py +10 -4
  309. mindspore/rewrite/common/config.py +1 -0
  310. mindspore/rewrite/common/namer.py +1 -0
  311. mindspore/rewrite/common/namespace.py +1 -0
  312. mindspore/rewrite/node/node.py +31 -11
  313. mindspore/rewrite/parsers/assign_parser.py +1 -1
  314. mindspore/rewrite/symbol_tree/symbol_tree.py +1 -1
  315. mindspore/run_check/_check_version.py +7 -10
  316. mindspore/runtime/__init__.py +5 -5
  317. mindspore/runtime/event.py +10 -4
  318. mindspore/runtime/executor.py +60 -45
  319. mindspore/runtime/memory.py +21 -30
  320. mindspore/runtime/thread_bind_core.py +298 -164
  321. mindspore/safeguard/rewrite_obfuscation.py +12 -13
  322. mindspore/swresample-4.dll +0 -0
  323. mindspore/swscale-6.dll +0 -0
  324. mindspore/tbbmalloc.dll +0 -0
  325. mindspore/tinyxml2.dll +0 -0
  326. mindspore/train/_utils.py +6 -2
  327. mindspore/train/amp.py +43 -20
  328. mindspore/train/callback/__init__.py +5 -5
  329. mindspore/train/callback/_checkpoint.py +3 -6
  330. mindspore/train/callback/_flops_collector.py +1 -1
  331. mindspore/train/callback/_landscape.py +0 -1
  332. mindspore/train/callback/_train_fault_tolerance.py +71 -13
  333. mindspore/train/data_sink.py +11 -2
  334. mindspore/train/dataset_helper.py +9 -0
  335. mindspore/train/model.py +51 -33
  336. mindspore/train/serialization.py +133 -111
  337. mindspore/train/summary/summary_record.py +13 -2
  338. mindspore/turbojpeg.dll +0 -0
  339. mindspore/utils/__init__.py +3 -2
  340. mindspore/utils/dryrun.py +0 -6
  341. mindspore/utils/runtime_execution_order_check.py +162 -78
  342. mindspore/utils/sdc_detect.py +68 -0
  343. mindspore/utils/utils.py +6 -9
  344. mindspore/vcmeta.dll +0 -0
  345. mindspore/vcruntime140.dll +0 -0
  346. mindspore/vcruntime140_1.dll +0 -0
  347. mindspore/version.py +1 -1
  348. {mindspore-2.6.0.dist-info → mindspore-2.7.0rc1.dist-info}/METADATA +5 -4
  349. {mindspore-2.6.0.dist-info → mindspore-2.7.0rc1.dist-info}/RECORD +352 -390
  350. mindspore/_deprecated/jit.py +0 -198
  351. mindspore/experimental/es/__init__.py +0 -22
  352. mindspore/experimental/es/embedding_service.py +0 -891
  353. mindspore/experimental/es/embedding_service_layer.py +0 -581
  354. mindspore/profiler/parser/__init__.py +0 -14
  355. mindspore/profiler/parser/aicpu_data_parser.py +0 -272
  356. mindspore/profiler/parser/ascend_analysis/__init__.py +0 -14
  357. mindspore/profiler/parser/ascend_analysis/constant.py +0 -71
  358. mindspore/profiler/parser/ascend_analysis/file_manager.py +0 -180
  359. mindspore/profiler/parser/ascend_analysis/function_event.py +0 -185
  360. mindspore/profiler/parser/ascend_analysis/fwk_cann_parser.py +0 -136
  361. mindspore/profiler/parser/ascend_analysis/fwk_file_parser.py +0 -131
  362. mindspore/profiler/parser/ascend_analysis/msprof_timeline_parser.py +0 -104
  363. mindspore/profiler/parser/ascend_analysis/path_manager.py +0 -313
  364. mindspore/profiler/parser/ascend_analysis/profiler_info_parser.py +0 -123
  365. mindspore/profiler/parser/ascend_analysis/tlv_decoder.py +0 -86
  366. mindspore/profiler/parser/ascend_analysis/trace_event_manager.py +0 -75
  367. mindspore/profiler/parser/ascend_cluster_generator.py +0 -116
  368. mindspore/profiler/parser/ascend_communicate_generator.py +0 -314
  369. mindspore/profiler/parser/ascend_flops_generator.py +0 -116
  370. mindspore/profiler/parser/ascend_fpbp_generator.py +0 -82
  371. mindspore/profiler/parser/ascend_hccl_generator.py +0 -271
  372. mindspore/profiler/parser/ascend_integrate_generator.py +0 -42
  373. mindspore/profiler/parser/ascend_memory_generator.py +0 -185
  374. mindspore/profiler/parser/ascend_msprof_exporter.py +0 -282
  375. mindspore/profiler/parser/ascend_msprof_generator.py +0 -187
  376. mindspore/profiler/parser/ascend_op_generator.py +0 -334
  377. mindspore/profiler/parser/ascend_steptrace_generator.py +0 -94
  378. mindspore/profiler/parser/ascend_timeline_generator.py +0 -545
  379. mindspore/profiler/parser/base_timeline_generator.py +0 -483
  380. mindspore/profiler/parser/container.py +0 -229
  381. mindspore/profiler/parser/cpu_gpu_timeline_generator.py +0 -697
  382. mindspore/profiler/parser/flops_parser.py +0 -531
  383. mindspore/profiler/parser/framework_enum.py +0 -111
  384. mindspore/profiler/parser/framework_parser.py +0 -464
  385. mindspore/profiler/parser/framework_struct.py +0 -61
  386. mindspore/profiler/parser/gpu_analysis/__init__.py +0 -14
  387. mindspore/profiler/parser/gpu_analysis/function_event.py +0 -44
  388. mindspore/profiler/parser/gpu_analysis/fwk_file_parser.py +0 -89
  389. mindspore/profiler/parser/gpu_analysis/profiler_info_parser.py +0 -72
  390. mindspore/profiler/parser/hccl_parser.py +0 -573
  391. mindspore/profiler/parser/hwts_log_parser.py +0 -122
  392. mindspore/profiler/parser/integrator.py +0 -526
  393. mindspore/profiler/parser/memory_usage_parser.py +0 -277
  394. mindspore/profiler/parser/minddata_analyzer.py +0 -800
  395. mindspore/profiler/parser/minddata_parser.py +0 -186
  396. mindspore/profiler/parser/minddata_pipeline_parser.py +0 -299
  397. mindspore/profiler/parser/op_intermediate_parser.py +0 -149
  398. mindspore/profiler/parser/optime_parser.py +0 -250
  399. mindspore/profiler/parser/profiler_info.py +0 -213
  400. mindspore/profiler/parser/step_trace_parser.py +0 -666
  401. {mindspore-2.6.0.dist-info → mindspore-2.7.0rc1.dist-info}/WHEEL +0 -0
  402. {mindspore-2.6.0.dist-info → mindspore-2.7.0rc1.dist-info}/entry_points.txt +0 -0
  403. {mindspore-2.6.0.dist-info → mindspore-2.7.0rc1.dist-info}/top_level.txt +0 -0
@@ -442,8 +442,7 @@ def apply_rotary_pos_emb_(query, key, cos, sin, position_ids, cos_format=0):
442
442
  r"""
443
443
 
444
444
  """
445
- apply_rotary_pos_emb_op = _get_cache_prim(ApplyRotaryPosEmb)(cos_format)
446
- return apply_rotary_pos_emb_op(query, key, cos, sin, position_ids)
445
+ return apply_rotary_pos_emb_impl(query, key, cos, sin, position_ids, cos_format)
447
446
 
448
447
 
449
448
  def argmax_ext(input, dim=None, keepdim=False):
@@ -1183,7 +1182,7 @@ def broadcast_to(input, shape):
1183
1182
 
1184
1183
  Args:
1185
1184
  input (Tensor): The input tensor.
1186
- shape (tuple): The target shape.
1185
+ shape (tuple[int]): The target shape.
1187
1186
 
1188
1187
  Returns:
1189
1188
  Tensor
@@ -1827,7 +1826,7 @@ def correlate(a, v, pad_mode='valid'):
1827
1826
 
1828
1827
  Note:
1829
1828
  - `correlate` is currently only used in `mindscience` scientific computing scenarios and
1830
- dose not support other usage scenarios.
1829
+ does not support other usage scenarios.
1831
1830
  - `correlate` is not supported on Windows platform yet.
1832
1831
 
1833
1832
  Args:
@@ -2037,7 +2036,7 @@ def cummin_ext(input, dim):
2037
2036
  \end{array}
2038
2037
 
2039
2038
  .. note::
2040
- O2 mode is not supported in Ascend.
2039
+ GE backend is not supported in Ascend.
2041
2040
 
2042
2041
  Args:
2043
2042
  input (Tensor): The input Tensor, The dimension must be greater than 0.
@@ -2117,61 +2116,6 @@ def cumsum_ext(input, dim, dtype=None):
2117
2116
  return cumsum_ext_op(input, dim, dtype)
2118
2117
 
2119
2118
 
2120
- def decoder_k_v_cache(cache, update, valid_seq_len, batch_index, seq_len_axis, new_max_seq_len, cur_max_seq_len):
2121
- r"""
2122
- The DecoderKVCache is used for decoding the KVCache of transformer network.
2123
-
2124
- Args:
2125
- cache (Tensor): The cahe tensor with data type of int8, uint8, int16, uint16, float16, float32 and int32.
2126
- When format is BHSD, cache tensor of shape
2127
- :math:`(batch\_size, num\_head, max\_seq\_length, size\_pre\_head)`.
2128
- When format is BSD, cache tensor of shape
2129
- :math:`(batch\_size, max\_seq\_length, hidden\_size)`.
2130
- update (Tensor]): The tensor which is used to update the cache tensor. Same data type as cache tensor.
2131
- When format is BHSD, update tensor of shape
2132
- :math:`(batch\_size, num\_head, update\_seq\_length, size\_pre\_head)`.
2133
- When format is BSD, update tensor of shape
2134
- :math:`(batch\_size, update\_seq\_length, hidden\_size)`.
2135
- valid_seq_len (Tensor): The valid_seq_len tensor with data type of int64.
2136
- Valid_seq_len tensor of shape :math:`(batch\_size)`.
2137
- batch_index (Tensor): The batch_index tensor with data type of int64.
2138
- Batch_index tensor of shape :math:`(batch\_size)`. Indicate that which batch of cache tensor is going to be update. Not abel for now.
2139
- seq_len_axis (Tensor): The seq_len_axis indicate which axis is seq_eln, set to '1' or '2'. Not able for now.
2140
- new_max_seq_len (Tensor): The new_max_seq_len tensor with data type of int64.
2141
- New_max_seq_len tensor of shape :math:`(1)`.
2142
- Indicate that user want to change the shape of cache tensor from
2143
- :math:`(batch\_size, num_head, max\_seq\_length, hidden\_size)`. to
2144
- :math:`(batch\_size * max\_seq\_length / new\_max\_seq\_length, num_head, new\_max\_seq\_length, hidden\_size)`.
2145
- to update the cache tensor. This will not real change the shape of `cache` tensor.
2146
- cur_max_seq_len (Tensor): The new_max_seq_len tensor with data type of int64.
2147
- Cur_max_seq_len tensor of shape :math:`(1)`. Keep the current seq_len of cache tensor. Not abel for now.
2148
-
2149
- Outputs:
2150
- With same data type and same shape as `cache` tensor.
2151
-
2152
- Supported Platforms:
2153
- ``Ascend``
2154
-
2155
- Examples:
2156
- >>> from mindspore.ops.operations import _inner_ops
2157
- >>> b = 4
2158
- >>> h = 40
2159
- >>> max_s = 1024
2160
- >>> s = 1
2161
- >>> d = 128
2162
- >>> cache = Tensor(np.random.randn(b, h, max_s, d).astype(np.float16))
2163
- >>> update = Tensor(np.random.randn(b, h, s, d).astype(np.float16))
2164
- >>> valid_seq_len = Tensor(np.random.randint(-1, s, size=b).astype(np.int64))
2165
- >>> batch_index = Tensor(np.random.choice(np.arange(-1, b), size=b, replace=False).astype(np.int64))
2166
- >>> new_max_seq_len = Tensor(np.random.randn(1).astype(np.int64))
2167
- >>> cur_max_seq_len = Tensor(np.random.randn(1).astype(np.int64))
2168
- >>> decoder_kv_cache = _inner_ops.DecoderKVCache()
2169
- >>> output = decoder_kv_cache(cache, update, valid_seq_len, batch_index, Tensor(2), new_max_seq_len, cur_max_seq_len)
2170
- >>> print(cache)
2171
- """
2172
- return decoder_k_v_cache_op(cache, update, valid_seq_len, batch_index, seq_len_axis, new_max_seq_len, cur_max_seq_len)
2173
-
2174
-
2175
2119
  def dense(input, weight, bias=None):
2176
2120
  r"""
2177
2121
  Applies the dense connected operation to the `input`. The dense function is defined as:
@@ -3053,7 +2997,7 @@ def fft2(input, s=None, dim=(-2, -1), norm=None):
3053
2997
 
3054
2998
  Note:
3055
2999
  - `fft2` is currently only used in `mindscience` scientific computing scenarios and
3056
- dose not support other usage scenarios.
3000
+ does not support other usage scenarios.
3057
3001
  - `fft2` is not supported on Windows platform yet.
3058
3002
 
3059
3003
  Args:
@@ -3117,7 +3061,7 @@ def fftfreq(n, d=1.0, dtype=None):
3117
3061
 
3118
3062
  Note:
3119
3063
  - `fftfreq` is currently only used in `mindscience` scientific computing scenarios and
3120
- dose not support other usage scenarios.
3064
+ does not support other usage scenarios.
3121
3065
  - `fftfreq` is not supported on Windows platform yet.
3122
3066
 
3123
3067
  Args:
@@ -3150,7 +3094,7 @@ def fftn(input, s=None, dim=None, norm=None):
3150
3094
 
3151
3095
  Note:
3152
3096
  - `fftn` is currently only used in `mindscience` scientific computing scenarios and
3153
- dose not support other usage scenarios.
3097
+ does not support other usage scenarios.
3154
3098
  - `fftn` is not supported on Windows platform yet.
3155
3099
 
3156
3100
  Args:
@@ -3210,7 +3154,7 @@ def fftshift(input, dim=None):
3210
3154
 
3211
3155
  Note:
3212
3156
  - `fftshift` is currently only used in `mindscience` scientific computing scenarios and
3213
- dose not support other usage scenarios.
3157
+ does not support other usage scenarios.
3214
3158
  - `fftshift` is not supported on Windows platform yet.
3215
3159
 
3216
3160
  Args:
@@ -3246,7 +3190,7 @@ def fft(input, n=None, dim=-1, norm=None):
3246
3190
 
3247
3191
  Note:
3248
3192
  - `fft` is currently only used in `mindscience` scientific computing scenarios and
3249
- dose not support other usage scenarios.
3193
+ does not support other usage scenarios.
3250
3194
  - `fft` is not supported on Windows platform yet.
3251
3195
 
3252
3196
  Args:
@@ -3505,11 +3449,11 @@ def frac_ext(input):
3505
3449
  return frac_op(input)
3506
3450
 
3507
3451
 
3508
- def fused_add_topk_div(x, add_num, group_num, group_topk, n, k, activate_type=0, is_norm=True, scale=2.5):
3452
+ def fused_add_topk_div(x, add_num, group_num, group_topk, n, k, activate_type=0, is_norm=True, scale=2.5, mapping_num=None, mapping_table=None, enable_expert_mapping=False):
3509
3453
  r"""
3510
3454
 
3511
3455
  """
3512
- return fused_add_topk_div_op(x, add_num, group_num, group_topk, n, k, activate_type, is_norm, scale)
3456
+ return fused_add_topk_div_op(x, add_num, group_num, group_topk, n, k, activate_type, is_norm, scale, mapping_num, mapping_table, enable_expert_mapping)
3513
3457
 
3514
3458
 
3515
3459
  def gather_d(x, dim, index):
@@ -3785,7 +3729,7 @@ def hfft2(input, s=None, dim=(-2, -1), norm=None):
3785
3729
 
3786
3730
  Note:
3787
3731
  - `hfft2` is currently only used in `mindscience` scientific computing scenarios and
3788
- dose not support other usage scenarios.
3732
+ does not support other usage scenarios.
3789
3733
  - `hfft2` is not supported on Windows platform yet.
3790
3734
 
3791
3735
  Args:
@@ -3846,7 +3790,7 @@ def hfftn(input, s=None, dim=None, norm=None):
3846
3790
 
3847
3791
  Note:
3848
3792
  - `hfftn` is currently only used in `mindscience` scientific computing scenarios and
3849
- dose not support other usage scenarios.
3793
+ does not support other usage scenarios.
3850
3794
  - `hfftn` is not supported on Windows platform yet.
3851
3795
 
3852
3796
  Args:
@@ -3907,7 +3851,7 @@ def hfft(input, n=None, dim=-1, norm=None):
3907
3851
 
3908
3852
  Note:
3909
3853
  - `hfft` is currently only used in `mindscience` scientific computing scenarios and
3910
- dose not support other usage scenarios.
3854
+ does not support other usage scenarios.
3911
3855
  - `hfft` is not supported on Windows platform yet.
3912
3856
 
3913
3857
  Args:
@@ -4168,7 +4112,7 @@ def ifft2(input, s=None, dim=(-2, -1), norm=None):
4168
4112
 
4169
4113
  Note:
4170
4114
  - `ifft2` is currently only used in `mindscience` scientific computing scenarios and
4171
- dose not support other usage scenarios.
4115
+ does not support other usage scenarios.
4172
4116
  - `ifft2` is not supported on Windows platform yet.
4173
4117
 
4174
4118
  Args:
@@ -4228,7 +4172,7 @@ def ifftn(input, s=None, dim=None, norm=None):
4228
4172
 
4229
4173
  Note:
4230
4174
  - `ifftn` is currently only used in `mindscience` scientific computing scenarios and
4231
- dose not support other usage scenarios.
4175
+ does not support other usage scenarios.
4232
4176
  - `ifftn` is not supported on Windows platform yet.
4233
4177
 
4234
4178
  Args:
@@ -4288,7 +4232,7 @@ def ifftshift(input, dim=None):
4288
4232
 
4289
4233
  Note:
4290
4234
  - `ifftshift` is currently only used in `mindscience` scientific computing scenarios and
4291
- dose not support other usage scenarios.
4235
+ does not support other usage scenarios.
4292
4236
  - `ifftshift` is not supported on Windows platform yet.
4293
4237
 
4294
4238
  Args:
@@ -4324,7 +4268,7 @@ def ifft(input, n=None, dim=-1, norm=None):
4324
4268
 
4325
4269
  Note:
4326
4270
  - `ifft` is currently only used in `mindscience` scientific computing scenarios and
4327
- dose not support other usage scenarios.
4271
+ does not support other usage scenarios.
4328
4272
  - `ifft` is not supported on Windows platform yet.
4329
4273
 
4330
4274
  Args:
@@ -4380,7 +4324,7 @@ def ihfft2(input, s=None, dim=(-2, -1), norm=None):
4380
4324
 
4381
4325
  Note:
4382
4326
  - `ihfft2` is currently only used in `mindscience` scientific computing scenarios and
4383
- dose not support other usage scenarios.
4327
+ does not support other usage scenarios.
4384
4328
  - `ihfft2` is not supported on Windows platform yet.
4385
4329
 
4386
4330
  Args:
@@ -4441,7 +4385,7 @@ def ihfftn(input, s=None, dim=None, norm=None):
4441
4385
 
4442
4386
  Note:
4443
4387
  - `ihfftn` is currently only used in `mindscience` scientific computing scenarios and
4444
- dose not support other usage scenarios.
4388
+ does not support other usage scenarios.
4445
4389
  - `ihfftn` is not supported on Windows platform yet.
4446
4390
 
4447
4391
  Args:
@@ -4502,7 +4446,7 @@ def ihfft(input, n=None, dim=-1, norm=None):
4502
4446
 
4503
4447
  Note:
4504
4448
  - `ihfft` is currently only used in `mindscience` scientific computing scenarios and
4505
- dose not support other usage scenarios.
4449
+ does not support other usage scenarios.
4506
4450
  - `ihfft` is not supported on Windows platform yet.
4507
4451
 
4508
4452
  Args:
@@ -4623,56 +4567,6 @@ def unfold_ext(input, kernel_size, dilation=1, padding=0, stride=1):
4623
4567
  return im2col_ext_op(input, kernel_size, dilation, padding, stride)
4624
4568
 
4625
4569
 
4626
- def index_add_ext(input, dim, index, source, alpha=1):
4627
- r"""
4628
- Accumulate the elements of `alpha` times `source` into the `input` by adding to the index in the order given in `index`. For example, if ``dim == 0`` , ``index[i] == j`` , and ``alpha = -1`` , then the `i` th row of `source` is subtracted from the `j` th row of `input` . The `dim` th dimension of `source` must have the same size as the length of `index` , and all other dimensions must match `input`, or an error will be raised. For a 3-D tensor, the output is defined as follows:
4629
-
4630
- .. math::
4631
- \begin{array}{ll}
4632
- input[index[i],\ :,\ :]\ +=\ alpha * source[i,\ :,\ :] \qquad \#if\ dim == 0 \\
4633
- input[:,\ \ index[i],\ :]\ +=\ alpha * source[:,\ \ i,\ :] \qquad \#if\ dim == 1 \\
4634
- input[:,\ :,\ \ index[i]]\ +=\ alpha * source[:,\ :,\ \ i] \qquad\#if\ dim == 2 \\
4635
- \end{array}
4636
-
4637
- .. warning::
4638
- This is an experimental API that is subject to change or deletion.
4639
-
4640
- Args:
4641
- input (Tensor): The input Tensor.
4642
- dim (int): The dimension along which to index.
4643
- index (Tensor): Add the value of "input Tensor" and `source` along the dimension of the `dim` according to the specified index value, with data type int32. The `index` must be 1D with the same size as the size of `source` in the `dim` dimension. The values of `index` should be in [0, b), where the b is the size of "input Tensor" in the `dim` dimension.
4644
- source (Tensor): The input tensor with the value to add. Must have same data type as "input Tensor". The shape must be the same as "input Tensor" except the `dim` th dimension.
4645
- alpha (number, optional): The scalar multiplier for source. Default: ``1``.
4646
-
4647
- Returns:
4648
- Tensor, has the same shape and dtype as `input`.
4649
-
4650
- Raises:
4651
- TypeError: If neither `index` nor `source` is a Tensor.
4652
- ValueError: If the value of `dim` is out of the dimension range of `source` shape.
4653
- ValueError: If `index` rank is not the same as `source` rank.
4654
- ValueError: If shape of `index` is not 1D or size of `index` is not equal to dimension of source[dim].
4655
- ValueError: If the shape of `source` is not the same as that of `input` except the `dim` axis.
4656
-
4657
- Supported Platforms:
4658
- ``Ascend``
4659
-
4660
- Examples:
4661
- >>> import numpy as np
4662
- >>> import mindspore
4663
- >>> from mindspore import Tensor, ops
4664
- >>> x = Tensor(np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]), mindspore.float32)
4665
- >>> index = Tensor(np.array([0, 2]), mindspore.int32)
4666
- >>> y = Tensor(np.array([[0.5, 1.0], [1.0, 1.5], [2.0, 2.5]]), mindspore.float32)
4667
- >>> output = ops.auto_generate.index_add_ext(x, 1, index, y, alpha=1)
4668
- >>> print(output)
4669
- [[ 1.5 2. 4. ]
4670
- [ 5. 5. 7.5]
4671
- [ 9. 8. 11.5]]
4672
- """
4673
- return index_add_ext_op(input, dim, index, source, alpha)
4674
-
4675
-
4676
4570
  def index_fill_scalar(input, dim, index, value):
4677
4571
  r"""
4678
4572
 
@@ -4783,6 +4677,13 @@ def index_select_ext(input, dim, index):
4783
4677
  return index_select_op(input, dim, index)
4784
4678
 
4785
4679
 
4680
+ def inner_moe_token_unpermute(permuted_tokens, sorted_indices, probs=None, padded_mode=False, restore_shape=None):
4681
+ r"""
4682
+
4683
+ """
4684
+ return inner_moe_token_unpermute_op(permuted_tokens, sorted_indices, probs, padded_mode, restore_shape)
4685
+
4686
+
4786
4687
  def inplace_adds_ext(input, other, alpha=1):
4787
4688
  r"""
4788
4689
 
@@ -5090,6 +4991,51 @@ def masked_fill_tensor_(input, mask, value):
5090
4991
  return inplace_masked_fill_tensor_op(input, mask, value)
5091
4992
 
5092
4993
 
4994
+ def matmul_add_(x, weight, C):
4995
+ r"""
4996
+ Fusion Operator of Transpose, Matmul, and InplaceAdd.
4997
+
4998
+ .. warning::
4999
+ - This is an experimental API that is subject to change or deletion.
5000
+ - This API is only supported in Atlas A2 training series for now.
5001
+ - This API is only supported on GRAPH mode.
5002
+
5003
+ Args:
5004
+ x (Tensor): Matrix A in matrix multiplication, with shape :math:`(k, m)` or :math:`(batch, k, m)`,
5005
+ whose type should be float16 or bfloat16.
5006
+ weight (Tensor): Matrix B in matrix multiplication, with shape :math:`(k, n)` or :math:`(batch, k, n)`,
5007
+ whose type should be float16 or bfloat16.
5008
+ C (Tensor): A Tensor acting as both input and output, with type of float32.
5009
+ Its shape should be :math:`(m, n)` or :math:`(batch, m, n)`.
5010
+
5011
+ Returns:
5012
+ Tensor, has the same shape and data type as `C`.
5013
+
5014
+ Raises:
5015
+ TypeError: If the dtype of `weight` is not the same as `x`.
5016
+ ValueError: If the ranks of `x` , `weight` and `C` are not the same.
5017
+
5018
+ Supported Platforms:
5019
+ ``Ascend``
5020
+
5021
+ Examples:
5022
+ >>> import mindspore
5023
+ >>> import numpy as np
5024
+ >>> from mindspore import Tensor, ops, nn, context
5025
+ >>> context.set_context(mode=context.GRAPH_MODE, jit_config={"jit_level": "O0"})
5026
+ >>> class Net(nn.Cell):
5027
+ ... def construct(self, x, weight, C):
5028
+ ... return ops.auto_generate.inplace_matmul_add_op(x, weight, C)
5029
+ >>> x = Tensor(np.random.randn(10, 20), mindspore.float16)
5030
+ >>> weight = Tensor(np.random.randn(10, 8), mindspore.float16)
5031
+ >>> C = Tensor(np.random.randn(20, 8), mindspore.float32)
5032
+ >>> output = Net()(x, weight, C)
5033
+ >>> print(output.shape)
5034
+ (20, 8)
5035
+ """
5036
+ return inplace_matmul_add_op(x, weight, C)
5037
+
5038
+
5093
5039
  def inplace_muls(input, other):
5094
5040
  r"""
5095
5041
 
@@ -5118,6 +5064,52 @@ def inplace_scatter_add(input, dim, index, src):
5118
5064
  return inplace_scatter_add_op(input, dim, index, src)
5119
5065
 
5120
5066
 
5067
+ def inplace_silu(input):
5068
+ r"""
5069
+ Computes Sigmoid Linear Unit of input element-wise. The SiLU function is defined as:
5070
+
5071
+ .. math::
5072
+
5073
+ \text{SiLU}(x) = x * \sigma(x),
5074
+
5075
+ where :math:`x` is an element of the input, :math:`\sigma(x)` is Sigmoid function.
5076
+
5077
+ .. math::
5078
+
5079
+ \text{sigma}(x_i) = \frac{1}{1 + \exp(-x_i)},
5080
+
5081
+ SiLU Function Graph:
5082
+
5083
+ .. image:: ../images/SiLU.png
5084
+ :align: center
5085
+
5086
+ Args:
5087
+ input (Tensor): `input` is :math:`x` in the preceding formula. Input with the data type
5088
+ float16 or float32.
5089
+ inplace (bool, optional): If it is ``True``, enable the in place update function.
5090
+ Default value: ``False``.
5091
+
5092
+ Returns:
5093
+ Tensor, with the same type and shape as the `input`.
5094
+
5095
+ Raises:
5096
+ TypeError: If dtype of `input` is neither float16 nor float32.
5097
+
5098
+ Supported Platforms:
5099
+ ``Ascend`` ``GPU`` ``CPU``
5100
+
5101
+ Examples:
5102
+ >>> import mindspore
5103
+ >>> from mindspore import Tensor, mint
5104
+ >>> import numpy as np
5105
+ >>> input = Tensor(np.array([-1, 2, -3, 2, -1]), mindspore.float16)
5106
+ >>> output = mint.nn.functional.silu(input, inplace=True)
5107
+ >>> print(output)
5108
+ [-0.269 1.762 -0.1423 1.762 -0.269]
5109
+ """
5110
+ return inplace_silu_op(input)
5111
+
5112
+
5121
5113
  def inplace_stop_gradient(input):
5122
5114
  r"""
5123
5115
 
@@ -5159,9 +5151,6 @@ def inplace_threshold(input, threshold, value):
5159
5151
  \text{value}, &\text{ otherwise }
5160
5152
  \end{cases}
5161
5153
 
5162
- .. warning::
5163
- This is an experimental API that is subject to change or deletion.
5164
-
5165
5154
  Args:
5166
5155
  input (Tensor): The input Tensor.
5167
5156
  threshold (Union[int, float]): The value of the threshold.
@@ -5202,7 +5191,7 @@ def irfft2(input, s=None, dim=(-2, -1), norm=None):
5202
5191
 
5203
5192
  Note:
5204
5193
  - `irfft2` is currently only used in `mindscience` scientific computing scenarios and
5205
- dose not support other usage scenarios.
5194
+ does not support other usage scenarios.
5206
5195
  - `irfft2` is not supported on Windows platform yet.
5207
5196
 
5208
5197
  Args:
@@ -5260,7 +5249,7 @@ def irfftn(input, s=None, dim=None, norm=None):
5260
5249
 
5261
5250
  Note:
5262
5251
  - `irfftn` is currently only used in `mindscience` scientific computing scenarios and
5263
- dose not support other usage scenarios.
5252
+ does not support other usage scenarios.
5264
5253
  - `irfftn` is not supported on Windows platform yet.
5265
5254
 
5266
5255
  Args:
@@ -5319,7 +5308,7 @@ def irfft(input, n=None, dim=-1, norm=None):
5319
5308
 
5320
5309
  Note:
5321
5310
  - `irfft` is currently only used in `mindscience` scientific computing scenarios and
5322
- dose not support other usage scenarios.
5311
+ does not support other usage scenarios.
5323
5312
  - `irfft` is not supported on Windows platform yet.
5324
5313
 
5325
5314
  Args:
@@ -6605,6 +6594,254 @@ def mm_ext(input, mat2):
6605
6594
  return mm_ext_op(input, mat2)
6606
6595
 
6607
6596
 
6597
+ def moe_distribute_combine(expand_x, expert_ids, expand_idx, ep_send_counts, expert_scales, ep_world_size, ep_rank_id, moe_expert_num, tp_send_counts=None, x_active_mask=None, activate_scale=None, weight_scale=None, group_list=None, expand_scales=None, group_ep=None, group_tp=None, tp_world_size=0, tp_rank_id=0, expert_shard_type=0, shared_expert_num=0, shared_export_rank_num=0, global_bs=0, out_dtype=0, common_quant_mode=0, group_list_type=0):
6598
+ r"""
6599
+ Parallel communication for Mixture of Experts (MoE). When Tensor Parallelism (TP) communication exists,
6600
+ it first performs ReduceScatter communication followed by Expert Parallelism (EP) AllToAllV communication.
6601
+ Otherwise, only EP AllToAllV communication is performed. Finally multiply the received data by weight and
6602
+ add them up.
6603
+
6604
+ Notes:
6605
+ This function must be used in conjunction with function `moe_distribute_dispatch`.
6606
+ - A: Maximum tokens to dispatch per rank:
6607
+ - For shared experts: A = BS * ep_world_size * shared_expert_num / shared_expert_rank_num
6608
+ - For MoE experts:
6609
+ - When global_bs = 0: A >= BS * ep_world_size * min(local_expert_num, K)
6610
+ - When global_bs != 0: A >= global_bs * min(local_expert_num, K)
6611
+ - H (hidden size): Dimension of each token's hidden state
6612
+ - Ascend 910B: 0 < H <= 7168, must be multiple of 32
6613
+ - Ascend 910_93: H = 7168
6614
+ - BS (batch sequence size): Number of tokens processed per rank
6615
+ - Ascend 910B: 0 < BS <= 256
6616
+ - Ascend 910_93: 0 < BS <= 512
6617
+ - K: Number of experts selected per token (0 < K <= 8 and K <= moe_expert_num)
6618
+ - server_num: Number of server nodes (supports 2, 4, 8)
6619
+ - local_expert_num: Number of experts per rank:
6620
+ - Shared expert ranks: local_expert_num = 1
6621
+ - MoE expert ranks: local_expert_num = moe_expert_num / (ep_world_size - shared_expert_rank_num)
6622
+ (TP communication not supported when local_expert_num > 1)
6623
+
6624
+ Inputs:
6625
+ - **expand_x** (Tensor) - Expanded token features. 2D tensor [A, H] with dtype matching input.
6626
+ Supported dtypes: float16, bfloat16, int8. Format: ND, non-contiguous allowed.
6627
+ - **expert_ids** (Tensor) - Top-K expert indices for each token. 2D int32 tensor with shape [BS, K].
6628
+ Format: ND, non-contiguous allowed.
6629
+ - **expand_idx** (Tensor) - Token counts per expert; it is the `expand_idx` output of the dispatch operation.
6630
+ 1D int32 tensor [BS*K]. Format: ND, non-contiguous allowed.
6631
+ - **ep_send_counts** (Tensor) - Tokens that each EP rank needs to send, it's the output of dispatch operation.
6632
+ - Ascend 910B: 1D int32 tensor [moe_expert_num + 2 * global_bs * K * server_num]
6633
+ - Ascend 910_93: 1D int32 tensor [ep_world_size * max(tp_world_size,1) * local_expert_num]
6634
+ Format: ND, non-contiguous allowed.
6635
+ - **expert_scales** (Tensor) - Top-K expert weights per token.
6636
+ - **ep_world_size** (int) - EP domain size.
6637
+ - Ascend 910B: Supports 16, 32, 64.
6638
+ - Ascend 910_93: Supports 8, 16, 32, 64, 128, 144, 256, 288.
6639
+ - **ep_rank_id** (int) - Local rank ID in EP domain [0, ep_world_size), must be unique per domain.
6640
+ - **moe_expert_num** (int) - Number of MoE experts (0 < moe_expert_num <= 256),
6641
+ must satisfy moe_expert_num % (ep_world_size-shared_expert_rank_num) = 0.
6642
+ - **tp_send_counts** (Tensor) - Tokens that each TP rank needs to send (when TP exists). It's the output of dispatch operation. Default: ``None``.
6643
+ - Ascend 910B: Not supported.
6644
+ - Ascend 910_93: 1D int32 tensor [tp_world_size] when TP exists. Format: ND, non-contiguous allowed.
6645
+ - **x_active_mask** (Tensor) - Reserved parameter. Default: ``None``.
6646
+ - **activate_scale** (Tensor) - Reserved parameter. Default: ``None``.
6647
+ - **weight_scale** (Tensor) - Reserved parameter. Default: ``None``.
6648
+ - **group_list** (Tensor) - Reserved parameter. Default: ``None``.
6649
+ - **expand_scales** (Tensor) - Output of dispatch operation. Default: ``None``.
6650
+ - Ascend 910B: 1D float32 tensor [A]. Format: ND, non-contiguous allowed.
6651
+ - Ascend 910_93: Unsupported.
6652
+ - **group_ep** (str) - EP communication domain name (string length 1-127), must differ from group_tp. Default: ``None``.
6653
+ - **group_tp** (str) - TP communication domain name. Default: ``None``.
6654
+ - Ascend 910B: Unsupported (pass empty string).
6655
+ - Ascend 910_93: When TP communication exists, string length 1-127, must differ from group_ep.
6656
+ - **tp_world_size** (int) - TP domain size. Default: ``0``.
6657
+ - Ascend 910B: Unsupported (pass 0).
6658
+ - Ascend 910_93: 0/1 means no TP communication; only 2 supported when TP exists.
6659
+ - **tp_rank_id** (int) - Local rank ID in TP domain. Default: ``0``.
6660
+ - Ascend 910B: Unsupported (pass 0).
6661
+ - Ascend 910_93: [0,1], unique per domain; pass 0 when no TP communication.
6662
+ - **expert_shard_type** (int) - Shared expert distribution type. Default: ``0``.
6663
+ - Ascend 910B: Unsupported (pass 0).
6664
+ - Ascend 910_93: Currently only 0 (shared experts precede MoE experts).
6665
+ - **shared_expert_num** (int) - Number of shared experts. Default: ``0``.
6666
+ - Ascend 910B: Unsupported (pass 0).
6667
+ - Ascend 910_93: Currently 0 (none) or 1 (one shared expert).
6668
+ - **shared_expert_rank_num** (int) - Number of ranks hosting shared experts. Default: ``0``.
6669
+ - Ascend 910B: Unsupported (pass 0).
6670
+ - Ascend 910_93: [0, ep_world_size-1), must satisfy ep_world_size % shared_expert_rank_num = 0 when non-zero.
6671
+ - **global_bs** (int) - Global batch size across EP domain. Default: ``0``.
6672
+ - Ascend 910B: 256*ep_world_size when BS varies per rank; 0 or BS*ep_world_size when uniform.
6673
+ - Ascend 910_93: 0 or BS*ep_world_size.
6674
+ - **out_dtype** (int) - Specify the type of output x. Reserved parameter (pass 0 in current version). Default: ``0``.
6675
+ - **common_quant_mode** (int) - Communication quantization type. Reserved parameter (pass 0 in current version). Default: ``0``.
6676
+ - **group_list_type** (int) - The format of group_list. Reserved parameter (pass 0 in current version). Default: ``0``.
6677
+
6678
+ Outputs:
6679
+ - **x** (Tensor) - Processed tokens. 2D tensor [BS, H] with dtype matching input `expand_x`.
6680
+
6681
+ Raises:
6682
+ TypeError: If input dtypes don't match specifications.
6683
+ ValueError: If input values violate constraints (e.g., invalid expert indices).
6684
+ RuntimeError: If communication domain configuration is invalid.
6685
+
6686
+ Supported Platforms:
6687
+ ``Ascend``
6688
+
6689
+ Examples:
6690
+ >>> # EP-only communication example (Ascend 910B)
6691
+ >>> import mindspore as ms
6692
+ >>> from mindspore import Tensor
6693
+ >>> from mindspore import ops
6694
+ >>> from mindspore.communication import init, get_rank, GlobalComm
6695
+ >>> from mindspore.ops.auto_generate import moe_distribute_dispatch, moe_distribute_combine
6696
+ >>> import numpy as np
6697
+ >>> bs = 8
6698
+ >>> h = 7168
6699
+ >>> k = 8
6700
+ >>> ep_world_size = 16
6701
+ >>> moe_expert_num = 16
6702
+ >>> global_bs = bs * ep_world_size
6703
+ >>> x = Tensor(np.random.randn(bs, h), ms.float16)
6704
+ >>> expert_ids = Tensor(np.random.randint(0, moe_expert_num, (bs, k)), ms.int32)
6705
+ >>> expert_scales = Tensor(np.random.randn(bs, k), ms.float32)
6706
+ >>> init()
6707
+ >>> rank_id = get_rank()
6708
+ >>> expand_x, _, expand_idx, _, ep_recv_count, _, expand_scale = moe_distribute_dispatch(
6709
+ ... x, expert_ids, ep_world_size, rank_id, moe_expert_num, expert_scales=expert_scales,
6710
+ ... group_ep=GlobalComm.WORLD_COMM_GROUP)
6711
+ >>> out_x = moe_distribute_combine(
6712
+ ... expand_x, expert_ids, expand_idx, ep_recv_count, expert_scales, ep_world_size, rank_id,
6713
+ ... moe_expert_num, group_ep=GlobalComm.WORLD_COMM_GROUP)
6714
+ >>> print(out_x.shape)
6715
+ (8, 7168)
6716
+ """
6717
+ return moe_distribute_combine_op(expand_x, expert_ids, expand_idx, ep_send_counts, expert_scales, ep_world_size, ep_rank_id, moe_expert_num, tp_send_counts, x_active_mask, activate_scale, weight_scale, group_list, expand_scales, group_ep, group_tp, tp_world_size, tp_rank_id, expert_shard_type, shared_expert_num, shared_export_rank_num, global_bs, out_dtype, common_quant_mode, group_list_type)
6718
+
6719
+
6720
+ def moe_distribute_dispatch(x, expert_ids, ep_world_size, ep_rank_id, moe_expert_num, expert_scales=None, scales=None, x_active_mask=None, group_ep=None, group_tp=None, tp_world_size=0, tp_rank_id=0, expert_shard_type=0, shared_expert_num=0, shared_expert_rank_num=0, quant_mode=0, global_bs=0, expert_token_nums_type=0):
6721
+ r"""
6722
+ Performs token data quantization (optional) and parallel communication for Mixture of Experts (MoE).
6723
+ When Tensor Parallelism (TP) communication exists, it first performs Expert Parallelism (EP) AllToAllV
6724
+ communication followed by TP AllGatherV communication. Otherwise, only EP AllToAllV communication is performed.
6725
+
6726
+ Notes:
6727
+ - A: Maximum tokens to dispatch per rank:
6728
+ - For shared experts: A = BS * ep_world_size * shared_expert_num / shared_expert_rank_num
6729
+ - For MoE experts:
6730
+ - When global_bs = 0: A >= BS * ep_world_size * min(local_expert_num, K)
6731
+ - When global_bs != 0: A >= global_bs * min(local_expert_num, K)
6732
+ - H (hidden size): Dimension of each token's hidden state
6733
+ - Ascend 910B: 0 < H <= 7168, must be multiple of 32
6734
+ - Ascend 910_93: H = 7168
6735
+ - BS (batch sequence size): Number of tokens processed per rank
6736
+ - Ascend 910B: 0 < BS <= 256
6737
+ - Ascend 910_93: 0 < BS <= 512
6738
+ - K: Number of experts selected per token (0 < K <= 8 and K <= moe_expert_num)
6739
+ - server_num: Number of server nodes (supports 2, 4, 8)
6740
+ - local_expert_num: Number of experts per rank:
6741
+ - Shared expert ranks: local_expert_num = 1
6742
+ - MoE expert ranks: local_expert_num = moe_expert_num / (ep_world_size - shared_expert_rank_num)
6743
+ (TP communication not supported when local_expert_num > 1)
6744
+
6745
+ Inputs:
6746
+ - **x** (Tensor) - Input token data to be sent. 2D tensor with shape [BS, H].
6747
+ Supported dtypes: float16, bfloat16. Format: ND, non-contiguous allowed.
6748
+ - **expert_ids** (Tensor) - Top-K expert indices for each token. 2D int32 tensor with shape [BS, K].
6749
+ Format: ND, non-contiguous allowed.
6750
+ - **ep_world_size** (int64) - EP domain size.
6751
+ - Ascend 910B: Supports 16, 32, 64.
6752
+ - Ascend 910_93: Supports 8, 16, 32, 64, 128, 144, 256, 288.
6753
+ - **ep_rank_id** (int64) - Local rank ID in EP domain [0, ep_world_size), must be unique per domain.
6754
+ - **moe_expert_num** (int64) - Number of MoE experts (0 < moe_expert_num <= 256),
6755
+ must satisfy moe_expert_num % (ep_world_size-shared_expert_rank_num) = 0.
6756
+ - **expert_scales** (Tensor) - Top-K expert weights per token.
6757
+ - Ascend 910B: 2D float32 tensor [BS, K], ND format, non-contiguous allowed.
6758
+ - Ascend 910_93: Unsupported (pass nullptr).
6759
+ - **scales** (Tensor) - Expert weights. 2D float32 tensor with shape [shared_expert_num + moe_expert_num, H].
6760
+ Pass nullptr for non-quantized scenarios. Format: ND, non-contiguous allowed.
6761
+ Note: On Ascend 910B, must be nullptr when HCCL_INTRA_PCIE_ENABLE=1 and HCCL_INTRA_ROCE_ENABLE=0.
6762
+ - **x_active_mask** (Tensor) - Reserved parameter (pass nullptr in current version).
6763
+ - **group_ep** (str) - EP communication domain name (string length 1-127), must differ from group_tp.
6764
+ - **group_tp** (str) - TP communication domain name.
6765
+ - Ascend 910B: Unsupported (pass empty string).
6766
+ - Ascend 910_93: When TP communication exists, string length 1-127, must differ from group_ep.
6767
+ - **tp_world_size** (int64) - TP domain size.
6768
+ - Ascend 910B: Unsupported (pass 0).
6769
+ - Ascend 910_93: 0/1 means no TP communication; only 2 supported when TP exists.
6770
+ - **tp_rank_id** (int64) - Local rank ID in TP domain.
6771
+ - Ascend 910B: Unsupported (pass 0).
6772
+ - Ascend 910_93: [0,1], unique per domain; pass 0 when no TP communication.
6773
+ - **expert_shard_type** (int64) - Shared expert distribution type.
6774
+ - Ascend 910B: Unsupported (pass 0).
6775
+ - Ascend 910_93: Currently only 0 (shared experts precede MoE experts).
6776
+ - **shared_expert_num** (int64) - Number of shared experts.
6777
+ - Ascend 910B: Unsupported (pass 0).
6778
+ - Ascend 910_93: Currently 0 (none) or 1 (one shared expert).
6779
+ - **shared_expert_rank_num** (int64) - Number of ranks hosting shared experts.
6780
+ - Ascend 910B: Unsupported (pass 0).
6781
+ - Ascend 910_93: [0, ep_world_size-1), must satisfy ep_world_size % shared_expert_rank_num = 0 when non-zero.
6782
+ - **quant_mode** (int64) - Quantization mode: 0 (none), 2 (dynamic quantization).
6783
+ - **global_bs** (int64) - Global batch size across EP domain.
6784
+ - Ascend 910B: 256*ep_world_size when BS varies per rank; 0 or BS*ep_world_size when uniform.
6785
+ - Ascend 910_93: 0 or BS*ep_world_size.
6786
+ - **expert_token_nums_type** (int64) - Semantic meaning of expert_token_nums output:
6787
+ 0 (prefix sums), 1 (raw counts).
6788
+
6789
+ Outputs:
6790
+ - **expand_x** (Tensor) - Expanded token features. 2D tensor [A, H] with dtype matching input.
6791
+ Supported dtypes: float16, bfloat16, int8. Format: ND, non-contiguous allowed.
6792
+ - **dynamic_scales** (Tensor) - Dynamic quantization scales (when quant_mode=2).
6793
+ 1D float32 tensor [A]. Format: ND, non-contiguous allowed.
6794
+ - **expand_idx** (Tensor) - Token counts per expert for combine operation.
6795
+ 1D int32 tensor [BS*K]. Format: ND, non-contiguous allowed.
6796
+ - **expert_token_nums** (Tensor) - Tokens received per expert.
6797
+ 1D int64 tensor [local_expert_num]. Format: ND, non-contiguous allowed.
6798
+ - **ep_recv_counts** (Tensor) - Tokens received from each EP rank.
6799
+ - Ascend 910B: 1D int32 tensor [moe_expert_num + 2 * global_bs * K * server_num]
6800
+ - Ascend 910_93: 1D int32 tensor [ep_world_size * max(tp_world_size,1) * local_expert_num]
6801
+ Format: ND, non-contiguous allowed.
6802
+ - **tp_recv_counts** (Tensor) - Tokens received from each TP rank (when TP exists).
6803
+ - Ascend 910B: Not supported.
6804
+ - Ascend 910_93: 1D int32 tensor [tp_world_size] when TP exists. Format: ND, non-contiguous allowed.
6805
+ - **expand_scales** (Tensor) - Output token weights for combine operation.
6806
+ - Ascend 910B: 1D float32 tensor [A]. Format: ND, non-contiguous allowed.
6807
+ - Ascend 910_93: Unsupported.
6808
+
6809
+ Raises:
6810
+ TypeError: If input dtypes don't match specifications.
6811
+ ValueError: If input values violate constraints (e.g., invalid expert indices).
6812
+ RuntimeError: If communication domain configuration is invalid.
6813
+
6814
+ Supported Platforms:
6815
+ ``Ascend``
6816
+
6817
+ Examples:
6818
+ >>> # EP-only communication example (Ascend 910B)
6819
+ >>> import mindspore as ms
6820
+ >>> from mindspore import Tensor
6821
+ >>> from mindspore import ops
6822
+ >>> from mindspore.communication import init, get_rank, GlobalComm
6823
+ >>> from mindspore.ops.auto_generate import moe_distribute_dispatch
6824
+ >>> import numpy as np
6825
+ >>> bs = 8
6826
+ >>> h = 7168
6827
+ >>> k = 8
6828
+ >>> ep_world_size = 16
6829
+ >>> moe_expert_num = 16
6830
+ >>> global_bs = bs * ep_world_size
6831
+ >>> x = Tensor(np.random.randn(bs, h), ms.float16)
6832
+ >>> expert_ids = Tensor(np.random.randint(0, moe_expert_num, (bs, k)), ms.int32)
6833
+ >>> expert_scales = Tensor(np.random.randn(bs, k), ms.float32)
6834
+ >>> init()
6835
+ >>> rank_id = get_rank()
6836
+ >>> out = moe_distribute_dispatch(
6837
+ ... x, expert_ids, ep_world_size, rank_id, moe_expert_num, expert_scales=expert_scales,
6838
+ ... group_ep=GlobalComm.WORLD_COMM_GROUP)
6839
+ >>> print(out[0].shape) # expand_x
6840
+ (128, 7168)
6841
+ """
6842
+ return moe_distribute_dispatch_op(x, expert_ids, ep_world_size, ep_rank_id, moe_expert_num, expert_scales, scales, x_active_mask, group_ep, group_tp, tp_world_size, tp_rank_id, expert_shard_type, shared_expert_num, shared_expert_rank_num, quant_mode, global_bs, expert_token_nums_type)
6843
+
6844
+
6608
6845
  def moe_token_permute_grad(permuted_tokens_grad, sorted_indices, num_topk=1, padded_mode=False):
6609
6846
  r"""
6610
6847
 
@@ -6618,11 +6855,10 @@ def moe_token_permute(tokens, indices, num_out_tokens=None, padded_mode=False):
6618
6855
 
6619
6856
  .. warning::
6620
6857
  - It is only supported on Atlas A2 Training Series Products.
6621
- - The input `tokens` only supports the bfloat16 data type in the current version.
6622
6858
  - When `indices` is 2-D, the size of the second dim must be less than or equal to 512.
6623
6859
 
6624
6860
  Args:
6625
- tokens (Tensor): The input token tensor to be permuted. The dtype is bfloat16.
6861
+ tokens (Tensor): The input token tensor to be permuted. The dtype is bfloat16, float16 or float32.
6626
6862
  The shape is :math:`(num\_tokens, hidden\_size)` , where `num_tokens` and `hidden_size` are positive integers.
6627
6863
  indices (Tensor): The tensor specifies indices used to permute the tokens. The dtype is int32 or int64.
6628
6864
  The shape is :math:`(num\_tokens, topk)` or :math:`(num\_tokens,)`, where `num_tokens` and `topk` are positive integers.
@@ -6638,7 +6874,6 @@ def moe_token_permute(tokens, indices, num_out_tokens=None, padded_mode=False):
6638
6874
 
6639
6875
  Raises:
6640
6876
  TypeError: If `tokens` or `indices` is not a Tensor.
6641
- TypeError: If dtype of `tokens` is not bfloat16.
6642
6877
  TypeError: If dtype of `indices` is not int32 or int64.
6643
6878
  TypeError: If specified `num_out_tokens` is not an integer.
6644
6879
  TypeError: If specified `padded_mode` is not a bool.
@@ -6680,60 +6915,6 @@ def moe_token_unpermute_grad(permuted_tokens, unpermuted_tokens_grad, sorted_ind
6680
6915
  return moe_token_unpermute_grad_op(permuted_tokens, unpermuted_tokens_grad, sorted_indices, probs, padded_mode, restore_shape)
6681
6916
 
6682
6917
 
6683
- def moe_token_unpermute(permuted_tokens, sorted_indices, probs=None, padded_mode=False, restore_shape=None):
6684
- r"""
6685
- Unpermute a tensor of permuted tokens based on sorted indices, and optionally merge the tokens with their corresponding probabilities.
6686
-
6687
- .. warning::
6688
- - It is only supported on Atlas A2 Training Series Products.
6689
- - The inputs `permuted_tokens` and `probs` only support the bfloat16 data type in the current version.
6690
- - `sorted_indices` must not have duplicate values, otherwise the result is undefined.
6691
-
6692
- Args:
6693
- permuted_tokens (Tensor): The tensor of permuted tokens to be unpermuted.
6694
- The shape is :math:`[num\_tokens * topk, hidden\_size]` , where `num_tokens`, `topk` and `hidden_size` are positive integers.
6695
- sorted_indices (Tensor): The tensor of sorted indices used to unpermute the tokens.
6696
- The shape is :math:`[num\_tokens * topk,]`, where `num_tokens` and `topk` are positive integers.
6697
- It only supports the int32 data type.
6698
- probs (Tensor, optional): The tensor of probabilities corresponding to the permuted tokens.
6699
- If provided, the unpermuted tokens will be merged with their respective probabilities.
6700
- The shape is :math:`[num\_tokens, topk]`, where `num_tokens` and `topk` are positive integers. Default: ``None`` .
6701
- padded_mode (bool, optional): If ``True``, indicating the indices are padded to denote selected tokens per expert. Default: ``False`` .
6702
- restore_shape (Union[tuple[int], list[int]], optional): The input shape before permutation, only used in padding mode. Default: ``None`` .
6703
-
6704
- Returns:
6705
- Tensor, with the same dtype as `permuted_tokens`. If `padded_mode` is ``False``, the shape will be [`num_tokens`, `hidden_size`].
6706
- If `padded_mode` is ``True``, the shape will be specified by `restore_shape`.
6707
-
6708
- Raises:
6709
- TypeError: If `permuted_tokens` is not a Tensor.
6710
- ValueError: Only supported when `padded_mode` is ``False``.
6711
-
6712
- Supported Platforms:
6713
- ``Ascend``
6714
-
6715
- Examples:
6716
- >>> import mindspore
6717
- >>> from mindspore import Tensor, ops
6718
- >>> permuted_token = Tensor([
6719
- ... [1, 1, 1],
6720
- ... [0, 0, 0],
6721
- ... [0, 0, 0],
6722
- ... [3, 3, 3],
6723
- ... [2, 2, 2],
6724
- ... [1, 1, 1],
6725
- ... [2, 2, 2],
6726
- ... [3, 3, 3]], dtype=mindspore.bfloat16)
6727
- >>> sorted_indices = Tensor([0, 6, 7, 5, 3, 1, 2, 4], dtype=mindspore.int32)
6728
- >>> out = ops.moe_token_unpermute(permuted_token, sorted_indices)
6729
- >>> out.shape
6730
- (8, 3)
6731
-
6732
-
6733
- """
6734
- return moe_token_unpermute_op(permuted_tokens, sorted_indices, probs, padded_mode, restore_shape)
6735
-
6736
-
6737
6918
  def mse_loss_ext(input, target, reduction='mean'):
6738
6919
  r"""
6739
6920
  Calculates the mean squared error between the predicted value and the label value.
@@ -7307,65 +7488,6 @@ def prod_ext(input, dim=None, keepdim=False, dtype=None):
7307
7488
  return prod_ext_op(input, dim, keepdim, dtype)
7308
7489
 
7309
7490
 
7310
- def prompt_k_v_cache(cache, update, valid_seq_len, batch_index, seq_len_axis, new_max_seq_len, cur_max_seq_len, align_mode='LEFT'):
7311
- r"""
7312
- The PromptKVCache is used for prefill the KVCache of transformer network.
7313
-
7314
- Args:
7315
- cache (Tensor): The cahe tensor with data type of int8, uint8, int16, uint16, float16, float32 and int32.
7316
- When format is BHSD, cache tensor of shape
7317
- :math:`(cache\_batch\_size, num\_head, max\_seq\_length, size\_pre\_head)`.
7318
- When format is BSD, cache tensor of shape
7319
- :math:`(cache\_batch\_size, max\_seq\_length, hidden\_size)`.
7320
- update (Tensor]): The tensor which is used to update the cache tensor. Same data type as cache tensor.
7321
- When format is BHSD, cache tensor of shape
7322
- :math:`(update\_batch\_size, num\_head, max\_seq\_length, size\_pre\_head)`.
7323
- When format is BSD, cache tensor of shape
7324
- :math:`(update\_batch\_size, max\_seq\_length, hidden\_size)`.
7325
- valid_seq_len (Tensor): The valid_seq_len tensor with data type of int64.
7326
- Valid_seq_len tensor of shape :math:`(update\_batch\_size)`.
7327
- batch_index (Tensor): The batch_index tensor with data type of int64.
7328
- Batch_index tensor of shape :math:`(update\_batch\_size)`. Indicate that which batch of cache tensor is going to be update.
7329
- seq_len_axis (Tensor): The seq_len_axis indicate which axis is seq_eln, set to '1' or '2'. Not able for now.
7330
- new_max_seq_len (Tensor): The new_max_seq_len tensor with data type of int64.
7331
- New_max_seq_len tensor of shape :math:`(1)`.
7332
- Indicate that user want to change the shape of cache tensor from
7333
- :math:`(batch\_size, num_head, max\_seq\_length, hidden\_size)` to
7334
- :math:`(batch\_size * max\_seq\_length / new\_max\_seq\_length, num_head, new\_max\_seq\_length, hidden\_size)`
7335
- to update the cache tensor. This will not real change the shape of `cache` tensor. Not able for now.
7336
- cur_max_seq_len (Tensor): The new_max_seq_len tensor with data type of int64.
7337
- Cur_max_seq_len tensor of shape :math:`(1)`. Keep the current seq_len of cache tensor. Not abel for now.
7338
- align_mode (str): indicate which axis is seq_len. Default: left.
7339
-
7340
-
7341
- Outputs:
7342
- With same data type and same shape as `cache` tensor.
7343
-
7344
- Supported Platforms:
7345
- ``Ascend``
7346
-
7347
- Examples:
7348
- >>> from mindspore import Tensor
7349
- >>> from mindspore.ops.operations import _inner_ops
7350
- >>> b = 4
7351
- >>> h = 40
7352
- >>> max_s = 1024
7353
- >>> s = 256
7354
- >>> d = 128
7355
- >>> cache = Tensor(np.random.randn(b, h, max_s, d).astype(np.float16))
7356
- >>> update = Tensor(np.random.randn(b, h, s, d).astype(np.float16))
7357
- >>> valid_seq_len = Tensor(np.random.randint(-1, s, size=ub).astype(np.int64))
7358
- >>> batch_index = Tensor(np.random.choice(np.arange(-1, b), size=ub, replace=False).astype(np.int64))
7359
- >>> new_max_seq_len = Tensor(np.random.randn(1).astype(np.int64))
7360
- >>> cur_max_seq_len = Tensor(np.random.randn(1).astype(np.int64))
7361
- >>> prompt_kv_cache = _inner_ops.PromptKVCache(0)
7362
- >>> output = prompt_kv_cache(cache, update, valid_seq_len, batch_index, Tensor(2), new_max_seq_len, cur_max_seq_len)
7363
- >>> print(cache)
7364
- """
7365
- prompt_k_v_cache_op = _get_cache_prim(PromptKVCache)(align_mode)
7366
- return prompt_k_v_cache_op(cache, update, valid_seq_len, batch_index, seq_len_axis, new_max_seq_len, cur_max_seq_len)
7367
-
7368
-
7369
7491
  def randperm(n, seed=0, offset=0, dtype=mstype.int64):
7370
7492
  r"""
7371
7493
  Generates random permutation of integers from 0 to n-1.
@@ -7701,7 +7823,7 @@ def rfft2(input, s=None, dim=(-2, -1), norm=None):
7701
7823
 
7702
7824
  Note:
7703
7825
  - `rfft2` is currently only used in `mindscience` scientific computing scenarios and
7704
- dose not support other usage scenarios.
7826
+ does not support other usage scenarios.
7705
7827
  - `rfft2` is not supported on Windows platform yet.
7706
7828
 
7707
7829
  Args:
@@ -7762,7 +7884,7 @@ def rfftfreq(n, d=1.0, dtype=None):
7762
7884
 
7763
7885
  Note:
7764
7886
  - `rfftfreq` is currently only used in `mindscience` scientific computing scenarios and
7765
- dose not support other usage scenarios.
7887
+ does not support other usage scenarios.
7766
7888
  - `rfftfreq` is not supported on Windows platform yet.
7767
7889
 
7768
7890
  Args:
@@ -7795,7 +7917,7 @@ def rfftn(input, s=None, dim=None, norm=None):
7795
7917
 
7796
7918
  Note:
7797
7919
  - `rfftn` is currently only used in `mindscience` scientific computing scenarios and
7798
- dose not support other usage scenarios.
7920
+ does not support other usage scenarios.
7799
7921
  - `rfftn` is not supported on Windows platform yet.
7800
7922
 
7801
7923
  Args:
@@ -7855,7 +7977,7 @@ def rfft(input, n=None, dim=-1, norm=None):
7855
7977
 
7856
7978
  Note:
7857
7979
  - `rfft` is currently only used in `mindscience` scientific computing scenarios and
7858
- dose not support other usage scenarios.
7980
+ does not support other usage scenarios.
7859
7981
  - `rfft` is not supported on Windows platform yet.
7860
7982
 
7861
7983
  Args:
@@ -8784,7 +8906,7 @@ def solve_triangular(a, b, trans=0, lower=False, unit_diagonal=False):
8784
8906
 
8785
8907
  Note:
8786
8908
  - `solve_triangular` is currently only used in `mindscience` scientific computing scenarios and
8787
- dose not support other usage scenarios.
8909
+ does not support other usage scenarios.
8788
8910
  - `solve_triangular` is not supported on Windows platform yet.
8789
8911
 
8790
8912
  Args:
@@ -9215,9 +9337,6 @@ def swiglu(input, dim=-1):
9215
9337
  Computes SwiGLU (Swish-Gated Linear Unit activation function) of input tensor.
9216
9338
  SwiGLU is a variant of the :class:`mindspore.ops.GLU` activation function, it is defined as:
9217
9339
 
9218
- .. warning::
9219
- This is an experimental API that is subject to change or deletion.
9220
-
9221
9340
  .. math::
9222
9341
  {SwiGLU}(a, b)= Swish(a) \otimes b
9223
9342
 
@@ -9225,6 +9344,9 @@ def swiglu(input, dim=-1):
9225
9344
  Swish(a)=a :math:`\sigma` (a), :math:`\sigma` is the :func:`mindspore.ops.sigmoid` activation function
9226
9345
  and :math:`\otimes` is the Hadamard product.
9227
9346
 
9347
+ .. warning::
9348
+ Only supported on Atlas A2 training series.
9349
+
9228
9350
  Args:
9229
9351
  input (Tensor): Tensor to be split. It has shape :math:`(\ast_1, N, \ast_2)`
9230
9352
  where `*` means, any number of additional dimensions. :math:`N` must be divisible by 2.
@@ -9457,6 +9579,30 @@ def topk_ext(input, k, dim=-1, largest=True, sorted=True):
9457
9579
  return topk_ext_op(input, k, dim, largest, sorted)
9458
9580
 
9459
9581
 
9582
+ def topprouter(input, capacity, expert_num, drop_type=0, threshold=0.0, router_prob=0.0):
9583
+ r"""
9584
+ TopPRouter implementation in MOE.
9585
+
9586
+ Inputs:
9587
+ - **input** (Tensor) - Input Tensor of 3D, supporting types: [int32, int64].
9588
+ - **capacity** (Int64) - The maximum number of tokens each expert can handle.
9589
+ - **expert_num** (Int64) - The number of experts.
9590
+ - **drop_type** (Int64) - S-Drop/K-Drop, 0 means S-Drop, 1 means K-Drop, default 0.
9591
+ - **threshold** (float32) - Expert threshold, default 0.
9592
+ - **router_prob** (Tensor) - Topk prob Tensor of 2D, supporting types:[float32], default 0.
9593
+
9594
+ Outputs:
9595
+ tuple(Tensor), tuple of 2 tensors, `dispatch_index` and `combine_index`.
9596
+
9597
+ - dispatch_index (Tensor) - Token ID processed by each expert.
9598
+ - combine_index (Tensor) - The combine index of each token.
9599
+
9600
+ Supported Platforms:
9601
+ ``Ascend``
9602
+ """
9603
+ return topprouter_op(input, capacity, expert_num, drop_type, threshold, router_prob)
9604
+
9605
+
9460
9606
  def trace_ext(input):
9461
9607
  r"""
9462
9608
  Returns a new tensor that is the sum of the `input` main trace.
@@ -10478,8 +10624,68 @@ def quant_batch_matmul(x1, x2, scale, offset=None, bias=None, pertokenScaleOptio
10478
10624
  return quant_batch_matmul_impl(x1, x2, scale, offset, bias, pertokenScaleOptional, transpose_x1, transpose_x2, dtype)
10479
10625
 
10480
10626
 
10627
+ def quant_matmul(x1, x2, scale, offset=None, pertoken_scale=None, bias=None, output_dtype=None, x1_dtype=None, x2_dtype=None, pertoken_scale_dtype=None, scale_dtype=None, group_sizes=None):
10628
+ r"""
10629
+
10630
+ """
10631
+ return quant_matmul_op(x1, x2, scale, offset, pertoken_scale, bias, output_dtype, x1_dtype, x2_dtype, pertoken_scale_dtype, scale_dtype, group_sizes)
10632
+
10633
+
10481
10634
  def weight_quant_batch_matmul(x, weight, antiquant_scale, antiquant_offset=None, quant_scale=None, quant_offset=None, bias=None, transpose_x=False, transpose_weight=False, antiquant_group_size=0):
10482
10635
  r"""
10483
10636
 
10484
10637
  """
10485
10638
  return weight_quant_batch_matmul_impl(x, weight, antiquant_scale, antiquant_offset, quant_scale, quant_offset, bias, transpose_x, transpose_weight, antiquant_group_size)
10639
+
10640
+
10641
+ def moe_token_unpermute(permuted_tokens, sorted_indices, probs=None, padded_mode=False, restore_shape=None):
10642
+ r"""
10643
+ Unpermute a tensor of permuted tokens based on sorted indices, and optionally merge the tokens with their corresponding probabilities.
10644
+
10645
+ .. warning::
10646
+ - It is only supported on Atlas A2 Training Series Products.
10647
+ - `sorted_indices` must not have duplicate values, otherwise the result is undefined.
10648
+
10649
+ Args:
10650
+ permuted_tokens (Tensor): The tensor of permuted tokens to be unpermuted.
10651
+ The shape is :math:`[num\_tokens * topk, hidden\_size]` , where `num_tokens`, `topk` and `hidden_size` are positive integers.
10652
+ sorted_indices (Tensor): The tensor of sorted indices used to unpermute the tokens.
10653
+ The shape is :math:`[num\_tokens * topk,]`, where `num_tokens` and `topk` are positive integers.
10654
+ It only supports the int32 data type.
10655
+ probs (Tensor, optional): The tensor of probabilities corresponding to the permuted tokens.
10656
+ If provided, the unpermuted tokens will be merged with their respective probabilities.
10657
+ The shape is :math:`[num\_tokens, topk]`, where `num_tokens` and `topk` are positive integers. Default: ``None`` .
10658
+ padded_mode (bool, optional): If ``True``, indicating the indices are padded to denote selected tokens per expert. Default: ``False`` .
10659
+ restore_shape (Union[tuple[int], list[int]], optional): The input shape before permutation, only used in padding mode. Default: ``None`` .
10660
+
10661
+ Returns:
10662
+ Tensor, with the same dtype as `permuted_tokens`. If `padded_mode` is ``False``, the shape will be [`num_tokens`, `hidden_size`].
10663
+ If `padded_mode` is ``True``, the shape will be specified by `restore_shape`.
10664
+
10665
+ Raises:
10666
+ TypeError: If `permuted_tokens` is not a Tensor.
10667
+ ValueError: Only supported when `padded_mode` is ``False``.
10668
+
10669
+ Supported Platforms:
10670
+ ``Ascend``
10671
+
10672
+ Examples:
10673
+ >>> import mindspore
10674
+ >>> from mindspore import Tensor, ops
10675
+ >>> permuted_token = Tensor([
10676
+ ... [1, 1, 1],
10677
+ ... [0, 0, 0],
10678
+ ... [0, 0, 0],
10679
+ ... [3, 3, 3],
10680
+ ... [2, 2, 2],
10681
+ ... [1, 1, 1],
10682
+ ... [2, 2, 2],
10683
+ ... [3, 3, 3]], dtype=mindspore.bfloat16)
10684
+ >>> sorted_indices = Tensor([0, 6, 7, 5, 3, 1, 2, 4], dtype=mindspore.int32)
10685
+ >>> out = ops.moe_token_unpermute(permuted_token, sorted_indices)
10686
+ >>> out.shape
10687
+ (8, 3)
10688
+
10689
+
10690
+ """
10691
+ return moe_token_unpermute_op(permuted_tokens, sorted_indices, probs, padded_mode, restore_shape)